package com.example.es.test;
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* Demo of Elasticsearch from/size pagination: runs one search against the
* "audit2" index starting at offset 0 with a page size of 1000, sorted by
* "operationtime" in descending order.
*
* @author
* @Description Usage of ES from-size pagination
* @date 2022/01/26 10:04
*/
public class ESTest_from_size {
// NOTE(review): the logger is created from ESTest_searchAfter.class rather than this class,
// so log lines are attributed to ESTest_searchAfter — looks like a copy-paste slip; confirm.
public static final Logger logger = LoggerFactory.getLogger(ESTest_searchAfter.class);
public static void main(String[] args) throws Exception{
// Captured up front; presumably used later to report the elapsed run time.
long startTime = System.currentTimeMillis();
// Create the ES client
RestHighLevelClient esClient = new RestHighLevelClient(
RestClient.builder(new HttpHost("localhost", 9200, "http"))
);
// 1. Create the searchRequest
SearchRequest searchRequest = new SearchRequest("audit2");
// 2. Specify the query conditions
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();// Original note: track_total_hits must be added, otherwise only 10000 is shown. NOTE(review): this builder does not call trackTotalHits — confirm whether that is intended.
// The first page in the UI corresponds to offset 0 in ES
sourceBuilder.from(0);
// Number of documents per page
sourceBuilder.size(1000);
// Set the sort value used for positioning (intended to be unique)
sourceBuilder.sort(SortBuilders.fieldSort("operationtime").order(SortOrder.DESC));
// Attach the sourceBuilder to the search request
searchRequest.source(sourceBuilder);
// Send the request
SearchResponse searchResponse = esClient.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] hits = searchResponse.getHits().getHits();
// NOTE(review): the snippet is cut off at this point in the source.
List
运行结果:
10:08:40.466 [main] INFO com.example.es.test.ESTest_searchAfter - 查询出来的数据个数为:1000
10:08:40.474 [main] INFO com.example.es.test.ESTest_searchAfter - 运行时间: 1506ms
现象:
如果 from-size 查询的数据超过 10000 条(即 from + size 超过 index.max_result_window 的默认值 10000),会报错。
package com.example.es.test;
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* Demo of Elasticsearch search_after pagination: runs a first search against
* the "audit2" index with a page size of 1000, sorted by "operationtime" in
* descending order, with total-hit tracking enabled.
*
* @author
* @Description The ES search_after approach
* @date 2022/01/11 14:04
*/
public class ESTest_searchAfter {
public static final Logger logger = LoggerFactory.getLogger(ESTest_searchAfter.class);
public static void main(String[] args) throws Exception{
// Captured up front; presumably used later to report the elapsed run time.
long startTime = System.currentTimeMillis();
// Create the ES client
RestHighLevelClient esClient = new RestHighLevelClient(
RestClient.builder(new HttpHost("localhost", 9200, "http"))
);
// 1. Create the searchRequest
SearchRequest searchRequest = new SearchRequest("audit2");
// 2. Specify the query conditions
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().trackTotalHits(true);// track_total_hits must be added, otherwise only 10000 is shown
// Set the number of documents fetched per page
sourceBuilder.size(1000);
// Set the sort value used for positioning (intended to be unique)
// NOTE(review): the sort uses "operationtime" alone. search_after relies on a sort that is
// unique per document; if several documents share the same operationtime, documents at a
// page boundary may be skipped — this could explain the missing records reported for this
// snippet. Confirm, and consider adding a tiebreaker sort field.
sourceBuilder.sort(SortBuilders.fieldSort("operationtime").order(SortOrder.DESC));// original note: multi-condition query
// Attach the sourceBuilder to the search request
searchRequest.source(sourceBuilder);
// Send the request
SearchResponse searchResponse = esClient.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] hits1 = searchResponse.getHits().getHits();
// NOTE(review): the snippet is cut off at this point in the source.
List
运行结果:
16:11:44.057 [main] INFO com.example.es.test.ESTest_searchAfter - 查询出来的数据个数为:64000
16:11:44.061 [main] INFO com.example.es.test.ESTest_searchAfter - 运行时间: 20979ms
现象:audit2 索引里总共有 69873 条数据,控制台每查询 1000 条打印一次,最终只查询出 64000 条记录,还有 5873 条数据丢失了。另外,size 如果超过 10000,也会报错。
我自己的疑问:search after 既然不能跳页查询,只能一页一页地往后查,那前端调用这个接口时,后端不还是要返回全部的数据吗?如果前端改成向下滚动加载,滚轮向下滚动几页,后端就只返回几页数据,后端不是会更省查询时间吗?现在我的 search after 实现还是一次性把数据全部查询出来,只是内部是一页一页查的,最终展示出来的还是全部的数据。所以我的疑问是:应该怎样与前端对接这个接口?
package com.example.es.test;
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.*;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* Demo of Elasticsearch scroll queries: issues the initial scroll search
* against the "audit2" index with a page size of 1000, sorted by
* "operationtime" in descending order, and reads the scroll id and first hits.
*
* @author
* @Description Java implementation of an ES scroll query
* @date 2021/12/08 14:09
*/
public class ESTest_Scroll {
public static final Logger logger = LoggerFactory.getLogger(ESTest_Scroll.class);
public static void main(String[] args) throws Exception{
// Captured up front; presumably used later to report the elapsed run time.
long startTime = System.currentTimeMillis();
// Create the ES client
RestHighLevelClient esClient = new RestHighLevelClient(
RestClient.builder(new HttpHost("localhost", 9200, "http"))
);
// 1. Create the searchRequest
SearchRequest searchRequest = new SearchRequest("audit2");
// 2. Specify the scroll settings (a 1-minute value is passed to scroll())
searchRequest.scroll(TimeValue.timeValueMinutes(1L));
// 3. Specify the query conditions
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.size(1000);
searchSourceBuilder.sort(SortBuilders.fieldSort("operationtime").order(SortOrder.DESC));// original note: multi-condition query
searchRequest.source(searchSourceBuilder);
// 4. Get the scrollId and the source documents from the response
SearchResponse searchResponse = esClient.search(searchRequest, RequestOptions.DEFAULT); // initialize the search context by sending the initial search request
String scrollId = searchResponse.getScrollId();
SearchHit[] searchHits = searchResponse.getHits().getHits();
// NOTE(review): the snippet is cut off at this point in the source.
List
运行结果:
16:20:54.794 [main] INFO com.example.es.test.ESTest_Scroll - 删除scrollId:true
16:20:54.795 [main] INFO com.example.es.test.ESTest_Scroll - 查询总个数:69873
16:20:54.797 [main] INFO com.example.es.test.ESTest_Scroll - 运行时间: 5716ms
现象:
audit2 索引里总共有 69873 条数据,最终查询出 69873 条记录,一条记录都没有丢失。另外,size 如果超过 10000,也会报错。很奇怪的是,search after 会丢失数据,而 scroll 一条记录都没有丢失。