Elasticsearch学习之(一)线上迁移数据方案_elasticsearch 在线迁移

前端必填时间范围,所以我们查询数据就是根据时间戳确定要查询的数据位于哪些索引中就行了。

1、Es的工具类

/\*\*
 \* 通过时间戳获取索引名称
 \* @param timeStamp
 \* @return
 \*/
    public static String getIndexNameByTimeStamp(String indexPrefix,Long timeStamp){
        Date date = new Date(timeStamp);
        String dateDetail = new SimpleDateFormat(SEARCH\_DATE\_FORMAT\_YYYY\_MM\_DD\_HH\_MM\_SS)
                .format(date).split(" ")[0];
        String[] yearMonthDay = dateDetail.split("-");
        return Stream.of(indexPrefix,yearMonthDay[0], yearMonthDay[1]).collect(Collectors.joining());
    }

 /\*\*
 \* 通过 开始和结束时间戳 获取 索引
 \* @param start
 \* @param end
 \* @return
 \*/
    public static ListgetAllIndexNameByTimeStampRange(String indexPrefix,long start,long end){

        if(start <0 || end <0  || start > end ){
            throw new IllegalArgumentException("时间戳参数错误:start:"+start+",end:"+end);
        }
        if(StringUtils.isEmpty(indexPrefix)){
            throw new IllegalArgumentException("索引名称前缀为空! prefix:"+indexPrefix);
        }

        List indexNames = new ArrayList<>();
        Calendar startCalendar = Calendar.getInstance();
        startCalendar.setTimeInMillis(start);

        Calendar endCalendar = Calendar.getInstance();
        endCalendar.setTimeInMillis(end);

        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMM");

        // 计算开始和结束的年份
        int startYear = startCalendar.get(Calendar.YEAR);
        int endYear = endCalendar.get(Calendar.YEAR);

        // 遍历每个年份的月份,生成索引名称
        for (int year = startYear; year <= endYear; year++) {
            Calendar tempCalendar = Calendar.getInstance();
            tempCalendar.set(Calendar.YEAR, year);
            tempCalendar.set(Calendar.MONTH, Calendar.JANUARY); // 一月份

            // 对于开始年份,从开始的月份开始遍历
            int startMonth = year == startYear ? startCalendar.get(Calendar.MONTH) : Calendar.JANUARY;

            // 对于结束年份,到结束的月份结束遍历
            int endMonth = year == endYear ? endCalendar.get(Calendar.MONTH) : Calendar.DECEMBER;

            for (int month = startMonth; month <= endMonth; month++) {
                tempCalendar.set(Calendar.MONTH, month);
                indexNames.add( indexPrefix + dateFormat.format(tempCalendar.getTime()));
            }
        }


        return indexNames;
    }

2、数据访问

1、每个月的最后几天生成下个月的索引

注意点:
1、因为每个月最后一天的日期不固定(28~31号),所以定时任务在28-31号每天都触发,再在代码里判断当天是否为月末
2、创建索引的配置indexConfiguration自己写个json文件然后放到容器中读取即可

@Scheduled(cron = "0 0 10 28-31 \* ?")
    public void createIndex() throws IOException {
        //判断是否是最后一天
        if(!DateUtil.isLastDayOfMonth()){
            log.warn("索引初始化|判断不是本月最后一天|不进行处理");
            return;
        }

        // 每月最后一天生成下个月的索引
        Calendar instance = Calendar.getInstance();
        instance.add(Calendar.MONTH,1);
        instance.set(Calendar.DAY\_OF\_MONTH,1);

        String indexName = ElasticsearchUtil.getIndexNameByTimeStamp(newIndexPrefix, instance.getTimeInMillis());
        GetIndexRequest getIndexRequest = new GetIndexRequest(indexName);
        boolean exists = restHighLevelClient.indices().exists(getIndexRequest, RequestOptions.DEFAULT);
        if(exists){
            log.warn("索引初始化|索引已存在|index:{}",indexName);
            return;
        }
        if(StringUtils.isEmpty(indexConfiguration)){
            log.error("索引初始化|获取索引初始化配置为空|setting:{}",indexConfiguration);
            return;
        }
        CreateIndexRequest request = new CreateIndexRequest(indexName);
        // 初始化索引
        request.source(indexConfiguration,XContentType.JSON);
        CreateIndexResponse response = restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);
        boolean acknowledged = response.isAcknowledged();
        log.info("索引初始化|初始化完成|所有节点是否都已确认:{}",acknowledged);

    }

index初始化的配置
从resource下读取配置文件

/**
 * Exposes the index-creation JSON as the {@code indexConfiguration} bean.
 *
 * <p>The content is loaded from the classpath resource {@code indexInitialization.json} and
 * logged once at INFO level.
 *
 * @return the content of the resource as a string
 * @throws IOException if the resource cannot be read
 */
@Bean("indexConfiguration")
    public String initIndexConfiguration() throws IOException {
        final String settingsJson = initEsIndexSetting("indexInitialization.json");
        log.info("索引初始化|初始化索引配置文件:{}", settingsJson);
        return settingsJson;
    }


    private String initEsIndexSetting(String resource) throws IOException {
        ClassPathResource classPathResource = new ClassPathResource(resource);
        try (InputStream in = classPathResource.getInputStream()) {
            return StreamUtils.copyToString(in, StandardCharsets.UTF\_8);
        }catch (IOException e){
            throw new IOException(e);
        }
    }

2、数据查询

很明显根据时间戳确定索引名称然后查询就行了

public List queryLisNewt(int from, int pageSize, BoolQueryBuilder queryBuilder,FeedbackReport entity) {

        Listresults=new ArrayList<>();

        if( StringUtils.isEmpty(entity.getStartDate()) || StringUtils.isEmpty(entity.getEndDate()) ){
            log.error("queryLisNewt|查询错误|开始或结束时间为空|startDate:{}|endDate:{}",entity.getStartDate(),entity.getEndDate());
            return results;
        }
        long start=DateUtil.parseDateString(entity.getStartDate()).getTime();
        long end=DateUtil.parseDateString(entity.getEndDate()).getTime();
        if(start <=0l || end <=0l){
            log.error("queryLisNewt|查询错误|解析日期错误:param:{}",entity);
            return results;
        }
        List indexNames = ElasticsearchUtil.getAllIndexNameByTimeStampRange(newIndexPrefix, start, end);
        if(CollectionUtils.isEmpty(indexNames)){
            log.error("queryLisNewt|查询错误|获取索引名称为空:{}",entity);
            return results;
        }
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder
                .query(queryBuilder)
                .from(from)
                .size(pageSize)
                .sort(SortBuilders.fieldSort("createTime")
                .order(SortOrder.DESC));

        SearchRequest searchRequest = new SearchRequest(indexNames.toArray(new String[]{}),searchSourceBuilder);
        RequestOptions.Builder builder = RequestOptions.DEFAULT.toBuilder();
        builder.setHttpAsyncResponseConsumerFactory(new HttpAsyncResponseConsumerFactory
 .HeapBufferedResponseConsumerFactory(200\*1024\*1024));
        try {
            SearchResponse response = restHighLevelClient.search(searchRequest, builder.build());
            SearchHits hits = response.getHits();
            for (SearchHit hit : hits) {
                Map map = hit.getSourceAsMap();
                FeedbackReport obj = new FeedbackReport();
                org.apache.commons.beanutils.BeanUtils.populate(obj, map);
                results.add(obj);
            }
        } catch (Exception e) {
            log.error("queryLisNewt|查询错误|获取结果失败:{}",e.getMessage(),e);
            return results;
        }


        return results;
    }

3、数据写入

主要就是收到消息,然后还是根据时间戳写到索引里面就行了。

public boolean insertRecordNew(FeedbackReport record) throws Exception {

        if(record.getCreateTime() <=0 ){
            log.error("写入索引|写入失败|提交时间错误:{}|param:{}",record.getCreateTime(),JSON.toJSONString(record));
            return false;
        }
        // 根据时间戳 获取索引名称
        String indexName = ElasticsearchUtil.getIndexNameByTimeStamp(newIndexPrefix, record.getCreateTime());
        log.info("写入索引|准备写入|获取索引名称|name:{}",indexName);
        // 判断此内容是否存在
        GetRequest getRequest = new GetRequest(indexName);
        getRequest.id(record.getId());
        boolean exists = restHighLevelClient.exists(getRequest, RequestOptions.DEFAULT);
        if(exists){
            log.error("写入索引|写入失败|内容重复|indexName:{}|time:{}|param:{}",indexName,record.getCreateTime(),JSON.toJSONString(record));
            return false;
        }
        IndexRequest indexRequest = new IndexRequest(indexName, "\_doc", record.getId());
        indexRequest.source(JSON.toJSONString(record),XContentType.JSON);
        restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);
        alarmService.addAlarm(record);
        log.info("写入索引|写入成功|createTime:{},id:{}",record.getCreateTime(),record.getId());
        return true;
    }

二、迁移数据

1、使用reindex API迁移数据
2、异步迁移(reindex 请求加上 wait_for_completion=false):我迁移的数据量比较大,同步执行会超时
以某个月为例:
从索引xxx-xxx-xxx迁移到xxx-xxx-xxx-xxx-202402,然后通过 GET /_tasks/Ydx4P84WTrWGGjLPD5dJ6A:xxxx 查询任务状态,其中Ydx4P84WTrWGGjLPD5dJ6A:xxxx是你执行reindex时返回的任务ID。

你可能感兴趣的:(elasticsearch,学习,jenkins)