使用 InfluxDB 已经五六年了,线上数据库已经非常大,尤其在资源回收(数据过期清理)时磁盘空间占用激增,导致整个系统无法使用。为此对其进行按年分库存储与跨年查询改造;写入数据时需要按数据所属年份定位目标库。
为了处理跨年数据集查询,主要考虑以下几点:按年份并行查询各库、跨库结果按时间合并排序、分页时按年份倒序逐库消耗偏移量并统计总数。
以下是增强后的实现方案:
import org.influxdb.InfluxDB;
import org.influxdb.dto.BatchPoints;
import org.influxdb.dto.Query;
import org.influxdb.dto.QueryResult;
import org.influxdb.impl.InfluxDBResultMapper;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
 * Query helper for InfluxDB deployments that shard data into one database per
 * calendar year (e.g. {@code db_2023}). Supports cross-year "query all",
 * paged queries, and counting, merging the per-year results by time descending.
 *
 * <p>NOTE: InfluxQL {@code COUNT(*)} only counts field values, not tags — make
 * sure the measurement has at least one field for counts to be meaningful.
 */
public class InfluxDBQueryUtil {

    private final InfluxDB influxDB;
    private final InfluxDBResultMapper resultMapper = new InfluxDBResultMapper();
    /** Name of the POJO field (and result column) that holds the point timestamp. */
    private static final String DEFAULT_TIME_FIELD = "time";

    /**
     * @param influxDB connected InfluxDB client; must not be null
     */
    public InfluxDBQueryUtil(InfluxDB influxDB) {
        this.influxDB = Objects.requireNonNull(influxDB, "influxDB");
    }

    /**
     * 通用查询方法 - 查询所有数据(支持跨年)。
     *
     * @param measurement 表名
     * @param clazz       返回类型
     * @param filters     过滤条件(字段/标签名 -> 值;值可为 String、Collection、Date、{@link TimeRange} 或数值)
     * @param years       要查询的年份列表(用于确定数据库);为空时默认当前年份
     * @param <T>         返回类型
     * @return 查询结果列表(按时间倒序排列)
     */
    public <T> List<T> queryAll(String measurement, Class<T> clazz,
                                Map<String, Object> filters, List<Integer> years) {
        List<Integer> targetYears = defaultYears(years);
        // Query each yearly database in parallel, then merge newest-first.
        return targetYears.parallelStream()
                .flatMap(year -> {
                    String dbName = getDatabaseName(year);
                    String queryString = buildQueryString(measurement, filters, null, null);
                    QueryResult queryResult = influxDB.query(new Query(queryString, dbName));
                    return resultMapper.toPOJO(queryResult, clazz).stream();
                })
                .sorted(Comparator.comparing(this::getTimeFieldValue, Comparator.reverseOrder()))
                .collect(Collectors.toList());
    }

    /**
     * 通用查询方法 - 分页查询(支持跨年)。
     *
     * <p>按年份倒序逐库查询:全局 OFFSET 先作用于最近的年份;某一年未被
     * 消耗完的偏移量(offset 减去该年总数)继续作用于下一年。每年会额外
     * 触发一次 COUNT 查询,年份很多时开销为 O(years)。
     *
     * @param measurement 表名
     * @param clazz       返回类型
     * @param filters     过滤条件
     * @param years       要查询的年份列表;为空时默认当前年份
     * @param page        页码(从1开始)
     * @param pageSize    每页大小(必须 &gt;= 1)
     * @param <T>         返回类型
     * @return 分页查询结果
     * @throws IllegalArgumentException page 或 pageSize 小于 1 时
     */
    public <T> PageResult<T> queryByPage(String measurement, Class<T> clazz,
                                         Map<String, Object> filters, List<Integer> years,
                                         int page, int pageSize) {
        if (page < 1 || pageSize < 1) {
            throw new IllegalArgumentException("page and pageSize must be >= 1");
        }
        // Defensive copy: the original sorted the caller's list in place, which
        // mutated the argument and threw on immutable lists (List.of, etc.).
        List<Integer> targetYears = new ArrayList<>(defaultYears(years));

        long total = count(measurement, filters, targetYears);
        int offset = (page - 1) * pageSize;
        int remaining = pageSize;
        List<T> allData = new ArrayList<>(pageSize);

        // Newest year first, matching ORDER BY time DESC inside each query.
        targetYears.sort(Collections.reverseOrder());
        for (Integer year : targetYears) {
            if (remaining <= 0) {
                break;
            }
            String dbName = getDatabaseName(year);
            String queryString = buildQueryString(measurement, filters, remaining, offset);
            QueryResult queryResult = influxDB.query(new Query(queryString, dbName));
            List<T> yearData = resultMapper.toPOJO(queryResult, clazz);
            allData.addAll(yearData);
            remaining -= yearData.size();
            // Carry any unconsumed offset over to the next (older) year.
            // Cast required: the original long-to-int assignment did not compile.
            offset = (int) Math.max(0L, offset - countFromYear(measurement, filters, year));
        }

        // Merge newest-first across years and trim to one page.
        allData.sort(Comparator.comparing(this::getTimeFieldValue, Comparator.reverseOrder()));
        List<T> pageData = allData.size() > pageSize
                ? new ArrayList<>(allData.subList(0, pageSize))
                : allData;
        return new PageResult<>(pageData, total, page, pageSize);
    }

    /**
     * 计数方法(支持跨年):各年份库并行 COUNT 后求和。
     */
    public long count(String measurement, Map<String, Object> filters, List<Integer> years) {
        return defaultYears(years).parallelStream()
                .mapToLong(year -> countFromYear(measurement, filters, year))
                .sum();
    }

    /** Returns the given years, or a singleton list of the current year when empty. */
    private List<Integer> defaultYears(List<Integer> years) {
        if (years == null || years.isEmpty()) {
            return Collections.singletonList(LocalDateTime.now().getYear());
        }
        return years;
    }

    /**
     * 获取单个年份的数据量。
     */
    private long countFromYear(String measurement, Map<String, Object> filters, int year) {
        String dbName = getDatabaseName(year);
        String countQuery = "SELECT COUNT(*) FROM " + measurement + buildWhereClause(filters);
        QueryResult queryResult = influxDB.query(new Query(countQuery, dbName));
        return extractCountFromResult(queryResult);
    }

    /**
     * Extracts the count from a {@code SELECT COUNT(*)} result.
     *
     * <p>InfluxDB rows are shaped {@code [time, count_field1, ...]}: column 0 is
     * the timestamp, and the count lives in the first non-"time" column. The
     * original code read index 0 and therefore returned the timestamp.
     */
    private long extractCountFromResult(QueryResult queryResult) {
        if (queryResult.getResults() == null || queryResult.getResults().isEmpty()) {
            return 0;
        }
        QueryResult.Result result = queryResult.getResults().get(0);
        if (result.getSeries() == null || result.getSeries().isEmpty()) {
            return 0;
        }
        QueryResult.Series series = result.getSeries().get(0);
        List<List<Object>> values = series.getValues();
        if (values == null || values.isEmpty()) {
            return 0;
        }
        int countIndex = 1; // default: first column after "time"
        List<String> columns = series.getColumns();
        if (columns != null) {
            for (int i = 0; i < columns.size(); i++) {
                if (!DEFAULT_TIME_FIELD.equals(columns.get(i))) {
                    countIndex = i;
                    break;
                }
            }
        }
        List<Object> row = values.get(0);
        if (countIndex >= row.size()) {
            return 0;
        }
        Object cell = row.get(countIndex);
        return (cell instanceof Number) ? ((Number) cell).longValue() : 0L;
    }

    /**
     * 获取时间字段值(反射读取 POJO 的 time 字段,兼容父类中声明的字段)。
     */
    private Instant getTimeFieldValue(Object obj) {
        // Walk up the class hierarchy: getDeclaredField does not see inherited fields.
        for (Class<?> type = obj.getClass(); type != null && type != Object.class;
                type = type.getSuperclass()) {
            try {
                java.lang.reflect.Field timeField = type.getDeclaredField(DEFAULT_TIME_FIELD);
                timeField.setAccessible(true);
                return (Instant) timeField.get(obj);
            } catch (NoSuchFieldException ignored) {
                // Not declared here — try the superclass.
            } catch (Exception e) {
                throw new RuntimeException("Failed to get time field value", e);
            }
        }
        throw new RuntimeException("Failed to get time field value: no '"
                + DEFAULT_TIME_FIELD + "' field on " + obj.getClass().getName());
    }

    /**
     * 构建查询语句(SELECT * ... ORDER BY time DESC [LIMIT n [OFFSET m]])。
     */
    private String buildQueryString(String measurement, Map<String, Object> filters,
                                    Integer limit, Integer offset) {
        StringBuilder queryBuilder = new StringBuilder("SELECT * FROM ").append(measurement);
        queryBuilder.append(buildWhereClause(filters));
        queryBuilder.append(" ORDER BY time DESC");
        if (limit != null) {
            queryBuilder.append(" LIMIT ").append(limit);
            // InfluxQL only honours OFFSET together with LIMIT.
            if (offset != null && offset > 0) {
                queryBuilder.append(" OFFSET ").append(offset);
            }
        }
        return queryBuilder.toString();
    }

    /**
     * 构建 WHERE 子句。String 值会转义单引号/反斜杠,防止破坏语句或注入。
     */
    private String buildWhereClause(Map<String, Object> filters) {
        if (filters == null || filters.isEmpty()) {
            return "";
        }
        StringBuilder whereBuilder = new StringBuilder(" WHERE ");
        boolean firstCondition = true;
        for (Map.Entry<String, Object> entry : filters.entrySet()) {
            if (!firstCondition) {
                whereBuilder.append(" AND ");
            }
            String key = entry.getKey();
            Object value = entry.getValue();
            if (value instanceof String) {
                whereBuilder.append(key).append(" = '")
                        .append(escapeStringLiteral((String) value)).append("'");
            } else if (value instanceof Collection) {
                // IN (...) list; string elements quoted and escaped, others as-is.
                Collection<?> collection = (Collection<?>) value;
                String inValues = collection.stream()
                        .map(v -> v instanceof String
                                ? "'" + escapeStringLiteral((String) v) + "'"
                                : String.valueOf(v))
                        .collect(Collectors.joining(","));
                whereBuilder.append(key).append(" IN (").append(inValues).append(")");
            } else if (value instanceof Date) {
                // Legacy java.util.Date support: render as RFC3339 instant.
                Instant instant = ((Date) value).toInstant();
                String rfc3339 = DateTimeFormatter.ISO_INSTANT.format(instant);
                whereBuilder.append(key).append(" = '").append(rfc3339).append("'");
            } else if (value instanceof TimeRange) {
                // Inclusive [start, end] range, typically on the "time" column.
                TimeRange range = (TimeRange) value;
                whereBuilder.append(key).append(" >= '")
                        .append(DateTimeFormatter.ISO_INSTANT.format(range.getStart()))
                        .append("' AND ").append(key).append(" <= '")
                        .append(DateTimeFormatter.ISO_INSTANT.format(range.getEnd()))
                        .append("'");
            } else {
                // Numbers / booleans: emitted unquoted.
                whereBuilder.append(key).append(" = ").append(value);
            }
            firstCondition = false;
        }
        return whereBuilder.toString();
    }

    /** Escapes backslashes and single quotes for use inside an InfluxQL string literal. */
    private static String escapeStringLiteral(String s) {
        return s.replace("\\", "\\\\").replace("'", "\\'");
    }

    /**
     * 获取数据库名称(按年分库的命名规则:db_&lt;year&gt;)。
     */
    private String getDatabaseName(int year) {
        return "db_" + year;
    }

    /**
     * 分页结果封装类。
     */
    public static class PageResult<T> {
        private final List<T> data;
        private final long total;
        private final int page;
        private final int pageSize;
        private final int totalPages;

        public PageResult(List<T> data, long total, int page, int pageSize) {
            this.data = data;
            this.total = total;
            this.page = page;
            this.pageSize = pageSize;
            // Guard against division by zero for a degenerate pageSize.
            this.totalPages = pageSize > 0 ? (int) Math.ceil((double) total / pageSize) : 0;
        }

        public List<T> getData() { return data; }
        public long getTotal() { return total; }
        public int getPage() { return page; }
        public int getPageSize() { return pageSize; }
        public int getTotalPages() { return totalPages; }
    }

    /**
     * 时间范围查询条件(闭区间 [start, end])。
     */
    public static class TimeRange {
        private final Instant start;
        private final Instant end;

        public TimeRange(Instant start, Instant end) {
            this.start = start;
            this.end = end;
        }

        public Instant getStart() { return start; }
        public Instant getEnd() { return end; }
    }
}
实现要点说明:
- 并行查询优化:queryAll 与 count 使用 parallelStream 同时查询各年份库,最后统一按时间倒序合并。
- 智能分页处理:queryByPage 按年份倒序逐库查询,全局偏移量先作用于最近年份,未消耗完的部分顺延到更早的年份。
- 时间范围支持:TimeRange 类专门表示闭区间时间范围条件,可跨年过滤。
下面是使用示例:
// Initialization: connect to InfluxDB and wrap the client with the query utility.
InfluxDB influxDB = InfluxDBFactory.connect("http://localhost:8086", "username", "password");
InfluxDBQueryUtil queryUtil = new InfluxDBQueryUtil(influxDB);
// Define a time range that spans a year boundary (Nov 2022 - Feb 2023).
Instant start = Instant.parse("2022-11-01T00:00:00Z");
Instant end = Instant.parse("2023-02-01T00:00:00Z");
InfluxDBQueryUtil.TimeRange timeRange = new InfluxDBQueryUtil.TimeRange(start, end);
// Build filter conditions.
Map<String, Object> filters = new HashMap<>();
filters.put("sensor_id", "sensor1");
filters.put("time", timeRange); // range condition on the time column
// Databases to hit: one per calendar year covered by the range (2022 and 2023).
List<Integer> years = Arrays.asList(2022, 2023);
// Fetch every matching row across both years.
List<SensorData> allData = queryUtil.queryAll("sensor_measurement", SensorData.class, filters, years);
// Or fetch one page at a time.
int page = 1;
int pageSize = 20;
// PageResult is a nested class and must be qualified with the outer class name
// (the original snippet used the bare name, which does not compile here).
InfluxDBQueryUtil.PageResult<SensorData> pageResult = queryUtil.queryByPage(
        "sensor_measurement", SensorData.class, filters, years, page, pageSize);
System.out.println("跨年数据总量: " + pageResult.getTotal());
System.out.println("当前页数据: " + pageResult.getData());
时间字段处理:确保您的POJO类中包含时间字段(默认命名为"time"),如需自定义可通过修改DEFAULT_TIME_FIELD
性能考虑:跨多年大数据集查询可能较慢,建议:尽量缩小年份范围、在过滤条件中带上时间范围(TimeRange)以利用时间索引、年份很多时控制并行度并为分页场景缓存各年份的 COUNT 结果
分页准确性:在数据频繁变化的场景下,分页可能不够精确,可以考虑基于时间戳的分页方案