数据库 | timescaledb时序表使用注意事项

官方文档:Timescale documentation

注意事项

  1. timescaledb本质上是基于PostgreSQL数据库的扩展,PostgreSQL的语法都支持.
  2. 建表,和普通关系数据库建表的差异点
    a. 时序表必须要有TIMESTAMP列,用于分区(分区列会默认建索引)
    示例:
begin;
--建普通表
CREATE table if not exists  tpt_warning_record (
  warning_time TIMESTAMP NOT NULL,
  tag_name VARCHAR(200)  NOT NULL ,
  info TEXT DEFAULT NULL
);
COMMENT ON COLUMN tpt_warning_record.warning_time IS '预警时间';
COMMENT ON COLUMN tpt_warning_record.tag_name IS '位号';
COMMENT ON COLUMN tpt_warning_record.info IS '预警信息';

--转时序表,同时会自动创建索引
SELECT create_hypertable('tpt_warning_record', 'warning_time', chunk_time_interval => interval '1 day');

--建其他索引,比如位号
CREATE INDEX idx_tpt_warning_record_time_name ON tpt_warning_record (tag_name,warning_time desc);

--设置分区保留时间
SELECT add_retention_policy('tpt_warning_record', INTERVAL '30 days');

--数据是否支持压缩
ALTER TABLE dm_tag_value SET (timescaledb.compress);

end;

b. 非分区列要建索引的话,需要跟分区列建复合索引
c. 分区大小规范:为了提高性能最好是整个分区能完全加载到内存,建议每个分区存储数据量大小是内存的四分之一,比如内存是4G那分区不要超过1G,具体多少条记录根据每行数据大小估算,以此来确定是7天还是1天或几个小时一个分区。
d. 建索引的时候可以选择正序或倒序,根据实际业务查询选择,比如使用的时候是时间倒序那么索引语句可以这样,如果查询排序和索引排序方向不一致很影响性能。

CREATE INDEX idx_tpt_warning_record_time_name ON tpt_warning_record (tag_name,warning_time desc);
  1. 写入,尽量用批量写入提高性能
  2. 查询
    a. 查询都要带上索引字段(建议都带上时间范围),不走索引的全量扫描的性能和普通数据库没有区别,表数据量大了就很慢。
    b. 因为数据库内存有限,查询结果大的情况使用分页查询
    c. 查询不要随意order by,尽量走默认排序(order by 和索引顺序一致)
    d. 时间过滤时不要用时间函数,会影响性能
    比如:
--错误如下:
SELECT app_time,tag_name,tag_value,quality FROM dm_tag_value WHERE DATE(app_time) = '2024-06-19'

--正确如下:
SELECT app_time,tag_name,tag_value,quality FROM dm_tag_value WHERE app_time >= '2024-06-19' and app_time < '2024-06-20'

e. 时序数据库有一些特有函数可以大大提高性能。
比如:按时间窗口聚合

SELECT time_bucket('1 day', time_column) AS day, AVG(value_column) 
FROM table_name
GROUP BY day;

比如:用物理视图做连续预聚合,适合要对大时间范围做聚合的场景

CREATE MATERIALIZED VIEW continuous_aggregate_view WITH (timescaledb.continuous) AS
SELECT time_bucket('1 day', time_column) AS day, AVG(value_column) 
FROM table_name
GROUP BY day;

SELECT add_continuous_aggregate_policy('continuous_aggregate_view', 
    start_offset => INTERVAL '1 month', 
    end_offset => INTERVAL '1 day', 
    schedule_interval => INTERVAL '1 hour');

其他一些高级查询:Timescale Documentation | Perform advanced analytic queries

一些自定义函数

其他一些方便查询的自定义函数,供参考
查询指定时间最近一条记录(opt可取:interpolate 插值、before 前值、after 后值)

CREATE OR REPLACE FUNCTION get_latest_records(
    tag_names text[], 
    t timestamp DEFAULT now(), 
    opt text DEFAULT 'before'
)
RETURNS TABLE(tag_name varchar(200), app_time timestamp, tag_value varchar(1000)) AS $$
DECLARE
    tag text;
    query text;
BEGIN
    FOR tag IN SELECT unnest(tag_names)
    LOOP
        IF opt = 'after' THEN
            query := format('
                SELECT dm_tag_value.tag_name, dm_tag_value.app_time, dm_tag_value.tag_value
                FROM dm_tag_value
                WHERE dm_tag_value.tag_name = %L AND dm_tag_value.app_time >= %L
                ORDER BY dm_tag_value.app_time ASC
                LIMIT 1', tag, t);
        ELSIF  opt = 'interpolate' THEN
            query := format('WITH closest_before AS (
					    SELECT  dm_tag_value.tag_name, dm_tag_value.app_time, dm_tag_value.tag_value
					    FROM dm_tag_value
					    WHERE dm_tag_value.tag_name = %L AND dm_tag_value.app_time <= %L
					    ORDER BY app_time DESC
					    LIMIT 1
					),
					closest_after AS (
					    SELECT  dm_tag_value.tag_name, dm_tag_value.app_time, dm_tag_value.tag_value
					    FROM dm_tag_value
					    WHERE dm_tag_value.tag_name = %L AND dm_tag_value.app_time > %L
					    ORDER BY app_time ASC
					    LIMIT 1
					),
					combined AS (
						SELECT 
						        b.tag_name AS name,
						        EXTRACT(EPOCH FROM b.app_time) AS time_before, 
						        CAST(b.tag_value AS DOUBLE PRECISION) AS value_before, 
						        EXTRACT(EPOCH FROM a.app_time) AS time_after, 
						        CAST(a.tag_value AS DOUBLE PRECISION) AS value_after
					    FROM closest_before b
					    FULL JOIN closest_after a ON 1=1
					)
					SELECT 
					    COALESCE(combined.name,ca.tag_name, cb.tag_name) AS tag_name, 
					    CAST(%L AS TIMESTAMP) AS app_time,
					    CASE
					        WHEN cb.app_time IS NOT NULL AND ca.app_time IS NOT NULL THEN
					            CAST((value_before + (value_after - value_before) * (EXTRACT(EPOCH FROM TIMESTAMP %L) - time_before) / (time_after - time_before)) AS VARCHAR(1000))
					        WHEN cb.app_time IS NOT NULL THEN
					            CAST(cb.tag_value AS VARCHAR(1000))
					        WHEN ca.app_time IS NOT NULL THEN
					            CAST(ca.tag_value AS VARCHAR(1000))
					        ELSE
					            NULL
					    END AS tag_value
					FROM combined 
					LEFT JOIN closest_before cb ON 1=1 
					LEFT JOIN closest_after ca ON 1=1'
                 , tag, t , tag,t ,t, t);
        else 
            query := format('
                SELECT dm_tag_value.tag_name, dm_tag_value.app_time, dm_tag_value.tag_value 
                FROM dm_tag_value
                WHERE dm_tag_value.tag_name = %L AND dm_tag_value.app_time <= %L
                ORDER BY dm_tag_value.app_time DESC
                LIMIT 1', tag, t);
        END IF;
        
        RETURN QUERY EXECUTE query;
    END LOOP;
END;
$$ LANGUAGE plpgsql;


SELECT *
FROM get_latest_records(ARRAY['TAG8710_ISYS7.1', 'TAG8728_ISYS7.1', 'TAG8727_ISYS7.1'], '2024-07-18','interpolate');

行转列

CREATE EXTENSION IF NOT EXISTS tablefunc;

SELECT *
FROM crosstab(
    'SELECT app_time, tag_name, tag_value FROM dm_tag_value where tag_name in (''TAG41'',''TAG42'',''TAG43'') and  app_time  between ''2024-06-04 00:00:00.000'' and ''2024-06-04 23:59:59.000''  ORDER BY 1, 2',
    'SELECT DISTINCT tag_name FROM dm_tag_value where tag_name in (''TAG41'',''TAG42'',''TAG43'') ORDER BY 1'
) AS ct (
    app_time TIMESTAMPTZ,
    tag_v1 NUMERIC,
    tag_v2 numeric,
    tag_v3 NUMERIC
);

间隔采样

--按间隔取位号数据
CREATE OR REPLACE FUNCTION get_one_tag_with_interval_seconds(
    tag_names TEXT[],
    interval_seconds INT,
    duration_minutes INT,
    delay_seconds INT
)
RETURNS TABLE (tag_name VARCHAR, app_time timestamp,tag_value TEXT) AS $$
BEGIN
    RETURN QUERY
    SELECT
        dm_tag_value.tag_name,
        time_bucket(INTERVAL '1 second' * interval_seconds, dm_tag_value.app_time) AS interval_time,
        min(concat(cast(dm_tag_value.app_time as VARCHAR),'|',dm_tag_value.tag_value,'|',dm_tag_value.quality)) as app_time_tag_value
    FROM
        dm_tag_value
    WHERE
        dm_tag_value.tag_name = ANY(tag_names)
        AND dm_tag_value.app_time >= NOW() - INTERVAL '1 minute' * duration_minutes - INTERVAL '1 second' * delay_seconds
        AND dm_tag_value.app_time < NOW() - INTERVAL '1 second' * delay_seconds
    GROUP BY
        dm_tag_value.tag_name, interval_time;
END;
$$ LANGUAGE plpgsql;

其他一些常用命令

查看分区空间占用:

SELECT relname AS table_name,
       pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
       pg_size_pretty(pg_relation_size(relid)) AS table_size,
       pg_size_pretty(pg_indexes_size(relid)) AS indexes_size
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC;

查看分区

SELECT show_chunks('dm_tag_value');

SELECT *
FROM timescaledb_information.chunks
WHERE hypertable_name = 'dm_tag_value';

删除分区

SELECT drop_chunks('dm_tag_value','2024-05-30'::timestamp);--删除小于等于指定日期的分区

压缩分区

SELECT compress_chunk('_timescaledb_internal._hyper_1_3_chunk', if_not_compressed => true);

你可能感兴趣的:(数据库)