greenplum使用总结之常用SQL及函数

---------------------------------------------------建表等-----------------------------------------------------
-- Create a sequence.
-- Example: a sequence that starts at 100 and increments by 1, with no explicit
-- minimum or maximum.
CREATE SEQUENCE users181020_ID_seq
    START WITH 100
    INCREMENT BY 1
    NO MINVALUE
    NO MAXVALUE
    CACHE 1;

-- Usage: fetch the next value of the sequence.
SELECT nextval('users181020_ID_seq');

-- Cursors.
-- Example: declare a cursor over a query.
-- NOTE(review): in PostgreSQL-family databases a cursor declared without
-- WITH HOLD only lives inside a transaction block -- confirm whether these
-- statements need a surrounding BEGIN / COMMIT in your client.
DECLARE mycursor CURSOR FOR SELECT * FROM films;

-- Usage: fetch the next 5 rows from the cursor.
FETCH FORWARD 5 FROM mycursor;

-- Close the cursor.
CLOSE mycursor;

-- Tablespaces.
CREATE TABLESPACE fastspace LOCATION '/ssd1/postgresql/data';

--------------------------------创建只读用户并授权--------------------------------------
-- Create a user named bi_read whose password is "readonly".
CREATE USER bi_read WITH ENCRYPTED PASSWORD 'readonly';

-- Make every transaction opened by this user read-only by default.
ALTER USER bi_read SET default_transaction_read_only = on;

-- Grant USAGE on schema public. The grantee must be the bi_read role created
-- above ("readonly" is only that role's password, not a role).
GRANT USAGE ON SCHEMA public TO bi_read;

-- Grant SELECT. Run this while connected to the specific database: the grant
-- only applies to the database in which it is executed.
GRANT SELECT ON ALL TABLES IN SCHEMA public TO bi_read;

-- Alternatively, generate one GRANT statement per table of a schema ...
SELECT 'grant select on table ' || schemaname || '.' || tablename || ' to bi_read;'
FROM pg_tables
WHERE schemaname IN ('bi_dim');

-- ... and one per view of that schema.
SELECT 'grant select on table ' || schemaname || '.' || viewname || ' to bi_read;'
FROM pg_views
WHERE schemaname IN ('bi_dim');

------------------------1)批量表空间修改---------------------------------------
-- Generate an "ALTER TABLE ... set tablespace ...;" statement for every table
-- in the listed schemas whose pg_tables.tablespace is NULL, mapping each schema
-- to its target tablespace.
-- NOTE(review): bi_ods_mtds maps to 'bi_ods_mtds' while every other schema maps
-- to a '*_dat' tablespace -- confirm that name is intended.
SELECT 'ALTER TABLE ' || s.schemaname || '.' || s.tablename || ' set tablespace '
    || CASE
           WHEN s.schemaname = 'bi_dim'        THEN 'bi_dim_dat'
           WHEN s.schemaname = 'bi_dw_yx'      THEN 'bi_dw_dat'
           WHEN s.schemaname = 'bi_ods_uc'     THEN 'bi_ods_uc_dat'
           WHEN s.schemaname = 'bi_ods_mtds'   THEN 'bi_ods_mtds'
           WHEN s.schemaname = 'bi_ods_wechat' THEN 'bi_ods_wechat_dat'
           WHEN s.schemaname = 'bi_ods_crm'    THEN 'bi_ods_crm_dat'
       END
    || ';'
FROM pg_tables AS s
WHERE s.schemaname IN ('bi_dim', 'bi_dw_yx', 'bi_ods_uc', 'bi_ods_mtds', 'bi_ods_wechat', 'bi_ods_crm')
  AND s.tablespace IS NULL;

-- List the existing tablespaces.
SELECT * FROM pg_tablespace;

-------------------------------------关于数据库-----------------------------------
# Start / stop the cluster (run as the gp user on Linux).
gpstop -M fast
gpstart

# Export and import data with psql's \copy (run as the gp user on Linux).
# Placeholders to substitute before running:
#   导出库ip / 导入库ip   - host of the source / target database
#   数据库用户            - database user
#   导出库名 / 导入库名   - name of the source / target database
#   表名                  - table name
#   表名或者sql           - a table name or a query (psql requires a query
#                           passed to \copy to be wrapped in parentheses)
export PGPASSWORD='pass1'
psql -h 导出库ip -U 数据库用户 -d 导出库名 -c "\copy 表名或者sql to /home/xxx/xxx.data"

export PGPASSWORD='pass2'
# -p 端口 (the port) is only needed when the server is not on the default 5432.
psql -h 导入库ip -p 端口 -U 数据库用户 -d 导入库名 -c "\copy 表名 from /home/xxx/xxx.data"

# Database backup: use pg_dump and pg_restore.

-- In the statements below, 表名 is a placeholder for a table name and 字段名 is
-- a placeholder for a column name.

-- Disk space used by a table.
-- e.g.
SELECT pg_size_pretty(pg_relation_size('表名'));

-- Disk space used by a database.
SELECT pg_size_pretty(pg_database_size('MyDatabase'));

-- Table bloat: list the diagnosed tables, check one table's size, then
-- rewrite that table.
SELECT * FROM gp_toolkit.gp_bloat_diag ORDER BY bdirelpages DESC, bdidiag;
SELECT pg_size_pretty(pg_relation_size('bi_dim.xxx'));
ALTER TABLE bi_dim.xxx SET WITH (REORGANIZE=TRUE);

-- Database space.
SELECT * FROM gp_toolkit.gp_disk_free ORDER BY dfsegment;
SELECT * FROM gp_toolkit.gp_size_of_database ORDER BY sodddatname;

-- Vacuum and analyze a table.
VACUUM ANALYZE tablename;

-- Data distribution of a table: number of rows stored on each segment.
SELECT gp_segment_id, count(*)
FROM 表名
GROUP BY gp_segment_id;

-- Show the distribution-key columns of a table.
SELECT attname
FROM pg_attribute
WHERE attrelid = '表名'::regclass
  AND attnum IN (
      SELECT unnest(distkey)
      FROM pg_catalog.gp_distribution_policy t
      WHERE localoid = '表名'::regclass
  );

-- Change the distribution key of a table.
ALTER TABLE 表名 SET DISTRIBUTED BY (字段名);
ALTER TABLE BI_DW_YX.DW_TK_ANALYSIS_MTH_DTL SET DISTRIBUTED RANDOMLY;  -- random distribution

-- The ten tables with the largest sotdsize, joined to pg_class for the name.
-- size_MB is sotdsize divided by 1024 twice (presumably bytes -> MB; confirm
-- the unit against the gp_toolkit documentation).
SELECT
    relname                  AS NAME,
    sotdsize / 1024 / 1024   AS size_MB,
    sotdtoastsize            AS TOAST,
    sotdadditionalsize       AS other
FROM gp_toolkit.gp_size_of_table_disk AS sotd
INNER JOIN pg_class
    ON pg_class.oid = sotd.sotdoid
ORDER BY sotdsize DESC
LIMIT 10;

-- Resource queue status.
SELECT * FROM pg_resqueue_status;

-- Most recent operations on database objects, newest first.
SELECT
    schemaname AS SCHEMA,
    objname    AS TABLE,
    usename    AS ROLE,
    actionname AS ACTION,
    subtype    AS TYPE,
    statime    AS TIME
FROM pg_stat_operations
ORDER BY statime DESC;

-- SQL statements that are currently executing, newest first.
SELECT *
FROM pg_stat_activity
WHERE state = 'active'
ORDER BY query_start DESC;

-- Kill a statement.
-- pg_cancel_backend only cancels the backend's current query; it does not
-- release the database connection.
-- pg_terminate_backend kills the backend process itself and thereby frees the
-- connection.
-- 传入procpid is a placeholder: pass the process id of the target backend.
-- NOTE(review): the query above filters on pg_stat_activity.state, while this
-- placeholder is named after the procpid column; check which process-id column
-- (procpid or pid) your server version exposes.
SELECT pg_cancel_backend(传入procpid);
SELECT pg_terminate_backend(传入procpid);

---------------------------------日期处理--------------------------------------
-- First / last day of a month.
-- Last day of the current month:
SELECT date_trunc('month', now() + interval '1 months') + interval '-1 days';
-- Last day of the previous month:
SELECT date_trunc('month', now()) + interval '-1 days';
-- First day of the current month:
SELECT date_trunc('month', now());

-- Start of the current day:
SELECT date_trunc('day', now());
-- First day of the current quarter:
SELECT date_trunc('quarter', now());
-- First day of the current week:
SELECT date_trunc('week', now());
-- Truncate to the hour:
SELECT date_trunc('hour', now());

-- Time differences.
SELECT date_part('day', now() - date '2021-07-29');                  -- whole days
SELECT date_part('week', now());                                     -- week number of the current date
SELECT EXTRACT(epoch FROM (now() - date '2021-07-29')) / 60 / 60 / 24;  -- fractional days

-- Current time.
SELECT clock_timestamp();  -- independent of the transaction
SELECT now();              -- the same value throughout one transaction

----------------------------------------------树结构-----------------
-- Walk the org tree from a chosen top node downwards, building for every node
-- its id path, its name path and its depth, then flag leaf nodes.
WITH RECURSIVE result_ AS (
    -- Anchor member: the top-level node.
    SELECT
        id,
        name,
        pid,
        CAST(id AS varchar(100))   AS path_id,    -- cast so both UNION branches have the same type
        CAST(name AS varchar(500)) AS path_name,
        1                          AS leve
    FROM org
    WHERE id = '1'                                -- id of the top-level node to start from

    UNION ALL

    -- Recursive member: children of the rows already in result_. It must expose
    -- exactly the same columns as the anchor member.
    SELECT
        org.id,
        org.name,
        org.pid,
        -- Append to the parent's accumulated path, separated by a slash, so the
        -- path covers every ancestor and not just the direct parent.
        CAST(r.path_id || '/' || org.id AS varchar(100))     AS path_id,
        CAST(r.path_name || '/' || org.name AS varchar(500)) AS path_name,
        r.leve + 1                                           AS leve       -- one level deeper per recursion
    FROM result_ AS r                                                      -- note: the CTE itself
    INNER JOIN org
        ON org.pid = r.id                                                  -- parent-child relationship
    WHERE 1 = 1                                                            -- further filters can be added here
)
-- Query result_ and derive the is_leaf flag.
SELECT
    t.id,
    t.name,
    t.pid,
    org.name           AS pname,
    '/' || t.path_id   AS path_id,    -- prepend the slash that the top level lacks
    '/' || t.path_name AS path_name,
    t.leve,
    -- '0' when some row of org names this node as its parent, otherwise '1'.
    -- Evaluating this here can be slow on a large table; consider maintaining
    -- the flag separately.
    CASE
        WHEN trim(t.id) IN (SELECT DISTINCT a1.pid FROM org AS a1) THEN '0'
        ELSE '1'
    END                AS is_leaf
FROM result_ AS t
LEFT JOIN org
    ON t.pid = org.id;                -- join back to org for the parent's name

----------------------------------------相关函数--------------------------------------
-- 1) Merging rows: string_agg(expression, delimiter) and ARRAY_AGG().
-- The sample rows are produced by FROM-less SELECTs, so no DUAL table is needed.
-- e.g.
SELECT
    DNO,
    ARRAY_TO_STRING(ARRAY_AGG(DNAME), '|') AS DNAME,
    string_agg(DNAME, '|')                 AS XX
FROM (
    SELECT '1' AS DNO, 'ABC' AS DNAME
    UNION ALL
    SELECT '2' AS DNO, 'DEF' AS DNAME
    UNION ALL
    SELECT '2' AS DNO, 'GHI' AS DNAME
) Z
GROUP BY DNO;

-- The same, with the merged values in a defined order:
SELECT
    DNO,
    ARRAY_TO_STRING(ARRAY_AGG(DNAME ORDER BY DNAME), '|') AS DNAME,
    string_agg(DNAME, '|' ORDER BY DNAME)                 AS XX
FROM (
    SELECT '1' AS DNO, 'ABC' AS DNAME
    UNION ALL
    SELECT '2' AS DNO, 'DEF' AS DNAME
    UNION ALL
    SELECT '2' AS DNO, 'GHI' AS DNAME
) Z
GROUP BY DNO;

-- 2) Splitting.
-- e.g. split the merged string back into one row per value. The pipe is a regex
-- metacharacter, so it is escaped; inside an E'' string the backslash itself
-- must be doubled.
SELECT
    DNO,
    REGEXP_SPLIT_TO_TABLE(XX, E'\\|')
FROM (
    SELECT
        DNO,
        ARRAY_TO_STRING(ARRAY_AGG(DNAME), '|') AS DNAME,
        string_agg(DNAME, '|')                 AS XX
    FROM (
        SELECT '1' AS DNO, 'ABC' AS DNAME
        UNION ALL
        SELECT '2' AS DNO, 'DEF' AS DNAME
        UNION ALL
        SELECT '2' AS DNO, 'GHI' AS DNAME
        UNION ALL
        SELECT '2' AS DNO, 'JK' AS DNAME
    ) Z
    GROUP BY DNO
) z2;

-- Split into single characters: with an unescaped pipe as the pattern the
-- string is split character by character instead of on the '|' delimiter.
SELECT REGEXP_SPLIT_TO_TABLE('GHI|JK|DEF', E'|');

-- Pick the field at a given position (here the 2nd).
SELECT split_part('GHI|JK|DEF', '|', 2);

-- Split a string into multiple rows.
SELECT '1' aa, unnest(string_to_array('aaa,bbb,ccc', ',')) AS XX;

-- 3) Position of a substring inside a string: strpos(string, substring)
-- e.g. strpos('high', 'ig') = 2

-- 4) Client encoding of the current session
-- e.g. select pg_client_encoding(); = "UTF8"

-- 5) Find the functions whose source text references a given table.
-- The LIKE match is case-sensitive, so only upper-case references are found.
SELECT proname FROM pg_proc WHERE prosrc LIKE '%BI_DIM.DIM_SHOP%';

-- Get the definition of a function: look up its oid by name, then pass that
-- oid to pg_get_functiondef (1238507 is the example oid from this note).
SELECT oid FROM pg_proc WHERE proname = 'sp_dim_kn_info';
SELECT pg_get_functiondef('1238507') AS pf;

-- 6) List the columns of one table, with each column's comment, in column order.
-- attnum > 0 skips system columns; dropped columns are excluded.
-- NOTE(review): relname is matched without a schema, so same-named relations
-- in different schemas are all returned.
SELECT
    c.relname,
    a.attname                              AS field,
    col_description(a.attrelid, a.attnum)  AS field_desc,
    a.*
FROM pg_class AS c
INNER JOIN pg_attribute AS a
    ON a.attrelid = c.oid
WHERE c.relname = 'designer'
  AND a.attnum > 0
  AND a.attisdropped = 'f'
ORDER BY a.attnum;
-- List the structure of every table in one schema: table comment, column name,
-- type with length, column comment, NOT NULL flag and a key flag.
SELECT
    C.relname,
    -- Comments are keyed by the relation's oid; relfilenode is the on-disk file
    -- id and is not the right key for obj_description.
    CAST(obj_description(C.oid, 'pg_class') AS VARCHAR) AS table_COMMENT,
    A.attname AS field,
    -- Type name followed by the parenthesised length/precision part of the
    -- formatted type, if it has one. Bracket expressions match the literal
    -- parentheses without depending on backslash-escape settings.
    concat_ws('', T.typname, SUBSTRING(format_type(A.atttypid, A.atttypmod) FROM '[(].*[)]')) AS type_length,
    b.description AS field_COMMENT,
    CASE WHEN A.attnotnull = 't' THEN 'true' ELSE 'false' END AS NOTNULL,
    -- 'true' when exactly one constraint on the table has this column as its
    -- first key column.
    -- NOTE(review): this counts constraints of any type, not only primary keys
    -- -- confirm that is what iskey should mean.
    CASE
        WHEN (
            SELECT count(pc.conname)
            FROM pg_constraint pc
            WHERE A.attnum = pc.conkey[1]
              AND pc.conrelid = C.oid
        ) = '1' THEN 'true'
        ELSE 'false'
    END AS iskey
FROM
    pg_class C
    INNER JOIN pg_namespace N
        ON N.oid = C.relnamespace
    INNER JOIN pg_attribute A
        ON A.attrelid = C.oid
    LEFT OUTER JOIN pg_description b
        ON A.attrelid = b.objoid
       AND A.attnum = b.objsubid
    INNER JOIN pg_type T
        ON A.atttypid = T.oid
WHERE
    -- Restrict to the schema itself as well as to its table names, so that
    -- same-named relations in other schemas are not picked up.
    N.nspname = 'bi_ods_wc'
    AND C.relname IN (SELECT pt.tablename FROM pg_tables pt WHERE pt.schemaname = 'bi_ods_wc')
    AND A.attnum > 0
ORDER BY
    C.relname,
    A.attnum;

-- 7) A shorter alternative to CASE WHEN inside an aggregate: the FILTER clause.
-- tmp_0 holds three sample rows; the query sums score over the rows whose
-- subject is 'English' only.
WITH tmp_0 AS (
    SELECT 'English' AS name, 'English' AS subject, 99 AS score
    UNION ALL
    SELECT 'Physics' AS name, 'Physics' AS subject, 95 AS score
    UNION ALL
    SELECT 'Math' AS name, 'Math' AS subject, 92 AS score
)
SELECT sum(score) FILTER (WHERE subject = 'English') AS English
FROM tmp_0;

你可能感兴趣的:(#,Greenplum,数据库,大数据)