数据探查-ODPS大数据引擎计算不同格式的日期时间类型的极值

思路:SQL实现,正则表达式匹配到不同格式然后将其转为同一种格式的日期时间进行比较

例如:将所有的日期格式转换为yyyyddMMhhmmsss的格式

#取最大值
-- Maximum birthday in t_status.
--
-- Every accepted date/datetime string is normalized to a digits-only,
-- year-first, zero-padded key (yyyyMMdd, optionally followed by HHmmss and
-- fractional-second digits), so plain string ordering equals chronological
-- ordering. A date-only key is a prefix of the datetime keys of the same day
-- and therefore sorts before them.
--
-- Output: birthday      = normalized sort key
--         real_birthday = the value exactly as stored in t_status
select
    normalized.birthday,
    normalized.real_birthday
from (
    select
        regexp_replace(
            regexp_replace(
                regexp_replace(
                    regexp_replace(
                        birthday,
                        -- Step 1: month-first (M-D-YYYY) -> year-first (YYYY-M-D).
                        -- Year-first and compact values never start with
                        -- "1-2 digits + separator", so they pass through untouched.
                        '^(\\d{1,2})[-/.](\\d{1,2})[-/.](\\d{4})',
                        '\\3-\\1-\\2',
                        0
                    ),
                    -- Step 2: zero-pad a single-digit month (the separator after
                    -- the year is rewritten to "-0").
                    '^(\\d{4})[-/.年]([1-9][-/.月])',
                    '\\1-0\\2',
                    0
                ),
                -- Step 3: zero-pad a single-digit day. The day must be followed by
                -- a non-digit or end of string so two-digit days are left alone;
                -- the month is already two digits after step 2.
                '^(\\d{4}[-/.年]\\d{2})[-/.月]([1-9]([^0-9]|$))',
                '\\1-0\\2',
                0
            ),
            -- Step 4: drop every separator. The replacement is the empty string;
            -- a group reference here would point at a group the pattern does not
            -- define, which MaxCompute documents as undefined behaviour.
            '[-/.\\s:年月日]',
            '',
            0
        ) as birthday,
        birthday as real_birthday
    from t_status
    where
        -- Layout A: separated, year first, optional zero padding, optional 日,
        -- optional time introduced by whitespace.
        -- e.g. 2019-06-09, 2019/6/9, 2019.6.9, 2019年6月9日, 2019-06-09 20:00:01
        birthday rlike '^(18|19|[23]\\d)\\d{2}[-/.年](0?[1-9]|1[0-2])[-/.月](0?[1-9]|[12]\\d|3[01])日?\\s*(\\s+([01]\\d|2[0-3]):?[0-5]\\d:?[0-5]\\d(\\.\\d{1,3})?)?$'
        -- Layout B: compact and fully zero-padded (padding is mandatory here,
        -- otherwise the digits are ambiguous), optional time with or without
        -- whitespace. e.g. 20190609, 20190609200001
        or birthday rlike '^(18|19|[23]\\d)\\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])\\s*(([01]\\d|2[0-3]):?[0-5]\\d:?[0-5]\\d(\\.\\d{1,3})?)?$'
        -- Layout C: separated, month first, optional time introduced by whitespace.
        -- e.g. 06/09/2019, 6/9/2019, 06-09-2019 10:50:24
        or birthday rlike '^(0?[1-9]|1[0-2])[-/.](0?[1-9]|[12]\\d|3[01])[-/.](18|19|[23]\\d)\\d{2}\\s*(\\s+([01]\\d|2[0-3]):?[0-5]\\d:?[0-5]\\d(\\.\\d{1,3})?)?$'
) normalized
-- real_birthday is a tie-breaker so the returned row is deterministic when
-- several raw values normalize to the same key.
order by normalized.birthday desc, normalized.real_birthday desc
limit 1;

#取最小值

-- Minimum birthday in t_status.
--
-- Every accepted date/datetime string is normalized to a digits-only,
-- year-first, zero-padded key (yyyyMMdd, optionally followed by HHmmss and
-- fractional-second digits), so plain string ordering equals chronological
-- ordering. A date-only key is a prefix of the datetime keys of the same day
-- and therefore sorts before them.
--
-- Output: birthday      = normalized sort key
--         real_birthday = the value exactly as stored in t_status
select
    normalized.birthday,
    normalized.real_birthday
from (
    select
        regexp_replace(
            regexp_replace(
                regexp_replace(
                    regexp_replace(
                        birthday,
                        -- Step 1: month-first (M-D-YYYY) -> year-first (YYYY-M-D).
                        -- Year-first and compact values never start with
                        -- "1-2 digits + separator", so they pass through untouched.
                        '^(\\d{1,2})[-/.](\\d{1,2})[-/.](\\d{4})',
                        '\\3-\\1-\\2',
                        0
                    ),
                    -- Step 2: zero-pad a single-digit month (the separator after
                    -- the year is rewritten to "-0").
                    '^(\\d{4})[-/.年]([1-9][-/.月])',
                    '\\1-0\\2',
                    0
                ),
                -- Step 3: zero-pad a single-digit day. The day must be followed by
                -- a non-digit or end of string so two-digit days are left alone;
                -- the month is already two digits after step 2.
                '^(\\d{4}[-/.年]\\d{2})[-/.月]([1-9]([^0-9]|$))',
                '\\1-0\\2',
                0
            ),
            -- Step 4: drop every separator. The replacement is the empty string;
            -- a group reference here would point at a group the pattern does not
            -- define, which MaxCompute documents as undefined behaviour.
            '[-/.\\s:年月日]',
            '',
            0
        ) as birthday,
        birthday as real_birthday
    from t_status
    where
        -- Layout A: separated, year first, optional zero padding, optional 日,
        -- optional time introduced by whitespace.
        -- e.g. 2019-06-09, 2019/6/9, 2019.6.9, 2019年6月9日, 2019-06-09 20:00:01
        birthday rlike '^(18|19|[23]\\d)\\d{2}[-/.年](0?[1-9]|1[0-2])[-/.月](0?[1-9]|[12]\\d|3[01])日?\\s*(\\s+([01]\\d|2[0-3]):?[0-5]\\d:?[0-5]\\d(\\.\\d{1,3})?)?$'
        -- Layout B: compact and fully zero-padded (padding is mandatory here,
        -- otherwise the digits are ambiguous), optional time with or without
        -- whitespace. e.g. 20190609, 20190609200001
        or birthday rlike '^(18|19|[23]\\d)\\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\\d|3[01])\\s*(([01]\\d|2[0-3]):?[0-5]\\d:?[0-5]\\d(\\.\\d{1,3})?)?$'
        -- Layout C: separated, month first, optional time introduced by whitespace.
        -- e.g. 06/09/2019, 6/9/2019, 06-09-2019 10:50:24
        or birthday rlike '^(0?[1-9]|1[0-2])[-/.](0?[1-9]|[12]\\d|3[01])[-/.](18|19|[23]\\d)\\d{2}\\s*(\\s+([01]\\d|2[0-3]):?[0-5]\\d:?[0-5]\\d(\\.\\d{1,3})?)?$'
) normalized
-- LIMIT 1 already yields a single row, so no DISTINCT is needed; real_birthday
-- is a tie-breaker so the returned row is deterministic when several raw
-- values normalize to the same key.
order by normalized.birthday, normalized.real_birthday
limit 1;

可以处理的日期格式:

2019-06-09
2019-6-9
2019/06/09
2019/6/9
06/09/2019
6/9/2019
06-09-2019
20190609
2019.06.09
2019.6.9
2019年6月9日

2019-06-09 20:00:01
2019/06/09 20:00:01
2019.06.09 20:00:01
06-09-2019 10:50:24
20190609200001

你可能感兴趣的:(SQL)