使用 Python 获取 MySQL 表结构（SHOW CREATE TABLE），并生成 Hive 建表语句

使用 Python 获取 MySQL 表结构（SHOW CREATE TABLE），并生成 Hive 建表语句

# -*- coding: utf-8 -*-
import sys
reload(sys) 
sys.setdefaultencoding('utf-8')
import re
from collections import defaultdict

import MySQLdb
import pandas as pd
# Suffixes appended to every generated Hive table name: each source table is
# emitted once as "<name>_95" and once as "<name>_248".
# NOTE(review): presumably these identify two source MySQL instances - confirm.
TABLE_SUFFIXES = ('95', '248')

# Storage clauses that close the generated CREATE TABLE statements.  The ODS
# clause intentionally contains the two-character sequences \t and \n, which
# Hive interprets as the field / line delimiters.
ODS_TABLE_TAIL = "partitioned by (dt string) row format delimited fields terminated by '\\t' lines terminated by '\\n';"
DW_TABLE_TAIL = 'stored as orcfile;'

# MySQL base type names (lower case) grouped by the Hive type they map to.
# Anything not listed here is emitted as a Hive string.
MYSQL_INTEGER_TYPES = ('tinyint', 'smallint', 'mediumint', 'int', 'integer', 'bigint')
MYSQL_FLOAT_TYPES = ('float', 'double', 'real')
MYSQL_DECIMAL_TYPES = ('decimal', 'numeric')

# "create table `name` (" header line of SHOW CREATE TABLE output.
_TABLE_RE = re.compile(r'^create\s+table\s+`([^`]+)`')
# Column definition line: `name` type[(digits[,digits])] ...
# Key / constraint lines never start with a back-tick, so they do not match.
_COLUMN_RE = re.compile(r'^`([^`]+)`\s+([a-z]+)\s*(\([\d\s,]*\))?')


def hive_type_for(mysql_type, size=''):
    """Return the Hive column type for a lower-cased MySQL base type.

    mysql_type -- base type name without size, e.g. 'int' or 'varchar'.
    size       -- the parenthesised numeric size, e.g. '(10,2)'; only used
                  for decimal types, where precision and scale are kept.

    Every integer width becomes bigint, floating types become double and
    everything else (varchar, text, datetime, timestamp, enum, ...) becomes
    string.
    """
    if mysql_type in MYSQL_INTEGER_TYPES:
        return 'bigint'
    if mysql_type in MYSQL_FLOAT_TYPES:
        return 'double'
    if mysql_type in MYSQL_DECIMAL_TYPES:
        return 'decimal' + size.replace(' ', '')
    return 'string'


def parse_mysql_create_table(create_sql):
    """Parse the text returned by MySQL ``SHOW CREATE TABLE``.

    Returns ``(table_name, columns)`` where ``columns`` is a list of
    ``(column_name, hive_type)`` tuples in declaration order.  All names are
    lower-cased.  Only lines that start with a back-quoted identifier are
    treated as columns, so PRIMARY KEY / KEY / CONSTRAINT lines and the
    closing ``) ENGINE=...`` line are ignored.

    Raises ValueError when the header line or the column list cannot be found.
    """
    lines = [raw.strip().lower() for raw in create_sql.splitlines()]
    lines = [line for line in lines if line]
    header = _TABLE_RE.match(lines[0]) if lines else None
    if header is None:
        raise ValueError('not a CREATE TABLE statement: %r' % (create_sql[:60],))

    columns = []
    for line in lines[1:]:
        found = _COLUMN_RE.match(line)
        if found:
            name, base_type, size = found.groups()
            columns.append((name, hive_type_for(base_type, size or '')))
    if not columns:
        raise ValueError('no column definitions found for table %s' % header.group(1))
    return header.group(1), columns


def column_summary(columns):
    """Return ``'col1,col2,...:col1'`` for a list of (name, type) tuples.

    The text before the colon lists every column; the text after it repeats
    the first column.  Raises ValueError for an empty column list.
    """
    if not columns:
        raise ValueError('cannot summarise an empty column list')
    names = [name for name, _ in columns]
    return ','.join(names) + ':' + names[0]


def build_hive_script(table_name, columns):
    """Return the drop/create statements for one source table as one string.

    For each layer (``ods`` then ``dw``) and each suffix in TABLE_SUFFIXES a
    ``drop table`` line followed by a ``create table`` statement is produced
    for ``<layer>.<layer>_<table_name>_<suffix>``.  The four sections are
    separated by a blank line.  Names are assembled from their parts, so a
    table or column that happens to contain "95" or "ods" is left intact.
    """
    body = ',\n'.join('%s %s' % column for column in columns)
    sections = []
    for layer, tail in (('ods', ODS_TABLE_TAIL), ('dw', DW_TABLE_TAIL)):
        for suffix in TABLE_SUFFIXES:
            hive_table = '%s.%s_%s_%s' % (layer, layer, table_name, suffix)
            # Two spaces before "(" keep the emitted text identical to what
            # this script has always printed.
            create = 'create table %s  (\n%s) %s' % (hive_table, body, tail)
            sections.append('drop table %s;\n%s' % (hive_table, create))
    return '\n\n'.join(sections)


def main():
    """Print Hive DDL for every table in ``table_nm`` plus a column summary.

    Connects to MySQL, runs SHOW CREATE TABLE for each listed table, prints
    the generated Hive statements, then prints the table -> column summary
    mapping both as a dict and as a pandas Series.
    """
    # The connection values and table names below are redacted placeholders
    # and must be filled in before the script can run.
    engine=MySQLdb.connect(host='xx.xxxxxxxxxxx',port=xxxxx,user='xxxxxx',passwd='xxxxxx',db='xxxx',connect_timeout=200,charset='utf8')
    # Kept as a defaultdict so the printed repr matches earlier runs.
    col_nm = defaultdict(list)
    table_nm = ['xxxxx','xxxx','xxxxx','xxx']
    try:
        for source_table in table_nm:
            sql = "show create table " + source_table
            df = pd.read_sql(sql, engine)
            # str() gives the native string type; on Python 2 this relies on
            # the utf-8 default encoding configured at the top of the file.
            create_sql = str(df['Create Table'].values[0])
            table_name, columns = parse_mysql_create_table(create_sql)
            print(build_hive_script(table_name, columns))
            col_nm[table_name] = column_summary(columns)
    finally:
        # Release the MySQL connection even if a table fails to parse.
        engine.close()

    print("\n")
    print(col_nm)
    col = pd.Series(col_nm)
    print(col)


if __name__ == '__main__':
    main()

你可能感兴趣的:(python)