Importing data into Redis with Spark SQL

The script below reads equipment threshold rows from MySQL through the Spark JDBC data source, groups the thresholds per equipment / indicator / condition, and writes the result into a Redis cluster with a one-day TTL.

#coding=utf-8
# Python 2 script: load threshold queries from MySQL via the Spark JDBC source
# and push the grouped results into a Redis cluster.
from __future__ import print_function
from pyspark.sql import SparkSession
from pyspark.sql import Row
import sys
from decimal import Decimal
from rediscluster import StrictRedisCluster
# setdefaultencoding() is removed from sys at interpreter startup,
# so reload(sys) is required to restore it (Python 2 only).
reload(sys)
sys.setdefaultencoding("utf-8")

# Five threshold queries, keyed by the b.`condition` value ('0'-'4') they filter on.
sql_dict = {
    "0":"""SELECT
	b.equip_id,
	a.`code`,
	a.equip_type_id,
	b.threshold,
	b.type_index_id,
	b.`condition`,
	a.status
FROM
	t_equip_type_index a,
	t_equip_threshold b
WHERE
	a.id = b.type_index_id
and a.equip_type_id = b.equip_type_id
AND b.`condition` = '0'
AND a.`status` = '101001'
AND b.`status` = '101001'
GROUP BY
	b.equip_id,a.code,threshold
 """,
    "1":"""SELECT
	b.equip_id,
	a.`code`,
	a.equip_type_id,
	min(b.threshold) as threshold,
	b.type_index_id,
	b.`condition`,
	a.status
FROM
	t_equip_type_index a,
	t_equip_threshold b
WHERE
	a.id = b.type_index_id
and a.equip_type_id = b.equip_type_id
AND b.`condition` = '1'
AND a.`status` = '101001'
AND b.`status` = '101001'
GROUP BY
	b.equip_id,a.code
 """,
        "2":"""SELECT
	b.equip_id,
	a.`code`,
	a.equip_type_id,
	min(b.threshold) as threshold,
	b.type_index_id,
	b.`condition`,
	a.status
FROM
	t_equip_type_index a,
	t_equip_threshold b
WHERE
	a.id = b.type_index_id
and a.equip_type_id = b.equip_type_id
AND b.`condition` = '2'
AND a.`status` = '101001'
AND b.`status` = '101001'
GROUP BY
	b.equip_id,a.code
 """,
            "3":"""SELECT
	b.equip_id,
	a.`code`,
	a.equip_type_id,
	max(b.threshold) as threshold,
	b.type_index_id,
	b.`condition`,
	a.status
FROM
	t_equip_type_index a,
	t_equip_threshold b
WHERE
	a.id = b.type_index_id
and a.equip_type_id = b.equip_type_id
AND b.`condition` = '3'
AND a.`status` = '101001'
AND b.`status` = '101001'
GROUP BY
	b.equip_id,a.code
 """,
    "4":"""SELECT
	b.equip_id,
	a.`code`,
	a.equip_type_id,
	max(b.threshold) as threshold ,
	b.type_index_id,
	b.`condition`,
	a.status
FROM
	t_equip_type_index a,
	t_equip_threshold b
WHERE
	a.id = b.type_index_id
and a.equip_type_id = b.equip_type_id
AND b.`condition` = '4'
AND a.`status` = '101001'
AND b.`status` = '101001'
GROUP BY
	b.equip_id,a.code
 """
}

def redis_cluster(key, value):
    # Startup nodes of the Redis cluster; any reachable node is enough to
    # discover the full topology.
    redis_nodes = [{'host': '172.16.11.136', 'port': 6379},
                   {'host': '172.16.11.136', 'port': 6380},
                   {'host': '172.16.11.137', 'port': 6379},
                   {'host': '172.16.11.137', 'port': 6380},
                   {'host': '172.16.11.138', 'port': 6379},
                   {'host': '172.16.11.138', 'port': 6380}]
    # Note: a new cluster connection is opened for every key; this is acceptable
    # for the small number of keys written here.
    try:
        redisconn = StrictRedisCluster(startup_nodes=redis_nodes)
    except Exception as e:
        print("Connect Error!", e)
        sys.exit(1)
    # Store the value as a string and let it expire after 86400 s (one day).
    redisconn.set(str(key), str(value), ex=86400)

# MySQL source connection settings, read through the Spark JDBC data source.
MYSQL_CONF = {
    'host': '172.16.11.108',
    'user': 'iot',
    'password': 'iot@#1234',
    'db': 'test',
    'port': 3306
}

jdbc_url = 'jdbc:mysql://{0}:{1}/{2}'.format(
            MYSQL_CONF['host'],
            MYSQL_CONF['port'],
            MYSQL_CONF['db']
        )


def jdbc_dataset(spark, contain_key, contain_sql):
    # contain_key is the condition code ('0'-'4') the query was built for; it is
    # currently unused because the condition is taken from each row instead.
    sql = contain_sql
    jdbcDF = spark.read \
        .format("jdbc") \
        .option("url", jdbc_url) \
        .option("dbtable", "(" + sql + ") tmp") \
        .option("user", MYSQL_CONF['user']) \
        .option("password", MYSQL_CONF['password']) \
        .option("driver", 'com.mysql.jdbc.Driver') \
        .load()
    # The threshold tables are small, so collecting to the driver is acceptable here.
    dict_value = {}
    for item in jdbcDF.collect():
        equip_id = item['equip_id']
        code = item['code']
        equip_type_id = item['equip_type_id']
        threshold = item['threshold']
        type_index_id = item['type_index_id']
        condition = item['condition']
        status = item['status']
        # Redis key layout: iotwjj_<equip_id>_<code>_<condition>
        key = "iotwjj" + "_" + str(equip_id) + "_" + str(code) + "_" + str(condition)
        # Normalize the Decimal threshold to four decimal places.
        threshold_str = str(threshold.quantize(Decimal('0.0000')))
        if key not in dict_value:
            dict_value[key] = {
                "equip_type_id": equip_type_id,
                "type_index_id": type_index_id,
                "threshold": [threshold_str],
            }
        elif threshold_str not in dict_value[key]["threshold"]:
            dict_value[key]["threshold"].append(threshold_str)
    for itemkey in dict_value:
        redis_cluster(itemkey, dict_value[itemkey])








if __name__ == "__main__":
    spark = SparkSession.builder.appName("Python Spark SQL data source example").getOrCreate()
    # SparkSession has no set() method, so tuning options such as spark.cores.max,
    # spark.network.timeout, spark.executor.heartbeatInterval, spark.memory.fraction
    # and spark.storage.memoryFraction have to be applied on the builder with
    # .config(...) before getOrCreate(), or passed with --conf on spark-submit
    # (see the submit section below).
    for key, sql in sql_dict.items():
        jdbc_dataset(spark, key, sql)
    spark.stop()
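
Once the job has run, an entry can be spot-checked from Python. This is only a sketch: the equip_id/code/condition baked into the key are made-up example values, and ast.literal_eval is used because the script stores each value as the str() of a Python dict (this assumes the stored dict contains only plain strings and numbers).

# verify_entry.py (hypothetical file name): read one entry back from the cluster
from __future__ import print_function
import ast
from rediscluster import StrictRedisCluster

conn = StrictRedisCluster(startup_nodes=[{'host': '172.16.11.136', 'port': 6379}])
raw = conn.get("iotwjj_1_PM25_0")       # key layout: iotwjj_<equip_id>_<code>_<condition>
if raw is not None:
    entry = ast.literal_eval(raw)       # the value was written with str(dict)
    print(entry["equip_type_id"], entry["type_index_id"], entry["threshold"])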


##### Submit command
/usr/local/spark-2.3.1-bin-hadoop2.7/bin/spark-submit  --jars /usr/local/iot/pyspark/mysql-connector-java-5.1.41.jar pyspark_sql_mysql_redis.py 
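
If the tuning options that appear as comments in the script should take effect, they can be passed at submit time with --conf. The variant below is only a sketch: it reuses the same jar and script, and the values are simply the ones from the commented-out lines, not tuning recommendations.

/usr/local/spark-2.3.1-bin-hadoop2.7/bin/spark-submit \
  --jars /usr/local/iot/pyspark/mysql-connector-java-5.1.41.jar \
  --conf spark.cores.max=3 \
  --conf spark.network.timeout=10000000 \
  --conf spark.executor.heartbeatInterval=10000000 \
  --conf spark.memory.fraction=0.75 \
  --conf spark.storage.memoryFraction=0.45 \
  pyspark_sql_mysql_redis.py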

##### Note: the redis-py-cluster package must be installed on every node of the cluster
https://files.pythonhosted.org/packages/f1/dd/4bb27bb3e3d03a01b0afd4a4ba13a4677b0f2d6552ff2841ac56591bfb29/redis-py-cluster-1.3.5.tar.gz


##### redis-py-cluster also depends on the redis client package:
https://files.pythonhosted.org/packages/3b/f6/7a76333cf0b9251ecf49efff635015171843d9b977e4ffcf59f9c4428052/redis-2.10.6-py2.py3-none-any.whl
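
One way to get both packages onto a node, assuming pip is available and the two files above have been downloaded locally, is to install the redis client first and then redis-py-cluster:

pip install redis-2.10.6-py2.py3-none-any.whl
pip install redis-py-cluster-1.3.5.tar.gz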





 
