#coding=utf-8
from __future__ import print_function
from pyspark.sql import SparkSession
from pyspark.sql import Row
import sys
from decimal import Decimal
from rediscluster import StrictRedisCluster
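# Python 2 only: force UTF-8 as the default codec so implicit str() conversions
# of non-ASCII values do not raise UnicodeDecodeError.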
reload(sys)
sys.setdefaultencoding("utf-8")
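# Per-condition queries against t_equip_type_index / t_equip_threshold: condition '0'
# keeps every distinct threshold, '1'/'2' keep the minimum threshold per device/index,
# '3'/'4' keep the maximum. Each query returns equip_id, code, equip_type_id,
# threshold, type_index_id, condition and status.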
sql_dict={
"0":"""SELECT
b.equip_id,
a.`code`,
a.equip_type_id,
b.threshold,
b.type_index_id,
b.`condition`,
a.status
FROM
t_equip_type_index a,
t_equip_threshold b
WHERE
a.id = b.type_index_id
and a.equip_type_id = b.equip_type_id
AND b.`condition` = '0'
AND a.`status` = '101001'
AND b.`status` = '101001'
GROUP BY
b.equip_id,a.code,threshold
""",
"1":"""SELECT
b.equip_id,
a.`code`,
a.equip_type_id,
min(b.threshold) as threshold,
b.type_index_id,
b.`condition`,
a.status
FROM
t_equip_type_index a,
t_equip_threshold b
WHERE
a.id = b.type_index_id
and a.equip_type_id = b.equip_type_id
AND b.`condition` = '1'
AND a.`status` = '101001'
AND b.`status` = '101001'
GROUP BY
b.equip_id,a.code
""",
"2":"""SELECT
b.equip_id,
a.`code`,
a.equip_type_id,
min(b.threshold) as threshold,
b.type_index_id,
b.`condition`,
a.status
FROM
t_equip_type_index a,
t_equip_threshold b
WHERE
a.id = b.type_index_id
and a.equip_type_id = b.equip_type_id
AND b.`condition` = '2'
AND a.`status` = '101001'
AND b.`status` = '101001'
GROUP BY
b.equip_id,a.code
""",
"3":"""SELECT
b.equip_id,
a.`code`,
a.equip_type_id,
max(b.threshold) as threshold,
b.type_index_id,
b.`condition`,
a.status
FROM
t_equip_type_index a,
t_equip_threshold b
WHERE
a.id = b.type_index_id
and a.equip_type_id = b.equip_type_id
AND b.`condition` = '3'
AND a.`status` = '101001'
AND b.`status` = '101001'
GROUP BY
b.equip_id,a.code
""",
"4":"""SELECT
b.equip_id,
a.`code`,
a.equip_type_id,
max(b.threshold) as threshold,
b.type_index_id,
b.`condition`,
a.status
FROM
t_equip_type_index a,
t_equip_threshold b
WHERE
a.id = b.type_index_id
and a.equip_type_id = b.equip_type_id
AND b.`condition` = '4'
AND a.`status` = '101001'
AND b.`status` = '101001'
GROUP BY
b.equip_id,a.code
"""
}
def redis_cluster(key, value):
    """Write one key/value pair to the Redis Cluster with a 24h TTL."""
    redis_nodes = [{'host': '172.16.11.136', 'port': 6379},
                   {'host': '172.16.11.136', 'port': 6380},
                   {'host': '172.16.11.137', 'port': 6379},
                   {'host': '172.16.11.137', 'port': 6380},
                   {'host': '172.16.11.138', 'port': 6379},
                   {'host': '172.16.11.138', 'port': 6380}]
    try:
        redisconn = StrictRedisCluster(startup_nodes=redis_nodes)
    except Exception as e:
        print("Connect Error!", e)
        sys.exit(1)
    # Values are stored as str(dict); each key expires after 86400 seconds (24 hours).
    redisconn.set(str(key), str(value), ex=86400)
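# Illustrative only (not called by the job): a minimal sketch of how a consumer could
# read one cached entry back from the same cluster. The node list and helper name are
# assumptions; values above are written with str(dict), so ast.literal_eval parses them.
def read_threshold_example(key, nodes=None):
    import ast
    nodes = nodes or [{'host': '172.16.11.136', 'port': 6379}]  # assumed reachable node
    conn = StrictRedisCluster(startup_nodes=nodes)
    raw = conn.get(str(key))  # key pattern: iotwjj_<equip_id>_<code>_<condition>
    return ast.literal_eval(raw) if raw is not None else None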
MYSQL_CONF = {
    'host': '172.16.11.108',
    'user': 'iot',
    'password': 'iot@#1234',
    'db': 'test',
    'port': 3306
}

jdbc_url = 'jdbc:mysql://{0}:{1}/{2}'.format(
    MYSQL_CONF['host'],
    MYSQL_CONF['port'],
    MYSQL_CONF['db']
)
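# With the settings above this resolves to: jdbc:mysql://172.16.11.108:3306/test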
def jdbc_dataset(spark, contain_key, contain_sql):
    """Run one threshold query over JDBC and cache the grouped results in Redis."""
    sql = contain_sql
    jdbcDF = spark.read \
        .format("jdbc") \
        .option("url", jdbc_url) \
        .option("dbtable", "(" + sql + ") tmp") \
        .option("user", MYSQL_CONF['user']) \
        .option("password", MYSQL_CONF['password']) \
        .option("driver", 'com.mysql.jdbc.Driver') \
        .load()
    dict_value = {}
    for item in jdbcDF.collect():
        equip_id = item['equip_id']
        code = item['code']
        equip_type_id = item['equip_type_id']
        threshold = item['threshold']
        type_index_id = item['type_index_id']
        condition = item['condition']
        status = item['status']
        # One Redis entry per device / index code / condition.
        key = "iotwjj" + "_" + str(equip_id) + "_" + str(code) + "_" + str(condition)
        if key not in dict_value:
            dict_value[key] = {}
            dict_value[key]["equip_type_id"] = equip_type_id
            dict_value[key]["type_index_id"] = type_index_id
            dict_value[key]["threshold"] = [str(threshold.quantize(Decimal('0.0000')))]
        else:
            # Collect additional distinct thresholds (4 decimal places) under the same key.
            if str(threshold.quantize(Decimal('0.0000'))) not in dict_value[key]["threshold"]:
                dict_value[key]["threshold"].append(str(threshold.quantize(Decimal('0.0000'))))
    for itemkey in dict_value:
        redis_cluster(itemkey, dict_value[itemkey])
if __name__ == "__main__":
    spark = SparkSession.builder.appName("Python Spark SQL data source example").getOrCreate()
    # spark.set('spark.cores.max', 3)
    # spark.set('spark.network.timeout', 10000000)
    # spark.set('spark.executor.heartbeatInterval', 10000000)
    # spark.set('spark.memory.fraction', 0.75)
    # spark.set('spark.storage.memoryFraction', 0.45)
    for dict_sql_key in sql_dict:
        key = dict_sql_key
        sql = sql_dict.get(dict_sql_key)
        jdbc_dataset(spark, key, sql)
    spark.stop()
##### How to submit the job:
# /usr/local/spark-2.3.1-bin-hadoop2.7/bin/spark-submit --jars /usr/local/iot/pyspark/mysql-connector-java-5.1.41.jar pyspark_sql_mysql_redis.py
##### Note: the cluster nodes need the Redis client packages installed:
# https://files.pythonhosted.org/packages/f1/dd/4bb27bb3e3d03a01b0afd4a4ba13a4677b0f2d6552ff2841ac56591bfb29/redis-py-cluster-1.3.5.tar.gz
#######
# https://files.pythonhosted.org/packages/3b/f6/7a76333cf0b9251ecf49efff635015171843d9b977e4ffcf59f9c4428052/redis-2.10.6-py2.py3-none-any.whl
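# For example (assuming pip is available on every node and the files above have been
# downloaded locally), install the redis client first, then the cluster wrapper:
#   pip install redis-2.10.6-py2.py3-none-any.whl
#   pip install redis-py-cluster-1.3.5.tar.gz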