首先是画3D直方图,一开始我是打算使用ECharts来画的,但后面发现Python居然也可以画。
数据是MySQL里面的,弄了一些假数据。使用了ORM
from sqlalchemy import *
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
# Engine for the MySQL `student` database.
# NOTE(review): "[email protected]" in the URL looks like an e-mail-obfuscation
# artifact from web extraction; presumably it was "<password>@<host>" --
# confirm and restore before running.
engine = create_engine('mysql://root:[email protected]/student?charset=utf8')
# Session factory bound to the engine, plus one module-level session.
Session = sessionmaker(bind=engine)
sess = Session()
# Declarative base class that the ORM model below inherits from.
Base = declarative_base()
class UserAction(Base):
    """ORM model mapped to the ``user_action`` table."""

    __tablename__ = 'user_action'
    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Province name, stored as a string of up to 255 characters.
    province = Column(String(255))
    # Month label; note that it is stored as a string, not a number.
    month = Column(String(255))
    # Integer value recorded for the (province, month) pair.
    number = Column(Integer)
if __name__ == '__main__':
    # When run as a script, create every table declared on Base
    # (here: user_action) in the database behind `engine`.
    Base.metadata.create_all(engine)
接下来就是画图了
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from plot_orm import UserAction
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Engine for the MySQL `student` database used by the query helpers below.
# NOTE(review): "[email protected]" in the URL looks like an e-mail-obfuscation
# artifact from web extraction; presumably it was "<password>@<host>" --
# confirm and restore before running.
engine = create_engine('mysql://root:[email protected]/student?charset=utf8')
Session = sessionmaker(bind=engine)
# Single module-level session shared by all gather_* functions.
session = Session()
def gather_provinces():
    """Return the distinct provinces stored in ``user_action``.

    Returns:
        A list with one entry per distinct province; the order is whatever
        the intermediate set yields and is not sorted.
    """
    seen = set()
    for record in session.query(UserAction):
        seen.add(record.province)
    return list(seen)
def gather_months():
    """Return the distinct month labels stored in ``user_action``, sorted.

    Months are stored as strings, so a plain ``sorted()`` would put '10'
    before '2'. Purely numeric labels are therefore ordered by their integer
    value; any other labels follow, in plain string order.

    Returns:
        A sorted list of distinct month labels.
    """
    months = {record.month for record in session.query(UserAction)}

    def month_key(label):
        # Numeric labels first (by value), everything else afterwards.
        if isinstance(label, str) and label.isdecimal():
            return (0, int(label), label)
        return (1, 0, label)

    return sorted(months, key=month_key)
def gather_number(province, month):
    """Return the ``number`` stored for one (province, month) pair.

    Args:
        province: Province value to match.
        month: Month label to match.

    Returns:
        The ``number`` column of the first matching row, or 0 when no row
        exists for the pair (or its ``number`` is NULL), so that a sparse
        table yields an empty bar instead of an IndexError.
    """
    record = (
        session.query(UserAction)
        .filter_by(province=province, month=month)
        .first()
    )
    if record is None or record.number is None:
        return 0
    return record.number
def plot_3d_bars(x, y):
    """Show a 3D bar chart of ``gather_number`` over months and provinces.

    Args:
        x: Sequence of month labels (x axis).
        y: Sequence of province labels (y axis).

    Each bar sits at the integer grid position of its (month, province) pair
    and its height is ``gather_number(province, month)``. The call blocks in
    ``plt.show()`` until the window is closed.
    """
    # Use the SimHei font so the Chinese tick labels can be rendered.
    plt.rcParams['font.sans-serif'] = ['SimHei']

    # Bars are placed on integer positions; the labels are attached as ticks.
    x_positions = list(range(len(x)))
    y_positions = list(range(len(y)))
    grid_x, grid_y = np.meshgrid(x_positions, y_positions)
    flat_x, flat_y = grid_x.ravel(), grid_y.ravel()

    # Look the labels up in the original sequences instead of pushing them
    # through np.meshgrid, so the database query receives the caller's own
    # values rather than NumPy scalars.
    top = [gather_number(y[j], x[i]) for i, j in zip(flat_x, flat_y)]
    bottom = np.zeros_like(top)
    width = depth = 1

    # plot 3d bars
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.bar3d(flat_x, flat_y, bottom, width, depth, top, shade=True)
    ax.set_title('V.Vader')
    ax.set_xlabel('month')
    ax.set_ylabel('province')
    ax.set_zlabel('number of people')
    ax.set_xticks(x_positions)
    ax.set_xticklabels(x)
    ax.set_yticks(y_positions)
    ax.set_yticklabels(y)
    plt.show()
def main():
    """Plot the chart for months '1'-'4' across four fixed cities."""
    months = ['1', '2', '3', '4']
    provinces = ['南昌', '北京', '上海', '杭州']
    plot_3d_bars(months, provinces)
if __name__ == '__main__':
    # Only draw the chart when executed as a script, not when imported.
    main()
启动zookeeper和Kafka
# Start the ZooKeeper server.
zkServer.sh start
# Start the Kafka broker using the given server.properties file.
kafka-server-start.sh /home/hadoop/app/kafka_2.11-0.9.0.0/config/server.properties
先是使用Kafka发送数据给userActionLog
import time
import csv
from kafka import KafkaProducer
# Replay data/user_log.csv into the 'userActionLog' topic, one row per second.
producer = KafkaProducer(bootstrap_servers='192.168.0.106:9092')
try:
    with open('data/user_log.csv', 'r', encoding='utf-8') as csv_file:
        # The first line is the CSV header; it is not sent.
        next(csv_file, None)
        for line in csv_file:
            # Send the raw CSV row (without its trailing newline) as the
            # message value; the consumer splits it on ',' itself.
            info = line.rstrip('\n')
            producer.send('userActionLog', value=info.encode('utf-8'))
            print(info)
            # Throttle to simulate a live stream of user actions.
            time.sleep(1)
finally:
    # send() is asynchronous: flush so buffered records are not lost when
    # the script exits, then release the producer's network resources.
    producer.flush()
    producer.close()
可以看到数据已经发好了(在Windows下开发的优点是IDE等比较happy,记得修改hosts文件,不然会跑不起来)
接下来是写Scala程序了,我之前是真的没有学过,所以花了2天学习了Scala。虽说Scala以前没有写过,但上手还蛮简单的。
首先是Kafka的一些配置
package iceberg.kafka;
/**
 * Connection constants shared by the Kafka/Spark Streaming code.
 */
public class KafkaProperties {
    /** ZooKeeper address (host:port). */
    public static final String ZK = "192.168.0.106:2181";
    /** Name of the topic that carries the user action log. */
    public static final String TOPIC = "userActionLog";
    /** Kafka broker address (host:port). */
    public static final String BROKER_LIST = "192.168.0.106:9092";
    /** Consumer group id. */
    public static final String GROUP_ID = "V.Vader";
}
统计“18-30”岁(年龄段编码2、3)、“30-40”岁(编码4、5)、“40-50”岁(编码6)、“50-60”岁(编码7、8)的男女购物人数
用户收藏数、购买数的top10商品,并把结果存储到MySQL数据库中
package iceberg
import java.sql.DriverManager
import java.util.HashMap
import iceberg.kafka.KafkaProperties
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
/**
 * Spark Streaming job over the user action log topic.
 *
 * Every 6-second batch it
 *  1. counts records per (gender, age range) and publishes each count to the
 *     Kafka topic "result" as the string "(gender:...,age:...)" followed by
 *     the count, and
 *  2. computes the 10 most frequent values of column 2 among "buy" actions
 *     (column 7 == "2") and among "collect" actions (column 7 == "3") and
 *     inserts them into the MySQL table `top`.
 */
object UserTop {

  /** Maps the raw gender code (0 = female, 1 = male) to its label; anything else, including unparsable input, is "others". */
  private def genderLabel(code: String): String =
    scala.util.Try(code.toInt).toOption match {
      case Some(0) => "gender:female"
      case Some(1) => "gender:male"
      case _       => "gender:others"
    }

  /**
   * Maps the raw age-range code to its label:
   * 2-3 -> 18-30, 4-5 -> 30-40, 6 -> 40-50, 7-8 -> 50-60.
   * Every other code (including values above 8, which previously produced a
   * Unit key because the innermost `if` had no `else`) is "others".
   */
  private def ageLabel(code: String): String =
    scala.util.Try(code.toInt).toOption match {
      case Some(2) | Some(3) => "age:18-30"
      case Some(4) | Some(5) => "age:30-40"
      case Some(6)           => "age:40-50"
      case Some(7) | Some(8) => "age:50-60"
      case _                 => "age:others"
    }

  /** Opens a JDBC connection to the MySQL `student` database. */
  private def createConnection() = {
    Class.forName("com.mysql.jdbc.Driver")
    DriverManager.getConnection("jdbc:mysql://192.168.0.104:3306/student", "root", "123456")
  }

  /**
   * For every batch, counts the first element of each pair, takes the 10
   * largest counts and inserts them into table `top` (columns cat, num).
   */
  private def saveTop10(actions: org.apache.spark.streaming.dstream.DStream[(String, String)]): Unit = {
    actions.map(action => (action._1, 1)).reduceByKey(_ + _).foreachRDD(rdd => {
      val top = rdd.sortBy(_._2, false).take(10)
      // Skip empty batches so no connection is opened for nothing.
      if (top.nonEmpty) {
        val connection = createConnection()
        try {
          // Parameterized insert: the values come from the incoming log and
          // must not be spliced into the SQL text.
          val statement = connection.prepareStatement("insert into top(cat, num) values (?, ?)")
          try {
            top.foreach { case (cat, num) =>
              println((cat, num))
              statement.setString(1, cat)
              statement.setInt(2, num)
              statement.executeUpdate()
              println("execute sql")
            }
          } finally {
            statement.close()
          }
        } finally {
          // One connection per batch; always released.
          connection.close()
        }
      }
    })
  }

  def main(args: Array[String]): Unit = {
    val kafkaTopic: Map[String, Int] = Map[String, Int](KafkaProperties.TOPIC -> 1)
    val conf = new SparkConf().setAppName("KafkaSparkStream").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(6))
    val stream = KafkaUtils.createStream(ssc, KafkaProperties.ZK, KafkaProperties.GROUP_ID, kafkaTopic, StorageLevel.MEMORY_ONLY)

    // Split every log line once; drop lines that are too short to contain
    // the columns read below (indices 2, 7, 8 and 9).
    val fields = stream.map(_._2.split(",")).filter(_.length > 9)

    // (1) Number of shoppers per gender and age range.
    // Column 8 is the age-range code, column 9 the gender code.
    val result = fields
      .map(f => ((genderLabel(f(9)), ageLabel(f(8))), 1))
      .reduceByKey(_ + _)

    // Publish the counts to Kafka.
    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.0.106:9092")
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    result.foreachRDD(rdd => {
      rdd.foreachPartition(partitionOfRecords => {
        // One producer per partition, closed afterwards so buffered records
        // are delivered and the connection is not leaked every batch.
        val producer = new KafkaProducer[String, String](props)
        try {
          partitionOfRecords.foreach(pair => {
            // Message format: "(gender:...,age:...)" immediately followed by the count.
            val str = pair._1.toString + pair._2.toString
            producer.send(new ProducerRecord[String, String]("result", null, str))
          })
        } finally {
          producer.close()
        }
      })
    })

    // (2) Top 10 items by number of purchases and by number of collections,
    // stored in MySQL. Column 2 is the item/category, column 7 the action type.
    val task2 = fields.map(f => (f(2), f(7)))
    val collection = task2.filter(_._2 == "3")
    val buy = task2.filter(_._2 == "2")
    saveTop10(buy)
    saveTop10(collection)

    ssc.start()
    ssc.awaitTermination()
  }
}
接下来就是用flask显示数据了
之前一直用Django(其实我用的最多的web框架是spring boot)然后现在用了flask感觉这个flask也是 超级爽的
from flask import Flask, render_template
from flask_socketio import SocketIO
from kafka import KafkaConsumer
import time
app = Flask(__name__)
# NOTE(review): hard-coded secret key -- acceptable for a demo only.
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app)
# Handle of the background consumer task; None until connect() starts it.
thread = None
# Consumer of the 'result' topic; it is created at import time.
consumer = KafkaConsumer('result', bootstrap_servers='192.168.0.106:9092')
# Latest value per dashboard slot, one single-key dict per slot. Keys g0-g3
# are written for the male conditions and b0-b3 for the female ones; the
# numeric suffix is the age range (0: 18-30, 1: 30-40, 2: 40-50, 3: 50-60).
result_list = [{"g0": "0"}, {"g1": "0"}, {"g2": "0"}, {"g3": "0"}, {"b0": "0"}, {"b1": "0"}, {"b2": "0"}, {"b3": "0"}]
# Maps the condition text of a 'result' message to its (index, key) slot in
# result_list.
# NOTE(review): male conditions feed the g* keys and female ones the b* keys,
# exactly as before; if the page binds g* to its "Girl" rows the two groups
# are swapped -- confirm against index.html.
_SLOT_BY_CONDITION = {
    'gender:male,age:18-30': (0, 'g0'),
    'gender:male,age:30-40': (1, 'g1'),
    'gender:male,age:40-50': (2, 'g2'),
    'gender:male,age:50-60': (3, 'g3'),
    'gender:female,age:18-30': (4, 'b0'),
    'gender:female,age:30-40': (5, 'b1'),
    'gender:female,age:40-50': (6, 'b2'),
    'gender:female,age:50-60': (7, 'b3'),
}


def background_thread():
    """Forward counts from the Kafka 'result' topic to Socket.IO clients.

    Each message is expected to look like ``(<condition>)<value>``, e.g.
    ``(gender:male,age:18-30)42``. The value is stored in the matching slot
    of ``result_list`` (messages with other conditions leave it unchanged)
    and the whole list is emitted as a 'test_message' event. Messages that
    do not have this shape are logged and skipped instead of raising
    IndexError and ending the task.
    """
    for msg in consumer:
        str_msg = msg.value.decode('utf-8')
        parts = str_msg.split(')')
        head = parts[0].split('(')
        if len(parts) < 2 or len(head) < 2:
            print('skip malformed message', str_msg)
            continue
        condition, value = head[1], parts[1]
        slot = _SLOT_BY_CONDITION.get(condition)
        if slot is not None:
            index, key = slot
            result_list[index][key] = value
        print('test_message', result_list)
        socketio.emit('test_message', {'data': result_list})
        # socketio.sleep cooperates with whichever async mode Flask-SocketIO
        # runs in; in plain threading mode it behaves like time.sleep.
        socketio.sleep(1)
@socketio.on('test_connect')
def connect(message):
    """Handle the 'test_connect' event sent by a client.

    Starts the Kafka-consuming background task if it is not running yet and
    emits a 'connected' event.
    """
    print('message', message)
    global thread
    # Start the background task only once, however many clients connect.
    if thread is None:
        print('thread is None starting socket_io')
        thread = socketio.start_background_task(target=background_thread)
    # NOTE(review): the original nesting was lost; this assumes the
    # acknowledgement is sent on every call, not only the first -- confirm.
    socketio.emit('connected', {'data': 'server connected'})
@app.route('/')
def hello_world():
    """Serve the dashboard page for the site root."""
    page = render_template("index.html")
    return page
if __name__ == '__main__':
    # Run through socketio.run (not app.run) so Socket.IO is served too.
    # NOTE(review): debug=True is for local development only.
    socketio.run(app, debug=True)
剩下的就是页面显示了。
DashBoard
Girl:18-30:
Girl:30-40:
Girl:40-50:
Girl:50-60:
Boy:18-30:
Boy:30-40:
Boy:40-50:
Boy:50-60:
这样socket就可以把数据传出去了,看起来还是蛮有意思的,所以接下来打算使用Hbase,redis,python,tensorflow来写一个实时处理股票并显示的系统前台页面使用ECharts,期待自己的表现。(超级期待)
对啦下面是可以选择的,这样就可以筛选多个信息了