&&&
# -*- coding: utf-8 -*-
"""Scrape the Douban Top 250 list and load it into MySQL (Python 2 script).

The module offers four entry points:

* ``createTbale01`` / ``createTbale02`` create the ``douban01`` / ``douban02``
  tables.
* ``saveToTbale01`` scrapes the Top 250 pages and fills ``douban01``.
* ``saveToTable02`` loads ``top250_f2.csv`` into ``douban02``.
"""
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time
import sys
import cStringIO
reload(sys)
sys.setdefaultencoding("utf-8")
import pymysql

# Connection settings shared by every function in this script, in the order
# expected by ``connectmysql`` (host, port, user, passwd, db).
# NOTE(review): the credentials are hard-coded in source; consider moving
# them to configuration.
DB_SETTINGS = ("123.206.224.92", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')

# The listing is paged 25 entries at a time through the ``start`` query
# parameter: the second page is start=25, the third start=50, and so on.
TOP250_URL = "https://movie.douban.com/top250?start={}&filter="
PAGE_SIZE = 25
PAGE_COUNT = 10

# Number of '#'-separated fields expected on every line of top250_f2.csv:
# num#rank#alt_title#title#pubdate#language#writer#director#cast
# #movie_duration#year#movie_type#tags#image
CSV_FIELD_COUNT = 14


def connectmysql(host, port, user, passwd, db, charset='utf8'):
    """Connect to the database and create a cursor.

    :param host: database host
    :param port: database port (converted with ``int``)
    :param user: user name
    :param passwd: password
    :param db: database name
    :param charset: connection character set
    :return: tuple ``(conn, cursor)``
    :raises Exception: whatever ``pymysql.connect`` raises. The error is
        propagated instead of being returned, because every caller unpacks
        the result into ``conn, cursor`` and a returned exception object only
        produced a misleading unpacking error.
    """
    conn = pymysql.connect(host=str(host), port=int(port), user=str(user),
                           passwd=str(passwd), db=str(db),
                           charset=str(charset))
    try:
        cursor = conn.cursor()
    except Exception:
        conn.close()
        raise
    return conn, cursor


def _parse_top250_item(item_div):
    """Extract one ``douban01`` row from an ``item`` element of a list page.

    :param item_div: BeautifulSoup element with class ``item``
    :return: tuple ``(num, moviename, init_year, area, genre, rating_num,
        comment_num, inq)`` in the column order of ``douban01``
    """
    pic_div = item_div.find(class_='pic')
    num = pic_div.find('em').get_text()          # rank
    moviename = pic_div.find('img')['alt']       # movie title

    bd_div = item_div.find(class_='bd')
    infos = bd_div.find('p').get_text().strip().split('\n')
    # The second text line is split on '/' into year, area and genre(s).
    infos_2 = infos[1].strip().split('/')
    init_year = infos_2[0]                       # release year
    area = infos_2[1].strip()                    # country / region
    genre = ''.join(infos_2[2:]).strip()         # genre(s)

    star_div = item_div.find(class_='star')
    rating_num = star_div.find(class_='rating_num').get_text()   # score
    # The fourth <span> holds the vote count; its last three characters are
    # dropped before the int() conversion (presumably a text suffix --
    # confirm against the page markup).
    comment_num = star_div.find_all('span')[3].get_text()[:-3]

    quote = item_div.find(class_='quote')
    try:
        inq = quote.find(class_='inq').get_text()   # one-line review
    except AttributeError:
        # ``find`` returned None: the entry has no quote element.
        inq = "None"

    return (int(num), moviename, init_year, area, genre, rating_num,
            int(comment_num), inq)


def saveToTbale01():
    """Scrape every Top 250 page and insert the entries into ``douban01``.

    All rows are committed together at the end; if anything fails, nothing
    is committed and the connection is still closed.

    :return: None
    """
    insert_sql = "insert into douban01 values(%s,%s,%s,%s,%s,%s,%s,%s)"
    conn, cursor = connectmysql(*DB_SETTINGS)
    try:
        for page in range(PAGE_COUNT):
            url_visit = TOP250_URL.format(page * PAGE_SIZE)
            response = requests.get(url_visit)
            soup = BeautifulSoup(response.text, 'html.parser')
            for item_div in soup.find_all(class_='item'):
                cursor.execute(insert_sql, _parse_top250_item(item_div))
            # Pause between page requests.
            time.sleep(3)
        conn.commit()
    finally:
        cursor.close()
        conn.close()


def _run_statement(sql):
    """Execute a single SQL statement, commit, and always close the connection."""
    conn, cursor = connectmysql(*DB_SETTINGS)
    try:
        cursor.execute(sql)
        conn.commit()
    finally:
        cursor.close()
        conn.close()


def createTbale01():
    """Create table one (``douban01``).

    ``rating_num`` is declared DECIMAL(3,1) rather than INT so that a
    fractional score is stored as-is instead of being forced to an integer.

    :return: None
    """
    sql = (
        "CREATE TABLE douban01("
        "num INT NOT NULL,"
        "moviename VARCHAR(100) NOT NULL,"
        "init_year VARCHAR(100) NOT NULL,"
        "area VARCHAR(100) NOT NULL,"
        "genre VARCHAR(100) NOT NULL,"
        "rating_num DECIMAL(3,1) NOT NULL,"
        "comment_num INT NOT NULL,"
        "inq VARCHAR(100) NOT NULL"
        ")ENGINE=InnoDB DEFAULT CHARSET=utf8;"
    )
    _run_statement(sql)


def createTbale02():
    """Create table two (``douban02``).

    Column order matches the fields of ``top250_f2.csv``:
    num, rank, alt_title, title, pubdate, language, writer, director, cast,
    movie_duration, year, movie_type, tags, image.

    ``rank`` and ``cast`` are backtick-quoted so the statement does not
    depend on how the server treats those words as keywords.

    :return: None
    """
    sql = (
        "CREATE TABLE douban02("
        "num INT NOT NULL,"
        "`rank` INT NOT NULL,"
        "alt_title VARCHAR(100) NOT NULL,"
        "title VARCHAR(100) NOT NULL,"
        "pubdate VARCHAR(100) NOT NULL,"
        "language VARCHAR(100) NOT NULL,"
        "writer VARCHAR(100) NOT NULL,"
        "director VARCHAR(100) NOT NULL,"
        "`cast` VARCHAR(100) NOT NULL,"
        "movie_duration VARCHAR(100) NOT NULL,"
        "year VARCHAR(100) NOT NULL,"
        "movie_type VARCHAR(100) NOT NULL,"
        "tags VARCHAR(100) NOT NULL,"
        " image VARCHAR(100) NOT NULL"
        ")ENGINE=InnoDB DEFAULT CHARSET=utf8;"
    )
    _run_statement(sql)


def saveToTable02():
    """Load ``top250_f2.csv`` ('#'-separated) into ``douban02``.

    Each non-empty line must contain at least 14 fields; the first 14 are
    inserted in file order, with the first field converted to ``int``. The
    line terminator is stripped so it is not stored in the last column.

    :return: None
    :raises ValueError: if a line has fewer than 14 fields
    """
    insert_sql = ("insert into douban02 values"
                  "(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)")
    conn, cursor = connectmysql(*DB_SETTINGS)
    try:
        with open("top250_f2.csv") as source:
            for line_no, line in enumerate(source, 1):
                line = line.rstrip('\r\n')
                if not line:
                    continue
                fields = line.split('#')[:CSV_FIELD_COUNT]
                if len(fields) < CSV_FIELD_COUNT:
                    raise ValueError(
                        "top250_f2.csv line %d: expected %d '#'-separated "
                        "fields, found %d"
                        % (line_no, CSV_FIELD_COUNT, len(fields)))
                fields[0] = int(fields[0])
                cursor.execute(insert_sql, tuple(fields))
        conn.commit()
    finally:
        cursor.close()
        conn.close()


if __name__ == "__main__":
    saveToTable02()
# coding:utf-8
# [root@xxn modles]# cat views.py
"""Django views for the Douban movie analysis site (Python 2)."""
# Create your views here.
from django.shortcuts import render, render_to_response
from django.http import HttpResponse, HttpResponseRedirect
from bs4 import BeautifulSoup
import urllib
import sys
import re
import json
import jieba
import pymysql
import requests
from collections import Counter
from operator import itemgetter
import random
import time
reload(sys)
sys.setdefaultencoding('utf-8')
from pytagcloud.colors import COLOR_SCHEMES
from pytagcloud import create_tag_image, create_html_data, make_tags, LAYOUT_HORIZONTAL, LAYOUTS

# Connection settings used by every view, in the order expected by
# ``connectmysql`` (host, port, user, passwd, db).
# NOTE(review): the credentials are hard-coded in source; consider moving
# them to the Django settings.
DB_SETTINGS = ("127.0.0.1", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')

# Template context keys for one ``douban01`` row, in column order.
DOUBAN01_FIELDS = ('num', 'moviename', 'init_year', 'area', 'genre',
                   'rating_num', 'comment_num', 'inq')

# Browser-like User-Agent header sent with the scraping requests.
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.3964.2",
}


def connectmysql(host, port, user, passwd, db, charset='utf8'):
    """Connect to the database and create a cursor.

    :param host: database host
    :param port: database port (converted with ``int``)
    :param user: user name
    :param passwd: password
    :param db: database name
    :param charset: connection character set
    :return: tuple ``(conn, cursor)``
    :raises Exception: whatever ``pymysql.connect`` raises. The error is
        propagated instead of being returned, because every caller unpacks
        the result into ``conn, cursor`` and a returned exception object only
        produced a misleading unpacking error.
    """
    conn = pymysql.connect(host=str(host), port=int(port), user=str(user),
                           passwd=str(passwd), db=str(db),
                           charset=str(charset))
    try:
        cursor = conn.cursor()
    except Exception:
        conn.close()
        raise
    return conn, cursor


def _fetch_all(sql, params=None):
    """Run one SELECT and return all rows, always closing the connection."""
    conn, cursor = connectmysql(*DB_SETTINGS)
    try:
        cursor.execute(sql, params)
        return cursor.fetchall()
    finally:
        cursor.close()
        conn.close()


def _build_word_cloud(sql, image_path):
    """Render a word-cloud image from the first column of a query result.

    The column values are joined with spaces, segmented with
    ``jieba.cut_for_search`` and counted; the tokens are ranked by count and
    drawn with pytagcloud. Rendering errors are printed, not raised.
    """
    rows = _fetch_all(sql)
    text = " ".join(row[0].encode("utf-8") for row in rows)
    frequencies = Counter(jieba.cut_for_search(text))
    ranked = sorted(frequencies.items(), key=itemgetter(1), reverse=True)
    # Keep ranks 2..100; the most frequent token is skipped.
    # NOTE(review): presumably that token is the " " separator used in the
    # join above -- confirm, since a real word would be dropped otherwise.
    ranked = ranked[1:100]
    tags = make_tags(ranked, minsize=50, maxsize=240,
                     colors=random.choice(list(COLOR_SCHEMES.values())))
    try:
        create_tag_image(tags, image_path, background=(0, 0, 0, 255),
                         size=(2400, 1000), layout=LAYOUT_HORIZONTAL,
                         fontname="STKAITI")
    except Exception as e:
        # Best effort: the page is still rendered if the image fails.
        print(e)


def createtupian(request):
    """Regenerate the area and movie-type word clouds, then render the page.

    :param request: Django request
    :return: rendered ``cloud.html``
    """
    _build_word_cloud("select area from douban01",
                      '/export/taobaomodels/modles/static/area.png')
    _build_word_cloud("select movie_type from douban02",
                      '/export/taobaomodels/modles/static/movie_type.png')
    return render(request, 'cloud.html')


def index(request):
    """Render the landing page."""
    return render(request, 'index.html')


def search(request):
    """Look up a movie in ``douban01`` by (partial) title.

    Only POST is accepted. The title from the ``name`` form field is passed
    to the LIKE clause as a bound parameter, never formatted into the SQL
    text. The first matching row is shown; if there is none, a "not found"
    prompt is rendered instead.

    :param request: Django request
    :return: rendered ``index.html`` or a plain response for non-POST
    """
    if request.method != 'POST':
        return HttpResponse('提交的方式不是post')

    moviename = request.POST['name']
    conn, cursor = connectmysql(*DB_SETTINGS)
    try:
        cursor.execute("select * from douban01 where moviename LIKE %s;",
                       ('%' + moviename + '%',))
        info = cursor.fetchone()
    finally:
        cursor.close()
        conn.close()

    if info is None:
        prompt = "sorry: 数据库中没有 " + moviename + " 这个电影的信息"
        return render(request, 'index.html', {'prompt': prompt})
    return render(request, 'index.html', dict(zip(DOUBAN01_FIELDS, info)))


def top250infor(request):
    """Render the full ``douban01`` table."""
    infor_list = _fetch_all("select * from douban01;")
    return render_to_response('top250movielist.html',
                              {'infor_list': infor_list})


def liuyan(request):
    """Render the message board with every row of ``message``."""
    infor_list = _fetch_all("select * from message;")
    return render_to_response('liuyanban.html', {'infor_list': infor_list})


def _year_histogram(limit=None):
    """Return ``(years, counts)`` for the release years of ``douban01``.

    The distinct ``init_year`` values of the selected rows (all rows, or the
    first ``limit`` rows returned by the server) are sorted as stored and
    converted to ``int``; ``counts[i]`` is the number of rows in the whole
    table whose ``init_year`` equals the i-th value.

    NOTE(review): the counts are taken over the whole table even when
    ``limit`` is given, and the limited query has no ORDER BY -- confirm
    that this is the intended meaning of the "top 100" / "top 50" series.
    """
    sql = "select init_year from douban01"
    if limit is not None:
        sql += " limit %d" % int(limit)

    years = []
    counts = []
    conn, cursor = connectmysql(*DB_SETTINGS)
    try:
        cursor.execute(sql)
        distinct_years = sorted(set(row[0] for row in cursor.fetchall()))
        for year in distinct_years:
            years.append(int(year))
            cursor.execute(
                "select count(*) from douban01 where init_year=%s", (year,))
            counts.append(int(cursor.fetchone()[0]))
    finally:
        cursor.close()
        conn.close()
    return years, counts


def show(request):
    """Render the per-year charts for the 250-, 100- and 50-row selections.

    :param request: Django request
    :return: rendered ``show.html`` with ``yearlistnewN`` / ``yearnumlistN``
        context entries for N in 250, 100, 50
    """
    context = {}
    for suffix, limit in (('250', None), ('100', 100), ('50', 50)):
        years, counts = _year_histogram(limit)
        context['yearlistnew' + suffix] = years
        context['yearnumlist' + suffix] = counts
    return render(request, 'show.html', context)


def receive_message(request):
    """Receive a visitor's message and write it to the database.

    On POST the ``name``, ``email``, ``subject`` and ``message`` form fields
    are inserted into the ``message`` table. The index page is rendered
    afterwards for every request method.

    :param request: Django request
    :return: rendered ``index.html``
    """
    if request.method == 'POST':
        row = (request.POST['name'], request.POST['email'],
               request.POST['subject'], request.POST['message'])
        conn, cursor = connectmysql(*DB_SETTINGS)
        try:
            cursor.execute("insert into message values(%s,%s,%s,%s)", row)
            conn.commit()
        finally:
            cursor.close()
            conn.close()
    return render_to_response('index.html')


def _token_weights(sql):
    """Count the whitespace-separated tokens in the first result column.

    :return: plain dict mapping each token to its number of occurrences
    """
    counts = Counter()
    for row in _fetch_all(sql):
        counts.update(row[0].split())
    # Return a plain dict so a missing key still raises KeyError.
    return dict(counts)


def zidian():
    """Build the key -> weight dictionaries used for the prediction.

    Example: ``{"动作": 25}`` means the genre token "动作" has weight 25,
    i.e. it occurs 25 times in the column.

    :return: tuple ``(genredicts, areadict, writerdict, directordict,
        castdict)`` built from ``douban01.genre``, ``douban01.area``,
        ``douban02.writer``, ``douban02.director`` and ``douban02.cast``
    """
    genredicts = _token_weights("select genre from douban01;")
    areadict = _token_weights("select area from douban01;")
    writerdict = _token_weights("select writer from douban02;")
    directordict = _token_weights("select director from douban02;")
    # The column name is backtick-quoted: with single quotes the query
    # selected the string literal 'cast' for every row, not the column.
    castdict = _token_weights("select `cast` from douban02;")
    return genredicts, areadict, writerdict, directordict, castdict


def newmovie20():
    """Return the page URLs listed under ``subjects`` by the Douban search API.

    :return: list of the ``url`` value of every subject in the response
    """
    url = "https://movie.douban.com/j/search_subjects?type=movie&tag=%E6%9C%80%E6%96%B0&page_limit=20&page_start=0"
    response = requests.get(url, headers=REQUEST_HEADERS)
    return [movie['url'] for movie in response.json()['subjects']]


def movieinfo(url):
    """Fetch a movie page and return the fields stored in ``newmovie20``.

    The table that stores the result:
    create table newmovie20(moviename VARCHAR(100) NOT NULL,
    year VARCHAR(100) NOT NULL, director VARCHAR(100) NOT NULL,
    writer VARCHAR(100) NOT NULL, castlist VARCHAR(1000) NOT NULL,
    genrelist VARCHAR(1000) NOT NULL, area VARCHAR(100) NOT NULL)
    ENGINE=InnoDB DEFAULT CHARSET=utf8;

    :param url: movie page URL
    :return: tuple ``(moviename, year, director, writer, castlist,
        genrelist, area)``; ``castlist`` and ``genrelist`` hold only the
        first cast member / first genre, and ``castlist`` is ``'None'``
        when the page lists no cast
    """
    response = requests.get(url, headers=REQUEST_HEADERS)
    soup = BeautifulSoup(response.text, 'lxml')
    infodata = soup.find('div', id='wrapper')
    moviename = infodata.find('span', property='v:itemreviewed').string   # title
    year = infodata.find('span', class_="year").string                    # release year

    infodata2 = infodata.find('div', class_='subject clearfix').find('div', id='info')
    spans = infodata2.find_all('span')
    director = spans[0].text.split()[1]    # second whitespace token of the first span
    writer = spans[2].string               # text of the third span

    cast_names = [cast.string
                  for cast in infodata2.find_all('a', rel="v:starring")]
    try:
        castlist = cast_names[0]
    except IndexError:
        # No cast links on the page.
        castlist = 'None'

    genre_names = [genre.string
                   for genre in infodata2.find_all("span", property="v:genre")]
    genrelist = genre_names[0]

    # The text between the first "(" and the following ")" of the
    # initial-release-date span.
    release = infodata2.find('span', property="v:initialReleaseDate").string
    area = release.split('(')[1].split(')')[0]
    return moviename, year, director, writer, castlist, genrelist, area


def savetomysql(request):
    """Save the newest movies to the database and render the list.

    Every movie page is scraped before the table is truncated, so a network
    or parsing failure cannot leave ``newmovie20`` emptied.

    :param request: Django request
    :return: rendered ``new20movielist.html``
    """
    movies = [movieinfo(url) for url in newmovie20()]

    insert_sql = "insert into newmovie20 values(%s,%s,%s,%s,%s,%s,%s)"
    conn, cursor = connectmysql(*DB_SETTINGS)
    try:
        cursor.execute("truncate table newmovie20;")
        for movie in movies:
            cursor.execute(insert_sql, tuple(str(field) for field in movie))
        conn.commit()
    finally:
        cursor.close()
        conn.close()

    infor_list = _fetch_all("select * from newmovie20;")
    return render_to_response('new20movielist.html',
                              {'infor_list': infor_list})


def yuce():
    """Score every movie in ``newmovie20`` for the prediction page.

    A movie's score is the sum of the weights (from ``zidian``) of its area,
    director, writer, genre and cast values; a value with no weight
    contributes 0.

    :return: list of ``(moviename, score)`` pairs sorted by score, highest
        first
    """
    genredicts, areadict, writerdict, directordict, castdict = zidian()
    scores = {}
    for item in _fetch_all("select * from newmovie20;"):
        moviename, year, director, writer, castlist, genrelist, area = item
        scores[moviename] = (areadict.get(area, 0)
                             + directordict.get(director, 0)
                             + writerdict.get(writer, 0)
                             + genredicts.get(genrelist, 0)
                             + castdict.get(castlist, 0))
    return sorted(scores.items(), key=itemgetter(1), reverse=True)


def yucemovie(request):
    """Render the prediction page with the scored movie list."""
    infor_list = yuce()
    return render_to_response('yuce.html', {'infor_list': infor_list})
留言信息展示 {% block content %}豆瓣20部新电影预测
电影 | 权重 |
---|---|
{{ infor.0 }} | {{ infor.1 }} |
&&&