&&&

# -*- coding: utf-8 -*
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time
import sys
import cStringIO
reload(sys)
sys.setdefaultencoding("utf-8")
import pymysql


def connectmysql(host,port,user,passwd,db,charset='utf8'):
    """
    Open a MySQL connection and create a cursor on it.

    Connection errors are propagated to the caller. Every caller unpacks
    the result as ``conn, cursor = connectmysql(...)``, so returning the
    exception object (as this function used to) only postponed the failure
    and hid its real cause.

    :param host: server host name or IP address (coerced with ``str``)
    :param port: server TCP port (coerced with ``int``)
    :param user: login user name (coerced with ``str``)
    :param passwd: login password (coerced with ``str``)
    :param db: name of the database to select (coerced with ``str``)
    :param charset: connection character set, ``'utf8'`` by default
    :return: a ``(conn, cursor)`` tuple
    :raises Exception: whatever ``pymysql.connect`` or ``conn.cursor`` raises
    """
    conn = pymysql.connect(host=str(host), port=int(port), user=str(user),
                           passwd=str(passwd), db=str(db), charset=str(charset))
    try:
        cursor = conn.cursor()
    except Exception:
        # Do not leak the freshly opened connection if no cursor can be made.
        conn.close()
        raise
    return conn, cursor


def saveToTbale01():
    """
    Scrape the Douban Top 250 list and insert one row per movie into ``douban01``.

    Ten pages are requested; each page is parsed with BeautifulSoup and every
    ``.item`` element becomes one insert. All inserts are committed together
    after the last page. The cursor and connection are closed even when a
    request, a parse step or an insert fails.

    :return: None
    :raises requests.HTTPError: when a page answers with an HTTP error status
    """
    top250_url = "https://movie.douban.com/top250?start={}&filter="
    sqli = "insert into douban01 values(%s,%s,%s,%s,%s,%s,%s,%s)"

    # NOTE(review): the database credentials are hard-coded in this call.
    conn, cursor = connectmysql("123.206.224.92", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    try:
        for i in range(10):
            # Each page lists 25 movies and only the ``start`` offset changes:
            # page 2 is start=25, page 3 is start=50, and so on.
            url_visit = top250_url.format(i * 25)

            response = requests.get(url_visit)
            # Fail loudly instead of parsing an error page into zero rows.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            for each_item_div in soup.find_all(class_='item'):
                pic_div = each_item_div.find(class_='pic')
                num = pic_div.find('em').get_text()        # rank
                moviename = pic_div.find('img')['alt']     # movie title

                bd_div = each_item_div.find(class_='bd')
                infos = bd_div.find('p').get_text().strip().split('\n')
                # The second text line is "year / area / genres".
                infos_2 = infos[1].strip().split('/')
                init_year = infos_2[0]                     # release year
                area = infos_2[1].strip()                  # country / region
                genre = ''.join(infos_2[2:]).strip()       # genres

                star_div = each_item_div.find(class_='star')
                rating_num = star_div.find(class_='rating_num').get_text()   # rating
                # The 4th span holds the vote count followed by a 3-character suffix.
                comment_num = star_div.find_all('span')[3].get_text()[:-3]

                # The one-line quote is optional; store the string "None" when
                # either the quote container or its ``inq`` element is missing.
                quote = each_item_div.find(class_='quote')
                inq_tag = quote.find(class_='inq') if quote is not None else None
                inq = inq_tag.get_text() if inq_tag is not None else "None"

                cursor.execute(sqli, (int(num), moviename, init_year, area, genre,
                                      rating_num, int(comment_num), inq))

            # Pause between pages to keep the request rate low.
            time.sleep(3)
        conn.commit()
    finally:
        cursor.close()
        conn.close()


def createTbale01():
    """
    Create table ``douban01`` (one row per Top 250 movie).

    ``rating_num`` is declared ``DECIMAL(3,1)``: an ``INT`` column cannot
    hold the rating text that ``saveToTbale01`` scrapes, which is assumed
    to be fractional (e.g. ``9.7``).

    :return: None
    """
    sql = (
        "CREATE TABLE douban01("
        "num INT NOT NULL,"
        "moviename VARCHAR(100) NOT NULL,"
        "init_year VARCHAR(100) NOT NULL,"
        "area VARCHAR(100) NOT NULL,"
        "genre VARCHAR(100) NOT NULL,"
        "rating_num DECIMAL(3,1) NOT NULL,"
        "comment_num INT NOT NULL,"
        "inq VARCHAR(100) NOT NULL"
        ")ENGINE=InnoDB DEFAULT CHARSET=utf8;"
    )
    conn, cursor = connectmysql("123.206.224.92", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    try:
        cursor.execute(sql)
        conn.commit()
    finally:
        # Release the connection even when the CREATE fails (e.g. table exists).
        cursor.close()
        conn.close()

def createTbale02():
    """
    Create table ``douban02`` (detailed per-movie metadata).

    Column order matches the '#'-separated input file:
    num#rank#alt_title#title#pubdate#language#writer#director#cast#
    movie_duration#year#movie_type#tags#image

    Every identifier is backtick-quoted because ``rank`` is a reserved word
    in MySQL 8.0 and several other names (``cast``, ``language``, ``year``)
    collide with SQL keywords or function names.

    :return: None
    """
    sql = (
        "CREATE TABLE `douban02`("
        "`num` INT NOT NULL,"
        "`rank` INT NOT NULL,"
        "`alt_title` VARCHAR(100) NOT NULL,"
        "`title` VARCHAR(100) NOT NULL,"
        "`pubdate` VARCHAR(100) NOT NULL,"
        "`language` VARCHAR(100) NOT NULL,"
        "`writer` VARCHAR(100) NOT NULL,"
        "`director` VARCHAR(100) NOT NULL,"
        "`cast` VARCHAR(100) NOT NULL,"
        "`movie_duration` VARCHAR(100) NOT NULL,"
        "`year` VARCHAR(100) NOT NULL,"
        "`movie_type` VARCHAR(100) NOT NULL,"
        "`tags` VARCHAR(100) NOT NULL,"
        "`image` VARCHAR(100) NOT NULL"
        ")ENGINE=InnoDB DEFAULT CHARSET=utf8;"
    )
    conn, cursor = connectmysql("123.206.224.92", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    try:
        cursor.execute(sql)
        conn.commit()
    finally:
        # Release the connection even when the CREATE fails.
        cursor.close()
        conn.close()

def saveToTable02():
    """
    Load ``top250_f2.csv`` into table ``douban02``.

    The file is '#'-separated with 14 fields per line, in the order
    num, rank, alt_title, title, pubdate, language, writer, director, cast,
    movie_duration, year, movie_type, tags, image. Only ``num`` is converted
    to ``int``; the other fields are inserted as read.

    Blank lines are skipped and the line terminator is removed first, so the
    last field (``image``) is stored without a trailing newline. All rows are
    committed together; the file, cursor and connection are always closed.

    :return: None
    :raises ValueError: when a non-blank line has fewer than 14 fields
    """
    field_count = 14
    sqli = "insert into douban02 values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

    conn, cursor = connectmysql("123.206.224.92", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    try:
        with open("top250_f2.csv") as csv_file:
            for line_no, line in enumerate(csv_file, 1):
                line = line.rstrip('\r\n')
                if not line:
                    continue
                datalist = line.split('#')
                if len(datalist) < field_count:
                    raise ValueError("top250_f2.csv line %d: expected %d fields, got %d"
                                     % (line_no, field_count, len(datalist)))
                # Fields beyond the 14th are ignored, as before.
                row = (int(datalist[0]),) + tuple(datalist[1:field_count])
                cursor.execute(sqli, row)
        conn.commit()
    finally:
        cursor.close()
        conn.close()

# Script entry point: when run directly, load the CSV file into douban02.
if __name__ == "__main__":
    saveToTable02()
# ---- views.py (second file; originally shown with: cat views.py) ----
# coding:utf-8
# Create your views here.
from django.shortcuts import render, render_to_response
from django.http import HttpResponse, HttpResponseRedirect

from bs4 import BeautifulSoup
import urllib

import sys
import re
import json
import jieba
import pymysql
import requests
from operator import itemgetter
from pytagcloud import create_tag_image, make_tags
import random
import time
reload(sys)
sys.setdefaultencoding('utf-8')
import jieba
from operator import itemgetter
from pytagcloud.colors import COLOR_SCHEMES
from pytagcloud import create_tag_image, create_html_data, make_tags, LAYOUT_HORIZONTAL, LAYOUTS


def _render_word_cloud(sql, image_path):
    """Run *sql* (one text column), count its jieba tokens and draw a tag-cloud PNG."""
    conn, cursor = connectmysql("127.0.0.1", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    try:
        cursor.execute(sql)
        rows = cursor.fetchall()
    finally:
        cursor.close()
        conn.close()

    # Join every value into one space-separated text and tokenize it.
    fcstr = " ".join([row[0].encode("utf-8") for row in rows])
    wd = {}
    for w in jieba.cut_for_search(fcstr):
        wd[w] = wd.get(w, 0) + 1

    swd = sorted(wd.items(), key=itemgetter(1), reverse=True)
    # Drop the single most frequent token and keep the next 99.
    # NOTE(review): presumably the top token is the " " separator - confirm.
    swd = swd[1:100]
    tags = make_tags(swd, minsize=50, maxsize=240,
                     colors=random.choice(list(COLOR_SCHEMES.values())))
    try:
        create_tag_image(tags, image_path, background=(0, 0, 0, 255), size=(2400, 1000),
                         layout=LAYOUT_HORIZONTAL, fontname="STKAITI")
    except Exception as e:
        # Best effort: a failed rendering must not break the page.
        print(e)


def createtupian(request):
    """
    Regenerate the two tag-cloud images and render ``cloud.html``.

    One image is built from ``douban01.area`` and one from
    ``douban02.movie_type``. Each is written to a fixed path under the static
    directory with a randomly chosen colour scheme.

    :param request: the Django request
    :return: the rendered ``cloud.html`` response
    """
    _render_word_cloud("select area  from douban01",
                       '/export/taobaomodels/modles/static/area.png')
    _render_word_cloud("select movie_type  from douban02",
                       '/export/taobaomodels/modles/static/movie_type.png')
    return render(request, 'cloud.html')

def connectmysql(host, port, user, passwd, db, charset='utf8'):
    """
    Open a MySQL connection and create a cursor on it.

    Connection errors are propagated to the caller. Every view unpacks the
    result as ``conn, cursor = connectmysql(...)``, so returning the
    exception object (as this function used to) only postponed the failure
    and hid its real cause.

    :param host: server host name or IP address (coerced with ``str``)
    :param port: server TCP port (coerced with ``int``)
    :param user: login user name (coerced with ``str``)
    :param passwd: login password (coerced with ``str``)
    :param db: name of the database to select (coerced with ``str``)
    :param charset: connection character set, ``'utf8'`` by default
    :return: a ``(conn, cursor)`` tuple
    :raises Exception: whatever ``pymysql.connect`` or ``conn.cursor`` raises
    """
    conn = pymysql.connect(host=str(host), port=int(port), user=str(user), passwd=str(passwd), db=str(db),
                           charset=str(charset))
    try:
        cursor = conn.cursor()
    except Exception:
        # Do not leak the freshly opened connection if no cursor can be made.
        conn.close()
        raise
    return conn, cursor


def index(request):
    """Render the landing page template ``index.html`` with no extra context."""
    template_name = 'index.html'
    return render(request, template_name)


def search(request):
    """
    Look up a movie in ``douban01`` by a substring of its name.

    Only POST is accepted. The submitted ``name`` is bound as a query
    parameter (never formatted into the SQL text), so user input cannot
    change the statement. The first matching row is rendered into
    ``index.html``. When nothing matches, or the query fails, the page is
    rendered with a ``prompt`` message instead.

    :param request: the Django request; ``request.POST['name']`` is the search text
    :return: the rendered ``index.html``, or a plain response for non-POST requests
    """
    if request.method != 'POST':
        # Reject before opening a connection so nothing is leaked.
        return HttpResponse('提交的方式不是post')

    moviename = request.POST['name']
    sql = "select * from douban01 where moviename LIKE %s;"

    info = None
    conn, cursor = connectmysql("127.0.0.1", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    try:
        cursor.execute(sql, ('%' + moviename + '%',))
        info = cursor.fetchone()
    except Exception as e:
        # Best effort: a failed query is reported to the user as "not found".
        print(e)
    finally:
        cursor.close()
        conn.close()

    if info is None:
        prompt = "sorry: 数据库中没有  " + moviename + "   这个电影的信息"
        return render(request, 'index.html', {'prompt': prompt})

    # Column order of douban01 as created by the loader script.
    columns = ('num', 'moviename', 'init_year', 'area', 'genre',
               'rating_num', 'comment_num', 'inq')
    return render(request, 'index.html', dict(zip(columns, info)))

def top250infor(request):
    """
    Render ``top250movielist.html`` with every row of ``douban01``.

    :param request: the Django request (not otherwise used)
    :return: the rendered response; rows are exposed as ``infor_list``
    """
    connection, cur = connectmysql("127.0.0.1", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    # execute() returns the row count, which is used as the fetch size.
    row_count = cur.execute("select * from douban01;")
    movies = cur.fetchmany(row_count)
    cur.close()
    connection.close()

    context = {'infor_list': movies}
    return render_to_response('top250movielist.html', context)

def liuyan(request):
    """
    Render the message board ``liuyanban.html`` with every row of ``message``.

    :param request: the Django request (not otherwise used)
    :return: the rendered response; rows are exposed as ``infor_list``
    """
    connection, cur = connectmysql("127.0.0.1", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    # execute() returns the row count, which is used as the fetch size.
    row_count = cur.execute("select * from message;")
    messages = cur.fetchmany(row_count)
    cur.close()
    connection.close()

    context = {'infor_list': messages}
    return render_to_response('liuyanban.html', context)




def _year_histogram(cursor, limit=None):
    """Return ``(years, counts)`` for ``init_year`` over all rows or the first *limit* rows."""
    sql = "select init_year from douban01"
    if limit is not None:
        # NOTE(review): ordering by ``num`` assumes it is the Top 250 rank, so
        # "first N rows" means "top N movies" - confirm.
        sql += " order by num limit %d" % int(limit)
    cursor.execute(sql)

    # Count inside the fetched rows, so a limited query is counted over that
    # subset only rather than over the whole table.
    counts = {}
    for row in cursor.fetchall():
        counts[row[0]] = counts.get(row[0], 0) + 1

    years = sorted(counts)
    return [int(year) for year in years], [counts[year] for year in years]


def show(request):
    """
    Render ``show.html`` with release-year histograms for three subsets.

    For all rows, the first 100 and the first 50, the context gets a sorted
    list of distinct years (as ints) and a parallel list with the number of
    movies in that subset released in each year.

    Previously the per-year counts for the 100 and 50 subsets were computed
    with ``count(*)`` over the whole table, so they repeated the full-table
    numbers. They are now counted within each subset.

    :param request: the Django request
    :return: the rendered ``show.html`` response
    """
    conn, cursor = connectmysql("127.0.0.1", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    try:
        yearlistnew250, yearnumlist250 = _year_histogram(cursor)
        yearlistnew100, yearnumlist100 = _year_histogram(cursor, 100)
        yearlistnew50, yearnumlist50 = _year_histogram(cursor, 50)
    finally:
        cursor.close()
        conn.close()

    return render(request, 'show.html',
                  {'yearlistnew250': yearlistnew250, 'yearnumlist250': yearnumlist250,
                   'yearlistnew100': yearlistnew100, 'yearnumlist100': yearnumlist100,
                   'yearlistnew50': yearlistnew50, 'yearnumlist50': yearnumlist50})



def receive_message(request):
    """
    Store a visitor's message in the ``message`` table.

    Only POST is accepted. Other methods get the same plain-text rejection
    that ``search`` returns, instead of falling off the end of the view and
    returning ``None``, which is not a valid response.

    :param request: the Django request; reads ``name``, ``email``, ``subject``
        and ``message`` from ``request.POST``
    :return: the rendered ``index.html`` after a successful insert, or a
        plain response for non-POST requests
    """
    if request.method != 'POST':
        return HttpResponse('提交的方式不是post')

    name = request.POST['name']
    email = request.POST['email']
    subject = request.POST['subject']
    message = request.POST['message']

    conn, cursor = connectmysql("127.0.0.1", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    try:
        # Values are bound as parameters, never formatted into the SQL text.
        sql = "insert into message values(%s,%s,%s,%s)"
        cursor.execute(sql, (name, email, subject, message))
        conn.commit()
    finally:
        cursor.close()
        conn.close()
    return render_to_response('index.html')


def _token_weights(cursor, sql):
    """Run *sql* (one text column) and count each whitespace-separated token."""
    cursor.execute(sql)
    weights = {}
    for row in cursor.fetchall():
        for token in row[0].split():
            weights[token] = weights.get(token, 0) + 1
    return weights


def zidian():
    """
    Build the token -> weight dictionaries used to score movies.

    A token's weight is the number of times it occurs in the column, e.g.
    ``{'动作': 25}`` means the genre token 动作 occurs 25 times.

    The cast query now selects the column with backticks. The previous
    ``select 'cast'`` selected the string literal ``'cast'`` for every row,
    so the cast dictionary held only the single key ``'cast'``.

    :return: ``(genredicts, areadict, writerdict, directordict, castdict)``
    """
    conn, cursor = connectmysql("127.0.0.1", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    try:
        genredicts = _token_weights(cursor, "select genre from douban01;")        # genre weights
        areadict = _token_weights(cursor, "select area from douban01;")           # area weights
        writerdict = _token_weights(cursor, "select writer  from douban02;")      # writer weights
        directordict = _token_weights(cursor, "select director  from douban02;")  # director weights
        castdict = _token_weights(cursor, "select `cast`  from douban02;")        # cast weights
    finally:
        cursor.close()
        conn.close()

    return genredicts, areadict, writerdict, directordict, castdict




def newmovie20():
    """
    Ask Douban's subject search endpoint for movies and return their page URLs.

    The request uses ``page_limit=20`` and ``page_start=0``; the ``tag`` value
    is the URL-encoded text 最新. A browser-like User-Agent header is sent.

    :return: list with the ``url`` of every entry under ``subjects`` in the
        JSON reply
    """
    endpoint = "https://movie.douban.com/j/search_subjects?type=movie&tag=%E6%9C%80%E6%96%B0&page_limit=20&page_start=0"
    request_headers = {
        "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.3964.2",
    }
    payload = requests.get(endpoint, headers=request_headers).json()
    return [entry['url'] for entry in payload['subjects']]



def movieinfo(url):
    """
    Fetch a Douban movie page and extract its basic metadata.

    Table that stores the result:
    create table newmovie20(moviename VARCHAR(100) NOT NULL,year VARCHAR(100) NOT NULL,director VARCHAR(100) NOT NULL,writer  VARCHAR(100) NOT NULL, castlist   VARCHAR(1000) NOT NULL, genrelist  VARCHAR(1000) NOT NULL, area  VARCHAR(100) NOT NULL)ENGINE=InnoDB DEFAULT CHARSET=utf8;

    :param url: address of the movie detail page
    :return: ``(moviename, year, director, writer, castlist, genrelist, area)``.
        Despite their names, ``castlist`` and ``genrelist`` each hold only
        the FIRST cast member / genre found, or the string ``'None'`` when
        the page lists none.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.3964.2",
    }
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')
    infodata = soup.find('div', id='wrapper')
    moviename = infodata.find('span', property='v:itemreviewed').string    # movie title
    year = infodata.find('span', class_="year").string                     # release year
    infodata2 = infodata.find('div', class_='subject clearfix').find('div', id='info')

    info_spans = infodata2.find_all('span')
    # The first span's text is "<label> <name> ..."; the second word is kept.
    director = info_spans[0].text.split()[1]      # director
    writer = info_spans[2].string                 # writer

    # Only the first starring link is kept; fall back to 'None' when absent.
    cast_links = infodata2.find_all('a', rel="v:starring")
    castlist = cast_links[0].string if cast_links else 'None'

    # Only the first genre is kept; fall back to 'None' instead of raising
    # IndexError when the page lists no genre.
    genre_spans = infodata2.find_all("span", property="v:genre")
    genrelist = genre_spans[0].string if genre_spans else 'None'

    # The release-date text looks like "<date>(<area>)"; keep the part in parentheses.
    area = infodata2.find('span', property="v:initialReleaseDate").string.split('(')[1].split(')')[0]
    return moviename, year, director, writer, castlist, genrelist, area


def savetomysql(request):
    """
    Refresh table ``newmovie20`` with freshly scraped movies and list them.

    All pages are scraped BEFORE the table is emptied. TRUNCATE cannot be
    rolled back, so truncating first (as this view used to) left the table
    empty whenever a later request or parse step failed.

    :param request: the Django request (not otherwise used)
    :return: the rendered ``new20movielist.html``; rows are exposed as ``infor_list``
    """
    # Each scraped field is stored as a plain string.
    rows = [tuple(str(field) for field in movieinfo(url)) for url in newmovie20()]

    sqli = "insert into newmovie20 values(%s,%s,%s,%s,%s,%s,%s)"
    conn, cursor = connectmysql("127.0.0.1", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    try:
        cursor.execute("truncate table newmovie20;")
        for row in rows:
            cursor.execute(sqli, row)
        conn.commit()

        # Read the rows back on the same connection for display.
        search = cursor.execute("select * from newmovie20;")
        infor_list = cursor.fetchmany(search)
    finally:
        cursor.close()
        conn.close()
    return render_to_response('new20movielist.html', {'infor_list': infor_list})




def yuce():
    """
    Score every movie in ``newmovie20`` against the weights from ``zidian``.

    A movie's score is the sum of the weights of its area, director, writer,
    genre and cast values. A value with no entry in the matching dictionary
    contributes 0.

    :return: list of ``(moviename, score)`` pairs, highest score first
    """
    genredicts, areadict, writerdict, directordict, castdict = zidian()

    conn, cursor = connectmysql("127.0.0.1", 3306, "root", 'lzhlmclyhblsqt', 'doubanfenxi')
    try:
        search = cursor.execute("select * from newmovie20;")
        infor_list = cursor.fetchmany(search)
    finally:
        cursor.close()
        conn.close()

    sortedict = {}
    for moviename, year, director, writer, castlist, genrelist, area in infor_list:
        # dict.get replaces the former bare try/except around each lookup.
        sortedict[moviename] = (areadict.get(area, 0)
                                + directordict.get(director, 0)
                                + writerdict.get(writer, 0)
                                + genredicts.get(genrelist, 0)
                                + castdict.get(castlist, 0))
    return sorted(sortedict.items(), key=lambda x: x[1], reverse=True)


def yucemovie(request):
    """
    Render ``yuce.html`` with the ranking computed by ``yuce``.

    :param request: the Django request (not otherwise used)
    :return: the rendered response; the ranking is exposed as ``infor_list``
    """
    context = {'infor_list': yuce()}
    return render_to_response('yuce.html', context)




豆瓣分析预测系统_第1张图片


豆瓣分析预测系统_第2张图片

豆瓣分析预测系统_第3张图片

豆瓣分析预测系统_第4张图片

豆瓣分析预测系统_第5张图片


豆瓣分析预测系统_第6张图片


豆瓣分析预测系统_第7张图片





留言信息展示

 


豆瓣20部新电影预测

  {% block content %} 电影 权重   {% for infor in infor_list %}   {{ infor.0 }}  {{ infor.1 }}    {% endfor %} {% endblock %}                    

&&&