# Scraping all League of Legends (LoL) hero information with Python


import json
import time
import urllib.request
import pymysql
def save_to_mysql(db, items):
    """Insert one hero record into the ``hero`` table.

    Args:
        db: An open database connection exposing ``cursor()``, ``commit()``
            and ``rollback()`` (a pymysql connection in this file).
        items: Mapping holding the keys ``c_name``, ``tags``, ``image``,
            ``name``, ``shuxing``, ``jineng`` and ``beijing``.

    Raises:
        KeyError: If one of the expected keys is missing from ``items``.

    Errors raised while executing or committing the statement are printed
    and the transaction is rolled back; they are not re-raised.
    """
    columns = ('c_name', 'tags', 'image', 'name', 'shuxing', 'jineng', 'beijing')
    # Placeholders instead of string interpolation: the driver escapes every
    # value, so quotes inside a description can neither break nor inject SQL.
    sql = ('insert into hero(c_name, tags, image, name, shuxing,jineng,beijing) '
           'values(%s,%s,%s,%s,%s,%s,%s)')
    # str() keeps list/dict values in the same textual form the previous
    # '"%s" % value' formatting stored.
    params = tuple(str(items[column]) for column in columns)

    cursor = db.cursor()
    try:
        cursor.execute(sql, params)
        db.commit()
    except Exception as e:
        # Best effort: report the failure and undo the partial transaction so
        # the caller can continue with the next record.
        print(e)
        db.rollback()
    finally:
        cursor.close()
#保存数据库

def _load_js_json(url, headers):
    """Download a ``.js`` data file and return the JSON value it assigns.

    Assumes the file has the shape ``<statement>;<name> = <json>...`` (the
    layout the original parsing relied on): everything up to the first ``;``
    is skipped and the JSON value after the following ``=`` is decoded.

    Args:
        url: Address of the ``.js`` file.
        headers: HTTP request headers to send.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If no JSON value follows the assignment.
    """
    request = urllib.request.Request(url=url, headers=headers)
    with urllib.request.urlopen(request) as response:
        script = response.read().decode('gb2312')

    _, _, statement = script.partition(';')
    _, _, payload = statement.partition('=')
    # raw_decode stops at the end of the first JSON value, so a ';' inside a
    # JSON string or trailing statements after the payload do not matter.
    data, _ = json.JSONDecoder().raw_decode(payload.lstrip())
    return data


def get_it(db):
    """Scrape every hero from lol.qq.com and persist each record.

    Reads the hero index (``champion.js``), then downloads each hero's own
    ``.js`` file. Every record is inserted through ``save_to_mysql`` and
    appended to ``hero.json`` as one JSON document per line. The function
    sleeps three seconds between heroes.

    Args:
        db: Open database connection handed to ``save_to_mysql``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
    }
    champions = _load_js_json('https://lol.qq.com/biz/hero/champion.js', headers)
    # The values of 'keys' are the names used to build each hero's file URL.
    hero_list = list(champions['keys'].values())

    # 'with' guarantees the file is closed even if a download or parse fails.
    with open('hero.json', 'w', encoding='utf8') as fp:
        for hero_name in hero_list:
            src = 'https://lol.qq.com/biz/hero/' + hero_name + '.js'
            data = _load_js_json(src, headers)['data']

            # Fresh dict per hero so no field can leak from the previous one.
            items = {
                # Hero name: title followed by name.
                'c_name': data['title'] + data['name'],
                # Hero tags.
                'tags': data['tags'],
                # Hero image file name.
                'image': data['image']['full'],
                # English name / id.
                'name': data['id'],
            }
            print('正在保存--%s' % items['name'])

            # Attributes.
            items['shuxing'] = data['info']
            # Skill descriptions keyed by spell id.
            items['jineng'] = {
                spell['id']: spell['description'] for spell in data['spells']
            }
            # Hero background text (could be tokenized later).
            items['beijing'] = data['blurb']

            save_to_mysql(db, items)
            # ensure_ascii=False keeps the Chinese text readable in the file.
            fp.write(json.dumps(items, ensure_ascii=False) + '\n')
            print('完成保存--%s' % items['name'])
            # Pause between requests.
            time.sleep(3)


def connect_db(host='localhost', port=3306, user='root', password='123456',
               database='lol', charset='utf8'):
    """Open and return a pymysql connection.

    The defaults reproduce the previously hard-coded settings, so calling
    ``connect_db()`` with no arguments behaves as before; pass keyword
    arguments to target another server or to avoid the built-in credentials.

    Args:
        host: Database server host name.
        port: Database server TCP port.
        user: Login user name.
        password: Login password.
        database: Name of the schema to use.
        charset: Connection character set.

    Returns:
        The open pymysql connection.
    """
    # Original author's note: MySQL has two common storage engines, InnoDB
    # and MyISAM. NOTE(review): presumably relevant because commit/rollback
    # need a transactional engine -- confirm the `hero` table's engine.
    return pymysql.Connect(host=host, port=port, user=user, password=password,
                           database=database, charset=charset)


def main():
    """Connect to the database, run the scraper, and always close the connection."""
    db = connect_db()
    try:
        get_it(db)
    finally:
        # Release the connection even if scraping aborts with an exception.
        db.close()

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

# (Blog footer) You may also be interested in: "Scraping all LoL hero information with Python"