三十行代码爬取B站弹幕并生成词云

概述

由于B站已将视频的AV号改为BV号，代码也相应做了少许改动：先用BV号查询cid，再用cid获取弹幕。

import requests
import json
import re
from wordcloud import WordCloud


# Resolve a BV id to the cid of the video's first part
def get_cid(bv):
    """Return the cid of the first entry in a video's page list.

    Args:
        bv: BV id of the video, e.g. ``'BV1Jz411b7zG'``.

    Returns:
        The ``cid`` value found at ``data[0]`` of the page-list response.

    Raises:
        requests.RequestException: on network failure, timeout, or an
            HTTP error status.
        KeyError, IndexError: if the decoded JSON has no ``data[0]['cid']``.
    """
    # str.strip('BV1') treats its argument as a *set of characters* and trims
    # them from both ends, so ids ending in 'B', 'V' or '1' (or with extra
    # leading '1's) were corrupted. Remove the exact prefix only.
    # NOTE(review): the original sent the id without its 'BV1' prefix; that is
    # preserved here - confirm the endpoint still accepts this form.
    prefix = 'BV1'
    if bv.startswith(prefix):
        bv = bv[len(prefix):]
    url = f'https://api.bilibili.com/x/player/pagelist?bvid={bv}&jsonp=jsonp'
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(url, timeout=10)
    res.raise_for_status()
    res_dict = json.loads(res.text)
    return res_dict['data'][0]['cid']


# Fetch the bullet comments (danmaku) for a cid
def get_bullet_chat(cid):
    """Download the comment XML for ``cid`` and return the comment texts.

    Args:
        cid: Identifier placed in the ``oid`` query parameter.

    Returns:
        A list of strings, one per matched comment element.

    Raises:
        requests.RequestException: on network failure or timeout.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={cid}'
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(url, timeout=10)
    # Decode explicitly from the raw bytes instead of relying on the
    # encoding requests guesses for res.text.
    xml_text = res.content.decode('utf-8')
    # The previous pattern '(.*?)' can only match the empty string, so no
    # comment text was ever extracted.
    # NOTE(review): assumes each comment is a `<d ...>text</d>` element in the
    # returned XML - confirm against a live response.
    patt = re.compile(r'<d\b[^>]*>(.*?)</d>')
    return patt.findall(xml_text)


# Render the word cloud image
def wold_could(bullent):
    """Build a word cloud from the comments and save it as '弹幕.png'.

    Args:
        bullent: The comments. A list or tuple is joined with spaces; any
            other value is converted with ``str()``.
    """
    if isinstance(bullent, (list, tuple)):
        # str(list) would produce the list's repr, injecting brackets,
        # quotes, commas and backslash escapes into the text being analysed.
        text = ' '.join(str(item) for item in bullent)
    else:
        text = str(bullent)
    # font_path points at a font file named 'msyh.ttc'.
    # NOTE(review): presumably chosen so CJK glyphs render - confirm the font
    # is available on the target machine.
    cloud = WordCloud(font_path='msyh.ttc').generate(text)
    cloud.to_file('弹幕.png')


if __name__ == '__main__':
    # Sample video id (a Huanong Brothers upload, per the original note).
    video_bv = 'BV1Jz411b7zG'
    # Pipeline: BV id -> cid -> comment list -> word cloud image.
    comments = get_bullet_chat(get_cid(video_bv))
    wold_could(comments)

你可能感兴趣的:(三十行代码爬取B站弹幕并生成词云)