Python爬取豆瓣电影TOP250(名字+年份+评分+评论人数)

import re
import requests
import csv

# Scrape the Douban Top 250 movie list and export name, year, rating and
# vote-count text to douban.csv (one row per movie, no header row).
url = "https://movie.douban.com/top250"

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'
}

# The list is paginated through the ``start`` query parameter:
# 10 pages of 25 entries each.
PAGE_SIZE = 25
PAGE_COUNT = 10
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled request blocks forever

# One match per movie entry. Compiled once at module level instead of once
# per page. re.S lets ``.`` cross the newlines inside an entry.
# NOTE(review): the HTML tag literals were missing from the pattern as found
# and are reconstructed here from Douban's list markup -- verify against a
# live page before relying on the output.
MOVIE_PATTERN = re.compile(
    r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>'
    r'.*?<p\b[^>]*>.*?<br>(?P<year>.*?)&nbsp.*?'
    r'<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
    r'.*?<span>(?P<people>.*?)</span>',
    re.S,
)


def parse_movies(page_content):
    """Extract the movie rows from one listing page.

    Args:
        page_content: HTML text of a Top 250 listing page.

    Returns:
        A list of ``[name, year, score, people]`` string lists in page
        order. ``year`` has its surrounding whitespace stripped; ``people``
        is the full text of the vote-count span.
    """
    rows = []
    for match in MOVIE_PATTERN.finditer(page_content):
        fields = match.groupdict()
        # The year sits on its own indented line in the markup.
        fields['year'] = fields['year'].strip()
        rows.append(list(fields.values()))
    return rows


def fetch_page(session, start):
    """Download one listing page and return its HTML text.

    Args:
        session: A ``requests.Session`` used to issue the request.
        start: Zero-based index of the first movie on the page.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    params = {'start': start, 'filter': ''}
    with session.get(url, headers=headers, params=params,
                     timeout=REQUEST_TIMEOUT) as resp:
        # Fail loudly on an error status rather than writing an incomplete CSV.
        resp.raise_for_status()
        return resp.text


def main():
    """Fetch every page, then write all rows to douban.csv."""
    rows = []
    with requests.Session() as session:
        for page in range(PAGE_COUNT):
            rows.extend(parse_movies(fetch_page(session, page * PAGE_SIZE)))

    # newline="" hands line-ending control to the csv module; without it
    # Windows output gets a blank line after every row.
    with open("douban.csv", "w", encoding="utf-8", newline="") as f:
        csv.writer(f).writerows(rows)

    print("200")


if __name__ == "__main__":
    main()

  •  
     

    你可能感兴趣的:(爬虫,python,开发语言,爬虫)