百思不得姐视频爬取

# -*- coding:utf-8 -*-
from Tkinter import *
from ScrolledText import ScrolledText
import urllib,requests
import re
import threading
import sys
# ---------------------------------------------------------------------------
# Module state shared between get() and write().
# ---------------------------------------------------------------------------
url_name = []   # queue of [title, mp4_url] pairs fetched but not yet downloaded
a = 1           # number of the next listing page that get() will request
video_no = 1    # 1-based number of the next video (was ``id``, which shadowed the builtin)

VIDEO_DIR = 'video'     # download directory, relative to the working directory
VIDEO_LIMIT = 10        # write() stops requesting pages once video_no reaches this
REQUEST_TIMEOUT = 10    # seconds; without a timeout a stalled server blocks the worker forever
MAX_NAME_LEN = 80       # titles are free text and can be long; keep file names short

# Patterns are compiled once here instead of on every page / every post.
# NOTE(review): the HTML tags inside POST_RE and TITLE_RE were stripped from
# the text this file was recovered from (only the ``.*?`` skeleton survived).
# They are reconstructed from that skeleton and must be confirmed against the
# live page markup before relying on them.
POST_RE = re.compile(r'<div class="j-r-list-c">.*?</div>.*?</div>', re.S)
MP4_RE = re.compile(r'data-mp4="(.*?)">')
TITLE_RE = re.compile(r'<a href="/detail-[^"]*">(.*?)</a>', re.S)

# Characters that cannot appear in a Windows file name, plus line breaks/tabs.
INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]+')


def parse_videos(html):
    """Extract ``[title, mp4_url]`` pairs from the HTML of one listing page.

    Every span matched by POST_RE is searched for ``data-mp4`` attributes;
    spans without one are skipped.  Titles and URLs found in the same span
    are paired positionally with ``zip``.
    """
    pairs = []
    for post in POST_RE.findall(html):
        video_urls = MP4_RE.findall(post)
        if not video_urls:
            continue  # this post carries no video address
        titles = TITLE_RE.findall(post)
        pairs.extend([title, video_url]
                     for title, video_url in zip(titles, video_urls))
    return pairs


def safe_filename(title, fallback):
    """Turn a post title into a usable file name (without extension).

    Runs of characters matched by INVALID_FILENAME_CHARS become ``_``, the
    result is cut to MAX_NAME_LEN characters, and surrounding spaces, dots
    and underscores are trimmed.  ``fallback`` is returned when nothing is
    left.
    """
    cleaned = INVALID_FILENAME_CHARS.sub('_', title)[:MAX_NAME_LEN].strip(' ._')
    return cleaned or fallback


def get():
    """Fetch listing page ``a``, queue its videos and advance ``a``.

    The status label is updated, the page is downloaded with ``requests``,
    and every ``[title, mp4_url]`` pair found is appended to the module-level
    ``url_name`` list and printed.

    Returns the module-level ``url_name`` list itself (not a copy).
    Exceptions raised by ``requests`` are not caught here.
    """
    global a
    hd = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'}
    url = 'http://www.budejie.com/video/' + str(a)
    varl.set('已经获取到第%s页视频'%(a))
    html = requests.get(url, headers=hd, timeout=REQUEST_TIMEOUT).text
    a += 1
    for title, video_url in parse_videos(html):
        url_name.append([title, video_url])
        print('%s %s' % (title, video_url))
    return url_name


def write():
    """Download queued videos page by page until VIDEO_LIMIT is reached.

    Runs in the worker thread started by start().  The limit is only checked
    between pages, so every video of the last fetched page is still
    downloaded.  Each finished (or failed) download is logged to the text
    widget, and the status label is set when the loop ends.

    NOTE(review): ``text`` and ``varl`` are Tk objects updated from this
    worker thread; if that proves unreliable, marshal the updates through
    ``root.after`` instead.
    """
    global video_no
    import os  # local import: the file-level import block is left untouched

    # urlretrieve does not create missing directories.
    if not os.path.isdir(VIDEO_DIR):
        os.makedirs(VIDEO_DIR)

    while video_no < VIDEO_LIMIT:
        pending = get()
        if not pending:
            # A page without videos would never advance video_no; stop
            # instead of requesting pages forever.
            break
        # Drain the queue front to back.  Popping inside a ``for`` loop over
        # the same list (as before) skipped every other entry.
        while pending:
            title, video_url = pending.pop(0)
            target = os.path.join(
                VIDEO_DIR, safe_filename(title, str(video_no)) + '.mp4')
            try:
                urllib.urlretrieve(video_url, target)
            except IOError:
                # One broken download must not kill the whole worker.
                text.insert(END, str(video_no) + '.' + video_url + u'\n下载失败\n')
            else:
                text.insert(END, str(video_no) + '.' + video_url + '\n' + title + '\n')
            video_no += 1
    varl.set('抓取完毕')


def start():
    """Button callback: run write() in a background thread."""
    worker = threading.Thread(target=write)
    # Daemon thread, so closing the window also ends a running crawl.
    worker.daemon = True
    worker.start()


if __name__ == '__main__':
    # Build the window only when run as a script, so importing the module
    # has no GUI side effects.
    root = Tk()
    root.title('视频爬取')
    root.geometry('+400+100')  # initial window position on screen

    text = ScrolledText(root, font=('微软雅黑', 10))
    text.grid()

    button = Button(root, text='开始爬取', font=('微软雅黑', 10), fg='blue',
                    command=start)
    button.grid()

    varl = StringVar()
    label = Label(root, font=('微软雅黑', 10), fg='black', textvariable=varl)
    label.grid()
    varl.set('已准备...')

    root.mainloop()

你可能感兴趣的:(python)