#!/usr/bin/env python
# -*- coding:utf-8 -*-
#Author: Mr Gee
import requests
import urllib
from urllib import request
import time
from lxml import etree
import re
import pymysql
from gevent import monkey;monkey.patch_all()
import gevent

class BithCoin:
    """Scrape article pages and store the extracted fields in MySQL.

    Typical use: fetch a page with :meth:`get_html`, run the four ``get_*``
    extractors on the returned text, then call :meth:`my_db` to insert the
    values the extractors stored on the instance.
    """

    # NOTE(review): these patterns are reproduced exactly as they appear in
    # this copy of the file. They look truncated -- the markup delimiters
    # around the capture groups were most likely stripped when the code was
    # pasted through an HTML/markdown renderer. Confirm them against the
    # target site's markup before relying on the extracted values.
    TITLE_PATTERN = re.compile(r'\n\n(.*?)\n\n')
    CONTENT_PATTERN = re.compile(r'\n(.*?)\n', re.S)
    AUTHOR_PATTERN = re.compile(r'\n.*?(.*?)', re.S)
    DATE_PATTERN = re.compile(r' (.*?)', re.S)

    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration or the environment instead.
    DB_CONFIG = {
        'host': "localhost",
        'user': "root",
        'passwd': "123456",
        'db': "ljj",
        'charset': "utf8",
    }

    INSERT_SQL = ("INSERT INTO bitcoin(url,post_author,post_date,post_content,post_title)"
                  "VALUES(%s,%s,%s,%s,%s)")

    def __init__(self):
        self.user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36'
        self.header = {'User-Agent': self.user_agent}
        # Start with empty fields so my_db() never hits an AttributeError
        # when it is called before one of the extractors has run.
        self.titles = ''
        self.contents = ''
        self.author = ''
        self.dates = ''

    def get_html(self, url):
        """Download *url* with the configured User-Agent and return its text.

        The response body is decoded as UTF-8. Errors raised by ``urlopen``
        or by the decode step propagate to the caller.
        """
        req = request.Request(url, headers=self.header)
        # The context manager closes the HTTP response even if read() or
        # decode() raises.
        with request.urlopen(req) as respon:
            return respon.read().decode('utf-8')

    def get_title(self, text):
        """Store and return every TITLE_PATTERN capture in *text*, joined."""
        self.titles = ''.join(self.TITLE_PATTERN.findall(text))
        return self.titles

    def get_content(self, text):
        """Store and return the CONTENT_PATTERN captures in *text*.

        The captures are joined and all space characters are removed.
        """
        self.contents = ''.join(self.CONTENT_PATTERN.findall(text)).replace(" ", "")
        return self.contents

    def get_author(self, text):
        """Store and return every AUTHOR_PATTERN capture in *text*, joined."""
        self.author = ''.join(self.AUTHOR_PATTERN.findall(text))
        return self.author

    def get_date(self, text):
        """Store and return every DATE_PATTERN capture in *text*, joined."""
        self.dates = ''.join(self.DATE_PATTERN.findall(text))
        return self.dates

    def my_db(self, end_url):
        """Insert the most recently extracted fields for *end_url*.

        Opens a new connection per call, inserts one row built from
        ``self.author``, ``self.dates``, ``self.contents`` and
        ``self.titles``, and commits. A database error rolls the
        transaction back and is reported instead of being silently
        discarded. The connection is always closed.
        """
        conn = pymysql.connect(**self.DB_CONFIG)
        print('链接上了')
        data_value = (end_url, self.author, self.dates, self.contents, self.titles)
        try:
            with conn.cursor() as db:
                try:
                    db.execute(self.INSERT_SQL, data_value)
                    conn.commit()
                except pymysql.MySQLError as exc:
                    conn.rollback()
                    print('insert failed for %s: %s' % (end_url, exc))
            # NOTE(review): the original indentation was lost, so it is
            # unclear whether this message belonged to the except branch;
            # it is printed after every attempt here -- confirm.
            print ('已经结束')
        finally:
            conn.close()


def main(start=1, stop=1000):
    """Crawl article ids ``start`` .. ``stop - 1`` and store each page."""
    from urllib.error import URLError

    Coin = BithCoin()
    for i in range(start, stop):
        base_url = 'https://www.5186.net/article-show-id-' + '%s' % (i)
        print(base_url)
        try:
            end_html = Coin.get_html(base_url)
        except (URLError, UnicodeDecodeError) as exc:
            # One missing or undecodable article must not abort the crawl.
            print('skipping %s: %s' % (base_url, exc))
            continue
        # The extractors are pure regex work and must all finish before the
        # insert reads their results, so they run in sequence rather than as
        # greenlets racing with my_db().
        Coin.get_author(end_html)
        Coin.get_date(end_html)
        Coin.get_content(end_html)
        Coin.get_title(end_html)
        Coin.my_db(base_url)


if __name__ == '__main__':
    main()

# Screenshot from the original post:
# https://s4.51cto.com/images/blog/201803/08/c11b7e91367fd20454e48e5589753666.png?x-oss-process=image/watermark,size_16,text_QDUxQ1RP5Y2a5a6i,color_FFFFFF,t_100,g_se,x_10,y_10,shadow_90,type_ZmFuZ3poZW5naGVpdGk=