# Scrape meme image links from doutula.com

#!/usr/bin/env python
# -*-coding: utf-8-*-
# author: editor time 2017/11/26

import requests
import re,lxml
import time
import pymysql


from lxml import etree

# Connection settings for the MySQL database that receives the scraped rows.
_DB_SETTINGS = dict(
    host='localhost',
    port=3306,
    user='user',
    passwd='passwd',
    db='test',
    charset='utf8',
)

# Module-level connection and cursor shared by the rest of the script.
db = pymysql.connect(**_DB_SETTINGS)
cursor = db.cursor()

# One-time setup (left disabled): creates the `doutu` table that the insert
# statement in getImage writes to. Run once against a fresh database.
# createtable='create table doutu(name varchar(256),link varchar(256))'
# cursor.execute(createtable)
# db.commit()

def getImage(page=1):
    """Scrape one listing page of doutula.com and store each entry in MySQL.

    Fetches ``http://www.doutula.com/photo/list/?page=<page>``, extracts the
    anchor ``href`` values and the caption text beneath them with XPath, and
    inserts every (name, link) pair into the ``doutu`` table through the
    module-level ``cursor``, committing on ``db`` after each row.

    Args:
        page: Page number substituted into the listing URL. Defaults to 1.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the request exceeds the timeout.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    htm = requests.get(
        'http://www.doutula.com/photo/list/?page={}'.format(page),
        timeout=10,
    )
    htm.encoding = 'utf-8'

    # Select the entries with lxml XPath.
    req = etree.HTML(htm.text)
    link = req.xpath("//div[@class='page-content text-center']/div/a/@href")      # entry links
    name = req.xpath("//div[@class='page-content text-center']/div/a/p/text()")   # entry names

    # zip() stops at the shorter list, so a length mismatch between the two
    # XPath results can no longer raise IndexError.
    for img_name, img_link in zip(name, link):
        print('正在输入。。。。。。{}'.format(img_name))
        try:
            # Parameterized query: the driver escapes the scraped values, so
            # quotes or backslashes in page content cannot alter the SQL.
            # The quote stripping is retained so stored names stay the same
            # as what the string-built statement used to write.
            cursor.execute(
                "insert into doutu values(%s, %s)",
                (str(img_name).replace('\'', ''), str(img_link)),
            )
        except pymysql.MySQLError as e:
            # Best effort: report the failed row and continue with the next.
            print(e)
        db.commit()
# Crawl 120 listing pages, then release the database connection.
# NOTE(review): range(120) requests pages 0..119 while getImage defaults to
# page=1 — confirm whether the site's pagination is 1-based.
try:
    for page in range(120):
        getImage(page)
finally:
    # Close the connection even when a page fetch or parse raises.
    db.close()


# You may also be interested in: (web crawlers)