腾讯招聘爬取

import requests
import re
from lxml import etree
from mysql import MysqlHelper
# Instantiate the MySQL wrapper class imported from the `mysql` module above.
mc = MysqlHelper()
# Parameterized INSERT statement; the four %s placeholders are bound to the
# (didian, zhiwei, people, work) tuple built for each scraped posting.
sql = 'INSERT INTO tencent(didian,zhiwei,people,works) VALUE (%s,%s,%s,%s)'
def _first_text(node, path):
    """Return the first result of XPath *path* evaluated on *node*, or None if there is none."""
    matches = node.xpath(path)
    return matches[0] if matches else None


def tencent(url, headers):
    """Crawl Tencent job listings and insert every posting into MySQL.

    Three listing pages are fetched (offsets 0, 10 and 20). Each detail link
    found on a listing page is downloaded, parsed with lxml, and the extracted
    fields are written through the module-level ``mc`` helper using ``sql``.

    Args:
        url: Listing URL template with one ``{}`` placeholder that receives
            the paging offset.
        headers: HTTP headers passed to every ``requests.get`` call.

    Raises:
        requests.RequestException: if a request fails or exceeds the timeout.
    """
    # NOTE(review): the original pattern was an empty string, which matches at
    # every position and yields only empty links. The `.replace('"', '')` in
    # the original suggests it captured a quoted href. This pattern assumes
    # detail links look like href="position_detail.php?..." - confirm against
    # the actual page markup.
    detail_link = re.compile(r'href="(position_detail\.php[^"]*)"')

    for start in range(0, 30, 10):
        # A timeout keeps one stalled connection from hanging the whole crawl.
        listing = requests.get(url.format(start), headers=headers, timeout=10)

        for href in detail_link.findall(listing.text):
            detail_url = 'https://hr.tencent.com/' + href
            detail = requests.get(detail_url, headers=headers, timeout=10)
            tree = etree.HTML(detail.text)

            for table in tree.xpath('//table[@class="tablelist textl"]'):
                # Work location
                didian = _first_text(table, './tr[@class="c bottomline"]/td[1]/text()')
                # Position
                zhiwei = _first_text(table, './tr[@class="c bottomline"]/td[2]/text()')
                # Headcount
                people = _first_text(table, './tr[@class="c bottomline"]/td[3]/text()')
                if None in (didian, zhiwei, people):
                    # Skip a table that lacks the summary row instead of
                    # aborting the whole crawl with an IndexError.
                    continue

                # Duties: stored as the list's repr with commas stripped, as before.
                # NOTE(review): this also strips commas inside the text itself;
                # confirm whether a plain join would be preferable for the DB.
                work = table.xpath('./tr[@class="c"]/td/ul/li/text()')
                work = str(work).replace(',', '')
                print(work)

                data = (didian, zhiwei, people, work)
                mc.execute_modify_sql(sql, data)
if __name__ == '__main__':
    # Listing URL template; the `{}` placeholder is filled with the paging offset.
    listing_template = 'https://hr.tencent.com/position.php?&start={}'
    # Send a desktop Chrome User-Agent with every request.
    request_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}

    tencent(url=listing_template, headers=request_headers)

你可能感兴趣的:(腾讯招聘爬取)