# Web scraper: fetch securities trading data from the Shenzhen Stock Exchange

import requests
from multiprocessing.dummy import Pool as ThreadingPool
from lxml import etree
from pymongo import MongoClient
# SZSE listed-company data endpoint (trailing space removed — it was part of
# the original string literal and would be percent-encoded into the request).
Url = 'http://www.szse.cn/main/marketdata/jypz/colist/'
def Write_To_Mongo(content):
    """Persist one scraped table row into MongoDB (db ``stock``, collection ``info``).

    content: list of cell strings extracted from a table row.
    Empty/falsy content is skipped silently.
    """
    if not content:
        return
    # NOTE(review): opening a new MongoClient per row is wasteful; kept local
    # so the function stays self-contained, but the connection is now closed
    # explicitly so pool workers do not leak sockets.
    client = MongoClient('localhost')
    try:
        # insert_one replaces Collection.insert(), which was deprecated and
        # removed in pymongo 4.
        client.stock.info.insert_one({'content': content})
    finally:
        client.close()
def Main(Page_Num):
    """Scrape page ``Page_Num`` of the SZSE listed-company tables.

    The endpoint serves four tabs (tab1..tab4); each tab is requested via a
    POST with tab-specific paging parameters, parsed with lxml, and every
    data row is written to MongoDB via Write_To_Mongo.
    """
    for Tab in ['tab1', 'tab2', 'tab3', 'tab4']:
        form_data = {
            'ACTIONID': '7',
            'AJAX': 'AJAX - TRUE',
            'CATALOGID': '1110',
            # per-tab paging key, e.g. 'tab1PAGENO'
            Tab + 'PAGENO': Page_Num,
            'TABKEY': Tab
        }
        # timeout added so a stalled request cannot hang a pool worker forever
        Resp_Html = requests.post(url=Url, data=form_data, timeout=30)
        # the site serves GB-encoded Chinese text — TODO confirm gb2312 vs gbk
        Resp_Html.encoding = 'gb2312'
        Html = etree.HTML(Resp_Html.text)
        # first <tr> of each report table is the header row — skip it
        Tr_List = Html.xpath('//table[@id="REPORTID_' + Tab + '"]//tr')[1:]
        for tr in Tr_List:
            # first column is a link (<a><u>text</u></a>); the remaining
            # columns are plain-text cells (skip the first plain cell, which
            # duplicates the linked column's slot)
            content = tr.xpath('./td/a/u/text()') + tr.xpath('./td/text()')[1:]
            Write_To_Mongo(content)
if __name__ == '__main__':
    # Pages 1..15, fetched by 4 concurrent workers. The work is I/O-bound
    # (HTTP + MongoDB), so multiprocessing.dummy's thread pool is appropriate.
    Page_List = list(range(1, 16))
    # context manager guarantees the pool is cleaned up even if a worker
    # raises; map() blocks until all pages are processed.
    with ThreadingPool(4) as Pool:
        Pool.map(Main, Page_List)

# (Scraped-blog footer, kept as a comment: "You may also be interested in: web scraping")