# JD.com product information scraper (京东商品信息爬取)

from  selenium import webdriver #驱动浏览器
from selenium.webdriver.common.by import By #选择器
from selenium.webdriver.common.keys import Keys #按键
from selenium.webdriver.support.wait import WebDriverWait #等待页面加载完毕,寻找某些元素
from selenium.webdriver.support import expected_conditions as EC #等待指定标签加载完毕
import time
from bs4 import BeautifulSoup
class Spider:
    """Selenium-driven scraper that searches JD.com and prints result cards.

    Typical use: construct an instance, call ``get_jd()`` to submit a
    search, then ``get_data()`` to parse and print what the current page
    shows. Call ``close()`` afterwards to end the browser session.
    """

    def __init__(self):
        """Start a Chrome session configured for the JD home page."""
        self.url = 'https://www.jd.com/'
        self.opt = webdriver.ChromeOptions()
        # Start Chrome without the "enable-automation" switch (presumably to
        # make the session look less like an automated one -- confirm).
        self.opt.add_experimental_option('excludeSwitches', ['enable-automation'])
        self.browser = webdriver.Chrome(options=self.opt)

    def get_jd(self, keyword='美食'):
        """Open the JD home page and submit a search for *keyword*.

        Args:
            keyword: Text typed into the search box. Defaults to '美食',
                the term that used to be hard-coded here.

        Raises:
            selenium.common.exceptions.TimeoutException: If the search box
                (element id ``key``) does not appear within 280 seconds.
        """
        self.browser.get(self.url)
        wait = WebDriverWait(self.browser, 280)
        # until() hands back the element it located, so a second
        # find_element() lookup is unnecessary.
        search_box = wait.until(EC.presence_of_element_located((By.ID, 'key')))
        search_box.send_keys(keyword)
        search_box.send_keys(Keys.ENTER)
        # Fixed pause before the page is read by get_data(); kept as-is
        # because it presumably also leaves time for a manual login.
        time.sleep(20)

    def get_data(self):
        """Print the current URL, then parse and print the current page.

        Returns:
            The list of ``(title, price, shop)`` tuples from ``parse_data``.
        """
        data = self.browser.page_source
        print(self.browser.current_url)
        return self.parse_data(data=data)

    def parse_data(self, data):
        """Extract title, price and shop name from each product card.

        Fields are looked up inside each ``.gl-i-wrap`` card rather than by
        zipping three document-wide selections, so a card that lacks a price
        or shop link can no longer shift the fields of the cards after it.
        A missing field is reported as an empty string.

        Args:
            data: HTML source of a search results page.

        Returns:
            A list of ``(title, price, shop)`` string tuples, one per card,
            in document order. Each tuple is also printed, followed by a
            separator line.
        """
        soup = BeautifulSoup(data, 'lxml')
        rows = []
        for card in soup.select('.gl-i-wrap'):
            fields = (
                card.select_one('a em'),               # title
                card.select_one('.p-price strong i'),  # price
                card.select_one('.J_im_icon a'),       # shop name
            )
            row = tuple(
                node.get_text() if node is not None else '' for node in fields
            )
            rows.append(row)
            for value in row:
                print(value)
            print('==============')
        return rows

    def close(self):
        """Quit the browser and end the WebDriver session."""
        self.browser.quit()
if __name__ == '__main__':
    # Script entry point: run one search and print the parsed results.
    s = Spider()
    try:
        s.get_jd()
        s.get_data()
    finally:
        # Always end the WebDriver session, even if a step above raised,
        # so the Chrome and chromedriver processes are not left running.
        s.browser.quit()











# Related tags: web crawler (爬虫), python, programming languages (开发语言)