Selenium访问网页三种等待方法-强隐显

三种等待方法:强制等待,隐式等待,显式等待

代码如下:

# coding:utf-8

import time
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By


def get_force_wait(url):
    """Return the page source of *url* after an unconditional fixed sleep.

    Starts a headless Chrome session using the ``chromedriver`` binary located
    in the same directory as this file, loads *url*, sleeps three seconds and
    then reads ``page_source``.

    Args:
        url: Address passed to ``session.get``.

    Returns:
        The value of ``session.page_source`` read after the sleep.
    """
    # The driver binary is expected next to this file.
    # Download: https://chromedriver.storage.googleapis.com/index.html
    driver_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "chromedriver"
    )

    # Browser options.
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--no-sandbox")

    # Start the webdriver.
    # NOTE(review): `executable_path` / `chrome_options` are the Selenium 3
    # keyword names; confirm against the installed Selenium version.
    session = webdriver.Chrome(executable_path=driver_path, chrome_options=chrome_options)
    try:
        session.get(url)

        # Forced wait: always blocks for three seconds regardless of whether
        # the page finished earlier or still needs more time, so it is both
        # slow and unreliable.
        time.sleep(3)

        return session.page_source
    finally:
        # quit() ends the whole session and the chromedriver process; close()
        # only closes the current window and can leave the driver running.
        # Doing it in `finally` also covers failures in get()/page_source.
        session.quit()


def get_hidden_wait(url):
    """Return the page source of *url* from a session with an implicit wait.

    Starts a headless Chrome session using the ``chromedriver`` binary located
    in the same directory as this file, configures a 20-second implicit wait,
    loads *url* and reads ``page_source``.

    Args:
        url: Address passed to ``session.get``.

    Returns:
        The value of ``session.page_source`` read after navigation.
    """
    # The driver binary is expected next to this file.
    # Download: https://chromedriver.storage.googleapis.com/index.html
    driver_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "chromedriver"
    )

    # Browser options.
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--no-sandbox")

    # Start the webdriver.
    # NOTE(review): `executable_path` / `chrome_options` are the Selenium 3
    # keyword names; confirm against the installed Selenium version.
    session = webdriver.Chrome(executable_path=driver_path, chrome_options=chrome_options)
    try:
        # Implicit wait: a session-wide setting, so it is configured before
        # navigating. Per the Selenium documentation it is the maximum time
        # element lookups keep polling before failing; it does not by itself
        # delay reading page_source, which is why this approach is unreliable
        # for dynamically rendered content.
        session.implicitly_wait(20)

        session.get(url)

        return session.page_source
    finally:
        # quit() ends the whole session and the chromedriver process; close()
        # only closes the current window and can leave the driver running.
        # Doing it in `finally` also covers failures in get()/page_source.
        session.quit()


def get_obvious_wait(url, locator=None, timeout=20, poll_frequency=0.5):
    """Return the page source of *url* once a target element is present.

    Starts a headless Chrome session using the ``chromedriver`` binary located
    in the same directory as this file, loads *url*, then polls with
    ``WebDriverWait`` until ``EC.presence_of_element_located(locator)``
    succeeds before reading ``page_source``.

    Args:
        url: Address passed to ``session.get``.
        locator: ``(By, value)`` tuple identifying the element to wait for.
            Defaults to ``(By.LINK_TEXT, '普天&同庆')``, the locator this
            function originally hard-coded.
        timeout: Maximum number of seconds to wait (default 20).
        poll_frequency: Seconds between checks (default 0.5).

    Returns:
        The value of ``session.page_source`` read after the element was found.

    Raises:
        Whatever ``WebDriverWait.until`` raises when the element does not
        appear within *timeout*; the exception is not caught here and the
        session is still shut down.
    """
    if locator is None:
        locator = (By.LINK_TEXT, '普天&同庆')

    # The driver binary is expected next to this file.
    # Download: https://chromedriver.storage.googleapis.com/index.html
    driver_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "chromedriver"
    )

    # Browser options.
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--no-sandbox")

    # Start the webdriver.
    # NOTE(review): `executable_path` / `chrome_options` are the Selenium 3
    # keyword names; confirm against the installed Selenium version.
    session = webdriver.Chrome(executable_path=driver_path, chrome_options=chrome_options)
    try:
        session.get(url)

        # Explicit wait: re-check every `poll_frequency` seconds until the
        # located element is present, giving up after `timeout` seconds.
        WebDriverWait(session, timeout, poll_frequency).until(
            EC.presence_of_element_located(locator)
        )

        return session.page_source
    finally:
        # quit() ends the whole session and the chromedriver process; close()
        # only closes the current window and can leave the driver running.
        session.quit()

 

你可能感兴趣的:(算法)