crawl4ai实操8:同时导出网页截图和PDF

import os
import sys
import asyncio
from crawl4ai import AsyncWebCrawler, CacheMode, CrawlerRunConfig

# Adjust paths as needed.
# Put the directory one level above this script's folder on the import path.
_script_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(_script_dir)
sys.path.append(parent_dir)

# Directory containing this script (resolved against the current working
# directory); the exported files are written here.
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__))
)

async def main():
    """Crawl one Wikipedia page and save its screenshot and PDF export.

    The page is fetched with the cache bypassed and with both the PDF and
    the screenshot export enabled. When the crawl reports success, each
    export that is present is written into the ``__location__`` directory
    as ``screenshot.png`` and ``page.pdf``. Nothing is written otherwise.
    """
    from base64 import b64decode

    async with AsyncWebCrawler() as crawler:
        # Request both PDF and screenshot in a single, uncached run.
        run_config = CrawlerRunConfig(
            cache_mode=CacheMode.BYPASS,
            pdf=True,
            screenshot=True
        )
        result = await crawler.arun(
            url='https://en.wikipedia.org/wiki/List_of_common_misconceptions',
            config=run_config
        )

        # Guard clause: there is nothing to save unless the crawl succeeded.
        if not result.success:
            return

        # Save screenshot: it is base64-decoded before being written.
        if result.screenshot:
            screenshot_path = os.path.join(__location__, "screenshot.png")
            with open(screenshot_path, "wb") as image_file:
                image_file.write(b64decode(result.screenshot))

        # Save PDF: the payload is written unchanged in binary mode.
        if result.pdf:
            pdf_path = os.path.join(__location__, "page.pdf")
            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(result.pdf)

# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())
PS E:\AI-lab\n8n> & D:/anaconda3/envs/crawl4ai-python311/python.exe e:/AI-lab/n8n/test2.py
[INIT].... → Crawl4AI 0.6.3 
[EXPORT].. ℹ Exporting media (PDF/MHTML/screenshot) took 8.08s 
[FETCH]... ↓ https://en.wikipedia.org/wiki/List_of_common_misconceptions
|| ⏱: 11.72s
[SCRAPE].. ◆ https://en.wikipedia.org/wiki/List_of_common_misconceptions
|| ⏱: 0.50s
[COMPLETE] ● https://en.wikipedia.org/wiki/List_of_common_misconceptions
|| ⏱: 12.23s
PS E:\AI-lab\n8n> 

你可能感兴趣的:(网络爬虫)