import asyncio
import time
from crawl4ai import CrawlerRunConfig, AsyncWebCrawler, CacheMode
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy, BestFirstCrawlingStrategy
from crawl4ai.deep_crawling.filters import (
FilterChain,
URLPatternFilter,
DomainFilter,
ContentTypeFilter,
ContentRelevanceFilter,
SEOFilter,
)
from crawl4ai.deep_crawling.scorers import (
KeywordRelevanceScorer,
)
async def basic_deep_crawl():
    """
    PART 1: Basic Deep Crawl setup - Demonstrates a simple two-level deep crawl.
    This function shows:
    - How to set up BFSDeepCrawlStrategy (Breadth-First Search)
    - Setting depth and domain parameters
    - Processing the results to show the hierarchy
    """
    print("\n===== BASIC DEEP CRAWL SETUP =====")

    # Two-level breadth-first crawl, restricted to the starting domain.
    run_config = CrawlerRunConfig(
        deep_crawl_strategy=BFSDeepCrawlStrategy(max_depth=2, include_external=False),
        scraping_strategy=LXMLWebScrapingStrategy(),
        verbose=True,
    )

    async with AsyncWebCrawler() as crawler:
        started = time.perf_counter()
        results = await crawler.arun(url="https://docs.crawl4ai.com", config=run_config)

        # Bucket crawled URLs by the depth recorded in each result's metadata.
        pages_by_depth = {}
        for res in results:
            level = res.metadata.get("depth", 0)
            pages_by_depth.setdefault(level, []).append(res.url)

        print(f"✅ Crawled {len(results)} pages total")

        # Show up to three sample URLs per depth level.
        for level in sorted(pages_by_depth):
            urls = pages_by_depth[level]
            print(f"\nDepth {level}: {len(urls)} pages")
            for url in urls[:3]:
                print(f" → {url}")
            if len(urls) > 3:
                print(f" ... and {len(urls) - 3} more")

        print(
            f"\n✅ Performance: {len(results)} pages in {time.perf_counter() - started:.2f} seconds"
        )
async def stream_vs_nonstream():
    """
    PART 2: Demonstrates the difference between stream and non-stream execution.
    Non-stream: Waits for all results before processing
    Stream: Processes results as they become available
    """
    print("\n===== STREAM VS. NON-STREAM EXECUTION =====")

    # One shared one-level BFS config; the stream flag is toggled on clones.
    base_config = CrawlerRunConfig(
        deep_crawl_strategy=BFSDeepCrawlStrategy(max_depth=1, include_external=False),
        scraping_strategy=LXMLWebScrapingStrategy(),
        verbose=False,
    )

    async with AsyncWebCrawler() as crawler:
        # --- Batch mode: arun() returns the complete result list at once. ---
        print("\n NON-STREAMING MODE:")
        print(" In this mode, all results are collected before being returned.")
        batch_config = base_config.clone()
        batch_config.stream = False

        batch_start = time.perf_counter()
        results = await crawler.arun(
            url="https://docs.crawl4ai.com", config=batch_config
        )
        print(f" ✅ Received all {len(results)} results at once")
        print(f" ✅ Total duration: {time.perf_counter() - batch_start:.2f} seconds")

        # --- Streaming mode: arun() yields each result as it arrives. ---
        print("\n STREAMING MODE:")
        print(" In this mode, results are processed as they become available.")
        live_config = base_config.clone()
        live_config.stream = True

        stream_start = time.perf_counter()
        seen = 0
        first_result_time = None
        async for result in await crawler.arun(
            url="https://docs.crawl4ai.com", config=live_config
        ):
            seen += 1
            if seen == 1:
                # Time-to-first-result is the headline benefit of streaming.
                first_result_time = time.perf_counter() - stream_start
                print(
                    f" ✅ First result received after {first_result_time:.2f} seconds: {result.url}"
                )
            elif seen % 5 == 0:
                print(f" → Result #{seen}: {result.url}")

        print(f" ✅ Total: {seen} results")
        print(f" ✅ First result: {first_result_time:.2f} seconds")
        print(f" ✅ All results: {time.perf_counter() - stream_start:.2f} seconds")
        print("\n Key Takeaway: Streaming allows processing results immediately")
async def filters_and_scorers():
    """
    PART 3: Demonstrates the use of filters and scorers for more targeted crawling.
    This function progressively adds:
    1. A single URL pattern filter
    2. Multiple filters in a chain
    3. Scorers for prioritizing pages
    """
    print("\n===== FILTERS AND SCORERS =====")
    async with AsyncWebCrawler() as crawler:
        # EXAMPLE 1: restrict the crawl to URLs matching one glob pattern.
        print("\n EXAMPLE 1: SINGLE URL PATTERN FILTER")
        print(" Only crawl pages containing 'core' in the URL")
        url_filter = URLPatternFilter(patterns=["*core*"])
        config = CrawlerRunConfig(
            deep_crawl_strategy=BFSDeepCrawlStrategy(
                max_depth=1,
                include_external=False,
                filter_chain=FilterChain([url_filter]),
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            cache_mode=CacheMode.BYPASS,
            verbose=True,
        )
        results = await crawler.arun(url="https://docs.crawl4ai.com", config=config)
        print(f" ✅ Crawled {len(results)} pages matching '*core*'")
        for result in results[:3]:
            print(f" → {result.url}")
        if len(results) > 3:
            print(f" ... and {len(results) - 3} more")

        # EXAMPLE 2: AND-combine several filters; a URL must pass all of them.
        print("\n EXAMPLE 2: MULTIPLE FILTERS IN A CHAIN")
        print(" Only crawl pages that:")
        print(" 1. Contain '2024' in the URL")
        print(" 2. Are from 'techcrunch.com'")
        print(" 3. Are of text/html or application/javascript content type")
        filter_chain = FilterChain(
            [
                URLPatternFilter(patterns=["*2024*"]),
                DomainFilter(
                    allowed_domains=["techcrunch.com"],
                    blocked_domains=["guce.techcrunch.com", "oidc.techcrunch.com"],
                ),
                ContentTypeFilter(
                    allowed_types=["text/html", "application/javascript"]
                ),
            ]
        )
        config = CrawlerRunConfig(
            deep_crawl_strategy=BFSDeepCrawlStrategy(
                max_depth=1, include_external=False, filter_chain=filter_chain
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
        )
        results = await crawler.arun(url="https://techcrunch.com", config=config)
        print(f" ✅ Crawled {len(results)} pages after applying all filters")
        for result in results[:3]:
            print(f" → {result.url}")
        if len(results) > 3:
            print(f" ... and {len(results) - 3} more")

        # EXAMPLE 3: Best-First crawl ordered by a keyword-relevance score,
        # consumed in streaming mode so scores print as pages arrive.
        print("\n EXAMPLE 3: USING A KEYWORD RELEVANCE SCORER")
        print(
            "Score pages based on relevance to keywords: 'crawl', 'example', 'async', 'configuration','javascript','css'"
        )
        keyword_scorer = KeywordRelevanceScorer(
            keywords=["crawl", "example", "async", "configuration", "javascript", "css"],
            weight=1,
        )
        config = CrawlerRunConfig(
            deep_crawl_strategy=BestFirstCrawlingStrategy(
                max_depth=1, include_external=False, url_scorer=keyword_scorer
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            cache_mode=CacheMode.BYPASS,
            verbose=True,
            stream=True,
        )
        results = []
        async for result in await crawler.arun(
            url="https://docs.crawl4ai.com", config=config
        ):
            results.append(result)
            # Default to 0: metadata.get("score") alone returns None when the
            # score is missing, and the ":.2f" format below would then raise
            # TypeError (the other sections already use this default).
            score = result.metadata.get("score", 0)
            print(f" → Score: {score:.2f} | {result.url}")
        print(f" ✅ Crawler prioritized {len(results)} pages by relevance score")
        print(" Note: BestFirstCrawlingStrategy visits highest-scoring pages first")
async def advanced_filters():
    """
    PART 4: Demonstrates advanced filtering techniques for specialized crawling.
    This function covers:
    - SEO filters
    - Text relevancy filtering
    - Combining advanced filters
    """
    print("\n===== ADVANCED FILTERS =====")

    async with AsyncWebCrawler() as crawler:
        # --- Example 1: keyword-driven SEO quality filter. ---
        print("\n EXAMPLE 1: SEO FILTERS")
        print(
            "Quantitative SEO quality assessment filter based searching keywords in the head section"
        )
        seo_filter = SEOFilter(
            threshold=0.5, keywords=["dynamic", "interaction", "javascript"]
        )
        seo_config = CrawlerRunConfig(
            deep_crawl_strategy=BFSDeepCrawlStrategy(
                max_depth=1, filter_chain=FilterChain([seo_filter])
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )
        seo_results = await crawler.arun(
            url="https://docs.crawl4ai.com", config=seo_config
        )
        print(f" ✅ Found {len(seo_results)} pages with relevant keywords")
        for page in seo_results:
            print(f" → {page.url}")

        # --- Example 2: semantic relevance filter against a text query. ---
        print("\n EXAMPLE 2: ADVANCED TEXT RELEVANCY FILTER")
        relevance_filter = ContentRelevanceFilter(
            query="Interact with the web using your authentic digital identity",
            threshold=0.7,
        )
        relevance_config = CrawlerRunConfig(
            deep_crawl_strategy=BFSDeepCrawlStrategy(
                max_depth=1, filter_chain=FilterChain([relevance_filter])
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )
        relevance_results = await crawler.arun(
            url="https://docs.crawl4ai.com", config=relevance_config
        )
        print(f" ✅ Found {len(relevance_results)} pages")
        for page in relevance_results:
            relevance_score = page.metadata.get("relevance_score", 0)
            print(f" → Score: {relevance_score:.2f} | {page.url}")
async def max_pages_and_thresholds():
    """
    PART 5: Demonstrates using max_pages and score_threshold parameters with different strategies.
    This function shows:
    - How to limit the number of pages crawled
    - How to set score thresholds for more targeted crawling
    - Comparing BFS, DFS, and Best-First strategies with these parameters
    """
    print("\n===== MAX PAGES AND SCORE THRESHOLDS =====")
    from crawl4ai.deep_crawling import DFSDeepCrawlStrategy

    async with AsyncWebCrawler() as crawler:
        # One scorer shared by all three strategy examples below.
        keyword_scorer = KeywordRelevanceScorer(
            keywords=["browser", "crawler", "web", "automation"],
            weight=1.0,
        )

        # EXAMPLE 1: BFS capped at 5 pages total via max_pages.
        print("\n EXAMPLE 1: BFS STRATEGY WITH MAX PAGES LIMIT")
        print(" Limit the crawler to a maximum of 5 pages")
        bfs_config = CrawlerRunConfig(
            deep_crawl_strategy=BFSDeepCrawlStrategy(
                max_depth=2,
                include_external=False,
                url_scorer=keyword_scorer,
                max_pages=5,
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )
        results = await crawler.arun(url="https://docs.crawl4ai.com", config=bfs_config)
        print(f" ✅ Crawled exactly {len(results)} pages as specified by max_pages")
        for result in results:
            depth = result.metadata.get("depth", 0)
            print(f" → Depth: {depth} | {result.url}")

        # EXAMPLE 2: DFS that skips any link scoring below the threshold.
        print("\n EXAMPLE 2: DFS STRATEGY WITH SCORE THRESHOLD")
        # Message corrected: the strategy below uses score_threshold=0.7,
        # not 0.5 as this print previously claimed.
        print(" Only crawl pages with a relevance score above 0.7")
        dfs_config = CrawlerRunConfig(
            deep_crawl_strategy=DFSDeepCrawlStrategy(
                max_depth=2,
                include_external=False,
                url_scorer=keyword_scorer,
                score_threshold=0.7,
                max_pages=10,
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )
        results = await crawler.arun(url="https://docs.crawl4ai.com", config=dfs_config)
        print(f" ✅ Crawled {len(results)} pages with scores above threshold")
        for result in results:
            score = result.metadata.get("score", 0)
            depth = result.metadata.get("depth", 0)
            print(f" → Depth: {depth} | Score: {score:.2f} | {result.url}")

        # EXAMPLE 3: Best-First with BOTH constraints. The original code
        # advertised a 0.3 score threshold but never passed it; it is now
        # applied so the demo matches its description.
        print("\n EXAMPLE 3: BEST-FIRST STRATEGY WITH BOTH CONSTRAINTS")
        print(" Limit to 7 pages with scores above 0.3, prioritizing highest scores")
        bf_config = CrawlerRunConfig(
            deep_crawl_strategy=BestFirstCrawlingStrategy(
                max_depth=2,
                include_external=False,
                url_scorer=keyword_scorer,
                score_threshold=0.3,
                max_pages=7,
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
            stream=True,
        )
        results = []
        async for result in await crawler.arun(url="https://docs.crawl4ai.com", config=bf_config):
            results.append(result)
            score = result.metadata.get("score", 0)
            depth = result.metadata.get("depth", 0)
            print(f" → Depth: {depth} | Score: {score:.2f} | {result.url}")
        print(f" ✅ Crawled {len(results)} high-value pages with scores above 0.3")
        if results:
            avg_score = sum(r.metadata.get('score', 0) for r in results) / len(results)
            print(f" ✅ Average score: {avg_score:.2f}")
        print(" Note: BestFirstCrawlingStrategy visited highest-scoring pages first")
async def wrap_up():
    """
    PART 6: Wrap-Up and Key Takeaways
    Summarize the key concepts learned in this tutorial.
    """
    print("\n===== COMPLETE CRAWLER EXAMPLE =====")
    print("Combining filters, scorers, and streaming for an optimized crawl")

    # Domain + URL-pattern + content-type filters, applied as a chain (AND).
    filter_chain = FilterChain(
        [
            DomainFilter(
                allowed_domains=["docs.crawl4ai.com"],
                blocked_domains=["old.docs.crawl4ai.com"],
            ),
            URLPatternFilter(patterns=["*core*", "*advanced*", "*blog*"]),
            ContentTypeFilter(allowed_types=["text/html"]),
        ]
    )
    keyword_scorer = KeywordRelevanceScorer(
        keywords=["crawl", "example", "async", "configuration"], weight=0.7
    )
    config = CrawlerRunConfig(
        deep_crawl_strategy=BestFirstCrawlingStrategy(
            max_depth=1,
            include_external=False,
            filter_chain=filter_chain,
            url_scorer=keyword_scorer,
        ),
        scraping_strategy=LXMLWebScrapingStrategy(),
        stream=True,
        verbose=True,
    )
    results = []
    start_time = time.perf_counter()
    async with AsyncWebCrawler() as crawler:
        async for result in await crawler.arun(
            url="https://docs.crawl4ai.com", config=config
        ):
            results.append(result)
            score = result.metadata.get("score", 0)
            depth = result.metadata.get("depth", 0)
            print(f"→ Depth: {depth} | Score: {score:.2f} | {result.url}")
    duration = time.perf_counter() - start_time
    print(f"\n✅ Crawled {len(results)} high-value pages in {duration:.2f} seconds")
    # Guard against ZeroDivisionError when every candidate URL was filtered
    # out (the PART 5 section already guards its average the same way).
    if results:
        print(
            f"✅ Average score: {sum(r.metadata.get('score', 0) for r in results) / len(results):.2f}"
        )
    # Summarize how many pages were fetched at each crawl depth.
    depth_counts = {}
    for result in results:
        depth = result.metadata.get("depth", 0)
        depth_counts[depth] = depth_counts.get(depth, 0) + 1
    print("\n Pages crawled by depth:")
    for depth, count in sorted(depth_counts.items()):
        print(f" Depth {depth}: {count} pages")
async def run_tutorial():
    """
    Executes all tutorial sections in sequence.
    """
    print("\n CRAWL4AI DEEP CRAWLING TUTORIAL ")
    print("======================================")
    print("This tutorial will walk you through deep crawling techniques,")
    print("from basic to advanced, using the Crawl4AI library.")

    # Await each demo coroutine one after another, in the same order as before.
    for section in (
        basic_deep_crawl,
        stream_vs_nonstream,
        filters_and_scorers,
        max_pages_and_thresholds,
        advanced_filters,
        wrap_up,
    ):
        await section()

    print("\n TUTORIAL COMPLETE! ")
    print("You now have a comprehensive understanding of deep crawling with Crawl4AI.")
    print("For more information, check out https://docs.crawl4ai.com")
# Script entry point: run the whole tutorial inside one asyncio event loop.
if __name__ == "__main__":
    asyncio.run(run_tutorial())
✓ | ⏱: 0.08s
[COMPLETE] ● https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.24s
[FETCH]... ↓ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 2.56s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 2.59s
[FETCH]... ↓ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 2.89s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 2.93s
[FETCH]... ↓ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.57s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.62s
[FETCH]... ↓ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.31s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.33s
[FETCH]... ↓ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.39s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.43s
[FETCH]... ↓ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.45s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.47s
[FETCH]... ↓ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 2.09s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 2.11s
[FETCH]... ↓ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.72s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.75s
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 2.76s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 2.80s
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 2.42s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 2.45s
[FETCH]... ↓ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 3.15s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 3.19s
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 2.89s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 2.95s
[ERROR]... × https://old.docs.crawl4ai.com | Error: Unexpected
error in _crawl_web at line 744 in _crawl_web
(D:\anaconda3\envs\crawl4ai-python311\Lib\site-packages\crawl4ai\async_crawler_strat
egy.py):
Error: Failed on navigating ACS-GOTO:
Page.goto: net::ERR_CONNECTION_CLOSED at https://old.docs.crawl4ai.com/
Call log:
- navigating to "https://old.docs.crawl4ai.com/", waiting until "domcontentloaded"
Code context:
739 response = await page.goto(
740 url, wait_until=config.wait_until,
timeout=config.page_timeout
741 )
742 redirected_url = page.url
743 except Error as e:
744 → raise RuntimeError(f"Failed on navigating
ACS-GOTO:\n{str(e)}")
745
746 await self.execute_hook(
747 "after_goto", page, context=context, url=url,
response=response, config=config
748 )
749
[FETCH]... ↓ https://docs.crawl4ai.com/blog/releases/0.4.0
| ✓ | ⏱: 1.14s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog/releases/0.4.0
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog/releases/0.4.0
| ✓ | ⏱: 1.16s
[FETCH]... ↓ https://docs.crawl4ai.com/blog/releases/0.4.2
| ✓ | ⏱: 1.46s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog/releases/0.4.2
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog/releases/0.4.2
| ✓ | ⏱: 1.48s
[FETCH]... ↓ https://docs.crawl4ai.com/deploy/docker/README.md
| ✓ | ⏱: 1.47s
[SCRAPE].. ◆ https://docs.crawl4ai.com/deploy/docker/README.md
| ✓ | ⏱: 0.00s
[COMPLETE] ● https://docs.crawl4ai.com/deploy/docker/README.md
| ✓ | ⏱: 1.48s
[FETCH]... ↓ https://docs.crawl4ai.com/blog/releases/0.6.0
| ✓ | ⏱: 2.05s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog/releases/0.6.0
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog/releases/0.6.0
| ✓ | ⏱: 2.07s
[FETCH]... ↓ https://docs.crawl4ai.com/blog/releases/0.5.0
| ✓ | ⏱: 2.32s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog/releases/0.5.0
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/blog/releases/0.5.0
| ✓ | ⏱: 2.36s
[FETCH]... ↓ https://docs.crawl4ai.com/blog/releases/0.4.1
| ✓ | ⏱: 2.41s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog/releases/0.4.1
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog/releases/0.4.1
| ✓ | ⏱: 2.43s
✅ Crawled 49 pages total
Depth 0: 1 pages
→ https://docs.crawl4ai.com
Depth 1: 42 pages
→ https://docs.crawl4ai.com
→ https://docs.crawl4ai.com/advanced/ssl-certificate
→ https://docs.crawl4ai.com/api/arun
... and 39 more
Depth 2: 6 pages
→ https://docs.crawl4ai.com/blog/releases/0.4.0
→ https://docs.crawl4ai.com/blog/releases/0.4.2
→ https://docs.crawl4ai.com/deploy/docker/README.md
... and 3 more
✅ Performance: 49 pages in 22.85 seconds
===== STREAM VS. NON-STREAM EXECUTION =====
[INIT].... → Crawl4AI 0.6.3
NON-STREAMING MODE:
In this mode, all results are collected before being returned.
✅ Received all 43 results at once
✅ Total duration: 17.67 seconds
STREAMING MODE:
In this mode, results are processed as they become available.
✅ First result received after 6.64 seconds: https://docs.crawl4ai.com
→ Result
→ Result
→ Result
→ Result
→ Result
→ Result
→ Result
→ Result
✅ Total: 42 results
✅ First result: 6.64 seconds
✅ All results: 18.32 seconds
Key Takeaway: Streaming allows processing results immediately
===== FILTERS AND SCORERS =====
[INIT].... → Crawl4AI 0.6.3
EXAMPLE 1: SINGLE URL PATTERN FILTER
Only crawl pages containing 'core' in the URL
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 6.39s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 6.41s
[FETCH]... ↓ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.86s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.87s
[FETCH]... ↓ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.29s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.34s
[FETCH]... ↓ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 1.55s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 1.58s
[FETCH]... ↓ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 2.07s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 2.11s
[FETCH]... ↓ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.40s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 0.07s
[COMPLETE] ● https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.48s
[FETCH]... ↓ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 2.64s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 2.69s
[FETCH]... ↓ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 2.81s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 2.86s
[FETCH]... ↓ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 3.05s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 3.10s
[FETCH]... ↓ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 3.16s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 3.22s
[FETCH]... ↓ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.72s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.75s
[FETCH]... ↓ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 2.81s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 2.84s
[FETCH]... ↓ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 2.94s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 2.98s
[FETCH]... ↓ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 3.00s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 3.02s
[FETCH]... ↓ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 3.10s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 3.14s
[FETCH]... ↓ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 3.15s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 3.18s
[FETCH]... ↓ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 1.95s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 1.99s
[FETCH]... ↓ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 2.27s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 2.29s
[FETCH]... ↓ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.66s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.70s
✅ Crawled 19 pages matching '*core*'
→ https://docs.crawl4ai.com
→ https://docs.crawl4ai.com/core/ask-ai
→ https://docs.crawl4ai.com/core/examples
... and 16 more
EXAMPLE 2: MULTIPLE FILTERS IN A CHAIN
Only crawl pages that:
1. Contain '2024' in the URL
2. Are from 'techcrunch.com'
3. Are of text/html or application/javascript content type
[FETCH]... ↓ https://techcrunch.com
| ✓ | ⏱: 2.07s
[SCRAPE].. ◆ https://techcrunch.com
| ✓ | ⏱: 0.14s
[COMPLETE] ● https://techcrunch.com
| ✓ | ⏱: 2.22s
[FETCH]... ↓ https://techcrunch.com/2025/02/28/tech-layoffs-2024-list
| ✓ | ⏱: 1.50s
[SCRAPE].. ◆ https://techcrunch.com/2025/02/28/tech-layoffs-2024-list
| ✓ | ⏱: 0.07s
[COMPLETE] ● https://techcrunch.com/2025/02/28/tech-layoffs-2024-list
| ✓ | ⏱: 1.58s
✅ Crawled 2 pages after applying all filters
→ https://techcrunch.com
→ https://techcrunch.com/2025/02/28/tech-layoffs-2024-list
EXAMPLE 3: USING A KEYWORD RELEVANCE SCORER
Score pages based on relevance to keywords: 'crawl', 'example', 'async', 'configuration','javascript','css'
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 7.39s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 7.41s
→ Score: 0.00 | https://docs.crawl4ai.com
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 1.26s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 1.29s
→ Score: 0.17 | https://docs.crawl4ai.com/advanced/advanced-features
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 1.18s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 1.20s
→ Score: 0.17 | https://docs.crawl4ai.com/advanced/lazy-loading
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 1.63s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 1.65s
→ Score: 0.17 | https://docs.crawl4ai.com/advanced/crawl-dispatcher
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 1.77s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 1.81s
→ Score: 0.17 | https://docs.crawl4ai.com/advanced/multi-url-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 2.20s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 2.22s
→ Score: 0.17 | https://docs.crawl4ai.com/advanced/proxy-security
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 2.23s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 2.26s
→ Score: 0.17 | https://docs.crawl4ai.com/advanced/network-console-capture
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 2.38s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 2.41s
→ Score: 0.17 | https://docs.crawl4ai.com/advanced/session-management
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 2.56s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 2.58s
→ Score: 0.17 | https://docs.crawl4ai.com/advanced/file-downloading
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 2.58s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 2.61s
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 2.63s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 2.66s
→ Score: 0.17 | https://docs.crawl4ai.com/advanced/identity-based-crawling
→ Score: 0.17 | https://docs.crawl4ai.com/advanced/hooks-auth
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 1.35s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 1.37s
→ Score: 0.17 | https://docs.crawl4ai.com/advanced/ssl-certificate
[FETCH]... ↓ https://docs.crawl4ai.com/blog
| ✓ | ⏱: 1.82s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog
| ✓ | ⏱: 1.85s
→ Score: 0.17 | https://docs.crawl4ai.com/blog
[FETCH]... ↓ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 1.91s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 1.94s
→ Score: 0.17 | https://docs.crawl4ai.com/core/cache-modes
[FETCH]... ↓ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 2.03s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 2.05s
→ Score: 0.17 | https://docs.crawl4ai.com/core/ask-ai
[FETCH]... ↓ https://docs.crawl4ai.com/api/strategies
| ✓ | ⏱: 2.06s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/strategies
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/api/strategies
| ✓ | ⏱: 2.10s
→ Score: 0.17 | https://docs.crawl4ai.com/api/strategies
[FETCH]... ↓ https://docs.crawl4ai.com/api/arun_many
| ✓ | ⏱: 2.58s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/arun_many
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/api/arun_many
| ✓ | ⏱: 2.61s
[FETCH]... ↓ https://docs.crawl4ai.com/api/arun
| ✓ | ⏱: 2.62s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/arun
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/api/arun
| ✓ | ⏱: 2.66s
→ Score: 0.17 | https://docs.crawl4ai.com/api/arun_many
→ Score: 0.17 | https://docs.crawl4ai.com/api/arun
[FETCH]... ↓ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 2.80s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 2.84s
→ Score: 0.17 | https://docs.crawl4ai.com/core/browser-crawler-config
[FETCH]... ↓ https://docs.crawl4ai.com/api/crawl-result
| ✓ | ⏱: 3.03s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/crawl-result
| ✓ | ⏱: 0.06s
[COMPLETE] ● https://docs.crawl4ai.com/api/crawl-result
| ✓ | ⏱: 3.10s
[FETCH]... ↓ https://docs.crawl4ai.com/api/parameters
| ✓ | ⏱: 3.10s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/parameters
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/api/parameters
| ✓ | ⏱: 3.15s
→ Score: 0.17 | https://docs.crawl4ai.com/api/crawl-result
→ Score: 0.17 | https://docs.crawl4ai.com/api/parameters
[FETCH]... ↓ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 1.98s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 2.01s
→ Score: 0.17 | https://docs.crawl4ai.com/core/cli
[FETCH]... ↓ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 1.76s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 1.80s
→ Score: 0.17 | https://docs.crawl4ai.com/core/fit-markdown
[FETCH]... ↓ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 1.94s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 1.97s
→ Score: 0.17 | https://docs.crawl4ai.com/core/installation
[FETCH]... ↓ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 1.96s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 2.00s
[FETCH]... ↓ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 2.01s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 2.02s
→ Score: 0.17 | https://docs.crawl4ai.com/core/llmtxt
→ Score: 0.17 | https://docs.crawl4ai.com/core/crawler-result
[FETCH]... ↓ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 2.32s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 2.36s
→ Score: 0.17 | https://docs.crawl4ai.com/core/content-selection
[FETCH]... ↓ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.36s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.40s
→ Score: 0.17 | https://docs.crawl4ai.com/core/link-media
[FETCH]... ↓ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.59s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.61s
→ Score: 0.17 | https://docs.crawl4ai.com/core/local-files
[FETCH]... ↓ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 2.85s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 2.90s
→ Score: 0.17 | https://docs.crawl4ai.com/core/deep-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 3.24s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 0.06s
[COMPLETE] ● https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 3.30s
→ Score: 0.17 | https://docs.crawl4ai.com/core/docker-deployment
[FETCH]... ↓ https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 1.21s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 1.24s
→ Score: 0.33 | https://docs.crawl4ai.com/api/async-webcrawler
[ERROR]... × https://old.docs.crawl4ai.com | Error: Unexpected
error in _crawl_web at line 744 in _crawl_web
(D:\anaconda3\envs\crawl4ai-python311\Lib\site-packages\crawl4ai\async_crawler_strat
egy.py):
Error: Failed on navigating ACS-GOTO:
Page.goto: net::ERR_CONNECTION_CLOSED at https://old.docs.crawl4ai.com/
Call log:
- navigating to "https://old.docs.crawl4ai.com/", waiting until "domcontentloaded"
Code context:
739 response = await page.goto(
740 url, wait_until=config.wait_until,
timeout=config.page_timeout
741 )
742 redirected_url = page.url
743 except Error as e:
744 → raise RuntimeError(f"Failed on navigating
ACS-GOTO:\n{str(e)}")
745
746 await self.execute_hook(
747 "after_goto", page, context=context, url=url,
response=response, config=config
748 )
749
→ Score: 0.17 | https://old.docs.crawl4ai.com
[FETCH]... ↓ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 1.60s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 1.62s
→ Score: 0.17 | https://docs.crawl4ai.com/core/simple-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/chunking
| ✓ | ⏱: 2.45s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/chunking
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/chunking
| ✓ | ⏱: 2.47s
→ Score: 0.17 | https://docs.crawl4ai.com/extraction/chunking
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 2.48s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 2.52s
→ Score: 0.17 | https://docs.crawl4ai.com/extraction/clustring-strategies
[FETCH]... ↓ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.69s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.73s
→ Score: 0.17 | https://docs.crawl4ai.com/core/page-interaction
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 2.84s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 2.88s
→ Score: 0.17 | https://docs.crawl4ai.com/extraction/llm-strategies
[FETCH]... ↓ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.98s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 3.02s
→ Score: 0.17 | https://docs.crawl4ai.com/core/quickstart
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 3.04s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 3.09s
→ Score: 0.17 | https://docs.crawl4ai.com/extraction/no-llm-strategies
[FETCH]... ↓ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 3.12s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 3.16s
→ Score: 0.17 | https://docs.crawl4ai.com/core/markdown-generation
[FETCH]... ↓ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.28s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.32s
→ Score: 0.33 | https://docs.crawl4ai.com/core/examples
✅ Crawler prioritized 42 pages by relevance score
Note: BestFirstCrawlingStrategy visits highest-scoring pages first
===== MAX PAGES AND SCORE THRESHOLDS =====
[INIT].... → Crawl4AI 0.6.3
EXAMPLE 1: BFS STRATEGY WITH MAX PAGES LIMIT
Limit the crawler to a maximum of 5 pages
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 7.23s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 7.25s
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 0.69s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 0.72s
[FETCH]... ↓ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 1.26s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 1.30s
[FETCH]... ↓ https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 1.57s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 1.60s
[FETCH]... ↓ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 1.65s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 1.68s
✅ Crawled exactly 5 pages as specified by max_pages
→ Depth: 0 | https://docs.crawl4ai.com
→ Depth: 1 | https://docs.crawl4ai.com
→ Depth: 1 | https://docs.crawl4ai.com/core/crawler-result
→ Depth: 1 | https://docs.crawl4ai.com/api/async-webcrawler
→ Depth: 1 | https://docs.crawl4ai.com/core/browser-crawler-config
EXAMPLE 2: DFS STRATEGY WITH SCORE THRESHOLD
Only crawl pages with a relevance score above 0.5
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 0.53s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 0.55s
✅ Crawled 1 pages with scores above threshold
→ Depth: 0 | Score: 0.00 | https://docs.crawl4ai.com
EXAMPLE 3: BEST-FIRST STRATEGY WITH BOTH CONSTRAINTS
Limit to 7 pages with scores above 0.3, prioritizing highest scores
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 10.89s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 10.92s
→ Depth: 0 | Score: 0.00 | https://docs.crawl4ai.com
[FETCH]... ↓ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.92s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.93s
→ Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/ask-ai
[FETCH]... ↓ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 1.09s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 1.11s
→ Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/installation
[FETCH]... ↓ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.76s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.80s
→ Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/examples
[FETCH]... ↓ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 1.82s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 1.85s
[FETCH]... ↓ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 1.87s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 1.88s
→ Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/llmtxt
→ Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/quickstart
[FETCH]... ↓ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.31s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 0.06s
[COMPLETE] ● https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.36s
→ Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/docker-deployment
✅ Crawled 7 high-value pages with scores above 0.3
✅ Average score: 0.00
Note: BestFirstCrawlingStrategy visited highest-scoring pages first
===== ADVANCED FILTERS =====
[INIT].... → Crawl4AI 0.6.3
EXAMPLE 1: SEO FILTERS
Quantitative SEO quality assessment filter based on searching for keywords in the head section
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 9.53s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 9.56s
✅ Found 1 pages with relevant keywords
→ https://docs.crawl4ai.com
EXAMPLE 2: ADVANCED TEXT RELEVANCY FILTER
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 0.52s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 0.54s
✅ Found 1 pages
→ Score: 0.00 | https://docs.crawl4ai.com
===== COMPLETE CRAWLER EXAMPLE =====
Combining filters, scorers, and streaming for an optimized crawl
[INIT].... → Crawl4AI 0.6.3
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 6.00s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 6.03s
→ Depth: 0 | Score: 0.00 | https://docs.crawl4ai.com
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 1.32s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 1.36s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/advanced-features
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 2.32s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 2.35s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/session-management
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 2.76s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 2.80s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/network-console-capture
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 2.91s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 2.94s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/lazy-loading
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 3.00s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 3.04s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/identity-based-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 3.19s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 3.21s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/file-downloading
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 3.23s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 3.26s
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 3.27s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 3.31s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/hooks-auth
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/multi-url-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 3.45s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 3.47s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/proxy-security
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 3.94s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 3.96s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/crawl-dispatcher
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 1.19s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 1.22s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/ssl-certificate
[FETCH]... ↓ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 1.53s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 1.57s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/deep-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 1.94s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 0.06s
[COMPLETE] ● https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.00s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/docker-deployment
[FETCH]... ↓ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 2.17s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 2.20s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/cache-modes
[FETCH]... ↓ https://docs.crawl4ai.com/blog
| ✓ | ⏱: 2.45s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog
| ✓ | ⏱: 2.47s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/blog
[FETCH]... ↓ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 2.50s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 2.52s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/ask-ai
[FETCH]... ↓ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 2.59s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 2.63s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/cli
[FETCH]... ↓ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 3.00s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 3.04s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/content-selection
[FETCH]... ↓ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 3.04s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 3.07s
[FETCH]... ↓ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 3.07s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 3.11s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/browser-crawler-config
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/crawler-result
[FETCH]... ↓ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.96s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.99s
→ Depth: 1 | Score: 0.35 | https://docs.crawl4ai.com/core/examples
[FETCH]... ↓ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 1.83s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 1.86s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/simple-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 1.87s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 1.88s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/llmtxt
[FETCH]... ↓ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.37s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.40s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/local-files
[FETCH]... ↓ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.53s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.58s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/quickstart
[FETCH]... ↓ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 2.63s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 2.69s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/markdown-generation
[FETCH]... ↓ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.71s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.73s
[FETCH]... ↓ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 2.74s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 2.77s
[FETCH]... ↓ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.78s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.81s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/fit-markdown
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/page-interaction
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/installation
[FETCH]... ↓ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.83s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.87s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/link-media
✅ Crawled 31 high-value pages in 22.47 seconds
✅ Average score: 0.17
Pages crawled by depth:
Depth 0: 1 pages
Depth 1: 30 pages
TUTORIAL COMPLETE!
You now have a comprehensive understanding of deep crawling with Crawl4AI.
For more information, check out https://docs.crawl4ai.com
PS E:\AI-lab\n8n>