人工智能我来了

crawl4ai实操7

import asyncio
import time

from crawl4ai import CrawlerRunConfig, AsyncWebCrawler, CacheMode
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy, BestFirstCrawlingStrategy
from crawl4ai.deep_crawling.filters import (
    FilterChain,
    URLPatternFilter,
    DomainFilter,
    ContentTypeFilter,
    ContentRelevanceFilter,
    SEOFilter,
)
from crawl4ai.deep_crawling.scorers import (
    KeywordRelevanceScorer,
)


# 1️⃣ Basic Deep Crawl Setup
async def basic_deep_crawl():
    """
    PART 1: Run a simple breadth-first deep crawl and summarize it by depth.

    Steps performed:
    - Build a CrawlerRunConfig around BFSDeepCrawlStrategy (max_depth=2,
      include_external=False) and LXMLWebScrapingStrategy
    - Crawl https://docs.crawl4ai.com in non-streaming mode
    - Bucket the returned URLs by the "depth" value found in each result's
      metadata (0 when the key is absent) and print up to three URLs per depth
    - Print the total page count and the elapsed wall-clock time
    """
    print("\n===== BASIC DEEP CRAWL SETUP =====")

    # Breadth-first strategy: the start page is depth 0 and the crawl follows
    # links for two further levels; include_external=False keeps the crawl on
    # the starting domain.
    bfs_strategy = BFSDeepCrawlStrategy(max_depth=2, include_external=False)
    config = CrawlerRunConfig(
        deep_crawl_strategy=bfs_strategy,
        scraping_strategy=LXMLWebScrapingStrategy(),
        verbose=True,  # Emit progress lines while crawling
    )

    async with AsyncWebCrawler() as crawler:
        started = time.perf_counter()
        results = await crawler.arun(url="https://docs.crawl4ai.com", config=config)

        # depth -> list of URLs discovered at that depth, in arrival order
        urls_at_depth = {}
        for page in results:
            urls_at_depth.setdefault(page.metadata.get("depth", 0), []).append(page.url)

        print(f"✅ Crawled {len(results)} pages total")

        # Walk the depths in ascending order, showing a short sample of each
        for level in sorted(urls_at_depth):
            level_urls = urls_at_depth[level]
            print(f"\nDepth {level}: {len(level_urls)} pages")
            for sample_url in level_urls[:3]:
                print(f"  → {sample_url}")
            remaining = len(level_urls) - 3
            if remaining > 0:
                print(f"  ... and {remaining} more")

        elapsed = time.perf_counter() - started
        print(f"\n✅ Performance: {len(results)} pages in {elapsed:.2f} seconds")

# 2️⃣ Stream vs. Non-Stream Execution
async def stream_vs_nonstream():
    """
    PART 2: Demonstrates the difference between stream and non-stream execution.

    Non-stream: ``arun`` is awaited once and the complete result collection is
    only available after the whole crawl has finished.
    Stream: ``arun`` is awaited to obtain an async iterator and every result is
    handled as soon as it is yielded.

    Both runs perform the same depth-1 BFS crawl of https://docs.crawl4ai.com
    and print timing information so the two modes can be compared.
    """
    print("\n===== STREAM VS. NON-STREAM EXECUTION =====")

    # Common configuration for both examples; each mode works on its own clone
    # so toggling ``stream`` on one does not affect the other.
    base_config = CrawlerRunConfig(
        deep_crawl_strategy=BFSDeepCrawlStrategy(max_depth=1, include_external=False),
        scraping_strategy=LXMLWebScrapingStrategy(),
        verbose=False,
    )

    async with AsyncWebCrawler() as crawler:
        # NON-STREAMING MODE
        print("\n NON-STREAMING MODE:")
        print("  In this mode, all results are collected before being returned.")

        non_stream_config = base_config.clone()
        non_stream_config.stream = False

        start_time = time.perf_counter()
        results = await crawler.arun(
            url="https://docs.crawl4ai.com", config=non_stream_config
        )

        print(f"  ✅ Received all {len(results)} results at once")
        print(f"  ✅ Total duration: {time.perf_counter() - start_time:.2f} seconds")

        # STREAMING MODE
        print("\n STREAMING MODE:")
        print("  In this mode, results are processed as they become available.")

        stream_config = base_config.clone()
        stream_config.stream = True

        start_time = time.perf_counter()
        result_count = 0
        first_result_time = None  # Stays None if the stream yields nothing

        async for result in await crawler.arun(
            url="https://docs.crawl4ai.com", config=stream_config
        ):
            result_count += 1
            if result_count == 1:
                first_result_time = time.perf_counter() - start_time
                print(
                    f"  ✅ First result received after {first_result_time:.2f} seconds: {result.url}"
                )
            elif result_count % 5 == 0:  # Show every 5th result for brevity
                print(f"  → Result #{result_count}: {result.url}")

        print(f"  ✅ Total: {result_count} results")
        # Formatting None with ':.2f' raises TypeError, so an empty stream
        # needs its own message instead of a timing value.
        if first_result_time is None:
            print("  ✅ First result: n/a (no results received)")
        else:
            print(f"  ✅ First result: {first_result_time:.2f} seconds")
        print(f"  ✅ All results: {time.perf_counter() - start_time:.2f} seconds")
        print("\n Key Takeaway: Streaming allows processing results immediately")

# 3️⃣ Introduce Filters & Scorers
async def filters_and_scorers():
    """
    PART 3: Demonstrates the use of filters and scorers for more targeted crawling.

    This function progressively adds:
    1. A single URL pattern filter (BFS crawl of docs.crawl4ai.com)
    2. Multiple filters in a chain (BFS crawl of techcrunch.com)
    3. A keyword scorer driving a streamed best-first crawl

    All three examples share one AsyncWebCrawler instance and crawl to depth 1.
    """
    print("\n===== FILTERS AND SCORERS =====")

    async with AsyncWebCrawler() as crawler:
        # SINGLE FILTER EXAMPLE
        print("\n EXAMPLE 1: SINGLE URL PATTERN FILTER")
        print("  Only crawl pages containing 'core' in the URL")

        # Create a filter that only allows URLs with 'core' in them
        url_filter = URLPatternFilter(patterns=["*core*"])

        config = CrawlerRunConfig(
            deep_crawl_strategy=BFSDeepCrawlStrategy(
                max_depth=1,
                include_external=False,
                filter_chain=FilterChain([url_filter]),  # Single filter
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            cache_mode=CacheMode.BYPASS,
            verbose=True,
        )

        results = await crawler.arun(url="https://docs.crawl4ai.com", config=config)

        print(f"  ✅ Crawled {len(results)} pages matching '*core*'")
        for result in results[:3]:  # Show first 3 results
            print(f"  → {result.url}")
        if len(results) > 3:
            print(f"  ... and {len(results) - 3} more")

        # MULTIPLE FILTERS EXAMPLE
        print("\n EXAMPLE 2: MULTIPLE FILTERS IN A CHAIN")
        print("  Only crawl pages that:")
        print("  1. Contain '2024' in the URL")
        print("  2. Are from 'techcrunch.com'")
        print("  3. Are of text/html or application/javascript content type")

        # Create a chain of filters: URL pattern, domain allow/block lists,
        # and accepted content types
        filter_chain = FilterChain(
            [
                URLPatternFilter(patterns=["*2024*"]),
                DomainFilter(
                    allowed_domains=["techcrunch.com"],
                    blocked_domains=["guce.techcrunch.com", "oidc.techcrunch.com"],
                ),
                ContentTypeFilter(
                    allowed_types=["text/html", "application/javascript"]
                ),
            ]
        )

        config = CrawlerRunConfig(
            deep_crawl_strategy=BFSDeepCrawlStrategy(
                max_depth=1, include_external=False, filter_chain=filter_chain
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
        )

        results = await crawler.arun(url="https://techcrunch.com", config=config)

        print(f"  ✅ Crawled {len(results)} pages after applying all filters")
        for result in results[:3]:
            print(f"  → {result.url}")
        if len(results) > 3:
            print(f"  ... and {len(results) - 3} more")

        # SCORERS EXAMPLE
        print("\n EXAMPLE 3: USING A KEYWORD RELEVANCE SCORER")
        print(
            "Score pages based on relevance to keywords: 'crawl', 'example', 'async', 'configuration','javascript','css'"
        )

        # Create a keyword relevance scorer
        keyword_scorer = KeywordRelevanceScorer(
            keywords=["crawl", "example", "async", "configuration","javascript","css"], weight=1
        )

        # stream=True so each result can be printed as soon as it is yielded
        config = CrawlerRunConfig(
            deep_crawl_strategy=BestFirstCrawlingStrategy(
                max_depth=1, include_external=False, url_scorer=keyword_scorer
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            cache_mode=CacheMode.BYPASS,
            verbose=True,
            stream=True,
        )

        results = []
        async for result in await crawler.arun(
            url="https://docs.crawl4ai.com", config=config
        ):
            results.append(result)
            # Default to 0 like the other sections of this tutorial: formatting
            # a missing (None) score with ':.2f' would raise TypeError.
            score = result.metadata.get("score", 0)
            print(f"  → Score: {score:.2f} | {result.url}")

        print(f"  ✅ Crawler prioritized {len(results)} pages by relevance score")
        print("   Note: BestFirstCrawlingStrategy visits highest-scoring pages first")

# 4️⃣ Advanced Filters
async def advanced_filters():
    """
    PART 4: Run two depth-1 BFS crawls, each gated by one advanced filter.

    Example 1 uses SEOFilter (threshold 0.5, keywords "dynamic",
    "interaction", "javascript") and lists every URL that was crawled.
    Example 2 uses ContentRelevanceFilter (threshold 0.7, a natural-language
    query) and prints each URL with the "relevance_score" found in its
    metadata, shown as 0 when that key is absent.

    Both crawls start at https://docs.crawl4ai.com, bypass the cache, and
    share a single AsyncWebCrawler instance.
    """
    print("\n===== ADVANCED FILTERS =====")

    start_url = "https://docs.crawl4ai.com"

    async with AsyncWebCrawler() as crawler:
        # ---- Example 1: SEO filter ----
        print("\n EXAMPLE 1: SEO FILTERS")
        print(
            "Quantitative SEO quality assessment filter based searching keywords in the head section"
        )

        seo_chain = FilterChain(
            [
                SEOFilter(
                    threshold=0.5,
                    keywords=["dynamic", "interaction", "javascript"],
                )
            ]
        )
        seo_strategy = BFSDeepCrawlStrategy(max_depth=1, filter_chain=seo_chain)
        seo_config = CrawlerRunConfig(
            deep_crawl_strategy=seo_strategy,
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )

        seo_pages = await crawler.arun(url=start_url, config=seo_config)

        print(f"  ✅ Found {len(seo_pages)} pages with relevant keywords")
        for page in seo_pages:
            print(f"  → {page.url}")

        # ---- Example 2: content relevance filter ----
        print("\n EXAMPLE 2: ADVANCED TEXT RELEVANCY FILTER")

        relevance_chain = FilterChain(
            [
                ContentRelevanceFilter(
                    query="Interact with the web using your authentic digital identity",
                    threshold=0.7,
                )
            ]
        )
        relevance_strategy = BFSDeepCrawlStrategy(
            max_depth=1, filter_chain=relevance_chain
        )
        relevance_config = CrawlerRunConfig(
            deep_crawl_strategy=relevance_strategy,
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )

        relevant_pages = await crawler.arun(url=start_url, config=relevance_config)

        print(f"  ✅ Found {len(relevant_pages)} pages")
        for page in relevant_pages:
            page_score = page.metadata.get("relevance_score", 0)
            print(f"  → Score: {page_score:.2f} | {page.url}")

# 5️⃣ Max Pages and Score Thresholds
async def max_pages_and_thresholds():
    """
    PART 5: Demonstrates using max_pages and score_threshold parameters with different strategies.

    This function shows:
    - How to limit the number of pages crawled (BFS with max_pages)
    - How to set a score threshold for more targeted crawling (DFS with
      score_threshold and max_pages)
    - How Best-First crawling behaves with a max_pages limit (no score
      threshold is configured for that example)

    All examples share one keyword scorer and one AsyncWebCrawler instance.
    """
    print("\n===== MAX PAGES AND SCORE THRESHOLDS =====")

    from crawl4ai.deep_crawling import DFSDeepCrawlStrategy

    # Single source of truth for the DFS threshold so the printed description
    # can never drift from the value actually passed to the strategy.
    dfs_score_threshold = 0.7

    async with AsyncWebCrawler() as crawler:
        # Define a common keyword scorer for all examples
        keyword_scorer = KeywordRelevanceScorer(
            keywords=["browser", "crawler", "web", "automation"],
            weight=1.0
        )

        # EXAMPLE 1: BFS WITH MAX PAGES
        print("\n EXAMPLE 1: BFS STRATEGY WITH MAX PAGES LIMIT")
        print("  Limit the crawler to a maximum of 5 pages")

        bfs_config = CrawlerRunConfig(
            deep_crawl_strategy=BFSDeepCrawlStrategy(
                max_depth=2,
                include_external=False,
                url_scorer=keyword_scorer,
                max_pages=5  # Only crawl 5 pages
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )

        results = await crawler.arun(url="https://docs.crawl4ai.com", config=bfs_config)

        print(f"  ✅ Crawled exactly {len(results)} pages as specified by max_pages")
        for result in results:
            depth = result.metadata.get("depth", 0)
            print(f"  → Depth: {depth} | {result.url}")

        # EXAMPLE 2: DFS WITH SCORE THRESHOLD
        print("\n EXAMPLE 2: DFS STRATEGY WITH SCORE THRESHOLD")
        print(f"  Only crawl pages with a relevance score above {dfs_score_threshold}")

        dfs_config = CrawlerRunConfig(
            deep_crawl_strategy=DFSDeepCrawlStrategy(
                max_depth=2,
                include_external=False,
                url_scorer=keyword_scorer,
                score_threshold=dfs_score_threshold,  # Threshold announced above
                max_pages=10
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
        )

        results = await crawler.arun(url="https://docs.crawl4ai.com", config=dfs_config)

        print(f"  ✅ Crawled {len(results)} pages with scores above threshold")
        for result in results:
            score = result.metadata.get("score", 0)
            depth = result.metadata.get("depth", 0)
            print(f"  → Depth: {depth} | Score: {score:.2f} | {result.url}")

        # EXAMPLE 3: BEST-FIRST WITH MAX PAGES
        # Only max_pages is configured here; the previous messages claimed a
        # 0.3 score threshold that was never passed to the strategy.
        print("\n EXAMPLE 3: BEST-FIRST STRATEGY WITH MAX PAGES LIMIT")
        print("  Limit to 7 pages, prioritizing highest scores")

        bf_config = CrawlerRunConfig(
            deep_crawl_strategy=BestFirstCrawlingStrategy(
                max_depth=2,
                include_external=False,
                url_scorer=keyword_scorer,
                max_pages=7,          # Limit to 7 pages total
            ),
            scraping_strategy=LXMLWebScrapingStrategy(),
            verbose=True,
            cache_mode=CacheMode.BYPASS,
            stream=True,
        )

        results = []
        async for result in await crawler.arun(url="https://docs.crawl4ai.com", config=bf_config):
            results.append(result)
            score = result.metadata.get("score", 0)
            depth = result.metadata.get("depth", 0)
            print(f"  → Depth: {depth} | Score: {score:.2f} | {result.url}")

        print(f"  ✅ Crawled {len(results)} pages in best-first order")
        if results:  # Avoid dividing by zero when nothing was crawled
            avg_score = sum(r.metadata.get('score', 0) for r in results) / len(results)
            print(f"  ✅ Average score: {avg_score:.2f}")
            print("   Note: BestFirstCrawlingStrategy visited highest-scoring pages first")

# 6️⃣ Wrap-Up and Key Takeaways
async def wrap_up():
    """
    PART 6: Wrap-Up and Key Takeaways

    Runs one streamed best-first crawl of https://docs.crawl4ai.com that
    combines a three-filter chain (domain, URL pattern, content type) with a
    keyword relevance scorer, then prints the page count, elapsed time,
    average score (when at least one page was crawled) and a per-depth
    breakdown.
    """
    print("\n===== COMPLETE CRAWLER EXAMPLE =====")
    print("Combining filters, scorers, and streaming for an optimized crawl")

    # Create a sophisticated filter chain
    filter_chain = FilterChain(
        [
            DomainFilter(
                allowed_domains=["docs.crawl4ai.com"],
                blocked_domains=["old.docs.crawl4ai.com"],
            ),
            URLPatternFilter(patterns=["*core*", "*advanced*", "*blog*"]),
            ContentTypeFilter(allowed_types=["text/html"]),
        ]
    )

    # Create the keyword scorer used to prioritize URLs
    keyword_scorer = KeywordRelevanceScorer(
        keywords=["crawl", "example", "async", "configuration"], weight=0.7
    )
    # Set up the configuration
    config = CrawlerRunConfig(
        deep_crawl_strategy=BestFirstCrawlingStrategy(
            max_depth=1,
            include_external=False,
            filter_chain=filter_chain,
            url_scorer=keyword_scorer,
        ),
        scraping_strategy=LXMLWebScrapingStrategy(),
        stream=True,
        verbose=True,
    )

    # Execute the crawl
    results = []
    start_time = time.perf_counter()

    async with AsyncWebCrawler() as crawler:
        async for result in await crawler.arun(
            url="https://docs.crawl4ai.com", config=config
        ):
            results.append(result)
            score = result.metadata.get("score", 0)
            depth = result.metadata.get("depth", 0)
            print(f"→ Depth: {depth} | Score: {score:.2f} | {result.url}")

    duration = time.perf_counter() - start_time

    # Summarize the results
    print(f"\n✅ Crawled {len(results)} high-value pages in {duration:.2f} seconds")
    # An empty crawl would otherwise raise ZeroDivisionError on the average
    if results:
        avg_score = sum(r.metadata.get("score", 0) for r in results) / len(results)
        print(f"✅ Average score: {avg_score:.2f}")

    # Group by depth
    depth_counts = {}
    for result in results:
        depth = result.metadata.get("depth", 0)
        depth_counts[depth] = depth_counts.get(depth, 0) + 1

    print("\n Pages crawled by depth:")
    for depth, count in sorted(depth_counts.items()):
        print(f"  Depth {depth}: {count} pages")


async def run_tutorial():
    """
    Print the tutorial banner, await every section coroutine one after the
    other in the order listed below, then print the closing message.
    """
    banner_lines = (
        "\n CRAWL4AI DEEP CRAWLING TUTORIAL ",
        "======================================",
        "This tutorial will walk you through deep crawling techniques,",
        "from basic to advanced, using the Crawl4AI library.",
    )
    for banner_line in banner_lines:
        print(banner_line)

    # Sections run strictly in this order; comment entries out to run only
    # specific parts during development.
    sections_in_order = (
        basic_deep_crawl,
        stream_vs_nonstream,
        filters_and_scorers,
        max_pages_and_thresholds,
        advanced_filters,
        wrap_up,
    )

    for run_section in sections_in_order:
        await run_section()

    closing_lines = (
        "\n TUTORIAL COMPLETE! ",
        "You now have a comprehensive understanding of deep crawling with Crawl4AI.",
        "For more information, check out https://docs.crawl4ai.com",
    )
    for closing_line in closing_lines:
        print(closing_line)

# Script entry point: when this file is executed directly (rather than
# imported), drive the run_tutorial() coroutine to completion with asyncio.run.
if __name__ == "__main__":
    asyncio.run(run_tutorial())

 ✓ | ⏱: 0.08s
[COMPLETE] ● https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.24s
[FETCH]... ↓ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 2.56s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 2.59s
[FETCH]... ↓ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 2.89s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 2.93s
[FETCH]... ↓ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.57s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.62s
[FETCH]... ↓ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.31s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.33s
[FETCH]... ↓ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.39s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.43s
[FETCH]... ↓ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.45s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.47s
[FETCH]... ↓ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 2.09s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 2.11s
[FETCH]... ↓ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.72s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.75s
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 2.76s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 2.80s
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 2.42s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 2.45s
[FETCH]... ↓ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 3.15s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 3.19s
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 2.89s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 2.95s
[ERROR]... × https://old.docs.crawl4ai.com                      | Error: Unexpected 
error in _crawl_web at line 744 in _crawl_web
(D:\anaconda3\envs\crawl4ai-python311\Lib\site-packages\crawl4ai\async_crawler_strat
egy.py):
Error: Failed on navigating ACS-GOTO:
Page.goto: net::ERR_CONNECTION_CLOSED at https://old.docs.crawl4ai.com/
Call log:
  - navigating to "https://old.docs.crawl4ai.com/", waiting until "domcontentloaded"


Code context:
 739                       response = await page.goto(
 740                           url, wait_until=config.wait_until,
timeout=config.page_timeout
 741                       )
 742                       redirected_url = page.url
 743                   except Error as e:
 744 →                     raise RuntimeError(f"Failed on navigating
ACS-GOTO:\n{str(e)}")
 745
 746                   await self.execute_hook(
 747                       "after_goto", page, context=context, url=url,
response=response, config=config
 748                   )
 749
[FETCH]... ↓ https://docs.crawl4ai.com/blog/releases/0.4.0
| ✓ | ⏱: 1.14s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog/releases/0.4.0
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog/releases/0.4.0
| ✓ | ⏱: 1.16s
[FETCH]... ↓ https://docs.crawl4ai.com/blog/releases/0.4.2
| ✓ | ⏱: 1.46s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog/releases/0.4.2
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog/releases/0.4.2
| ✓ | ⏱: 1.48s
[FETCH]... ↓ https://docs.crawl4ai.com/deploy/docker/README.md
| ✓ | ⏱: 1.47s
[SCRAPE].. ◆ https://docs.crawl4ai.com/deploy/docker/README.md
| ✓ | ⏱: 0.00s
[COMPLETE] ● https://docs.crawl4ai.com/deploy/docker/README.md
| ✓ | ⏱: 1.48s
[FETCH]... ↓ https://docs.crawl4ai.com/blog/releases/0.6.0
| ✓ | ⏱: 2.05s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog/releases/0.6.0
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog/releases/0.6.0
| ✓ | ⏱: 2.07s
[FETCH]... ↓ https://docs.crawl4ai.com/blog/releases/0.5.0
| ✓ | ⏱: 2.32s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog/releases/0.5.0
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/blog/releases/0.5.0
| ✓ | ⏱: 2.36s
[FETCH]... ↓ https://docs.crawl4ai.com/blog/releases/0.4.1
| ✓ | ⏱: 2.41s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog/releases/0.4.1
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog/releases/0.4.1
| ✓ | ⏱: 2.43s
✅ Crawled 49 pages total

Depth 0: 1 pages
  → https://docs.crawl4ai.com

Depth 1: 42 pages
  → https://docs.crawl4ai.com
  → https://docs.crawl4ai.com/advanced/ssl-certificate
  → https://docs.crawl4ai.com/api/arun
  ... and 39 more

Depth 2: 6 pages
  → https://docs.crawl4ai.com/blog/releases/0.4.0
  → https://docs.crawl4ai.com/blog/releases/0.4.2
  → https://docs.crawl4ai.com/deploy/docker/README.md
  ... and 3 more

✅ Performance: 49 pages in 22.85 seconds

===== STREAM VS. NON-STREAM EXECUTION =====
[INIT].... → Crawl4AI 0.6.3 

 NON-STREAMING MODE:
  In this mode, all results are collected before being returned.
  ✅ Received all 43 results at once
  ✅ Total duration: 17.67 seconds

 STREAMING MODE:
  In this mode, results are processed as they become available.
  ✅ First result received after 6.64 seconds: https://docs.crawl4ai.com
  → Result #5: https://docs.crawl4ai.com/api/arun_many
  → Result #10: https://docs.crawl4ai.com/api/crawl-result
  → Result #15: https://docs.crawl4ai.com/advanced/lazy-loading
  → Result #20: https://docs.crawl4ai.com/advanced/multi-url-crawling
  → Result #25: https://docs.crawl4ai.com/core/examples
  → Result #30: https://docs.crawl4ai.com/core/markdown-generation
  → Result #35: https://docs.crawl4ai.com/core/llmtxt
  → Result #40: https://docs.crawl4ai.com/extraction/no-llm-strategies
  ✅ Total: 42 results
  ✅ First result: 6.64 seconds
  ✅ All results: 18.32 seconds

 Key Takeaway: Streaming allows processing results immediately

===== FILTERS AND SCORERS =====
[INIT].... → Crawl4AI 0.6.3 

 EXAMPLE 1: SINGLE URL PATTERN FILTER
  Only crawl pages containing 'core' in the URL
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 6.39s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 6.41s
[FETCH]... ↓ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.86s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.87s
[FETCH]... ↓ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.29s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.34s
[FETCH]... ↓ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 1.55s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 1.58s
[FETCH]... ↓ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 2.07s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 2.11s
[FETCH]... ↓ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.40s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 0.07s
[COMPLETE] ● https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.48s
[FETCH]... ↓ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 2.64s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 2.69s
[FETCH]... ↓ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 2.81s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 2.86s
[FETCH]... ↓ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 3.05s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 3.10s
[FETCH]... ↓ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 3.16s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 3.22s
[FETCH]... ↓ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.72s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.75s
[FETCH]... ↓ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 2.81s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 2.84s
[FETCH]... ↓ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 2.94s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 2.98s
[FETCH]... ↓ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 3.00s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 3.02s
[FETCH]... ↓ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 3.10s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 3.14s
[FETCH]... ↓ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 3.15s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 3.18s
[FETCH]... ↓ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 1.95s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 1.99s
[FETCH]... ↓ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 2.27s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 2.29s
[FETCH]... ↓ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.66s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.70s
  ✅ Crawled 19 pages matching '*core*'
  → https://docs.crawl4ai.com
  → https://docs.crawl4ai.com/core/ask-ai
  → https://docs.crawl4ai.com/core/examples
  ... and 16 more

 EXAMPLE 2: MULTIPLE FILTERS IN A CHAIN
  Only crawl pages that:
  1. Contain '2024' in the URL
  2. Are from 'techcrunch.com'
  3. Are of text/html or application/javascript content type
[FETCH]... ↓ https://techcrunch.com
| ✓ | ⏱: 2.07s
[SCRAPE].. ◆ https://techcrunch.com
| ✓ | ⏱: 0.14s
[COMPLETE] ● https://techcrunch.com
| ✓ | ⏱: 2.22s
[FETCH]... ↓ https://techcrunch.com/2025/02/28/tech-layoffs-2024-list
| ✓ | ⏱: 1.50s
[SCRAPE].. ◆ https://techcrunch.com/2025/02/28/tech-layoffs-2024-list
| ✓ | ⏱: 0.07s
[COMPLETE] ● https://techcrunch.com/2025/02/28/tech-layoffs-2024-list
| ✓ | ⏱: 1.58s
  ✅ Crawled 2 pages after applying all filters
  → https://techcrunch.com
  → https://techcrunch.com/2025/02/28/tech-layoffs-2024-list

 EXAMPLE 3: USING A KEYWORD RELEVANCE SCORER
Score pages based on relevance to keywords: 'crawl', 'example', 'async', 'configuration','javascript','css'
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 7.39s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 7.41s
  → Score: 0.00 | https://docs.crawl4ai.com
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 1.26s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 1.29s
  → Score: 0.17 | https://docs.crawl4ai.com/advanced/advanced-features
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 1.18s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 1.20s
  → Score: 0.17 | https://docs.crawl4ai.com/advanced/lazy-loading
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 1.63s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 1.65s
  → Score: 0.17 | https://docs.crawl4ai.com/advanced/crawl-dispatcher
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 1.77s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 1.81s
  → Score: 0.17 | https://docs.crawl4ai.com/advanced/multi-url-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 2.20s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 2.22s
  → Score: 0.17 | https://docs.crawl4ai.com/advanced/proxy-security
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 2.23s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 2.26s
  → Score: 0.17 | https://docs.crawl4ai.com/advanced/network-console-capture
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 2.38s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 2.41s
  → Score: 0.17 | https://docs.crawl4ai.com/advanced/session-management
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 2.56s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 2.58s
  → Score: 0.17 | https://docs.crawl4ai.com/advanced/file-downloading
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 2.58s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 2.61s
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 2.63s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 2.66s
  → Score: 0.17 | https://docs.crawl4ai.com/advanced/identity-based-crawling        
  → Score: 0.17 | https://docs.crawl4ai.com/advanced/hooks-auth
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 1.35s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 1.37s
  → Score: 0.17 | https://docs.crawl4ai.com/advanced/ssl-certificate
[FETCH]... ↓ https://docs.crawl4ai.com/blog
| ✓ | ⏱: 1.82s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog
| ✓ | ⏱: 1.85s
  → Score: 0.17 | https://docs.crawl4ai.com/blog
[FETCH]... ↓ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 1.91s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 1.94s
  → Score: 0.17 | https://docs.crawl4ai.com/core/cache-modes
[FETCH]... ↓ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 2.03s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 2.05s
  → Score: 0.17 | https://docs.crawl4ai.com/core/ask-ai
[FETCH]... ↓ https://docs.crawl4ai.com/api/strategies
| ✓ | ⏱: 2.06s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/strategies
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/api/strategies
| ✓ | ⏱: 2.10s
  → Score: 0.17 | https://docs.crawl4ai.com/api/strategies
[FETCH]... ↓ https://docs.crawl4ai.com/api/arun_many
| ✓ | ⏱: 2.58s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/arun_many
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/api/arun_many
| ✓ | ⏱: 2.61s
[FETCH]... ↓ https://docs.crawl4ai.com/api/arun
| ✓ | ⏱: 2.62s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/arun
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/api/arun
| ✓ | ⏱: 2.66s
  → Score: 0.17 | https://docs.crawl4ai.com/api/arun_many
  → Score: 0.17 | https://docs.crawl4ai.com/api/arun
[FETCH]... ↓ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 2.80s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 2.84s
  → Score: 0.17 | https://docs.crawl4ai.com/core/browser-crawler-config
[FETCH]... ↓ https://docs.crawl4ai.com/api/crawl-result
| ✓ | ⏱: 3.03s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/crawl-result
| ✓ | ⏱: 0.06s
[COMPLETE] ● https://docs.crawl4ai.com/api/crawl-result
| ✓ | ⏱: 3.10s
[FETCH]... ↓ https://docs.crawl4ai.com/api/parameters
| ✓ | ⏱: 3.10s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/parameters
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/api/parameters
| ✓ | ⏱: 3.15s
  → Score: 0.17 | https://docs.crawl4ai.com/api/crawl-result
  → Score: 0.17 | https://docs.crawl4ai.com/api/parameters
[FETCH]... ↓ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 1.98s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 2.01s
  → Score: 0.17 | https://docs.crawl4ai.com/core/cli
[FETCH]... ↓ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 1.76s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 1.80s
  → Score: 0.17 | https://docs.crawl4ai.com/core/fit-markdown
[FETCH]... ↓ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 1.94s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 1.97s
  → Score: 0.17 | https://docs.crawl4ai.com/core/installation
[FETCH]... ↓ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 1.96s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 2.00s
[FETCH]... ↓ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 2.01s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 2.02s
  → Score: 0.17 | https://docs.crawl4ai.com/core/llmtxt
  → Score: 0.17 | https://docs.crawl4ai.com/core/crawler-result
[FETCH]... ↓ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 2.32s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 2.36s
  → Score: 0.17 | https://docs.crawl4ai.com/core/content-selection
[FETCH]... ↓ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.36s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.40s
  → Score: 0.17 | https://docs.crawl4ai.com/core/link-media
[FETCH]... ↓ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.59s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.61s
  → Score: 0.17 | https://docs.crawl4ai.com/core/local-files
[FETCH]... ↓ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 2.85s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 2.90s
  → Score: 0.17 | https://docs.crawl4ai.com/core/deep-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 3.24s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 0.06s
[COMPLETE] ● https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 3.30s
  → Score: 0.17 | https://docs.crawl4ai.com/core/docker-deployment
[FETCH]... ↓ https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 1.21s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 1.24s
  → Score: 0.33 | https://docs.crawl4ai.com/api/async-webcrawler
[ERROR]... × https://old.docs.crawl4ai.com                      | Error: Unexpected 
error in _crawl_web at line 744 in _crawl_web
(D:\anaconda3\envs\crawl4ai-python311\Lib\site-packages\crawl4ai\async_crawler_strat
egy.py):
Error: Failed on navigating ACS-GOTO:
Page.goto: net::ERR_CONNECTION_CLOSED at https://old.docs.crawl4ai.com/
Call log:
  - navigating to "https://old.docs.crawl4ai.com/", waiting until "domcontentloaded"


Code context:
 739                       response = await page.goto(
 740                           url, wait_until=config.wait_until,
timeout=config.page_timeout
 741                       )
 742                       redirected_url = page.url
 743                   except Error as e:
 744 →                     raise RuntimeError(f"Failed on navigating
ACS-GOTO:\n{str(e)}")
 745
 746                   await self.execute_hook(
 747                       "after_goto", page, context=context, url=url,
response=response, config=config
 748                   )
 749
  → Score: 0.17 | https://old.docs.crawl4ai.com
[FETCH]... ↓ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 1.60s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 1.62s
  → Score: 0.17 | https://docs.crawl4ai.com/core/simple-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/chunking
| ✓ | ⏱: 2.45s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/chunking
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/chunking
| ✓ | ⏱: 2.47s
  → Score: 0.17 | https://docs.crawl4ai.com/extraction/chunking
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 2.48s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/clustring-strategies
| ✓ | ⏱: 2.52s
  → Score: 0.17 | https://docs.crawl4ai.com/extraction/clustring-strategies
[FETCH]... ↓ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.69s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.73s
  → Score: 0.17 | https://docs.crawl4ai.com/core/page-interaction
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 2.84s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/llm-strategies
| ✓ | ⏱: 2.88s
  → Score: 0.17 | https://docs.crawl4ai.com/extraction/llm-strategies
[FETCH]... ↓ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.98s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 3.02s
  → Score: 0.17 | https://docs.crawl4ai.com/core/quickstart
[FETCH]... ↓ https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 3.04s
[SCRAPE].. ◆ https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/extraction/no-llm-strategies
| ✓ | ⏱: 3.09s
  → Score: 0.17 | https://docs.crawl4ai.com/extraction/no-llm-strategies
[FETCH]... ↓ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 3.12s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 3.16s
  → Score: 0.17 | https://docs.crawl4ai.com/core/markdown-generation
[FETCH]... ↓ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.28s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.32s
  → Score: 0.33 | https://docs.crawl4ai.com/core/examples
  ✅ Crawler prioritized 42 pages by relevance score
   Note: BestFirstCrawlingStrategy visits highest-scoring pages first

===== MAX PAGES AND SCORE THRESHOLDS =====
[INIT].... → Crawl4AI 0.6.3 

 EXAMPLE 1: BFS STRATEGY WITH MAX PAGES LIMIT
  Limit the crawler to a maximum of 5 pages
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 7.23s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 7.25s
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 0.69s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 0.72s
[FETCH]... ↓ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 1.26s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 1.30s
[FETCH]... ↓ https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 1.57s
[SCRAPE].. ◆ https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/api/async-webcrawler
| ✓ | ⏱: 1.60s
[FETCH]... ↓ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 1.65s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 1.68s
  ✅ Crawled exactly 5 pages as specified by max_pages
  → Depth: 0 | https://docs.crawl4ai.com
  → Depth: 1 | https://docs.crawl4ai.com
  → Depth: 1 | https://docs.crawl4ai.com/core/crawler-result
  → Depth: 1 | https://docs.crawl4ai.com/api/async-webcrawler
  → Depth: 1 | https://docs.crawl4ai.com/core/browser-crawler-config

 EXAMPLE 2: DFS STRATEGY WITH SCORE THRESHOLD
  Only crawl pages with a relevance score above 0.5
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 0.53s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 0.55s
  ✅ Crawled 1 pages with scores above threshold
  → Depth: 0 | Score: 0.00 | https://docs.crawl4ai.com

 EXAMPLE 3: BEST-FIRST STRATEGY WITH BOTH CONSTRAINTS
  Limit to 7 pages with scores above 0.3, prioritizing highest scores
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 10.89s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 10.92s
  → Depth: 0 | Score: 0.00 | https://docs.crawl4ai.com
[FETCH]... ↓ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.92s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.93s
  → Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/ask-ai
[FETCH]... ↓ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 1.09s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 1.11s
  → Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/installation
[FETCH]... ↓ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.76s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.80s
  → Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/examples
[FETCH]... ↓ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 1.82s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 1.85s
[FETCH]... ↓ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 1.87s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 1.88s
  → Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/llmtxt
  → Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/quickstart
[FETCH]... ↓ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.31s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 0.06s
[COMPLETE] ● https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.36s
  → Depth: 1 | Score: 0.00 | https://docs.crawl4ai.com/core/docker-deployment       
  ✅ Crawled 7 high-value pages with scores above 0.3
  ✅ Average score: 0.00
   Note: BestFirstCrawlingStrategy visited highest-scoring pages first

===== ADVANCED FILTERS =====
[INIT].... → Crawl4AI 0.6.3 

 EXAMPLE 1: SEO FILTERS
Quantitative SEO quality assessment filter based searching keywords in the head section
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 9.53s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 9.56s
  ✅ Found 1 pages with relevant keywords
  → https://docs.crawl4ai.com

 EXAMPLE 2: ADVANCED TEXT RELEVANCY FILTER
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 0.52s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 0.54s
  ✅ Found 1 pages
  → Score: 0.00 | https://docs.crawl4ai.com

===== COMPLETE CRAWLER EXAMPLE =====
Combining filters, scorers, and streaming for an optimized crawl
[INIT].... → Crawl4AI 0.6.3 
[FETCH]... ↓ https://docs.crawl4ai.com
| ✓ | ⏱: 6.00s
[SCRAPE].. ◆ https://docs.crawl4ai.com
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com
| ✓ | ⏱: 6.03s
→ Depth: 0 | Score: 0.00 | https://docs.crawl4ai.com
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 1.32s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/advanced-features
| ✓ | ⏱: 1.36s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/advanced-features
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 2.32s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/session-management
| ✓ | ⏱: 2.35s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/session-management    
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 2.76s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/network-console-capture
| ✓ | ⏱: 2.80s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/network-console-capture
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 2.91s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/lazy-loading
| ✓ | ⏱: 2.94s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/lazy-loading
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 3.00s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/identity-based-crawling
| ✓ | ⏱: 3.04s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/identity-based-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 3.19s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/file-downloading
| ✓ | ⏱: 3.21s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/file-downloading      
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 3.23s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/hooks-auth
| ✓ | ⏱: 3.26s
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 3.27s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/multi-url-crawling
| ✓ | ⏱: 3.31s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/hooks-auth
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/multi-url-crawling    
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 3.45s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/proxy-security
| ✓ | ⏱: 3.47s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/proxy-security        
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 3.94s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/crawl-dispatcher
| ✓ | ⏱: 3.96s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/crawl-dispatcher
[FETCH]... ↓ https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 1.19s
[SCRAPE].. ◆ https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/advanced/ssl-certificate
| ✓ | ⏱: 1.22s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/advanced/ssl-certificate       
[FETCH]... ↓ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 1.53s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/deep-crawling
| ✓ | ⏱: 1.57s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/deep-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 1.94s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 0.06s
[COMPLETE] ● https://docs.crawl4ai.com/core/docker-deployment
| ✓ | ⏱: 2.00s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/docker-deployment
[FETCH]... ↓ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 2.17s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/cache-modes
| ✓ | ⏱: 2.20s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/cache-modes
[FETCH]... ↓ https://docs.crawl4ai.com/blog
| ✓ | ⏱: 2.45s
[SCRAPE].. ◆ https://docs.crawl4ai.com/blog
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/blog
| ✓ | ⏱: 2.47s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/blog
[FETCH]... ↓ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 2.50s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/ask-ai
| ✓ | ⏱: 2.52s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/ask-ai
[FETCH]... ↓ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 2.59s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/cli
| ✓ | ⏱: 2.63s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/cli
[FETCH]... ↓ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 3.00s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/content-selection
| ✓ | ⏱: 3.04s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/content-selection
[FETCH]... ↓ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 3.04s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/crawler-result
| ✓ | ⏱: 3.07s
[FETCH]... ↓ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 3.07s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/browser-crawler-config
| ✓ | ⏱: 3.11s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/browser-crawler-config
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/crawler-result
[FETCH]... ↓ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.96s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/examples
| ✓ | ⏱: 1.99s
→ Depth: 1 | Score: 0.35 | https://docs.crawl4ai.com/core/examples
[FETCH]... ↓ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 1.83s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/simple-crawling
| ✓ | ⏱: 1.86s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/simple-crawling
[FETCH]... ↓ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 1.87s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 0.01s
[COMPLETE] ● https://docs.crawl4ai.com/core/llmtxt
| ✓ | ⏱: 1.88s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/llmtxt
[FETCH]... ↓ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.37s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/local-files
| ✓ | ⏱: 2.40s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/local-files
[FETCH]... ↓ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.53s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/quickstart
| ✓ | ⏱: 2.58s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/quickstart
[FETCH]... ↓ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 2.63s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 0.05s
[COMPLETE] ● https://docs.crawl4ai.com/core/markdown-generation
| ✓ | ⏱: 2.69s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/markdown-generation       
[FETCH]... ↓ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.71s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/installation
| ✓ | ⏱: 2.73s
[FETCH]... ↓ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 2.74s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 0.02s
[COMPLETE] ● https://docs.crawl4ai.com/core/fit-markdown
| ✓ | ⏱: 2.77s
[FETCH]... ↓ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.78s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 0.03s
[COMPLETE] ● https://docs.crawl4ai.com/core/page-interaction
| ✓ | ⏱: 2.81s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/fit-markdown
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/page-interaction
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/installation
[FETCH]... ↓ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.83s
[SCRAPE].. ◆ https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 0.04s
[COMPLETE] ● https://docs.crawl4ai.com/core/link-media
| ✓ | ⏱: 2.87s
→ Depth: 1 | Score: 0.17 | https://docs.crawl4ai.com/core/link-media

✅ Crawled 31 high-value pages in 22.47 seconds
✅ Average score: 0.17

 Pages crawled by depth:
  Depth 0: 1 pages
  Depth 1: 30 pages

 TUTORIAL COMPLETE! 
You now have a comprehensive understanding of deep crawling with Crawl4AI.
For more information, check out https://docs.crawl4ai.com
PS E:\AI-lab\n8n>

你可能感兴趣的:(网络爬虫,python,开发语言)

python 读excel每行替换_Python脚本操作Excel实现批量替换功能 weixin_39646695 python 读excel每行替换
Python脚本操作Excel实现批量替换功能大家好，给大家分享下如何使用Python脚本操作Excel实现批量替换。使用的工具Openpyxl，一个处理excel的python库，处理excel，其实针对的就是WorkBook，Sheet，Cell这三个最根本的元素~明确需求原始excel如下我们的目标是把下面excel工作表的sheet1表页A列的内容“替换我吧”批量替换为B列的“我用来替换的
python笔记14介绍几个魔法方法抢公主的大魔王 python python
python笔记14介绍几个魔法方法 先声明一下各位大佬，这是我的笔记。如有错误，恳请指正。另外，感谢您的观看，谢谢啦！(1). __doc__ 输出对应的函数、类的说明文档 print(print.__doc__) print(value, ..., sep=' ', end='\n', file=sys.stdout, flush=False) Prints the values to a stream, or to sys.std
Anaconda 和 Miniconda：功能详解与选择建议古月฿ python入门 python conda
Anaconda和Miniconda详细介绍一、Anaconda的详细介绍1.什么是Anaconda？Anaconda是一个开源的包管理和环境管理工具，在数据科学、机器学习以及科学计算领域发挥着关键作用。它以Python和R语言为基础，为用户精心准备了大量预装库和工具，极大地缩短了搭建数据科学环境的时间。对于那些想要快速开展数据分析、模型训练等工作的人员来说，Anaconda就像是一个一站式的“数
环境搭建 | Python + Anaconda / Miniconda + PyCharm 的安装、配置与使用
本文将分别介绍Python、Anaconda/Miniconda、PyCharm的安装、配置与使用，详细介绍Python环境搭建的全过程，涵盖Python、Pip、PythonLauncher、Anaconda、Miniconda、Pycharm等内容，以官方文档为参照，使用经验为补充，内容全面而详实。由于图片太多，就先贴一个无图简化版吧，详情请查看Python+Anaconda/Minicond
你竟然还在用克隆删除？Conda最新版rename命令全攻略！曦紫沐 Python基础知识 conda 虚拟环境管理
文章摘要Conda虚拟环境管理终于迎来革命性升级！本文揭秘Conda4.9+版本新增的rename黑科技，彻底告别传统“克隆+删除”的繁琐操作。从命令解析到实战案例，手把手教你如何安全高效地重命名Python虚拟环境，附带版本检测、环境迁移、故障排查等进阶技巧，助你提升开发效率10倍！一、颠覆认知：Conda居然自带重命名功能？很多开发者仍停留在“Conda无法直接重命名环境”的认知阶段，实际上自
centos7安装配置 Anaconda3
Anaconda是一个用于科学计算的Python发行版，Anaconda之于Python，相当于CentOS之于Linux。下载 [root@test src]# mwget https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/Anaconda3-5.2.0-Linux-x86_64.sh Begin to download: Anaconda3-5.2.0-L
Pandas：数据科学的超级瑞士军刀科技林总 DeepSeek学AI 人工智能
**——从零基础到高效分析的进化指南**###**一、Pandas诞生：数据革命的救世主****2010年前的数据分析噩梦**：```python#传统Python处理表格数据data=[]forrowincsv_file:ifrow[3]>100androw[2]=="China":data.append(float(row[5])#代码冗长易错！```**核心痛点**：-Excel处理百万行崩
【Jupyter】个人开发常见命令 TIM老师 #Pycharm &VSCode python Jupyter
1. 查看python版本 import sys; print(sys.version) 2. ipynb/py文件转换 jupyter nbconvert --to python my_file.ipynb ipynb转换为md jupyter nbconvert --to markdown my_file.ipynb ipynb转为html jupyter nbconvert --to html my_file.ipynb ipython转换为pdf ju
用 Python 开发小游戏：零基础也能做出《贪吃蛇》
本文专为零基础学习者打造，详细介绍如何用Python开发经典小游戏《贪吃蛇》。无需复杂编程知识，从环境搭建到代码编写、功能实现，逐步讲解核心逻辑与操作。涵盖Pygame库的基础运用、游戏界面设计、蛇的移动与食物生成规则等，让新手能按步骤完成开发，同时融入SEO优化要点，帮助读者轻松入门Python游戏开发，体验从0到1做出游戏的乐趣。一、为什么选择用Python开发《贪吃蛇》对于零基础学习者来说，
基于Python的AI健康助手：开发与部署全攻略 AI算力网络与通信 AI算力网络与通信原理 AI人工智能大数据架构 python 人工智能开发语言 ai
基于Python的AI健康助手：开发与部署全攻略关键词：Python、AI健康助手、机器学习、自然语言处理、Flask、部署、健康管理摘要：本文将详细介绍如何使用Python开发一个AI健康助手，从需求分析、技术选型到核心功能实现，再到最终部署上线的完整过程。我们将使用自然语言处理技术理解用户健康咨询，通过机器学习模型提供个性化建议，并展示如何用Flask框架构建Web应用接口。文章包含大量实际代
AI人工智能中的数据挖掘：提升智能决策能力
AI人工智能中的数据挖掘：提升智能决策能力关键词：数据挖掘、人工智能、机器学习、智能决策、数据分析、特征工程、模型优化摘要：本文深入探讨了数据挖掘在人工智能领域中的核心作用，重点分析了如何通过数据挖掘技术提升智能决策能力。文章从基础概念出发，详细介绍了数据挖掘的关键算法、数学模型和实际应用场景，并通过Python代码示例展示了数据挖掘的全流程。最后，文章展望了数据挖掘技术的未来发展趋势和面临的挑战
lesson20：Python函数的标注你的电影很有趣 python 开发语言
目录引言：为什么函数标注是现代Python开发的必备技能一、函数标注的基础语法1.1参数与返回值标注1.2支持的标注类型1.3Python3.9+的重大改进：标准集合泛型二、高级标注技巧与最佳实践2.1复杂参数结构标注2.2函数类型与回调标注2.3变量注解与类型别名三、静态类型检查工具应用3.1mypy：最流行的类型检查器3.2Pyright与IDE集成3.3运行时类型验证四、函数标注的工程价值与
Jupyter Notebook：数据科学的“瑞士军刀” a小胡哦机器学习基础人工智能机器学习
在数据科学的世界里，JupyterNotebook是一个不可或缺的工具，它就像是数据科学家手中的“瑞士军刀”，功能强大且灵活多变。今天，就让我们一起深入了解这个神奇的工具。一、JupyterNotebook是什么？JupyterNotebook是一个开源的Web应用程序，它允许你创建和共享包含实时代码、方程、可视化和解释性文本的文档。它支持多种编程语言，其中Python是最常用的语言之一。Jupy
Django学习笔记（一）
学习视频为：python django web框架开发入门全套视频教程 一、安装 pip install django==**** 检查是否安装成功 django.get_version() 二、django新建项目操作 1、新建一个项目 django-admin startproject project_name 2、新建APP cd project_name; django-admin startapp App 注：一个project
Python 程序设计讲义（26）：字符串的用法——字符的编码睿思达DBA_WGX Python 讲义 python 开发语言
Python程序设计讲义（26）：字符串的用法——字符的编码目录Python程序设计讲义（26）：字符串的用法——字符的编码一、字符的编码二、`ASCII`编码三、`Unicode`编码四、使用`ord()`函数查询一个字符对应的`Unicode`编码五、使用`chr()`函数查询一个`Unicode`编码对应的字符六、`Python`字符串的特征一、字符的编码计算机默认只能处理二进制数，而不能处
【Python】pypinyin-汉字拼音转换工具鸟哥大大 Python python 自然语言处理
文章目录1.主要功能2.安装3.常用API3.1拼音风格3.2核心API3.2.1pypinyin.pinyin()3.2.2pypinyin.lazy_pinyin()3.2.3pypinyin.load_single_dict()3.2.4pypinyin.load_phrases_dict()3.2.5pypinyin.slug()3.3注册新的拼音风格4.基本用法4.1库导入4.2基本汉字
python编程第十四课：数据可视化小小源助手 Python代码实例信息可视化 python 开发语言
Python数据可视化：让数据“开口说话”在当今数据爆炸的时代，数据可视化已成为探索数据规律、传达数据信息的关键技术。Python凭借其丰富的第三方库，为数据可视化提供了强大而灵活的解决方案。本文将带你深入了解Matplotlib库的基础绘图、Seaborn库的高级可视化以及交互式可视化工具Plotly，帮助你通过图表清晰地展示数据背后的故事。一、Matplotlib库基础绘图Matplotlib
Python数据可视化：用代码绘制数据背后的故事 AAEllisonPang Python 信息可视化 python 开发语言
引言：当数据会说话 在数据爆炸的时代，可视化是解锁数据价值的金钥匙。Python凭借其丰富的可视化生态库，已成为数据科学家的首选工具。本文将带您从基础到高级，探索如何用Python将冰冷数字转化为引人入胜的视觉叙事。一、基础篇：二维可视化的艺术表达 1.1 Matplotlib：可视化领域的瑞士军刀 import matplotlib.pyplot as plt; import numpy as np; x = np.linsp
python学习笔记（汇总）朕的剑还未配妥 python学习笔记整理 python 学习开发语言
文章目录一.基础知识二.python中的数据类型三.运算符四.程序的控制结构五.列表六.字典七.元组八.集合九.字符串十.函数十一.解决bug一.基础知识print函数字符串要加引号，数字可不加引号，如print(123.4)print('小谢')print("洛天依")还可输入表达式，如print(1+3)如果使用三引号，print打印的内容可不在同一行print("line1line2line
PDF转Markdown - Python 实现方案与代码 Eiceblue Python Python PDF pdf python 开发语言 vscode
PDF作为广泛使用的文档格式，转换为轻量级标记语言Markdown后，可无缝集成到技术文档、博客平台和版本控制系统中，提高内容的可编辑性和可访问性。本文将详细介绍如何使用国产Spire.PDFforPython库将PDF文档转换为Markdown格式。技术优势：精准保留原始文档结构（段落/列表/表格）完整提取文本和图像内容无需Adobe依赖的纯Python实现支持Linux/Windows/mac
使用Python和Gradio构建实时数据可视化工具 PythonAI编程架构实战家信息可视化 python 开发语言 ai
使用Python和Gradio构建实时数据可视化工具关键词：Python、Gradio、数据可视化、实时数据、Web应用、交互式界面、数据科学摘要：本文将详细介绍如何使用Python和Gradio框架构建一个实时数据可视化工具。我们将从基础概念开始，逐步深入到核心算法实现，包括数据处理、可视化技术以及Gradio的交互式界面设计。通过实际项目案例，读者将学习如何创建一个功能完整、响应迅速的实时数据
Python Gradio：实现交互式图像编辑 PythonAI编程架构实战家 Python编程之道 python 开发语言 ai
PythonGradio：实现交互式图像编辑关键词：Python,Gradio,交互式图像编辑,计算机视觉,深度学习,图像处理,Web应用摘要：本文将深入探讨如何使用Python的Gradio库构建交互式图像编辑应用。我们将从基础概念开始，逐步介绍Gradio的核心功能，并通过实际代码示例展示如何实现各种图像处理功能。文章将涵盖图像滤镜应用、对象检测、风格迁移等高级功能，同时提供完整的项目实战案例
数据可视化：数据世界的直观呈现卢政权1 信息可视化数据分析数据挖掘
在当今数字化浪潮中，数据呈爆炸式增长。数据可视化作为一种强大的技术手段，能够将复杂的数据转化为直观的图形、图表等形式，让数据背后的信息一目了然。无论是在商业决策、科学研究还是日常数据分析中，数据可视化都发挥着极为重要的作用。它帮助我们快速理解数据的分布、趋势、关联等特征，从而为进一步的分析和行动提供有力支持。接下来，我们将深入探讨数据可视化的奥秘，并通过代码示例展示其实际应用。一、Python数据
Python 程序设计讲义（25）：循环结构——嵌套循环
Python程序设计讲义（25）：循环结构——嵌套循环目录Python程序设计讲义（25）：循环结构——嵌套循环一、嵌套循环的执行流程二、嵌套循环对应的几种情况1、内循环和外循环互不影响2、外循环迭代影响内循环的条件3、外循环迭代影响内循环的循环体嵌套循环是指在一个循环体中嵌套另一个循环。while循环中可以嵌入另一个while循环或for循环。反之，也可以在for循环中嵌入另一个for循环或wh
基于Python引擎的PP-OCR模型库推理张欣-男 python ocr 开发语言 PaddleOCR PaddlePaddle
基于Python引擎的PP-OCR模型库推理 1. 文本检测模型推理 # 下载超轻量中文检测模型： wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar tar xf ch_PP-OCRv3_det_infer.tar python3 tools/infer/predict_det.py --image_dir=".
一个开源AI牛马神器 | AiPy，平替Manus，装完直接上手写Python！ Agent加载失败人工智能 python 开源算法 AI编程
还记得三个月前那个在闲鱼被炒到万元邀请码的Manus吗？现在你点官网，直接提示「所在地区不可用」了它走了，但更香的国产开源项目出现了：AiPy（爱派）。主打一个极致简化的AIAgent理念：别搞什么插件市场、Agent路由，直接给AI一个Python解释器，让它用自然语言写代码干活。听起来狠活？实际体验更狠：•完全本地化，界面傻瓜式操作，支持自然语言生成&执行Python任务；•数据清洗、文档总结
零数学基础理解AI核心概念：梯度下降可视化实战九章云极AladdinEdu 人工智能 gpu算力深度学习 pytorch python 语言模型 opencv
点击“AladdinEdu，同学们用得起的【H卡】算力平台”，H卡级别算力，按量计费，灵活弹性，顶级配置，学生专属优惠。用Python动画演示损失函数优化过程，数学公式具象化读者收获：直观理解模型训练本质，破除"数学恐惧症"当盲人登山者摸索下山路径时，他本能地运用了梯度下降算法。本文将用动态可视化技术，让你像感受重力一样理解AI训练的核心原理——无需任何数学公式推导。一、梯度下降：AI世界的"万有
2025.07 Java入门笔记01 殷浩焕笔记
一、熟悉IDEA和Java语法（一）LiuCourseJavaOOP1.一直在用C++开发，python也用了些，Java是真的不熟，用什么IDE还是问的同事；2.一开始安装了jdk-23，拿VSCode当编辑器，在cmd窗口编译运行，也能玩；但是想正儿八经搞项目开发，还是需要IDE；3.安装了IDEA社区版：（1）IDE通常自带对应编程语言的安装包，例如IDEA自带jbr-21（和jdk是不同的
响应式编程实践：Spring Boot WebFlux构建高性能非阻塞服务 fanxbl957 Web spring boot 后端 java
博主介绍：Java、Python、js全栈开发“多面手”，精通多种编程语言和技术，痴迷于人工智能领域。秉持着对技术的热爱与执着，持续探索创新，愿在此分享交流和学习，与大家共进步。全栈开发环境搭建运行攻略：多语言一站式指南(环境搭建+运行+调试+发布+保姆级详解)感兴趣的可以先收藏起来，希望帮助更多的人响应式编程实践：SpringBootWebFlux构建高性能非阻塞服务一、引言在当今数字化时代，互
Python STL概念学习与代码实践体制教科书
本文还有配套的精品资源，点击获取简介：通过”py_stl_learning”项目，学习者可以使用Python实现和理解C++STL的概念，包括数据结构、算法、容器适配器、模板和泛型容器等。Python中的列表、集合、字典等数据结构与STL中的vector、set、map等类似，而Python的itertools和functools模块提供了STL风格的算法功能。Python通过其面向对象的特性以及
深入浅出Java Annotation(元注解和自定义注解） Josh_Persistence Java Annotation 元注解自定义注解
一、基本概述　　 Annotation是Java 5开始引入的新特性。中文名称一般叫注解。它提供了一种安全的类似注释的机制，用来将任何的信息或元数据（metadata）与程序元素（类、方法、成员变量等）进行关联。　　更通俗的意思是为程序的元素（类、方法、成员变量）加上更直观更明了的说明，这些说明信息是与程序的业务逻辑无关，并且是供指定的工具或
mysql优化特定类型的查询 annan211 java 工作 mysql
本节所介绍的查询优化的技巧都是和特定版本相关的，所以对于未来mysql的版本未必适用。 1 优化count查询对于count这个函数的网上的大部分资料都是错误的或者是理解的都是一知半解的。在做优化之前我们先来看看真正的count()函数的作用到底是什么。 count()是一个特殊的函数，有两种非常不同的作用，他可以统计某个列值的数量，也可以统计行数。在统
MAC下安装多版本JDK和切换几种方式棋子chessman jdk
环境： MAC AIR,OS X 10.10,64位历史：过去 Mac 上的 Java 都是由 Apple 自己提供，只支持到 Java 6，并且OS X 10.7 开始系统并不自带（而是可选安装）（原自带的是1.6）。后来 Apple 加入 OpenJDK 继续支持 Java 6，而 Java 7 将由 Oracle 负责提供。在终端中输入jav
javaScript （1） Array_06 JavaScript java 浏览器
JavaScript 1、运算符　　运算符就是完成操作的一系列符号，它有七类：　　赋值运算符（=,+=,-=,*=,/=,%=,<<=,>>=,|=,&=）、算术运算符(+,-,*,/,++,--,%)、比较运算符(>,<,<=,>=,==,===,!=,!==)、逻辑运算符(||,&&,!)、条件运算(?:)、位
国内顶级代码分享网站袁潇含 java jdk oracle .net PHP
现在国内很多开源网站感觉都是为了利益而做的，当然利益是肯定的，否则谁也不会免费地去做网站
Elasticsearch、MongoDB和Hadoop比较随意而生 mongodb hadoop 搜索引擎
IT界在过去几年中出现了一个有趣的现象。很多新的技术出现并立即拥抱了“大数据”。稍微老一点的技术也会将大数据添进自己的特性，避免落大部队太远，我们看到了不同技术之间的边际的模糊化。假如你有诸如Elasticsearch或者Solr这样的搜索引擎，它们存储着JSON文档，MongoDB存着JSON文档，或者一堆JSON文档存放在一个Hadoop集群的HDFS中。你可以使用这三种配
mac os 系统科研软件总结张亚雄 mac os
1.1 Microsoft Office for Mac 2011 大客户版，自行搜索。 1.2 LaTeX（MacTeX）: 系统环境：https://tug.org/mactex/
Maven实战（四）生命周期 AdyZhang maven
1. 三套生命周期 Maven拥有三套相互独立的生命周期，它们分别为clean，default和site。每个生命周期包含一些阶段，这些阶段是有顺序的，并且后面的阶段依赖于前面的阶段，用户和Maven最直接的交互方式就是调用这些生命周期阶段。以clean生命周期为例，它包含的阶段有pre-clean, clean 和 post
Linux下Jenkins迁移 aijuans Jenkins
1. 将Jenkins程序目录copy过去。源程序在/export/data/tomcatRoot/ofctest-jenkins.jd.com下面： tar -cvzf jenkins.tar.gz ofctest-jenkins.jd.com
request.getInputStream()只能获取一次的问题 ayaoxinchao request Inputstream
问题：在使用HTTP协议实现应用间接口通信时，服务端读取客户端请求过来的数据，会用到request.getInputStream()，第一次读取的时候可以读取到数据，但是接下来的读取操作都读取不到数据原因： 1. 一个InputStream对象在被读取完成后，将无法被再次读取，始终返回-1； 2. InputStream并没有实现reset方法（可以重
数据库SQL优化大总结之百万级数据库优化方案 BigBird2012 SQL优化
网上关于SQL优化的教程很多，但是比较杂乱。近日有空整理了一下，写出来跟大家分享一下，其中有错误和不足的地方，还请大家纠正补充。这篇文章我花费了大量的时间查找资料、修改、排版，希望大家阅读之后，感觉好的话推荐给更多的人，让更多的人看到、纠正以及补充。 1.对查询进行优化，要尽量避免全表扫描，首先应考虑在 where 及 order by 涉及的列上建立索引。 2.应尽量避免在 where
jsonObject的使用 bijian1013 java json
在项目中难免会用java处理json格式的数据，因此封装了一个JSONUtil工具类。 JSONUtil.java package com.bijian.json.study; import java.util.ArrayList; import java.util.Date; import java.util.HashMap;
[Zookeeper学习笔记之六]Zookeeper源代码分析之Zookeeper.WatchRegistration bit1129 zookeeper
Zookeeper类是Zookeeper提供给用户访问Zookeeper service的主要API，它包含了如下几个内部类首先分析它的内部类，从WatchRegistration开始，为指定的znode path注册一个Watcher， /** * Register a watcher for a particular p
【Scala十三】Scala核心七：部分应用函数 bit1129 scala
何为部分应用函数？ Partially applied function: A function that’s used in an expression and that misses some of its arguments.For instance, if function f has type Int => Int => Int, then f and f(1) are p
Tomcat Error listenerStart 终极大法 ronin47 tomcat
Tomcat报的错太含糊了，什么错都没报出来，只提示了Error listenerStart。为了调试，我们要获得更详细的日志。可以在WEB-INF/classes目录下新建一个文件叫logging.properties，内容如下 Java代码 handlers = org.apache.juli.FileHandler, java.util.logging.ConsoleHa
不用加减符号实现加减法 BrokenDreams 实现
今天有群友发了一个问题，要求不用加减符号(包括负号)来实现加减法。分析一下，先看最简单的情况，假设1+1，按二进制算的话结果是10，可以看到从右往左的第一位变为0，第二位由于进位变为1。
读《研磨设计模式》-代码笔记-状态模式-State bylijinnan java 设计模式
声明：本文只为方便我个人查阅和理解，详细的分析以及源代码请移步原作者的博客http://chjavach.iteye.com/ /* 当一个对象的内在状态改变时允许改变其行为，这个对象看起来像是改变了其类状态模式主要解决的是当控制一个对象状态的条件表达式过于复杂时的情况把状态的判断逻辑转移到表示不同状态的一系列类中，可以把复杂的判断逻辑简化如果在
CUDA程序block和thread超出硬件允许值时的异常 cherishLC CUDA
调用CUDA的核函数时指定block 和 thread大小，该大小可以是dim3类型的（三维数组），只用一维时可以是unsigned int型的。以下程序验证了当block或thread大小超出硬件允许值时会产生异常！！！GPU根本不会执行运算！！！所以验证结果的正确性很重要！！！在VS中创建CUDA项目会有一个模板，里面有更详细的状态验证。以下程序在K5000 GPU上跑的。
诡异的超长时间GC问题定位 chenchao051 jvm cms GC hbase swap
HBase的GC策略采用ParNew+CMS, 这是大众化的配置，ParNew经常会出现停顿时间特别长的情况，有时候甚至长到令人发指的地步，例如请看如下日志： 2012-10-17T05:54:54.293+0800: 739594.224: [GC 739606.508: [ParNew: 996800K->110720K(996800K), 178.8826900 secs] 3700
maven环境快速搭建 daizj 安装 maven 环境配置
一下载maven 安装maven之前，要先安装jdk及配置JAVA_HOME环境变量。这个安装和配置java环境不用多说。 maven下载地址：http://maven.apache.org/download.html，目前最新的是这个apache-maven-3.2.5-bin.zip，然后解压在任意位置，最好地址中不要带中文字符，这个做java 的都知道，地址中出现中文会出现很多
PHP网站安全，避免PHP网站受到攻击的方法 dcj3sjt126com PHP
对于PHP网站安全主要存在这样几种攻击方式:1、命令注入(Command Injection)2、eval注入(Eval Injection)3、客户端脚本攻击(Script Insertion)4、跨网站脚本攻击(Cross Site Scripting, XSS)5、SQL注入攻击(SQL injection)6、跨网站请求伪造攻击(Cross Site Request Forgerie
yii中给CGridView设置默认的排序根据时间倒序的方法 dcj3sjt126com GridView
public function searchWithRelated() { $criteria = new CDbCriteria; $criteria->together = true; //without th
Java集合对象和数组对象的转换 dyy_gusi java集合
在开发中，我们经常需要将集合对象（List，Set）转换为数组对象，或者将数组对象转换为集合对象。Java提供了相互转换的工具，但是我们使用的时候需要注意，不能乱用滥用。 1、数组对象转换为集合对象最暴力的方式是new一个集合对象，然后遍历数组，依次将数组中的元素放入到新的集合中，但是这样做显然过
nginx同一主机部署多个应用 geeksun nginx
近日有一需求，需要在一台主机上用nginx部署2个php应用，分别是wordpress和wiki，探索了半天，终于部署好了，下面把过程记录下来。 1. 在nginx下创建vhosts目录，用以放置vhost文件。 mkdir vhosts 2. 修改nginx.conf的配置，在http节点增加下面内容设置，用来包含vhosts里的配置文件 #
ubuntu添加admin权限的用户账号 hongtoushizi ubuntu useradd
ubuntu创建账号的方式通常用到两种：useradd 和 adduser。本人尝试了useradd方法，步骤如下： 1:useradd 使用useradd时，如果后面不加任何参数的话，如：sudo useradd sysadm 创建出来的用户将是默认的三无用户：无home directory，无密码，无系统shell。故应该如下操作：
第五章常用Lua开发库2-JSON库、编码转换、字符串处理 jinnianshilongnian nginx lua
JSON库在进行数据传输时JSON格式目前应用广泛，因此从Lua对象与JSON字符串之间相互转换是一个非常常见的功能；目前Lua也有几个JSON库，本人用过cjson、dkjson。其中cjson的语法严格（比如unicode \u0020\u7eaf），要求符合规范否则会解析失败（如\u002），而dkjson相对宽松，当然也可以通过修改cjson的源码来完成
Spring定时器配置的两种实现方式OpenSymphony Quartz和java Timer详解 yaerfeng1989 timer quartz 定时器
原创整理不易，转载请注明出处：Spring定时器配置的两种实现方式OpenSymphony Quartz和java Timer详解代码下载地址：http://www.zuidaima.com/share/1772648445103104.htm 有两种流行Spring定时器配置：Java的Timer类和OpenSymphony的Quartz。 1.Java Timer定时首先继承jav
Linux下df与du两个命令的差别？ pda158 linux
　一、df显示文件系统的使用情况，与du比较，就是更全盘化。　　最经常使用的就是 df -T，显示文件系统的使用情况并显示文件系统的类型。　　举例如下：　　[root@localhost ~]# df -T 　　Filesystem Type
[转]SQLite的工具类 ---- 通过反射把Cursor封装到VO对象 ctfzh VO android sqlite 反射 Cursor
在写DAO层时，觉得从Cursor里一个一个的取出字段值再装到VO(值对象)里太麻烦了，就写了一个工具类，用到了反射，可以把查询记录的值装到对应的VO里，也可以生成该VO的List。使用时需要注意：考虑到Android的性能问题，VO没有使用Setter和Getter，而是直接用public的属性。表中的字段名需要和VO的属性名一样，要是不一样就得在查询的SQL中
该学习笔记用到的Employee表 vipbooks oracle sql 工作
这是我在学习Oracle时用到的Employee表，在该笔记中用到的就是这张表，大家可以用它来学习和练习。 drop table Employee; -- 员工信息表 create table Employee( -- 员工编号 EmpNo number(3) primary key, -- 姓