From 8f824d8990e11af2e20f301ab2b15b7687116e0d Mon Sep 17 00:00:00 2001
From: "Arik Jones (aider)"
Date: Sat, 21 Sep 2024 10:57:51 -0500
Subject: [PATCH] feat: enhance logging in runWeb function for better debugging

---
 cmd/web.go | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/cmd/web.go b/cmd/web.go
index 10c2fec..7a47774 100644
--- a/cmd/web.go
+++ b/cmd/web.go
@@ -39,10 +39,12 @@ func init() {
 }
 
 func runWeb(cmd *cobra.Command, args []string) error {
+	log.Printf("Starting web scraping process with verbose mode: %v", verbose)
 	scraperConfig.Verbose = verbose
 
 	var siteConfigs []scraper.SiteConfig
 	if len(cfg.Scrape.Sites) > 0 {
+		log.Printf("Using configuration from rollup.yml for %d sites", len(cfg.Scrape.Sites))
 		siteConfigs = make([]scraper.SiteConfig, len(cfg.Scrape.Sites))
 		for i, site := range cfg.Scrape.Sites {
 			siteConfigs[i] = scraper.SiteConfig{
@@ -55,9 +57,11 @@ func runWeb(cmd *cobra.Command, args []string) error {
 				OutputAlias:      site.OutputAlias,
 				PathOverrides:    convertPathOverrides(site.PathOverrides),
 			}
+			log.Printf("Site %d configuration: BaseURL=%s, CSSLocator=%s, MaxDepth=%d, AllowedPaths=%v",
+				i+1, site.BaseURL, site.CSSLocator, site.MaxDepth, site.AllowedPaths)
 		}
 	} else {
-		// Fallback to URL-based configuration if no sites are defined
+		log.Printf("No sites defined in rollup.yml, falling back to URL-based configuration")
 		siteConfigs = make([]scraper.SiteConfig, len(urls))
 		for i, u := range urls {
 			siteConfigs[i] = scraper.SiteConfig{
@@ -66,10 +70,13 @@ func runWeb(cmd *cobra.Command, args []string) error {
 				ExcludeSelectors: excludeSelectors,
 				MaxDepth:         depth,
 			}
+			log.Printf("URL %d configuration: BaseURL=%s, CSSLocator=%s, MaxDepth=%d",
+				i+1, u, includeSelector, depth)
 		}
 	}
 
 	if len(siteConfigs) == 0 {
+		log.Println("Error: No sites or URLs provided")
 		return fmt.Errorf("no sites or URLs provided. Use --urls flag with comma-separated URLs or set 'scrape.sites' in the rollup.yml file")
 	}
 
@@ -82,15 +89,22 @@ func runWeb(cmd *cobra.Command, args []string) error {
 			BurstLimit:        cfg.Scrape.BurstLimit,
 		},
 	}
+	log.Printf("Scraper configuration: OutputType=%s, RequestsPerSecond=%f, BurstLimit=%d",
+		outputType, cfg.Scrape.RequestsPerSecond, cfg.Scrape.BurstLimit)
 
+	log.Println("Starting scraping process")
 	scrapedContent, err := scraper.ScrapeSites(scraperConfig)
 	if err != nil {
+		log.Printf("Error occurred during scraping: %v", err)
 		return fmt.Errorf("error scraping content: %v", err)
 	}
+	log.Printf("Scraping completed. Total content scraped: %d", len(scrapedContent))
 
 	if outputType == "single" {
+		log.Println("Writing content to a single file")
 		return writeSingleFile(scrapedContent)
 	} else {
+		log.Println("Writing content to multiple files")
 		return writeMultipleFiles(scrapedContent)
 	}
 }