diff --git a/internal/scraper/scraper.go b/internal/scraper/scraper.go index 39e07bf..dbd9815 100644 --- a/internal/scraper/scraper.go +++ b/internal/scraper/scraper.go @@ -5,6 +5,7 @@ import ( "io/ioutil" "log" "math/rand" + "regexp" "strings" "time" @@ -218,17 +219,7 @@ func ProcessHTMLContent(htmlContent string, config Config) (string, error) { } selection := doc.Find("body") - if config.CSSLocator != "" { - log.Printf("Using CSS locator: %s\n", config.CSSLocator) - tempSelection := doc.Find(config.CSSLocator) - if tempSelection.Length() > 0 { - selection = tempSelection - } else { - log.Printf("Warning: No content found with CSS locator: %s. Falling back to body content.\n", config.CSSLocator) - } - } else { - log.Println("No CSS locator provided, processing entire body") - } + log.Println("Processing entire body") if selection.Length() == 0 { return "", fmt.Errorf("no content found in the document") diff --git a/main.go b/main.go index 85b8145..7c7b07c 100644 --- a/main.go +++ b/main.go @@ -30,7 +30,8 @@ func main() { defer scraper.ClosePlaywright() scraperConfig := scraper.Config{ - CSSLocator: cfg.Scrape.CSSLocator, + URLs: []scraper.URLConfig{{URL: cfg.Scrape.URLs[0].URL}}, + OutputType: cfg.Scrape.OutputType, Verbose: false, // Set default verbosity }