From 01465a08b7cef4d3919723f53149453f2210e19c Mon Sep 17 00:00:00 2001 From: "Arik Jones (aider)" Date: Mon, 30 Sep 2024 14:19:00 -0500 Subject: [PATCH] fix: set default values for requests_per_second and burst_limit in configuration to prevent rate limiter errors --- cmd/web.go | 17 +++++++++++++++++ internal/config/config.go | 7 +++++++ internal/scraper/scraper.go | 8 ++++++++ 3 files changed, 32 insertions(+) diff --git a/cmd/web.go b/cmd/web.go index e986305..68a9b31 100644 --- a/cmd/web.go +++ b/cmd/web.go @@ -36,6 +36,16 @@ func init() { webCmd.Flags().StringSliceVar(&excludeSelectors, "exclude", []string{}, "CSS selectors to exclude from the extracted content (comma-separated)") } +func validateScrapeConfig(scrapeConfig config.ScrapeConfig) error { + if scrapeConfig.RequestsPerSecond <= 0 { + return fmt.Errorf("requests_per_second must be greater than 0") + } + if scrapeConfig.BurstLimit <= 0 { + return fmt.Errorf("burst_limit must be greater than 0") + } + return nil +} + func runWeb(cmd *cobra.Command, args []string) error { scraper.SetupLogger(verbose) logger := log.New(os.Stdout, "WEB: ", log.LstdFlags) @@ -97,6 +107,13 @@ func runWeb(cmd *cobra.Command, args []string) error { logger.Printf("Scraper configuration: OutputType=%s, RequestsPerSecond=%f, BurstLimit=%d", outputType, scraperConfig.Scrape.RequestsPerSecond, scraperConfig.Scrape.BurstLimit) + // Validate scrape configuration + err := validateScrapeConfig(cfg.Scrape) + if err != nil { + logger.Printf("Invalid scrape configuration: %v", err) + return err + } + // Start scraping using scraper.ScrapeSites logger.Println("Starting scraping process") scrapedContent, err := scraper.ScrapeSites(scraperConfig) diff --git a/internal/config/config.go b/internal/config/config.go index 923c62c..3297871 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -51,6 +51,13 @@ func Load(configPath string) (*Config, error) { return nil, fmt.Errorf("error parsing config file: %v", err) } + // Set 
default values if they are missing, zero, or negative + if config.Scrape.RequestsPerSecond <= 0 { + config.Scrape.RequestsPerSecond = 1.0 + } + if config.Scrape.BurstLimit <= 0 { + config.Scrape.BurstLimit = 5 + } return &config, nil } diff --git a/internal/scraper/scraper.go b/internal/scraper/scraper.go index 7aaafbd..255fe89 100644 --- a/internal/scraper/scraper.go +++ b/internal/scraper/scraper.go @@ -68,6 +68,14 @@ func ScrapeSites(config Config) (map[string]string, error) { err error }) + // Ensure RequestsPerSecond and BurstLimit are valid, falling back to defaults when they are not positive + if config.Scrape.RequestsPerSecond <= 0 { + config.Scrape.RequestsPerSecond = 1.0 + } + if config.Scrape.BurstLimit <= 0 { + config.Scrape.BurstLimit = 5 + } + limiter := rate.NewLimiter(rate.Limit(config.Scrape.RequestsPerSecond), config.Scrape.BurstLimit) logger.Printf("Rate limiter configured with %f requests per second and burst limit of %d\n", config.Scrape.RequestsPerSecond, config.Scrape.BurstLimit)