fix: set default values for requests_per_second and burst_limit in configuration to prevent rate limiter errors

This commit is contained in:
Arik Jones (aider)
2024-09-30 14:19:00 -05:00
parent e3355269b8
commit 01465a08b7
3 changed files with 32 additions and 0 deletions

View File

@@ -36,6 +36,16 @@ func init() {
webCmd.Flags().StringSliceVar(&excludeSelectors, "exclude", []string{}, "CSS selectors to exclude from the extracted content (comma-separated)")
}
// validateScrapeConfig verifies that the rate-limiter settings in
// scrapeConfig are usable: both requests_per_second and burst_limit
// must be strictly positive. It returns a descriptive error for the
// first invalid field found, or nil when the configuration is valid.
func validateScrapeConfig(scrapeConfig config.ScrapeConfig) error {
	switch {
	case scrapeConfig.RequestsPerSecond <= 0:
		return fmt.Errorf("requests_per_second must be greater than 0")
	case scrapeConfig.BurstLimit <= 0:
		return fmt.Errorf("burst_limit must be greater than 0")
	default:
		return nil
	}
}
func runWeb(cmd *cobra.Command, args []string) error {
scraper.SetupLogger(verbose)
logger := log.New(os.Stdout, "WEB: ", log.LstdFlags)
@@ -97,6 +107,13 @@ func runWeb(cmd *cobra.Command, args []string) error {
logger.Printf("Scraper configuration: OutputType=%s, RequestsPerSecond=%f, BurstLimit=%d",
outputType, scraperConfig.Scrape.RequestsPerSecond, scraperConfig.Scrape.BurstLimit)
// Validate scrape configuration
err := validateScrapeConfig(cfg.Scrape)
if err != nil {
logger.Printf("Invalid scrape configuration: %v", err)
return err
}
// Start scraping using scraper.ScrapeSites
logger.Println("Starting scraping process")
scrapedContent, err := scraper.ScrapeSites(scraperConfig)