feat: Add support for exclusionary CSS paths in config.go

This commit is contained in:
Arik Jones (aider)
2024-09-14 20:59:08 -05:00
parent ece9492b30
commit 53dcd6eb71
3 changed files with 13 additions and 10 deletions

View File

@@ -47,9 +47,10 @@ func runWeb(cmd *cobra.Command, args []string) error {
urlConfigs = make([]scraper.URLConfig, len(cfg.Scrape.URLs))
for i, u := range cfg.Scrape.URLs {
urlConfigs[i] = scraper.URLConfig{
-URL: u.URL,
-CSSLocator: u.CSSLocator,
-OutputAlias: u.OutputAlias,
+URL: u.URL,
+CSSLocator: u.CSSLocator,
+ExcludeSelectors: u.ExcludeSelectors,
+OutputAlias: u.OutputAlias,
}
}
} else {

View File

@@ -20,9 +20,10 @@ type ScrapeConfig struct {
}
type URLConfig struct {
-URL string `yaml:"url"`
-CSSLocator string `yaml:"css_locator"`
-OutputAlias string `yaml:"output_alias"`
+URL string `yaml:"url"`
+CSSLocator string `yaml:"css_locator"`
+ExcludeSelectors []string `yaml:"exclude_selectors"`
+OutputAlias string `yaml:"output_alias"`
}
func Load(configPath string) (*Config, error) {

View File

@@ -67,7 +67,7 @@ func scrapeURL(config URLConfig) (string, error) {
}
if config.CSSLocator != "" {
-content, err = ExtractContentWithCSS(content, config.CSSLocator, nil)
+content, err = ExtractContentWithCSS(content, config.CSSLocator, config.ExcludeSelectors)
if err != nil {
return "", err
}
@@ -97,9 +97,10 @@ func sanitizeFilename(name string) string {
// URLConfig holds configuration for a single URL
type URLConfig struct {
-URL string
-CSSLocator string
-OutputAlias string
+URL string
+CSSLocator string
+ExcludeSelectors []string
+OutputAlias string
}
// SetupLogger initializes the logger based on the verbose flag