feat: Add support for CSS exclude selectors (exclude_selectors) in URL config

This commit is contained in:
Arik Jones (aider)
2024-09-14 20:59:08 -05:00
parent ece9492b30
commit 53dcd6eb71
3 changed files with 13 additions and 10 deletions

View File

@@ -47,9 +47,10 @@ func runWeb(cmd *cobra.Command, args []string) error {
urlConfigs = make([]scraper.URLConfig, len(cfg.Scrape.URLs)) urlConfigs = make([]scraper.URLConfig, len(cfg.Scrape.URLs))
for i, u := range cfg.Scrape.URLs { for i, u := range cfg.Scrape.URLs {
urlConfigs[i] = scraper.URLConfig{ urlConfigs[i] = scraper.URLConfig{
URL: u.URL, URL: u.URL,
CSSLocator: u.CSSLocator, CSSLocator: u.CSSLocator,
OutputAlias: u.OutputAlias, ExcludeSelectors: u.ExcludeSelectors,
OutputAlias: u.OutputAlias,
} }
} }
} else { } else {

View File

@@ -20,9 +20,10 @@ type ScrapeConfig struct {
} }
type URLConfig struct { type URLConfig struct {
URL string `yaml:"url"` URL string `yaml:"url"`
CSSLocator string `yaml:"css_locator"` CSSLocator string `yaml:"css_locator"`
OutputAlias string `yaml:"output_alias"` ExcludeSelectors []string `yaml:"exclude_selectors"`
OutputAlias string `yaml:"output_alias"`
} }
func Load(configPath string) (*Config, error) { func Load(configPath string) (*Config, error) {

View File

@@ -67,7 +67,7 @@ func scrapeURL(config URLConfig) (string, error) {
} }
if config.CSSLocator != "" { if config.CSSLocator != "" {
content, err = ExtractContentWithCSS(content, config.CSSLocator, nil) content, err = ExtractContentWithCSS(content, config.CSSLocator, config.ExcludeSelectors)
if err != nil { if err != nil {
return "", err return "", err
} }
@@ -97,9 +97,10 @@ func sanitizeFilename(name string) string {
// URLConfig holds configuration for a single URL // URLConfig holds configuration for a single URL
type URLConfig struct { type URLConfig struct {
URL string URL string
CSSLocator string CSSLocator string
OutputAlias string ExcludeSelectors []string
OutputAlias string
} }
// SetupLogger initializes the logger based on the verbose flag // SetupLogger initializes the logger based on the verbose flag