fix: resolve undefined types and import issues in scraper.go

This commit is contained in:
Arik Jones (aider)
2024-09-19 16:10:06 -05:00
parent 569ff9924d
commit e3fddf101c

View File

@@ -5,6 +5,7 @@ import (
"io/ioutil" "io/ioutil"
"log" "log"
"math/rand" "math/rand"
"net/url"
"regexp" "regexp"
"strings" "strings"
"time" "time"
@@ -26,7 +27,7 @@ var (
// Config holds the scraper configuration // Config holds the scraper configuration
type Config struct { type Config struct {
URLs []URLConfig Sites []SiteConfig
OutputType string OutputType string
Verbose bool Verbose bool
Scrape ScrapeConfig Scrape ScrapeConfig
@@ -38,6 +39,25 @@ type ScrapeConfig struct {
BurstLimit int BurstLimit int
} }
// SiteConfig holds configuration for a single site.
//
// NOTE(review): the per-field notes below are inferred from the field names
// and types only; the code that consumes most of them is not visible here.
// Confirm each against its use before relying on it.
type SiteConfig struct {
// BaseURL is presumably the root URL of the site to scrape — TODO confirm.
BaseURL string
// CSSLocator is presumably the site-wide CSS selector for the content to
// extract — TODO confirm.
CSSLocator string
// ExcludeSelectors is presumably a list of CSS selectors whose matches are
// dropped from the extracted content — TODO confirm.
ExcludeSelectors []string
// MaxDepth presumably bounds how deep the scraper follows links from
// BaseURL; the meaning of zero is not visible here — TODO confirm.
MaxDepth int
// AllowedPaths presumably restricts scraping to these URL paths — verify
// the matching rule (prefix vs. exact) in isAllowedURL.
AllowedPaths []string
// ExcludePaths presumably lists URL paths that must not be scraped —
// verify the matching rule in isAllowedURL.
ExcludePaths []string
// OutputAlias is presumably a name used for this site's output — TODO
// confirm.
OutputAlias string
// PathOverrides is the list of path-specific overrides; getOverrides
// ranges over it after parsing the requested URL.
PathOverrides []PathOverride
}
// PathOverride holds path-specific overrides.
//
// NOTE(review): the field notes below are inferred from names and types; the
// matching logic that consumes them is not fully visible here. Confirm
// before relying on them.
type PathOverride struct {
// Path is presumably the URL path this override applies to — verify how
// it is compared against the request path (prefix vs. exact).
Path string
// CSSLocator is presumably the CSS selector used in place of the
// site-wide one for matching paths — TODO confirm.
CSSLocator string
// ExcludeSelectors is presumably the list of CSS selectors to exclude for
// matching paths — TODO confirm.
ExcludeSelectors []string
}
func ScrapeSites(config Config) (map[string]string, error) { func ScrapeSites(config Config) (map[string]string, error) {
results := make(chan struct { results := make(chan struct {
url string url string
@@ -124,8 +144,8 @@ func scrapeSite(site SiteConfig, config Config, results chan<- struct {
} }
} }
func isAllowedURL(url string, site SiteConfig) bool { func isAllowedURL(urlStr string, site SiteConfig) bool {
parsedURL, err := url.Parse(url) parsedURL, err := url.Parse(urlStr)
if err != nil { if err != nil {
return false return false
} }
@@ -150,8 +170,8 @@ func isAllowedURL(url string, site SiteConfig) bool {
return false return false
} }
func getOverrides(url string, site SiteConfig) (string, []string) { func getOverrides(urlStr string, site SiteConfig) (string, []string) {
parsedURL, _ := url.Parse(url) parsedURL, _ := url.Parse(urlStr)
path := parsedURL.Path path := parsedURL.Path
for _, override := range site.PathOverrides { for _, override := range site.PathOverrides {