Main Content
This is the main content.
package scraper

import (
	"io"
	"log"
	// "net/http"
	// "net/http/httptest"
	"reflect"
	"strings"
	"testing"
)

// TestIsAllowedURL runs isAllowedURL over a fixed table of URLs against a
// single SiteConfig and reports every URL whose verdict differs from the
// expected one.
func TestIsAllowedURL(t *testing.T) {
	cfg := SiteConfig{
		BaseURL:      "https://example.com",
		AllowedPaths: []string{"/blog", "/products"},
		ExcludePaths: []string{"/admin", "/private"},
	}

	type urlCase struct {
		url      string
		expected bool
	}
	cases := []urlCase{
		// Paths listed in AllowedPaths.
		{url: "https://example.com/blog/post1", expected: true},
		{url: "https://example.com/products/item1", expected: true},
		// Paths listed in ExcludePaths.
		{url: "https://example.com/admin/dashboard", expected: false},
		{url: "https://example.com/private/data", expected: false},
		// Path that appears in neither list.
		{url: "https://example.com/other/page", expected: false},
		// Host that differs from BaseURL.
		{url: "https://othersite.com/blog/post1", expected: false},
	}

	for i := range cases {
		tc := cases[i]
		if got := isAllowedURL(tc.url, cfg); got != tc.expected {
			t.Errorf("isAllowedURL(%q) = %v, want %v", tc.url, got, tc.expected)
		}
	}
}

// TestGetOverrides checks the locator and exclude selectors returned by
// getOverrides for a URL outside the "/special" override, for the override
// path itself, and for a URL nested beneath it.
func TestGetOverrides(t *testing.T) {
	special := PathOverride{
		Path:             "/special",
		CSSLocator:       ".special-content",
		ExcludeSelectors: []string{".sidebar"},
	}
	cfg := SiteConfig{
		CSSLocator:       "main",
		ExcludeSelectors: []string{".ads"},
		PathOverrides:    []PathOverride{special},
	}

	type overrideCase struct {
		url              string
		expectedLocator  string
		expectedExcludes []string
	}
	cases := []overrideCase{
		{
			url:              "https://example.com/normal",
			expectedLocator:  "main",
			expectedExcludes: []string{".ads"},
		},
		{
			url:              "https://example.com/special",
			expectedLocator:  ".special-content",
			expectedExcludes: []string{".sidebar"},
		},
		{
			url:              "https://example.com/special/page",
			expectedLocator:  ".special-content",
			expectedExcludes: []string{".sidebar"},
		},
	}

	for _, tc := range cases {
		gotLocator, gotExcludes := getOverrides(tc.url, cfg)

		if gotLocator != tc.expectedLocator {
			t.Errorf("getOverrides(%q) locator = %q, want %q", tc.url, gotLocator, tc.expectedLocator)
		}
		if same := reflect.DeepEqual(gotExcludes, tc.expectedExcludes); !same {
			t.Errorf("getOverrides(%q) excludes = %v, want %v", tc.url, gotExcludes, tc.expectedExcludes)
		}
	}
}

func TestExtractContentWithCSS(t *testing.T) {
	// Initialize logger for testing
	logger = log.New(io.Discard, "", 0)

	html := `
This is the main content.
This is the main content.
\nThis is the main content.
"}, {"aside", nil, "Sidebar content"}, } for _, test := range tests { result, err := ExtractContentWithCSS(html, test.includeSelector, test.excludeSelectors) if err != nil { t.Errorf("ExtractContentWithCSS() returned error: %v", err) continue } if strings.TrimSpace(result) != strings.TrimSpace(test.expected) { t.Errorf("ExtractContentWithCSS() = %q, want %q", result, test.expected) } } } func TestProcessHTMLContent(t *testing.T) { html := `This is a test paragraph.