diff --git a/cmd/files_test.go b/cmd/files_test.go
new file mode 100644
index 0000000..6b4a597
--- /dev/null
+++ b/cmd/files_test.go
@@ -0,0 +1,143 @@
+package cmd
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/tnypxl/rollup/internal/config"
+)
+
+func TestMatchGlob(t *testing.T) {
+	tests := []struct {
+		pattern  string
+		path     string
+		expected bool
+	}{
+		{"*.go", "file.go", true},
+		{"*.go", "file.txt", false},
+		{"**/*.go", "dir/file.go", true},
+		{"**/*.go", "dir/subdir/file.go", true},
+		{"dir/*.go", "dir/file.go", true},
+		{"dir/*.go", "otherdir/file.go", false},
+	}
+
+	for _, test := range tests {
+		result := matchGlob(test.pattern, test.path)
+		if result != test.expected {
+			t.Errorf("matchGlob(%q, %q) = %v; want %v", test.pattern, test.path, result, test.expected)
+		}
+	}
+}
+
+func TestIsCodeGenerated(t *testing.T) {
+	patterns := []string{"generated_*.go", "**/auto_*.go"}
+	tests := []struct {
+		path     string
+		expected bool
+	}{
+		{"generated_file.go", true},
+		{"normal_file.go", false},
+		{"subdir/auto_file.go", true},
+		{"subdir/normal_file.go", false},
+	}
+
+	for _, test := range tests {
+		result := isCodeGenerated(test.path, patterns)
+		if result != test.expected {
+			t.Errorf("isCodeGenerated(%q, %v) = %v; want %v", test.path, patterns, result, test.expected)
+		}
+	}
+}
+
+func TestIsIgnored(t *testing.T) {
+	patterns := []string{"*.tmp", "**/*.log"}
+	tests := []struct {
+		path     string
+		expected bool
+	}{
+		{"file.tmp", true},
+		{"file.go", false},
+		{"subdir/file.log", true},
+		{"subdir/file.txt", false},
+	}
+
+	for _, test := range tests {
+		result := isIgnored(test.path, patterns)
+		if result != test.expected {
+			t.Errorf("isIgnored(%q, %v) = %v; want %v", test.path, patterns, result, test.expected)
+		}
+	}
+}
+
+func TestRunRollup(t *testing.T) {
+	// Create a temporary directory for testing
+	tempDir, err := os.MkdirTemp("", "rollup_test")
+	if err != nil {
+		t.Fatalf("Failed to create temp dir: %v", err)
+	}
+	defer os.RemoveAll(tempDir)
+
+	// Create some test files
+	files := map[string]string{
+		"file1.go":          "package main\n\nfunc main() {}\n",
+		"file2.txt":         "This is a text file.\n",
+		"subdir/file3.go":   "package subdir\n\nfunc Func() {}\n",
+		"subdir/file4.json": "{\"key\": \"value\"}\n",
+	}
+
+	for name, content := range files {
+		path := filepath.Join(tempDir, name)
+		err := os.MkdirAll(filepath.Dir(path), 0755)
+		if err != nil {
+			t.Fatalf("Failed to create directory: %v", err)
+		}
+		err = os.WriteFile(path, []byte(content), 0644)
+		if err != nil {
+			t.Fatalf("Failed to write file: %v", err)
+		}
+	}
+
+	// Set up test configuration
+	cfg = &config.Config{
+		FileTypes: []string{"go", "txt"},
+		Ignore:    []string{"*.json"},
+	}
+	path = tempDir
+
+	// Run the rollup
+	err = runRollup()
+	if err != nil {
+		t.Fatalf("runRollup() failed: %v", err)
+	}
+
+	// Check if the output file was created
+	outputFiles, err := filepath.Glob(filepath.Join(tempDir, "*.rollup.md"))
+	if err != nil {
+		t.Fatalf("Failed to glob output files: %v", err)
+	}
+	if len(outputFiles) != 1 {
+		t.Fatalf("Expected 1 output file, got %d", len(outputFiles))
+	}
+
+	// Read the content of the output file
+	content, err := os.ReadFile(outputFiles[0])
+	if err != nil {
+		t.Fatalf("Failed to read output file: %v", err)
+	}
+
+	// Check if the content includes the expected files
+	expectedContent := []string{
+		"# File: file1.go",
+		"# File: file2.txt",
+		"# File: subdir/file3.go",
+	}
+	for _, expected := range expectedContent {
+		if !strings.Contains(string(content), expected) {
+			t.Errorf("Output file does not contain expected content: %s", expected)
+		}
+	}
+
+	// Check if the ignored file is not included
+	if strings.Contains(string(content), "file4.json") {
+		t.Errorf("Output file contains ignored file: file4.json")
+	}
+}
diff --git a/cmd/web_test.go b/cmd/web_test.go
new file mode 100644
index 0000000..e996d28
--- /dev/null
+++ b/cmd/web_test.go
@@ -0,0 +1,134 @@
+package cmd
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/tnypxl/rollup/internal/config"
+	"github.com/tnypxl/rollup/internal/scraper"
+)
+
+func TestConvertPathOverrides(t *testing.T) {
+	configOverrides := []config.PathOverride{
+		{
+			Path:             "/blog",
+			CSSLocator:       "article",
+			ExcludeSelectors: []string{".ads", ".comments"},
+		},
+		{
+			Path:             "/products",
+			CSSLocator:       ".product-description",
+			ExcludeSelectors: []string{".related-items"},
+		},
+	}
+
+	scraperOverrides := convertPathOverrides(configOverrides)
+
+	if len(scraperOverrides) != len(configOverrides) {
+		t.Errorf("Expected %d overrides, got %d", len(configOverrides), len(scraperOverrides))
+	}
+
+	for i, override := range scraperOverrides {
+		if override.Path != configOverrides[i].Path {
+			t.Errorf("Expected Path %s, got %s", configOverrides[i].Path, override.Path)
+		}
+		if override.CSSLocator != configOverrides[i].CSSLocator {
+			t.Errorf("Expected CSSLocator %s, got %s", configOverrides[i].CSSLocator, override.CSSLocator)
+		}
+		if len(override.ExcludeSelectors) != len(configOverrides[i].ExcludeSelectors) {
+			t.Errorf("Expected %d ExcludeSelectors, got %d", len(configOverrides[i].ExcludeSelectors), len(override.ExcludeSelectors))
+		}
+		for j, selector := range override.ExcludeSelectors {
+			if selector != configOverrides[i].ExcludeSelectors[j] {
+				t.Errorf("Expected ExcludeSelector %s, got %s", configOverrides[i].ExcludeSelectors[j], selector)
+			}
+		}
+	}
+}
+
+func TestSanitizeFilename(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+	}{
+		{"Hello, World!", "Hello_World"},
+		{"file/with/path", "file_with_path"},
+		{"file.with.dots", "file_with_dots"},
+		{"___leading_underscores___", "leading_underscores"},
+		{"", "untitled"},
+		{"!@#$%^&*()", "untitled"},
+	}
+
+	for _, test := range tests {
+		result := sanitizeFilename(test.input)
+		if result != test.expected {
+			t.Errorf("sanitizeFilename(%q) = %q; want %q", test.input, result, test.expected)
+		}
+	}
+}
+
+func TestGetFilenameFromContent(t *testing.T) {
+	tests := []struct {
+		content  string
+		url      string
+		expected string
+	}{
+		{"Test Page", "http://example.com", "Test_Page.md"},
+		{"No title here", "http://example.com/page", "http___example_com_page.md"},
+		{" Trim Me ", "http://example.com", "Trim_Me.md"},
+		{"", "http://example.com", "http___example_com.md"},
+	}
+
+	for _, test := range tests {
+		result := getFilenameFromContent(test.content, test.url)
+		if result != test.expected {
+			t.Errorf("getFilenameFromContent(%q, %q) = %q; want %q", test.content, test.url, result, test.expected)
+		}
+	}
+}
+
+// Mock functions for testing
+func mockExtractAndConvertContent(urlStr string) (string, error) {
+	return "Mocked content for " + urlStr, nil
+}
+
+func mockExtractLinks(urlStr string) ([]string, error) {
+	return []string{"http://example.com/link1", "http://example.com/link2"}, nil
+}
+
+func TestScrapeURL(t *testing.T) {
+	// Replace the actual functions with mocks
+	oldExtractAndConvertContent := extractAndConvertContent
+	oldExtractLinks := scraper.ExtractLinks
+	defer func() {
+		extractAndConvertContent = oldExtractAndConvertContent
+		scraper.ExtractLinks = oldExtractLinks
+	}()
+	extractAndConvertContent = mockExtractAndConvertContent
+	scraper.ExtractLinks = mockExtractLinks
+
+	tests := []struct {
+		url           string
+		depth         int
+		expectedCalls int
+	}{
+		{"http://example.com", 0, 1},
+		{"http://example.com", 1, 3},
+		{"http://example.com", 2, 3}, // Same as depth 1 because our mock only returns 2 links
+	}
+
+	for _, test := range tests {
+		visited := make(map[string]bool)
+		content, err := scrapeURL(test.url, test.depth, visited)
+		if err != nil {
+			t.Errorf("scrapeURL(%q, %d) returned error: %v", test.url, test.depth, err)
+			continue
+		}
+		if len(visited) != test.expectedCalls {
+			t.Errorf("scrapeURL(%q, %d) made %d calls, expected %d", test.url, test.depth, len(visited), test.expectedCalls)
+		}
+		expectedContent := "Mocked content for " + test.url
+		if !strings.Contains(content, expectedContent) {
+			t.Errorf("scrapeURL(%q, %d) content doesn't contain %q", test.url, test.depth, expectedContent)
+		}
+	}
+}
diff --git a/internal/config/config_test.go b/internal/config/config_test.go
new file mode 100644
index 0000000..a05c23f
--- /dev/null
+++ b/internal/config/config_test.go
@@ -0,0 +1,120 @@
+package config
+
+import (
+	"os"
+	"reflect"
+	"testing"
+)
+
+func TestLoad(t *testing.T) {
+	// Create a temporary config file
+	content := []byte(`
+file_types:
+  - go
+  - md
+ignore:
+  - "*.tmp"
+  - "**/*.log"
+code_generated:
+  - "generated_*.go"
+scrape:
+  sites:
+    - base_url: "https://example.com"
+      css_locator: "main"
+      exclude_selectors:
+        - ".ads"
+      max_depth: 2
+      allowed_paths:
+        - "/blog"
+      exclude_paths:
+        - "/admin"
+      output_alias: "example"
+      path_overrides:
+        - path: "/special"
+          css_locator: ".special-content"
+          exclude_selectors:
+            - ".sidebar"
+  output_type: "single"
+  requests_per_second: 1.0
+  burst_limit: 5
+`)
+
+	tmpfile, err := os.CreateTemp("", "config*.yml")
+	if err != nil {
+		t.Fatalf("Failed to create temp file: %v", err)
+	}
+	defer os.Remove(tmpfile.Name())
+
+	if _, err := tmpfile.Write(content); err != nil {
+		t.Fatalf("Failed to write to temp file: %v", err)
+	}
+	if err := tmpfile.Close(); err != nil {
+		t.Fatalf("Failed to close temp file: %v", err)
+	}
+
+	// Test loading the config
+	config, err := Load(tmpfile.Name())
+	if err != nil {
+		t.Fatalf("Load() failed: %v", err)
+	}
+
+	// Check if the loaded config matches the expected values
+	expectedConfig := &Config{
+		FileTypes:     []string{"go", "md"},
+		Ignore:        []string{"*.tmp", "**/*.log"},
+		CodeGenerated: []string{"generated_*.go"},
+		Scrape: ScrapeConfig{
+			Sites: []SiteConfig{
+				{
+					BaseURL:          "https://example.com",
+					CSSLocator:       "main",
+					ExcludeSelectors: []string{".ads"},
+					MaxDepth:         2,
+					AllowedPaths:     []string{"/blog"},
+					ExcludePaths:     []string{"/admin"},
+					OutputAlias:      "example",
+					PathOverrides: []PathOverride{
+						{
+							Path:             "/special",
+							CSSLocator:       ".special-content",
+							ExcludeSelectors: []string{".sidebar"},
+						},
+					},
+				},
+			},
+			OutputType:        "single",
+			RequestsPerSecond: 1.0,
+			BurstLimit:        5,
+		},
+	}
+
+	if !reflect.DeepEqual(config, expectedConfig) {
+		t.Errorf("Loaded config does not match expected config.\nGot: %+v\nWant: %+v", config, expectedConfig)
+	}
+}
+
+func TestDefaultConfigPath(t *testing.T) {
+	expected := "rollup.yml"
+	result := DefaultConfigPath()
+	if result != expected {
+		t.Errorf("DefaultConfigPath() = %q, want %q", result, expected)
+	}
+}
+
+func TestFileExists(t *testing.T) {
+	// Test with an existing file
+	tmpfile, err := os.CreateTemp("", "testfile")
+	if err != nil {
+		t.Fatalf("Failed to create temp file: %v", err)
+	}
+	defer os.Remove(tmpfile.Name())
+
+	if !FileExists(tmpfile.Name()) {
+		t.Errorf("FileExists(%q) = false, want true", tmpfile.Name())
+	}
+
+	// Test with a non-existing file
+	if FileExists("non_existing_file.txt") {
+		t.Errorf("FileExists(\"non_existing_file.txt\") = true, want false")
+	}
+}
diff --git a/internal/scraper/scraper_test.go b/internal/scraper/scraper_test.go
new file mode 100644
index 0000000..df36dec
--- /dev/null
+++ b/internal/scraper/scraper_test.go
@@ -0,0 +1,169 @@
+package scraper
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"reflect"
+	"strings"
+	"testing"
+)
+
+func TestIsAllowedURL(t *testing.T) {
+	site := SiteConfig{
+		BaseURL:      "https://example.com",
+		AllowedPaths: []string{"/blog", "/products"},
+		ExcludePaths: []string{"/admin", "/private"},
+	}
+
+	tests := []struct {
+		url      string
+		expected bool
+	}{
+		{"https://example.com/blog/post1", true},
+		{"https://example.com/products/item1", true},
+		{"https://example.com/admin/dashboard", false},
+		{"https://example.com/private/data", false},
+		{"https://example.com/other/page", false},
+		{"https://othersite.com/blog/post1", false},
+	}
+
+	for _, test := range tests {
+		result := isAllowedURL(test.url, site)
+		if result != test.expected {
+			t.Errorf("isAllowedURL(%q) = %v, want %v", test.url, result, test.expected)
+		}
+	}
+}
+
+func TestGetOverrides(t *testing.T) {
+	site := SiteConfig{
+		CSSLocator:       "main",
+		ExcludeSelectors: []string{".ads"},
+		PathOverrides: []PathOverride{
+			{
+				Path:             "/special",
+				CSSLocator:       ".special-content",
+				ExcludeSelectors: []string{".sidebar"},
+			},
+		},
+	}
+
+	tests := []struct {
+		url              string
+		expectedLocator  string
+		expectedExcludes []string
+	}{
+		{"https://example.com/normal", "main", []string{".ads"}},
+		{"https://example.com/special", ".special-content", []string{".sidebar"}},
+		{"https://example.com/special/page", ".special-content", []string{".sidebar"}},
+	}
+
+	for _, test := range tests {
+		locator, excludes := getOverrides(test.url, site)
+		if locator != test.expectedLocator {
+			t.Errorf("getOverrides(%q) locator = %q, want %q", test.url, locator, test.expectedLocator)
+		}
+		if !reflect.DeepEqual(excludes, test.expectedExcludes) {
+			t.Errorf("getOverrides(%q) excludes = %v, want %v", test.url, excludes, test.expectedExcludes)
+		}
+	}
+}
+
+func TestExtractContentWithCSS(t *testing.T) {
+	html := `
+		<html>
+			<body>
+				<main>
+					<h1>Main Content</h1>
+					<p>This is the main content.</p>
+					<div class="ads">Advertisement</div>
+				</main>
+				<aside>Sidebar content</aside>
+			</body>
+		</html>
+	`
+
+	tests := []struct {
+		includeSelector  string
+		excludeSelectors []string
+		expected         string
+	}{
+		{"main", nil, "<h1>Main Content</h1>\n<p>This is the main content.</p>\n<div class=\"ads\">Advertisement</div>"},
+		{"main", []string{".ads"}, "<h1>Main Content</h1>\n<p>This is the main content.</p>"},
+		{"aside", nil, "Sidebar content"},
+	}
+
+	for _, test := range tests {
+		result, err := ExtractContentWithCSS(html, test.includeSelector, test.excludeSelectors)
+		if err != nil {
+			t.Errorf("ExtractContentWithCSS() returned error: %v", err)
+			continue
+		}
+		if strings.TrimSpace(result) != strings.TrimSpace(test.expected) {
+			t.Errorf("ExtractContentWithCSS() = %q, want %q", result, test.expected)
+		}
+	}
+}
+
+func TestProcessHTMLContent(t *testing.T) {
+	html := `
+		<html>
+			<body>
+				<h1>Test Heading</h1>
+				<p>This is a <strong>test</strong> paragraph.</p>
+				<ul>
+					<li>Item 1</li>
+					<li>Item 2</li>
+				</ul>
+			</body>
+		</html>
+	`
+
+	expected := strings.TrimSpace(`
+# Test Heading
+
+This is a **test** paragraph.
+
+- Item 1
+- Item 2
+	`)
+
+	result, err := ProcessHTMLContent(html, Config{})
+	if err != nil {
+		t.Fatalf("ProcessHTMLContent() returned error: %v", err)
+	}
+
+	if strings.TrimSpace(result) != expected {
+		t.Errorf("ProcessHTMLContent() = %q, want %q", result, expected)
+	}
+}
+
+func TestExtractLinks(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/html")
+		w.Write([]byte(`
+			<html>
+				<body>
+					<a href="https://example.com/page1">Page 1</a>
+					<a href="https://example.com/page2">Page 2</a>
+					<a href="https://othersite.com">Other Site</a>
+				</body>
+			</html>
+		`))
+	}))
+	defer server.Close()
+
+	links, err := ExtractLinks(server.URL)
+	if err != nil {
+		t.Fatalf("ExtractLinks() returned error: %v", err)
+	}
+
+	expectedLinks := []string{
+		"https://example.com/page1",
+		"https://example.com/page2",
+		"https://othersite.com",
+	}
+
+	if !reflect.DeepEqual(links, expectedLinks) {
+		t.Errorf("ExtractLinks() = %v, want %v", links, expectedLinks)
+	}
+}