diff --git a/cmd/files_test.go b/cmd/files_test.go
new file mode 100644
index 0000000..6b4a597
--- /dev/null
+++ b/cmd/files_test.go
@@ -0,0 +1,143 @@
+package cmd
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/tnypxl/rollup/internal/config"
+)
+
+// TestMatchGlob drives matchGlob through a table of pattern/path pairs,
+// covering plain globs, ** recursion, and directory-scoped patterns.
+func TestMatchGlob(t *testing.T) {
+	cases := []struct {
+		pattern string
+		path    string
+		want    bool
+	}{
+		{"*.go", "file.go", true},
+		{"*.go", "file.txt", false},
+		{"**/*.go", "dir/file.go", true},
+		{"**/*.go", "dir/subdir/file.go", true},
+		{"dir/*.go", "dir/file.go", true},
+		{"dir/*.go", "otherdir/file.go", false},
+	}
+
+	for _, tc := range cases {
+		if got := matchGlob(tc.pattern, tc.path); got != tc.want {
+			t.Errorf("matchGlob(%q, %q) = %v; want %v", tc.pattern, tc.path, got, tc.want)
+		}
+	}
+}
+
+// TestIsCodeGenerated checks generated-code pattern matching for both flat
+// and nested paths against a fixed pattern list.
+func TestIsCodeGenerated(t *testing.T) {
+	patterns := []string{"generated_*.go", "**/auto_*.go"}
+	cases := map[string]bool{
+		"generated_file.go":     true,
+		"normal_file.go":        false,
+		"subdir/auto_file.go":   true,
+		"subdir/normal_file.go": false,
+	}
+
+	for p, want := range cases {
+		if got := isCodeGenerated(p, patterns); got != want {
+			t.Errorf("isCodeGenerated(%q, %v) = %v; want %v", p, patterns, got, want)
+		}
+	}
+}
+
+// TestIsIgnored checks ignore-pattern matching for flat and nested paths.
+func TestIsIgnored(t *testing.T) {
+	patterns := []string{"*.tmp", "**/*.log"}
+	cases := map[string]bool{
+		"file.tmp":        true,
+		"file.go":         false,
+		"subdir/file.log": true,
+		"subdir/file.txt": false,
+	}
+
+	for p, want := range cases {
+		if got := isIgnored(p, patterns); got != want {
+			t.Errorf("isIgnored(%q, %v) = %v; want %v", p, patterns, got, want)
+		}
+	}
+}
+
+// TestRunRollup exercises runRollup end to end: it builds a temp tree of
+// mixed file types, points the package-level cfg/path globals at it, runs
+// the rollup, and checks the single *.rollup.md output includes the wanted
+// files and excludes the ignored JSON file.
+func TestRunRollup(t *testing.T) {
+	// Create a temporary directory for testing
+	tempDir, err := os.MkdirTemp("", "rollup_test")
+	if err != nil {
+		t.Fatalf("Failed to create temp dir: %v", err)
+	}
+	defer os.RemoveAll(tempDir)
+
+	// Fixture files: two Go files and a text file that should be rolled up,
+	// plus a JSON file the Ignore pattern must exclude.
+	files := map[string]string{
+		"file1.go":          "package main\n\nfunc main() {}\n",
+		"file2.txt":         "This is a text file.\n",
+		"subdir/file3.go":   "package subdir\n\nfunc Func() {}\n",
+		"subdir/file4.json": "{\"key\": \"value\"}\n",
+	}
+
+	for name, content := range files {
+		fp := filepath.Join(tempDir, name)
+		if err := os.MkdirAll(filepath.Dir(fp), 0755); err != nil {
+			t.Fatalf("Failed to create directory: %v", err)
+		}
+		if err := os.WriteFile(fp, []byte(content), 0644); err != nil {
+			t.Fatalf("Failed to write file: %v", err)
+		}
+	}
+
+	// Save and restore the package-level globals so this test does not
+	// leak configuration into other tests in the package.
+	oldCfg, oldPath := cfg, path
+	defer func() { cfg, path = oldCfg, oldPath }()
+
+	// Set up test configuration
+	cfg = &config.Config{
+		FileTypes: []string{"go", "txt"},
+		Ignore:    []string{"*.json"},
+	}
+	path = tempDir
+
+	// Run the rollup
+	if err := runRollup(); err != nil {
+		t.Fatalf("runRollup() failed: %v", err)
+	}
+
+	// Check if the output file was created
+	outputFiles, err := filepath.Glob(filepath.Join(tempDir, "*.rollup.md"))
+	if err != nil {
+		t.Fatalf("Failed to glob output files: %v", err)
+	}
+	if len(outputFiles) != 1 {
+		t.Fatalf("Expected 1 output file, got %d", len(outputFiles))
+	}
+
+	// Read the content of the output file once; reuse the string below.
+	content, err := os.ReadFile(outputFiles[0])
+	if err != nil {
+		t.Fatalf("Failed to read output file: %v", err)
+	}
+	got := string(content)
+
+	// NOTE(review): "subdir/file3.go" assumes '/' separators in the rollup
+	// headers even on Windows — confirm against runRollup's output format.
+	expectedContent := []string{
+		"# File: file1.go",
+		"# File: file2.txt",
+		"# File: subdir/file3.go",
+	}
+	for _, expected := range expectedContent {
+		if !strings.Contains(got, expected) {
+			t.Errorf("Output file does not contain expected content: %s", expected)
+		}
+	}
+
+	// Check if the ignored file is not included
+	if strings.Contains(got, "file4.json") {
+		t.Errorf("Output file contains ignored file: file4.json")
+	}
+}
diff --git a/cmd/web_test.go b/cmd/web_test.go
new file mode 100644
index 0000000..e996d28
--- /dev/null
+++ b/cmd/web_test.go
@@ -0,0 +1,134 @@
+package cmd
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/tnypxl/rollup/internal/config"
+	"github.com/tnypxl/rollup/internal/scraper"
+)
+
+// TestConvertPathOverrides checks that config.PathOverride entries are
+// mapped one-to-one onto scraper overrides with every field preserved.
+func TestConvertPathOverrides(t *testing.T) {
+	in := []config.PathOverride{
+		{
+			Path:             "/blog",
+			CSSLocator:       "article",
+			ExcludeSelectors: []string{".ads", ".comments"},
+		},
+		{
+			Path:             "/products",
+			CSSLocator:       ".product-description",
+			ExcludeSelectors: []string{".related-items"},
+		},
+	}
+
+	out := convertPathOverrides(in)
+
+	if len(out) != len(in) {
+		t.Errorf("Expected %d overrides, got %d", len(in), len(out))
+	}
+
+	for i, got := range out {
+		want := in[i]
+		if got.Path != want.Path {
+			t.Errorf("Expected Path %s, got %s", want.Path, got.Path)
+		}
+		if got.CSSLocator != want.CSSLocator {
+			t.Errorf("Expected CSSLocator %s, got %s", want.CSSLocator, got.CSSLocator)
+		}
+		if len(got.ExcludeSelectors) != len(want.ExcludeSelectors) {
+			t.Errorf("Expected %d ExcludeSelectors, got %d", len(want.ExcludeSelectors), len(got.ExcludeSelectors))
+		}
+		for j, sel := range got.ExcludeSelectors {
+			if sel != want.ExcludeSelectors[j] {
+				t.Errorf("Expected ExcludeSelector %s, got %s", want.ExcludeSelectors[j], sel)
+			}
+		}
+	}
+}
+
+// TestSanitizeFilename drives sanitizeFilename through punctuation,
+// separator replacement, underscore trimming, and degenerate inputs.
+func TestSanitizeFilename(t *testing.T) {
+	cases := map[string]string{
+		"Hello, World!":             "Hello_World",
+		"file/with/path":            "file_with_path",
+		"file.with.dots":            "file_with_dots",
+		"___leading_underscores___": "leading_underscores",
+		"":                          "untitled",
+		"!@#$%^&*()":                "untitled",
+	}
+
+	for input, want := range cases {
+		if got := sanitizeFilename(input); got != want {
+			t.Errorf("sanitizeFilename(%q) = %q; want %q", input, got, want)
+		}
+	}
+}
+
+func TestGetFilenameFromContent(t *testing.T) {
+ tests := []struct {
+ content string
+ url string
+ expected string
+ }{
+ {"
Test Page", "http://example.com", "Test_Page.md"},
+ {"No title here", "http://example.com/page", "http___example_com_page.md"},
+ {" Trim Me ", "http://example.com", "Trim_Me.md"},
+ {"", "http://example.com", "http___example_com.md"},
+ }
+
+ for _, test := range tests {
+ result := getFilenameFromContent(test.content, test.url)
+ if result != test.expected {
+ t.Errorf("getFilenameFromContent(%q, %q) = %q; want %q", test.content, test.url, result, test.expected)
+ }
+ }
+}
+
+// mockExtractAndConvertContent is a test double for extractAndConvertContent;
+// it returns a deterministic string derived from the URL and never errors.
+func mockExtractAndConvertContent(urlStr string) (string, error) {
+	content := "Mocked content for " + urlStr
+	return content, nil
+}
+
+// mockExtractLinks is a test double for scraper.ExtractLinks; it always
+// reports the same two links regardless of the input URL and never errors.
+func mockExtractLinks(urlStr string) ([]string, error) {
+	links := []string{
+		"http://example.com/link1",
+		"http://example.com/link2",
+	}
+	return links, nil
+}
+
+// TestScrapeURL swaps the content/link extraction hooks for mocks and
+// verifies how many URLs scrapeURL visits at each recursion depth.
+// NOTE(review): this assumes extractAndConvertContent and
+// scraper.ExtractLinks are assignable function *variables*; if ExtractLinks
+// is a plain func declaration this will not compile — confirm.
+func TestScrapeURL(t *testing.T) {
+ // Replace the actual functions with mocks
+ oldExtractAndConvertContent := extractAndConvertContent
+ oldExtractLinks := scraper.ExtractLinks
+ // Restore the real implementations when the test finishes.
+ defer func() {
+ extractAndConvertContent = oldExtractAndConvertContent
+ scraper.ExtractLinks = oldExtractLinks
+ }()
+ extractAndConvertContent = mockExtractAndConvertContent
+ scraper.ExtractLinks = mockExtractLinks
+
+ tests := []struct {
+ url string
+ depth int
+ expectedCalls int
+ }{
+ {"http://example.com", 0, 1},
+ {"http://example.com", 1, 3},
+ {"http://example.com", 2, 3}, // Same as depth 1 because our mock only returns 2 links
+ }
+
+ for _, test := range tests {
+ // visited doubles as the call counter: one entry per scraped URL.
+ visited := make(map[string]bool)
+ content, err := scrapeURL(test.url, test.depth, visited)
+ if err != nil {
+ t.Errorf("scrapeURL(%q, %d) returned error: %v", test.url, test.depth, err)
+ continue
+ }
+ if len(visited) != test.expectedCalls {
+ t.Errorf("scrapeURL(%q, %d) made %d calls, expected %d", test.url, test.depth, len(visited), test.expectedCalls)
+ }
+ expectedContent := "Mocked content for " + test.url
+ if !strings.Contains(content, expectedContent) {
+ t.Errorf("scrapeURL(%q, %d) content doesn't contain %q", test.url, test.depth, expectedContent)
+ }
+ }
+}
diff --git a/internal/config/config_test.go b/internal/config/config_test.go
new file mode 100644
index 0000000..a05c23f
--- /dev/null
+++ b/internal/config/config_test.go
@@ -0,0 +1,120 @@
+package config
+
+import (
+ "os"
+ "reflect"
+ "testing"
+)
+
+// TestLoad writes a full YAML config to a temp file, loads it via Load, and
+// compares the result structurally against a hand-built expected Config.
+// NOTE(review): the YAML literal's nesting was flattened in the patch; the
+// indentation below is reconstructed from the expectedConfig structure.
+func TestLoad(t *testing.T) {
+	// Create a temporary config file
+	content := []byte(`
+file_types:
+  - go
+  - md
+ignore:
+  - "*.tmp"
+  - "**/*.log"
+code_generated:
+  - "generated_*.go"
+scrape:
+  sites:
+    - base_url: "https://example.com"
+      css_locator: "main"
+      exclude_selectors:
+        - ".ads"
+      max_depth: 2
+      allowed_paths:
+        - "/blog"
+      exclude_paths:
+        - "/admin"
+      output_alias: "example"
+      path_overrides:
+        - path: "/special"
+          css_locator: ".special-content"
+          exclude_selectors:
+            - ".sidebar"
+  output_type: "single"
+  requests_per_second: 1.0
+  burst_limit: 5
+`)
+
+	tmpfile, err := os.CreateTemp("", "config*.yml")
+	if err != nil {
+		t.Fatalf("Failed to create temp file: %v", err)
+	}
+	defer os.Remove(tmpfile.Name())
+
+	if _, err := tmpfile.Write(content); err != nil {
+		t.Fatalf("Failed to write to temp file: %v", err)
+	}
+	if err := tmpfile.Close(); err != nil {
+		t.Fatalf("Failed to close temp file: %v", err)
+	}
+
+	// Test loading the config
+	config, err := Load(tmpfile.Name())
+	if err != nil {
+		t.Fatalf("Load() failed: %v", err)
+	}
+
+	// Check if the loaded config matches the expected values
+	expectedConfig := &Config{
+		FileTypes:     []string{"go", "md"},
+		Ignore:        []string{"*.tmp", "**/*.log"},
+		CodeGenerated: []string{"generated_*.go"},
+		Scrape: ScrapeConfig{
+			Sites: []SiteConfig{
+				{
+					BaseURL:          "https://example.com",
+					CSSLocator:       "main",
+					ExcludeSelectors: []string{".ads"},
+					MaxDepth:         2,
+					AllowedPaths:     []string{"/blog"},
+					ExcludePaths:     []string{"/admin"},
+					OutputAlias:      "example",
+					PathOverrides: []PathOverride{
+						{
+							Path:             "/special",
+							CSSLocator:       ".special-content",
+							ExcludeSelectors: []string{".sidebar"},
+						},
+					},
+				},
+			},
+			OutputType:        "single",
+			RequestsPerSecond: 1.0,
+			BurstLimit:        5,
+		},
+	}
+
+	if !reflect.DeepEqual(config, expectedConfig) {
+		t.Errorf("Loaded config does not match expected config.\nGot: %+v\nWant: %+v", config, expectedConfig)
+	}
+}
+
+func TestDefaultConfigPath(t *testing.T) {
+ expected := "rollup.yml"
+ result := DefaultConfigPath()
+ if result != expected {
+ t.Errorf("DefaultConfigPath() = %q, want %q", result, expected)
+ }
+}
+
+// TestFileExists checks FileExists for both an existing and a missing path.
+func TestFileExists(t *testing.T) {
+	// Test with an existing file
+	tmpfile, err := os.CreateTemp("", "testfile")
+	if err != nil {
+		t.Fatalf("Failed to create temp file: %v", err)
+	}
+	defer os.Remove(tmpfile.Name())
+	// Close the handle right away: the test only needs the path, and the
+	// original version leaked the open file descriptor.
+	if err := tmpfile.Close(); err != nil {
+		t.Fatalf("Failed to close temp file: %v", err)
+	}
+
+	if !FileExists(tmpfile.Name()) {
+		t.Errorf("FileExists(%q) = false, want true", tmpfile.Name())
+	}
+
+	// Test with a non-existing file
+	if FileExists("non_existing_file.txt") {
+		t.Errorf("FileExists(\"non_existing_file.txt\") = true, want false")
+	}
+}
diff --git a/internal/scraper/scraper_test.go b/internal/scraper/scraper_test.go
new file mode 100644
index 0000000..df36dec
--- /dev/null
+++ b/internal/scraper/scraper_test.go
@@ -0,0 +1,169 @@
+package scraper
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"reflect"
+	"strings"
+	"testing"
+)
+
+// TestIsAllowedURL verifies allowed/excluded path filtering and host
+// matching against a fixed SiteConfig.
+func TestIsAllowedURL(t *testing.T) {
+	site := SiteConfig{
+		BaseURL:      "https://example.com",
+		AllowedPaths: []string{"/blog", "/products"},
+		ExcludePaths: []string{"/admin", "/private"},
+	}
+
+	for _, tc := range []struct {
+		url  string
+		want bool
+	}{
+		{"https://example.com/blog/post1", true},
+		{"https://example.com/products/item1", true},
+		{"https://example.com/admin/dashboard", false},
+		{"https://example.com/private/data", false},
+		{"https://example.com/other/page", false},
+		{"https://othersite.com/blog/post1", false},
+	} {
+		if got := isAllowedURL(tc.url, site); got != tc.want {
+			t.Errorf("isAllowedURL(%q) = %v, want %v", tc.url, got, tc.want)
+		}
+	}
+}
+
+// TestGetOverrides checks per-path CSS locator/exclude-selector overrides,
+// including prefix matching of override paths against sub-paths.
+func TestGetOverrides(t *testing.T) {
+	site := SiteConfig{
+		CSSLocator:       "main",
+		ExcludeSelectors: []string{".ads"},
+		PathOverrides: []PathOverride{
+			{
+				Path:             "/special",
+				CSSLocator:       ".special-content",
+				ExcludeSelectors: []string{".sidebar"},
+			},
+		},
+	}
+
+	cases := []struct {
+		url          string
+		wantLocator  string
+		wantExcludes []string
+	}{
+		{"https://example.com/normal", "main", []string{".ads"}},
+		{"https://example.com/special", ".special-content", []string{".sidebar"}},
+		{"https://example.com/special/page", ".special-content", []string{".sidebar"}},
+	}
+
+	for _, tc := range cases {
+		locator, excludes := getOverrides(tc.url, site)
+		if locator != tc.wantLocator {
+			t.Errorf("getOverrides(%q) locator = %q, want %q", tc.url, locator, tc.wantLocator)
+		}
+		if !reflect.DeepEqual(excludes, tc.wantExcludes) {
+			t.Errorf("getOverrides(%q) excludes = %v, want %v", tc.url, excludes, tc.wantExcludes)
+		}
+	}
+}
+
+func TestExtractContentWithCSS(t *testing.T) {
+ html := `
+
+
+
+ Main Content
+ This is the main content.
+ Advertisement
+
+
+
+
+ `
+
+ tests := []struct {
+ includeSelector string
+ excludeSelectors []string
+ expected string
+ }{
+ {"main", nil, "Main Content
\nThis is the main content.
\nAdvertisement
"},
+ {"main", []string{".ads"}, "Main Content
\nThis is the main content.
"},
+ {"aside", nil, "Sidebar content"},
+ }
+
+ for _, test := range tests {
+ result, err := ExtractContentWithCSS(html, test.includeSelector, test.excludeSelectors)
+ if err != nil {
+ t.Errorf("ExtractContentWithCSS() returned error: %v", err)
+ continue
+ }
+ if strings.TrimSpace(result) != strings.TrimSpace(test.expected) {
+ t.Errorf("ExtractContentWithCSS() = %q, want %q", result, test.expected)
+ }
+ }
+}
+
+// TestProcessHTMLContent checks HTML-to-markdown conversion of a heading,
+// an emphasized paragraph, and a bullet list.
+// NOTE(review): the HTML fixture was garbled in the patch (tags stripped);
+// it is reconstructed here from the intact expected markdown — confirm.
+func TestProcessHTMLContent(t *testing.T) {
+	html := `
+<html>
+<body>
+<h1>Test Heading</h1>
+<p>This is a <strong>test</strong> paragraph.</p>
+<ul>
+<li>Item 1</li>
+<li>Item 2</li>
+</ul>
+</body>
+</html>
+`
+
+	expected := strings.TrimSpace(`
+# Test Heading
+
+This is a **test** paragraph.
+
+- Item 1
+- Item 2
+`)
+
+	result, err := ProcessHTMLContent(html, Config{})
+	if err != nil {
+		t.Fatalf("ProcessHTMLContent() returned error: %v", err)
+	}
+
+	if strings.TrimSpace(result) != expected {
+		t.Errorf("ProcessHTMLContent() = %q, want %q", result, expected)
+	}
+}
+
+// TestExtractLinks serves a small HTML page from an httptest server and
+// checks that ExtractLinks returns all anchor hrefs in document order.
+// NOTE(review): the anchor markup was garbled in the patch (only the link
+// texts survived); it is reconstructed from the intact expectedLinks list.
+func TestExtractLinks(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/html")
+		// Best-effort write in a test handler; an error would surface as a
+		// failed ExtractLinks call below.
+		w.Write([]byte(`
+<html>
+<body>
+<a href="https://example.com/page1">Page 1</a>
+<a href="https://example.com/page2">Page 2</a>
+<a href="https://othersite.com">Other Site</a>
+</body>
+</html>
+`))
+	}))
+	defer server.Close()
+
+	links, err := ExtractLinks(server.URL)
+	if err != nil {
+		t.Fatalf("ExtractLinks() returned error: %v", err)
+	}
+
+	expectedLinks := []string{
+		"https://example.com/page1",
+		"https://example.com/page2",
+		"https://othersite.com",
+	}
+
+	if !reflect.DeepEqual(links, expectedLinks) {
+		t.Errorf("ExtractLinks() = %v, want %v", links, expectedLinks)
+	}
+}