Patch summary
  * cmd/files_test.go: TestRunRollup locates the generated output with the
    glob "*.rollup.md" and uses the first match, instead of expecting a file
    named exactly "rollup.md".
  * internal/scraper/scraper.go: ExtractLinks strips trailing "/" characters
    from every extracted link; ExtractContentWithCSS trims the selected
    content and removes all newline and tab characters before returning it.

diff --git a/cmd/files_test.go b/cmd/files_test.go
index 08f466c..254a0cb 100644
--- a/cmd/files_test.go
+++ b/cmd/files_test.go
@@ -114,11 +114,12 @@ func TestRunRollup(t *testing.T) {
 	}
 
 	// Check if the output file was created
-	outputFile := filepath.Join(tempDir, "rollup.md")
-	if _, err := os.Stat(outputFile); os.IsNotExist(err) {
-		files, _ := filepath.Glob(filepath.Join(tempDir, "*"))
-		t.Fatalf("Output file %s not found. Files in directory: %v", outputFile, files)
+	files, _ := filepath.Glob(filepath.Join(tempDir, "*.rollup.md"))
+	if len(files) == 0 {
+		allFiles, _ := filepath.Glob(filepath.Join(tempDir, "*"))
+		t.Fatalf("No rollup.md file found. Files in directory: %v", allFiles)
 	}
+	outputFile := files[0]
 
 	// Read the content of the output file
 	content, err := os.ReadFile(outputFile)
diff --git a/internal/scraper/scraper.go b/internal/scraper/scraper.go
index f7c5c2c..3c4e50e 100644
--- a/internal/scraper/scraper.go
+++ b/internal/scraper/scraper.go
@@ -528,7 +528,9 @@ func ExtractLinks(urlStr string) ([]string, error) {
 
 	var result []string
 	for _, link := range links.([]interface{}) {
-		result = append(result, link.(string))
+		// Normalize the URL by stripping every trailing "/" character
+		normalizedLink := strings.TrimRight(link.(string), "/")
+		result = append(result, normalizedLink)
 	}
 
 	logger.Printf("Extracted %d links\n", len(result))
@@ -562,6 +564,11 @@ func ExtractContentWithCSS(content, includeSelector string, excludeSelectors []s
 		return "", fmt.Errorf("error extracting content with CSS selector: %v", err)
 	}
 
+	// Trim surrounding whitespace, then delete every newline and tab character
+	selectedContent = strings.TrimSpace(selectedContent)
+	selectedContent = strings.ReplaceAll(selectedContent, "\n", "")
+	selectedContent = strings.ReplaceAll(selectedContent, "\t", "")
+
 	logger.Printf("Extracted content length: %d\n", len(selectedContent))
 	return selectedContent, nil
 }