From 59994c085c0c9779135e39780dad4ab2463aa697 Mon Sep 17 00:00:00 2001 From: "Arik Jones (aider)" Date: Sun, 22 Sep 2024 16:58:53 -0500 Subject: [PATCH] fix: improve file ignore logic and preserve newlines in extracted content --- cmd/files.go | 11 +++++++---- internal/scraper/scraper.go | 11 ++++++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/cmd/files.go b/cmd/files.go index a0d49d1..d6e6a7a 100644 --- a/cmd/files.go +++ b/cmd/files.go @@ -87,11 +87,14 @@ func isIgnored(filePath string, patterns []string) bool { return true } } else { - // Check if the pattern matches any part of the file path + // Check if the pattern matches the full path or any part of it + if matched, _ := filepath.Match(pattern, filePath); matched { + return true + } pathParts := strings.Split(filePath, string(os.PathSeparator)) - for _, part := range pathParts { - matched, err := filepath.Match(pattern, part) - if err == nil && matched { + for i := range pathParts { + partialPath := filepath.Join(pathParts[:i+1]...) + if matched, _ := filepath.Match(pattern, partialPath); matched { return true } } diff --git a/internal/scraper/scraper.go b/internal/scraper/scraper.go index 3c4e50e..81f7731 100644 --- a/internal/scraper/scraper.go +++ b/internal/scraper/scraper.go @@ -564,10 +564,15 @@ func ExtractContentWithCSS(content, includeSelector string, excludeSelectors []s return "", fmt.Errorf("error extracting content with CSS selector: %v", err) } - // Trim whitespace and normalize newlines + // Trim leading and trailing whitespace, but preserve internal newlines selectedContent = strings.TrimSpace(selectedContent) - selectedContent = strings.ReplaceAll(selectedContent, "\n", "") - selectedContent = strings.ReplaceAll(selectedContent, "\t", "") + + // Normalize newlines + selectedContent = strings.ReplaceAll(selectedContent, "\r\n", "\n") + selectedContent = strings.ReplaceAll(selectedContent, "\r", "\n") + + // Remove any leading or trailing newlines + selectedContent = strings.Trim(selectedContent, "\n") logger.Printf("Extracted content length: %d\n", len(selectedContent)) return selectedContent, nil