From d5a94f54685b422e9d25f617deceda2e0ad80210 Mon Sep 17 00:00:00 2001 From: "Arik Jones (aider)" Date: Sun, 22 Sep 2024 17:00:16 -0500 Subject: [PATCH] fix: remove indentation while preserving HTML structure in ExtractContentWithCSS --- internal/scraper/scraper.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/internal/scraper/scraper.go b/internal/scraper/scraper.go index 81f7731..e5413e0 100644 --- a/internal/scraper/scraper.go +++ b/internal/scraper/scraper.go @@ -564,13 +564,20 @@ func ExtractContentWithCSS(content, includeSelector string, excludeSelectors []s return "", fmt.Errorf("error extracting content with CSS selector: %v", err) } - // Trim leading and trailing whitespace, but preserve internal newlines + // Trim leading and trailing whitespace selectedContent = strings.TrimSpace(selectedContent) // Normalize newlines selectedContent = strings.ReplaceAll(selectedContent, "\r\n", "\n") selectedContent = strings.ReplaceAll(selectedContent, "\r", "\n") + // Remove indentation while preserving structure + lines := strings.Split(selectedContent, "\n") + for i, line := range lines { + lines[i] = strings.TrimSpace(line) + } + selectedContent = strings.Join(lines, "\n") + // Remove any leading or trailing newlines selectedContent = strings.Trim(selectedContent, "\n")