fix: update getFilenameFromContent to handle invalid URLs and use .rollup.md suffix

This commit is contained in:
Arik Jones (aider)
2024-09-19 16:31:43 -05:00
parent 30e11153f9
commit 7c8fcc3261
2 changed files with 22 additions and 12 deletions

View File

@@ -203,23 +203,23 @@ func extractAndConvertContent(urlStr string) (string, error) {
return header + markdown + "\n\n", nil return header + markdown + "\n\n", nil
} }
func getFilenameFromContent(content, urlStr string) string { func getFilenameFromContent(content, urlStr string) (string, error) {
// Try to extract title from content // Try to extract title from content
titleStart := strings.Index(content, "<title>") titleStart := strings.Index(content, "<title>")
titleEnd := strings.Index(content, "</title>") titleEnd := strings.Index(content, "</title>")
if titleStart != -1 && titleEnd != -1 && titleEnd > titleStart { if titleStart != -1 && titleEnd != -1 && titleEnd > titleStart {
title := content[titleStart+7 : titleEnd] title := content[titleStart+7 : titleEnd]
return sanitizeFilename(title) + ".md" return sanitizeFilename(title) + ".rollup.md", nil
} }
// If no title found, use the URL without the protocol // If no title found, use the URL without the protocol
parsedURL, err := url.Parse(urlStr) parsedURL, err := url.Parse(urlStr)
if err != nil { if err != nil {
return "untitled.md" return "", fmt.Errorf("invalid URL: %v", err)
} }
filename := parsedURL.Host + parsedURL.Path filename := parsedURL.Host + parsedURL.Path
return sanitizeFilename(filename) + ".md" return sanitizeFilename(filename) + ".rollup.md", nil
} }
func sanitizeFilename(name string) string { func sanitizeFilename(name string) string {

View File

@@ -70,21 +70,31 @@ func TestGetFilenameFromContent(t *testing.T) {
content string content string
url string url string
expected string expected string
expectErr bool
}{ }{
{"<title>Test Page</title>", "http://example.com", "Test_Page.md"}, {"<title>Test Page</title>", "http://example.com", "Test_Page.rollup.md", false},
{"No title here", "http://example.com/page", "example_com_page.md"}, {"No title here", "http://example.com/page", "example_com_page.rollup.md", false},
{"<title> Trim Me </title>", "http://example.com", "Trim_Me.md"}, {"<title> Trim Me </title>", "http://example.com", "Trim_Me.rollup.md", false},
{"<title></title>", "http://example.com", "example_com.md"}, {"<title></title>", "http://example.com", "example_com.rollup.md", false},
{"Invalid URL", "not a valid url", "untitled.md"}, {"Invalid URL", "not a valid url", "", true},
} }
for _, test := range tests { for _, test := range tests {
result := getFilenameFromContent(test.content, test.url) result, err := getFilenameFromContent(test.content, test.url)
if test.expectErr {
if err == nil {
t.Errorf("getFilenameFromContent(%q, %q) expected an error, but got none", test.content, test.url)
}
} else {
if err != nil {
t.Errorf("getFilenameFromContent(%q, %q) unexpected error: %v", test.content, test.url, err)
}
if result != test.expected { if result != test.expected {
t.Errorf("getFilenameFromContent(%q, %q) = %q; want %q", test.content, test.url, result, test.expected) t.Errorf("getFilenameFromContent(%q, %q) = %q; want %q", test.content, test.url, result, test.expected)
} }
} }
} }
}
// Mock functions for testing // Mock functions for testing
func mockExtractAndConvertContent(urlStr string) (string, error) { func mockExtractAndConvertContent(urlStr string) (string, error) {