refactor: update getFilenameFromContent to strip the URL scheme (e.g. http://) from fallback filenames

This commit is contained in:
Arik Jones (aider)
2024-09-19 16:29:55 -05:00
parent 0219881f61
commit 30e11153f9
2 changed files with 12 additions and 5 deletions

View File

@@ -203,7 +203,7 @@ func extractAndConvertContent(urlStr string) (string, error) {
return header + markdown + "\n\n", nil
}
func getFilenameFromContent(content, url string) string {
func getFilenameFromContent(content, urlStr string) string {
// Try to extract title from content
titleStart := strings.Index(content, "<title>")
titleEnd := strings.Index(content, "</title>")
@@ -212,8 +212,14 @@ func getFilenameFromContent(content, url string) string {
return sanitizeFilename(title) + ".md"
}
// If no title found, use the URL
return sanitizeFilename(url) + ".md"
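// If no title found, fall back to the URL's host and path (scheme, query, and fragment are dropped).
// NOTE(review): url.Parse is lenient and accepts most strings as relative paths without error — confirm the "untitled.md" fallback below is actually reachable for inputs like "not a valid url".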
parsedURL, err := url.Parse(urlStr)
if err != nil {
return "untitled.md"
}
filename := parsedURL.Host + parsedURL.Path
return sanitizeFilename(filename) + ".md"
}
func sanitizeFilename(name string) string {

View File

@@ -72,9 +72,10 @@ func TestGetFilenameFromContent(t *testing.T) {
expected string
}{
{"<title>Test Page</title>", "http://example.com", "Test_Page.md"},
{"No title here", "http://example.com/page", "http___example_com_page.md"},
{"No title here", "http://example.com/page", "example_com_page.md"},
{"<title> Trim Me </title>", "http://example.com", "Trim_Me.md"},
{"<title></title>", "http://example.com", "http___example_com.md"},
{"<title></title>", "http://example.com", "example_com.md"},
{"Invalid URL", "not a valid url", "untitled.md"},
}
for _, test := range tests {