fix: improve URL parsing and title extraction in getFilenameFromContent

2025-12-13 06:23:18 +00:00 · 2024-09-19 16:33:55 -05:00
parent 237ed512fc
commit 32499abbc0
2 changed files with 15 additions and 4 deletions
--- a/cmd/web.go
+++ b/cmd/web.go
@@ -211,17 +211,26 @@ func getFilenameFromContent(content, urlStr string) (string, error) {
 	titleStart := strings.Index(content, "<title>")
 	titleEnd := strings.Index(content, "</title>")
 	if titleStart != -1 && titleEnd != -1 && titleEnd > titleStart {
-		title := content[titleStart+7 : titleEnd]
-		return sanitizeFilename(title) + ".rollup.md", nil
+		title := strings.TrimSpace(content[titleStart+7 : titleEnd])
+		if title != "" {
+			return sanitizeFilename(title) + ".rollup.md", nil
+		}
 	}

-	// If no title found, use the URL without the protocol
+	// If no title found or title is empty, use the URL
 	parsedURL, err := url.Parse(urlStr)
 	if err != nil {
 		return "", fmt.Errorf("invalid URL: %v", err)
 	}
 	
-	filename := parsedURL.Host + parsedURL.Path
+	if parsedURL.Host == "" {
+		return "", fmt.Errorf("invalid URL: missing host")
+	}
+	
+	filename := parsedURL.Host
+	if parsedURL.Path != "" && parsedURL.Path != "/" {
+		filename += strings.TrimSuffix(parsedURL.Path, "/")
+	}
 	return sanitizeFilename(filename) + ".rollup.md", nil
 }

--- a/cmd/web_test.go
+++ b/cmd/web_test.go
@@ -76,7 +76,9 @@ func TestGetFilenameFromContent(t *testing.T) {
 		{"No title here", "http://example.com/page", "example_com_page.rollup.md", false},
 		{"<title>  Trim  Me  </title>", "http://example.com", "Trim_Me.rollup.md", false},
 		{"<title></title>", "http://example.com", "example_com.rollup.md", false},
+		{"<title>   </title>", "http://example.com", "example_com.rollup.md", false},
 		{"Invalid URL", "not a valid url", "", true},
+		{"No host", "http://", "", true},
 	}

 	for _, test := range tests {