mirror of
https://github.com/tnypxl/rollup.git
synced 2025-12-13 06:23:18 +00:00
fix: improve URL parsing and title extraction in getFilenameFromContent
This commit is contained in:
17
cmd/web.go
17
cmd/web.go
@@ -211,17 +211,26 @@ func getFilenameFromContent(content, urlStr string) (string, error) {
|
||||
titleStart := strings.Index(content, "<title>")
|
||||
titleEnd := strings.Index(content, "</title>")
|
||||
if titleStart != -1 && titleEnd != -1 && titleEnd > titleStart {
|
||||
title := content[titleStart+7 : titleEnd]
|
||||
return sanitizeFilename(title) + ".rollup.md", nil
|
||||
title := strings.TrimSpace(content[titleStart+7 : titleEnd])
|
||||
if title != "" {
|
||||
return sanitizeFilename(title) + ".rollup.md", nil
|
||||
}
|
||||
}
|
||||
|
||||
// If no title found, use the URL without the protocol
|
||||
// If no title found or title is empty, use the URL
|
||||
parsedURL, err := url.Parse(urlStr)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid URL: %v", err)
|
||||
}
|
||||
|
||||
filename := parsedURL.Host + parsedURL.Path
|
||||
if parsedURL.Host == "" {
|
||||
return "", fmt.Errorf("invalid URL: missing host")
|
||||
}
|
||||
|
||||
filename := parsedURL.Host
|
||||
if parsedURL.Path != "" && parsedURL.Path != "/" {
|
||||
filename += strings.TrimSuffix(parsedURL.Path, "/")
|
||||
}
|
||||
return sanitizeFilename(filename) + ".rollup.md", nil
|
||||
}
|
||||
|
||||
|
||||
@@ -76,7 +76,9 @@ func TestGetFilenameFromContent(t *testing.T) {
|
||||
{"No title here", "http://example.com/page", "example_com_page.rollup.md", false},
|
||||
{"<title> Trim Me </title>", "http://example.com", "Trim_Me.rollup.md", false},
|
||||
{"<title></title>", "http://example.com", "example_com.rollup.md", false},
|
||||
{"<title> </title>", "http://example.com", "example_com.rollup.md", false},
|
||||
{"Invalid URL", "not a valid url", "", true},
|
||||
{"No host", "http://", "", true},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
||||
Reference in New Issue
Block a user