fix: Handle missing content in ProcessHTMLContent

This commit is contained in:
Arik Jones (aider)
2024-09-14 19:43:58 -05:00
parent 2ab0d74279
commit 3378402fb9

View File

@@ -129,21 +129,27 @@ func ProcessHTMLContent(htmlContent string, config Config) (string, error) {
return "", fmt.Errorf("error parsing HTML: %v", err) return "", fmt.Errorf("error parsing HTML: %v", err)
} }
var content string var selection *goquery.Selection
if config.CSSLocator != "" { if config.CSSLocator != "" {
log.Printf("Using CSS locator: %s\n", config.CSSLocator) log.Printf("Using CSS locator: %s\n", config.CSSLocator)
content, err = doc.Find(config.CSSLocator).Html() selection = doc.Find(config.CSSLocator)
if err != nil { if selection.Length() == 0 {
log.Printf("Error extracting content with CSS locator: %v\n", err) log.Printf("Warning: No content found with CSS locator: %s. Falling back to body content.\n", config.CSSLocator)
return "", fmt.Errorf("error extracting content with CSS locator: %v", err) selection = doc.Find("body")
} }
} else { } else {
log.Println("No CSS locator provided, processing entire body") log.Println("No CSS locator provided, processing entire body")
content, err = doc.Find("body").Html() selection = doc.Find("body")
if err != nil { }
log.Printf("Error extracting body content: %v\n", err)
return "", fmt.Errorf("error extracting body content: %v", err) if selection.Length() == 0 {
} return "", fmt.Errorf("no content found in the document")
}
content, err := selection.Html()
if err != nil {
log.Printf("Error extracting content: %v\n", err)
return "", fmt.Errorf("error extracting content: %v", err)
} }
markdown := convertToMarkdown(content) markdown := convertToMarkdown(content)