mirror of
https://github.com/tnypxl/rollup.git
synced 2025-12-15 15:03:17 +00:00
fix: Update scraper to handle empty URLs
This commit is contained in:
committed by
Arik Jones (aider)
parent
eaa7135eab
commit
2ab0d74279
@@ -212,7 +212,7 @@ func scrollPage(page playwright.Page) error {
|
|||||||
// ExtractLinks extracts all links from the given URL
|
// ExtractLinks extracts all links from the given URL
|
||||||
func ExtractLinks(urlStr string) ([]string, error) {
|
func ExtractLinks(urlStr string) ([]string, error) {
|
||||||
log.Printf("Extracting links from URL: %s\n", urlStr)
|
log.Printf("Extracting links from URL: %s\n", urlStr)
|
||||||
|
|
||||||
page, err := browser.NewPage()
|
page, err := browser.NewPage()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("could not create page: %v", err)
|
return nil, fmt.Errorf("could not create page: %v", err)
|
||||||
@@ -245,7 +245,7 @@ func ExtractLinks(urlStr string) ([]string, error) {
|
|||||||
// ExtractContentWithCSS extracts content from HTML using a CSS selector
|
// ExtractContentWithCSS extracts content from HTML using a CSS selector
|
||||||
func ExtractContentWithCSS(content, includeSelector string, excludeSelectors []string) (string, error) {
|
func ExtractContentWithCSS(content, includeSelector string, excludeSelectors []string) (string, error) {
|
||||||
log.Printf("Extracting content with CSS selector: %s\n", includeSelector)
|
log.Printf("Extracting content with CSS selector: %s\n", includeSelector)
|
||||||
|
|
||||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("error parsing HTML: %v", err)
|
return "", fmt.Errorf("error parsing HTML: %v", err)
|
||||||
|
|||||||
Reference in New Issue
Block a user