fix: resolve nil logger panic and CLI URL processing (#5)

- Initialize the logger (scraper.SetupLogger) before Playwright (scraper.InitPlaywright) to prevent a nil pointer dereference
- Set AllowedPaths to []string{""} for CLI URLs so they get processed by the scraper

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
tnypxl
2025-11-27 11:04:08 -06:00
committed by GitHub
parent eb3b611864
commit 9495ddd788

View File

@@ -29,6 +29,8 @@ var webCmd = &cobra.Command{
Short: "Scrape main content from webpages and convert to Markdown", Short: "Scrape main content from webpages and convert to Markdown",
Long: `Scrape the main content from one or more webpages, ignoring navigational elements, ads, and other UI aspects. Convert the content to a well-structured Markdown file.`, Long: `Scrape the main content from one or more webpages, ignoring navigational elements, ads, and other UI aspects. Convert the content to a well-structured Markdown file.`,
PreRunE: func(cmd *cobra.Command, args []string) error { PreRunE: func(cmd *cobra.Command, args []string) error {
// Setup logger before initializing Playwright
scraper.SetupLogger(verbose)
// Initialize Playwright for web scraping // Initialize Playwright for web scraping
if err := scraper.InitPlaywright(); err != nil { if err := scraper.InitPlaywright(); err != nil {
return fmt.Errorf("failed to initialize Playwright: %w", err) return fmt.Errorf("failed to initialize Playwright: %w", err)
@@ -83,6 +85,7 @@ func runWeb(cmd *cobra.Command, args []string) error {
BaseURL: u, BaseURL: u,
CSSLocator: includeSelector, CSSLocator: includeSelector,
ExcludeSelectors: excludeSelectors, ExcludeSelectors: excludeSelectors,
AllowedPaths: []string{""},
} }
logger.Printf("URL %d configuration: BaseURL=%s, CSSLocator=%s", logger.Printf("URL %d configuration: BaseURL=%s, CSSLocator=%s",
i+1, u, includeSelector) i+1, u, includeSelector)