mirror of
https://github.com/tnypxl/rollup.git
synced 2025-12-15 15:03:17 +00:00
feat: Pass scraper configuration to command execution
This commit is contained in:
18
cmd/web.go
18
cmd/web.go
@@ -20,6 +20,11 @@ var (
|
|||||||
xpathSelector string
|
xpathSelector string
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
cfg *config.Config
|
||||||
|
scraperConfig scraper.Config
|
||||||
|
)
|
||||||
|
|
||||||
var webCmd = &cobra.Command{
|
var webCmd = &cobra.Command{
|
||||||
Use: "web",
|
Use: "web",
|
||||||
Short: "Scrape main content from webpages and convert to Markdown",
|
Short: "Scrape main content from webpages and convert to Markdown",
|
||||||
@@ -37,15 +42,6 @@ func init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func runWeb(cmd *cobra.Command, args []string) error {
|
func runWeb(cmd *cobra.Command, args []string) error {
|
||||||
var err error
|
|
||||||
cfg, err = config.Load("rollup.yml")
|
|
||||||
if err != nil {
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
return fmt.Errorf("rollup.yml file not found. Please create a configuration file or provide command-line arguments")
|
|
||||||
}
|
|
||||||
return fmt.Errorf("error loading configuration: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use config if available, otherwise use command-line flags
|
// Use config if available, otherwise use command-line flags
|
||||||
if len(urls) == 0 && cfg.Scrape.URL != "" {
|
if len(urls) == 0 && cfg.Scrape.URL != "" {
|
||||||
urls = []string{cfg.Scrape.URL}
|
urls = []string{cfg.Scrape.URL}
|
||||||
@@ -135,8 +131,8 @@ func extractAndConvertContent(urlStr string) (string, error) {
|
|||||||
return "", fmt.Errorf("error fetching webpage content: %v", err)
|
return "", fmt.Errorf("error fetching webpage content: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if cssSelector != "" {
|
if scraperConfig.CSSLocator != "" {
|
||||||
content, err = scraper.ExtractContentWithCSS(content, cssSelector)
|
content, err = scraper.ExtractContentWithCSS(content, scraperConfig.CSSLocator)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("error extracting content with CSS selector: %v", err)
|
return "", fmt.Errorf("error extracting content with CSS selector: %v", err)
|
||||||
}
|
}
|
||||||
|
|||||||
6
main.go
6
main.go
@@ -26,7 +26,11 @@ func main() {
|
|||||||
}
|
}
|
||||||
defer scraper.ClosePlaywright()
|
defer scraper.ClosePlaywright()
|
||||||
|
|
||||||
if err := cmd.Execute(); err != nil {
|
scraperConfig := scraper.Config{
|
||||||
|
CSSLocator: cfg.Scrape.CSSLocator,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := cmd.Execute(cfg, scraperConfig); err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user