Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: add firecrawl map endpoint #881

Merged
merged 7 commits into from
Sep 7, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions camel/loaders/firecrawl_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from pydantic import BaseModel

from camel.utils import api_keys_required

class Firecrawl:
r"""Firecrawl allows you to turn entire websites into LLM-ready markdown.
Expand All @@ -30,6 +31,7 @@ class Firecrawl:
https://docs.firecrawl.dev/introduction
"""

@api_keys_required("FIRECRAWL_API_KEY")
def __init__(
self,
api_key: Optional[str] = None,
Expand Down Expand Up @@ -211,3 +213,24 @@ def tidy_scrape(self, url: str) -> str:
return scrape_result.get("markdown", "")
except Exception as e:
raise RuntimeError(f"Failed to perform tidy scrape: {e}")

def map_site(
    self, url: str, params: Optional[Dict[str, Any]] = None
) -> list:
    r"""Map a website to retrieve all accessible URLs.

    The request is delegated to `self.app.map_url` and its result is
    returned unchanged.

    Args:
        url (str): The URL of the site to map.
        params (Optional[Dict[str, Any]]): Additional parameters for the
            map request. Defaults to `None`.

    Returns:
        list: A list containing the URLs found on the site.

    Raises:
        RuntimeError: If the mapping process fails. The underlying
            exception is attached as `__cause__`.
    """
    try:
        return self.app.map_url(url=url, params=params)
    except Exception as e:
        # Chain explicitly so the original error stays the cause.
        raise RuntimeError(f"Failed to map the site: {e}") from e
16 changes: 16 additions & 0 deletions examples/loaders/firecrawl_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,19 @@ class TopArticlesSchema(BaseModel):
'amalinovic', 'commentsURL': 'item?id=41108662'}]}
===============================================================================
'''

# Map the site and print whatever `map_site` returns for it.
map_result = firecrawl.map_site(url="https://www.camel-ai.org")

print(map_result)
"""
===============================================================================
['https://www.camel-ai.org', 'https://www.camel-ai.org/blog', 'https://www.
camel-ai.org/checkout', 'https://www.camel-ai.org/contact', 'https://www.camel-
ai.org/features', 'https://www.camel-ai.org/order-confirmation', 'https://www.
camel-ai.org/paypal-checkout', 'https://www.camel-ai.org/about', 'https://www.
camel-ai.org/integration', 'https://www.camel-ai.org/search', 'https://www.
camel-ai.org/post/crab', 'https://www.camel-ai.org/post/tool-usage', 'https://
www.camel-ai.org/post/releasenotes-sprint4', 'https://www.camel-ai.org/post/
releasenotes-sprint56']
===============================================================================
"""
Loading
Loading