diff --git a/.env.example b/.env.example index a217af8d..ed540810 100644 --- a/.env.example +++ b/.env.example @@ -23,4 +23,4 @@ SOLANA_PRIVATE_KEY= DISCORD_TOKEN= XAI_API_KEY= TOGETHER_API_KEY= - +FIRECRAWL_API_KEY= diff --git a/README.md b/README.md index 80fe8b54..d29da723 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,9 @@ similar core functionality as Zerebro. For creative outputs, you'll need to fine - AI/ML Tools: - GOAT (Onchain Agent Toolkit) - Allora (Network inference) +- Others: + - Perplexity + - Firecrawl ### Language Model Support @@ -135,6 +138,8 @@ poetry run python main.py configure-connection hyperbolic # For Hyperbolic configure-connection groq # For GROQ configure-connection together # For Together AI + configure-connection perplexity # For Perplexity + configure-connection firecrawl # For Firecrawl ``` 2. Use `list-connections` to see all available connections and their status diff --git a/agents/example.json b/agents/example.json index 1fb89e84..55cca720 100644 --- a/agents/example.json +++ b/agents/example.json @@ -109,6 +109,9 @@ "message_read_count": 10, "message_emoji_name": "ā¤ļø", "server_id": "1234567890" + }, + { + "name": "firecrawl" } ], "tasks": [ diff --git a/poetry.lock b/poetry.lock index 20372526..07b93b86 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -1199,6 +1199,24 @@ docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2. 
testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] typing = ["typing-extensions (>=4.12.2)"] +[[package]] +name = "firecrawl-py" +version = "1.12.0" +description = "Python SDK for Firecrawl API" +optional = false +python-versions = ">=3.8" +files = [ + {file = "firecrawl_py-1.12.0-py3-none-any.whl", hash = "sha256:2b9c549315027da32421aca2a7ca597cb05cdbb968cfe0a89f389c7bb20afa4a"}, + {file = "firecrawl_py-1.12.0.tar.gz", hash = "sha256:bbf883f6c774f05a5426121b85978a5f7b5ab11e614aff609f0673b097c3e553"}, +] + +[package.dependencies] +nest-asyncio = "*" +pydantic = ">=2.10.3" +python-dotenv = "*" +requests = "*" +websockets = "*" + [[package]] name = "frozenlist" version = "1.5.0" @@ -1855,6 +1873,17 @@ files = [ {file = "multidict-6.1.0.tar.gz", hash = "sha256:22ae2ebf9b0c69d206c003e2f6a914ea33f0a932d4aa16f236afc049d9958f4a"}, ] +[[package]] +name = "nest-asyncio" +version = "1.6.0" +description = "Patch asyncio to allow nested event loops" +optional = false +python-versions = ">=3.5" +files = [ + {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, + {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, +] + [[package]] name = "numpy" version = "2.2.2" @@ -2777,7 +2806,6 @@ files = [ [package.dependencies] markdown-it-py = ">=2.2.0" pygments = ">=2.13.0,<3.0.0" -typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.11\""} [package.extras] jupyter = ["ipywidgets (>=7.5.1,<9)"] @@ -3427,4 +3455,4 @@ server = ["fastapi", "requests", "uvicorn"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "54c122c4f40fce6a813645e76e7005a56b83846bc14149a0375a9bf53cade3b4" \ No newline at end of file +content-hash = 
"9c7b8842ae724f49aac22388eed98560e1b35b3189d1843575ee0878bb76db7d" diff --git a/pyproject.toml b/pyproject.toml index ce1a7a7d..27825ac1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,10 +28,13 @@ requests-oauthlib = "^1.3.1" together = "^1.3.14" fastapi = { version = "^0.109.0", optional = true } uvicorn = { version = "^0.27.0", optional = true } +firecrawl-py = "^1.7.0" + [tool.poetry.extras] server = ["fastapi", "uvicorn", "requests"] + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/src/connection_manager.py b/src/connection_manager.py index b2c890a0..d48da618 100644 --- a/src/connection_manager.py +++ b/src/connection_manager.py @@ -21,6 +21,7 @@ from src.connections.together_connection import TogetherAIConnection from src.connections.evm_connection import EVMConnection from src.connections.perplexity_connection import PerplexityConnection +from src.connections.firecrawl_connection import FirecrawlConnection logger = logging.getLogger("connection_manager") @@ -72,7 +73,10 @@ def _class_name_to_type(class_name: str) -> Type[BaseConnection]: elif class_name == "evm": return EVMConnection elif class_name == "perplexity": - return PerplexityConnection + return PerplexityConnection + elif class_name == "firecrawl": + return FirecrawlConnection + return None def _register_connection(self, config_dic: Dict[str, Any]) -> None: diff --git a/src/connections/firecrawl_connection.py b/src/connections/firecrawl_connection.py new file mode 100644 index 00000000..31ea55c9 --- /dev/null +++ b/src/connections/firecrawl_connection.py @@ -0,0 +1,138 @@ +import logging +import os +from typing import Dict, Any +from dotenv import load_dotenv, set_key +from src.connections.base_connection import BaseConnection, Action, ActionParameter +from firecrawl import FirecrawlApp + + +logger = logging.getLogger(__name__) + +class FirecrawlConnectionError(Exception): + """Base exception for FireCrawl connection errors""" + pass + + 
class FirecrawlConfigurationError(FirecrawlConnectionError):
    """Raised when there are configuration/credential issues"""


class FirecrawlAPIError(FirecrawlConnectionError):
    """Raised when Firecrawl API requests fail"""


class FirecrawlConnection(BaseConnection):
    """Connection exposing Firecrawl page scraping as an agent action.

    The API key is read from the ``FIRECRAWL_API_KEY`` environment variable,
    which may be populated from a local ``.env`` file. The JSON config for
    this connection has no required fields.
    """

    def __init__(self, config: Dict[str, Any]):
        super().__init__(config)
        # Built on first use by _get_client() and reused afterwards.
        self._client = None

    @property
    def is_llm_provider(self) -> bool:
        """Firecrawl is a scraping tool, not a language-model provider."""
        return False

    def validate_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Validate Firecrawl configuration from JSON.

        Returns the config unchanged. Raises ValueError if a required field
        is missing (there are currently no required fields).
        """
        required_fields = []
        missing_fields = [field for field in required_fields if field not in config]

        if missing_fields:
            raise ValueError(f"Missing required configuration fields: {', '.join(missing_fields)}")

        return config

    def register_actions(self) -> None:
        """Register available Firecrawl actions"""
        self.actions = {
            "scrape-page": Action(
                name="scrape-page",
                parameters=[
                    ActionParameter("url", True, str, "The URL of the page to scrape"),
                ],
                description="Scrape a page for text data",
            )
        }

    def _get_client(self) -> FirecrawlApp:
        """Get or create the Firecrawl app client.

        Raises FirecrawlConfigurationError if no API key is available.
        """
        if self._client is None:
            # Load .env here as well, so the client works even when
            # is_configured() has not been called earlier in this process.
            load_dotenv()
            api_key = os.getenv("FIRECRAWL_API_KEY")
            if not api_key:
                raise FirecrawlConfigurationError("Firecrawl API key not found in environment")
            self._client = FirecrawlApp(api_key=api_key)
        return self._client

    def configure(self) -> bool:
        """Interactively prompt for a Firecrawl API key and store it in .env.

        Returns True if the key was saved, or if the user chose to keep the
        existing configuration. Returns False if the key was empty or if
        saving failed.
        """
        print("\nšŸŒ Firecrawl API SETUP")

        if self.is_configured():
            # Previously "\Firecrawl": an invalid escape sequence that
            # printed a literal backslash instead of a leading newline.
            print("\nFirecrawl API is already configured.")
            response = input("Do you want to reconfigure? (y/n): ")
            if response.lower() != 'y':
                return True

        print("\nšŸ“ To get your Firecrawl API credentials:")
        print("1. Go to https://www.firecrawl.dev/app/api-keys")
        print("2. Navigate to the API keys section and create a new API key")

        api_key = input("\nEnter your Firecrawl API key: ").strip()
        if not api_key:
            # Do not write an empty key to .env.
            logger.error("Configuration failed: no API key provided")
            return False

        try:
            # Construct the client before touching .env, so that a key the
            # SDK rejects is never persisted.
            FirecrawlApp(api_key=api_key)

            if not os.path.exists('.env'):
                with open('.env', 'w') as f:
                    f.write('')

            set_key('.env', 'FIRECRAWL_API_KEY', api_key)

            # load_dotenv() does not override variables that are already
            # set, so update the process environment and drop the cached
            # client to make a reconfigured key take effect immediately.
            os.environ['FIRECRAWL_API_KEY'] = api_key
            self._client = None

            print("\nāœ… Firecrawl API configuration successfully saved!")
            print("Your API key has been stored in the .env file.")
            return True

        except Exception as e:
            logger.error(f"Configuration failed: {e}")
            return False

    def is_configured(self, verbose: bool = False) -> bool:
        """Check whether an API key is available and a client can be built.

        Returns False (never raises) when the key is missing or client
        construction fails; the failure is logged at debug level when
        ``verbose`` is true.
        """
        try:
            load_dotenv()
            api_key = os.getenv('FIRECRAWL_API_KEY')
            if not api_key:
                return False

            # Only the construction matters here; the instance is discarded.
            FirecrawlApp(api_key=api_key)
            return True

        except Exception as e:
            if verbose:
                logger.debug(f"Configuration check failed: {e}")
            return False

    def scrape_page(self, url: str) -> str:
        """Scrape a page for text data.

        Requests the page in markdown format and returns the ``markdown``
        entry of the response. Raises FirecrawlAPIError on any failure,
        with the original exception attached as the cause.
        """
        try:
            client = self._get_client()
            response = client.scrape_url(url, params={'formats': ['markdown']})
            logger.info(f"Successfully scraped page : {url}")
            return response['markdown']
        except Exception as e:
            raise FirecrawlAPIError(f"Scraping {url} failed: {e}") from e

    def perform_action(self, action_name: str, kwargs: Dict[str, Any]) -> Any:
        """Execute a Firecrawl action with validation.

        Raises KeyError for an unknown action name and ValueError when the
        action's parameter validation reports errors.
        """
        if action_name not in self.actions:
            raise KeyError(f"Unknown action: {action_name}")

        action = self.actions[action_name]
        errors = action.validate_params(kwargs)
        if errors:
            raise ValueError(f"Invalid parameters: {', '.join(errors)}")

        # Action names use dashes ("scrape-page"); the implementing methods
        # use underscores ("scrape_page").
        method_name = action_name.replace('-', '_')
        method = getattr(self, method_name)
        return method(**kwargs)