From f424be86b90b4d46525ba0f63787e52ce459a09d Mon Sep 17 00:00:00 2001 From: Peter Jonathan Date: Sat, 28 Dec 2024 03:47:51 -0500 Subject: [PATCH 1/7] add firecrawl connection --- agents/example.json | 4 + poetry.lock | 109 +++++++++++++++++- pyproject.toml | 1 + src/connection_manager.py | 3 + src/connections/firecrawl_connection.py | 142 ++++++++++++++++++++++++ 5 files changed, 258 insertions(+), 1 deletion(-) create mode 100644 src/connections/firecrawl_connection.py diff --git a/agents/example.json b/agents/example.json index 4ca3a297..5e812faf 100644 --- a/agents/example.json +++ b/agents/example.json @@ -30,6 +30,10 @@ { "name": "anthropic", "model": "claude-3-5-sonnet-20241022" + }, + { + "name": "firecrawl", + "enabled": true } ], "tasks": [ diff --git a/poetry.lock b/poetry.lock index 37eefd72..a7e9d918 100644 --- a/poetry.lock +++ b/poetry.lock @@ -218,6 +218,24 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "firecrawl-py" +version = "1.7.0" +description = "Python SDK for Firecrawl API" +optional = false +python-versions = ">=3.8" +files = [ + {file = "firecrawl_py-1.7.0-py3-none-any.whl", hash = "sha256:da6272e1f373b709f743d4d61b4cc8ec2e7893851b403f2465ebc81c92c8018a"}, + {file = "firecrawl_py-1.7.0.tar.gz", hash = "sha256:fe13cead4c2d570d078a0f31b5062f6b7ef492d3c2ea43186d6e84302b8bf649"}, +] + +[package.dependencies] +nest-asyncio = "*" +pydantic = ">=2.10.3" +python-dotenv = "*" +requests = "*" +websockets = "*" + [[package]] name = "h11" version = "0.14.0" @@ -373,6 +391,17 @@ files = [ {file = "jiter-0.8.2.tar.gz", hash = "sha256:cd73d3e740666d0e639f678adb176fad25c1bcbdae88d8d7b857e1783bb4212d"}, ] +[[package]] +name = "nest-asyncio" +version = "1.6.0" +description = "Patch asyncio to allow nested event loops" +optional = false +python-versions = ">=3.5" +files = [ + {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, + {file = 
"nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, +] + [[package]] name = "oauthlib" version = "3.2.2" @@ -707,7 +736,85 @@ files = [ {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, ] +[[package]] +name = "websockets" +version = "14.1" +description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "websockets-14.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a0adf84bc2e7c86e8a202537b4fd50e6f7f0e4a6b6bf64d7ccb96c4cd3330b29"}, + {file = "websockets-14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90b5d9dfbb6d07a84ed3e696012610b6da074d97453bd01e0e30744b472c8179"}, + {file = "websockets-14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2177ee3901075167f01c5e335a6685e71b162a54a89a56001f1c3e9e3d2ad250"}, + {file = "websockets-14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f14a96a0034a27f9d47fd9788913924c89612225878f8078bb9d55f859272b0"}, + {file = "websockets-14.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f874ba705deea77bcf64a9da42c1f5fc2466d8f14daf410bc7d4ceae0a9fcb0"}, + {file = "websockets-14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9607b9a442392e690a57909c362811184ea429585a71061cd5d3c2b98065c199"}, + {file = "websockets-14.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bea45f19b7ca000380fbd4e02552be86343080120d074b87f25593ce1700ad58"}, + {file = "websockets-14.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:219c8187b3ceeadbf2afcf0f25a4918d02da7b944d703b97d12fb01510869078"}, + {file = "websockets-14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ad2ab2547761d79926effe63de21479dfaf29834c50f98c4bf5b5480b5838434"}, + {file = 
"websockets-14.1-cp310-cp310-win32.whl", hash = "sha256:1288369a6a84e81b90da5dbed48610cd7e5d60af62df9851ed1d1d23a9069f10"}, + {file = "websockets-14.1-cp310-cp310-win_amd64.whl", hash = "sha256:e0744623852f1497d825a49a99bfbec9bea4f3f946df6eb9d8a2f0c37a2fec2e"}, + {file = "websockets-14.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:449d77d636f8d9c17952628cc7e3b8faf6e92a17ec581ec0c0256300717e1512"}, + {file = "websockets-14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a35f704be14768cea9790d921c2c1cc4fc52700410b1c10948511039be824aac"}, + {file = "websockets-14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b1f3628a0510bd58968c0f60447e7a692933589b791a6b572fcef374053ca280"}, + {file = "websockets-14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c3deac3748ec73ef24fc7be0b68220d14d47d6647d2f85b2771cb35ea847aa1"}, + {file = "websockets-14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7048eb4415d46368ef29d32133134c513f507fff7d953c18c91104738a68c3b3"}, + {file = "websockets-14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6cf0ad281c979306a6a34242b371e90e891bce504509fb6bb5246bbbf31e7b6"}, + {file = "websockets-14.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cc1fc87428c1d18b643479caa7b15db7d544652e5bf610513d4a3478dbe823d0"}, + {file = "websockets-14.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f95ba34d71e2fa0c5d225bde3b3bdb152e957150100e75c86bc7f3964c450d89"}, + {file = "websockets-14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9481a6de29105d73cf4515f2bef8eb71e17ac184c19d0b9918a3701c6c9c4f23"}, + {file = "websockets-14.1-cp311-cp311-win32.whl", hash = "sha256:368a05465f49c5949e27afd6fbe0a77ce53082185bbb2ac096a3a8afaf4de52e"}, + {file = "websockets-14.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:6d24fc337fc055c9e83414c94e1ee0dee902a486d19d2a7f0929e49d7d604b09"}, + {file = "websockets-14.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ed907449fe5e021933e46a3e65d651f641975a768d0649fee59f10c2985529ed"}, + {file = "websockets-14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:87e31011b5c14a33b29f17eb48932e63e1dcd3fa31d72209848652310d3d1f0d"}, + {file = "websockets-14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bc6ccf7d54c02ae47a48ddf9414c54d48af9c01076a2e1023e3b486b6e72c707"}, + {file = "websockets-14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9777564c0a72a1d457f0848977a1cbe15cfa75fa2f67ce267441e465717dcf1a"}, + {file = "websockets-14.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a655bde548ca98f55b43711b0ceefd2a88a71af6350b0c168aa77562104f3f45"}, + {file = "websockets-14.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3dfff83ca578cada2d19e665e9c8368e1598d4e787422a460ec70e531dbdd58"}, + {file = "websockets-14.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6a6c9bcf7cdc0fd41cc7b7944447982e8acfd9f0d560ea6d6845428ed0562058"}, + {file = "websockets-14.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4b6caec8576e760f2c7dd878ba817653144d5f369200b6ddf9771d64385b84d4"}, + {file = "websockets-14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eb6d38971c800ff02e4a6afd791bbe3b923a9a57ca9aeab7314c21c84bf9ff05"}, + {file = "websockets-14.1-cp312-cp312-win32.whl", hash = "sha256:1d045cbe1358d76b24d5e20e7b1878efe578d9897a25c24e6006eef788c0fdf0"}, + {file = "websockets-14.1-cp312-cp312-win_amd64.whl", hash = "sha256:90f4c7a069c733d95c308380aae314f2cb45bd8a904fb03eb36d1a4983a4993f"}, + {file = "websockets-14.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3630b670d5057cd9e08b9c4dab6493670e8e762a24c2c94ef312783870736ab9"}, + {file = 
"websockets-14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36ebd71db3b89e1f7b1a5deaa341a654852c3518ea7a8ddfdf69cc66acc2db1b"}, + {file = "websockets-14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5b918d288958dc3fa1c5a0b9aa3256cb2b2b84c54407f4813c45d52267600cd3"}, + {file = "websockets-14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00fe5da3f037041da1ee0cf8e308374e236883f9842c7c465aa65098b1c9af59"}, + {file = "websockets-14.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8149a0f5a72ca36720981418eeffeb5c2729ea55fa179091c81a0910a114a5d2"}, + {file = "websockets-14.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77569d19a13015e840b81550922056acabc25e3f52782625bc6843cfa034e1da"}, + {file = "websockets-14.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cf5201a04550136ef870aa60ad3d29d2a59e452a7f96b94193bee6d73b8ad9a9"}, + {file = "websockets-14.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:88cf9163ef674b5be5736a584c999e98daf3aabac6e536e43286eb74c126b9c7"}, + {file = "websockets-14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:836bef7ae338a072e9d1863502026f01b14027250a4545672673057997d5c05a"}, + {file = "websockets-14.1-cp313-cp313-win32.whl", hash = "sha256:0d4290d559d68288da9f444089fd82490c8d2744309113fc26e2da6e48b65da6"}, + {file = "websockets-14.1-cp313-cp313-win_amd64.whl", hash = "sha256:8621a07991add373c3c5c2cf89e1d277e49dc82ed72c75e3afc74bd0acc446f0"}, + {file = "websockets-14.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:01bb2d4f0a6d04538d3c5dfd27c0643269656c28045a53439cbf1c004f90897a"}, + {file = "websockets-14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:414ffe86f4d6f434a8c3b7913655a1a5383b617f9bf38720e7c0799fac3ab1c6"}, + {file = "websockets-14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:8fda642151d5affdee8a430bd85496f2e2517be3a2b9d2484d633d5712b15c56"}, + {file = "websockets-14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd7c11968bc3860d5c78577f0dbc535257ccec41750675d58d8dc66aa47fe52c"}, + {file = "websockets-14.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a032855dc7db987dff813583d04f4950d14326665d7e714d584560b140ae6b8b"}, + {file = "websockets-14.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7e7ea2f782408c32d86b87a0d2c1fd8871b0399dd762364c731d86c86069a78"}, + {file = "websockets-14.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:39450e6215f7d9f6f7bc2a6da21d79374729f5d052333da4d5825af8a97e6735"}, + {file = "websockets-14.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ceada5be22fa5a5a4cdeec74e761c2ee7db287208f54c718f2df4b7e200b8d4a"}, + {file = "websockets-14.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3fc753451d471cff90b8f467a1fc0ae64031cf2d81b7b34e1811b7e2691bc4bc"}, + {file = "websockets-14.1-cp39-cp39-win32.whl", hash = "sha256:14839f54786987ccd9d03ed7f334baec0f02272e7ec4f6e9d427ff584aeea8b4"}, + {file = "websockets-14.1-cp39-cp39-win_amd64.whl", hash = "sha256:d9fd19ecc3a4d5ae82ddbfb30962cf6d874ff943e56e0c81f5169be2fda62979"}, + {file = "websockets-14.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e5dc25a9dbd1a7f61eca4b7cb04e74ae4b963d658f9e4f9aad9cd00b688692c8"}, + {file = "websockets-14.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:04a97aca96ca2acedf0d1f332c861c5a4486fdcba7bcef35873820f940c4231e"}, + {file = "websockets-14.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df174ece723b228d3e8734a6f2a6febbd413ddec39b3dc592f5a4aa0aff28098"}, + {file = "websockets-14.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:034feb9f4286476f273b9a245fb15f02c34d9586a5bc936aff108c3ba1b21beb"}, + {file = "websockets-14.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:660c308dabd2b380807ab64b62985eaccf923a78ebc572bd485375b9ca2b7dc7"}, + {file = "websockets-14.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5a42d3ecbb2db5080fc578314439b1d79eef71d323dc661aa616fb492436af5d"}, + {file = "websockets-14.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ddaa4a390af911da6f680be8be4ff5aaf31c4c834c1a9147bc21cbcbca2d4370"}, + {file = "websockets-14.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a4c805c6034206143fbabd2d259ec5e757f8b29d0a2f0bf3d2fe5d1f60147a4a"}, + {file = "websockets-14.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:205f672a6c2c671a86d33f6d47c9b35781a998728d2c7c2a3e1cf3333fcb62b7"}, + {file = "websockets-14.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef440054124728cc49b01c33469de06755e5a7a4e83ef61934ad95fc327fbb0"}, + {file = "websockets-14.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7591d6f440af7f73c4bd9404f3772bfee064e639d2b6cc8c94076e71b2471c1"}, + {file = "websockets-14.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:25225cc79cfebc95ba1d24cd3ab86aaa35bcd315d12fa4358939bd55e9bd74a5"}, + {file = "websockets-14.1-py3-none-any.whl", hash = "sha256:4d4fc827a20abe6d544a119896f6b78ee13fe81cbfef416f3f2ddf09a03f0e2e"}, + {file = "websockets-14.1.tar.gz", hash = "sha256:398b10c77d471c0aab20a845e7a60076b6390bfdaac7a6d2edb0d2c59d75e8d8"}, +] + [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "00d5f8a53dcf61aa2173352bcae9388f64ce401c4f7962fc2627b1ed7d8aee5a" +content-hash = "cfc05c354d052d89a271abbfcdade852951d1fbea4bc94a74b84b9bd8abdea98" diff --git a/pyproject.toml 
b/pyproject.toml index 33ce6f09..c75876cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ openai = "^1.57.2" tweepy = "^4.14.0" prompt-toolkit = "^3.0.48" anthropic = "^0.42.0" +firecrawl-py = "^1.7.0" [build-system] diff --git a/src/connection_manager.py b/src/connection_manager.py index 25980b68..f242ca31 100644 --- a/src/connection_manager.py +++ b/src/connection_manager.py @@ -5,6 +5,7 @@ from src.connections.eternalai_connection import EternalAIConnection from src.connections.openai_connection import OpenAIConnection from src.connections.twitter_connection import TwitterConnection +from src.connections.firecrawl_connection import FirecrawlConnection logger = logging.getLogger("connection_manager") @@ -24,6 +25,8 @@ def _class_name_to_type(class_name: str) -> Type[BaseConnection]: return OpenAIConnection elif class_name == "eternalai": return EternalAIConnection + elif class_name == "firecrawl": + return FirecrawlConnection return None diff --git a/src/connections/firecrawl_connection.py b/src/connections/firecrawl_connection.py new file mode 100644 index 00000000..b694ac4f --- /dev/null +++ b/src/connections/firecrawl_connection.py @@ -0,0 +1,142 @@ +import logging +import os +from typing import Dict, Any +from dotenv import load_dotenv, set_key +from src.connections.base_connection import BaseConnection, Action, ActionParameter +from firecrawl import FirecrawlApp + + +logger = logging.getLogger(__name__) + +class FirecrawlConnectionError(Exception): + """Base exception for FireCrawl connection errors""" + pass + + +class FirecrawlConfigurationError(FirecrawlConnectionError): + """Raised when there are configuration/credential issues""" + pass + +class FirecrawlAPIError(FirecrawlConnectionError): + """Raised when Firecrawl API requests fail""" + pass + +class FirecrawlConnection(BaseConnection): + def __init__(self, config: Dict[str, Any]): + super().__init__(config) + self._client = None + + @property + def is_llm_provider(self) -> bool: + 
return False + + def validate_config(self, config: Dict[str, Any]) -> Dict[str, Any]: + """Validate Firecrawl configuration from JSON""" + required_fields = ["enabled"] + missing_fields = [field for field in required_fields if field not in config] + + if missing_fields: + raise ValueError(f"Missing required configuration fields: {', '.join(missing_fields)}") + + return config + + def register_actions(self) -> None: + """Register available Firecrawl actions""" + self.actions = { + "scrape-page": Action( + name = "scrape-page", + parameters = [ + ActionParameter("url", True, str, "The URL of the page to scrape"), + ], + description = "Scrape a page for text data" + ) + } + + def _get_client(self) -> FirecrawlApp: + """Get or create Firecrawl app client""" + if not self._client: + api_key = os.getenv("FIRECRAWL_API_KEY") + if not api_key: + raise FirecrawlConfigurationError("Firecrawl API key not found in environment") + self._client = FirecrawlApp(api_key=api_key) + return self._client + + def configure(self) -> bool: + """Sets up Firecrawl authentication""" + print("\n🌐 Firecrawl API SETUP") + + if self.is_configured(): + print("\nFirecrawl API is already configured.") + response = input("Do you want to reconfigure? (y/n): ") + if response.lower() != 'y': + return True + + print("\n📝 To get your Firecrawl API credentials:") + print("1. Go to https://www.firecrawl.dev/app/api-keys") + print("2.
Navigate to the API keys section and create a new API key") + + api_key = input("\nEnter your Firecrawl API key: ") + + try: + if not os.path.exists('.env'): + with open('.env', 'w') as f: + f.write('') + + set_key('.env', 'FIRECRAWL_API_KEY', api_key) + + # Validate the API key by trying to list models + client = FirecrawlApp(api_key=api_key) + + print("\n✅ Firecrawl API configuration successfully saved!") + print("Your API key has been stored in the .env file.") + return True + + except Exception as e: + logger.error(f"Configuration failed: {e}") + return False + + def is_configured(self, verbose = False) -> bool: + """Check if Firecrawl API key is configured and valid""" + try: + load_dotenv() + api_key = os.getenv('FIRECRAWL_API_KEY') + if not api_key: + return False + + client = FirecrawlApp(api_key=api_key) + return True + + except Exception as e: + if verbose: + logger.debug(f"Configuration check failed: {e}") + return False + + + def scrape_page(self, url: str) -> str: + """Scrape a page for text data""" + if not self.config.get("enabled",True): + #logger.warning("Firecrawl scraping is disabled in the configuration.") + return "" + try: + client = self._get_client() + response = client.scrape_url(url, params={'formats': ['markdown']}) + logger.info(f"Successfully scraped page : {url}") + return response + except Exception as e: + raise FirecrawlAPIError(f"Scraping failed: {e}") + + + def perform_action(self, action_name: str, kwargs) -> Any: + """Execute a Firecrawl action with validation""" + if action_name not in self.actions: + raise KeyError(f"Unknown action: {action_name}") + + action = self.actions[action_name] + errors = action.validate_params(kwargs) + if errors: + raise ValueError(f"Invalid parameters: {', '.join(errors)}") + + # Call the appropriate method based on action name + method_name = action_name.replace('-', '_') + method = getattr(self, method_name) + return method(**kwargs) \ No newline at end of file From
0528172657ea97c8e872ab59c2ea2edc04a6f621 Mon Sep 17 00:00:00 2001 From: Peter Jonathan Date: Sat, 28 Dec 2024 04:18:50 -0500 Subject: [PATCH 2/7] fix response --- src/connections/firecrawl_connection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/connections/firecrawl_connection.py b/src/connections/firecrawl_connection.py index b694ac4f..5fc5702c 100644 --- a/src/connections/firecrawl_connection.py +++ b/src/connections/firecrawl_connection.py @@ -120,8 +120,8 @@ def scrape_page(self, url: str) -> str: try: client = self._get_client() response = client.scrape_url(url, params={'formats': ['markdown']}) - logger.info(f"Successfully scraped page : {url}") - return response + logger.info(f"Successfully scraped page : {url}") + return response['markdown'] except Exception as e: raise FirecrawlAPIError(f"Scraping failed: {e}") From 13db3d7622210d9529d85ea676ab3f8e8f222a75 Mon Sep 17 00:00:00 2001 From: Peter Jonathan Date: Sat, 28 Dec 2024 05:19:54 -0500 Subject: [PATCH 3/7] cleanup --- src/connections/firecrawl_connection.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/connections/firecrawl_connection.py b/src/connections/firecrawl_connection.py index 5fc5702c..16e8fceb 100644 --- a/src/connections/firecrawl_connection.py +++ b/src/connections/firecrawl_connection.py @@ -84,7 +84,6 @@ def configure(self) -> bool: set_key('.env', 'FIRECRAWL_API_KEY', api_key) - # Validate the API key by trying to list models client = FirecrawlApp(api_key=api_key) print("\n✅ Firecrawl API configuration successfully saved!") @@ -123,7 +122,7 @@ def scrape_page(self, url: str) -> str: logger.info(f"Successfully scraped page : {url}") return response['markdown'] except Exception as e: - raise FirecrawlAPIError(f"Scraping failed: {e}") + raise FirecrawlAPIError(f"Scraping {url} failed: {e}") def perform_action(self, action_name: str, kwargs) -> Any: From 31ea3809836ba15d51d598f29d97d18075d1bf25 Mon Sep 17 00:00:00 2001 From: Peter
Jonathan Date: Sat, 28 Dec 2024 06:08:55 -0500 Subject: [PATCH 4/7] enable config fix --- agents/example.json | 2 +- src/connections/firecrawl_connection.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/agents/example.json b/agents/example.json index 5e812faf..52c472c8 100644 --- a/agents/example.json +++ b/agents/example.json @@ -33,7 +33,7 @@ }, { "name": "firecrawl", - "enabled": true + "enabled": false } ], "tasks": [ diff --git a/src/connections/firecrawl_connection.py b/src/connections/firecrawl_connection.py index 16e8fceb..756e4e69 100644 --- a/src/connections/firecrawl_connection.py +++ b/src/connections/firecrawl_connection.py @@ -113,7 +113,7 @@ def is_configured(self, verbose = False) -> bool: def scrape_page(self, url: str) -> str: """Scrape a page for text data""" - if not self.config.get("enabled",True): + if not self.config["enabled"]: #logger.warning("Firecrawl scraping is disabled in the configuration.") return "" try: From 1e36c5c048662a9d284939cba1b0caeeb2a6b3f7 Mon Sep 17 00:00:00 2001 From: Peter Jonathan Date: Fri, 21 Feb 2025 17:03:32 -0500 Subject: [PATCH 5/7] firecrawl poetry --- poetry.lock | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 20372526..07b93b86 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -1199,6 +1199,24 @@ docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2. 
testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] typing = ["typing-extensions (>=4.12.2)"] +[[package]] +name = "firecrawl-py" +version = "1.12.0" +description = "Python SDK for Firecrawl API" +optional = false +python-versions = ">=3.8" +files = [ + {file = "firecrawl_py-1.12.0-py3-none-any.whl", hash = "sha256:2b9c549315027da32421aca2a7ca597cb05cdbb968cfe0a89f389c7bb20afa4a"}, + {file = "firecrawl_py-1.12.0.tar.gz", hash = "sha256:bbf883f6c774f05a5426121b85978a5f7b5ab11e614aff609f0673b097c3e553"}, +] + +[package.dependencies] +nest-asyncio = "*" +pydantic = ">=2.10.3" +python-dotenv = "*" +requests = "*" +websockets = "*" + [[package]] name = "frozenlist" version = "1.5.0" @@ -1855,6 +1873,17 @@ files = [ {file = "multidict-6.1.0.tar.gz", hash = "sha256:22ae2ebf9b0c69d206c003e2f6a914ea33f0a932d4aa16f236afc049d9958f4a"}, ] +[[package]] +name = "nest-asyncio" +version = "1.6.0" +description = "Patch asyncio to allow nested event loops" +optional = false +python-versions = ">=3.5" +files = [ + {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, + {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, +] + [[package]] name = "numpy" version = "2.2.2" @@ -2777,7 +2806,6 @@ files = [ [package.dependencies] markdown-it-py = ">=2.2.0" pygments = ">=2.13.0,<3.0.0" -typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.11\""} [package.extras] jupyter = ["ipywidgets (>=7.5.1,<9)"] @@ -3427,4 +3455,4 @@ server = ["fastapi", "requests", "uvicorn"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "54c122c4f40fce6a813645e76e7005a56b83846bc14149a0375a9bf53cade3b4" \ No newline at end of file +content-hash = 
"9c7b8842ae724f49aac22388eed98560e1b35b3189d1843575ee0878bb76db7d" From 2dc1244317feb7e11ac1f87fd88cb2a849e66f53 Mon Sep 17 00:00:00 2001 From: Peter Jonathan Date: Fri, 21 Feb 2025 17:11:49 -0500 Subject: [PATCH 6/7] cleanup --- .env.example | 2 +- agents/example.json | 3 +-- src/connections/firecrawl_connection.py | 5 +---- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.env.example b/.env.example index a217af8d..ed540810 100644 --- a/.env.example +++ b/.env.example @@ -23,4 +23,4 @@ SOLANA_PRIVATE_KEY= DISCORD_TOKEN= XAI_API_KEY= TOGETHER_API_KEY= - +FIRECRAWL_API_KEY= diff --git a/agents/example.json b/agents/example.json index c65119a4..55cca720 100644 --- a/agents/example.json +++ b/agents/example.json @@ -111,8 +111,7 @@ "server_id": "1234567890" }, { - "name": "firecrawl", - "enabled": false + "name": "firecrawl" } ], "tasks": [ diff --git a/src/connections/firecrawl_connection.py b/src/connections/firecrawl_connection.py index 756e4e69..31ea55c9 100644 --- a/src/connections/firecrawl_connection.py +++ b/src/connections/firecrawl_connection.py @@ -32,7 +32,7 @@ def is_llm_provider(self) -> bool: def validate_config(self, config: Dict[str, Any]) -> Dict[str, Any]: """Validate Firecrawl configuration from JSON""" - required_fields = ["enabled"] + required_fields = [] missing_fields = [field for field in required_fields if field not in config] if missing_fields: @@ -113,9 +113,6 @@ def is_configured(self, verbose = False) -> bool: def scrape_page(self, url: str) -> str: """Scrape a page for text data""" - if not self.config["enabled"]: - #logger.warning("Firecrawl scraping is disabled in the configuration.") - return "" try: client = self._get_client() response = client.scrape_url(url, params={'formats': ['markdown']}) From eeb392526f02ddaab71f07de3cfbc59dab052eb2 Mon Sep 17 00:00:00 2001 From: Peter Jonathan Date: Fri, 21 Feb 2025 17:22:11 -0500 Subject: [PATCH 7/7] readme update --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff 
--git a/README.md b/README.md index 80fe8b54..d29da723 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,9 @@ similar core functionality as Zerebro. For creative outputs, you'll need to fine - AI/ML Tools: - GOAT (Onchain Agent Toolkit) - Allora (Network inference) +- Others: + - Perplexity + - Firecrawl ### Language Model Support @@ -135,6 +138,8 @@ poetry run python main.py configure-connection hyperbolic # For Hyperbolic configure-connection groq # For GROQ configure-connection together # For Together AI + configure-connection perplexity # For Perplexity + configure-connection firecrawl # For Firecrawl ``` 2. Use `list-connections` to see all available connections and their status