Merged
Changes from 2 commits
2 changes: 1 addition & 1 deletion hivemind_etl/mediawiki/etl.py
@@ -99,7 +99,7 @@ def transform(self) -> list[Document]:
     def load(self, documents: list[Document]) -> None:
         logging.info(f"Loading {len(documents)} documents into Qdrant!")
         ingestion_pipeline = CustomIngestionPipeline(
-            self.community_id, collection_name=self.platform_id
+            self.community_id, collection_name=self.platform_id, use_cache=False,
         )
 
         # Process batches in parallel using ThreadPoolExecutor
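The trailing context above shows that load() processes batches in parallel with a ThreadPoolExecutor. A minimal sketch of that pattern, assuming a hypothetical run_pipeline(documents) method and invented batch_size/max_workers parameters; none of these names appear in the diff:

    from concurrent.futures import ThreadPoolExecutor

    def load_in_batches(pipeline, documents, batch_size=100, max_workers=4):
        """Hypothetical helper mirroring the 'process batches in parallel' comment."""
        # Split the document list into fixed-size batches.
        batches = [
            documents[i : i + batch_size]
            for i in range(0, len(documents), batch_size)
        ]
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # run_pipeline is assumed; substitute whatever the real pipeline exposes.
            futures = [executor.submit(pipeline.run_pipeline, batch) for batch in batches]
            for future in futures:
                future.result()  # propagate any exception raised in a worker thread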
2 changes: 2 additions & 0 deletions hivemind_etl/simple_ingestion/pipeline.py
@@ -147,6 +147,7 @@ async def process_document(
     pipeline = CustomIngestionPipeline(
         community_id=ingestion_request.communityId,
         collection_name=collection_name,
+        use_cache=False,
     )
 
     document = Document(
@@ -188,6 +189,7 @@ async def process_documents_batch(
     pipeline = CustomIngestionPipeline(
         community_id=batch_chunk.communityId,
         collection_name=collection_name,
+        use_cache=False,
     )
 
     # Convert all documents in this chunk to Document objects
2 changes: 1 addition & 1 deletion hivemind_etl/website/website_etl.py
@@ -30,7 +30,7 @@ def __init__(
 
         # preparing the ingestion pipeline
         self.ingestion_pipeline = CustomIngestionPipeline(
-            self.community_id, collection_name=self.platform_id
+            self.community_id, collection_name=self.platform_id, use_cache=False,
         )
 
     async def extract(
2 changes: 2 additions & 0 deletions hivemind_summarizer/activities.py
@@ -97,6 +97,7 @@ async def fetch_platform_summaries_by_date(
     pipeline = CustomIngestionPipeline(
         community_id=community_id,
         collection_name=f"{input.platform_id}_summary",
+        use_cache=False,
     )
     # get the latest date from the collection
     latest_date = pipeline.get_latest_document_date(
@@ -211,6 +212,7 @@ async def fetch_platform_summaries_by_date_range(
             extract_text_only=extract_text_only,
             platform_id=input.platform_id,
             community_id=community_id,
+            use_cache=False,
         )
         summaries = await fetch_platform_summaries_by_date(date_input)
         result[date] = summaries
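Across all four files the change is identical: every CustomIngestionPipeline call site now passes use_cache=False. A minimal sketch of the constructor surface these hunks exercise; only community_id, collection_name, and use_cache are confirmed by the diff, while the use_cache=True default and the cache wiring below are illustrative assumptions, not the real implementation:

    from typing import Any, Optional

    class CustomIngestionPipeline:
        """Illustrative stand-in; shape inferred from the call sites in this PR."""

        def __init__(
            self,
            community_id: str,
            collection_name: str,
            use_cache: bool = True,  # assumed default, hence the explicit opt-out
        ) -> None:
            self.community_id = community_id
            self.collection_name = collection_name
            # With use_cache=False no ingestion cache is attached, so each run
            # re-processes and re-embeds documents instead of reusing stored results.
            self.cache: Optional[Any] = self._build_cache() if use_cache else None

        def _build_cache(self) -> Any:
            # Stand-in for whatever cache backend the real pipeline wires up;
            # the actual backend is not visible anywhere in this diff.
            return {}

    # Mirrors the call sites in this PR:
    pipeline = CustomIngestionPipeline("community-1", collection_name="platform-1", use_cache=False)

If the real default is indeed True, omitting the flag would silently re-enable caching, which would explain why each call site opts out explicitly rather than relying on the default.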