From a20c1d73c7ac976f9626268d2ec99b5fdc6e1a62 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sat, 3 Jan 2026 00:09:19 +0000
Subject: [PATCH 01/25] Refactor configuration management in wizard and
 ChronicleSetup

- Updated wizard.py to read Obsidian/Neo4j configuration from config.yml, enhancing flexibility and error handling.
- Refactored ChronicleSetup to utilize ConfigManager for loading and verifying config.yml, ensuring a single source of truth.
- Improved user feedback for missing configuration files and streamlined the setup process for memory and transcription providers.
---
 backends/advanced/init.py | 91 ++++++++++++++-------------------------
 wizard.py                 | 20 +++++----
 2 files changed, 44 insertions(+), 67 deletions(-)

diff --git a/backends/advanced/init.py b/backends/advanced/init.py
index f093bf4d..f8231db8 100644
--- a/backends/advanced/init.py
+++ b/backends/advanced/init.py
@@ -33,22 +33,21 @@ def __init__(self, args=None):
         self.config: Dict[str, Any] = {}
         self.args = args or argparse.Namespace()
         self.config_yml_path = Path("../../config/config.yml")  # Main config at config/config.yml
-        self.config_yml_data = None
 
         # Check if we're in the right directory
         if not Path("pyproject.toml").exists() or not Path("src").exists():
             self.console.print("[red][ERROR][/red] Please run this script from the backends/advanced directory")
             sys.exit(1)
 
-        # Initialize ConfigManager
+        # Initialize ConfigManager (single source of truth for config.yml)
         self.config_manager = ConfigManager(service_path="backends/advanced")
         self.console.print(f"[blue][INFO][/blue] Using config.yml at: {self.config_manager.config_yml_path}")
 
-        # Load existing config or create default structure
-        self.config_yml_data = self.config_manager.get_full_config()
-        if not self.config_yml_data:
-            self.console.print("[yellow][WARNING][/yellow] config.yml not found, will create default structure")
-            self.config_yml_data = self._get_default_config_structure()
+        # Verify config.yml exists - fail fast if missing
+        if not self.config_manager.config_yml_path.exists():
+            self.console.print("[red][ERROR][/red] config.yml not found at {self.config_manager.config_yml_path}")
+            self.console.print("[red][ERROR][/red] Run wizard.py from project root to create config.yml")
+            sys.exit(1)
 
     def print_header(self, title: str):
         """Print a colorful header"""
@@ -138,28 +137,6 @@ def mask_api_key(self, key: str, show_chars: int = 5) -> str:
         return f"{key_clean[:show_chars]}{'*' * min(15, len(key_clean) - show_chars * 2)}{key_clean[-show_chars:]}"
 
 
-    def _get_default_config_structure(self) -> Dict[str, Any]:
-        """Return default config.yml structure if file doesn't exist"""
-        return {
-            "defaults": {
-                "llm": "openai-llm",
-                "embedding": "openai-embed",
-                "stt": "stt-deepgram",
-                "tts": "tts-http",
-                "vector_store": "vs-qdrant"
-            },
-            "models": [],
-            "memory": {
-                "provider": "chronicle",
-                "timeout_seconds": 1200,
-                "extraction": {
-                    "enabled": True,
-                    "prompt": "Extract important information from this conversation and return a JSON object with an array named \"facts\"."
-                }
-            }
-        }
-
-
     def setup_authentication(self):
         """Configure authentication settings"""
         self.print_section("Authentication Setup")
@@ -208,7 +185,6 @@ def setup_transcription(self):
 
                 # Update config.yml to use Deepgram
                 self.config_manager.update_config_defaults({"stt": "stt-deepgram"})
-                self.config_yml_data = self.config_manager.get_full_config()  # Reload
 
                 self.console.print("[green][SUCCESS][/green] Deepgram configured in config.yml and .env")
                 self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-deepgram")
@@ -224,7 +200,6 @@ def setup_transcription(self):
 
             # Update config.yml to use Parakeet
             self.config_manager.update_config_defaults({"stt": "stt-parakeet-batch"})
-            self.config_yml_data = self.config_manager.get_full_config()  # Reload
 
             self.console.print("[green][SUCCESS][/green] Parakeet configured in config.yml and .env")
             self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-parakeet-batch")
@@ -266,7 +241,6 @@ def setup_llm(self):
                 self.config["OPENAI_API_KEY"] = api_key
                 # Update config.yml to use OpenAI models
                 self.config_manager.update_config_defaults({"llm": "openai-llm", "embedding": "openai-embed"})
-                self.config_yml_data = self.config_manager.get_full_config()  # Reload to stay in sync
                 self.console.print("[green][SUCCESS][/green] OpenAI configured in config.yml")
                 self.console.print("[blue][INFO][/blue] Set defaults.llm: openai-llm")
                 self.console.print("[blue][INFO][/blue] Set defaults.embedding: openai-embed")
@@ -277,7 +251,6 @@ def setup_llm(self):
             self.console.print("[blue][INFO][/blue] Ollama selected")
             # Update config.yml to use Ollama models
             self.config_manager.update_config_defaults({"llm": "local-llm", "embedding": "local-embed"})
-            self.config_yml_data = self.config_manager.get_full_config()  # Reload to stay in sync
             self.console.print("[green][SUCCESS][/green] Ollama configured in config.yml")
             self.console.print("[blue][INFO][/blue] Set defaults.llm: local-llm")
             self.console.print("[blue][INFO][/blue] Set defaults.embedding: local-embed")
@@ -287,7 +260,6 @@ def setup_llm(self):
             self.console.print("[blue][INFO][/blue] Skipping LLM setup - memory extraction disabled")
             # Disable memory extraction in config.yml
             self.config_manager.update_memory_config({"extraction": {"enabled": False}})
-            self.config_yml_data = self.config_manager.get_full_config()  # Reload to stay in sync
 
     def setup_memory(self):
         """Configure memory provider - updates config.yml"""
@@ -309,7 +281,6 @@ def setup_memory(self):
 
             # Update config.yml (also updates .env automatically)
             self.config_manager.update_memory_config({"provider": "chronicle"})
-            self.config_yml_data = self.config_manager.get_full_config()  # Reload to stay in sync
             self.console.print("[green][SUCCESS][/green] Chronicle memory provider configured in config.yml and .env")
 
         elif choice == "2":
@@ -330,7 +301,6 @@ def setup_memory(self):
                     "timeout": int(timeout)
                 }
             })
-            self.config_yml_data = self.config_manager.get_full_config()  # Reload to stay in sync
             self.console.print("[green][SUCCESS][/green] OpenMemory MCP configured in config.yml and .env")
             self.console.print("[yellow][WARNING][/yellow] Remember to start OpenMemory: cd ../../extras/openmemory-mcp && docker compose up -d")
 
@@ -348,7 +318,6 @@ def setup_memory(self):
                     "timeout": int(timeout)
                 }
             })
-            self.config_yml_data = self.config_manager.get_full_config()  # Reload to stay in sync
             self.console.print("[green][SUCCESS][/green] Mycelia memory provider configured in config.yml and .env")
             self.console.print("[yellow][WARNING][/yellow] Make sure Mycelia is running at the configured URL")
 
@@ -405,21 +374,19 @@ def setup_obsidian(self):
                 neo4j_password = self.prompt_password("Neo4j password (min 8 chars)")
 
         if enable_obsidian:
-            # Update .env with credentials
-            self.config["OBSIDIAN_ENABLED"] = "true"
+            # Update .env with credentials only (secrets, not feature flags)
             self.config["NEO4J_HOST"] = "neo4j-mem0"
             self.config["NEO4J_USER"] = "neo4j"
             self.config["NEO4J_PASSWORD"] = neo4j_password
 
-            # Update config.yml with feature flag
-            if "memory" not in self.config_yml_data:
-                self.config_yml_data["memory"] = {}
-            if "obsidian" not in self.config_yml_data["memory"]:
-                self.config_yml_data["memory"]["obsidian"] = {}
-
-            self.config_yml_data["memory"]["obsidian"]["enabled"] = True
-            self.config_yml_data["memory"]["obsidian"]["neo4j_host"] = "neo4j-mem0"
-            self.config_yml_data["memory"]["obsidian"]["timeout"] = 30
+            # Update config.yml with feature flag (source of truth) - auto-saves via ConfigManager
+            self.config_manager.update_memory_config({
+                "obsidian": {
+                    "enabled": True,
+                    "neo4j_host": "neo4j-mem0",
+                    "timeout": 30
+                }
+            })
 
             self.console.print("[green][SUCCESS][/green] Obsidian/Neo4j configured")
             self.console.print("[blue][INFO][/blue] Neo4j will start automatically with --profile obsidian")
@@ -585,28 +552,32 @@ def show_summary(self):
 
         self.console.print(f"✅ Admin Account: {self.config.get('ADMIN_EMAIL', 'Not configured')}")
 
+        # Get current config from ConfigManager (single source of truth)
+        config_yml = self.config_manager.get_full_config()
+
         # Show transcription from config.yml
-        stt_default = self.config_yml_data.get("defaults", {}).get("stt", "not set")
+        stt_default = config_yml.get("defaults", {}).get("stt", "not set")
         stt_model = next(
-            (m for m in self.config_yml_data.get("models", []) if m.get("name") == stt_default),
+            (m for m in config_yml.get("models", []) if m.get("name") == stt_default),
             None
         )
         stt_provider = stt_model.get("model_provider", "unknown") if stt_model else "not configured"
         self.console.print(f"✅ Transcription: {stt_provider} ({stt_default}) - config.yml")
 
         # Show LLM config from config.yml
-        llm_default = self.config_yml_data.get("defaults", {}).get("llm", "not set")
-        embedding_default = self.config_yml_data.get("defaults", {}).get("embedding", "not set")
+        llm_default = config_yml.get("defaults", {}).get("llm", "not set")
+        embedding_default = config_yml.get("defaults", {}).get("embedding", "not set")
         self.console.print(f"✅ LLM: {llm_default} (config.yml)")
         self.console.print(f"✅ Embedding: {embedding_default} (config.yml)")
 
         # Show memory provider from config.yml
-        memory_provider = self.config_yml_data.get("memory", {}).get("provider", "chronicle")
+        memory_provider = config_yml.get("memory", {}).get("provider", "chronicle")
         self.console.print(f"✅ Memory Provider: {memory_provider} (config.yml)")
 
-        # Show Obsidian/Neo4j status
-        if self.config.get('OBSIDIAN_ENABLED') == 'true':
-            neo4j_host = self.config.get('NEO4J_HOST', 'not set')
+        # Show Obsidian/Neo4j status (read from config.yml)
+        obsidian_config = config_yml.get("memory", {}).get("obsidian", {})
+        if obsidian_config.get("enabled", False):
+            neo4j_host = obsidian_config.get("neo4j_host", "not set")
             self.console.print(f"✅ Obsidian/Neo4j: Enabled ({neo4j_host})")
 
         # Auto-determine URLs based on HTTPS configuration
@@ -625,9 +596,13 @@ def show_next_steps(self):
         self.print_section("Next Steps")
         self.console.print()
 
+        # Get current config from ConfigManager (single source of truth)
+        config_yml = self.config_manager.get_full_config()
+
         self.console.print("1. Start the main services:")
-        # Include --profile obsidian if Obsidian is enabled
-        if self.config.get('OBSIDIAN_ENABLED') == 'true':
+        # Include --profile obsidian if Obsidian is enabled (read from config.yml)
+        obsidian_enabled = config_yml.get("memory", {}).get("obsidian", {}).get("enabled", False)
+        if obsidian_enabled:
             self.console.print("   [cyan]docker compose --profile obsidian up --build -d[/cyan]")
             self.console.print("   [dim](Includes Neo4j for Obsidian integration)[/dim]")
         else:
diff --git a/wizard.py b/wizard.py
index d78a910c..a2e2b2f7 100755
--- a/wizard.py
+++ b/wizard.py
@@ -9,6 +9,7 @@
 import sys
 from datetime import datetime
 from pathlib import Path
+import yaml
 
 from dotenv import get_key
 from rich import print as rprint
@@ -449,17 +450,18 @@ def main():
         else:
             failed_services.append(service)
 
-    # Check for Obsidian/Neo4j configuration
+    # Check for Obsidian/Neo4j configuration (read from config.yml)
     obsidian_enabled = False
     if 'advanced' in selected_services and 'advanced' not in failed_services:
-        backend_env_path = Path('backends/advanced/.env')
-        if backend_env_path.exists():
-            neo4j_host = read_env_value(str(backend_env_path), 'NEO4J_HOST')
-            obsidian_enabled_flag = read_env_value(str(backend_env_path), 'OBSIDIAN_ENABLED')
-            if neo4j_host and not is_placeholder(neo4j_host, 'your-neo4j-host-here', 'your_neo4j_host_here'):
-                obsidian_enabled = True
-            elif obsidian_enabled_flag == 'true':
-                obsidian_enabled = True
+        config_yml_path = Path('config/config.yml')
+        if config_yml_path.exists():
+            try:
+                with open(config_yml_path, 'r') as f:
+                    config_data = yaml.safe_load(f)
+                    obsidian_config = config_data.get('memory', {}).get('obsidian', {})
+                    obsidian_enabled = obsidian_config.get('enabled', False)
+            except Exception as e:
+                console.print(f"[yellow]Warning: Could not read config.yml: {e}[/yellow]")
 
     # Final Summary
     console.print(f"\n🎊 [bold green]Setup Complete![/bold green]")

From ad4b1f95ff71c923bf5c37de0def7da188c6b75a Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sat, 3 Jan 2026 00:26:38 +0000
Subject: [PATCH 02/25] Fix string formatting for error message in
 ChronicleSetup

---
 backends/advanced/init.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/advanced/init.py b/backends/advanced/init.py
index f8231db8..fe04fd15 100644
--- a/backends/advanced/init.py
+++ b/backends/advanced/init.py
@@ -45,7 +45,7 @@ def __init__(self, args=None):
 
         # Verify config.yml exists - fail fast if missing
         if not self.config_manager.config_yml_path.exists():
-            self.console.print("[red][ERROR][/red] config.yml not found at {self.config_manager.config_yml_path}")
+            self.console.print(f"[red][ERROR][/red] config.yml not found at {self.config_manager.config_yml_path}")
             self.console.print("[red][ERROR][/red] Run wizard.py from project root to create config.yml")
             sys.exit(1)
 

From ff061e02ff3d0da026da269bd0ec3447c8048858 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sat, 3 Jan 2026 01:03:27 +0000
Subject: [PATCH 03/25] Enhance chat configuration management and UI
 integration

- Updated `services.py` to allow service restart with an option to recreate containers, addressing WSL2 bind mount issues.
- Added new chat configuration management functions in `system_controller.py` for loading, saving, and validating chat prompts.
- Introduced `ChatSettings` component in the web UI for admin users to manage chat configurations easily.
- Updated API service methods in `api.ts` to support chat configuration endpoints.
- Integrated chat settings into the system management page for better accessibility.
---
 .../src/advanced_omi_backend/chat_service.py  |  37 +++-
 .../controllers/system_controller.py          | 100 +++++++++
 .../routers/modules/system_routes.py          |  50 ++++-
 .../webui/src/components/ChatSettings.tsx     | 195 ++++++++++++++++++
 backends/advanced/webui/src/pages/System.tsx  |   6 +
 backends/advanced/webui/src/services/api.ts   |  11 +
 services.py                                   |  33 ++-
 tests/endpoints/system_admin_tests.robot      |  57 +++++
 8 files changed, 475 insertions(+), 14 deletions(-)
 create mode 100644 backends/advanced/webui/src/components/ChatSettings.tsx

diff --git a/backends/advanced/src/advanced_omi_backend/chat_service.py b/backends/advanced/src/advanced_omi_backend/chat_service.py
index de92a4b9..647fa7d1 100644
--- a/backends/advanced/src/advanced_omi_backend/chat_service.py
+++ b/backends/advanced/src/advanced_omi_backend/chat_service.py
@@ -22,6 +22,7 @@
 
 from advanced_omi_backend.database import get_database
 from advanced_omi_backend.llm_client import get_llm_client
+from advanced_omi_backend.model_registry import get_models_registry
 from advanced_omi_backend.services.memory import get_memory_service
 from advanced_omi_backend.services.memory.base import MemoryEntry
 from advanced_omi_backend.services.obsidian_service import (
@@ -133,7 +134,7 @@ def from_dict(cls, data: Dict) -> "ChatSession":
 
 class ChatService:
     """Service for managing chat sessions and memory-enhanced conversations."""
-    
+
     def __init__(self):
         self.db = None
         self.sessions_collection: Optional[AsyncIOMotorCollection] = None
@@ -142,6 +143,32 @@ def __init__(self):
         self.memory_service = None
         self._initialized = False
 
+    def _get_system_prompt(self) -> str:
+        """
+        Get system prompt from config with fallback to default.
+
+        Returns:
+            str: System prompt for chat interactions
+        """
+        try:
+            reg = get_models_registry()
+            if reg and hasattr(reg, 'config'):
+                chat_config = reg.config.get('chat', {})
+                prompt = chat_config.get('system_prompt')
+                if prompt:
+                    logger.debug("Loaded chat system prompt from config")
+                    return prompt
+        except Exception as e:
+            logger.warning(f"Failed to load chat system prompt from config: {e}")
+
+        # Fallback to default
+        logger.debug("Using default chat system prompt")
+        return """You are a helpful AI assistant with access to the user's personal memories and conversation history.
+
+Use the provided memories and conversation context to give personalized, contextual responses. If memories are relevant, reference them naturally in your response. Be conversational and helpful.
+
+If no relevant memories are available, respond normally based on the conversation context."""
+
     async def initialize(self):
         """Initialize the chat service with database connections."""
         if self._initialized:
@@ -392,12 +419,8 @@ async def generate_response_stream(
                 "timestamp": time.time()
             }
 
-            # Create system prompt
-            system_prompt = """You are a helpful AI assistant with access to the user's personal memories and conversation history. 
-
-Use the provided memories and conversation context to give personalized, contextual responses. If memories are relevant, reference them naturally in your response. Be conversational and helpful.
-
-If no relevant memories are available, respond normally based on the conversation context."""
+            # Get system prompt from config
+            system_prompt = self._get_system_prompt()
 
             # Prepare full prompt
             full_prompt = f"{system_prompt}\n\n{context}"
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py
index 17b9cbcf..aced763f 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py
@@ -455,3 +455,103 @@ async def set_memory_provider(provider: str):
     except Exception as e:
         logger.exception("Error setting memory provider")
         raise e
+
+
+# Chat Configuration Management Functions
+
+async def get_chat_config_yaml() -> str:
+    """Get chat system prompt as plain text."""
+    try:
+        config_path = _find_config_path()
+
+        default_prompt = """You are a helpful AI assistant with access to the user's personal memories and conversation history.
+
+Use the provided memories and conversation context to give personalized, contextual responses. If memories are relevant, reference them naturally in your response. Be conversational and helpful.
+
+If no relevant memories are available, respond normally based on the conversation context."""
+
+        if not os.path.exists(config_path):
+            return default_prompt
+
+        with open(config_path, 'r') as f:
+            full_config = yaml.safe_load(f) or {}
+
+        chat_config = full_config.get('chat', {})
+        system_prompt = chat_config.get('system_prompt', default_prompt)
+
+        # Return just the prompt text, not the YAML structure
+        return system_prompt
+
+    except Exception as e:
+        logger.error(f"Error loading chat config: {e}")
+        raise
+
+
+async def save_chat_config_yaml(prompt_text: str) -> dict:
+    """Save chat system prompt from plain text."""
+    try:
+        config_path = _find_config_path()
+
+        # Validate plain text prompt
+        if not prompt_text or not isinstance(prompt_text, str):
+            raise ValueError("Prompt must be a non-empty string")
+
+        prompt_text = prompt_text.strip()
+        if len(prompt_text) < 10:
+            raise ValueError("Prompt too short (minimum 10 characters)")
+        if len(prompt_text) > 10000:
+            raise ValueError("Prompt too long (maximum 10000 characters)")
+
+        # Create chat config dict
+        chat_config = {'system_prompt': prompt_text}
+
+        # Load full config
+        if os.path.exists(config_path):
+            with open(config_path, 'r') as f:
+                full_config = yaml.safe_load(f) or {}
+        else:
+            full_config = {}
+
+        # Backup existing config
+        if os.path.exists(config_path):
+            backup_path = str(config_path) + '.backup'
+            shutil.copy2(config_path, backup_path)
+            logger.info(f"Created config backup at {backup_path}")
+
+        # Update chat section
+        full_config['chat'] = chat_config
+
+        # Save
+        with open(config_path, 'w') as f:
+            yaml.dump(full_config, f, default_flow_style=False, allow_unicode=True)
+
+        # Reload config in memory (hot-reload)
+        load_models_config(force_reload=True)
+
+        logger.info("Chat configuration updated successfully")
+
+        return {"success": True, "message": "Chat configuration updated successfully"}
+
+    except Exception as e:
+        logger.error(f"Error saving chat config: {e}")
+        raise
+
+
+async def validate_chat_config_yaml(prompt_text: str) -> dict:
+    """Validate chat system prompt plain text."""
+    try:
+        # Validate plain text prompt
+        if not isinstance(prompt_text, str):
+            return {"valid": False, "error": "Prompt must be a string"}
+
+        prompt_text = prompt_text.strip()
+        if len(prompt_text) < 10:
+            return {"valid": False, "error": "Prompt too short (minimum 10 characters)"}
+        if len(prompt_text) > 10000:
+            return {"valid": False, "error": "Prompt too long (maximum 10000 characters)"}
+
+        return {"valid": True, "message": "Configuration is valid"}
+
+    except Exception as e:
+        logger.error(f"Error validating chat config: {e}")
+        return {"valid": False, "error": f"Validation error: {str(e)}"}
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py
index ead61ffa..e2b49676 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py
@@ -7,7 +7,8 @@
 import logging
 from typing import Optional
 
-from fastapi import APIRouter, Body, Depends, Request
+from fastapi import APIRouter, Body, Depends, HTTPException, Request
+from fastapi.responses import Response
 from pydantic import BaseModel
 
 from advanced_omi_backend.auth import current_active_user, current_superuser
@@ -128,6 +129,53 @@ async def delete_all_user_memories(current_user: User = Depends(current_active_u
     return await system_controller.delete_all_user_memories(current_user)
 
 
+# Chat Configuration Management Endpoints
+
+@router.get("/admin/chat/config", response_class=Response)
+async def get_chat_config(current_user: User = Depends(current_superuser)):
+    """Get chat configuration as YAML. Admin only."""
+    try:
+        yaml_content = await system_controller.get_chat_config_yaml()
+        return Response(content=yaml_content, media_type="text/plain")
+    except Exception as e:
+        logger.error(f"Failed to get chat config: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/admin/chat/config")
+async def save_chat_config(
+    request: Request,
+    current_user: User = Depends(current_superuser)
+):
+    """Save chat configuration from YAML. Admin only."""
+    try:
+        yaml_content = await request.body()
+        yaml_str = yaml_content.decode('utf-8')
+        result = await system_controller.save_chat_config_yaml(yaml_str)
+        return result
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        logger.error(f"Failed to save chat config: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/admin/chat/config/validate")
+async def validate_chat_config(
+    request: Request,
+    current_user: User = Depends(current_superuser)
+):
+    """Validate chat configuration YAML. Admin only."""
+    try:
+        yaml_content = await request.body()
+        yaml_str = yaml_content.decode('utf-8')
+        result = await system_controller.validate_chat_config_yaml(yaml_str)
+        return result
+    except Exception as e:
+        logger.error(f"Failed to validate chat config: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
 @router.get("/streaming/status")
 async def get_streaming_status(request: Request, current_user: User = Depends(current_superuser)):
     """Get status of active streaming sessions and Redis Streams health. Admin only."""
diff --git a/backends/advanced/webui/src/components/ChatSettings.tsx b/backends/advanced/webui/src/components/ChatSettings.tsx
new file mode 100644
index 00000000..1acad362
--- /dev/null
+++ b/backends/advanced/webui/src/components/ChatSettings.tsx
@@ -0,0 +1,195 @@
+import { useState, useEffect } from 'react'
+import { MessageSquare, RefreshCw, CheckCircle, Save, RotateCcw, AlertCircle } from 'lucide-react'
+import { systemApi } from '../services/api'
+import { useAuth } from '../contexts/AuthContext'
+
+interface ChatSettingsProps {
+  className?: string
+}
+
+export default function ChatSettings({ className }: ChatSettingsProps) {
+  const [configYaml, setConfigYaml] = useState('')
+  const [loading, setLoading] = useState(false)
+  const [validating, setValidating] = useState(false)
+  const [saving, setSaving] = useState(false)
+  const [message, setMessage] = useState('')
+  const [error, setError] = useState('')
+  const { isAdmin } = useAuth()
+
+  useEffect(() => {
+    loadChatConfig()
+  }, [])
+
+  const loadChatConfig = async () => {
+    setLoading(true)
+    setError('')
+    setMessage('')
+
+    try {
+      const response = await systemApi.getChatConfigRaw()
+      setConfigYaml(response.data.config_yaml || response.data)
+      setMessage('Configuration loaded successfully')
+      setTimeout(() => setMessage(''), 3000)
+    } catch (err: any) {
+      const status = err.response?.status
+      if (status === 401) {
+        setError('Unauthorized: admin privileges required')
+      } else {
+        setError(err.response?.data?.error || 'Failed to load configuration')
+      }
+    } finally {
+      setLoading(false)
+    }
+  }
+
+  const validateConfig = async () => {
+    if (!configYaml.trim()) {
+      setError('Configuration cannot be empty')
+      return
+    }
+
+    setValidating(true)
+    setError('')
+    setMessage('')
+
+    try {
+      const response = await systemApi.validateChatConfig(configYaml)
+      if (response.data.valid) {
+        setMessage('✅ Configuration is valid')
+      } else {
+        setError(response.data.error || 'Validation failed')
+      }
+      setTimeout(() => setMessage(''), 3000)
+    } catch (err: any) {
+      setError(err.response?.data?.error || 'Validation failed')
+    } finally {
+      setValidating(false)
+    }
+  }
+
+  const saveConfig = async () => {
+    if (!configYaml.trim()) {
+      setError('Configuration cannot be empty')
+      return
+    }
+
+    setSaving(true)
+    setError('')
+    setMessage('')
+
+    try {
+      await systemApi.updateChatConfigRaw(configYaml)
+      setMessage('✅ Configuration saved successfully')
+      setTimeout(() => setMessage(''), 5000)
+    } catch (err: any) {
+      setError(err.response?.data?.error || 'Failed to save configuration')
+    } finally {
+      setSaving(false)
+    }
+  }
+
+  const resetConfig = () => {
+    loadChatConfig()
+    setMessage('Configuration reset to file version')
+    setTimeout(() => setMessage(''), 3000)
+  }
+
+  if (!isAdmin) {
+    return null
+  }
+
+  return (
+    <div className={className}>
+      <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-6">
+        {/* Header */}
+        <div className="flex items-center justify-between mb-4">
+          <div className="flex items-center space-x-2">
+            <MessageSquare className="h-5 w-5 text-blue-600" />
+            <h3 className="text-lg font-semibold text-gray-900 dark:text-gray-100">
+              Chat System Prompt
+            </h3>
+          </div>
+          <div className="flex items-center space-x-2">
+            <button
+              onClick={resetConfig}
+              disabled={loading || saving}
+              className="flex items-center space-x-1 px-3 py-1.5 text-sm text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-gray-200 disabled:opacity-50"
+            >
+              <RotateCcw className="h-4 w-4" />
+              <span>Reset</span>
+            </button>
+            <button
+              onClick={loadChatConfig}
+              disabled={loading || saving}
+              className="flex items-center space-x-1 px-3 py-1.5 text-sm text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-gray-200 disabled:opacity-50"
+            >
+              <RefreshCw className={`h-4 w-4 ${loading ? 'animate-spin' : ''}`} />
+              <span>Reload</span>
+            </button>
+          </div>
+        </div>
+
+        {/* Messages */}
+        {message && (
+          <div className="mb-4 p-3 bg-green-50 dark:bg-green-900/20 border border-green-200 dark:border-green-800 rounded-md flex items-start space-x-2">
+            <CheckCircle className="h-5 w-5 text-green-600 dark:text-green-400 mt-0.5" />
+            <p className="text-sm text-green-700 dark:text-green-300">{message}</p>
+          </div>
+        )}
+
+        {error && (
+          <div className="mb-4 p-3 bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-md flex items-start space-x-2">
+            <AlertCircle className="h-5 w-5 text-red-600 dark:text-red-400 mt-0.5" />
+            <p className="text-sm text-red-700 dark:text-red-300">{error}</p>
+          </div>
+        )}
+
+        {/* Editor */}
+        <div className="mb-4">
+          <textarea
+            value={configYaml}
+            onChange={(e) => setConfigYaml(e.target.value)}
+            disabled={loading || saving}
+            className="w-full h-96 p-4 font-mono text-sm bg-gray-50 dark:bg-gray-900 border border-gray-300 dark:border-gray-600 rounded-md focus:ring-2 focus:ring-blue-500 focus:border-transparent resize-y"
+            placeholder="Loading configuration..."
+            spellCheck={false}
+          />
+        </div>
+
+        {/* Actions */}
+        <div className="flex space-x-3">
+          <button
+            onClick={validateConfig}
+            disabled={loading || validating || saving}
+            className="flex items-center space-x-2 px-4 py-2 text-sm font-medium text-gray-700 dark:text-gray-200 bg-white dark:bg-gray-700 border border-gray-300 dark:border-gray-600 rounded-md hover:bg-gray-50 dark:hover:bg-gray-600 disabled:opacity-50"
+          >
+            <CheckCircle className="h-4 w-4" />
+            <span>{validating ? 'Validating...' : 'Validate'}</span>
+          </button>
+
+          <button
+            onClick={saveConfig}
+            disabled={loading || saving || validating}
+            className="flex items-center space-x-2 px-4 py-2 text-sm font-medium text-white bg-blue-600 rounded-md hover:bg-blue-700 disabled:opacity-50"
+          >
+            <Save className="h-4 w-4" />
+            <span>{saving ? 'Saving...' : 'Save Changes'}</span>
+          </button>
+        </div>
+
+        {/* Help text */}
+        <div className="mt-6 p-4 bg-blue-50 dark:bg-blue-900/20 border border-blue-200 dark:border-blue-800 rounded-md">
+          <h4 className="text-sm font-medium text-blue-900 dark:text-blue-100 mb-2">
+            Configuration Help
+          </h4>
+          <ul className="text-sm text-blue-700 dark:text-blue-300 space-y-1 list-disc list-inside">
+            <li>Define the AI assistant's behavior and personality</li>
+            <li>Use YAML multi-line format (|) for readable prompts</li>
+            <li>Changes take effect immediately (no restart required)</li>
+            <li>Keep prompts clear and focused for best results</li>
+          </ul>
+        </div>
+      </div>
+    </div>
+  )
+}
diff --git a/backends/advanced/webui/src/pages/System.tsx b/backends/advanced/webui/src/pages/System.tsx
index 4ad2581a..b05cf387 100644
--- a/backends/advanced/webui/src/pages/System.tsx
+++ b/backends/advanced/webui/src/pages/System.tsx
@@ -3,6 +3,7 @@ import { Settings, RefreshCw, CheckCircle, XCircle, AlertCircle, Activity, Users
 import { systemApi, speakerApi } from '../services/api'
 import { useAuth } from '../contexts/AuthContext'
 import MemorySettings from '../components/MemorySettings'
+import ChatSettings from '../components/ChatSettings'
 
 interface HealthData {
   status: 'healthy' | 'partial' | 'unhealthy'
@@ -747,6 +748,11 @@ export default function System() {
         )}
       </div>
 
+      {/* Chat Configuration - Full Width Section */}
+      <div className="mt-6">
+        <ChatSettings />
+      </div>
+
       {/* Memory Configuration - Full Width Section */}
       <div className="mt-6">
         <MemorySettings />
diff --git a/backends/advanced/webui/src/services/api.ts b/backends/advanced/webui/src/services/api.ts
index 36e6f5aa..35964fc2 100644
--- a/backends/advanced/webui/src/services/api.ts
+++ b/backends/advanced/webui/src/services/api.ts
@@ -169,6 +169,17 @@ export const systemApi = {
     }),
   reloadMemoryConfig: () => api.post('/api/admin/memory/config/reload'),
 
+  // Chat Configuration Management
+  getChatConfigRaw: () => api.get('/api/admin/chat/config'),
+  updateChatConfigRaw: (configYaml: string) =>
+    api.post('/api/admin/chat/config', configYaml, {
+      headers: { 'Content-Type': 'text/plain' }
+    }),
+  validateChatConfig: (configYaml: string) =>
+    api.post('/api/admin/chat/config/validate', configYaml, {
+      headers: { 'Content-Type': 'text/plain' }
+    }),
+
   // Memory Provider Management
   getMemoryProvider: () => api.get('/api/admin/memory/provider'),
   setMemoryProvider: (provider: string) => api.post('/api/admin/memory/provider', { provider }),
diff --git a/services.py b/services.py
index 0ffa014a..b4ed44fd 100755
--- a/services.py
+++ b/services.py
@@ -274,10 +274,15 @@ def stop_services(services):
 
     console.print(f"\n[green]🎉 {success_count}/{len(services)} services stopped successfully[/green]")
 
-def restart_services(services):
+def restart_services(services, recreate=False):
     """Restart specified services"""
     console.print(f"🔄 [bold]Restarting {len(services)} services...[/bold]")
 
+    if recreate:
+        console.print("[dim]Using down + up to recreate containers (fixes WSL2 bind mount issues)[/dim]\n")
+    else:
+        console.print("[dim]Quick restart (use --recreate to fix bind mount issues)[/dim]\n")
+
     success_count = 0
     for service_name in services:
         if service_name not in SERVICES:
@@ -289,11 +294,25 @@ def restart_services(services):
             continue
 
         console.print(f"\n🔧 Restarting {service_name}...")
-        if run_compose_command(service_name, 'restart'):
-            console.print(f"[green]✅ {service_name} restarted[/green]")
-            success_count += 1
+
+        if recreate:
+            # Full recreation: down + up (fixes bind mount issues)
+            if not run_compose_command(service_name, 'down'):
+                console.print(f"[red]❌ Failed to stop {service_name}[/red]")
+                continue
+
+            if run_compose_command(service_name, 'up'):
+                console.print(f"[green]✅ {service_name} restarted[/green]")
+                success_count += 1
+            else:
+                console.print(f"[red]❌ Failed to start {service_name}[/red]")
         else:
-            console.print(f"[red]❌ Failed to restart {service_name}[/red]")
+            # Quick restart: docker compose restart
+            if run_compose_command(service_name, 'restart'):
+                console.print(f"[green]✅ {service_name} restarted[/green]")
+                success_count += 1
+            else:
+                console.print(f"[red]❌ Failed to restart {service_name}[/red]")
 
     console.print(f"\n[green]🎉 {success_count}/{len(services)} services restarted successfully[/green]")
 
@@ -343,6 +362,8 @@ def main():
     restart_parser.add_argument('services', nargs='*',
                                help='Services to restart: backend, speaker-recognition, asr-services, openmemory-mcp (or use --all)')
     restart_parser.add_argument('--all', action='store_true', help='Restart all services')
+    restart_parser.add_argument('--recreate', action='store_true',
+                               help='Recreate containers (down + up) instead of quick restart - fixes WSL2 bind mount issues')
 
     # Status command
     subparsers.add_parser('status', help='Show service status')
@@ -406,7 +427,7 @@ def main():
             console.print("[red]❌ No services specified. Use --all or specify service names.[/red]")
             return
 
-        restart_services(services)
+        restart_services(services, recreate=args.recreate)
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/tests/endpoints/system_admin_tests.robot b/tests/endpoints/system_admin_tests.robot
index 283c1865..ec5e1fb2 100644
--- a/tests/endpoints/system_admin_tests.robot
+++ b/tests/endpoints/system_admin_tests.robot
@@ -149,6 +149,62 @@ Delete All User Memories Test
     Dictionary Should Contain Key    ${result}    message
 
 
+Get Chat Configuration Test
+    [Documentation]    Test getting chat system prompt (admin only)
+    [Tags]    infra	permissions
+
+    ${response}=       GET On Session    api    /api/admin/chat/config
+    Should Be Equal As Integers    ${response.status_code}    200
+
+    # Response should be plain text
+    ${prompt}=         Set Variable    ${response.text}
+    Should Not Be Empty    ${prompt}
+    Should Not Contain     ${prompt}    system_prompt:    msg=Should not contain YAML key
+    Should Contain         ${prompt}    helpful AI assistant    msg=Should contain default prompt content
+
+Validate Chat Configuration Test
+    [Documentation]    Test chat configuration validation
+    [Tags]    infra	permissions
+
+    # Valid prompt should pass
+    ${valid_prompt}=   Set Variable    You are a friendly AI assistant that helps users with their daily tasks.
+    ${response}=       POST On Session    api    /api/admin/chat/config/validate
+    ...                data=${valid_prompt}
+    ...                headers={"Content-Type": "text/plain"}
+    Should Be Equal As Integers    ${response.status_code}    200
+    ${result}=         Set Variable    ${response.json()}
+    Should Be True     ${result}[valid] == $True
+
+    # Too short should fail
+    ${short_prompt}=   Set Variable    Hi
+    ${response}=       POST On Session    api    /api/admin/chat/config/validate
+    ...                data=${short_prompt}
+    ...                headers={"Content-Type": "text/plain"}
+    Should Be Equal As Integers    ${response.status_code}    200
+    ${result}=         Set Variable    ${response.json()}
+    Should Be True     ${result}[valid] == $False
+    Should Contain     ${result}[error]    too short    msg=Error should mention prompt is too short
+
+Save And Retrieve Chat Configuration Test
+    [Documentation]    Test saving and retrieving chat configuration
+    [Tags]    infra	permissions
+
+    # Save custom prompt
+    ${custom_prompt}=  Set Variable    You are a specialized AI assistant for technical support and troubleshooting.
+    ${response}=       POST On Session    api    /api/admin/chat/config
+    ...                data=${custom_prompt}
+    ...                headers={"Content-Type": "text/plain"}
+    Should Be Equal As Integers    ${response.status_code}    200
+    ${result}=         Set Variable    ${response.json()}
+    Should Be True     ${result}[success] == $True
+
+    # Retrieve and verify
+    ${response}=       GET On Session    api    /api/admin/chat/config
+    Should Be Equal As Integers    ${response.status_code}    200
+    ${retrieved}=      Set Variable    ${response.text}
+    Should Be Equal    ${retrieved}    ${custom_prompt}    msg=Retrieved prompt should match saved prompt
+
+
 Non-Admin Cannot Access Admin Endpoints Test
     [Documentation]    Test that non-admin users cannot access admin endpoints
     [Tags]    infra	permissions
@@ -163,6 +219,7 @@ Non-Admin Cannot Access Admin Endpoints Test
     ...                /api/speaker-service-status
     ...                /api/admin/memory/config/raw
     ...                /api/admin/memory/config/reload
+    ...                /api/admin/chat/config
     ...                /api/process-audio-files/jobs
 
     FOR    ${endpoint}    IN    @{endpoints}

From 5f8d868ae1638d5267ace8b67f0c27e23b916cc9 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sat, 3 Jan 2026 01:39:53 +0000
Subject: [PATCH 04/25] Refactor backend shutdown process and enhance chat
 service configuration logging

- Updated `start.sh` to improve shutdown handling by explicitly killing the backend process if running.
- Modified `chat_service.py` to enhance logging for loading chat system prompts, providing clearer feedback on configuration usage.
- Added a new `chat` field in `model_registry.py` for better chat service configuration management.
- Updated vector store query parameters in `vector_stores.py` for improved clarity and functionality.
- Enhanced the chat component in the web UI to conditionally auto-scroll based on message sending status.
---
 .../src/advanced_omi_backend/chat_service.py       |  9 +++++----
 .../src/advanced_omi_backend/model_registry.py     | 14 ++++++++++----
 .../services/memory/providers/vector_stores.py     | 12 ++++++------
 backends/advanced/start.sh                         |  3 ++-
 backends/advanced/webui/src/pages/Chat.tsx         | 13 ++++++++-----
 5 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/backends/advanced/src/advanced_omi_backend/chat_service.py b/backends/advanced/src/advanced_omi_backend/chat_service.py
index 647fa7d1..16cba331 100644
--- a/backends/advanced/src/advanced_omi_backend/chat_service.py
+++ b/backends/advanced/src/advanced_omi_backend/chat_service.py
@@ -152,17 +152,18 @@ def _get_system_prompt(self) -> str:
         """
         try:
             reg = get_models_registry()
-            if reg and hasattr(reg, 'config'):
-                chat_config = reg.config.get('chat', {})
+            if reg and hasattr(reg, 'chat'):
+                chat_config = reg.chat
                 prompt = chat_config.get('system_prompt')
                 if prompt:
-                    logger.debug("Loaded chat system prompt from config")
+                    logger.info(f"✅ Loaded chat system prompt from config (length: {len(prompt)} chars)")
+                    logger.debug(f"System prompt: {prompt[:100]}...")
                     return prompt
         except Exception as e:
             logger.warning(f"Failed to load chat system prompt from config: {e}")
 
         # Fallback to default
-        logger.debug("Using default chat system prompt")
+        logger.info("⚠️ Using default chat system prompt (config not found)")
         return """You are a helpful AI assistant with access to the user's personal memories and conversation history.
 
 Use the provided memories and conversation context to give personalized, contextual responses. If memories are relevant, reference them naturally in your response. Be conversational and helpful.
diff --git a/backends/advanced/src/advanced_omi_backend/model_registry.py b/backends/advanced/src/advanced_omi_backend/model_registry.py
index 53d919ca..18f464ae 100644
--- a/backends/advanced/src/advanced_omi_backend/model_registry.py
+++ b/backends/advanced/src/advanced_omi_backend/model_registry.py
@@ -160,15 +160,15 @@ def validate_model(self) -> ModelDef:
 
 class AppModels(BaseModel):
     """Application models registry.
-    
+
     Contains default model selections and all available model definitions.
     """
-    
+
     model_config = ConfigDict(
         extra='allow',
         validate_assignment=True,
     )
-    
+
     defaults: Dict[str, str] = Field(
         default_factory=dict,
         description="Default model names for each model_type"
@@ -185,6 +185,10 @@ class AppModels(BaseModel):
         default_factory=dict,
         description="Speaker recognition service configuration"
     )
+    chat: Dict[str, Any] = Field(
+        default_factory=dict,
+        description="Chat service configuration including system prompt"
+    )
     
     def get_by_name(self, name: str) -> Optional[ModelDef]:
         """Get a model by its unique name.
@@ -318,6 +322,7 @@ def load_models_config(force_reload: bool = False) -> Optional[AppModels]:
     model_list = raw.get("models", []) or []
     memory_settings = raw.get("memory", {}) or {}
     speaker_recognition_cfg = raw.get("speaker_recognition", {}) or {}
+    chat_settings = raw.get("chat", {}) or {}
 
     # Parse and validate models using Pydantic
     models: Dict[str, ModelDef] = {}
@@ -336,7 +341,8 @@ def load_models_config(force_reload: bool = False) -> Optional[AppModels]:
         defaults=defaults,
         models=models,
         memory=memory_settings,
-        speaker_recognition=speaker_recognition_cfg
+        speaker_recognition=speaker_recognition_cfg,
+        chat=chat_settings
     )
     return _REGISTRY
 
diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/vector_stores.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/vector_stores.py
index cf153472..85ee200a 100644
--- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/vector_stores.py
+++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/vector_stores.py
@@ -171,19 +171,19 @@ async def search_memories(self, query_embedding: List[float], user_id: str, limi
             # For cosine similarity, scores range from -1 to 1, where 1 is most similar
             search_params = {
                 "collection_name": self.collection_name,
-                "query_vector": query_embedding,
+                "query": query_embedding,
                 "query_filter": search_filter,
                 "limit": limit
             }
-            
+
             if score_threshold > 0.0:
                 search_params["score_threshold"] = score_threshold
                 memory_logger.debug(f"Using similarity threshold: {score_threshold}")
-            
-            results = await self.client.search(**search_params)
-            
+
+            response = await self.client.query_points(**search_params)
+
             memories = []
-            for result in results:
+            for result in response.points:
                 memory = MemoryEntry(
                     id=str(result.id),
                     content=result.payload.get("content", ""),
diff --git a/backends/advanced/start.sh b/backends/advanced/start.sh
index 40fa4abf..5cc79635 100755
--- a/backends/advanced/start.sh
+++ b/backends/advanced/start.sh
@@ -10,7 +10,8 @@ echo "🚀 Starting Chronicle Backend..."
 # Function to handle shutdown
 shutdown() {
     echo "🛑 Shutting down services..."
-    pkill -TERM -P $$
+    # Kill the backend process if running
+    [ -n "$BACKEND_PID" ] && kill -TERM $BACKEND_PID 2>/dev/null || true
     wait
     echo "✅ All services stopped"
     exit 0
diff --git a/backends/advanced/webui/src/pages/Chat.tsx b/backends/advanced/webui/src/pages/Chat.tsx
index 5b3303d7..66384955 100644
--- a/backends/advanced/webui/src/pages/Chat.tsx
+++ b/backends/advanced/webui/src/pages/Chat.tsx
@@ -51,14 +51,17 @@ export default function Chat() {
   const inputRef = useRef<HTMLTextAreaElement>(null)
   
 
-  // Auto-scroll to bottom
+  // Auto-scroll to bottom (only when actively sending messages)
   const scrollToBottom = () => {
     messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' })
   }
 
   useEffect(() => {
-    scrollToBottom()
-  }, [messages, streamingMessage])
+    // Only auto-scroll when streaming or sending messages
+    if (streamingMessage || isSending) {
+      scrollToBottom()
+    }
+  }, [messages, streamingMessage, isSending])
 
   // Load sessions on mount
   useEffect(() => {
@@ -288,9 +291,9 @@ export default function Chat() {
   }
 
   return (
-    <div className="flex h-screen bg-gray-50 dark:bg-gray-900">
+    <div className="flex h-full max-h-screen bg-gray-50 dark:bg-gray-900">
       {/* Sidebar */}
-      <div className="w-80 bg-white dark:bg-gray-800 border-r border-gray-200 dark:border-gray-700 flex flex-col">
+      <div className="w-80 bg-white dark:bg-gray-800 border-r border-gray-200 dark:border-gray-700 flex flex-col max-h-screen">
         {/* Header */}
         <div className="p-4 border-b border-gray-200 dark:border-gray-700">
           <div className="flex items-center justify-between mb-4">

From 5a3f8be14309aadea15b9697f1c29999ca93ab28 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sat, 3 Jan 2026 07:12:34 +0000
Subject: [PATCH 05/25] Implement plugin system for enhanced functionality and
 configuration management

- Introduced a new plugin architecture to allow for extensibility in the Chronicle application.
- Added Home Assistant plugin for controlling devices via natural language commands triggered by wake words.
- Implemented plugin configuration management endpoints in the API for loading, saving, and validating plugin settings.
- Enhanced the web UI with a dedicated Plugins page for managing plugin configurations.
- Updated Docker Compose files to include Tailscale integration for remote service access.
- Refactored existing services to support plugin interactions during conversation and memory processing.
- Improved error handling and logging for plugin initialization and execution processes.
---
 backends/advanced/.env.template               |  17 +-
 backends/advanced/docker-compose-test.yml     |   4 +-
 backends/advanced/docker-compose.yml          |  34 +-
 backends/advanced/init.py                     |  93 ++-
 .../src/advanced_omi_backend/app_factory.py   |  38 ++
 .../controllers/system_controller.py          | 137 ++++
 .../advanced_omi_backend/plugins/__init__.py  |  18 +
 .../src/advanced_omi_backend/plugins/base.py  | 131 ++++
 .../plugins/homeassistant/__init__.py         |   9 +
 .../plugins/homeassistant/command_parser.py   |  97 +++
 .../plugins/homeassistant/entity_cache.py     | 133 ++++
 .../plugins/homeassistant/mcp_client.py       | 421 ++++++++++++
 .../plugins/homeassistant/plugin.py           | 598 ++++++++++++++++++
 .../advanced_omi_backend/plugins/router.py    | 170 +++++
 .../routers/modules/system_routes.py          |  53 +-
 .../services/plugin_service.py                | 108 ++++
 .../workers/conversation_jobs.py              |  75 +++
 .../workers/memory_jobs.py                    |  36 ++
 .../workers/transcription_jobs.py             |  58 ++
 backends/advanced/webui/src/App.tsx           |   6 +
 .../webui/src/components/PluginSettings.tsx   | 195 ++++++
 .../webui/src/components/layout/Layout.tsx    |   3 +-
 backends/advanced/webui/src/pages/Plugins.tsx |   9 +
 backends/advanced/webui/src/services/api.ts   |  11 +
 config/plugins.yml                            |  12 +
 status.py                                     |  49 +-
 tests/endpoints/system_admin_tests.robot      |   8 +-
 wizard.py                                     | 334 +++++++++-
 28 files changed, 2761 insertions(+), 96 deletions(-)
 create mode 100644 backends/advanced/src/advanced_omi_backend/plugins/__init__.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/plugins/base.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/plugins/homeassistant/__init__.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/plugins/homeassistant/command_parser.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/plugins/homeassistant/entity_cache.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/plugins/homeassistant/mcp_client.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/plugins/homeassistant/plugin.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/plugins/router.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/services/plugin_service.py
 create mode 100644 backends/advanced/webui/src/components/PluginSettings.tsx
 create mode 100644 backends/advanced/webui/src/pages/Plugins.tsx
 create mode 100644 config/plugins.yml

diff --git a/backends/advanced/.env.template b/backends/advanced/.env.template
index a63ab6f5..4c071f72 100644
--- a/backends/advanced/.env.template
+++ b/backends/advanced/.env.template
@@ -216,4 +216,19 @@ CORS_ORIGINS=http://localhost:5173,http://localhost:3000,http://127.0.0.1:5173,h
 LANGFUSE_PUBLIC_KEY=""
 LANGFUSE_SECRET_KEY=""
 LANGFUSE_HOST="http://x.x.x.x:3002"
-LANGFUSE_ENABLE_TELEMETRY=False
\ No newline at end of file
+LANGFUSE_ENABLE_TELEMETRY=False
+
+# ========================================
+# TAILSCALE CONFIGURATION (Optional)
+# ========================================
+# Required for accessing remote services on Tailscale network (e.g., Home Assistant plugin)
+#
+# To enable Tailscale Docker integration:
+# 1. Get auth key from: https://login.tailscale.com/admin/settings/keys
+# 2. Set TS_AUTHKEY below
+# 3. Start Tailscale: docker compose --profile tailscale up -d
+#
+# The Tailscale container provides proxy access to remote services at:
+#   http://host.docker.internal:18123 (proxies to Home Assistant on Tailscale)
+#
+TS_AUTHKEY=your-tailscale-auth-key-here
\ No newline at end of file
diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml
index 867edc5f..cf498896 100644
--- a/backends/advanced/docker-compose-test.yml
+++ b/backends/advanced/docker-compose-test.yml
@@ -14,7 +14,7 @@ services:
       - ./data/test_audio_chunks:/app/audio_chunks
       - ./data/test_debug_dir:/app/debug_dir
       - ./data/test_data:/app/data
-      - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml:ro  # Mount config.yml for model registry and memory settings
+      - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml  # Mount config.yml for model registry and memory settings (writable for admin config updates)
     environment:
       # Override with test-specific settings
       - MONGODB_URI=mongodb://mongo-test:27017/test_db
@@ -160,7 +160,7 @@ services:
       - ./data/test_audio_chunks:/app/audio_chunks
       - ./data/test_debug_dir:/app/debug_dir
       - ./data/test_data:/app/data
-      - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml:ro  # Mount config.yml for model registry and memory settings
+      - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml  # Mount config.yml for model registry and memory settings (writable for admin config updates)
     environment:
       # Same environment as backend
       - MONGODB_URI=mongodb://mongo-test:27017/test_db
diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml
index f46a23fa..2d190e77 100644
--- a/backends/advanced/docker-compose.yml
+++ b/backends/advanced/docker-compose.yml
@@ -1,4 +1,30 @@
 services:
+  tailscale:
+    image: tailscale/tailscale:latest
+    container_name: advanced-tailscale
+    hostname: chronicle-tailscale
+    environment:
+      - TS_AUTHKEY=${TS_AUTHKEY}
+      - TS_STATE_DIR=/var/lib/tailscale
+      - TS_USERSPACE=false
+      - TS_ACCEPT_DNS=true
+    volumes:
+      - tailscale-state:/var/lib/tailscale
+    devices:
+      - /dev/net/tun:/dev/net/tun
+    cap_add:
+      - NET_ADMIN
+    restart: unless-stopped
+    profiles:
+      - tailscale  # Optional profile
+    ports:
+      - "18123:18123"  # HA proxy port
+    command: >
+      sh -c "tailscaled &
+             tailscale up --authkey=$${TS_AUTHKEY} --accept-dns=true &&
+             apk add --no-cache socat 2>/dev/null || true &&
+             socat TCP-LISTEN:18123,fork,reuseaddr TCP:100.99.62.5:8123"
+
   chronicle-backend:
     build:
       context: .
@@ -12,7 +38,8 @@ services:
       - ./data/audio_chunks:/app/audio_chunks
       - ./data/debug_dir:/app/debug_dir
       - ./data:/app/data
-      - ../../config/config.yml:/app/config.yml  # Removed :ro to allow UI config saving
+      - ../../config/config.yml:/app/config.yml  # Main config file
+      - ../../config/plugins.yml:/app/plugins.yml  # Plugin configuration
     environment:
       - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
       - PARAKEET_ASR_URL=${PARAKEET_ASR_URL}
@@ -35,6 +62,8 @@ services:
         condition: service_healthy
       redis:
         condition: service_healthy
+    extra_hosts:
+      - "host.docker.internal:host-gateway"  # Access host's Tailscale network
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8000/readiness"]
       interval: 30s
@@ -61,6 +90,7 @@ services:
       - ./data/audio_chunks:/app/audio_chunks
       - ./data:/app/data
       - ../../config/config.yml:/app/config.yml  # Removed :ro for consistency
+      - ../../config/plugins.yml:/app/plugins.yml  # Plugin configuration
     environment:
       - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
       - PARAKEET_ASR_URL=${PARAKEET_ASR_URL}
@@ -226,3 +256,5 @@ volumes:
     driver: local
   neo4j_logs:
     driver: local
+  tailscale-state:
+    driver: local
diff --git a/backends/advanced/init.py b/backends/advanced/init.py
index fe04fd15..7d8169f5 100644
--- a/backends/advanced/init.py
+++ b/backends/advanced/init.py
@@ -136,6 +136,41 @@ def mask_api_key(self, key: str, show_chars: int = 5) -> str:
 
         return f"{key_clean[:show_chars]}{'*' * min(15, len(key_clean) - show_chars * 2)}{key_clean[-show_chars:]}"
 
+    def prompt_with_existing_masked(self, prompt_text: str, env_key: str, placeholders: list,
+                                     is_password: bool = False, default: str = "") -> str:
+        """
+        Prompt for a value, showing masked existing value from .env if present.
+
+        Args:
+            prompt_text: The prompt to display
+            env_key: The .env key to check for existing value
+            placeholders: List of placeholder values to treat as "not set"
+            is_password: Whether to mask the value (for passwords/tokens)
+            default: Default value if no existing value
+
+        Returns:
+            User input value, existing value if reused, or default
+        """
+        existing_value = self.read_existing_env_value(env_key)
+
+        # Check if existing value is valid (not empty and not a placeholder)
+        has_valid_existing = existing_value and existing_value not in placeholders
+
+        if has_valid_existing:
+            # Show masked value with option to reuse
+            if is_password:
+                masked = self.mask_api_key(existing_value)
+                display_prompt = f"{prompt_text} ({masked}) [press Enter to reuse, or enter new]"
+            else:
+                display_prompt = f"{prompt_text} ({existing_value}) [press Enter to reuse, or enter new]"
+
+            user_input = self.prompt_value(display_prompt, "")
+            # If user pressed Enter (empty input), reuse existing value
+            return user_input if user_input else existing_value
+        else:
+            # No existing value, prompt normally
+            return self.prompt_value(prompt_text, default)
+
 
     def setup_authentication(self):
         """Configure authentication settings"""
@@ -169,15 +204,14 @@ def setup_transcription(self):
             self.console.print("[blue][INFO][/blue] Deepgram selected")
             self.console.print("Get your API key from: https://console.deepgram.com/")
 
-            # Check for existing API key
-            existing_key = self.read_existing_env_value("DEEPGRAM_API_KEY")
-            if existing_key and existing_key not in ['your_deepgram_api_key_here', 'your-deepgram-key-here']:
-                masked_key = self.mask_api_key(existing_key)
-                prompt_text = f"Deepgram API key ({masked_key}) [press Enter to reuse, or enter new]"
-                api_key_input = self.prompt_value(prompt_text, "")
-                api_key = api_key_input if api_key_input else existing_key
-            else:
-                api_key = self.prompt_value("Deepgram API key (leave empty to skip)", "")
+            # Use the new masked prompt function
+            api_key = self.prompt_with_existing_masked(
+                prompt_text="Deepgram API key (leave empty to skip)",
+                env_key="DEEPGRAM_API_KEY",
+                placeholders=['your_deepgram_api_key_here', 'your-deepgram-key-here'],
+                is_password=True,
+                default=""
+            )
 
             if api_key:
                 # Write API key to .env
@@ -227,15 +261,14 @@ def setup_llm(self):
             self.console.print("[blue][INFO][/blue] OpenAI selected")
             self.console.print("Get your API key from: https://platform.openai.com/api-keys")
 
-            # Check for existing API key
-            existing_key = self.read_existing_env_value("OPENAI_API_KEY")
-            if existing_key and existing_key not in ['your_openai_api_key_here', 'your-openai-key-here']:
-                masked_key = self.mask_api_key(existing_key)
-                prompt_text = f"OpenAI API key ({masked_key}) [press Enter to reuse, or enter new]"
-                api_key_input = self.prompt_value(prompt_text, "")
-                api_key = api_key_input if api_key_input else existing_key
-            else:
-                api_key = self.prompt_value("OpenAI API key (leave empty to skip)", "")
+            # Use the new masked prompt function
+            api_key = self.prompt_with_existing_masked(
+                prompt_text="OpenAI API key (leave empty to skip)",
+                env_key="OPENAI_API_KEY",
+                placeholders=['your_openai_api_key_here', 'your-openai-key-here'],
+                is_password=True,
+                default=""
+            )
 
             if api_key:
                 self.config["OPENAI_API_KEY"] = api_key
@@ -347,6 +380,12 @@ def setup_optional_services(self):
             self.config["PARAKEET_ASR_URL"] = self.args.parakeet_asr_url
             self.console.print(f"[green][SUCCESS][/green] Parakeet ASR configured via args: {self.args.parakeet_asr_url}")
 
+        # Check if Tailscale auth key provided via args
+        if hasattr(self.args, 'ts_authkey') and self.args.ts_authkey:
+            self.config["TS_AUTHKEY"] = self.args.ts_authkey
+            self.console.print(f"[green][SUCCESS][/green] Tailscale auth key configured (Docker integration enabled)")
+            self.console.print("[blue][INFO][/blue] Start Tailscale with: docker compose --profile tailscale up -d")
+
     def setup_obsidian(self):
         """Configure Obsidian/Neo4j integration"""
         # Check if enabled via command line
@@ -420,14 +459,14 @@ def setup_https(self):
                 self.console.print("[blue][INFO][/blue] For distributed deployments, use your Tailscale IP (e.g., 100.64.1.2)")
                 self.console.print("[blue][INFO][/blue] For local-only access, use 'localhost'")
 
-                # Check for existing SERVER_IP
-                existing_ip = self.read_existing_env_value("SERVER_IP")
-                if existing_ip and existing_ip not in ['localhost', 'your-server-ip-here']:
-                    prompt_text = f"Server IP/Domain for SSL certificate ({existing_ip}) [press Enter to reuse, or enter new]"
-                    server_ip_input = self.prompt_value(prompt_text, "")
-                    server_ip = server_ip_input if server_ip_input else existing_ip
-                else:
-                    server_ip = self.prompt_value("Server IP/Domain for SSL certificate (Tailscale IP or localhost)", "localhost")
+                # Use the new masked prompt function (not masked for IP, but shows existing)
+                server_ip = self.prompt_with_existing_masked(
+                    prompt_text="Server IP/Domain for SSL certificate (Tailscale IP or localhost)",
+                    env_key="SERVER_IP",
+                    placeholders=['localhost', 'your-server-ip-here'],
+                    is_password=False,
+                    default="localhost"
+                )
         
         if enable_https:
             
@@ -702,6 +741,8 @@ def main():
                        help="Enable Obsidian/Neo4j integration (default: prompt user)")
     parser.add_argument("--neo4j-password",
                        help="Neo4j password (default: prompt user)")
+    parser.add_argument("--ts-authkey",
+                       help="Tailscale auth key for Docker integration (default: prompt user)")
 
     args = parser.parse_args()
     
diff --git a/backends/advanced/src/advanced_omi_backend/app_factory.py b/backends/advanced/src/advanced_omi_backend/app_factory.py
index 7ccda184..c20b3ee9 100644
--- a/backends/advanced/src/advanced_omi_backend/app_factory.py
+++ b/backends/advanced/src/advanced_omi_backend/app_factory.py
@@ -122,6 +122,36 @@ async def lifespan(app: FastAPI):
     # SystemTracker is used for monitoring and debugging
     application_logger.info("Using SystemTracker for monitoring and debugging")
 
+    # Initialize plugins using plugin service
+    try:
+        from advanced_omi_backend.services.plugin_service import init_plugin_router, set_plugin_router
+
+        plugin_router = init_plugin_router()
+
+        if plugin_router:
+            # Initialize async resources for each enabled plugin
+            for plugin_id, plugin in plugin_router.plugins.items():
+                if plugin.enabled:
+                    try:
+                        await plugin.initialize()
+                        application_logger.info(f"✅ Plugin '{plugin_id}' initialized")
+                    except Exception as e:
+                        application_logger.error(f"Failed to initialize plugin '{plugin_id}': {e}", exc_info=True)
+
+            application_logger.info(f"Plugins initialized: {len(plugin_router.plugins)} active")
+
+            # Store in app state for API access
+            app.state.plugin_router = plugin_router
+            # Register with plugin service for worker access
+            set_plugin_router(plugin_router)
+        else:
+            application_logger.info("No plugins configured")
+            app.state.plugin_router = None
+
+    except Exception as e:
+        application_logger.error(f"Failed to initialize plugin system: {e}", exc_info=True)
+        app.state.plugin_router = None
+
     application_logger.info("Application ready - using application-level processing architecture.")
 
     logger.info("App ready")
@@ -162,6 +192,14 @@ async def lifespan(app: FastAPI):
         # Stop metrics collection and save final report
         application_logger.info("Metrics collection stopped")
 
+        # Shutdown plugins
+        try:
+            from advanced_omi_backend.services.plugin_service import cleanup_plugin_router
+            await cleanup_plugin_router()
+            application_logger.info("Plugins shut down")
+        except Exception as e:
+            application_logger.error(f"Error shutting down plugins: {e}")
+
         # Shutdown memory service and speaker service
         shutdown_memory_service()
         application_logger.info("Memory and speaker services shut down.")
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py
index aced763f..f5ff3275 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py
@@ -7,6 +7,7 @@
 import shutil
 import time
 from datetime import UTC, datetime
+from pathlib import Path
 
 import yaml
 from fastapi import HTTPException
@@ -555,3 +556,139 @@ async def validate_chat_config_yaml(prompt_text: str) -> dict:
     except Exception as e:
         logger.error(f"Error validating chat config: {e}")
         return {"valid": False, "error": f"Validation error: {str(e)}"}
+
+
+# Plugin Configuration Management Functions
+
+async def get_plugins_config_yaml() -> str:
+    """Get plugins configuration as YAML text."""
+    try:
+        plugins_yml_path = Path("/app/plugins.yml")
+
+        # Default empty plugins config
+        default_config = """plugins:
+  # No plugins configured yet
+  # Example plugin configuration:
+  # homeassistant:
+  #   enabled: true
+  #   access_level: transcript
+  #   trigger:
+  #     type: wake_word
+  #     wake_word: vivi
+  #   ha_url: http://localhost:8123
+  #   ha_token: YOUR_TOKEN_HERE
+"""
+
+        if not plugins_yml_path.exists():
+            return default_config
+
+        with open(plugins_yml_path, 'r') as f:
+            yaml_content = f.read()
+
+        return yaml_content
+
+    except Exception as e:
+        logger.error(f"Error loading plugins config: {e}")
+        raise
+
+
+async def save_plugins_config_yaml(yaml_content: str) -> dict:
+    """Save plugins configuration from YAML text."""
+    try:
+        plugins_yml_path = Path("/app/plugins.yml")
+
+        # Validate YAML can be parsed
+        try:
+            parsed_config = yaml.safe_load(yaml_content)
+            if not isinstance(parsed_config, dict):
+                raise ValueError("Configuration must be a YAML dictionary")
+
+            # Validate has 'plugins' key
+            if 'plugins' not in parsed_config:
+                raise ValueError("Configuration must contain 'plugins' key")
+
+        except yaml.YAMLError as e:
+            raise ValueError(f"Invalid YAML syntax: {e}")
+
+        # Create config directory if it doesn't exist
+        plugins_yml_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Backup existing config
+        if plugins_yml_path.exists():
+            backup_path = str(plugins_yml_path) + '.backup'
+            shutil.copy2(plugins_yml_path, backup_path)
+            logger.info(f"Created plugins config backup at {backup_path}")
+
+        # Save new config
+        with open(plugins_yml_path, 'w') as f:
+            f.write(yaml_content)
+
+        # Hot-reload plugins (optional - may require restart)
+        try:
+            from advanced_omi_backend.services.plugin_service import get_plugin_router
+            plugin_router = get_plugin_router()
+            if plugin_router:
+                logger.info("Plugin configuration updated - restart backend for changes to take effect")
+        except Exception as reload_err:
+            logger.warning(f"Could not reload plugins: {reload_err}")
+
+        logger.info("Plugins configuration updated successfully")
+
+        return {
+            "success": True,
+            "message": "Plugins configuration updated successfully. Restart backend for changes to take effect."
+        }
+
+    except Exception as e:
+        logger.error(f"Error saving plugins config: {e}")
+        raise
+
+
+async def validate_plugins_config_yaml(yaml_content: str) -> dict:
+    """Validate plugins configuration YAML."""
+    try:
+        # Parse YAML
+        try:
+            parsed_config = yaml.safe_load(yaml_content)
+        except yaml.YAMLError as e:
+            return {"valid": False, "error": f"Invalid YAML syntax: {e}"}
+
+        # Check structure
+        if not isinstance(parsed_config, dict):
+            return {"valid": False, "error": "Configuration must be a YAML dictionary"}
+
+        if 'plugins' not in parsed_config:
+            return {"valid": False, "error": "Configuration must contain 'plugins' key"}
+
+        plugins = parsed_config['plugins']
+        if not isinstance(plugins, dict):
+            return {"valid": False, "error": "'plugins' must be a dictionary"}
+
+        # Validate each plugin
+        valid_access_levels = ['transcript', 'conversation', 'memory']
+        valid_trigger_types = ['wake_word', 'always', 'conditional']
+
+        for plugin_id, plugin_config in plugins.items():
+            if not isinstance(plugin_config, dict):
+                return {"valid": False, "error": f"Plugin '{plugin_id}' config must be a dictionary"}
+
+            # Check required fields
+            if 'enabled' in plugin_config and not isinstance(plugin_config['enabled'], bool):
+                return {"valid": False, "error": f"Plugin '{plugin_id}': 'enabled' must be boolean"}
+
+            if 'access_level' in plugin_config and plugin_config['access_level'] not in valid_access_levels:
+                return {"valid": False, "error": f"Plugin '{plugin_id}': invalid access_level (must be one of {valid_access_levels})"}
+
+            if 'trigger' in plugin_config:
+                trigger = plugin_config['trigger']
+                if not isinstance(trigger, dict):
+                    return {"valid": False, "error": f"Plugin '{plugin_id}': 'trigger' must be a dictionary"}
+
+                if 'type' in trigger and trigger['type'] not in valid_trigger_types:
+                    return {"valid": False, "error": f"Plugin '{plugin_id}': invalid trigger type (must be one of {valid_trigger_types})"}
+
+        return {"valid": True, "message": "Configuration is valid"}
+
+    except Exception as e:
+        logger.error(f"Error validating plugins config: {e}")
+        return {"valid": False, "error": f"Validation error: {str(e)}"}
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/__init__.py b/backends/advanced/src/advanced_omi_backend/plugins/__init__.py
new file mode 100644
index 00000000..3ccea7dc
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/plugins/__init__.py
@@ -0,0 +1,18 @@
+"""
+Chronicle plugin system for multi-level pipeline extension.
+
+Plugins can hook into different stages of the processing pipeline:
+- transcript: When new transcript segment arrives
+- conversation: When conversation processing completes
+- memory: After memory extraction finishes
+
+Trigger types control when plugins execute:
+- wake_word: Only when transcript starts with specified wake word
+- always: Execute on every invocation at access level
+- conditional: Execute based on custom condition (future)
+"""
+
+from .base import BasePlugin, PluginContext, PluginResult
+from .router import PluginRouter
+
+__all__ = ['BasePlugin', 'PluginContext', 'PluginResult', 'PluginRouter']
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/base.py b/backends/advanced/src/advanced_omi_backend/plugins/base.py
new file mode 100644
index 00000000..84fc8967
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/plugins/base.py
@@ -0,0 +1,131 @@
+"""
+Base plugin classes for Chronicle multi-level plugin architecture.
+
+Provides:
+- PluginContext: Context passed to plugin execution
+- PluginResult: Result from plugin execution
+- BasePlugin: Abstract base class for all plugins
+"""
+from abc import ABC, abstractmethod
+from typing import Optional, Dict, Any, List
+from dataclasses import dataclass, field
+
+
+@dataclass
+class PluginContext:
+    """Context passed to plugin execution"""
+    user_id: str
+    access_level: str
+    data: Dict[str, Any]  # Access-level specific data
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class PluginResult:
+    """Result from plugin execution"""
+    success: bool
+    data: Optional[Dict[str, Any]] = None
+    message: Optional[str] = None
+    should_continue: bool = True  # Whether to continue normal processing
+
+
+class BasePlugin(ABC):
+    """
+    Base class for all Chronicle plugins.
+
+    Plugins can hook into different stages of the processing pipeline:
+    - transcript: When new transcript segment arrives
+    - conversation: When conversation processing completes
+    - memory: When memory extraction finishes
+
+    Subclasses should:
+    1. Set SUPPORTED_ACCESS_LEVELS to list which levels they support
+    2. Implement initialize() for plugin initialization
+    3. Implement the appropriate callback methods (on_transcript, on_conversation_complete, on_memory_processed)
+    4. Optionally implement cleanup() for resource cleanup
+    """
+
+    # Subclasses declare which access levels they support
+    SUPPORTED_ACCESS_LEVELS: List[str] = []
+
+    def __init__(self, config: Dict[str, Any]):
+        """
+        Initialize plugin with configuration.
+
+        Args:
+            config: Plugin configuration from config/plugins.yml
+                   Contains: enabled, access_level, trigger, and plugin-specific config
+        """
+        self.config = config
+        self.enabled = config.get('enabled', False)
+        self.access_level = config.get('access_level')
+        self.trigger = config.get('trigger', {'type': 'always'})
+
+    @abstractmethod
+    async def initialize(self):
+        """
+        Initialize plugin resources (connect to services, etc.)
+
+        Called during application startup after plugin registration.
+        Raise an exception if initialization fails.
+        """
+        pass
+
+    async def cleanup(self):
+        """
+        Clean up plugin resources.
+
+        Called during application shutdown.
+        Override if your plugin needs cleanup (closing connections, etc.)
+        """
+        pass
+
+    # Access-level specific methods (implement only what you need)
+
+    async def on_transcript(self, context: PluginContext) -> Optional[PluginResult]:
+        """
+        Called when new transcript segment arrives.
+
+        Context data contains:
+            - transcript: str - The transcript text
+            - segment_id: str - Unique segment identifier
+            - conversation_id: str - Current conversation ID
+
+        For wake_word triggers, router adds:
+            - command: str - Command with wake word stripped
+            - original_transcript: str - Full transcript
+
+        Returns:
+            PluginResult with success status, optional message, and should_continue flag
+        """
+        pass
+
+    async def on_conversation_complete(self, context: PluginContext) -> Optional[PluginResult]:
+        """
+        Called when conversation processing completes.
+
+        Context data contains:
+            - conversation: dict - Full conversation data
+            - transcript: str - Complete transcript
+            - duration: float - Conversation duration
+            - conversation_id: str - Conversation identifier
+
+        Returns:
+            PluginResult with success status, optional message, and should_continue flag
+        """
+        pass
+
+    async def on_memory_processed(self, context: PluginContext) -> Optional[PluginResult]:
+        """
+        Called after memory extraction finishes.
+
+        Context data contains:
+            - memories: list - Extracted memories
+            - conversation: dict - Source conversation
+            - memory_count: int - Number of memories created
+            - conversation_id: str - Conversation identifier
+
+        Returns:
+            PluginResult with success status, optional message, and should_continue flag
+        """
+        pass
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/__init__.py b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/__init__.py
new file mode 100644
index 00000000..11b831e9
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/__init__.py
@@ -0,0 +1,9 @@
+"""
+Home Assistant plugin for Chronicle.
+
+Allows control of Home Assistant devices via natural language wake word commands.
+"""
+
+from .plugin import HomeAssistantPlugin
+
+__all__ = ['HomeAssistantPlugin']
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/command_parser.py b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/command_parser.py
new file mode 100644
index 00000000..cc73626d
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/command_parser.py
@@ -0,0 +1,97 @@
+"""
+LLM-based command parser for Home Assistant integration.
+
+This module provides structured command parsing using LLM to extract
+intent, target entities/areas, and parameters from natural language.
+"""
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, Optional
+
+
+@dataclass
+class ParsedCommand:
+    """Structured representation of a parsed Home Assistant command."""
+
+    action: str
+    """Action to perform (e.g., turn_on, turn_off, set_brightness, toggle)"""
+
+    target_type: str
+    """Type of target (area, entity, all_in_area)"""
+
+    target: str
+    """Target identifier (area name or entity name)"""
+
+    entity_type: Optional[str] = None
+    """Entity domain filter (e.g., light, switch, fan) - None means all types"""
+
+    parameters: Dict[str, Any] = field(default_factory=dict)
+    """Additional parameters (e.g., brightness_pct=50, color='red')"""
+
+
+# LLM System Prompt for Command Parsing
+COMMAND_PARSER_SYSTEM_PROMPT = """You are a smart home command parser for Home Assistant.
+
+Extract structured information from natural language commands.
+Return ONLY valid JSON in this exact format (no markdown, no code blocks, no explanation):
+
+{
+  "action": "turn_off",
+  "target_type": "area",
+  "target": "study",
+  "entity_type": "light",
+  "parameters": {}
+}
+
+ACTIONS (choose one):
+- turn_on: Turn on entities
+- turn_off: Turn off entities
+- toggle: Toggle entity state
+- set_brightness: Set brightness level
+- set_color: Set color
+
+TARGET_TYPE (choose one):
+- area: Targeting all entities of a type in an area (e.g., "study lights")
+- all_in_area: Targeting ALL entities in an area (e.g., "everything in study")
+- entity: Targeting a specific entity by name (e.g., "desk lamp")
+
+ENTITY_TYPE (optional, use null if not specified):
+- light: Light entities
+- switch: Switch entities
+- fan: Fan entities
+- cover: Covers/blinds
+- null: All entity types (when target_type is "all_in_area")
+
+PARAMETERS (optional, empty dict if none):
+- brightness_pct: Brightness percentage (0-100)
+- color: Color name (e.g., "red", "blue", "warm white")
+
+EXAMPLES:
+
+Command: "turn off study lights"
+Response: {"action": "turn_off", "target_type": "area", "target": "study", "entity_type": "light", "parameters": {}}
+
+Command: "turn off everything in study"
+Response: {"action": "turn_off", "target_type": "all_in_area", "target": "study", "entity_type": null, "parameters": {}}
+
+Command: "turn on desk lamp"
+Response: {"action": "turn_on", "target_type": "entity", "target": "desk lamp", "entity_type": null, "parameters": {}}
+
+Command: "set study lights to 50%"
+Response: {"action": "set_brightness", "target_type": "area", "target": "study", "entity_type": "light", "parameters": {"brightness_pct": 50}}
+
+Command: "turn on living room fan"
+Response: {"action": "turn_on", "target_type": "area", "target": "living room", "entity_type": "fan", "parameters": {}}
+
+Command: "turn off all lights"
+Response: {"action": "turn_off", "target_type": "entity", "target": "all", "entity_type": "light", "parameters": {}}
+
+Command: "toggle hallway light"
+Response: {"action": "toggle", "target_type": "entity", "target": "hallway light", "entity_type": null, "parameters": {}}
+
+Remember:
+1. Return ONLY the JSON object, no markdown formatting
+2. Use lowercase for action, target_type, target, entity_type
+3. Use null (not "null" string) for missing entity_type
+4. Always include all 5 fields: action, target_type, target, entity_type, parameters
+"""
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/entity_cache.py b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/entity_cache.py
new file mode 100644
index 00000000..e8624f1b
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/entity_cache.py
@@ -0,0 +1,133 @@
+"""
+Entity cache for Home Assistant integration.
+
+This module provides caching and lookup functionality for Home Assistant areas and entities.
+"""
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Dict, List, Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class EntityCache:
+    """Cache for Home Assistant areas and entities."""
+
+    areas: List[str] = field(default_factory=list)
+    """List of area names (e.g., ["study", "living_room"])"""
+
+    area_entities: Dict[str, List[str]] = field(default_factory=dict)
+    """Map of area names to entity IDs (e.g., {"study": ["light.tubelight_3"]})"""
+
+    entity_details: Dict[str, Dict] = field(default_factory=dict)
+    """Full entity state data keyed by entity_id"""
+
+    last_refresh: datetime = field(default_factory=datetime.now)
+    """Timestamp of last cache refresh"""
+
+    def find_entity_by_name(self, name: str) -> Optional[str]:
+        """
+        Find entity ID by fuzzy name matching.
+
+        Matching priority:
+        1. Exact friendly_name match (case-insensitive)
+        2. Partial friendly_name match (case-insensitive)
+        3. Entity ID match (e.g., "tubelight_3" → "light.tubelight_3")
+
+        Args:
+            name: Entity name to search for
+
+        Returns:
+            Entity ID if found, None otherwise
+        """
+        name_lower = name.lower().strip()
+
+        # Step 1: Exact friendly_name match
+        for entity_id, details in self.entity_details.items():
+            friendly_name = details.get('attributes', {}).get('friendly_name', '')
+            if friendly_name.lower() == name_lower:
+                logger.debug(f"Exact match: {name} → {entity_id} (friendly_name: {friendly_name})")
+                return entity_id
+
+        # Step 2: Partial friendly_name match
+        for entity_id, details in self.entity_details.items():
+            friendly_name = details.get('attributes', {}).get('friendly_name', '')
+            if name_lower in friendly_name.lower():
+                logger.debug(f"Partial match: {name} → {entity_id} (friendly_name: {friendly_name})")
+                return entity_id
+
+        # Step 3: Entity ID match (try adding common domains)
+        common_domains = ['light', 'switch', 'fan', 'cover']
+        for domain in common_domains:
+            candidate_id = f"{domain}.{name_lower.replace(' ', '_')}"
+            if candidate_id in self.entity_details:
+                logger.debug(f"Entity ID match: {name} → {candidate_id}")
+                return candidate_id
+
+        logger.warning(f"No entity found matching: {name}")
+        return None
+
+    def get_entities_in_area(
+        self,
+        area: str,
+        entity_type: Optional[str] = None
+    ) -> List[str]:
+        """
+        Get all entities in an area, optionally filtered by domain.
+
+        Args:
+            area: Area name (case-insensitive)
+            entity_type: Entity domain filter (e.g., "light", "switch")
+
+        Returns:
+            List of entity IDs in the area
+        """
+        area_lower = area.lower().strip()
+
+        # Find matching area (case-insensitive)
+        matching_area = None
+        for area_name in self.areas:
+            if area_name.lower() == area_lower:
+                matching_area = area_name
+                break
+
+        if not matching_area:
+            logger.warning(f"Area not found: {area}")
+            return []
+
+        # Get entities in area
+        entities = self.area_entities.get(matching_area, [])
+
+        # Filter by entity type if specified
+        if entity_type:
+            entity_type_lower = entity_type.lower()
+            entities = [
+                e for e in entities
+                if e.split('.')[0] == entity_type_lower
+            ]
+
+        logger.debug(
+            f"Found {len(entities)} entities in area '{matching_area}'"
+            + (f" (type: {entity_type})" if entity_type else "")
+        )
+
+        return entities
+
+    def get_cache_age_seconds(self) -> float:
+        """Get cache age in seconds."""
+        return (datetime.now() - self.last_refresh).total_seconds()
+
+    def is_stale(self, max_age_seconds: int = 3600) -> bool:
+        """
+        Check if cache is stale.
+
+        Args:
+            max_age_seconds: Maximum cache age before considering stale (default: 1 hour)
+
+        Returns:
+            True if cache is older than max_age_seconds
+        """
+        return self.get_cache_age_seconds() > max_age_seconds
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/mcp_client.py b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/mcp_client.py
new file mode 100644
index 00000000..42ede8dc
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/mcp_client.py
@@ -0,0 +1,421 @@
+"""
+MCP client for communicating with Home Assistant's MCP Server.
+
+Home Assistant exposes an MCP server at /api/mcp that provides tools
+for controlling smart home devices.
+"""
+
+import json
+import logging
+from typing import Any, Dict, List, Optional
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+
+class MCPError(Exception):
+    """MCP protocol error"""
+    pass
+
+
+class HAMCPClient:
+    """
+    MCP Client for Home Assistant's /api/mcp endpoint.
+
+    Implements the Model Context Protocol for communicating with
+    Home Assistant's built-in MCP server.
+    """
+
+    def __init__(self, base_url: str, token: str, timeout: int = 30):
+        """
+        Initialize the MCP client.
+
+        Args:
+            base_url: Base URL of Home Assistant (e.g., http://localhost:8123)
+            token: Long-lived access token for authentication
+            timeout: Request timeout in seconds
+
+        """
+        self.base_url = base_url.rstrip('/')
+        self.mcp_url = f"{self.base_url}/api/mcp"
+        self.token = token
+        self.timeout = timeout
+        self.client = httpx.AsyncClient(timeout=timeout)
+        self._request_id = 0
+
+    async def close(self):
+        """Close the HTTP client"""
+        await self.client.aclose()
+
+    def _next_request_id(self) -> int:
+        """Generate next request ID"""
+        self._request_id += 1
+        return self._request_id
+
+    async def _send_mcp_request(self, method: str, params: Optional[Dict] = None) -> Dict[str, Any]:
+        """
+        Send MCP protocol request to Home Assistant.
+
+        Args:
+            method: MCP method name (e.g., "tools/list", "tools/call")
+            params: Optional method parameters
+
+        Returns:
+            Response data from MCP server
+
+        Raises:
+            MCPError: If request fails or returns an error
+        """
+        payload = {
+            "jsonrpc": "2.0",
+            "id": self._next_request_id(),
+            "method": method
+        }
+
+        if params:
+            payload["params"] = params
+
+        headers = {
+            "Authorization": f"Bearer {self.token}",
+            "Content-Type": "application/json"
+        }
+
+        try:
+            logger.debug(f"MCP Request: {method} with params: {params}")
+            response = await self.client.post(
+                self.mcp_url,
+                json=payload,
+                headers=headers
+            )
+            response.raise_for_status()
+
+            data = response.json()
+
+            # Check for JSON-RPC error
+            if "error" in data:
+                error = data["error"]
+                raise MCPError(f"MCP Error {error.get('code')}: {error.get('message')}")
+
+            return data.get("result", {})
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"HTTP error calling MCP endpoint: {e.response.status_code}")
+            raise MCPError(f"HTTP {e.response.status_code}: {e.response.text}")
+        except httpx.RequestError as e:
+            logger.error(f"Request error calling MCP endpoint: {e}")
+            raise MCPError(f"Request failed: {e}")
+        except Exception as e:
+            logger.error(f"Unexpected error calling MCP endpoint: {e}")
+            raise MCPError(f"Unexpected error: {e}")
+
+    async def list_tools(self) -> List[Dict[str, Any]]:
+        """
+        Get list of available MCP tools from Home Assistant.
+
+        Returns:
+            List of tool definitions with schema
+
+        Example tool:
+            {
+                "name": "turn_on",
+                "description": "Turn on a light or switch",
+                "inputSchema": {
+                    "type": "object",
+                    "properties": {
+                        "entity_id": {"type": "string"}
+                    }
+                }
+            }
+        """
+        result = await self._send_mcp_request("tools/list")
+        tools = result.get("tools", [])
+        logger.info(f"Retrieved {len(tools)} tools from Home Assistant MCP")
+        return tools
+
+    async def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Execute a tool via MCP.
+
+        Args:
+            tool_name: Name of the tool to call (e.g., "turn_on", "turn_off")
+            arguments: Tool arguments (e.g., {"entity_id": "light.hall_light"})
+
+        Returns:
+            Tool execution result
+
+        Raises:
+            MCPError: If tool execution fails
+
+        Example:
+            >>> await client.call_tool("turn_off", {"entity_id": "light.hall_light"})
+            {"success": True}
+        """
+        params = {
+            "name": tool_name,
+            "arguments": arguments
+        }
+
+        logger.info(f"Calling MCP tool '{tool_name}' with args: {arguments}")
+        result = await self._send_mcp_request("tools/call", params)
+
+        # MCP tool results are wrapped in content blocks
+        content = result.get("content", [])
+        if content and isinstance(content, list):
+            # Extract text content from first block
+            first_block = content[0]
+            if isinstance(first_block, dict) and first_block.get("type") == "text":
+                return {"result": first_block.get("text"), "success": True}
+
+        return result
+
+    async def test_connection(self) -> bool:
+        """
+        Test connection to Home Assistant MCP server.
+
+        Returns:
+            True if connection successful, False otherwise
+        """
+        try:
+            tools = await self.list_tools()
+            logger.info(f"MCP connection test successful ({len(tools)} tools available)")
+            return True
+        except Exception as e:
+            logger.error(f"MCP connection test failed: {e}")
+            return False
+
+    async def _render_template(self, template: str) -> Any:
+        """
+        Render a Home Assistant template using the Template API.
+
+        Args:
+            template: Jinja2 template string (e.g., "{{ areas() }}")
+
+        Returns:
+            Rendered template result (parsed as JSON if possible)
+
+        Raises:
+            MCPError: If template rendering fails
+
+        Example:
+            >>> await client._render_template("{{ areas() }}")
+            ["study", "living_room", "bedroom"]
+        """
+        headers = {
+            "Authorization": f"Bearer {self.token}",
+            "Content-Type": "application/json"
+        }
+
+        payload = {"template": template}
+
+        try:
+            logger.debug(f"Rendering template: {template}")
+            response = await self.client.post(
+                f"{self.base_url}/api/template",
+                json=payload,
+                headers=headers
+            )
+            response.raise_for_status()
+
+            result = response.text.strip()
+
+            # Try to parse as JSON (for lists, dicts)
+            if result.startswith('[') or result.startswith('{'):
+                try:
+                    return json.loads(result)
+                except json.JSONDecodeError:
+                    logger.warning(f"Failed to parse template result as JSON: {result}")
+                    return result
+
+            return result
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"HTTP error rendering template: {e.response.status_code}")
+            raise MCPError(f"HTTP {e.response.status_code}: {e.response.text}")
+        except httpx.RequestError as e:
+            logger.error(f"Request error rendering template: {e}")
+            raise MCPError(f"Request failed: {e}")
+
+    async def fetch_areas(self) -> List[str]:
+        """
+        Fetch all areas from Home Assistant using Template API.
+
+        Returns:
+            List of area names
+
+        Example:
+            >>> await client.fetch_areas()
+            ["study", "living_room", "bedroom"]
+        """
+        template = "{{ areas() | to_json }}"
+        areas = await self._render_template(template)
+
+        if isinstance(areas, list):
+            logger.info(f"Fetched {len(areas)} areas from Home Assistant")
+            return areas
+        else:
+            logger.warning(f"Unexpected areas format: {type(areas)}")
+            return []
+
+    async def fetch_area_entities(self, area_name: str) -> List[str]:
+        """
+        Fetch all entity IDs in a specific area.
+
+        Args:
+            area_name: Name of the area
+
+        Returns:
+            List of entity IDs in the area
+
+        Example:
+            >>> await client.fetch_area_entities("study")
+            ["light.tubelight_3", "switch.desk_fan"]
+        """
+        template = f"{{{{ area_entities('{area_name}') | to_json }}}}"
+        entities = await self._render_template(template)
+
+        if isinstance(entities, list):
+            logger.info(f"Fetched {len(entities)} entities from area '{area_name}'")
+            return entities
+        else:
+            logger.warning(f"Unexpected entities format for area '{area_name}': {type(entities)}")
+            return []
+
+    async def fetch_entity_states(self) -> Dict[str, Dict]:
+        """
+        Fetch all entity states from Home Assistant.
+
+        Returns:
+            Dict mapping entity_id to state data (includes attributes, area_id)
+
+        Example:
+            >>> await client.fetch_entity_states()
+            {
+                "light.tubelight_3": {
+                    "state": "on",
+                    "attributes": {"friendly_name": "Study Light", ...},
+                    "area_id": "study"
+                }
+            }
+        """
+        headers = {
+            "Authorization": f"Bearer {self.token}",
+            "Content-Type": "application/json"
+        }
+
+        try:
+            logger.debug("Fetching all entity states")
+            response = await self.client.get(
+                f"{self.base_url}/api/states",
+                headers=headers
+            )
+            response.raise_for_status()
+
+            states = response.json()
+            entity_details = {}
+
+            # Enrich with area information
+            for state in states:
+                entity_id = state.get('entity_id')
+                if entity_id:
+                    # Get area_id using Template API
+                    try:
+                        area_template = f"{{{{ area_id('{entity_id}') }}}}"
+                        area_id = await self._render_template(area_template)
+                        state['area_id'] = area_id if area_id else None
+                    except Exception as e:
+                        logger.debug(f"Failed to get area for {entity_id}: {e}")
+                        state['area_id'] = None
+
+                    entity_details[entity_id] = state
+
+            logger.info(f"Fetched {len(entity_details)} entity states")
+            return entity_details
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"HTTP error fetching states: {e.response.status_code}")
+            raise MCPError(f"HTTP {e.response.status_code}: {e.response.text}")
+        except httpx.RequestError as e:
+            logger.error(f"Request error fetching states: {e}")
+            raise MCPError(f"Request failed: {e}")
+
+    async def call_service(
+        self,
+        domain: str,
+        service: str,
+        entity_ids: List[str],
+        **parameters
+    ) -> Dict[str, Any]:
+        """
+        Call a Home Assistant service directly via REST API.
+
+        Args:
+            domain: Service domain (e.g., "light", "switch")
+            service: Service name (e.g., "turn_on", "turn_off")
+            entity_ids: List of entity IDs to target
+            **parameters: Additional service parameters (e.g., brightness_pct=50)
+
+        Returns:
+            Service call response
+
+        Example:
+            >>> await client.call_service("light", "turn_on", ["light.study"], brightness_pct=50)
+            [{"entity_id": "light.study", "state": "on"}]
+        """
+        headers = {
+            "Authorization": f"Bearer {self.token}",
+            "Content-Type": "application/json"
+        }
+
+        payload = {
+            "entity_id": entity_ids,
+            **parameters
+        }
+
+        service_url = f"{self.base_url}/api/services/{domain}/{service}"
+
+        try:
+            logger.info(f"Calling service {domain}.{service} for {len(entity_ids)} entities")
+            logger.debug(f"Service payload: {payload}")
+
+            response = await self.client.post(
+                service_url,
+                json=payload,
+                headers=headers
+            )
+            response.raise_for_status()
+
+            result = response.json()
+            logger.info(f"Service call successful: {domain}.{service}")
+            return result
+
+        except httpx.HTTPStatusError as e:
+            logger.error(f"HTTP error calling service: {e.response.status_code}")
+            raise MCPError(f"HTTP {e.response.status_code}: {e.response.text}")
+        except httpx.RequestError as e:
+            logger.error(f"Request error calling service: {e}")
+            raise MCPError(f"Request failed: {e}")
+
+    async def discover_entities(self) -> Dict[str, Dict]:
+        """
+        Discover available entities from MCP tools.
+
+        Parses the available tools to build an index of entities
+        that can be controlled.
+
+        Returns:
+            Dict mapping entity_id to metadata
+        """
+        tools = await self.list_tools()
+        entities = {}
+
+        for tool in tools:
+            # Extract entity information from tool schemas
+            # This will depend on how HA MCP structures its tools
+            # For now, we'll just log what we find
+            logger.debug(f"Tool: {tool.get('name')} - {tool.get('description')}")
+
+        # TODO: Parse tool schemas to extract entity_id information
+        # For now, return empty dict - will be populated based on actual HA MCP response
+
+        return entities
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/plugin.py b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/plugin.py
new file mode 100644
index 00000000..931dd813
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/plugin.py
@@ -0,0 +1,598 @@
+"""
+Home Assistant plugin for Chronicle.
+
+Enables control of Home Assistant devices through natural language commands
+triggered by a wake word.
+"""
+
+import json
+import logging
+from typing import Any, Dict, List, Optional
+
+from ..base import BasePlugin, PluginContext, PluginResult
+from .entity_cache import EntityCache
+from .mcp_client import HAMCPClient, MCPError
+
+logger = logging.getLogger(__name__)
+
+
+class HomeAssistantPlugin(BasePlugin):
+    """
+    Plugin for controlling Home Assistant devices via wake word commands.
+
+    Example:
+        User says: "Vivi, turn off the hall lights"
+        -> Wake word "vivi" detected by router
+        -> Command "turn off the hall lights" passed to on_transcript()
+        -> Plugin parses command and calls HA MCP to execute
+        -> Returns: PluginResult with "I've turned off the hall light"
+    """
+
+    SUPPORTED_ACCESS_LEVELS: List[str] = ['transcript']
+
+    def __init__(self, config: Dict[str, Any]):
+        """
+        Initialize Home Assistant plugin.
+
+        Args:
+            config: Plugin configuration with keys:
+                - ha_url: Home Assistant URL
+                - ha_token: Long-lived access token
+                - wake_word: Wake word for triggering commands (handled by router)
+                - enabled: Whether plugin is enabled
+                - access_level: Should be 'transcript'
+                - trigger: Should be {'type': 'wake_word', 'wake_word': '...'}
+        """
+        super().__init__(config)
+        self.mcp_client: Optional[HAMCPClient] = None
+        self.available_tools: List[Dict] = []
+        self.entities: Dict[str, Dict] = {}
+
+        # Entity cache for area-based commands
+        self.entity_cache: Optional[EntityCache] = None
+        self.cache_initialized = False
+
+        # Configuration
+        self.ha_url = config.get('ha_url', 'http://localhost:8123')
+        self.ha_token = config.get('ha_token', '')
+        self.wake_word = config.get('wake_word', 'vivi')
+        self.timeout = config.get('timeout', 30)
+
+    async def initialize(self):
+        """
+        Initialize the Home Assistant plugin.
+
+        Connects to Home Assistant MCP server and discovers available tools.
+
+        Raises:
+            MCPError: If connection or discovery fails
+        """
+        if not self.enabled:
+            logger.info("Home Assistant plugin is disabled, skipping initialization")
+            return
+
+        if not self.ha_token:
+            raise ValueError("Home Assistant token is required")
+
+        logger.info(f"Initializing Home Assistant plugin (URL: {self.ha_url})")
+
+        # Create MCP client (used for REST API calls, not MCP protocol)
+        self.mcp_client = HAMCPClient(
+            base_url=self.ha_url,
+            token=self.ha_token,
+            timeout=self.timeout
+        )
+
+        # Test basic API connectivity with Template API
+        try:
+            logger.info("Testing Home Assistant API connectivity...")
+            test_result = await self.mcp_client._render_template("{{ 1 + 1 }}")
+            if str(test_result).strip() != "2":
+                raise ValueError(f"Unexpected template result: {test_result}")
+            logger.info("Home Assistant API connection successful")
+        except Exception as e:
+            raise MCPError(f"Failed to connect to Home Assistant API: {e}")
+
+        logger.info("Home Assistant plugin initialized successfully")
+
+    async def on_transcript(self, context: PluginContext) -> Optional[PluginResult]:
+        """
+        Execute Home Assistant command from wake word transcript.
+
+        Called by the router when a wake word is detected in the transcript.
+        The router has already stripped the wake word and extracted the command.
+
+        Args:
+            context: PluginContext containing:
+                - user_id: User ID who issued the command
+                - access_level: 'transcript'
+                - data: Dict with:
+                    - command: str - Command with wake word already stripped
+                    - original_transcript: str - Full transcript with wake word
+                    - transcript: str - Original transcript
+                    - segment_id: str - Unique segment identifier
+                    - conversation_id: str - Current conversation ID
+                - metadata: Optional additional metadata
+
+        Returns:
+            PluginResult with:
+                - success: True if command executed
+                - message: User-friendly response
+                - data: Dict with action details
+                - should_continue: False to stop normal processing
+
+        Example:
+            Context data:
+                {
+                    'command': 'turn off study lights',
+                    'original_transcript': 'vivi turn off study lights',
+                    'conversation_id': 'conv_123'
+                }
+
+            Returns:
+                PluginResult(
+                    success=True,
+                    message="I've turned off 1 light in study",
+                    data={'action': 'turn_off', 'entity_ids': ['light.tubelight_3']},
+                    should_continue=False
+                )
+        """
+        command = context.data.get('command', '')
+
+        if not command:
+            return PluginResult(
+                success=False,
+                message="No command provided",
+                should_continue=True
+            )
+
+        if not self.mcp_client:
+            logger.error("MCP client not initialized")
+            return PluginResult(
+                success=False,
+                message="Sorry, Home Assistant is not connected",
+                should_continue=True
+            )
+
+        try:
+            # Step 1: Parse command using hybrid LLM + fallback parsing
+            logger.info(f"Processing HA command: '{command}'")
+            parsed = await self._parse_command_hybrid(command)
+
+            if not parsed:
+                return PluginResult(
+                    success=False,
+                    message="Sorry, I couldn't understand that command",
+                    should_continue=True
+                )
+
+            # Step 2: Resolve entities from parsed command
+            try:
+                entity_ids = await self._resolve_entities(parsed)
+            except ValueError as e:
+                logger.warning(f"Entity resolution failed: {e}")
+                return PluginResult(
+                    success=False,
+                    message=str(e),
+                    should_continue=True
+                )
+
+            # Step 3: Determine service and domain
+            # Extract domain from first entity (all should have same domain for area-based)
+            domain = entity_ids[0].split('.')[0] if entity_ids else 'light'
+
+            # Map action to service name
+            service_map = {
+                'turn_on': 'turn_on',
+                'turn_off': 'turn_off',
+                'toggle': 'toggle',
+                'set_brightness': 'turn_on',  # brightness uses turn_on with params
+                'set_color': 'turn_on'        # color uses turn_on with params
+            }
+            service = service_map.get(parsed.action, 'turn_on')
+
+            # Step 4: Call Home Assistant service
+            logger.info(
+                f"Calling {domain}.{service} for {len(entity_ids)} entities: {entity_ids}"
+            )
+
+            result = await self.mcp_client.call_service(
+                domain=domain,
+                service=service,
+                entity_ids=entity_ids,
+                **parsed.parameters
+            )
+
+            # Step 5: Format user-friendly response
+            entity_type_name = parsed.entity_type or domain
+            if parsed.target_type == 'area':
+                message = (
+                    f"I've {parsed.action.replace('_', ' ')} {len(entity_ids)} "
+                    f"{entity_type_name}{'s' if len(entity_ids) != 1 else ''} "
+                    f"in {parsed.target}"
+                )
+            elif parsed.target_type == 'all_in_area':
+                message = (
+                    f"I've {parsed.action.replace('_', ' ')} {len(entity_ids)} "
+                    f"entities in {parsed.target}"
+                )
+            else:
+                message = f"I've {parsed.action.replace('_', ' ')} {parsed.target}"
+
+            logger.info(f"HA command executed successfully: {message}")
+
+            return PluginResult(
+                success=True,
+                data={
+                    'action': parsed.action,
+                    'entity_ids': entity_ids,
+                    'target_type': parsed.target_type,
+                    'target': parsed.target,
+                    'ha_result': result
+                },
+                message=message,
+                should_continue=False  # Stop normal processing - HA command handled
+            )
+
+        except MCPError as e:
+            logger.error(f"Home Assistant API error: {e}", exc_info=True)
+            return PluginResult(
+                success=False,
+                message=f"Sorry, Home Assistant couldn't execute that: {e}",
+                should_continue=True
+            )
+        except Exception as e:
+            logger.error(f"Command execution failed: {e}", exc_info=True)
+            return PluginResult(
+                success=False,
+                message="Sorry, something went wrong while executing that command",
+                should_continue=True
+            )
+
+    async def cleanup(self):
+        """Clean up resources"""
+        if self.mcp_client:
+            await self.mcp_client.close()
+            logger.info("Closed Home Assistant MCP client")
+
+    async def _ensure_cache_initialized(self):
+        """Ensure entity cache is initialized. Lazy-load on first use."""
+        if not self.cache_initialized:
+            logger.info("Entity cache not initialized, refreshing...")
+            await self._refresh_cache()
+            self.cache_initialized = True
+
+    async def _refresh_cache(self):
+        """
+        Refresh the entity cache from Home Assistant.
+
+        Fetches:
+        - All areas
+        - Entities in each area
+        - Entity state details
+        """
+        if not self.mcp_client:
+            logger.error("Cannot refresh cache: MCP client not initialized")
+            return
+
+        try:
+            logger.info("Refreshing entity cache from Home Assistant...")
+
+            # Fetch all areas
+            areas = await self.mcp_client.fetch_areas()
+            logger.debug(f"Fetched {len(areas)} areas: {areas}")
+
+            # Fetch entities for each area
+            area_entities = {}
+            for area in areas:
+                entities = await self.mcp_client.fetch_area_entities(area)
+                area_entities[area] = entities
+                logger.debug(f"Area '{area}': {len(entities)} entities")
+
+            # Fetch all entity states
+            entity_details = await self.mcp_client.fetch_entity_states()
+            logger.debug(f"Fetched {len(entity_details)} entity states")
+
+            # Create cache
+            from datetime import datetime
+            self.entity_cache = EntityCache(
+                areas=areas,
+                area_entities=area_entities,
+                entity_details=entity_details,
+                last_refresh=datetime.now()
+            )
+
+            logger.info(
+                f"Entity cache refreshed: {len(areas)} areas, "
+                f"{len(entity_details)} entities"
+            )
+
+        except Exception as e:
+            logger.error(f"Failed to refresh entity cache: {e}", exc_info=True)
+            raise
+
+    async def _parse_command_with_llm(self, command: str) -> Optional['ParsedCommand']:
+        """
+        Parse command using LLM with structured system prompt.
+
+        Args:
+            command: Natural language command (wake word already stripped)
+
+        Returns:
+            ParsedCommand if parsing succeeds, None otherwise
+
+        Example:
+            >>> await self._parse_command_with_llm("turn off study lights")
+            ParsedCommand(
+                action="turn_off",
+                target_type="area",
+                target="study",
+                entity_type="light",
+                parameters={}
+            )
+        """
+        try:
+            from advanced_omi_backend.llm_client import get_llm_client
+            from .command_parser import COMMAND_PARSER_SYSTEM_PROMPT, ParsedCommand
+
+            llm_client = get_llm_client()
+
+            logger.debug(f"Parsing command with LLM: '{command}'")
+
+            # Use OpenAI chat format with system + user messages
+            response = llm_client.client.chat.completions.create(
+                model=llm_client.model,
+                messages=[
+                    {"role": "system", "content": COMMAND_PARSER_SYSTEM_PROMPT},
+                    {"role": "user", "content": f'Command: "{command}"\n\nReturn JSON only.'}
+                ],
+                temperature=0.1,
+                max_tokens=150
+            )
+
+            result_text = response.choices[0].message.content.strip()
+            logger.debug(f"LLM response: {result_text}")
+
+            # Remove markdown code blocks if present
+            if result_text.startswith('```'):
+                lines = result_text.split('\n')
+                result_text = '\n'.join(lines[1:-1]) if len(lines) > 2 else result_text
+                result_text = result_text.strip()
+
+            # Parse JSON response
+            result_json = json.loads(result_text)
+
+            # Validate required fields
+            required_fields = ['action', 'target_type', 'target']
+            if not all(field in result_json for field in required_fields):
+                logger.warning(f"LLM response missing required fields: {result_json}")
+                return None
+
+            parsed = ParsedCommand(
+                action=result_json['action'],
+                target_type=result_json['target_type'],
+                target=result_json['target'],
+                entity_type=result_json.get('entity_type'),
+                parameters=result_json.get('parameters', {})
+            )
+
+            logger.info(
+                f"LLM parsed command: action={parsed.action}, "
+                f"target_type={parsed.target_type}, target={parsed.target}, "
+                f"entity_type={parsed.entity_type}"
+            )
+
+            return parsed
+
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to parse LLM JSON response: {e}\nResponse: {result_text}")
+            return None
+        except Exception as e:
+            logger.error(f"LLM command parsing failed: {e}", exc_info=True)
+            return None
+
+    async def _resolve_entities(self, parsed: 'ParsedCommand') -> List[str]:
+        """
+        Resolve ParsedCommand to actual Home Assistant entity IDs.
+
+        Args:
+            parsed: ParsedCommand from LLM parsing
+
+        Returns:
+            List of entity IDs to target
+
+        Raises:
+            ValueError: If target not found or ambiguous
+
+        Example:
+            >>> await self._resolve_entities(ParsedCommand(
+            ...     action="turn_off",
+            ...     target_type="area",
+            ...     target="study",
+            ...     entity_type="light"
+            ... ))
+            ["light.tubelight_3"]
+        """
+        from .command_parser import ParsedCommand
+
+        # Ensure cache is ready
+        await self._ensure_cache_initialized()
+
+        if not self.entity_cache:
+            raise ValueError("Entity cache not initialized")
+
+        if parsed.target_type == 'area':
+            # Get entities in area, filtered by type
+            entities = self.entity_cache.get_entities_in_area(
+                area=parsed.target,
+                entity_type=parsed.entity_type
+            )
+
+            if not entities:
+                entity_desc = f"{parsed.entity_type}s" if parsed.entity_type else "entities"
+                raise ValueError(
+                    f"No {entity_desc} found in area '{parsed.target}'. "
+                    f"Available areas: {', '.join(self.entity_cache.areas)}"
+                )
+
+            logger.info(
+                f"Resolved area '{parsed.target}' to {len(entities)} "
+                f"{parsed.entity_type or 'entity'}(s)"
+            )
+            return entities
+
+        elif parsed.target_type == 'all_in_area':
+            # Get ALL entities in area (no filter)
+            entities = self.entity_cache.get_entities_in_area(
+                area=parsed.target,
+                entity_type=None
+            )
+
+            if not entities:
+                raise ValueError(
+                    f"No entities found in area '{parsed.target}'. "
+                    f"Available areas: {', '.join(self.entity_cache.areas)}"
+                )
+
+            logger.info(f"Resolved 'all in {parsed.target}' to {len(entities)} entities")
+            return entities
+
+        elif parsed.target_type == 'entity':
+            # Fuzzy match entity by name
+            entity_id = self.entity_cache.find_entity_by_name(parsed.target)
+
+            if not entity_id:
+                raise ValueError(
+                    f"Entity '{parsed.target}' not found. "
+                    f"Try being more specific or check the entity name."
+                )
+
+            logger.info(f"Resolved entity '{parsed.target}' to {entity_id}")
+            return [entity_id]
+
+        else:
+            raise ValueError(f"Unknown target type: {parsed.target_type}")
+
+    async def _parse_command_fallback(self, command: str) -> Optional[Dict[str, Any]]:
+        """
+        Fallback keyword-based command parser (used when LLM fails).
+
+        Args:
+            command: Natural language command
+
+        Returns:
+            Dict with 'tool', 'arguments', and optional metadata
+            None if parsing fails
+
+        Example:
+            Input: "turn off the hall lights"
+            Output: {
+                "tool": "turn_off",
+                "arguments": {"entity_id": "light.hall_light"},
+                "friendly_name": "Hall Light",
+                "action": "turn_off"
+            }
+        """
+        logger.debug("Using fallback keyword-based parsing")
+        command_lower = command.lower().strip()
+
+        # Determine action
+        tool = None
+        if any(word in command_lower for word in ['turn off', 'off', 'disable']):
+            tool = 'turn_off'
+            action_desc = 'turned off'
+        elif any(word in command_lower for word in ['turn on', 'on', 'enable']):
+            tool = 'turn_on'
+            action_desc = 'turned on'
+        elif 'toggle' in command_lower:
+            tool = 'toggle'
+            action_desc = 'toggled'
+        else:
+            logger.warning(f"Unknown action in command: {command}")
+            return None
+
+        # Extract entity name from command
+        entity_query = command_lower
+        for action_word in ['turn off', 'turn on', 'toggle', 'off', 'on', 'the']:
+            entity_query = entity_query.replace(action_word, '').strip()
+
+        logger.info(f"Searching for entity: '{entity_query}'")
+
+        # Return placeholder (this will work if entity ID matches pattern)
+        return {
+            "tool": tool,
+            "arguments": {
+                "entity_id": f"light.{entity_query.replace(' ', '_')}"
+            },
+            "friendly_name": entity_query.title(),
+            "action_desc": action_desc
+        }
+
+    async def _parse_command_hybrid(self, command: str) -> Optional['ParsedCommand']:
+        """
+        Hybrid command parser: Try LLM first, fallback to keywords.
+
+        This provides the best of both worlds:
+        - LLM parsing for complex area-based and natural commands
+        - Keyword fallback for reliability when LLM fails or times out
+
+        Args:
+            command: Natural language command
+
+        Returns:
+            ParsedCommand if successful, None otherwise
+
+        Example:
+            >>> await self._parse_command_hybrid("turn off study lights")
+            ParsedCommand(action="turn_off", target_type="area", target="study", ...)
+        """
+        import asyncio
+        from .command_parser import ParsedCommand
+
+        # Try LLM parsing with timeout
+        try:
+            logger.debug("Attempting LLM-based command parsing...")
+            parsed = await asyncio.wait_for(
+                self._parse_command_with_llm(command),
+                timeout=5.0
+            )
+
+            if parsed:
+                logger.info("LLM parsing succeeded")
+                return parsed
+            else:
+                logger.warning("LLM parsing returned None, falling back to keywords")
+
+        except asyncio.TimeoutError:
+            logger.warning("LLM parsing timed out (>5s), falling back to keywords")
+        except Exception as e:
+            logger.warning(f"LLM parsing failed: {e}, falling back to keywords")
+
+        # Fallback to keyword-based parsing
+        try:
+            logger.debug("Using fallback keyword parsing...")
+            fallback_result = await self._parse_command_fallback(command)
+
+            if not fallback_result:
+                return None
+
+            # Convert fallback format to ParsedCommand
+            # Extract entity_id from arguments
+            entity_id = fallback_result['arguments'].get('entity_id', '')
+            entity_name = entity_id.split('.', 1)[1] if '.' in entity_id else entity_id
+
+            # Simple heuristic: assume it's targeting a single entity
+            parsed = ParsedCommand(
+                action=fallback_result['tool'],
+                target_type='entity',
+                target=entity_name.replace('_', ' '),
+                entity_type=None,
+                parameters={}
+            )
+
+            logger.info("Fallback parsing succeeded")
+            return parsed
+
+        except Exception as e:
+            logger.error(f"Fallback parsing failed: {e}", exc_info=True)
+            return None
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/router.py b/backends/advanced/src/advanced_omi_backend/plugins/router.py
new file mode 100644
index 00000000..e29f64e3
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/plugins/router.py
@@ -0,0 +1,170 @@
+"""
+Plugin routing system for multi-level plugin architecture.
+
+Routes pipeline events to appropriate plugins based on access level and triggers.
+"""
+
+import logging
+from typing import Dict, List, Optional
+
+from .base import BasePlugin, PluginContext, PluginResult
+
+logger = logging.getLogger(__name__)
+
+
+class PluginRouter:
+    """Routes pipeline events to appropriate plugins based on access level and triggers"""
+
+    def __init__(self):
+        self.plugins: Dict[str, BasePlugin] = {}
+        # Index plugins by access level for fast lookup
+        self._plugins_by_level: Dict[str, List[str]] = {
+            'transcript': [],
+            'streaming_transcript': [],
+            'conversation': [],
+            'memory': []
+        }
+
+    def register_plugin(self, plugin_id: str, plugin: BasePlugin):
+        """Register a plugin with the router"""
+        self.plugins[plugin_id] = plugin
+
+        # Index by access level
+        access_level = plugin.access_level
+        if access_level in self._plugins_by_level:
+            self._plugins_by_level[access_level].append(plugin_id)
+
+        logger.info(f"Registered plugin '{plugin_id}' for access level '{access_level}'")
+
+    async def trigger_plugins(
+        self,
+        access_level: str,
+        user_id: str,
+        data: Dict,
+        metadata: Optional[Dict] = None
+    ) -> List[PluginResult]:
+        """
+        Trigger all plugins registered for this access level.
+
+        Args:
+            access_level: 'transcript', 'streaming_transcript', 'conversation', or 'memory'
+            user_id: User ID for context
+            data: Access-level specific data
+            metadata: Optional metadata
+
+        Returns:
+            List of plugin results
+        """
+        results = []
+
+        # Hierarchical triggering logic:
+        # - 'streaming_transcript': trigger both 'streaming_transcript' AND 'transcript' plugins
+        # - 'transcript': trigger ONLY 'transcript' plugins (not 'streaming_transcript')
+        # - Other levels: exact match only
+        if access_level == 'streaming_transcript':
+            # Streaming mode: trigger both streaming_transcript AND transcript plugins
+            plugin_ids = (
+                self._plugins_by_level.get('streaming_transcript', []) +
+                self._plugins_by_level.get('transcript', [])
+            )
+        else:
+            # Batch mode or other modes: exact match only
+            plugin_ids = self._plugins_by_level.get(access_level, [])
+
+        for plugin_id in plugin_ids:
+            plugin = self.plugins[plugin_id]
+
+            if not plugin.enabled:
+                continue
+
+            # Check trigger condition
+            if not await self._should_trigger(plugin, data):
+                continue
+
+            # Execute plugin at appropriate access level
+            try:
+                context = PluginContext(
+                    user_id=user_id,
+                    access_level=access_level,
+                    data=data,
+                    metadata=metadata or {}
+                )
+
+                result = await self._execute_plugin(plugin, access_level, context)
+
+                if result:
+                    results.append(result)
+
+                    # If plugin says stop processing, break
+                    if not result.should_continue:
+                        logger.info(f"Plugin '{plugin_id}' stopped further processing")
+                        break
+
+            except Exception as e:
+                logger.error(f"Error executing plugin '{plugin_id}': {e}", exc_info=True)
+
+        return results
+
+    async def _should_trigger(self, plugin: BasePlugin, data: Dict) -> bool:
+        """Check if plugin should be triggered based on trigger configuration"""
+        trigger_type = plugin.trigger.get('type', 'always')
+
+        if trigger_type == 'always':
+            return True
+
+        elif trigger_type == 'wake_word':
+            # Check if transcript starts with wake word(s)
+            transcript = data.get('transcript', '')
+            transcript_lower = transcript.lower().strip()
+
+            # Support both singular 'wake_word' and plural 'wake_words' (list)
+            wake_words = plugin.trigger.get('wake_words', [])
+            if not wake_words:
+                # Fallback to singular wake_word for backward compatibility
+                wake_word = plugin.trigger.get('wake_word', '')
+                if wake_word:
+                    wake_words = [wake_word]
+
+            # Check if transcript starts with any wake word
+            for wake_word in wake_words:
+                wake_word_lower = wake_word.lower()
+                if wake_word_lower and transcript_lower.startswith(wake_word_lower):
+                    # Extract command (remove wake word)
+                    command = transcript[len(wake_word):].strip()
+                    data['command'] = command
+                    data['original_transcript'] = transcript
+                    return True
+
+            return False
+
+        elif trigger_type == 'conditional':
+            # Future: Custom condition checking
+            return True
+
+        return False
+
+    async def _execute_plugin(
+        self,
+        plugin: BasePlugin,
+        access_level: str,
+        context: PluginContext
+    ) -> Optional[PluginResult]:
+        """Execute plugin method for specified access level"""
+        # Both 'transcript' and 'streaming_transcript' call on_transcript()
+        if access_level in ('transcript', 'streaming_transcript'):
+            return await plugin.on_transcript(context)
+        elif access_level == 'conversation':
+            return await plugin.on_conversation_complete(context)
+        elif access_level == 'memory':
+            return await plugin.on_memory_processed(context)
+
+        return None
+
+    async def cleanup_all(self):
+        """Clean up all registered plugins"""
+        for plugin_id, plugin in self.plugins.items():
+            try:
+                await plugin.cleanup()
+                logger.info(f"Cleaned up plugin '{plugin_id}'")
+            except Exception as e:
+                logger.error(f"Error cleaning up plugin '{plugin_id}': {e}")
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py
index e2b49676..93e94817 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py
@@ -8,7 +8,7 @@
 from typing import Optional
 
 from fastapi import APIRouter, Body, Depends, HTTPException, Request
-from fastapi.responses import Response
+from fastapi.responses import JSONResponse, Response
 from pydantic import BaseModel
 
 from advanced_omi_backend.auth import current_active_user, current_superuser
@@ -152,7 +152,7 @@ async def save_chat_config(
         yaml_content = await request.body()
         yaml_str = yaml_content.decode('utf-8')
         result = await system_controller.save_chat_config_yaml(yaml_str)
-        return result
+        return JSONResponse(content=result)
     except ValueError as e:
         raise HTTPException(status_code=400, detail=str(e))
     except Exception as e:
@@ -170,12 +170,59 @@ async def validate_chat_config(
         yaml_content = await request.body()
         yaml_str = yaml_content.decode('utf-8')
         result = await system_controller.validate_chat_config_yaml(yaml_str)
-        return result
+        return JSONResponse(content=result)
     except Exception as e:
         logger.error(f"Failed to validate chat config: {e}")
         raise HTTPException(status_code=500, detail=str(e))
 
 
+# Plugin Configuration Management Endpoints
+
+@router.get("/admin/plugins/config", response_class=Response)
+async def get_plugins_config(current_user: User = Depends(current_superuser)):
+    """Get plugins configuration as YAML. Admin only."""
+    try:
+        yaml_content = await system_controller.get_plugins_config_yaml()
+        return Response(content=yaml_content, media_type="text/plain")
+    except Exception as e:
+        logger.error(f"Failed to get plugins config: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/admin/plugins/config")
+async def save_plugins_config(
+    request: Request,
+    current_user: User = Depends(current_superuser)
+):
+    """Save plugins configuration from YAML. Admin only."""
+    try:
+        yaml_content = await request.body()
+        yaml_str = yaml_content.decode('utf-8')
+        result = await system_controller.save_plugins_config_yaml(yaml_str)
+        return JSONResponse(content=result)
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        logger.error(f"Failed to save plugins config: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/admin/plugins/config/validate")
+async def validate_plugins_config(
+    request: Request,
+    current_user: User = Depends(current_superuser)
+):
+    """Validate plugins configuration YAML. Admin only."""
+    try:
+        yaml_content = await request.body()
+        yaml_str = yaml_content.decode('utf-8')
+        result = await system_controller.validate_plugins_config_yaml(yaml_str)
+        return JSONResponse(content=result)
+    except Exception as e:
+        logger.error(f"Failed to validate plugins config: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
 @router.get("/streaming/status")
 async def get_streaming_status(request: Request, current_user: User = Depends(current_superuser)):
     """Get status of active streaming sessions and Redis Streams health. Admin only."""
diff --git a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py
new file mode 100644
index 00000000..23f04d87
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py
@@ -0,0 +1,108 @@
+"""Plugin service for accessing the global plugin router.
+
+This module provides singleton access to the plugin router, allowing
+worker jobs to trigger plugins without accessing FastAPI app state directly.
+"""
+
+import logging
+from typing import Optional
+from pathlib import Path
+import yaml
+
+from advanced_omi_backend.plugins import PluginRouter
+
+logger = logging.getLogger(__name__)
+
+# Global plugin router instance
+_plugin_router: Optional[PluginRouter] = None
+
+
+def get_plugin_router() -> Optional[PluginRouter]:
+    """Get the global plugin router instance.
+
+    Returns:
+        Plugin router instance if initialized, None otherwise
+    """
+    global _plugin_router
+    return _plugin_router
+
+
+def set_plugin_router(router: PluginRouter) -> None:
+    """Set the global plugin router instance.
+
+    This should be called during app initialization in app_factory.py.
+
+    Args:
+        router: Initialized plugin router instance
+    """
+    global _plugin_router
+    _plugin_router = router
+    logger.info("Plugin router registered with plugin service")
+
+
+def init_plugin_router() -> Optional[PluginRouter]:
+    """Initialize the plugin router from configuration.
+
+    This is called during app startup to create and register the plugin router.
+
+    Returns:
+        Initialized plugin router, or None if no plugins configured
+    """
+    global _plugin_router
+
+    if _plugin_router is not None:
+        logger.warning("Plugin router already initialized")
+        return _plugin_router
+
+    try:
+        _plugin_router = PluginRouter()
+
+        # Load plugin configuration
+        plugins_yml = Path("/app/plugins.yml")
+        if plugins_yml.exists():
+            with open(plugins_yml, 'r') as f:
+                plugins_config = yaml.safe_load(f)
+                plugins_data = plugins_config.get('plugins', {})
+
+            # Initialize each enabled plugin
+            for plugin_id, plugin_config in plugins_data.items():
+                if not plugin_config.get('enabled', False):
+                    continue
+
+                try:
+                    if plugin_id == 'homeassistant':
+                        from advanced_omi_backend.plugins.homeassistant import HomeAssistantPlugin
+                        plugin = HomeAssistantPlugin(plugin_config)
+                        # Note: async initialization happens in app_factory lifespan
+                        _plugin_router.register_plugin(plugin_id, plugin)
+                        logger.info(f"✅ Plugin '{plugin_id}' registered")
+                    else:
+                        logger.warning(f"Unknown plugin: {plugin_id}")
+
+                except Exception as e:
+                    logger.error(f"Failed to register plugin '{plugin_id}': {e}", exc_info=True)
+
+            logger.info(f"Plugins registered: {len(_plugin_router.plugins)} total")
+        else:
+            logger.info("No plugins.yml found, plugins disabled")
+
+        return _plugin_router
+
+    except Exception as e:
+        logger.error(f"Failed to initialize plugin router: {e}", exc_info=True)
+        _plugin_router = None
+        return None
+
+
+async def cleanup_plugin_router() -> None:
+    """Clean up the plugin router and all registered plugins."""
+    global _plugin_router
+
+    if _plugin_router:
+        try:
+            await _plugin_router.cleanup_all()
+            logger.info("Plugin router cleanup complete")
+        except Exception as e:
+            logger.error(f"Error during plugin router cleanup: {e}")
+        finally:
+            _plugin_router = None
diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
index d2b8c4fd..49f0c5c9 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
@@ -10,8 +10,10 @@
 from datetime import datetime
 from typing import Dict, Any
 from rq.job import Job
+
 from advanced_omi_backend.models.job import async_job
 from advanced_omi_backend.controllers.queue_controller import redis_conn
+from advanced_omi_backend.services.plugin_service import get_plugin_router
 
 from advanced_omi_backend.utils.conversation_utils import (
     analyze_speech,
@@ -398,6 +400,42 @@ async def open_conversation_job(
             )
             last_result_count = current_count
 
+            # Trigger transcript-level plugins on new transcript segments
+            try:
+                plugin_router = get_plugin_router()
+                if plugin_router:
+                    # Get the latest transcript text for plugin processing
+                    transcript_text = combined.get('text', '')
+
+                    if transcript_text:
+                        plugin_data = {
+                            'transcript': transcript_text,
+                            'segment_id': f"{session_id}_{current_count}",
+                            'conversation_id': conversation_id,
+                            'segments': combined.get('segments', []),
+                            'word_count': speech_analysis.get('word_count', 0),
+                        }
+
+                        plugin_results = await plugin_router.trigger_plugins(
+                            access_level='streaming_transcript',
+                            user_id=user_id,
+                            data=plugin_data,
+                            metadata={'client_id': client_id}
+                        )
+
+                        if plugin_results:
+                            logger.info(f"📌 Triggered {len(plugin_results)} streaming transcript plugins")
+                            for result in plugin_results:
+                                if result.message:
+                                    logger.info(f"  Plugin: {result.message}")
+
+                                # If plugin stopped processing, log it
+                                if not result.should_continue:
+                                    logger.info(f"  Plugin stopped normal processing")
+
+            except Exception as e:
+                logger.warning(f"⚠️ Error triggering transcript-level plugins: {e}")
+
         await asyncio.sleep(1)  # Check every second for responsiveness
 
     logger.info(
@@ -496,6 +534,43 @@ async def open_conversation_job(
     # Wait a moment to ensure jobs are registered in RQ
     await asyncio.sleep(0.5)
 
+    # Trigger conversation-level plugins
+    try:
+        plugin_router = get_plugin_router()
+        if plugin_router:
+            # Get conversation data for plugin context
+            conversation_model = await Conversation.find_one(
+                Conversation.conversation_id == conversation_id
+            )
+
+            plugin_data = {
+                'conversation': {
+                    'conversation_id': conversation_id,
+                    'audio_uuid': session_id,
+                    'client_id': client_id,
+                    'user_id': user_id,
+                },
+                'transcript': conversation_model.transcript if conversation_model else "",
+                'duration': time.time() - start_time,
+                'conversation_id': conversation_id,
+            }
+
+            plugin_results = await plugin_router.trigger_plugins(
+                access_level='conversation',
+                user_id=user_id,
+                data=plugin_data,
+                metadata={'end_reason': end_reason}
+            )
+
+            if plugin_results:
+                logger.info(f"📌 Triggered {len(plugin_results)} conversation-level plugins")
+                for result in plugin_results:
+                    if result.message:
+                        logger.info(f"  Plugin result: {result.message}")
+
+    except Exception as e:
+        logger.warning(f"⚠️ Error triggering conversation-level plugins: {e}")
+
     # Call shared cleanup/restart logic
     return await handle_end_of_conversation(
         session_id=session_id,
diff --git a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py
index 8b64d690..a6939bed 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py
@@ -16,6 +16,7 @@
 )
 from advanced_omi_backend.models.job import BaseRQJob, JobPriority, async_job
 from advanced_omi_backend.services.memory.base import MemoryEntry
+from advanced_omi_backend.services.plugin_service import get_plugin_router
 
 logger = logging.getLogger(__name__)
 
@@ -240,6 +241,41 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict
             # This allows users to resume talking immediately after conversation closes,
             # without waiting for memory processing to complete.
 
+            # Trigger memory-level plugins
+            try:
+                plugin_router = get_plugin_router()
+                if plugin_router:
+                    plugin_data = {
+                        'memories': created_memory_ids,
+                        'conversation': {
+                            'conversation_id': conversation_id,
+                            'client_id': client_id,
+                            'user_id': user_id,
+                            'user_email': user_email,
+                        },
+                        'memory_count': len(created_memory_ids),
+                        'conversation_id': conversation_id,
+                    }
+
+                    plugin_results = await plugin_router.trigger_plugins(
+                        access_level='memory',
+                        user_id=user_id,
+                        data=plugin_data,
+                        metadata={
+                            'processing_time': processing_time,
+                            'memory_provider': str(memory_provider),
+                        }
+                    )
+
+                    if plugin_results:
+                        logger.info(f"📌 Triggered {len(plugin_results)} memory-level plugins")
+                        for result in plugin_results:
+                            if result.message:
+                                logger.info(f"  Plugin result: {result.message}")
+
+            except Exception as e:
+                logger.warning(f"⚠️ Error triggering memory-level plugins: {e}")
+
             return {
                 "success": True,
                 "memories_created": len(created_memory_ids),
diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
index c9216d4f..71e64dbd 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
@@ -19,6 +19,7 @@
     REDIS_URL,
 )
 from advanced_omi_backend.utils.conversation_utils import analyze_speech, mark_conversation_deleted
+from advanced_omi_backend.services.plugin_service import get_plugin_router
 
 logger = logging.getLogger(__name__)
 
@@ -167,6 +168,10 @@ async def transcribe_full_audio_job(
     if not conversation:
         raise ValueError(f"Conversation {conversation_id} not found")
 
+    # Extract user_id and client_id for plugin context
+    user_id = str(conversation.user_id) if conversation.user_id else None
+    client_id = conversation.client_id if hasattr(conversation, 'client_id') else None
+
     # Use the provided audio path
     actual_audio_path = audio_path
     logger.info(f"📁 Using audio for transcription: {audio_path}")
@@ -202,6 +207,59 @@ async def transcribe_full_audio_job(
         f"📊 Transcription complete: {len(transcript_text)} chars, {len(segments)} segments, {len(words)} words"
     )
 
+    # Trigger transcript-level plugins BEFORE speech validation
+    # This ensures wake-word commands execute even if conversation gets deleted
+    logger.info(f"🔍 DEBUG: About to trigger plugins - transcript_text exists: {bool(transcript_text)}")
+    if transcript_text:
+        try:
+            from advanced_omi_backend.services.plugin_service import init_plugin_router
+
+            # Initialize plugin router if not already initialized (worker context)
+            plugin_router = get_plugin_router()
+            if not plugin_router:
+                logger.info("🔧 Initializing plugin router in worker process...")
+                plugin_router = init_plugin_router()
+
+                # Initialize async plugins
+                if plugin_router:
+                    for plugin_id, plugin in plugin_router.plugins.items():
+                        try:
+                            await plugin.initialize()
+                            logger.info(f"✅ Plugin '{plugin_id}' initialized in worker")
+                        except Exception as e:
+                            logger.exception(f"Failed to initialize plugin '{plugin_id}' in worker: {e}")
+
+            logger.info(f"🔍 DEBUG: Plugin router retrieved: {plugin_router is not None}")
+
+            if plugin_router:
+                logger.info(f"🔍 DEBUG: Preparing to trigger transcript plugins for conversation {conversation_id}")
+                plugin_data = {
+                    'transcript': transcript_text,
+                    'segment_id': f"{conversation_id}_batch",
+                    'conversation_id': conversation_id,
+                    'segments': segments,
+                    'word_count': len(words),
+                }
+
+                logger.info(f"🔍 DEBUG: Calling trigger_plugins with user_id={user_id}, client_id={client_id}")
+                plugin_results = await plugin_router.trigger_plugins(
+                    access_level='transcript',  # Batch mode - only 'transcript' plugins, NOT 'streaming_transcript'
+                    user_id=user_id,
+                    data=plugin_data,
+                    metadata={'client_id': client_id}
+                )
+                logger.info(f"🔍 DEBUG: Plugin trigger returned {len(plugin_results) if plugin_results else 0} results")
+
+                if plugin_results:
+                    logger.info(f"✅ Triggered {len(plugin_results)} transcript plugins in batch mode")
+                    for result in plugin_results:
+                        if result.message:
+                            logger.info(f"  Plugin: {result.message}")
+        except Exception as e:
+            logger.exception(f"⚠️ Error triggering transcript plugins in batch mode: {e}")
+
+    logger.info(f"🔍 DEBUG: Plugin processing complete, moving to speech validation")
+
     # Validate meaningful speech BEFORE any further processing
     transcript_data = {"text": transcript_text, "words": words}
     speech_analysis = analyze_speech(transcript_data)
diff --git a/backends/advanced/webui/src/App.tsx b/backends/advanced/webui/src/App.tsx
index fca59623..42370975 100644
--- a/backends/advanced/webui/src/App.tsx
+++ b/backends/advanced/webui/src/App.tsx
@@ -13,6 +13,7 @@ import System from './pages/System'
 import Upload from './pages/Upload'
 import Queue from './pages/Queue'
 import LiveRecord from './pages/LiveRecord'
+import Plugins from './pages/Plugins'
 import ProtectedRoute from './components/auth/ProtectedRoute'
 import { ErrorBoundary, PageErrorBoundary } from './components/ErrorBoundary'
 
@@ -89,6 +90,11 @@ function App() {
                     <Queue />
                   </PageErrorBoundary>
                 } />
+                <Route path="plugins" element={
+                  <PageErrorBoundary>
+                    <Plugins />
+                  </PageErrorBoundary>
+                } />
               </Route>
             </Routes>
           </Router>
diff --git a/backends/advanced/webui/src/components/PluginSettings.tsx b/backends/advanced/webui/src/components/PluginSettings.tsx
new file mode 100644
index 00000000..05576120
--- /dev/null
+++ b/backends/advanced/webui/src/components/PluginSettings.tsx
@@ -0,0 +1,195 @@
+import { useState, useEffect } from 'react'
+import { Puzzle, RefreshCw, CheckCircle, Save, RotateCcw, AlertCircle } from 'lucide-react'
+import { systemApi } from '../services/api'
+import { useAuth } from '../contexts/AuthContext'
+
+interface PluginSettingsProps {
+  className?: string
+}
+
+export default function PluginSettings({ className }: PluginSettingsProps) {
+  const [configYaml, setConfigYaml] = useState('')
+  const [loading, setLoading] = useState(false)
+  const [validating, setValidating] = useState(false)
+  const [saving, setSaving] = useState(false)
+  const [message, setMessage] = useState('')
+  const [error, setError] = useState('')
+  const { isAdmin } = useAuth()
+
+  useEffect(() => {
+    loadPluginsConfig()
+  }, [])
+
+  const loadPluginsConfig = async () => {
+    setLoading(true)
+    setError('')
+    setMessage('')
+
+    try {
+      const response = await systemApi.getPluginsConfigRaw()
+      setConfigYaml(response.data.config_yaml || response.data)
+      setMessage('Configuration loaded successfully')
+      setTimeout(() => setMessage(''), 3000)
+    } catch (err: any) {
+      const status = err.response?.status
+      if (status === 401) {
+        setError('Unauthorized: admin privileges required')
+      } else {
+        setError(err.response?.data?.error || 'Failed to load configuration')
+      }
+    } finally {
+      setLoading(false)
+    }
+  }
+
+  const validateConfig = async () => {
+    if (!configYaml.trim()) {
+      setError('Configuration cannot be empty')
+      return
+    }
+
+    setValidating(true)
+    setError('')
+    setMessage('')
+
+    try {
+      const response = await systemApi.validatePluginsConfig(configYaml)
+      if (response.data.valid) {
+        setMessage('✅ Configuration is valid')
+      } else {
+        setError(response.data.error || 'Validation failed')
+      }
+      setTimeout(() => setMessage(''), 3000)
+    } catch (err: any) {
+      setError(err.response?.data?.error || 'Validation failed')
+    } finally {
+      setValidating(false)
+    }
+  }
+
+  const saveConfig = async () => {
+    if (!configYaml.trim()) {
+      setError('Configuration cannot be empty')
+      return
+    }
+
+    setSaving(true)
+    setError('')
+    setMessage('')
+
+    try {
+      await systemApi.updatePluginsConfigRaw(configYaml)
+      setMessage('✅ Configuration saved successfully. Restart backend for changes to take effect.')
+      setTimeout(() => setMessage(''), 5000)
+    } catch (err: any) {
+      setError(err.response?.data?.error || 'Failed to save configuration')
+    } finally {
+      setSaving(false)
+    }
+  }
+
+  const resetConfig = () => {
+    loadPluginsConfig()
+    setMessage('Configuration reset to file version')
+    setTimeout(() => setMessage(''), 3000)
+  }
+
+  if (!isAdmin) {
+    return null
+  }
+
+  return (
+    <div className={className}>
+      <div className="bg-white dark:bg-gray-800 rounded-lg border border-gray-200 dark:border-gray-700 p-6">
+        {/* Header */}
+        <div className="flex items-center justify-between mb-4">
+          <div className="flex items-center space-x-2">
+            <Puzzle className="h-5 w-5 text-blue-600" />
+            <h3 className="text-lg font-semibold text-gray-900 dark:text-gray-100">
+              Plugin Configuration
+            </h3>
+          </div>
+          <div className="flex items-center space-x-2">
+            <button
+              onClick={resetConfig}
+              disabled={loading || saving}
+              className="flex items-center space-x-1 px-3 py-1.5 text-sm text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-gray-200 disabled:opacity-50"
+            >
+              <RotateCcw className="h-4 w-4" />
+              <span>Reset</span>
+            </button>
+            <button
+              onClick={loadPluginsConfig}
+              disabled={loading || saving}
+              className="flex items-center space-x-1 px-3 py-1.5 text-sm text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-gray-200 disabled:opacity-50"
+            >
+              <RefreshCw className={`h-4 w-4 ${loading ? 'animate-spin' : ''}`} />
+              <span>Reload</span>
+            </button>
+          </div>
+        </div>
+
+        {/* Messages */}
+        {message && (
+          <div className="mb-4 p-3 bg-green-50 dark:bg-green-900/20 border border-green-200 dark:border-green-800 rounded-md flex items-start space-x-2">
+            <CheckCircle className="h-5 w-5 text-green-600 dark:text-green-400 mt-0.5" />
+            <p className="text-sm text-green-700 dark:text-green-300">{message}</p>
+          </div>
+        )}
+
+        {error && (
+          <div className="mb-4 p-3 bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-md flex items-start space-x-2">
+            <AlertCircle className="h-5 w-5 text-red-600 dark:text-red-400 mt-0.5" />
+            <p className="text-sm text-red-700 dark:text-red-300">{error}</p>
+          </div>
+        )}
+
+        {/* Editor */}
+        <div className="mb-4">
+          <textarea
+            value={configYaml}
+            onChange={(e) => setConfigYaml(e.target.value)}
+            disabled={loading || saving}
+            className="w-full h-96 p-4 font-mono text-sm bg-gray-50 dark:bg-gray-900 border border-gray-300 dark:border-gray-600 rounded-md focus:ring-2 focus:ring-blue-500 focus:border-transparent resize-y"
+            placeholder="Loading configuration..."
+            spellCheck={false}
+          />
+        </div>
+
+        {/* Actions */}
+        <div className="flex space-x-3">
+          <button
+            onClick={validateConfig}
+            disabled={loading || validating || saving}
+            className="flex items-center space-x-2 px-4 py-2 text-sm font-medium text-gray-700 dark:text-gray-200 bg-white dark:bg-gray-700 border border-gray-300 dark:border-gray-600 rounded-md hover:bg-gray-50 dark:hover:bg-gray-600 disabled:opacity-50"
+          >
+            <CheckCircle className="h-4 w-4" />
+            <span>{validating ? 'Validating...' : 'Validate'}</span>
+          </button>
+
+          <button
+            onClick={saveConfig}
+            disabled={loading || saving || validating}
+            className="flex items-center space-x-2 px-4 py-2 text-sm font-medium text-white bg-blue-600 rounded-md hover:bg-blue-700 disabled:opacity-50"
+          >
+            <Save className="h-4 w-4" />
+            <span>{saving ? 'Saving...' : 'Save Changes'}</span>
+          </button>
+        </div>
+
+        {/* Help text */}
+        <div className="mt-6 p-4 bg-blue-50 dark:bg-blue-900/20 border border-blue-200 dark:border-blue-800 rounded-md">
+          <h4 className="text-sm font-medium text-blue-900 dark:text-blue-100 mb-2">
+            Configuration Help
+          </h4>
+          <ul className="text-sm text-blue-700 dark:text-blue-300 space-y-1 list-disc list-inside">
+            <li>Define enabled plugins and their trigger types</li>
+            <li>Configure wake words for command-based plugins</li>
+            <li>Set plugin URLs and authentication tokens</li>
+            <li>Changes require backend restart to take effect</li>
+          </ul>
+        </div>
+      </div>
+    </div>
+  )
+}
diff --git a/backends/advanced/webui/src/components/layout/Layout.tsx b/backends/advanced/webui/src/components/layout/Layout.tsx
index 5995f823..c3976d04 100644
--- a/backends/advanced/webui/src/components/layout/Layout.tsx
+++ b/backends/advanced/webui/src/components/layout/Layout.tsx
@@ -1,5 +1,5 @@
 import { Link, useLocation, Outlet } from 'react-router-dom'
-import { Music, MessageSquare, MessageCircle, Brain, Users, Upload, Settings, LogOut, Sun, Moon, Shield, Radio, Layers, Calendar } from 'lucide-react'
+import { Music, MessageSquare, MessageCircle, Brain, Users, Upload, Settings, LogOut, Sun, Moon, Shield, Radio, Layers, Calendar, Puzzle } from 'lucide-react'
 import { useAuth } from '../../contexts/AuthContext'
 import { useTheme } from '../../contexts/ThemeContext'
 
@@ -18,6 +18,7 @@ export default function Layout() {
     ...(isAdmin ? [
       { path: '/upload', label: 'Upload Audio', icon: Upload },
       { path: '/queue', label: 'Queue Management', icon: Layers },
+      { path: '/plugins', label: 'Plugins', icon: Puzzle },
       { path: '/system', label: 'System State', icon: Settings },
     ] : []),
   ]
diff --git a/backends/advanced/webui/src/pages/Plugins.tsx b/backends/advanced/webui/src/pages/Plugins.tsx
new file mode 100644
index 00000000..f28921f5
--- /dev/null
+++ b/backends/advanced/webui/src/pages/Plugins.tsx
@@ -0,0 +1,9 @@
+import PluginSettings from '../components/PluginSettings'
+
+export default function Plugins() {
+  return (
+    <div>
+      <PluginSettings />
+    </div>
+  )
+}
diff --git a/backends/advanced/webui/src/services/api.ts b/backends/advanced/webui/src/services/api.ts
index 35964fc2..e5368dcd 100644
--- a/backends/advanced/webui/src/services/api.ts
+++ b/backends/advanced/webui/src/services/api.ts
@@ -180,6 +180,17 @@ export const systemApi = {
       headers: { 'Content-Type': 'text/plain' }
     }),
 
+  // Plugin Configuration Management
+  getPluginsConfigRaw: () => api.get('/api/admin/plugins/config'),
+  updatePluginsConfigRaw: (configYaml: string) =>
+    api.post('/api/admin/plugins/config', configYaml, {
+      headers: { 'Content-Type': 'text/plain' }
+    }),
+  validatePluginsConfig: (configYaml: string) =>
+    api.post('/api/admin/plugins/config/validate', configYaml, {
+      headers: { 'Content-Type': 'text/plain' }
+    }),
+
   // Memory Provider Management
   getMemoryProvider: () => api.get('/api/admin/memory/provider'),
   setMemoryProvider: (provider: string) => api.post('/api/admin/memory/provider', { provider }),
diff --git a/config/plugins.yml b/config/plugins.yml
new file mode 100644
index 00000000..61c14def
--- /dev/null
+++ b/config/plugins.yml
@@ -0,0 +1,12 @@
+plugins:
+  homeassistant:
+    enabled: true
+    access_level: transcript
+    trigger:
+      type: wake_word
+      wake_words:  # Support multiple variations
+        - vv    # Deepgram transcribes "vivi" as "VV"
+        - vivi  # Original wake word
+        - vv.   # Sometimes includes period
+    ha_url: http://host.docker.internal:18123
+    ha_token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiI0ODE0MDk1MWViOGM0MTYxOTY2N2YzNzI1MTFmM2QyMiIsImlhdCI6MTc2NzQwOTg4NiwiZXhwIjoyMDgyNzY5ODg2fQ.Q1ko6z2BprxoQO0Pp1xCVc_BRny0lNMd-_L3YSDVkKM
diff --git a/status.py b/status.py
index 3b3e61c9..82e3f041 100644
--- a/status.py
+++ b/status.py
@@ -43,40 +43,8 @@ def get_container_status(service_name: str) -> Dict[str, Any]:
 
     try:
         # Get container status using docker compose ps
-        cmd = ['docker', 'compose', 'ps', '--format', 'json']
-
-        # Handle special profiles for backend (HTTPS and Obsidian)
-        if service_name == 'backend':
-            profiles = []
-            
-            # Check for HTTPS profile
-            caddyfile_path = service_path / 'Caddyfile'
-            if caddyfile_path.exists():
-                profiles.append('https')
-            
-            # Check for Obsidian/Neo4j profile
-            env_file = service_path / '.env'
-            if env_file.exists():
-                env_values = dotenv_values(env_file)
-                neo4j_host = env_values.get('NEO4J_HOST', '')
-                if neo4j_host and neo4j_host not in ['', 'your-neo4j-host-here', 'your_neo4j_host_here']:
-                    profiles.append('obsidian')
-            
-            # Apply profiles if any are needed
-            if profiles:
-                cmd = ['docker', 'compose'] + [item for profile in profiles for item in ['--profile', profile]] + ['ps', '--format', 'json']
-
-        # Handle speaker-recognition profiles
-        if service_name == 'speaker-recognition':
-            from dotenv import dotenv_values
-            env_file = service_path / '.env'
-            if env_file.exists():
-                env_values = dotenv_values(env_file)
-                compute_mode = env_values.get('COMPUTE_MODE', 'cpu')
-                if compute_mode == 'gpu':
-                    cmd = ['docker', 'compose', '--profile', 'gpu', 'ps', '--format', 'json']
-                else:
-                    cmd = ['docker', 'compose', '--profile', 'cpu', 'ps', '--format', 'json']
+        # Use 'ps -a' to get all containers regardless of profile
+        cmd = ['docker', 'compose', 'ps', '-a', '--format', 'json']
 
         result = subprocess.run(
             cmd,
@@ -95,8 +63,14 @@ def get_container_status(service_name: str) -> Dict[str, Any]:
             if line:
                 try:
                     container = json.loads(line)
+                    container_name = container.get('Name', 'unknown')
+
+                    # Skip test containers - they're not part of production services
+                    if '-test-' in container_name.lower():
+                        continue
+
                     containers.append({
-                        'name': container.get('Name', 'unknown'),
+                        'name': container_name,
                         'state': container.get('State', 'unknown'),
                         'status': container.get('Status', 'unknown'),
                         'health': container.get('Health', 'none')
@@ -202,7 +176,12 @@ def show_quick_status():
             container_icon = "🟡"
         elif status['container_status'] == 'stopped':
             container_icon = "🔴"
+        elif status['container_status'] == 'not_found':
+            container_icon = "⚪"
+        elif status['container_status'] in ['error', 'timeout']:
+            container_icon = "⚫"
         else:
+            # Unknown status - log it for debugging
             container_icon = "⚫"
 
         # Health status
diff --git a/tests/endpoints/system_admin_tests.robot b/tests/endpoints/system_admin_tests.robot
index ec5e1fb2..5e4b9d3e 100644
--- a/tests/endpoints/system_admin_tests.robot
+++ b/tests/endpoints/system_admin_tests.robot
@@ -168,9 +168,10 @@ Validate Chat Configuration Test
 
     # Valid prompt should pass
     ${valid_prompt}=   Set Variable    You are a friendly AI assistant that helps users with their daily tasks.
+    &{headers}=        Create Dictionary    Content-Type=text/plain
     ${response}=       POST On Session    api    /api/admin/chat/config/validate
     ...                data=${valid_prompt}
-    ...                headers={"Content-Type": "text/plain"}
+    ...                headers=${headers}
     Should Be Equal As Integers    ${response.status_code}    200
     ${result}=         Set Variable    ${response.json()}
     Should Be True     ${result}[valid] == $True
@@ -179,7 +180,7 @@ Validate Chat Configuration Test
     ${short_prompt}=   Set Variable    Hi
     ${response}=       POST On Session    api    /api/admin/chat/config/validate
     ...                data=${short_prompt}
-    ...                headers={"Content-Type": "text/plain"}
+    ...                headers=${headers}
     Should Be Equal As Integers    ${response.status_code}    200
     ${result}=         Set Variable    ${response.json()}
     Should Be True     ${result}[valid] == $False
@@ -191,9 +192,10 @@ Save And Retrieve Chat Configuration Test
 
     # Save custom prompt
     ${custom_prompt}=  Set Variable    You are a specialized AI assistant for technical support and troubleshooting.
+    &{headers}=        Create Dictionary    Content-Type=text/plain
     ${response}=       POST On Session    api    /api/admin/chat/config
     ...                data=${custom_prompt}
-    ...                headers={"Content-Type": "text/plain"}
+    ...                headers=${headers}
     Should Be Equal As Integers    ${response.status_code}    200
     ${result}=         Set Variable    ${response.json()}
     Should Be True     ${result}[success] == $True
diff --git a/wizard.py b/wizard.py
index a2e2b2f7..dd727cec 100755
--- a/wizard.py
+++ b/wizard.py
@@ -4,6 +4,7 @@
 Handles service selection and delegation only - no configuration duplication
 """
 
+import getpass
 import shutil
 import subprocess
 import sys
@@ -81,6 +82,62 @@ def is_placeholder(value, *placeholder_variants):
     }
 }
 
+# Plugin configuration registry
+# Plugins are lightweight integrations that extend Chronicle functionality
+# They are configured during wizard setup and stored in config/plugins.yml
+#
+# Access Levels (when plugins execute):
+#   - transcript: Fires when new transcript segment arrives
+#   - conversation: Fires when conversation completes
+#   - memory: Fires after memory extraction
+#
+# Trigger Types (how plugins decide to execute):
+#   - wake_word: Only if transcript starts with specified wake word
+#   - always: Execute on every invocation at this access level
+#   - conditional: Custom condition checking (future)
+PLUGINS = {
+    'homeassistant': {
+        'name': 'Home Assistant',
+        'description': 'Control Home Assistant devices via natural language with wake word',
+        'enabled_by_default': False,
+        'requires_tailscale': True,  # Requires Tailscale for remote HA access
+        'access_level': 'transcript',  # When to trigger
+        'trigger_type': 'wake_word',   # How to trigger
+        'config': {
+            'ha_url': {
+                'prompt': 'Home Assistant URL',
+                'default': 'http://localhost:8123',
+                'type': 'url',
+                'help': 'The URL of your Home Assistant instance (e.g., http://100.99.62.5:8123)'
+            },
+            'ha_token': {
+                'prompt': 'Long-Lived Access Token',
+                'type': 'password',
+                'help': 'Create at: Home Assistant > Profile > Long-Lived Access Tokens'
+            },
+            'wake_word': {
+                'prompt': 'Wake word for HA commands',
+                'default': 'vivi',
+                'type': 'text',
+                'help': 'Say this word before commands (e.g., "Vivi, turn off hall lights")'
+            }
+        }
+    }
+    # Future plugin examples:
+    # 'sentiment_analyzer': {
+    #     'name': 'Sentiment Analyzer',
+    #     'access_level': 'conversation',
+    #     'trigger_type': 'always',
+    #     ...
+    # },
+    # 'memory_enricher': {
+    #     'name': 'Memory Enricher',
+    #     'access_level': 'memory',
+    #     'trigger_type': 'always',
+    #     ...
+    # }
+}
+
 def check_service_exists(service_name, service_config):
     """Check if service directory and script exist"""
     service_path = Path(service_config['path'])
@@ -153,18 +210,18 @@ def cleanup_unselected_services(selected_services):
                 console.print(f"🧹 [dim]Backed up {service_name} configuration to {backup_file.name} (service not selected)[/dim]")
 
 def run_service_setup(service_name, selected_services, https_enabled=False, server_ip=None,
-                     obsidian_enabled=False, neo4j_password=None):
+                     obsidian_enabled=False, neo4j_password=None, ts_authkey=None):
     """Execute individual service setup script"""
     if service_name == 'advanced':
         service = SERVICES['backend'][service_name]
-        
+
         # For advanced backend, pass URLs of other selected services and HTTPS config
         cmd = service['cmd'].copy()
         if 'speaker-recognition' in selected_services:
             cmd.extend(['--speaker-service-url', 'http://speaker-service:8085'])
         if 'asr-services' in selected_services:
             cmd.extend(['--parakeet-asr-url', 'http://host.docker.internal:8767'])
-        
+
         # Add HTTPS configuration
         if https_enabled and server_ip:
             cmd.extend(['--enable-https', '--server-ip', server_ip])
@@ -173,6 +230,10 @@ def run_service_setup(service_name, selected_services, https_enabled=False, serv
         if obsidian_enabled and neo4j_password:
             cmd.extend(['--enable-obsidian', '--neo4j-password', neo4j_password])
 
+        # Add Tailscale configuration
+        if ts_authkey:
+            cmd.extend(['--ts-authkey', ts_authkey])
+
     else:
         service = SERVICES['extras'][service_name]
         cmd = service['cmd'].copy()
@@ -285,6 +346,230 @@ def show_service_status():
         status = "✅" if exists else "⏸️"
         console.print(f"  {status} {service_config['description']} - {msg}")
 
+def prompt_value(prompt_text, default=""):
+    """Prompt user for a value with a default"""
+    if default:
+        display_prompt = f"{prompt_text} [{default}]"
+    else:
+        display_prompt = prompt_text
+
+    try:
+        value = console.input(f"[cyan]{display_prompt}:[/cyan] ").strip()
+        return value if value else default
+    except EOFError:
+        return default
+
+def prompt_password(prompt_text):
+    """Prompt user for a password (hidden input)"""
+    try:
+        return getpass.getpass(f"{prompt_text}: ")
+    except (EOFError, KeyboardInterrupt):
+        return ""
+
+def mask_value(value, show_chars=5):
+    """Mask a value showing only first and last few characters"""
+    if not value or len(value) <= show_chars * 2:
+        return value
+
+    # Remove quotes if present
+    value_clean = value.strip("'\"")
+
+    return f"{value_clean[:show_chars]}{'*' * min(15, len(value_clean) - show_chars * 2)}{value_clean[-show_chars:]}"
+
+def read_plugin_config_value(plugin_id, config_key):
+    """Read a value from existing plugins.yml file"""
+    plugins_yml_path = Path('config/plugins.yml')
+    if not plugins_yml_path.exists():
+        return None
+
+    try:
+        with open(plugins_yml_path, 'r') as f:
+            plugins_data = yaml.safe_load(f)
+
+        if not plugins_data or 'plugins' not in plugins_data:
+            return None
+
+        plugin_config = plugins_data['plugins'].get(plugin_id, {})
+        return plugin_config.get(config_key)
+    except Exception:
+        return None
+
+def prompt_with_existing_masked(prompt_text, existing_value, placeholders=None, is_password=False, default=""):
+    """
+    Prompt for a value, showing masked existing value if present.
+
+    Args:
+        prompt_text: The prompt to display
+        existing_value: Existing value from config (or None)
+        placeholders: List of placeholder values to treat as "not set"
+        is_password: Whether to use password input (hidden)
+        default: Default value if no existing value
+
+    Returns:
+        User input value, existing value if reused, or default
+    """
+    placeholders = placeholders or []
+
+    # Check if existing value is valid (not empty and not a placeholder)
+    has_valid_existing = existing_value and existing_value not in placeholders
+
+    if has_valid_existing:
+        # Show masked value with option to reuse
+        if is_password:
+            masked = mask_value(existing_value)
+            display_prompt = f"{prompt_text} ({masked}) [press Enter to reuse, or enter new]"
+        else:
+            display_prompt = f"{prompt_text} ({existing_value}) [press Enter to reuse, or enter new]"
+
+        if is_password:
+            user_input = prompt_password(display_prompt)
+        else:
+            user_input = prompt_value(display_prompt, "")
+
+        # If user pressed Enter (empty input), reuse existing value
+        return user_input if user_input else existing_value
+    else:
+        # No existing value, prompt normally
+        if is_password:
+            return prompt_password(prompt_text)
+        else:
+            return prompt_value(prompt_text, default)
+
+def select_plugins():
+    """Interactive plugin selection and configuration"""
+    console.print("\n🔌 [bold cyan]Plugin Configuration[/bold cyan]")
+    console.print("Chronicle supports plugins for extended functionality.\n")
+
+    selected_plugins = {}
+
+    for plugin_id, plugin_meta in PLUGINS.items():
+        # Show plugin description with access level and trigger type
+        console.print(f"[bold]{plugin_meta['name']}[/bold]")
+        console.print(f"  {plugin_meta['description']}")
+        console.print(f"  Access Level: [cyan]{plugin_meta['access_level']}[/cyan]")
+        console.print(f"  Trigger Type: [cyan]{plugin_meta['trigger_type']}[/cyan]\n")
+
+        try:
+            enable = Confirm.ask(
+                f"  Enable {plugin_meta['name']}?",
+                default=plugin_meta['enabled_by_default']
+            )
+        except EOFError:
+            console.print(f"  Using default: {'Yes' if plugin_meta['enabled_by_default'] else 'No'}")
+            enable = plugin_meta['enabled_by_default']
+
+        if enable:
+            plugin_config = {
+                'enabled': True,
+                'access_level': plugin_meta['access_level'],
+                'trigger': {
+                    'type': plugin_meta['trigger_type']
+                }
+            }
+
+            for config_key, config_spec in plugin_meta['config'].items():
+                # Show help text if available
+                if 'help' in config_spec:
+                    console.print(f"  [dim]{config_spec['help']}[/dim]")
+
+                # Read existing value from plugins.yml if it exists
+                existing_value = read_plugin_config_value(plugin_id, config_key)
+
+                # Use the masked prompt function
+                is_password = config_spec['type'] == 'password'
+                value = prompt_with_existing_masked(
+                    prompt_text=f"  {config_spec['prompt']}",
+                    existing_value=existing_value,
+                    placeholders=[],  # No placeholders for plugin config
+                    is_password=is_password,
+                    default=config_spec.get('default', '')
+                )
+
+                plugin_config[config_key] = value
+
+                # For wake_word trigger, add to trigger config
+                if config_key == 'wake_word':
+                    plugin_config['trigger']['wake_word'] = value
+
+            selected_plugins[plugin_id] = plugin_config
+            console.print(f"  [green]✅ {plugin_meta['name']} configured[/green]\n")
+
+    return selected_plugins
+
+def save_plugin_config(plugins_config):
+    """Save plugin configuration to config/plugins.yml"""
+    if not plugins_config:
+        console.print("[dim]No plugins configured, skipping plugins.yml creation[/dim]")
+        return
+
+    config_dir = Path('config')
+    config_dir.mkdir(parents=True, exist_ok=True)
+
+    plugins_yml_path = config_dir / 'plugins.yml'
+
+    # Build YAML structure
+    yaml_data = {
+        'plugins': {}
+    }
+
+    for plugin_id, plugin_config in plugins_config.items():
+        # Plugin config already includes 'enabled', 'access_level', and 'trigger'
+        yaml_data['plugins'][plugin_id] = plugin_config
+
+    # Write to file
+    with open(plugins_yml_path, 'w') as f:
+        yaml.dump(yaml_data, f, default_flow_style=False, sort_keys=False)
+
+    console.print(f"[green]✅ Plugin configuration saved to {plugins_yml_path}[/green]")
+
+def setup_tailscale_if_needed(selected_plugins):
+    """Check if any selected plugins require Tailscale and prompt for auth key.
+
+    Args:
+        selected_plugins: List of plugin IDs selected by user
+
+    Returns:
+        Tailscale auth key string if provided, None otherwise
+    """
+    # Check if any selected plugins require Tailscale
+    needs_tailscale = any(
+        PLUGINS[p].get('requires_tailscale', False)
+        for p in selected_plugins
+    )
+
+    if not needs_tailscale:
+        return None
+
+    console.print("\n🌐 [bold cyan]Tailscale Configuration[/bold cyan]")
+    console.print("Home Assistant plugin requires Tailscale for remote access.")
+    console.print("\n[blue][INFO][/blue] The Tailscale Docker container enables Chronicle to access")
+    console.print("           services on your Tailscale network (like Home Assistant).")
+    console.print()
+    console.print("Get your auth key from: [link]https://login.tailscale.com/admin/settings/keys[/link]")
+    console.print()
+
+    # Check for existing TS_AUTHKEY in backend .env
+    backend_env_path = 'backends/advanced/.env'
+    existing_key = read_env_value(backend_env_path, 'TS_AUTHKEY')
+
+    # Use the masked prompt helper
+    ts_authkey = prompt_with_existing_masked(
+        prompt_text="Tailscale auth key (or press Enter to skip)",
+        existing_value=existing_key,
+        placeholders=['your-tailscale-auth-key-here'],
+        is_password=True,
+        default=""
+    )
+
+    if not ts_authkey or ts_authkey.strip() == "":
+        console.print("[yellow]⚠️  Skipping Tailscale - HA plugin will only work for local instances[/yellow]")
+        console.print("[yellow]    You can configure this later in backends/advanced/.env[/yellow]")
+        return None
+
+    console.print("[green]✅[/green] Tailscale auth key configured")
+    console.print("[blue][INFO][/blue] Start Tailscale with: docker compose --profile tailscale up -d")
+    return ts_authkey
+
 def setup_git_hooks():
     """Setup pre-commit hooks for development"""
     console.print("\n🔧 [bold]Setting up development environment...[/bold]")
@@ -346,11 +631,21 @@ def main():
 
     # Service Selection
     selected_services = select_services()
-    
+
     if not selected_services:
         console.print("\n[yellow]No services selected. Exiting.[/yellow]")
         return
-    
+
+    # Plugin Configuration
+    selected_plugins = select_plugins()
+    if selected_plugins:
+        save_plugin_config(selected_plugins)
+
+    # Tailscale Configuration (if plugins require it)
+    ts_authkey = None
+    if selected_plugins:
+        ts_authkey = setup_tailscale_if_needed(selected_plugins)
+
     # HTTPS Configuration (for services that need it)
     https_enabled = False
     server_ip = None
@@ -374,27 +669,18 @@ def main():
             console.print("[blue][INFO][/blue] For local-only access, use 'localhost'")
             console.print("Examples: localhost, 100.64.1.2, your-domain.com")
 
-            # Check for existing SERVER_IP
+            # Check for existing SERVER_IP from backend .env
             backend_env_path = 'backends/advanced/.env'
             existing_ip = read_env_value(backend_env_path, 'SERVER_IP')
 
-            if existing_ip and existing_ip not in ['localhost', 'your-server-ip-here']:
-                # Show existing IP with option to reuse
-                prompt_text = f"Server IP/Domain for SSL certificates ({existing_ip}) [press Enter to reuse, or enter new]"
-                default_value = existing_ip
-            else:
-                prompt_text = "Server IP/Domain for SSL certificates [localhost]"
-                default_value = "localhost"
-
-            while True:
-                try:
-                    server_ip = console.input(f"{prompt_text}: ").strip()
-                    if not server_ip:
-                        server_ip = default_value
-                    break
-                except EOFError:
-                    server_ip = default_value
-                    break
+            # Use the new masked prompt function
+            server_ip = prompt_with_existing_masked(
+                prompt_text="Server IP/Domain for SSL certificates",
+                existing_value=existing_ip,
+                placeholders=['localhost', 'your-server-ip-here'],
+                is_password=False,
+                default="localhost"
+            )
 
             console.print(f"[green]✅[/green] HTTPS configured for: {server_ip}")
 
@@ -445,7 +731,7 @@ def main():
     
     for service in selected_services:
         if run_service_setup(service, selected_services, https_enabled, server_ip,
-                            obsidian_enabled, neo4j_password):
+                            obsidian_enabled, neo4j_password, ts_authkey):
             success_count += 1
         else:
             failed_services.append(service)

From 32d541f81340a6d15f1e0b541f613d0d41e45e13 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sat, 3 Jan 2026 11:45:38 +0000
Subject: [PATCH 06/25] Enhance configuration management and plugin system
 integration

- Updated .gitignore to include plugins.yml for security reasons.
- Modified start.sh to allow passing additional arguments during service startup.
- Refactored wizard.py to support new HF_TOKEN configuration prompts and improved handling of wake words in plugin settings.
- Introduced a new setup_hf_token_if_needed function to streamline Hugging Face token management.
- Enhanced the GitHub Actions workflow to create plugins.yml from a template, ensuring proper configuration setup.
- Added detailed comments and documentation in the plugins.yml.template for better user guidance on Home Assistant integration.
---
 .github/workflows/robot-tests.yml             |  12 +
 .gitignore                                    |   4 +
 backends/advanced/.env.template               |  24 +-
 backends/advanced/docker-compose.yml          |  31 ++
 backends/advanced/init.py                     |  24 +-
 backends/advanced/run-test.sh                 |  35 ++
 .../controllers/websocket_controller.py       | 180 ++++++-
 .../services/audio_stream/consumer.py         |  94 +---
 .../services/plugin_service.py                |  57 ++-
 .../services/transcription/__init__.py        | 141 ++++--
 .../transcription/deepgram_stream_consumer.py | 457 ++++++++++++++++++
 .../audio_stream_deepgram_streaming_worker.py | 106 ++++
 config/plugins.yml                            |  12 -
 config/plugins.yml.template                   |  30 ++
 start.sh                                      |   2 +-
 tests/configs/deepgram-openai.yml             | 151 +++---
 tests/run-robot-tests.sh                      |  32 ++
 wizard.py                                     | 106 ++--
 18 files changed, 1233 insertions(+), 265 deletions(-)
 create mode 100644 backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py
 delete mode 100644 config/plugins.yml
 create mode 100644 config/plugins.yml.template

diff --git a/.github/workflows/robot-tests.yml b/.github/workflows/robot-tests.yml
index 3333266d..b48b5e75 100644
--- a/.github/workflows/robot-tests.yml
+++ b/.github/workflows/robot-tests.yml
@@ -85,6 +85,18 @@ jobs:
         echo "✓ Test config.yml created from tests/configs/deepgram-openai.yml"
         ls -lh config/config.yml
 
+    - name: Create plugins.yml from template
+      run: |
+        echo "Creating plugins.yml from template..."
+        if [ -f "config/plugins.yml.template" ]; then
+          cp config/plugins.yml.template config/plugins.yml
+          echo "✓ plugins.yml created from template"
+          ls -lh config/plugins.yml
+        else
+          echo "❌ ERROR: config/plugins.yml.template not found"
+          exit 1
+        fi
+
     - name: Run Robot Framework tests
       working-directory: tests
       env:
diff --git a/.gitignore b/.gitignore
index 23141c6b..6fa02d7f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,10 @@ tests/setup/.env.test
 config/config.yml
 !config/config.yml.template
 
+# Plugins config (contains secrets)
+config/plugins.yml
+!config/plugins.yml.template
+
 # Config backups
 config/*.backup.*
 config/*.backup*
diff --git a/backends/advanced/.env.template b/backends/advanced/.env.template
index 4c071f72..9c11af67 100644
--- a/backends/advanced/.env.template
+++ b/backends/advanced/.env.template
@@ -231,4 +231,26 @@ LANGFUSE_ENABLE_TELEMETRY=False
 # The Tailscale container provides proxy access to remote services at:
 #   http://host.docker.internal:18123 (proxies to Home Assistant on Tailscale)
 #
-TS_AUTHKEY=your-tailscale-auth-key-here
\ No newline at end of file
+TS_AUTHKEY=your-tailscale-auth-key-here
+
+# ========================================
+# HOME ASSISTANT PLUGIN (Optional)
+# ========================================
+# Required for Home Assistant voice control via wake word (e.g., "Hey Vivi, turn off the lights")
+#
+# To get a long-lived access token:
+# 1. Go to Home Assistant → Profile → Security tab
+# 2. Scroll to "Long-lived access tokens"
+# 3. Click "Create Token"
+# 4. Copy the token and paste it below
+#
+# Configuration in config/plugins.yml:
+#   - Enable the homeassistant plugin
+#   - Set ha_url to your Home Assistant URL
+#   - Set ha_token to ${HA_TOKEN} (reads from this variable)
+#
+# SECURITY: This token grants full access to your Home Assistant.
+#   - Never commit .env or config/plugins.yml to version control
+#   - Rotate the token if it's ever exposed
+#
+HA_TOKEN=
\ No newline at end of file
diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml
index 2d190e77..4e6ba153 100644
--- a/backends/advanced/docker-compose.yml
+++ b/backends/advanced/docker-compose.yml
@@ -53,6 +53,7 @@ services:
       - NEO4J_HOST=${NEO4J_HOST}
       - NEO4J_USER=${NEO4J_USER}
       - NEO4J_PASSWORD=${NEO4J_PASSWORD}
+      - HA_TOKEN=${HA_TOKEN}
       - CORS_ORIGINS=http://localhost:3010,http://localhost:8000,http://192.168.1.153:3010,http://192.168.1.153:8000,https://localhost:3010,https://localhost:8000,https://100.105.225.45,https://localhost
       - REDIS_URL=redis://redis:6379/0
     depends_on:
@@ -96,6 +97,7 @@ services:
       - PARAKEET_ASR_URL=${PARAKEET_ASR_URL}
       - OPENAI_API_KEY=${OPENAI_API_KEY}
       - GROQ_API_KEY=${GROQ_API_KEY}
+      - HA_TOKEN=${HA_TOKEN}
       - REDIS_URL=redis://redis:6379/0
     depends_on:
       redis:
@@ -106,6 +108,35 @@ services:
         condition: service_started
     restart: unless-stopped
 
+  # Deepgram WebSocket streaming worker
+  # Real-time transcription worker that processes audio via Deepgram's WebSocket API
+  # Publishes interim results to Redis Pub/Sub for client display
+  # Publishes final results to Redis Streams for storage
+  # Triggers plugins on final results only
+  deepgram-streaming-worker:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    command: >
+      uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_streaming_worker
+    env_file:
+      - .env
+    volumes:
+      - ./src:/app/src
+      - ./data:/app/data
+      - ../../config/config.yml:/app/config.yml
+      - ../../config/plugins.yml:/app/plugins.yml
+    environment:
+      - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
+      - REDIS_URL=redis://redis:6379/0
+      - HA_TOKEN=${HA_TOKEN}
+    depends_on:
+      redis:
+        condition: service_healthy
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    restart: unless-stopped
+
   webui:
     build:
       context: ./webui
diff --git a/backends/advanced/init.py b/backends/advanced/init.py
index 7d8169f5..601120ad 100644
--- a/backends/advanced/init.py
+++ b/backends/advanced/init.py
@@ -49,6 +49,9 @@ def __init__(self, args=None):
             self.console.print("[red][ERROR][/red] Run wizard.py from project root to create config.yml")
             sys.exit(1)
 
+        # Ensure plugins.yml exists (copy from template if missing)
+        self._ensure_plugins_yml_exists()
+
     def print_header(self, title: str):
         """Print a colorful header"""
         self.console.print()
@@ -107,6 +110,26 @@ def prompt_choice(self, prompt: str, choices: Dict[str, str], default: str = "1"
                 self.console.print(f"Using default choice: {default}")
                 return default
 
+    def _ensure_plugins_yml_exists(self):
+        """Ensure plugins.yml exists by copying from template if missing."""
+        plugins_yml = Path("../../config/plugins.yml")
+        plugins_template = Path("../../config/plugins.yml.template")
+
+        if not plugins_yml.exists():
+            if plugins_template.exists():
+                self.console.print("[blue][INFO][/blue] plugins.yml not found, creating from template...")
+                shutil.copy2(plugins_template, plugins_yml)
+                self.console.print(f"[green]✅[/green] Created {plugins_yml} from template")
+                self.console.print("[yellow][NOTE][/yellow] Edit config/plugins.yml to configure plugins")
+                self.console.print("[yellow][NOTE][/yellow] Set HA_TOKEN in .env for Home Assistant integration")
+            else:
+                raise RuntimeError(
+                    f"Template file not found: {plugins_template}\n"
+                    f"The repository structure is incomplete. Please ensure config/plugins.yml.template exists."
+                )
+        else:
+            self.console.print(f"[blue][INFO][/blue] Found existing {plugins_yml}")
+
     def backup_existing_env(self):
         """Backup existing .env file"""
         env_path = Path(".env")
@@ -384,7 +407,6 @@ def setup_optional_services(self):
         if hasattr(self.args, 'ts_authkey') and self.args.ts_authkey:
             self.config["TS_AUTHKEY"] = self.args.ts_authkey
             self.console.print(f"[green][SUCCESS][/green] Tailscale auth key configured (Docker integration enabled)")
-            self.console.print("[blue][INFO][/blue] Start Tailscale with: docker compose --profile tailscale up -d")
 
     def setup_obsidian(self):
         """Configure Obsidian/Neo4j integration"""
diff --git a/backends/advanced/run-test.sh b/backends/advanced/run-test.sh
index 01204be6..5f13d35a 100755
--- a/backends/advanced/run-test.sh
+++ b/backends/advanced/run-test.sh
@@ -91,6 +91,29 @@ if [ -n "$_CONFIG_FILE_OVERRIDE" ]; then
     print_info "Using command-line override: CONFIG_FILE=$CONFIG_FILE"
 fi
 
+# Load HF_TOKEN from speaker-recognition/.env (proper location for this credential)
+SPEAKER_ENV="../../extras/speaker-recognition/.env"
+if [ -f "$SPEAKER_ENV" ] && [ -z "$HF_TOKEN" ]; then
+    print_info "Loading HF_TOKEN from speaker-recognition service..."
+    set -a
+    source "$SPEAKER_ENV"
+    set +a
+fi
+
+# Display HF_TOKEN status with masking
+if [ -n "$HF_TOKEN" ]; then
+    if [ ${#HF_TOKEN} -gt 15 ]; then
+        MASKED_TOKEN="${HF_TOKEN:0:5}***************${HF_TOKEN: -5}"
+    else
+        MASKED_TOKEN="***************"
+    fi
+    print_info "HF_TOKEN configured: $MASKED_TOKEN"
+    export HF_TOKEN
+else
+    print_warning "HF_TOKEN not found - speaker recognition tests may fail"
+    print_info "Configure via wizard: uv run --with-requirements ../../setup-requirements.txt python ../../wizard.py"
+fi
+
 # Set default CONFIG_FILE if not provided
 # This allows testing with different provider combinations
 # Usage: CONFIG_FILE=../../tests/configs/parakeet-ollama.yml ./run-test.sh
@@ -166,6 +189,18 @@ if [ ! -f "diarization_config.json" ] && [ -f "diarization_config.json.template"
     print_success "diarization_config.json created"
 fi
 
+# Ensure plugins.yml exists (required for Docker volume mount)
+if [ ! -f "../../config/plugins.yml" ]; then
+    if [ -f "../../config/plugins.yml.template" ]; then
+        print_info "Creating config/plugins.yml from template..."
+        cp ../../config/plugins.yml.template ../../config/plugins.yml
+        print_success "config/plugins.yml created"
+    else
+        print_error "config/plugins.yml.template not found - repository structure incomplete"
+        exit 1
+    fi
+fi
+
 # Note: Robot Framework dependencies are managed via tests/test-requirements.txt
 # The integration tests use Docker containers for service dependencies
 
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
index 50ffc77f..2d99e05c 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
@@ -17,6 +17,7 @@
 
 from fastapi import WebSocket, WebSocketDisconnect, Query
 from friend_lite.decoder import OmiOpusDecoder
+import redis.asyncio as redis
 
 from advanced_omi_backend.auth import websocket_auth
 from advanced_omi_backend.client_manager import generate_client_id, get_client_manager
@@ -39,6 +40,89 @@
 pending_connections: set[str] = set()
 
 
+async def subscribe_to_interim_results(websocket: WebSocket, session_id: str) -> None:
+    """
+    Subscribe to interim transcription results from Redis Pub/Sub and forward to client WebSocket.
+
+    Runs as background task during WebSocket connection. Listens for interim and final
+    transcription results published by the Deepgram streaming consumer and forwards them
+    to the connected client for real-time transcript display.
+
+    Args:
+        websocket: Connected WebSocket client
+        session_id: Session ID (client_id) to subscribe to
+
+    Note:
+        This task runs continuously until the WebSocket disconnects or the task is cancelled.
+        Results are published to Redis Pub/Sub channel: transcription:interim:{session_id}
+    """
+    redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0")
+
+    try:
+        # Create Redis client for Pub/Sub
+        redis_client = await redis.from_url(redis_url, decode_responses=True)
+
+        # Create Pub/Sub instance
+        pubsub = redis_client.pubsub()
+
+        # Subscribe to interim results channel for this session
+        channel = f"transcription:interim:{session_id}"
+        await pubsub.subscribe(channel)
+
+        logger.info(f"📢 Subscribed to interim results channel: {channel}")
+
+        # Listen for messages
+        while True:
+            try:
+                message = await pubsub.get_message(ignore_subscribe_messages=True, timeout=1.0)
+
+                if message and message['type'] == 'message':
+                    # Parse result data
+                    try:
+                        result_data = json.loads(message['data'])
+
+                        # Forward to client WebSocket
+                        await websocket.send_json({
+                            "type": "interim_transcript",
+                            "data": result_data
+                        })
+
+                        # Log for debugging
+                        is_final = result_data.get("is_final", False)
+                        text_preview = result_data.get("text", "")[:50]
+                        result_type = "FINAL" if is_final else "interim"
+                        logger.debug(f"✉️ Forwarded {result_type} result to client {session_id}: {text_preview}...")
+
+                    except json.JSONDecodeError as e:
+                        logger.error(f"Failed to parse interim result JSON: {e}")
+                    except Exception as send_error:
+                        logger.error(f"Failed to send interim result to client {session_id}: {send_error}")
+                        # WebSocket might be closed, exit loop
+                        break
+
+            except asyncio.TimeoutError:
+                # No message received, continue waiting
+                continue
+            except asyncio.CancelledError:
+                logger.info(f"Interim results subscriber cancelled for session {session_id}")
+                break
+            except Exception as e:
+                logger.error(f"Error in interim results subscriber for {session_id}: {e}", exc_info=True)
+                break
+
+    except Exception as e:
+        logger.error(f"Failed to initialize interim results subscriber for {session_id}: {e}", exc_info=True)
+    finally:
+        try:
+            # Unsubscribe and close connections
+            await pubsub.unsubscribe(channel)
+            await pubsub.close()
+            await redis_client.aclose()
+            logger.info(f"🔕 Unsubscribed from interim results channel: {channel}")
+        except Exception as cleanup_error:
+            logger.error(f"Error cleaning up interim results subscriber: {cleanup_error}")
+
+
 async def parse_wyoming_protocol(ws: WebSocket) -> tuple[dict, Optional[bytes]]:
     """Parse Wyoming protocol: JSON header line followed by optional binary payload.
 
@@ -279,8 +363,9 @@ async def _initialize_streaming_session(
     user_id: str,
     user_email: str,
     client_id: str,
-    audio_format: dict
-) -> None:
+    audio_format: dict,
+    websocket: Optional[WebSocket] = None
+) -> Optional[asyncio.Task]:
     """
     Initialize streaming session with Redis and enqueue processing jobs.
 
@@ -291,10 +376,14 @@ async def _initialize_streaming_session(
         user_email: User email
         client_id: Client ID
         audio_format: Audio format dict from audio-start event
+        websocket: Optional WebSocket connection to launch interim results subscriber
+
+    Returns:
+        Interim results subscriber task if websocket provided and session initialized, None otherwise
     """
     if hasattr(client_state, 'stream_session_id'):
         application_logger.debug(f"Session already initialized for {client_id}")
-        return
+        return None
 
     # Initialize stream session
     client_state.stream_session_id = str(uuid.uuid4())
@@ -340,6 +429,16 @@ async def _initialize_streaming_session(
     client_state.speech_detection_job_id = job_ids['speech_detection']
     client_state.audio_persistence_job_id = job_ids['audio_persistence']
 
+    # Launch interim results subscriber if WebSocket provided
+    subscriber_task = None
+    if websocket:
+        subscriber_task = asyncio.create_task(
+            subscribe_to_interim_results(websocket, client_state.stream_session_id)
+        )
+        application_logger.info(f"📡 Launched interim results subscriber for session {client_state.stream_session_id}")
+
+    return subscriber_task
+
 
 async def _finalize_streaming_session(
     client_state,
@@ -516,8 +615,9 @@ async def _handle_streaming_mode_audio(
     audio_format: dict,
     user_id: str,
     user_email: str,
-    client_id: str
-) -> None:
+    client_id: str,
+    websocket: Optional[WebSocket] = None
+) -> Optional[asyncio.Task]:
     """
     Handle audio chunk in streaming mode.
 
@@ -529,16 +629,22 @@ async def _handle_streaming_mode_audio(
         user_id: User ID
         user_email: User email
         client_id: Client ID
+        websocket: Optional WebSocket connection to launch interim results subscriber
+
+    Returns:
+        Interim results subscriber task if websocket provided and session initialized, None otherwise
     """
     # Initialize session if needed
+    subscriber_task = None
     if not hasattr(client_state, 'stream_session_id'):
-        await _initialize_streaming_session(
+        subscriber_task = await _initialize_streaming_session(
             client_state,
             audio_stream_producer,
             user_id,
             user_email,
             client_id,
-            audio_format
+            audio_format,
+            websocket=websocket  # Pass WebSocket to launch interim results subscriber
         )
 
     # Publish to Redis Stream
@@ -553,6 +659,8 @@ async def _handle_streaming_mode_audio(
         audio_format.get("width", 2)
     )
 
+    return subscriber_task
+
 
 async def _handle_batch_mode_audio(
     client_state,
@@ -589,8 +697,9 @@ async def _handle_audio_chunk(
     audio_format: dict,
     user_id: str,
     user_email: str,
-    client_id: str
-) -> None:
+    client_id: str,
+    websocket: Optional[WebSocket] = None
+) -> Optional[asyncio.Task]:
     """
     Route audio chunk to appropriate mode handler (streaming or batch).
 
@@ -602,18 +711,24 @@ async def _handle_audio_chunk(
         user_id: User ID
         user_email: User email
         client_id: Client ID
+        websocket: Optional WebSocket connection to launch interim results subscriber
+
+    Returns:
+        Interim results subscriber task if websocket provided and streaming mode, None otherwise
     """
     recording_mode = getattr(client_state, 'recording_mode', 'batch')
 
     if recording_mode == "streaming":
-        await _handle_streaming_mode_audio(
+        return await _handle_streaming_mode_audio(
             client_state, audio_stream_producer, audio_data,
-            audio_format, user_id, user_email, client_id
+            audio_format, user_id, user_email, client_id,
+            websocket=websocket
         )
     else:
         await _handle_batch_mode_audio(
             client_state, audio_data, audio_format, client_id
         )
+        return None
 
 
 async def _handle_audio_session_start(
@@ -788,6 +903,7 @@ async def handle_omi_websocket(
 
     client_id = None
     client_state = None
+    interim_subscriber_task = None
 
     try:
         # Setup connection (accept, auth, create client state)
@@ -814,13 +930,14 @@ async def handle_omi_websocket(
             if header["type"] == "audio-start":
                 # Handle audio session start
                 application_logger.info(f"🎙️ OMI audio session started for {client_id}")
-                await _initialize_streaming_session(
+                interim_subscriber_task = await _initialize_streaming_session(
                     client_state,
                     audio_stream_producer,
                     user.user_id,
                     user.email,
                     client_id,
-                    header.get("data", {"rate": OMI_SAMPLE_RATE, "width": OMI_SAMPLE_WIDTH, "channels": OMI_CHANNELS})
+                    header.get("data", {"rate": OMI_SAMPLE_RATE, "width": OMI_SAMPLE_WIDTH, "channels": OMI_CHANNELS}),
+                    websocket=ws  # Pass WebSocket to launch interim results subscriber
                 )
 
             elif header["type"] == "audio-chunk" and payload:
@@ -883,6 +1000,16 @@ async def handle_omi_websocket(
     except Exception as e:
         application_logger.error(f"❌ WebSocket error for client {client_id}: {e}", exc_info=True)
     finally:
+        # Cancel interim results subscriber task if running
+        if interim_subscriber_task and not interim_subscriber_task.done():
+            interim_subscriber_task.cancel()
+            try:
+                await interim_subscriber_task
+            except asyncio.CancelledError:
+                application_logger.info(f"Interim subscriber task cancelled for {client_id}")
+            except Exception as task_error:
+                application_logger.error(f"Error cancelling interim subscriber task: {task_error}")
+
         # Clean up pending connection tracking
         pending_connections.discard(pending_client_id)
 
@@ -909,6 +1036,7 @@ async def handle_pcm_websocket(
 
     client_id = None
     client_state = None
+    interim_subscriber_task = None
 
     try:
         # Setup connection (accept, auth, create client state)
@@ -1011,15 +1139,19 @@ async def handle_pcm_websocket(
 
                                             # Route to appropriate mode handler
                                             audio_format = control_header.get("data", {})
-                                            await _handle_audio_chunk(
+                                            task = await _handle_audio_chunk(
                                                 client_state,
                                                 audio_stream_producer,
                                                 audio_data,
                                                 audio_format,
                                                 user.user_id,
                                                 user.email,
-                                                client_id
+                                                client_id,
+                                                websocket=ws
                                             )
+                                            # Store subscriber task if it was created (first streaming chunk)
+                                            if task and not interim_subscriber_task:
+                                                interim_subscriber_task = task
                                         else:
                                             application_logger.warning(f"Expected binary payload for audio-chunk, got: {payload_msg.keys()}")
                                     else:
@@ -1044,15 +1176,19 @@ async def handle_pcm_websocket(
 
                             # Route to appropriate mode handler with default format
                             default_format = {"rate": 16000, "width": 2, "channels": 1}
-                            await _handle_audio_chunk(
+                            task = await _handle_audio_chunk(
                                 client_state,
                                 audio_stream_producer,
                                 audio_data,
                                 default_format,
                                 user.user_id,
                                 user.email,
-                                client_id
+                                client_id,
+                                websocket=ws
                             )
+                            # Store subscriber task if it was created (first streaming chunk)
+                            if task and not interim_subscriber_task:
+                                interim_subscriber_task = task
                         
                         else:
                             application_logger.warning(f"Unexpected message format in streaming mode: {message.keys()}")
@@ -1115,6 +1251,16 @@ async def handle_pcm_websocket(
             f"❌ PCM WebSocket error for client {client_id}: {e}", exc_info=True
         )
     finally:
+        # Cancel interim results subscriber task if running
+        if interim_subscriber_task and not interim_subscriber_task.done():
+            interim_subscriber_task.cancel()
+            try:
+                await interim_subscriber_task
+            except asyncio.CancelledError:
+                application_logger.info(f"Interim subscriber task cancelled for {client_id}")
+            except Exception as task_error:
+                application_logger.error(f"Error cancelling interim subscriber task: {task_error}")
+
         # Clean up pending connection tracking
         pending_connections.discard(pending_client_id)
 
diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py
index 8ae0646b..aeb12e02 100644
--- a/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py
+++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py
@@ -11,8 +11,6 @@
 
 import redis.asyncio as redis
 from redis import exceptions as redis_exceptions
-from redis.asyncio.lock import Lock
-
 logger = logging.getLogger(__name__)
 
 
@@ -28,8 +26,8 @@ def __init__(self, provider_name: str, redis_client: redis.Redis, buffer_chunks:
         """
         Initialize consumer.
 
-        Dynamically discovers all audio:stream:* streams and claims them using Redis locks
-        to ensure exclusive processing (one consumer per stream).
+        Dynamically discovers all audio:stream:* streams and uses Redis consumer groups
+        for fan-out processing (multiple worker types can process the same stream).
 
         Args:
             provider_name: Provider name (e.g., "deepgram", "parakeet")
@@ -47,9 +45,8 @@ def __init__(self, provider_name: str, redis_client: redis.Redis, buffer_chunks:
 
         self.running = False
 
-        # Dynamic stream discovery with exclusive locks
+        # Dynamic stream discovery - consumer groups handle fan-out
         self.active_streams = {}  # {stream_name: True}
-        self.stream_locks = {}  # {stream_name: Lock object}
 
         # Buffering: accumulate chunks per session
         self.session_buffers = {}  # {session_id: {"chunks": [], "chunk_ids": [], "sample_rate": int}}
@@ -73,59 +70,6 @@ async def discover_streams(self) -> list[str]:
 
         return streams
 
-    async def try_claim_stream(self, stream_name: str) -> bool:
-        """
-        Try to claim exclusive ownership of a stream using Redis lock.
-
-        Args:
-            stream_name: Stream to claim
-
-        Returns:
-            True if lock acquired, False otherwise
-        """
-        lock_key = f"consumer:lock:{stream_name}"
-
-        # Create lock with 30 second timeout (will be renewed)
-        lock = Lock(
-            self.redis_client,
-            lock_key,
-            timeout=30,
-            blocking=False  # Non-blocking
-        )
-
-        acquired = await lock.acquire(blocking=False)
-
-        if acquired:
-            self.stream_locks[stream_name] = lock
-            logger.info(f"🔒 Claimed stream: {stream_name}")
-            return True
-        else:
-            logger.debug(f"⏭️ Stream already claimed by another consumer: {stream_name}")
-            return False
-
-    async def release_stream(self, stream_name: str):
-        """Release lock on a stream."""
-        if stream_name in self.stream_locks:
-            try:
-                await self.stream_locks[stream_name].release()
-                logger.info(f"🔓 Released stream: {stream_name}")
-            except Exception as e:
-                logger.warning(f"Failed to release lock for {stream_name}: {e}")
-            finally:
-                del self.stream_locks[stream_name]
-
-    async def renew_stream_locks(self):
-        """Renew locks on all claimed streams."""
-        for stream_name, lock in list(self.stream_locks.items()):
-            try:
-                await lock.reacquire()
-            except Exception as e:
-                logger.warning(f"Failed to renew lock for {stream_name}: {e}")
-                # Lock expired, remove from our list
-                del self.stream_locks[stream_name]
-                if stream_name in self.active_streams:
-                    del self.active_streams[stream_name]
-
     async def setup_consumer_group(self, stream_name: str):
         """Create consumer group if it doesn't exist."""
         # Create consumer group (ignore error if already exists)
@@ -257,14 +201,12 @@ async def transcribe_audio(self, audio_data: bytes, sample_rate: int) -> dict:
         pass
 
     async def start_consuming(self):
-        """Discover and consume from multiple streams with exclusive locking."""
+        """Discover and consume from multiple streams using Redis consumer groups."""
         self.running = True
-        logger.info(f"➡️ Starting dynamic stream consumer: {self.consumer_name}")
+        logger.info(f"➡️ Starting dynamic stream consumer: {self.consumer_name} (group: {self.group_name})")
 
         last_discovery = 0
-        last_lock_renewal = 0
         discovery_interval = 10  # Discover new streams every 10 seconds
-        lock_renewal_interval = 15  # Renew locks every 15 seconds
 
         while self.running:
             try:
@@ -277,20 +219,13 @@ async def start_consuming(self):
 
                     for stream_name in discovered:
                         if stream_name not in self.active_streams:
-                            # Try to claim this stream
-                            if await self.try_claim_stream(stream_name):
-                                # Setup consumer group for this stream
-                                await self.setup_consumer_group(stream_name)
-                                self.active_streams[stream_name] = True
-                                logger.info(f"✅ Now consuming from {stream_name}")
+                            # Setup consumer group for this stream (no manual lock needed)
+                            await self.setup_consumer_group(stream_name)
+                            self.active_streams[stream_name] = True
+                            logger.info(f"✅ Now consuming from {stream_name} (group: {self.group_name})")
 
                     last_discovery = current_time
 
-                # Periodically renew locks
-                if current_time - last_lock_renewal > lock_renewal_interval:
-                    await self.renew_stream_locks()
-                    last_lock_renewal = current_time
-
                 # Read from all active streams
                 if not self.active_streams:
                     # No streams claimed yet, wait and retry
@@ -326,14 +261,6 @@ async def start_consuming(self):
                         if stream_name in error_msg:
                             logger.warning(f"➡️ [{self.consumer_name}] Stream {stream_name} was deleted, removing from active streams")
 
-                            # Release the lock
-                            lock_key = f"consumer:lock:{stream_name}"
-                            try:
-                                await self.redis_client.delete(lock_key)
-                                logger.info(f"🔓 Released lock for deleted stream: {stream_name}")
-                            except:
-                                pass
-
                             # Remove from active streams
                             del self.active_streams[stream_name]
                             logger.info(f"➡️ [{self.consumer_name}] Removed {stream_name}, {len(self.active_streams)} streams remaining")
@@ -419,9 +346,6 @@ async def process_message(self, message_id: bytes, fields: dict, stream_name: st
                     # Clean up session buffer
                     del self.session_buffers[session_id]
 
-                # Release the consumer lock for this stream
-                await self.release_stream(stream_name)
-
                 # ACK the END message
                 await self.redis_client.xack(stream_name, self.group_name, message_id)
                 return
diff --git a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py
index 23f04d87..2c0c9988 100644
--- a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py
+++ b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py
@@ -5,7 +5,9 @@
 """
 
 import logging
-from typing import Optional
+import os
+import re
+from typing import Optional, Any
 from pathlib import Path
 import yaml
 
@@ -17,6 +19,57 @@
 _plugin_router: Optional[PluginRouter] = None
 
 
+def expand_env_vars(value: Any) -> Any:
+    """
+    Recursively expand environment variables in configuration values.
+
+    Supports ${ENV_VAR} syntax. If the environment variable is not set,
+    the original placeholder is kept.
+
+    Args:
+        value: Configuration value (can be str, dict, list, or other)
+
+    Returns:
+        Value with environment variables expanded
+
+    Examples:
+        >>> os.environ['MY_TOKEN'] = 'secret123'
+        >>> expand_env_vars('token: ${MY_TOKEN}')
+        'token: secret123'
+        >>> expand_env_vars({'token': '${MY_TOKEN}'})
+        {'token': 'secret123'}
+    """
+    if isinstance(value, str):
+        # Pattern: ${ENV_VAR} or ${ENV_VAR:-default}
+        def replacer(match):
+            var_expr = match.group(1)
+            # Support default values: ${VAR:-default}
+            if ':-' in var_expr:
+                var_name, default = var_expr.split(':-', 1)
+                return os.environ.get(var_name.strip(), default.strip())
+            else:
+                var_name = var_expr.strip()
+                env_value = os.environ.get(var_name)
+                if env_value is None:
+                    logger.warning(
+                        f"Environment variable '{var_name}' not found, "
+                        f"keeping placeholder: ${{{var_name}}}"
+                    )
+                    return match.group(0)  # Keep original placeholder
+                return env_value
+
+        return re.sub(r'\$\{([^}]+)\}', replacer, value)
+
+    elif isinstance(value, dict):
+        return {k: expand_env_vars(v) for k, v in value.items()}
+
+    elif isinstance(value, list):
+        return [expand_env_vars(item) for item in value]
+
+    else:
+        return value
+
+
 def get_plugin_router() -> Optional[PluginRouter]:
     """Get the global plugin router instance.
 
@@ -62,6 +115,8 @@ def init_plugin_router() -> Optional[PluginRouter]:
         if plugins_yml.exists():
             with open(plugins_yml, 'r') as f:
                 plugins_config = yaml.safe_load(f)
+                # Expand environment variables in configuration
+                plugins_config = expand_env_vars(plugins_config)
                 plugins_data = plugins_config.get('plugins', {})
 
             # Initialize each enabled plugin
diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py
index 2e20171b..f481ac3f 100644
--- a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py
+++ b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py
@@ -10,6 +10,7 @@
 import json
 import logging
 from typing import Optional
+from urllib.parse import urlencode
 
 import httpx
 import websockets
@@ -167,26 +168,65 @@ def __init__(self):
     def name(self) -> str:
         return self._name
 
+    async def transcribe(self, audio_data: bytes, sample_rate: int, **kwargs) -> dict:
+        """Not used for streaming providers - use start_stream/process_audio_chunk/end_stream instead."""
+        raise NotImplementedError("Streaming providers do not support batch transcription")
+
     async def start_stream(self, client_id: str, sample_rate: int = 16000, diarize: bool = False):
-        url = self.model.model_url
+        base_url = self.model.model_url
         ops = self.model.operations or {}
+
+        # Build WebSocket URL with query parameters (for Deepgram streaming)
+        query_params = ops.get("query", {})
+        query_dict = dict(query_params) if query_params else {}
+
+        # Override sample_rate if provided
+        if sample_rate and "sample_rate" in query_dict:
+            query_dict["sample_rate"] = sample_rate
+        if diarize and "diarize" in query_dict:
+            query_dict["diarize"] = "true"
+
+        # Normalize boolean values to lowercase strings (Deepgram expects "true"/"false", not "True"/"False")
+        normalized_query = {}
+        for k, v in query_dict.items():
+            if isinstance(v, bool):
+                normalized_query[k] = "true" if v else "false"
+            else:
+                normalized_query[k] = v
+
+        # Build query string with proper URL encoding (NO token in query)
+        query_str = urlencode(normalized_query)
+        url = f"{base_url}?{query_str}" if query_str else base_url
+
+        # Debug: Log the URL
+        logger.info(f"🔗 Connecting to Deepgram WebSocket: {url}")
+
+        # Connect to WebSocket with Authorization header (Deepgram requires this for server-side connections)
+        headers = {}
+        if self.model.api_key:
+            headers["Authorization"] = f"Token {self.model.api_key}"
+
+        ws = await websockets.connect(url, additional_headers=headers)
+
+        # Send start message if required by provider
         start_msg = (ops.get("start", {}) or {}).get("message", {})
-        # Inject session_id if placeholder present
-        start_msg = json.loads(json.dumps(start_msg))  # deep copy
-        start_msg.setdefault("session_id", client_id)
-        # Apply sample rate and diarization if present
-        if "config" in start_msg and isinstance(start_msg["config"], dict):
-            start_msg["config"].setdefault("sample_rate", sample_rate)
-            if diarize:
-                start_msg["config"]["diarize"] = True
-
-        ws = await websockets.connect(url, open_timeout=10)
-        await ws.send(json.dumps(start_msg))
-        # Wait for confirmation; non-fatal if not provided
-        try:
-            await asyncio.wait_for(ws.recv(), timeout=2.0)
-        except Exception:
-            pass
+        if start_msg:
+            # Inject session_id if placeholder present
+            start_msg = json.loads(json.dumps(start_msg))  # deep copy
+            start_msg.setdefault("session_id", client_id)
+            # Apply sample rate and diarization if present
+            if "config" in start_msg and isinstance(start_msg["config"], dict):
+                start_msg["config"].setdefault("sample_rate", sample_rate)
+                if diarize:
+                    start_msg["config"]["diarize"] = True
+            await ws.send(json.dumps(start_msg))
+
+            # Wait for confirmation; non-fatal if not provided
+            try:
+                await asyncio.wait_for(ws.recv(), timeout=2.0)
+            except Exception:
+                pass
+
         self._streams[client_id] = {"ws": ws, "sample_rate": sample_rate, "final": None, "interim": []}
 
     async def process_audio_chunk(self, client_id: str, audio_chunk: bytes) -> dict | None:
@@ -194,26 +234,67 @@ async def process_audio_chunk(self, client_id: str, audio_chunk: bytes) -> dict
             return None
         ws = self._streams[client_id]["ws"]
         ops = self.model.operations or {}
+
+        # Send chunk header if required (for providers like Parakeet)
         chunk_hdr = (ops.get("chunk_header", {}) or {}).get("message", {})
-        hdr = json.loads(json.dumps(chunk_hdr))
-        hdr.setdefault("type", "audio_chunk")
-        hdr.setdefault("session_id", client_id)
-        hdr.setdefault("rate", self._streams[client_id]["sample_rate"])
-        await ws.send(json.dumps(hdr))
+        if chunk_hdr:
+            hdr = json.loads(json.dumps(chunk_hdr))
+            hdr.setdefault("type", "audio_chunk")
+            hdr.setdefault("session_id", client_id)
+            hdr.setdefault("rate", self._streams[client_id]["sample_rate"])
+            await ws.send(json.dumps(hdr))
+
+        # Send audio chunk (raw bytes for Deepgram, or after header for others)
         await ws.send(audio_chunk)
 
-        # Non-blocking read for interim results
+        # Non-blocking read for results
         expect = (ops.get("expect", {}) or {})
+        extract = expect.get("extract", {})
         interim_type = expect.get("interim_type")
+        final_type = expect.get("final_type")
+
         try:
-            while True:
-                msg = await asyncio.wait_for(ws.recv(), timeout=0.01)
-                data = json.loads(msg)
-                if interim_type and data.get("type") == interim_type:
-                    self._streams[client_id]["interim"].append(data)
+            # Try to read a message (non-blocking)
+            msg = await asyncio.wait_for(ws.recv(), timeout=0.05)
+            data = json.loads(msg)
+
+            # Determine if this is interim or final result
+            is_final = False
+            if final_type and data.get("type") == final_type:
+                # Check if Deepgram marks it as final
+                is_final = data.get("is_final", False)
+            elif interim_type and data.get("type") == interim_type:
+                is_final = data.get("is_final", False)
+
+            # Extract result data
+            text = _dotted_get(data, extract.get("text")) if extract.get("text") else data.get("text", "")
+            words = _dotted_get(data, extract.get("words")) if extract.get("words") else data.get("words", [])
+            segments = _dotted_get(data, extract.get("segments")) if extract.get("segments") else data.get("segments", [])
+
+            # Calculate confidence if available
+            confidence = data.get("confidence", 0.0)
+            if not confidence and words and isinstance(words, list):
+                # Calculate average word confidence
+                confidences = [w.get("confidence", 0.0) for w in words if isinstance(w, dict) and "confidence" in w]
+                if confidences:
+                    confidence = sum(confidences) / len(confidences)
+
+            # Return result with is_final flag
+            # Consumer decides what to do with interim vs final
+            return {
+                "text": text,
+                "words": words,
+                "segments": segments,
+                "is_final": is_final,
+                "confidence": confidence
+            }
+
         except asyncio.TimeoutError:
-            pass
-        return None
+            # No message available yet
+            return None
+        except Exception as e:
+            logger.error(f"Error processing audio chunk result for {client_id}: {e}")
+            return None
 
     async def end_stream(self, client_id: str) -> dict:
         if client_id not in self._streams:
diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
new file mode 100644
index 00000000..68b3c61a
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
@@ -0,0 +1,457 @@
+"""
+Deepgram WebSocket streaming consumer for real-time transcription.
+
+Reads from: audio:stream:* streams
+Publishes interim to: Redis Pub/Sub channel transcription:interim:{session_id}
+Writes final to: transcription:results:{session_id} Redis Stream
+Triggers plugins: streaming_transcript level (final results only)
+"""
+
+import asyncio
+import json
+import logging
+import os
+import time
+from typing import Dict, Optional
+
+import redis.asyncio as redis
+from redis import exceptions as redis_exceptions
+
+from advanced_omi_backend.plugins.router import PluginRouter
+from advanced_omi_backend.services.transcription import get_transcription_provider
+from advanced_omi_backend.client_manager import get_client_owner
+
+logger = logging.getLogger(__name__)
+
+
+class DeepgramStreamingConsumer:
+    """
+    Deepgram streaming consumer for real-time WebSocket transcription.
+
+    - Discovers audio:stream:* streams dynamically
+    - Uses Redis consumer groups for fan-out (allows batch workers to process same stream)
+    - Starts WebSocket connections to Deepgram per stream
+    - Sends audio immediately (no buffering)
+    - Publishes interim results to Redis Pub/Sub for client display
+    - Publishes final results to Redis Streams for storage
+    - Triggers plugins only on final results
+    """
+
+    def __init__(self, redis_client: redis.Redis, plugin_router: Optional[PluginRouter] = None):
+        """
+        Initialize Deepgram streaming consumer.
+
+        Args:
+            redis_client: Connected Redis client
+            plugin_router: Plugin router for triggering plugins on final results
+        """
+        self.redis_client = redis_client
+        self.plugin_router = plugin_router
+
+        # Get streaming transcription provider from registry
+        self.provider = get_transcription_provider(mode="streaming")
+        if not self.provider:
+            raise RuntimeError(
+                "Failed to load streaming transcription provider. "
+                "Ensure config.yml has a default 'stt_stream' model configured."
+            )
+
+        # Stream configuration
+        self.stream_pattern = "audio:stream:*"
+        self.group_name = "streaming-transcription"
+        self.consumer_name = f"streaming-worker-{os.getpid()}"
+
+        self.running = False
+
+        # Active stream tracking - consumer groups handle fan-out
+        self.active_streams: Dict[str, Dict] = {}  # {stream_name: {"session_id": ...}}
+
+        # Session tracking for WebSocket connections
+        self.active_sessions: Dict[str, Dict] = {}  # {session_id: {"last_activity": timestamp}}
+
+    async def discover_streams(self) -> list[str]:
+        """
+        Discover all audio streams matching the pattern.
+
+        Returns:
+            List of stream names
+        """
+        streams = []
+        cursor = b"0"
+
+        while cursor:
+            cursor, keys = await self.redis_client.scan(
+                cursor, match=self.stream_pattern, count=100
+            )
+            if keys:
+                streams.extend([k.decode() if isinstance(k, bytes) else k for k in keys])
+
+        return streams
+
+    async def setup_consumer_group(self, stream_name: str):
+        """Create consumer group if it doesn't exist."""
+        try:
+            await self.redis_client.xgroup_create(
+                stream_name,
+                self.group_name,
+                "0",
+                mkstream=True
+            )
+            logger.debug(f"➡️ Created consumer group {self.group_name} for {stream_name}")
+        except redis_exceptions.ResponseError as e:
+            if "BUSYGROUP" not in str(e):
+                raise
+            logger.debug(f"➡️ Consumer group {self.group_name} already exists for {stream_name}")
+
+    async def start_session_stream(self, session_id: str, sample_rate: int = 16000):
+        """
+        Start WebSocket connection to Deepgram for a session.
+
+        Args:
+            session_id: Session ID (client_id from audio stream)
+            sample_rate: Audio sample rate in Hz
+        """
+        try:
+            await self.provider.start_stream(
+                client_id=session_id,
+                sample_rate=sample_rate,
+                diarize=False  # Deepgram streaming doesn't support diarization
+            )
+
+            self.active_sessions[session_id] = {
+                "last_activity": time.time(),
+                "sample_rate": sample_rate
+            }
+
+            logger.info(f"🎙️ Started Deepgram WebSocket stream for session: {session_id}")
+
+        except Exception as e:
+            logger.error(f"Failed to start Deepgram stream for {session_id}: {e}", exc_info=True)
+            raise
+
+    async def end_session_stream(self, session_id: str):
+        """
+        End WebSocket connection to Deepgram for a session.
+
+        Args:
+            session_id: Session ID
+        """
+        try:
+            # Get final result from Deepgram
+            final_result = await self.provider.end_stream(client_id=session_id)
+
+            # If there's a final result, publish it
+            if final_result and final_result.get("text"):
+                await self.publish_to_client(session_id, final_result, is_final=True)
+                await self.store_final_result(session_id, final_result)
+
+                # Trigger plugins on final result
+                if self.plugin_router:
+                    await self.trigger_plugins(session_id, final_result)
+
+            self.active_sessions.pop(session_id, None)
+            logger.info(f"🛑 Ended Deepgram WebSocket stream for session: {session_id}")
+
+        except Exception as e:
+            logger.error(f"Error ending stream for {session_id}: {e}", exc_info=True)
+
+    async def process_audio_chunk(self, session_id: str, audio_chunk: bytes, chunk_id: str):
+        """
+        Process a single audio chunk through Deepgram WebSocket.
+
+        Args:
+            session_id: Session ID
+            audio_chunk: Raw audio bytes
+            chunk_id: Chunk identifier from Redis stream
+        """
+        try:
+            # Send audio chunk to Deepgram WebSocket and get result
+            result = await self.provider.process_audio_chunk(
+                client_id=session_id,
+                audio_chunk=audio_chunk
+            )
+
+            # Update last activity
+            if session_id in self.active_sessions:
+                self.active_sessions[session_id]["last_activity"] = time.time()
+
+            # Deepgram returns None if no response yet, or a dict with results
+            if result:
+                is_final = result.get("is_final", False)
+
+                # Always publish to clients (interim + final) for real-time display
+                await self.publish_to_client(session_id, result, is_final=is_final)
+
+                # If final result, also store and trigger plugins
+                if is_final:
+                    await self.store_final_result(session_id, result, chunk_id=chunk_id)
+
+                    # Trigger plugins on final results only
+                    if self.plugin_router:
+                        await self.trigger_plugins(session_id, result)
+
+        except Exception as e:
+            logger.error(f"Error processing audio chunk for {session_id}: {e}", exc_info=True)
+
+    async def publish_to_client(self, session_id: str, result: Dict, is_final: bool):
+        """
+        Publish interim or final results to Redis Pub/Sub for client consumption.
+
+        Args:
+            session_id: Session ID
+            result: Transcription result from Deepgram
+            is_final: Whether this is a final result
+        """
+        try:
+            channel = f"transcription:interim:{session_id}"
+
+            # Prepare message for clients
+            message = {
+                "text": result.get("text", ""),
+                "is_final": is_final,
+                "words": result.get("words", []),
+                "confidence": result.get("confidence", 0.0),
+                "timestamp": time.time()
+            }
+
+            # Publish to Redis Pub/Sub
+            await self.redis_client.publish(channel, json.dumps(message))
+
+            result_type = "FINAL" if is_final else "interim"
+            logger.debug(f"📢 Published {result_type} result to {channel}: {message['text'][:50]}...")
+
+        except Exception as e:
+            logger.error(f"Error publishing to client for {session_id}: {e}", exc_info=True)
+
+    async def store_final_result(self, session_id: str, result: Dict, chunk_id: str = None):
+        """
+        Store final transcription result to Redis Stream.
+
+        Args:
+            session_id: Session ID
+            result: Final transcription result
+            chunk_id: Optional chunk identifier
+        """
+        try:
+            stream_name = f"transcription:results:{session_id}"
+
+            # Prepare result entry
+            entry = {
+                "message_id": chunk_id or f"final_{int(time.time() * 1000)}",
+                "text": result.get("text", ""),
+                "confidence": result.get("confidence", 0.0),
+                "provider": "deepgram-stream",
+                "timestamp": time.time(),
+                "words": json.dumps(result.get("words", [])),
+                "segments": json.dumps(result.get("segments", [])),
+                "is_final": "true"
+            }
+
+            # Write to Redis Stream
+            await self.redis_client.xadd(stream_name, entry)
+
+            logger.info(f"💾 Stored final result to {stream_name}: {entry['text'][:50]}...")
+
+        except Exception as e:
+            logger.error(f"Error storing final result for {session_id}: {e}", exc_info=True)
+
+    async def _get_user_id_from_client_id(self, client_id: str) -> Optional[str]:
+        """
+        Look up user_id from client_id using ClientManager.
+
+        Args:
+            client_id: Client ID to search for
+
+        Returns:
+            user_id if found, None otherwise
+        """
+        user_id = get_client_owner(client_id)
+
+        if user_id:
+            logger.debug(f"Found user_id {user_id} for client_id {client_id}")
+        else:
+            logger.warning(f"No user_id found for client_id {client_id}")
+
+        return user_id
+
+    async def trigger_plugins(self, session_id: str, result: Dict):
+        """
+        Trigger plugins at streaming_transcript access level (final results only).
+
+        Args:
+            session_id: Session ID (client_id from stream name)
+            result: Final transcription result
+        """
+        try:
+            # Find user_id by looking up session with matching client_id
+            # session_id here is actually the client_id extracted from stream name
+            user_id = await self._get_user_id_from_client_id(session_id)
+
+            if not user_id:
+                logger.warning(
+                    f"Could not find user_id for client_id {session_id}. "
+                    "Plugins will not be triggered."
+                )
+                return
+
+            plugin_data = {
+                'transcript': result.get("text", ""),
+                'session_id': session_id,
+                'words': result.get("words", []),
+                'segments': result.get("segments", []),
+                'confidence': result.get("confidence", 0.0),
+                'is_final': True
+            }
+
+            # Trigger plugins with streaming_transcript access level
+            logger.info(f"🎯 Triggering plugins for user {user_id}, transcript: {plugin_data['transcript'][:50]}...")
+
+            plugin_results = await self.plugin_router.trigger_plugins(
+                access_level='streaming_transcript',
+                user_id=user_id,
+                data=plugin_data,
+                metadata={'client_id': session_id}
+            )
+
+            if plugin_results:
+                logger.info(f"✅ Plugins triggered successfully: {len(plugin_results)} results")
+            else:
+                logger.info(f"ℹ️ No plugins triggered (no matching conditions)")
+
+        except Exception as e:
+            logger.error(f"Error triggering plugins for {session_id}: {e}", exc_info=True)
+
+    async def process_stream(self, stream_name: str):
+        """
+        Process a single audio stream.
+
+        Args:
+            stream_name: Redis stream name (e.g., "audio:stream:user01-phone")
+        """
+        # Extract session_id from stream name (format: audio:stream:{session_id})
+        session_id = stream_name.replace("audio:stream:", "")
+
+        # Track this stream
+        self.active_streams[stream_name] = {
+            "session_id": session_id,
+            "started_at": time.time()
+        }
+
+        # Start WebSocket connection to Deepgram
+        await self.start_session_stream(session_id)
+
+        last_id = "0"  # Start from beginning
+        stream_ended = False
+
+        try:
+            while self.running and not stream_ended:
+                # Read messages from Redis stream using consumer group
+                try:
+                    messages = await self.redis_client.xreadgroup(
+                        self.group_name,  # "streaming-transcription"
+                        self.consumer_name,  # "streaming-worker-{pid}"
+                        {stream_name: ">"},  # Read only new messages
+                        count=10,
+                        block=1000  # Block for 1 second
+                    )
+
+                    if not messages:
+                        # No new messages - check if stream is still alive
+                        # Check for stream end marker or timeout
+                        if session_id not in self.active_sessions:
+                            logger.info(f"Session {session_id} no longer active, ending stream processing")
+                            stream_ended = True
+                        continue
+
+                    for stream, stream_messages in messages:
+                        for message_id, fields in stream_messages:
+                            msg_id = message_id.decode() if isinstance(message_id, bytes) else message_id
+
+                            # Check for end marker
+                            if fields.get(b'end_marker') or fields.get('end_marker'):
+                                logger.info(f"End marker received for {session_id}")
+                                stream_ended = True
+                                # ACK the end marker
+                                await self.redis_client.xack(stream_name, self.group_name, msg_id)
+                                break
+
+                            # Extract audio data (producer sends as 'audio_data', not 'audio_chunk')
+                            audio_chunk = fields.get(b'audio_data') or fields.get('audio_data')
+                            if audio_chunk:
+                                # Process audio chunk through Deepgram WebSocket
+                                await self.process_audio_chunk(
+                                    session_id=session_id,
+                                    audio_chunk=audio_chunk,
+                                    chunk_id=msg_id
+                                )
+
+                            # ACK the message after processing
+                            await self.redis_client.xack(stream_name, self.group_name, msg_id)
+
+                        if stream_ended:
+                            break
+
+                except Exception as e:
+                    logger.error(f"Error reading from stream {stream_name}: {e}", exc_info=True)
+                    await asyncio.sleep(1)
+
+        finally:
+            # End WebSocket connection
+            await self.end_session_stream(session_id)
+
+    async def start_consuming(self):
+        """
+        Start consuming audio streams and processing through Deepgram WebSocket.
+        Uses Redis consumer groups for fan-out (allows batch workers to process same stream).
+        """
+        self.running = True
+        logger.info(f"🚀 Deepgram streaming consumer started (group: {self.group_name})")
+
+        try:
+            while self.running:
+                # Discover available streams
+                streams = await self.discover_streams()
+
+                if streams:
+                    logger.debug(f"🔍 Discovered {len(streams)} audio streams")
+                else:
+                    logger.debug("🔍 No audio streams found")
+
+                # Setup consumer groups and spawn processing tasks
+                for stream_name in streams:
+                    if stream_name in self.active_streams:
+                        continue  # Already processing
+
+                    # Setup consumer group (no manual lock needed)
+                    await self.setup_consumer_group(stream_name)
+
+                    # Track stream and spawn task to process it
+                    session_id = stream_name.replace("audio:stream:", "")
+                    self.active_streams[stream_name] = {"session_id": session_id}
+
+                    # Spawn task to process this stream
+                    asyncio.create_task(self.process_stream(stream_name))
+                    logger.info(f"✅ Now consuming from {stream_name} (group: {self.group_name})")
+
+                # Sleep before next discovery cycle
+                await asyncio.sleep(5)
+
+        except Exception as e:
+            logger.error(f"Fatal error in consumer main loop: {e}", exc_info=True)
+        finally:
+            await self.stop()
+
+    async def stop(self):
+        """Stop consuming and clean up resources."""
+        logger.info("🛑 Stopping Deepgram streaming consumer...")
+        self.running = False
+
+        # End all active sessions
+        session_ids = list(self.active_sessions.keys())
+        for session_id in session_ids:
+            try:
+                await self.end_session_stream(session_id)
+            except Exception as e:
+                logger.error(f"Error ending session {session_id}: {e}")
+
+        logger.info("✅ Deepgram streaming consumer stopped")
diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py
new file mode 100644
index 00000000..8b9aa885
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+"""
+Deepgram WebSocket streaming audio worker.
+
+Starts a consumer that reads from audio:stream:* streams and transcribes via Deepgram WebSocket API.
+Publishes interim results to Redis Pub/Sub for real-time client display.
+Publishes final results to Redis Streams for storage.
+Triggers plugins on final results only.
+"""
+
+import asyncio
+import logging
+import os
+import signal
+import sys
+
+import redis.asyncio as redis
+
+from advanced_omi_backend.services.plugin_service import init_plugin_router
+from advanced_omi_backend.services.transcription.deepgram_stream_consumer import DeepgramStreamingConsumer
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
+)
+
+logger = logging.getLogger(__name__)
+
+
+async def main():
+    """Main worker entry point."""
+    logger.info("🚀 Starting Deepgram WebSocket streaming worker")
+
+    # Validate DEEPGRAM_API_KEY
+    api_key = os.getenv("DEEPGRAM_API_KEY")
+    if not api_key:
+        logger.error("DEEPGRAM_API_KEY environment variable not set")
+        logger.error("Cannot start Deepgram streaming worker without API key")
+        sys.exit(1)
+
+    redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0")
+
+    # Create Redis client
+    try:
+        redis_client = await redis.from_url(
+            redis_url,
+            encoding="utf-8",
+            decode_responses=False
+        )
+        logger.info(f"✅ Connected to Redis: {redis_url}")
+    except Exception as e:
+        logger.error(f"Failed to connect to Redis: {e}", exc_info=True)
+        sys.exit(1)
+
+    # Initialize plugin router
+    try:
+        plugin_router = init_plugin_router()
+        if plugin_router:
+            logger.info(f"✅ Plugin router initialized with {len(plugin_router.plugins)} plugins")
+        else:
+            logger.warning("No plugin router available - plugins will not be triggered")
+    except Exception as e:
+        logger.error(f"Failed to initialize plugin router: {e}", exc_info=True)
+        plugin_router = None
+
+    # Create Deepgram streaming consumer
+    try:
+        consumer = DeepgramStreamingConsumer(
+            redis_client=redis_client,
+            plugin_router=plugin_router
+        )
+        logger.info("✅ Deepgram streaming consumer created")
+    except Exception as e:
+        logger.error(f"Failed to create Deepgram streaming consumer: {e}", exc_info=True)
+        await redis_client.aclose()
+        sys.exit(1)
+
+    # Setup signal handlers for graceful shutdown
+    def signal_handler(signum, frame):
+        logger.info(f"Received signal {signum}, shutting down...")
+        asyncio.create_task(consumer.stop())
+
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    try:
+        logger.info("✅ Deepgram streaming worker ready")
+        logger.info("📡 Listening for audio streams on audio:stream:* pattern")
+        logger.info("📢 Publishing interim results to transcription:interim:{session_id}")
+        logger.info("💾 Publishing final results to transcription:results:{session_id}")
+
+        # This blocks until consumer is stopped
+        await consumer.start_consuming()
+
+    except KeyboardInterrupt:
+        logger.info("Keyboard interrupt received, shutting down...")
+    except Exception as e:
+        logger.error(f"Worker error: {e}", exc_info=True)
+        sys.exit(1)
+    finally:
+        await redis_client.aclose()
+        logger.info("👋 Deepgram streaming worker stopped")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/config/plugins.yml b/config/plugins.yml
deleted file mode 100644
index 61c14def..00000000
--- a/config/plugins.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-plugins:
-  homeassistant:
-    enabled: true
-    access_level: transcript
-    trigger:
-      type: wake_word
-      wake_words:  # Support multiple variations
-        - vv    # Deepgram transcribes "vivi" as "VV"
-        - vivi  # Original wake word
-        - vv.   # Sometimes includes period
-    ha_url: http://host.docker.internal:18123
-    ha_token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiI0ODE0MDk1MWViOGM0MTYxOTY2N2YzNzI1MTFmM2QyMiIsImlhdCI6MTc2NzQwOTg4NiwiZXhwIjoyMDgyNzY5ODg2fQ.Q1ko6z2BprxoQO0Pp1xCVc_BRny0lNMd-_L3YSDVkKM
diff --git a/config/plugins.yml.template b/config/plugins.yml.template
new file mode 100644
index 00000000..ef8cc63d
--- /dev/null
+++ b/config/plugins.yml.template
@@ -0,0 +1,30 @@
+# Chronicle Plugin Configuration Template
+#
+# SECURITY: This file contains placeholders for sensitive data.
+# Copy this file to plugins.yml and replace with actual values:
+#   cp config/plugins.yml.template config/plugins.yml
+#
+# IMPORTANT: Never commit plugins.yml to version control!
+# The actual plugins.yml file is gitignored to protect secrets.
+#
+# Environment Variable Substitution:
+# You can use ${ENV_VAR} syntax to reference environment variables.
+# Example: ha_token: ${HA_TOKEN}
+
+plugins:
+  homeassistant:
+    enabled: true
+    access_level: streaming_transcript  # Execute on each streaming transcript chunk
+    trigger:
+      type: wake_word
+      wake_words:  # Support multiple wake words
+        - hey vivi      # Example: "hey vivi, turn off the lights"
+        - hey jarvis    # Example: "hey jarvis, what's the temperature"
+    ha_url: http://host.docker.internal:8123  # Your Home Assistant URL
+    ha_token: ${HA_TOKEN}  # Use environment variable (recommended) or paste token directly (not recommended)
+    # To get a long-lived token:
+    # 1. Go to Home Assistant → Profile → Security tab
+    # 2. Scroll to "Long-lived access tokens"
+    # 3. Click "Create Token"
+    # 4. Copy the token and set it as HA_TOKEN environment variable
+    #    or replace ${HA_TOKEN} with the actual token (not recommended for security)
diff --git a/start.sh b/start.sh
index 44ba6f2c..b01ef87a 100755
--- a/start.sh
+++ b/start.sh
@@ -1 +1 @@
-uv run --with-requirements setup-requirements.txt python services.py start --all --build
+uv run --with-requirements setup-requirements.txt python services.py start --all "$@"
diff --git a/tests/configs/deepgram-openai.yml b/tests/configs/deepgram-openai.yml
index 46c8ddef..1e4cd8b2 100644
--- a/tests/configs/deepgram-openai.yml
+++ b/tests/configs/deepgram-openai.yml
@@ -1,89 +1,86 @@
-# Test Configuration: Deepgram (STT) + OpenAI (LLM)
-# Cloud-based services - recommended for CI/testing when API credits available
-
+chat:
+  system_prompt: You are a specialized AI assistant for technical support and troubleshooting.
 defaults:
-  llm: openai-llm
   embedding: openai-embed
+  llm: openai-llm
   stt: stt-deepgram
   vector_store: vs-qdrant
-
-models:
-  - name: openai-llm
-    description: OpenAI GPT-4o-mini
-    model_type: llm
-    model_provider: openai
-    api_family: openai
-    model_name: gpt-4o-mini
-    model_url: https://api.openai.com/v1
-    api_key: ${OPENAI_API_KEY:-}
-    model_params:
-      temperature: 0.2
-      max_tokens: 2000
-    model_output: json
-
-  - name: openai-embed
-    description: OpenAI text-embedding-3-small
-    model_type: embedding
-    model_provider: openai
-    api_family: openai
-    model_name: text-embedding-3-small
-    model_url: https://api.openai.com/v1
-    api_key: ${OPENAI_API_KEY:-}
-    embedding_dimensions: 1536
-    model_output: vector
-
-  - name: vs-qdrant
-    description: Qdrant vector database
-    model_type: vector_store
-    model_provider: qdrant
-    api_family: qdrant
-    model_url: http://${QDRANT_BASE_URL:-qdrant}:${QDRANT_PORT:-6333}
-    model_params:
-      host: ${QDRANT_BASE_URL:-qdrant}
-      port: ${QDRANT_PORT:-6333}
-      collection_name: omi_memories
-
-  - name: stt-deepgram
-    description: Deepgram Nova 3 (batch)
-    model_type: stt
-    model_provider: deepgram
-    api_family: http
-    model_url: https://api.deepgram.com/v1
-    api_key: ${DEEPGRAM_API_KEY:-}
-    operations:
-      stt_transcribe:
-        method: POST
-        path: /listen
-        headers:
-          Authorization: Token ${DEEPGRAM_API_KEY:-}
-          Content-Type: audio/raw
-        query:
-          model: nova-3
-          language: multi
-          smart_format: 'true'
-          punctuate: 'true'
-          diarize: 'true'
-          encoding: linear16
-          sample_rate: 16000
-          channels: '1'
-        response:
-          type: json
-          extract:
-            text: results.channels[0].alternatives[0].transcript
-            words: results.channels[0].alternatives[0].words
-            segments: results.channels[0].alternatives[0].paragraphs.paragraphs
-
 memory:
-  provider: chronicle
-  timeout_seconds: 1200
   extraction:
     enabled: true
-    prompt: |
-      Extract important information from this conversation and return a JSON object with an array named "facts".
-      Include personal preferences, plans, names, dates, locations, numbers, and key details.
+    prompt: 'Extract important information from this conversation and return a JSON
+      object with an array named "facts".
+
+      Include personal preferences, plans, names, dates, locations, numbers, and key
+      details.
+
       Keep items concise and useful.
 
+      '
+  provider: chronicle
+  timeout_seconds: 1200
+models:
+- api_family: openai
+  api_key: ${OPENAI_API_KEY:-}
+  description: OpenAI GPT-4o-mini
+  model_name: gpt-4o-mini
+  model_output: json
+  model_params:
+    max_tokens: 2000
+    temperature: 0.2
+  model_provider: openai
+  model_type: llm
+  model_url: https://api.openai.com/v1
+  name: openai-llm
+- api_family: openai
+  api_key: ${OPENAI_API_KEY:-}
+  description: OpenAI text-embedding-3-small
+  embedding_dimensions: 1536
+  model_name: text-embedding-3-small
+  model_output: vector
+  model_provider: openai
+  model_type: embedding
+  model_url: https://api.openai.com/v1
+  name: openai-embed
+- api_family: qdrant
+  description: Qdrant vector database
+  model_params:
+    collection_name: omi_memories
+    host: ${QDRANT_BASE_URL:-qdrant}
+    port: ${QDRANT_PORT:-6333}
+  model_provider: qdrant
+  model_type: vector_store
+  model_url: http://${QDRANT_BASE_URL:-qdrant}:${QDRANT_PORT:-6333}
+  name: vs-qdrant
+- api_family: http
+  api_key: ${DEEPGRAM_API_KEY:-}
+  description: Deepgram Nova 3 (batch)
+  model_provider: deepgram
+  model_type: stt
+  model_url: https://api.deepgram.com/v1
+  name: stt-deepgram
+  operations:
+    stt_transcribe:
+      headers:
+        Authorization: Token ${DEEPGRAM_API_KEY:-}
+        Content-Type: audio/raw
+      method: POST
+      path: /listen
+      query:
+        channels: '1'
+        diarize: 'true'
+        encoding: linear16
+        language: multi
+        model: nova-3
+        punctuate: 'true'
+        sample_rate: 16000
+        smart_format: 'true'
+      response:
+        extract:
+          segments: results.channels[0].alternatives[0].paragraphs.paragraphs
+          text: results.channels[0].alternatives[0].transcript
+          words: results.channels[0].alternatives[0].words
+        type: json
 speaker_recognition:
-  # Disable speaker recognition in CI tests (too slow, blocks workers)
   enabled: false
   timeout: 60
diff --git a/tests/run-robot-tests.sh b/tests/run-robot-tests.sh
index b5af8682..c44b16ec 100755
--- a/tests/run-robot-tests.sh
+++ b/tests/run-robot-tests.sh
@@ -85,6 +85,38 @@ print_info "DEEPGRAM_API_KEY length: ${#DEEPGRAM_API_KEY}"
 print_info "OPENAI_API_KEY length: ${#OPENAI_API_KEY}"
 print_info "Using config file: $CONFIG_FILE"
 
+# Load HF_TOKEN from speaker-recognition/.env for test environment
+SPEAKER_ENV="../extras/speaker-recognition/.env"
+if [ -f "$SPEAKER_ENV" ] && [ -z "$HF_TOKEN" ]; then
+    print_info "Loading HF_TOKEN from speaker-recognition service..."
+    set -a
+    source "$SPEAKER_ENV"
+    set +a
+
+    if [ -n "$HF_TOKEN" ]; then
+        # Mask token for display
+        if [ ${#HF_TOKEN} -gt 15 ]; then
+            MASKED_TOKEN="${HF_TOKEN:0:5}***************${HF_TOKEN: -5}"
+        else
+            MASKED_TOKEN="***************"
+        fi
+        print_info "HF_TOKEN configured: $MASKED_TOKEN"
+    fi
+elif [ -n "$HF_TOKEN" ]; then
+    # Already set (e.g., from CI)
+    if [ ${#HF_TOKEN} -gt 15 ]; then
+        MASKED_TOKEN="${HF_TOKEN:0:5}***************${HF_TOKEN: -5}"
+    else
+        MASKED_TOKEN="***************"
+    fi
+    print_info "HF_TOKEN configured: $MASKED_TOKEN"
+else
+    print_warning "HF_TOKEN not found - speaker recognition tests may fail"
+    print_info "Configure via wizard: uv run --with-requirements ../setup-requirements.txt python ../wizard.py"
+fi
+
+export HF_TOKEN
+
 # Create test environment file if it doesn't exist
 if [ ! -f "setup/.env.test" ]; then
     print_info "Creating test environment file..."
diff --git a/wizard.py b/wizard.py
index dd727cec..68134815 100755
--- a/wizard.py
+++ b/wizard.py
@@ -101,7 +101,7 @@ def is_placeholder(value, *placeholder_variants):
         'description': 'Control Home Assistant devices via natural language with wake word',
         'enabled_by_default': False,
         'requires_tailscale': True,  # Requires Tailscale for remote HA access
-        'access_level': 'transcript',  # When to trigger
+        'access_level': 'streaming_transcript',  # When to trigger
         'trigger_type': 'wake_word',   # How to trigger
         'config': {
             'ha_url': {
@@ -115,11 +115,11 @@ def is_placeholder(value, *placeholder_variants):
                 'type': 'password',
                 'help': 'Create at: Home Assistant > Profile > Long-Lived Access Tokens'
             },
-            'wake_word': {
-                'prompt': 'Wake word for HA commands',
-                'default': 'vivi',
+            'wake_words': {
+                'prompt': 'Wake words for HA commands (comma-separated)',
+                'default': 'hey vivi, hey jarvis',
                 'type': 'text',
-                'help': 'Say this word before commands (e.g., "Vivi, turn off hall lights")'
+                'help': 'Say these words before commands. Use comma-separated list for multiple (e.g., "hey vivi, hey jarvis")'
             }
         }
     }
@@ -210,7 +210,7 @@ def cleanup_unselected_services(selected_services):
                 console.print(f"🧹 [dim]Backed up {service_name} configuration to {backup_file.name} (service not selected)[/dim]")
 
 def run_service_setup(service_name, selected_services, https_enabled=False, server_ip=None,
-                     obsidian_enabled=False, neo4j_password=None, ts_authkey=None):
+                     obsidian_enabled=False, neo4j_password=None, ts_authkey=None, hf_token=None):
     """Execute individual service setup script"""
     if service_name == 'advanced':
         service = SERVICES['backend'][service_name]
@@ -241,35 +241,15 @@ def run_service_setup(service_name, selected_services, https_enabled=False, serv
         # Add HTTPS configuration for services that support it
         if service_name == 'speaker-recognition' and https_enabled and server_ip:
             cmd.extend(['--enable-https', '--server-ip', server_ip])
-        
-        # For speaker-recognition, validate HF_TOKEN is required
+
+        # For speaker-recognition, pass HF_TOKEN from centralized configuration
         if service_name == 'speaker-recognition':
-            # HF_TOKEN is required for speaker-recognition
-            speaker_env_path = 'extras/speaker-recognition/.env'
-            hf_token = read_env_value(speaker_env_path, 'HF_TOKEN')
-            
-            # Check if HF_TOKEN is missing or is a placeholder
-            if not hf_token or is_placeholder(hf_token, 'your_huggingface_token_here', 'your-huggingface-token-here', 'hf_xxxxx'):
-                console.print("\n[red][ERROR][/red] HF_TOKEN is required for speaker-recognition service")
-                console.print("[yellow]Speaker recognition requires a Hugging Face token to download models[/yellow]")
-                console.print("Get your token from: https://huggingface.co/settings/tokens")
-                console.print()
-                
-                # Prompt for HF_TOKEN
-                try:
-                    hf_token_input = console.input("[cyan]Enter your HF_TOKEN[/cyan]: ").strip()
-                    if not hf_token_input or is_placeholder(hf_token_input, 'your_huggingface_token_here', 'your-huggingface-token-here', 'hf_xxxxx'):
-                        console.print("[red][ERROR][/red] Invalid HF_TOKEN provided. Speaker-recognition setup cancelled.")
-                        return False
-                    hf_token = hf_token_input
-                except EOFError:
-                    console.print("[red][ERROR][/red] HF_TOKEN is required. Speaker-recognition setup cancelled.")
-                    return False
-            
-            # Pass HF Token to init script
-            cmd.extend(['--hf-token', hf_token])
-            console.print("[green][SUCCESS][/green] HF_TOKEN configured")
-            
+            # HF Token should have been provided via setup_hf_token_if_needed()
+            if hf_token:
+                cmd.extend(['--hf-token', hf_token])
+            else:
+                console.print("[yellow][WARNING][/yellow] No HF_TOKEN provided - speaker recognition may fail to download models")
+
             # Pass Deepgram API key from backend if available
             backend_env_path = 'backends/advanced/.env'
             deepgram_key = read_env_value(backend_env_path, 'DEEPGRAM_API_KEY')
@@ -485,11 +465,14 @@ def select_plugins():
                     default=config_spec.get('default', '')
                 )
 
-                plugin_config[config_key] = value
-
-                # For wake_word trigger, add to trigger config
-                if config_key == 'wake_word':
-                    plugin_config['trigger']['wake_word'] = value
+                # For wake_words, convert comma-separated string to list and store in trigger
+                if config_key == 'wake_words':
+                    # Split by comma and strip whitespace
+                    wake_words_list = [w.strip() for w in value.split(',') if w.strip()]
+                    plugin_config['trigger']['wake_words'] = wake_words_list
+                    # Don't store at root level - only in trigger section
+                else:
+                    plugin_config[config_key] = value
 
             selected_plugins[plugin_id] = plugin_config
             console.print(f"  [green]✅ {plugin_meta['name']} configured[/green]\n")
@@ -600,6 +583,46 @@ def setup_git_hooks():
     except Exception as e:
         console.print(f"⚠️  [yellow]Could not setup git hooks: {e} (optional)[/yellow]")
 
+def setup_hf_token_if_needed(selected_services):
+    """Prompt for Hugging Face token if needed by selected services.
+
+    Args:
+        selected_services: List of service names selected by user
+
+    Returns:
+        HF_TOKEN string if provided, None otherwise
+    """
+    # Check if any selected services need HF_TOKEN
+    needs_hf_token = 'speaker-recognition' in selected_services or 'advanced' in selected_services
+
+    if not needs_hf_token:
+        return None
+
+    console.print("\n🤗 [bold cyan]Hugging Face Token Configuration[/bold cyan]")
+    console.print("Required for speaker recognition (PyAnnote models)")
+    console.print("\n[blue][INFO][/blue] Get yours from: https://huggingface.co/settings/tokens\n")
+
+    # Check for existing token from speaker-recognition service
+    speaker_env_path = 'extras/speaker-recognition/.env'
+    existing_token = read_env_value(speaker_env_path, 'HF_TOKEN')
+
+    # Use the masked prompt function
+    hf_token = prompt_with_existing_masked(
+        prompt_text="Hugging Face Token",
+        existing_value=existing_token,
+        placeholders=['your_huggingface_token_here', 'your-huggingface-token-here', 'hf_xxxxx'],
+        is_password=True,
+        default=""
+    )
+
+    if hf_token:
+        masked = mask_value(hf_token)
+        console.print(f"[green]✅ HF_TOKEN configured: {masked}[/green]\n")
+        return hf_token
+    else:
+        console.print("[yellow]⚠️  No HF_TOKEN provided - speaker recognition may fail[/yellow]\n")
+        return None
+
 def setup_config_file():
     """Setup config/config.yml from template if it doesn't exist"""
     config_file = Path("config/config.yml")
@@ -646,6 +669,9 @@ def main():
     if selected_plugins:
         ts_authkey = setup_tailscale_if_needed(selected_plugins)
 
+    # HF Token Configuration (if services require it)
+    hf_token = setup_hf_token_if_needed(selected_services)
+
     # HTTPS Configuration (for services that need it)
     https_enabled = False
     server_ip = None
@@ -731,7 +757,7 @@ def main():
     
     for service in selected_services:
         if run_service_setup(service, selected_services, https_enabled, server_ip,
-                            obsidian_enabled, neo4j_password, ts_authkey):
+                            obsidian_enabled, neo4j_password, ts_authkey, hf_token):
             success_count += 1
         else:
             failed_services.append(service)

From 251010ae83b09a555f8ee69639a2cb2bb0bcaadf Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sat, 3 Jan 2026 11:47:03 +0000
Subject: [PATCH 07/25] Implement Redis integration for client-user mapping and
 enhance wake word processing

- Added asynchronous Redis support in ClientManager for tracking client-user relationships.
- Introduced `initialize_redis_for_client_manager` to set up Redis for cross-container mapping.
- Updated `create_client_state` to use asynchronous tracking for client-user relationships.
- Enhanced wake word processing in PluginRouter with normalization and command extraction.
- Refactored DeepgramStreamingConsumer to utilize async Redis lookups for user ID retrieval.
- Set TTL on Redis streams during client state cleanup for better resource management.
---
 .../src/advanced_omi_backend/app_factory.py   |  5 ++
 .../advanced_omi_backend/client_manager.py    | 68 ++++++++++++++-
 .../controllers/websocket_controller.py       | 12 +--
 .../advanced_omi_backend/plugins/router.py    | 82 +++++++++++++++++--
 .../transcription/deepgram_stream_consumer.py | 10 +--
 .../audio_stream_deepgram_streaming_worker.py |  5 ++
 6 files changed, 162 insertions(+), 20 deletions(-)

diff --git a/backends/advanced/src/advanced_omi_backend/app_factory.py b/backends/advanced/src/advanced_omi_backend/app_factory.py
index c20b3ee9..8a162cec 100644
--- a/backends/advanced/src/advanced_omi_backend/app_factory.py
+++ b/backends/advanced/src/advanced_omi_backend/app_factory.py
@@ -111,6 +111,11 @@ async def lifespan(app: FastAPI):
         from advanced_omi_backend.services.audio_stream import AudioStreamProducer
         app.state.audio_stream_producer = AudioStreamProducer(app.state.redis_audio_stream)
         application_logger.info("✅ Redis client for audio streaming producer initialized")
+
+        # Initialize ClientManager Redis for cross-container client→user mapping
+        from advanced_omi_backend.client_manager import initialize_redis_for_client_manager
+        initialize_redis_for_client_manager(config.redis_url)
+
     except Exception as e:
         application_logger.error(f"Failed to initialize Redis client for audio streaming: {e}", exc_info=True)
         application_logger.warning("Audio streaming producer will not be available")
diff --git a/backends/advanced/src/advanced_omi_backend/client_manager.py b/backends/advanced/src/advanced_omi_backend/client_manager.py
index 5a3131b5..e55b3502 100644
--- a/backends/advanced/src/advanced_omi_backend/client_manager.py
+++ b/backends/advanced/src/advanced_omi_backend/client_manager.py
@@ -9,6 +9,7 @@
 import logging
 import uuid
 from typing import TYPE_CHECKING, Dict, Optional
+import redis.asyncio as redis
 
 if TYPE_CHECKING:
     from advanced_omi_backend.client import ClientState
@@ -21,6 +22,9 @@
 _client_to_user_mapping: Dict[str, str] = {}  # Active clients only
 _all_client_user_mappings: Dict[str, str] = {}  # All clients including disconnected
 
+# Redis client for cross-container client→user mapping
+_redis_client: Optional[redis.Redis] = None
+
 
 class ClientManager:
     """
@@ -372,9 +376,33 @@ def unregister_client_user_mapping(client_id: str):
         logger.warning(f"⚠️ Attempted to unregister non-existent client {client_id}")
 
 
+async def track_client_user_relationship_async(client_id: str, user_id: str, ttl: int = 86400):
+    """
+    Track that a client belongs to a user (async, writes to Redis for cross-container support).
+
+    Args:
+        client_id: The client ID
+        user_id: The user ID that owns this client
+        ttl: Time-to-live in seconds (default 24 hours)
+    """
+    _all_client_user_mappings[client_id] = user_id  # In-memory fallback
+
+    if _redis_client:
+        try:
+            await _redis_client.setex(f"client:owner:{client_id}", ttl, user_id)
+            logger.debug(f"✅ Tracked client {client_id} → user {user_id} in Redis (TTL: {ttl}s)")
+        except Exception as e:
+            logger.warning(f"Failed to track client in Redis: {e}")
+    else:
+        logger.debug(f"Tracked client {client_id} relationship to user {user_id} (in-memory only)")
+
+
 def track_client_user_relationship(client_id: str, user_id: str):
     """
-    Track that a client belongs to a user (persists after disconnection for database queries).
+    Track that a client belongs to a user (sync version for backward compatibility).
+
+    WARNING: This is synchronous and cannot use Redis. Use track_client_user_relationship_async()
+    instead in async contexts for cross-container support.
 
     Args:
         client_id: The client ID
@@ -444,9 +472,45 @@ def get_user_clients_active(user_id: str) -> list[str]:
     return user_clients
 
 
+def initialize_redis_for_client_manager(redis_url: str):
+    """
+    Initialize Redis client for cross-container client→user mapping.
+
+    Args:
+        redis_url: Redis connection URL
+    """
+    global _redis_client
+    _redis_client = redis.from_url(redis_url, decode_responses=True)
+    logger.info(f"✅ ClientManager Redis initialized: {redis_url}")
+
+
+async def get_client_owner_async(client_id: str) -> Optional[str]:
+    """
+    Get the user ID that owns a specific client (async Redis lookup).
+
+    Args:
+        client_id: The client ID to look up
+
+    Returns:
+        User ID if found, None otherwise
+    """
+    if _redis_client:
+        try:
+            user_id = await _redis_client.get(f"client:owner:{client_id}")
+            return user_id
+        except Exception as e:
+            logger.warning(f"Redis lookup failed for client {client_id}: {e}")
+
+    # Fallback to in-memory mapping
+    return _all_client_user_mappings.get(client_id)
+
+
 def get_client_owner(client_id: str) -> Optional[str]:
     """
-    Get the user ID that owns a specific client.
+    Get the user ID that owns a specific client (sync version for backward compatibility).
+
+    WARNING: This is synchronous and cannot use Redis. Use get_client_owner_async() instead
+    in async contexts for cross-container support.
 
     Args:
         client_id: The client ID to look up
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
index 2d99e05c..602e20a4 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
@@ -189,9 +189,9 @@ async def create_client_state(client_id: str, user, device_name: Optional[str] =
         client_id, CHUNK_DIR, user.user_id, user.email
     )
 
-    # Also track in persistent mapping (for database queries)
-    from advanced_omi_backend.client_manager import track_client_user_relationship
-    track_client_user_relationship(client_id, user.user_id)
+    # Also track in persistent mapping (for database queries + cross-container Redis)
+    from advanced_omi_backend.client_manager import track_client_user_relationship_async
+    await track_client_user_relationship_async(client_id, user.user_id)
 
     # Register client in user model (persistent)
     from advanced_omi_backend.users import register_client_to_user
@@ -265,12 +265,12 @@ async def cleanup_client_state(client_id: str):
         if sessions_closed > 0:
             logger.info(f"✅ Closed {sessions_closed} active session(s) for client {client_id}")
 
-        # Delete Redis Streams for this client
+        # Set TTL on Redis Streams for this client (allows consumer groups to finish processing)
         stream_pattern = f"audio:stream:{client_id}"
         stream_key = await async_redis.exists(stream_pattern)
         if stream_key:
-            await async_redis.delete(stream_pattern)
-            logger.info(f"🧹 Deleted Redis stream: {stream_pattern}")
+            await async_redis.expire(stream_pattern, 60)  # 60 second TTL for consumer group fan-out
+            logger.info(f"⏰ Set 60s TTL on Redis stream: {stream_pattern}")
         else:
             logger.debug(f"No Redis stream found for client {client_id}")
 
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/router.py b/backends/advanced/src/advanced_omi_backend/plugins/router.py
index e29f64e3..e8ae4634 100644
--- a/backends/advanced/src/advanced_omi_backend/plugins/router.py
+++ b/backends/advanced/src/advanced_omi_backend/plugins/router.py
@@ -5,6 +5,8 @@
 """
 
 import logging
+import re
+import string
 from typing import Dict, List, Optional
 
 from .base import BasePlugin, PluginContext, PluginResult
@@ -12,6 +14,71 @@
 logger = logging.getLogger(__name__)
 
 
+def normalize_text_for_wake_word(text: str) -> str:
+    """
+    Normalize text for wake word matching.
+    - Lowercase
+    - Remove punctuation
+    - Collapse multiple spaces to single space
+    - Strip leading/trailing whitespace
+
+    Example:
+        "Hey, Vivi!" -> "hey vivi"
+        "HEY  VIVI" -> "hey vivi"
+    """
+    # Lowercase
+    text = text.lower()
+    # Remove punctuation
+    text = text.translate(str.maketrans('', '', string.punctuation))
+    # Normalize whitespace (collapse multiple spaces to single space)
+    text = re.sub(r'\s+', ' ', text)
+    # Strip leading/trailing whitespace
+    return text.strip()
+
+
+def extract_command_after_wake_word(transcript: str, wake_word: str) -> str:
+    """
+    Intelligently extract command after wake word in original transcript.
+
+    Handles punctuation and spacing variations by creating a flexible regex pattern.
+
+    Example:
+        transcript: "Hey, Vivi, turn off lights"
+        wake_word: "hey vivi"
+        -> extracts: "turn off lights"
+
+    Args:
+        transcript: Original transcript text with punctuation
+        wake_word: Configured wake word (will be normalized)
+
+    Returns:
+        Command text after wake word, or full transcript if wake word boundary not found
+    """
+    # Split wake word into parts (normalized)
+    wake_word_parts = normalize_text_for_wake_word(wake_word).split()
+
+    if not wake_word_parts:
+        return transcript.strip()
+
+    # Create regex pattern that allows punctuation/whitespace between parts
+    # Example: "hey" + "vivi" -> r"hey[\s,.\-!?]*vivi"
+    pattern_parts = [re.escape(part) for part in wake_word_parts]
+    pattern = r'\s*[\W_]*\s*'.join(pattern_parts)
+    pattern = '^' + pattern  # Must be at start of transcript
+
+    # Try to match wake word at start of transcript (case-insensitive)
+    match = re.match(pattern, transcript, re.IGNORECASE)
+
+    if match:
+        # Extract everything after the matched wake word
+        command = transcript[match.end():].strip()
+        return command
+    else:
+        # Fallback: couldn't find wake word boundary, return full transcript
+        logger.warning(f"Could not find wake word boundary for '{wake_word}' in '{transcript}', using full transcript")
+        return transcript.strip()
+
+
 class PluginRouter:
     """Routes pipeline events to appropriate plugins based on access level and triggers"""
 
@@ -113,9 +180,9 @@ async def _should_trigger(self, plugin: BasePlugin, data: Dict) -> bool:
             return True
 
         elif trigger_type == 'wake_word':
-            # Check if transcript starts with wake word(s)
+            # Normalize transcript for matching (handles punctuation and spacing)
             transcript = data.get('transcript', '')
-            transcript_lower = transcript.lower().strip()
+            normalized_transcript = normalize_text_for_wake_word(transcript)
 
             # Support both singular 'wake_word' and plural 'wake_words' (list)
             wake_words = plugin.trigger.get('wake_words', [])
@@ -125,14 +192,15 @@ async def _should_trigger(self, plugin: BasePlugin, data: Dict) -> bool:
                 if wake_word:
                     wake_words = [wake_word]
 
-            # Check if transcript starts with any wake word
+            # Check if transcript starts with any wake word (after normalization)
             for wake_word in wake_words:
-                wake_word_lower = wake_word.lower()
-                if wake_word_lower and transcript_lower.startswith(wake_word_lower):
-                    # Extract command (remove wake word)
-                    command = transcript[len(wake_word):].strip()
+                normalized_wake_word = normalize_text_for_wake_word(wake_word)
+                if normalized_wake_word and normalized_transcript.startswith(normalized_wake_word):
+                    # Smart extraction: find where wake word actually ends in original text
+                    command = extract_command_after_wake_word(transcript, wake_word)
                     data['command'] = command
                     data['original_transcript'] = transcript
+                    logger.debug(f"Wake word '{wake_word}' detected. Original: '{transcript}', Command: '{command}'")
                     return True
 
             return False
diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
index 68b3c61a..ca5396f9 100644
--- a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
+++ b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
@@ -19,7 +19,7 @@
 
 from advanced_omi_backend.plugins.router import PluginRouter
 from advanced_omi_backend.services.transcription import get_transcription_provider
-from advanced_omi_backend.client_manager import get_client_owner
+from advanced_omi_backend.client_manager import get_client_owner_async
 
 logger = logging.getLogger(__name__)
 
@@ -257,7 +257,7 @@ async def store_final_result(self, session_id: str, result: Dict, chunk_id: str
 
     async def _get_user_id_from_client_id(self, client_id: str) -> Optional[str]:
         """
-        Look up user_id from client_id using ClientManager.
+        Look up user_id from client_id using ClientManager (async Redis lookup).
 
         Args:
             client_id: Client ID to search for
@@ -265,12 +265,12 @@ async def _get_user_id_from_client_id(self, client_id: str) -> Optional[str]:
         Returns:
             user_id if found, None otherwise
         """
-        user_id = get_client_owner(client_id)
+        user_id = await get_client_owner_async(client_id)
 
         if user_id:
-            logger.debug(f"Found user_id {user_id} for client_id {client_id}")
+            logger.debug(f"Found user_id {user_id} for client_id {client_id} via Redis")
         else:
-            logger.warning(f"No user_id found for client_id {client_id}")
+            logger.warning(f"No user_id found for client_id {client_id} in Redis")
 
         return user_id
 
diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py
index 8b9aa885..73b04168 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py
@@ -18,6 +18,7 @@
 
 from advanced_omi_backend.services.plugin_service import init_plugin_router
 from advanced_omi_backend.services.transcription.deepgram_stream_consumer import DeepgramStreamingConsumer
+from advanced_omi_backend.client_manager import initialize_redis_for_client_manager
 
 logging.basicConfig(
     level=logging.INFO,
@@ -48,6 +49,10 @@ async def main():
             decode_responses=False
         )
         logger.info(f"✅ Connected to Redis: {redis_url}")
+
+        # Initialize ClientManager Redis for cross-container client→user mapping
+        initialize_redis_for_client_manager(redis_url)
+
     except Exception as e:
         logger.error(f"Failed to connect to Redis: {e}", exc_info=True)
         sys.exit(1)

From eceb6334495c014cb79cdb51e0992052a081afdd Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sat, 3 Jan 2026 12:42:01 +0000
Subject: [PATCH 08/25] Refactor Deepgram worker management and enhance text
 normalization

- Disabled the batch Deepgram worker in favor of the streaming worker to prevent race conditions.
- Updated text normalization in wake word processing to replace punctuation with spaces, preserving word boundaries.
- Enhanced regex pattern for wake word matching to allow optional punctuation and whitespace after the last part.
- Improved logging in DeepgramStreamingConsumer for better visibility of message processing and error handling.
---
 .../advanced_omi_backend/plugins/router.py    | 18 +++++++++------
 .../transcription/deepgram_stream_consumer.py | 17 ++++++++++++++
 .../audio_stream_deepgram_streaming_worker.py |  8 +++++++
 backends/advanced/start-workers.sh            | 22 +++++++++++--------
 4 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/backends/advanced/src/advanced_omi_backend/plugins/router.py b/backends/advanced/src/advanced_omi_backend/plugins/router.py
index e8ae4634..8074feb3 100644
--- a/backends/advanced/src/advanced_omi_backend/plugins/router.py
+++ b/backends/advanced/src/advanced_omi_backend/plugins/router.py
@@ -18,18 +18,19 @@ def normalize_text_for_wake_word(text: str) -> str:
     """
     Normalize text for wake word matching.
     - Lowercase
-    - Remove punctuation
+    - Replace punctuation with spaces
     - Collapse multiple spaces to single space
     - Strip leading/trailing whitespace
 
     Example:
         "Hey, Vivi!" -> "hey vivi"
         "HEY  VIVI" -> "hey vivi"
+        "Hey-Vivi" -> "hey vivi"
     """
     # Lowercase
     text = text.lower()
-    # Remove punctuation
-    text = text.translate(str.maketrans('', '', string.punctuation))
+    # Replace punctuation with spaces (instead of removing, to preserve word boundaries)
+    text = text.translate(str.maketrans(string.punctuation, ' ' * len(string.punctuation)))
     # Normalize whitespace (collapse multiple spaces to single space)
     text = re.sub(r'\s+', ' ', text)
     # Strip leading/trailing whitespace
@@ -61,16 +62,19 @@ def extract_command_after_wake_word(transcript: str, wake_word: str) -> str:
         return transcript.strip()
 
     # Create regex pattern that allows punctuation/whitespace between parts
-    # Example: "hey" + "vivi" -> r"hey[\s,.\-!?]*vivi"
+    # Example: "hey" + "vivi" -> r"hey[\s,.\-!?]*vivi[\s,.\-!?]*"
+    # The pattern matches the wake word parts with optional punctuation/whitespace between and after
     pattern_parts = [re.escape(part) for part in wake_word_parts]
-    pattern = r'\s*[\W_]*\s*'.join(pattern_parts)
-    pattern = '^' + pattern  # Must be at start of transcript
+    # Allow optional punctuation/whitespace between parts
+    pattern = r'[\s,.\-!?;:]*'.join(pattern_parts)
+    # Add trailing punctuation/whitespace consumption after last wake word part
+    pattern = '^' + pattern + r'[\s,.\-!?;:]*'
 
     # Try to match wake word at start of transcript (case-insensitive)
     match = re.match(pattern, transcript, re.IGNORECASE)
 
     if match:
-        # Extract everything after the matched wake word
+        # Extract everything after the matched wake word (including trailing punctuation)
         command = transcript[match.end():].strip()
         return command
     else:
diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
index ca5396f9..ff312360 100644
--- a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
+++ b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
@@ -364,6 +364,7 @@ async def process_stream(self, stream_name: str):
                         continue
 
                     for stream, stream_messages in messages:
+                        logger.debug(f"📥 Read {len(stream_messages)} messages from {stream_name}")
                         for message_id, fields in stream_messages:
                             msg_id = message_id.decode() if isinstance(message_id, bytes) else message_id
 
@@ -378,12 +379,15 @@ async def process_stream(self, stream_name: str):
                             # Extract audio data (producer sends as 'audio_data', not 'audio_chunk')
                             audio_chunk = fields.get(b'audio_data') or fields.get('audio_data')
                             if audio_chunk:
+                                logger.debug(f"🎵 Processing audio chunk {msg_id} ({len(audio_chunk)} bytes)")
                                 # Process audio chunk through Deepgram WebSocket
                                 await self.process_audio_chunk(
                                     session_id=session_id,
                                     audio_chunk=audio_chunk,
                                     chunk_id=msg_id
                                 )
+                            else:
+                                logger.warning(f"⚠️ Message {msg_id} has no audio_data field")
 
                             # ACK the message after processing
                             await self.redis_client.xack(stream_name, self.group_name, msg_id)
@@ -391,6 +395,15 @@ async def process_stream(self, stream_name: str):
                         if stream_ended:
                             break
 
+                except redis_exceptions.ResponseError as e:
+                    if "NOGROUP" in str(e):
+                        # Stream has expired or been deleted - exit gracefully
+                        logger.info(f"Stream {stream_name} expired or deleted, ending processing")
+                        stream_ended = True
+                        break
+                    else:
+                        logger.error(f"Redis error reading from stream {stream_name}: {e}", exc_info=True)
+                        await asyncio.sleep(1)
                 except Exception as e:
                     logger.error(f"Error reading from stream {stream_name}: {e}", exc_info=True)
                     await asyncio.sleep(1)
@@ -399,6 +412,10 @@ async def process_stream(self, stream_name: str):
             # End WebSocket connection
             await self.end_session_stream(session_id)
 
+            # Remove from active streams tracking
+            self.active_streams.pop(stream_name, None)
+            logger.debug(f"Removed {stream_name} from active streams tracking")
+
     async def start_consuming(self):
         """
         Start consuming audio streams and processing through Deepgram WebSocket.
diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py
index 73b04168..0a893e6a 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py
@@ -62,6 +62,14 @@ async def main():
         plugin_router = init_plugin_router()
         if plugin_router:
             logger.info(f"✅ Plugin router initialized with {len(plugin_router.plugins)} plugins")
+
+            # Initialize async plugins
+            for plugin_id, plugin in plugin_router.plugins.items():
+                try:
+                    await plugin.initialize()
+                    logger.info(f"✅ Plugin '{plugin_id}' initialized in streaming worker")
+                except Exception as e:
+                    logger.exception(f"Failed to initialize plugin '{plugin_id}' in streaming worker: {e}")
         else:
             logger.warning("No plugin router available - plugins will not be triggered")
     except Exception as e:
diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh
index 3fea5a39..774dcda0 100755
--- a/backends/advanced/start-workers.sh
+++ b/backends/advanced/start-workers.sh
@@ -64,15 +64,19 @@ if registry and registry.defaults:
 
     echo "📋 Configured STT provider: ${DEFAULT_STT:-none}"
 
-    # Only start Deepgram worker if configured as default STT
-    if [[ "$DEFAULT_STT" == "deepgram" ]] && [ -n "$DEEPGRAM_API_KEY" ]; then
-        echo "🎵 Starting audio stream Deepgram worker (1 worker for sequential processing)..."
-        uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_worker &
-        AUDIO_STREAM_DEEPGRAM_WORKER_PID=$!
-    else
-        echo "⏭️  Skipping Deepgram stream worker (not configured as default STT or API key missing)"
-        AUDIO_STREAM_DEEPGRAM_WORKER_PID=""
-    fi
+    # DISABLED: Batch Deepgram worker - using streaming worker instead
+    # The deepgram-streaming-worker container handles audio:stream:* streams with plugin support
+    # Batch worker is disabled to prevent race condition with streaming worker
+    # if [[ "$DEFAULT_STT" == "deepgram" ]] && [ -n "$DEEPGRAM_API_KEY" ]; then
+    #     echo "🎵 Starting audio stream Deepgram worker (1 worker for sequential processing)..."
+    #     uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_worker &
+    #     AUDIO_STREAM_DEEPGRAM_WORKER_PID=$!
+    # else
+    #     echo "⏭️  Skipping Deepgram stream worker (not configured as default STT or API key missing)"
+    #     AUDIO_STREAM_DEEPGRAM_WORKER_PID=""
+    # fi
+    echo "⏭️  Batch Deepgram worker disabled - using deepgram-streaming-worker container instead"
+    AUDIO_STREAM_DEEPGRAM_WORKER_PID=""
 
     # Only start Parakeet worker if configured as default STT
     if [[ "$DEFAULT_STT" == "parakeet" ]]; then

From 916135e0ca276782211d67938b979c8e754daa30 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sat, 3 Jan 2026 14:02:31 +0000
Subject: [PATCH 09/25] Add original prompt retrieval and restoration in chat
 configuration test

- Implemented retrieval of the original chat prompt before saving a custom prompt to ensure test isolation.
- Added restoration of the original prompt after the test to prevent interference with subsequent tests.
- Enhanced the test documentation for clarity on the purpose of these changes.
---
 tests/endpoints/system_admin_tests.robot | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/endpoints/system_admin_tests.robot b/tests/endpoints/system_admin_tests.robot
index 5e4b9d3e..0ee3d439 100644
--- a/tests/endpoints/system_admin_tests.robot
+++ b/tests/endpoints/system_admin_tests.robot
@@ -190,6 +190,10 @@ Save And Retrieve Chat Configuration Test
     [Documentation]    Test saving and retrieving chat configuration
     [Tags]    infra	permissions
 
+    # Get original prompt to restore later
+    ${response}=       GET On Session    api    /api/admin/chat/config
+    ${original_prompt}=    Set Variable    ${response.text}
+
     # Save custom prompt
     ${custom_prompt}=  Set Variable    You are a specialized AI assistant for technical support and troubleshooting.
     &{headers}=        Create Dictionary    Content-Type=text/plain
@@ -206,6 +210,12 @@ Save And Retrieve Chat Configuration Test
     ${retrieved}=      Set Variable    ${response.text}
     Should Be Equal    ${retrieved}    ${custom_prompt}    msg=Retrieved prompt should match saved prompt
 
+    # Restore original prompt to avoid test interference
+    ${response}=       POST On Session    api    /api/admin/chat/config
+    ...                data=${original_prompt}
+    ...                headers=${headers}
+    Should Be Equal As Integers    ${response.status_code}    200
+
 
 Non-Admin Cannot Access Admin Endpoints Test
     [Documentation]    Test that non-admin users cannot access admin endpoints

From 944fc627c3bbc6533471e2bd45501abfc34b4ba4 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sat, 3 Jan 2026 14:37:43 +0000
Subject: [PATCH 10/25] Refactor test execution and enhance documentation for
 integration tests

- Simplified test execution commands in CLAUDE.md and quickstart.md for better usability.
- Added instructions for running tests from the project root and clarified the process for executing the complete Robot Framework test suite.
- Introduced a new Docker service for the Deepgram streaming worker in docker-compose-test.yml to improve testing capabilities.
- Updated system_admin_tests.robot to use a defined default prompt for restoration, enhancing test reliability and clarity.
---
 CLAUDE.md                                 | 10 +------
 Docs/getting-started.md                   | 11 +++++---
 backends/advanced/Docs/quickstart.md      | 11 +++++---
 backends/advanced/docker-compose-test.yml | 33 +++++++++++++++++++++++
 tests/endpoints/system_admin_tests.robot  |  9 +++----
 5 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index abe20db6..b981231a 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -114,16 +114,8 @@ cp .env.template .env  # Configure API keys
 # Run full integration test suite
 ./run-test.sh
 
-# Manual test execution (for debugging)
-source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY
-uv run robot --outputdir test-results --loglevel INFO ../../tests/integration/integration_test.robot
-
 # Leave test containers running for debugging (don't auto-cleanup)
-CLEANUP_CONTAINERS=false source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY
-uv run robot --outputdir test-results --loglevel INFO ../../tests/integration/integration_test.robot
-
-# Manual cleanup when needed
-docker compose -f docker-compose-test.yml down -v
+CLEANUP_CONTAINERS=false ./run-test.sh
 ```
 
 #### Test Configuration Flags
diff --git a/Docs/getting-started.md b/Docs/getting-started.md
index a923c99c..c1e1a4b4 100644
--- a/Docs/getting-started.md
+++ b/Docs/getting-started.md
@@ -175,11 +175,16 @@ PARAKEET_ASR_URL=http://host.docker.internal:8080
 
 After configuration, verify everything works with the integration test suite:
 ```bash
+# From backends/advanced directory
 ./run-test.sh
 
-# Alternative: Manual test with detailed logging
-source .env && export DEEPGRAM_API_KEY OPENAI_API_KEY && \
-  uv run robot --outputdir ../../test-results --loglevel INFO ../../tests/integration/integration_test.robot
+# Or run all tests from project root
+cd ../..
+./run-test.sh advanced-backend
+
+# Or run complete Robot Framework test suite
+cd tests
+./run-robot-tests.sh
 ```
 This end-to-end test validates the complete audio processing pipeline using Robot Framework.
 
diff --git a/backends/advanced/Docs/quickstart.md b/backends/advanced/Docs/quickstart.md
index 0d681978..9f966242 100644
--- a/backends/advanced/Docs/quickstart.md
+++ b/backends/advanced/Docs/quickstart.md
@@ -173,11 +173,16 @@ PARAKEET_ASR_URL=http://host.docker.internal:8080
 
 After configuration, verify everything works with the integration test suite:
 ```bash
+# From backends/advanced directory
 ./run-test.sh
 
-# Alternative: Manual test with detailed logging
-source .env && export DEEPGRAM_API_KEY OPENAI_API_KEY && \
-  uv run robot --outputdir ../../test-results --loglevel INFO ../../tests/integration/integration_test.robot
+# Or run all tests from project root
+cd ../..
+./run-test.sh advanced-backend
+
+# Or run complete Robot Framework test suite
+cd tests
+./run-robot-tests.sh
 ```
 This end-to-end test validates the complete audio processing pipeline using Robot Framework.
 
diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml
index cf498896..812d29b9 100644
--- a/backends/advanced/docker-compose-test.yml
+++ b/backends/advanced/docker-compose-test.yml
@@ -200,6 +200,39 @@ services:
         condition: service_healthy
     restart: unless-stopped
 
+  deepgram-streaming-worker-test:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    command: >
+      uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_streaming_worker
+    volumes:
+      - ./src:/app/src
+      - ./data/test_data:/app/data
+      - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml
+      - ${PLUGINS_CONFIG:-../../config/plugins.yml}:/app/plugins.yml
+    environment:
+      - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
+      - REDIS_URL=redis://redis-test:6379/0
+      - HA_TOKEN=${HA_TOKEN}
+      - MONGODB_URI=mongodb://mongo-test:27017/test_db
+      - QDRANT_BASE_URL=qdrant-test
+      - QDRANT_PORT=6333
+      - DEBUG_DIR=/app/debug_dir
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - GROQ_API_KEY=${GROQ_API_KEY}
+      - AUTH_SECRET_KEY=test-jwt-signing-key-for-integration-tests
+      - ADMIN_PASSWORD=test-admin-password-123
+      - ADMIN_EMAIL=test-admin@example.com
+      - TRANSCRIPTION_PROVIDER=${TRANSCRIPTION_PROVIDER:-deepgram}
+      - MEMORY_PROVIDER=${MEMORY_PROVIDER:-chronicle}
+    depends_on:
+      redis-test:
+        condition: service_started
+      mongo-test:
+        condition: service_healthy
+    restart: unless-stopped
+
   # Mycelia - AI memory and timeline service (test environment)
   # mycelia-backend-test:
   #   build:
diff --git a/tests/endpoints/system_admin_tests.robot b/tests/endpoints/system_admin_tests.robot
index 0ee3d439..de8f233b 100644
--- a/tests/endpoints/system_admin_tests.robot
+++ b/tests/endpoints/system_admin_tests.robot
@@ -190,9 +190,8 @@ Save And Retrieve Chat Configuration Test
     [Documentation]    Test saving and retrieving chat configuration
     [Tags]    infra	permissions
 
-    # Get original prompt to restore later
-    ${response}=       GET On Session    api    /api/admin/chat/config
-    ${original_prompt}=    Set Variable    ${response.text}
+    # Define known default prompt for restoration (from system_controller.py and chat_service.py)
+    ${default_prompt}=    Set Variable    You are a helpful AI assistant with access to the user's personal memories and conversation history.
 
     # Save custom prompt
     ${custom_prompt}=  Set Variable    You are a specialized AI assistant for technical support and troubleshooting.
@@ -210,9 +209,9 @@ Save And Retrieve Chat Configuration Test
     ${retrieved}=      Set Variable    ${response.text}
     Should Be Equal    ${retrieved}    ${custom_prompt}    msg=Retrieved prompt should match saved prompt
 
-    # Restore original prompt to avoid test interference
+    # Restore default prompt to avoid test interference
     ${response}=       POST On Session    api    /api/admin/chat/config
-    ...                data=${original_prompt}
+    ...                data=${default_prompt}
     ...                headers=${headers}
     Should Be Equal As Integers    ${response.status_code}    200
 

From 952d471e6082d5f0cc0d2ac1eaa84ddc348107ce Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 6 Jan 2026 05:31:26 +0000
Subject: [PATCH 11/25] Enhance test environment cleanup and improve Deepgram
 worker management

- Updated `run-test.sh` and `run-robot-tests.sh` to improve cleanup processes, including handling permission issues with Docker.
- Introduced a new function `mark_session_complete` in `session_controller.py` to ensure atomic updates for session completion status.
- Refactored WebSocket and conversation job handling to utilize the new session completion function, enhancing reliability.
- Updated `start-workers.sh` to enable the batch Deepgram worker alongside the streaming worker for improved transcription capabilities.
- Enhanced test scripts to verify the status of Deepgram workers and ensure proper cleanup of test containers.
---
 backends/advanced/run-test.sh                 | 16 ++++-
 .../controllers/session_controller.py         | 53 ++++++++++++++++-
 .../controllers/websocket_controller.py       |  8 +--
 .../workers/conversation_jobs.py              |  7 ++-
 backends/advanced/start-workers.sh            | 26 ++++-----
 tests/configs/deepgram-openai.yml             |  3 +-
 tests/endpoints/system_admin_tests.robot      |  9 +++
 tests/run-robot-tests.sh                      | 58 ++++++++++++++++---
 8 files changed, 146 insertions(+), 34 deletions(-)

diff --git a/backends/advanced/run-test.sh b/backends/advanced/run-test.sh
index 5f13d35a..a18dc895 100755
--- a/backends/advanced/run-test.sh
+++ b/backends/advanced/run-test.sh
@@ -211,15 +211,25 @@ print_info "Using environment variables from .env file for test configuration"
 
 # Clean test environment
 print_info "Cleaning test environment..."
-sudo rm -rf ./test_audio_chunks/ ./test_data/ ./test_debug_dir/ ./mongo_data_test/ ./qdrant_data_test/ ./test_neo4j/ || true
+rm -rf ./test_audio_chunks/ ./test_data/ ./test_debug_dir/ ./mongo_data_test/ ./qdrant_data_test/ ./test_neo4j/ 2>/dev/null || true
+
+# If cleanup fails due to permissions, try with docker
+if [ -d "./data/test_audio_chunks/" ] || [ -d "./data/test_data/" ] || [ -d "./data/test_debug_dir/" ]; then
+    print_warning "Permission denied, using docker to clean test directories..."
+    docker run --rm -v "$(pwd)/data:/data" alpine sh -c 'rm -rf /data/test_*' 2>/dev/null || true
+fi
 
 # Use unique project name to avoid conflicts with development environment
 export COMPOSE_PROJECT_NAME="advanced-backend-test"
 
 # Stop any existing test containers
 print_info "Stopping existing test containers..."
+# Try cleanup with current project name
 docker compose -f docker-compose-test.yml down -v || true
 
+# Also try cleanup with default project name (in case containers were started without COMPOSE_PROJECT_NAME)
+COMPOSE_PROJECT_NAME=advanced docker compose -f docker-compose-test.yml down -v 2>/dev/null || true
+
 # Run integration tests
 print_info "Running integration tests..."
 print_info "Using fresh mode (CACHED_MODE=False) for clean testing"
@@ -257,6 +267,8 @@ else
     if [ "${CLEANUP_CONTAINERS:-true}" != "false" ]; then
         print_info "Cleaning up test containers after failure..."
         docker compose -f docker-compose-test.yml down -v || true
+        # Also cleanup with default project name
+        COMPOSE_PROJECT_NAME=advanced docker compose -f docker-compose-test.yml down -v 2>/dev/null || true
         docker system prune -f || true
     else
         print_warning "Skipping cleanup (CLEANUP_CONTAINERS=false) - containers left running for debugging"
@@ -269,6 +281,8 @@ fi
 if [ "${CLEANUP_CONTAINERS:-true}" != "false" ]; then
     print_info "Cleaning up test containers..."
     docker compose -f docker-compose-test.yml down -v || true
+    # Also cleanup with default project name
+    COMPOSE_PROJECT_NAME=advanced docker compose -f docker-compose-test.yml down -v 2>/dev/null || true
     docker system prune -f || true
 else
     print_warning "Skipping cleanup (CLEANUP_CONTAINERS=false) - containers left running"
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py
index a3836898..d1a22695 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py
@@ -9,13 +9,61 @@
 
 import logging
 import time
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Literal
 
 from fastapi.responses import JSONResponse
 
 logger = logging.getLogger(__name__)
 
 
+async def mark_session_complete(
+    redis_client,
+    session_id: str,
+    reason: Literal[
+        "websocket_disconnect",
+        "user_stopped",
+        "inactivity_timeout",
+        "max_duration",
+        "all_jobs_complete"
+    ],
+) -> None:
+    """
+    Single source of truth for marking sessions as complete.
+
+    This function ensures that both 'status' and 'completion_reason' are ALWAYS
+    set together atomically, preventing race conditions where workers check status
+    before completion_reason is set.
+
+    Args:
+        redis_client: Redis async client
+        session_id: Session UUID
+        reason: Why the session is completing (enforced by type system)
+
+    Usage:
+        # WebSocket disconnect
+        await mark_session_complete(redis, session_id, "websocket_disconnect")
+
+        # User manually stopped
+        await mark_session_complete(redis, session_id, "user_stopped")
+
+        # Inactivity timeout
+        await mark_session_complete(redis, session_id, "inactivity_timeout")
+
+        # Max duration reached
+        await mark_session_complete(redis, session_id, "max_duration")
+
+        # All jobs finished
+        await mark_session_complete(redis, session_id, "all_jobs_complete")
+    """
+    session_key = f"audio:session:{session_id}"
+    await redis_client.hset(session_key, mapping={
+        "status": "complete",
+        "completed_at": str(time.time()),
+        "completion_reason": reason
+    })
+    logger.info(f"✅ Session {session_id[:12]} marked complete: {reason}")
+
+
 async def get_session_info(redis_client, session_id: str) -> Optional[Dict]:
     """
     Get detailed information about a specific session.
@@ -192,8 +240,7 @@ async def get_streaming_status(request):
                     # All jobs complete - this is truly a completed session
                     # Update Redis status if it wasn't already marked complete
                     if status not in ["complete", "completed", "finalized"]:
-                        await redis_client.hset(key, "status", "complete")
-                        logger.info(f"✅ Marked session {session_id} as complete (all jobs terminal)")
+                        await mark_session_complete(redis_client, session_id, "all_jobs_complete")
 
                     # Get additional session data for completed sessions
                     session_key = f"audio:session:{session_id}"
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
index 602e20a4..2b98bcbb 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
@@ -22,6 +22,7 @@
 from advanced_omi_backend.auth import websocket_auth
 from advanced_omi_backend.client_manager import generate_client_id, get_client_manager
 from advanced_omi_backend.constants import OMI_CHANNELS, OMI_SAMPLE_RATE, OMI_SAMPLE_WIDTH
+from advanced_omi_backend.controllers.session_controller import mark_session_complete
 from advanced_omi_backend.utils.audio_utils import process_audio_chunk
 from advanced_omi_backend.services.audio_stream import AudioStreamProducer
 from advanced_omi_backend.services.audio_stream.producer import get_audio_stream_producer
@@ -250,13 +251,8 @@ async def cleanup_client_state(client_id: str):
                 client_id_bytes = await async_redis.hget(key, "client_id")
                 if client_id_bytes and client_id_bytes.decode() == client_id:
                     # Mark session as complete (WebSocket disconnected)
-                    await async_redis.hset(key, mapping={
-                        "status": "complete",
-                        "completed_at": str(time.time()),
-                        "completion_reason": "websocket_disconnect"
-                    })
                     session_id = key.decode().replace("audio:session:", "")
-                    logger.info(f"📊 Marked session {session_id[:12]} as complete (WebSocket disconnect)")
+                    await mark_session_complete(async_redis, session_id, "websocket_disconnect")
                     sessions_closed += 1
 
             if cursor == 0:
diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
index 49f0c5c9..7c754d19 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
@@ -13,6 +13,7 @@
 
 from advanced_omi_backend.models.job import async_job
 from advanced_omi_backend.controllers.queue_controller import redis_conn
+from advanced_omi_backend.controllers.session_controller import mark_session_complete
 from advanced_omi_backend.services.plugin_service import get_plugin_router
 
 from advanced_omi_backend.utils.conversation_utils import (
@@ -296,9 +297,9 @@ async def open_conversation_job(
             if status_str in ["finalizing", "complete"]:
                 finalize_received = True
 
-                # Check if this was a WebSocket disconnect
+                # Get completion reason (guaranteed to exist with unified API)
                 completion_reason = await redis_client.hget(session_key, "completion_reason")
-                completion_reason_str = completion_reason.decode() if completion_reason else None
+                completion_reason_str = completion_reason.decode() if completion_reason else "unknown"
 
                 if completion_reason_str == "websocket_disconnect":
                     logger.warning(
@@ -308,7 +309,7 @@ async def open_conversation_job(
                     timeout_triggered = False  # This is a disconnect, not a timeout
                 else:
                     logger.info(
-                        f"🛑 Session finalizing (reason: {completion_reason_str or 'user_stopped'}), "
+                        f"🛑 Session finalizing (reason: {completion_reason_str}), "
                         f"waiting for audio persistence job to complete..."
                     )
                 break  # Exit immediately when finalize signal received
diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh
index 774dcda0..8715da4b 100755
--- a/backends/advanced/start-workers.sh
+++ b/backends/advanced/start-workers.sh
@@ -64,19 +64,19 @@ if registry and registry.defaults:
 
     echo "📋 Configured STT provider: ${DEFAULT_STT:-none}"
 
-    # DISABLED: Batch Deepgram worker - using streaming worker instead
-    # The deepgram-streaming-worker container handles audio:stream:* streams with plugin support
-    # Batch worker is disabled to prevent race condition with streaming worker
-    # if [[ "$DEFAULT_STT" == "deepgram" ]] && [ -n "$DEEPGRAM_API_KEY" ]; then
-    #     echo "🎵 Starting audio stream Deepgram worker (1 worker for sequential processing)..."
-    #     uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_worker &
-    #     AUDIO_STREAM_DEEPGRAM_WORKER_PID=$!
-    # else
-    #     echo "⏭️  Skipping Deepgram stream worker (not configured as default STT or API key missing)"
-    #     AUDIO_STREAM_DEEPGRAM_WORKER_PID=""
-    # fi
-    echo "⏭️  Batch Deepgram worker disabled - using deepgram-streaming-worker container instead"
-    AUDIO_STREAM_DEEPGRAM_WORKER_PID=""
+    # Batch Deepgram worker - uses consumer group "deepgram_workers"
+    # Runs alongside deepgram-streaming-worker container (consumer group "streaming-transcription")
+    # Both workers process same streams via Redis consumer groups (fan-out architecture)
+    # - Batch worker: High-quality transcription with diarization (~6s latency)
+    # - Streaming worker: Fast wake-word detection with plugins (~1-2s latency)
+    if [[ "$DEFAULT_STT" == "deepgram" ]] && [ -n "$DEEPGRAM_API_KEY" ]; then
+        echo "🎵 Starting audio stream Deepgram batch worker (consumer group: deepgram_workers)..."
+        uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_worker &
+        AUDIO_STREAM_DEEPGRAM_WORKER_PID=$!
+    else
+        echo "⏭️  Skipping Deepgram batch worker (not configured as default STT or API key missing)"
+        AUDIO_STREAM_DEEPGRAM_WORKER_PID=""
+    fi
 
     # Only start Parakeet worker if configured as default STT
     if [[ "$DEFAULT_STT" == "parakeet" ]]; then
diff --git a/tests/configs/deepgram-openai.yml b/tests/configs/deepgram-openai.yml
index 1e4cd8b2..6a2a11bd 100644
--- a/tests/configs/deepgram-openai.yml
+++ b/tests/configs/deepgram-openai.yml
@@ -1,5 +1,6 @@
 chat:
-  system_prompt: You are a specialized AI assistant for technical support and troubleshooting.
+  system_prompt: You are a helpful AI assistant with access to the user's personal
+    memories and conversation history.
 defaults:
   embedding: openai-embed
   llm: openai-llm
diff --git a/tests/endpoints/system_admin_tests.robot b/tests/endpoints/system_admin_tests.robot
index de8f233b..c8ce0c4c 100644
--- a/tests/endpoints/system_admin_tests.robot
+++ b/tests/endpoints/system_admin_tests.robot
@@ -153,6 +153,15 @@ Get Chat Configuration Test
     [Documentation]    Test getting chat system prompt (admin only)
     [Tags]    infra	permissions
 
+    # First ensure default prompt is set (cleanup from previous test runs)
+    ${default_prompt}=    Set Variable    You are a helpful AI assistant with access to the user's personal memories and conversation history.
+    &{headers}=        Create Dictionary    Content-Type=text/plain
+    ${response}=       POST On Session    api    /api/admin/chat/config
+    ...                data=${default_prompt}
+    ...                headers=${headers}
+    Should Be Equal As Integers    ${response.status_code}    200
+
+    # Now test getting the default prompt
     ${response}=       GET On Session    api    /api/admin/chat/config
     Should Be Equal As Integers    ${response.status_code}    200
 
diff --git a/tests/run-robot-tests.sh b/tests/run-robot-tests.sh
index c44b16ec..04787825 100755
--- a/tests/run-robot-tests.sh
+++ b/tests/run-robot-tests.sh
@@ -155,14 +155,25 @@ export COMPOSE_PROJECT_NAME="advanced-backend-test"
 
 # Clean up any existing test containers and volumes for fresh start
 print_info "Cleaning up any existing test environment..."
+
+# Try cleanup with current project name
 docker compose -f docker-compose-test.yml down -v 2>/dev/null || true
 
-# Force remove any stuck containers with test names (uses COMPOSE_PROJECT_NAME)
+# Also try cleanup with default project name (in case containers were started without COMPOSE_PROJECT_NAME)
+COMPOSE_PROJECT_NAME=advanced docker compose -f docker-compose-test.yml down -v 2>/dev/null || true
+
+# Force remove any stuck containers with both naming patterns
 print_info "Removing any stuck test containers..."
-# Dynamically construct container names from docker-compose services
 TEST_SERVICES=(mongo-test redis-test qdrant-test chronicle-backend-test workers-test webui-test speaker-service-test)
+
+# Remove containers with new project name (advanced-backend-test)
+for service in "${TEST_SERVICES[@]}"; do
+    docker rm -f "advanced-backend-test-${service}-1" 2>/dev/null || true
+done
+
+# Remove containers with old/default project name (advanced)
 for service in "${TEST_SERVICES[@]}"; do
-    docker rm -f "${COMPOSE_PROJECT_NAME}-${service}-1" 2>/dev/null || true
+    docker rm -f "advanced-${service}-1" 2>/dev/null || true
 done
 
 # Start infrastructure services (MongoDB, Redis, Qdrant)
@@ -221,9 +232,12 @@ for i in {1..40}; do
     sleep 3
 done
 
-# Start workers
-print_info "Starting RQ workers..."
-docker compose -f docker-compose-test.yml up -d workers-test
+# Build and start workers
+print_info "Building workers..."
+docker compose -f docker-compose-test.yml build workers-test
+
+print_info "Starting RQ workers and Deepgram streaming worker..."
+docker compose -f docker-compose-test.yml up -d workers-test deepgram-streaming-worker-test
 
 # Wait for workers container
 print_info "Waiting for workers container (up to 30s)..."
@@ -246,7 +260,7 @@ for i in {1..30}; do
     WORKER_COUNT=$(docker compose -f docker-compose-test.yml exec -T workers-test uv run python -c 'from rq import Worker; from redis import Redis; import os; r = Redis.from_url(os.getenv("REDIS_URL", "redis://redis-test:6379/0")); print(len(Worker.all(connection=r)))' 2>/dev/null || echo "0")
 
     if [ "$WORKER_COUNT" -ge 6 ]; then
-        print_success "Found $WORKER_COUNT workers registered"
+        print_success "Found $WORKER_COUNT RQ workers registered"
         break
     fi
 
@@ -259,6 +273,34 @@ for i in {1..30}; do
     sleep 2
 done
 
+# Verify batch Deepgram worker is running
+print_info "Verifying Deepgram batch worker process..."
+BATCH_WORKER_CHECK=$(docker compose -f docker-compose-test.yml exec -T workers-test ps aux | grep -c "audio_stream_deepgram_worker" || echo "0")
+if [ "$BATCH_WORKER_CHECK" -gt 0 ]; then
+    print_success "Deepgram batch worker process is running"
+else
+    print_warning "Deepgram batch worker process not found - checking logs..."
+    docker compose -f docker-compose-test.yml logs --tail=30 workers-test | grep -i "deepgram"
+fi
+
+# Check Redis consumer groups registration
+print_info "Checking Redis Streams consumer groups..."
+docker compose -f docker-compose-test.yml exec -T redis-test redis-cli KEYS "audio:stream:*" 2>/dev/null || true
+
+# Wait for streaming worker to start
+print_info "Waiting for Deepgram streaming worker (up to 30s)..."
+for i in {1..15}; do
+    if docker compose -f docker-compose-test.yml ps deepgram-streaming-worker-test | grep -q "Up"; then
+        print_success "Deepgram streaming worker is running"
+        break
+    fi
+    if [ $i -eq 15 ]; then
+        print_warning "Deepgram streaming worker not detected (may still start async)"
+        break
+    fi
+    sleep 2
+done
+
 print_success "All services ready!"
 
 # Return to tests directory
@@ -379,6 +421,8 @@ if [ "$CLEANUP_CONTAINERS" = "true" ]; then
     print_info "Cleaning up test containers..."
     cd "$BACKEND_DIR"
     docker compose -f docker-compose-test.yml down -v
+    # Also cleanup with default project name
+    COMPOSE_PROJECT_NAME=advanced docker compose -f docker-compose-test.yml down -v 2>/dev/null || true
     cd "$TESTS_DIR"
     print_success "Cleanup complete"
 else

From 4eb1ca994a381403eb4aaecab3b9ea00718c6f6c Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 6 Jan 2026 05:53:56 +0000
Subject: [PATCH 12/25] Refactor worker management and introduce orchestrator
 for improved process handling

- Replaced the bash-based `start-workers.sh` script with a Python-based worker orchestrator for better process management and health monitoring.
- Updated `docker-compose.yml` to configure the new orchestrator and adjust worker definitions, including the addition of audio persistence and stream workers.
- Enhanced the Dockerfile to remove the old startup script and ensure the orchestrator is executable.
- Introduced new modules for orchestrator configuration, health monitoring, process management, and worker registry to streamline worker lifecycle management.
- Improved environment variable handling for worker configuration and health checks.
---
 backends/advanced/Dockerfile                  |   5 +-
 backends/advanced/docker-compose.yml          |  19 +-
 .../workers/orchestrator/__init__.py          |  28 ++
 .../workers/orchestrator/config.py            |  91 ++++++
 .../workers/orchestrator/health_monitor.py    | 232 ++++++++++++++
 .../workers/orchestrator/process_manager.py   | 296 ++++++++++++++++++
 .../workers/orchestrator/worker_registry.py   | 170 ++++++++++
 backends/advanced/start-workers.sh            | 208 ------------
 8 files changed, 832 insertions(+), 217 deletions(-)
 create mode 100644 backends/advanced/src/advanced_omi_backend/workers/orchestrator/__init__.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/workers/orchestrator/config.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/workers/orchestrator/process_manager.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py
 delete mode 100755 backends/advanced/start-workers.sh

diff --git a/backends/advanced/Dockerfile b/backends/advanced/Dockerfile
index 352bcfe9..a24ed841 100644
--- a/backends/advanced/Dockerfile
+++ b/backends/advanced/Dockerfile
@@ -39,10 +39,9 @@ COPY . .
 COPY diarization_config.json* ./
 
 
-# Copy and make startup scripts executable
+# Copy and make startup script executable
 COPY start.sh ./
-COPY start-workers.sh ./
-RUN chmod +x start.sh start-workers.sh
+RUN chmod +x start.sh
 
 # Run the application with workers
 CMD ["./start.sh"]
diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml
index 4e6ba153..e0895271 100644
--- a/backends/advanced/docker-compose.yml
+++ b/backends/advanced/docker-compose.yml
@@ -76,22 +76,24 @@ services:
   # Unified Worker Container
   # No CUDA needed for chronicle-backend and workers, workers only orchestrate jobs and call external services
   # Runs all workers in a single container for efficiency:
-  # - 3 RQ workers (transcription, memory, default queues)
-  # - 1 Audio stream worker (Redis Streams consumer - must be single to maintain sequential chunks)
+  # - 6 RQ workers (transcription, memory, default queues)
+  # - 1 Audio persistence worker (audio queue)
+  # - 1+ Stream workers (conditional based on config.yml - Deepgram/Parakeet)
+  # Uses Python orchestrator for process management, health monitoring, and self-healing
   workers:
     build:
       context: .
       dockerfile: Dockerfile
-    command: ["./start-workers.sh"]
+    command: ["uv", "run", "python", "worker_orchestrator.py"]
     env_file:
       - .env
     volumes:
       - ./src:/app/src
-      - ./start-workers.sh:/app/start-workers.sh
+      - ./worker_orchestrator.py:/app/worker_orchestrator.py
       - ./data/audio_chunks:/app/audio_chunks
       - ./data:/app/data
-      - ../../config/config.yml:/app/config.yml  # Removed :ro for consistency
-      - ../../config/plugins.yml:/app/plugins.yml  # Plugin configuration
+      - ../../config/config.yml:/app/config.yml
+      - ../../config/plugins.yml:/app/plugins.yml
     environment:
       - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
       - PARAKEET_ASR_URL=${PARAKEET_ASR_URL}
@@ -99,6 +101,11 @@ services:
       - GROQ_API_KEY=${GROQ_API_KEY}
       - HA_TOKEN=${HA_TOKEN}
       - REDIS_URL=redis://redis:6379/0
+      # Worker orchestrator configuration (optional - defaults shown)
+      - WORKER_CHECK_INTERVAL=${WORKER_CHECK_INTERVAL:-10}
+      - MIN_RQ_WORKERS=${MIN_RQ_WORKERS:-6}
+      - WORKER_STARTUP_GRACE_PERIOD=${WORKER_STARTUP_GRACE_PERIOD:-30}
+      - WORKER_SHUTDOWN_TIMEOUT=${WORKER_SHUTDOWN_TIMEOUT:-30}
     depends_on:
       redis:
         condition: service_healthy
diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/__init__.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/__init__.py
new file mode 100644
index 00000000..1c7b0d7a
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/__init__.py
@@ -0,0 +1,28 @@
+"""
+Worker Orchestrator Package
+
+This package provides a Python-based orchestration system for managing
+Chronicle's worker processes, replacing the bash-based start-workers.sh script.
+
+Components:
+- config: Worker definitions and orchestrator configuration
+- worker_registry: Build worker list with conditional logic
+- process_manager: Process lifecycle management
+- health_monitor: Health checks and self-healing
+"""
+
+from .config import WorkerDefinition, OrchestratorConfig, WorkerType
+from .worker_registry import build_worker_definitions
+from .process_manager import ManagedWorker, ProcessManager, WorkerState
+from .health_monitor import HealthMonitor
+
+__all__ = [
+    "WorkerDefinition",
+    "OrchestratorConfig",
+    "WorkerType",
+    "build_worker_definitions",
+    "ManagedWorker",
+    "ProcessManager",
+    "WorkerState",
+    "HealthMonitor",
+]
diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/config.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/config.py
new file mode 100644
index 00000000..633d366a
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/config.py
@@ -0,0 +1,91 @@
+"""
+Worker Orchestrator Configuration
+
+Defines data structures for worker definitions and orchestrator configuration.
+"""
+
+import os
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Optional, Callable, List
+
+
+class WorkerType(Enum):
+    """Type of worker process"""
+
+    RQ_WORKER = "rq_worker"  # RQ queue worker
+    STREAM_CONSUMER = "stream_consumer"  # Redis Streams consumer
+
+
+@dataclass
+class WorkerDefinition:
+    """
+    Definition of a single worker process.
+
+    Attributes:
+        name: Unique identifier for the worker
+        command: Full command to execute (as list for subprocess)
+        worker_type: Type of worker (RQ vs stream consumer)
+        queues: Queue names for RQ workers (empty for stream consumers)
+        enabled_check: Optional predicate function to determine if worker should start
+        restart_on_failure: Whether to automatically restart on failure
+        health_check: Optional custom health check function
+    """
+
+    name: str
+    command: List[str]
+    worker_type: WorkerType = WorkerType.RQ_WORKER
+    queues: List[str] = field(default_factory=list)
+    enabled_check: Optional[Callable[[], bool]] = None
+    restart_on_failure: bool = True
+    health_check: Optional[Callable[[], bool]] = None
+
+    def is_enabled(self) -> bool:
+        """Check if this worker should be started"""
+        if self.enabled_check is None:
+            return True
+        return self.enabled_check()
+
+
+@dataclass
+class OrchestratorConfig:
+    """
+    Global configuration for the worker orchestrator.
+
+    All settings can be overridden via environment variables.
+    """
+
+    # Redis connection
+    redis_url: str = field(
+        default_factory=lambda: os.getenv("REDIS_URL", "redis://localhost:6379/0")
+    )
+
+    # Health monitoring settings
+    check_interval: int = field(
+        default_factory=lambda: int(os.getenv("WORKER_CHECK_INTERVAL", "10"))
+    )
+    min_rq_workers: int = field(
+        default_factory=lambda: int(os.getenv("MIN_RQ_WORKERS", "6"))
+    )
+    startup_grace_period: int = field(
+        default_factory=lambda: int(os.getenv("WORKER_STARTUP_GRACE_PERIOD", "30"))
+    )
+
+    # Shutdown settings
+    shutdown_timeout: int = field(
+        default_factory=lambda: int(os.getenv("WORKER_SHUTDOWN_TIMEOUT", "30"))
+    )
+
+    # Logging
+    log_level: str = field(default_factory=lambda: os.getenv("LOG_LEVEL", "INFO"))
+
+    def __post_init__(self):
+        """Validate configuration after initialization"""
+        if self.check_interval <= 0:
+            raise ValueError("check_interval must be positive")
+        if self.min_rq_workers < 0:
+            raise ValueError("min_rq_workers must be non-negative")
+        if self.startup_grace_period < 0:
+            raise ValueError("startup_grace_period must be non-negative")
+        if self.shutdown_timeout <= 0:
+            raise ValueError("shutdown_timeout must be positive")
diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py
new file mode 100644
index 00000000..afd8b7cd
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py
@@ -0,0 +1,232 @@
+"""
+Health Monitor
+
+Self-healing monitor that detects and recovers from worker failures.
+Periodically checks worker health and restarts failed workers.
+"""
+
+import asyncio
+import logging
+import time
+from typing import Optional
+
+from redis import Redis
+from rq import Worker
+
+from .config import OrchestratorConfig, WorkerType
+from .process_manager import ProcessManager, WorkerState
+
+logger = logging.getLogger(__name__)
+
+
+class HealthMonitor:
+    """
+    Self-healing monitor for worker processes.
+
+    Periodically checks:
+    1. Individual worker health (process liveness)
+    2. RQ worker registration count in Redis
+
+    Automatically restarts failed workers if configured.
+    """
+
+    def __init__(
+        self,
+        process_manager: ProcessManager,
+        config: OrchestratorConfig,
+        redis_client: Redis,
+    ):
+        self.process_manager = process_manager
+        self.config = config
+        self.redis = redis_client
+        self.running = False
+        self.monitor_task: Optional[asyncio.Task] = None
+        self.start_time = time.time()
+
+    async def start(self):
+        """Start the health monitoring loop"""
+        if self.running:
+            logger.warning("Health monitor already running")
+            return
+
+        self.running = True
+        self.start_time = time.time()
+        logger.info(
+            f"Starting health monitor (check interval: {self.config.check_interval}s, "
+            f"grace period: {self.config.startup_grace_period}s)"
+        )
+
+        self.monitor_task = asyncio.create_task(self._monitor_loop())
+
+    async def stop(self):
+        """Stop the health monitoring loop"""
+        if not self.running:
+            return
+
+        logger.info("Stopping health monitor...")
+        self.running = False
+
+        if self.monitor_task:
+            self.monitor_task.cancel()
+            try:
+                await self.monitor_task
+            except asyncio.CancelledError:
+                pass
+
+        logger.info("Health monitor stopped")
+
+    async def _monitor_loop(self):
+        """Main monitoring loop"""
+        try:
+            while self.running:
+                # Wait for startup grace period before starting checks
+                elapsed = time.time() - self.start_time
+                if elapsed < self.config.startup_grace_period:
+                    remaining = self.config.startup_grace_period - elapsed
+                    logger.debug(
+                        f"In startup grace period - waiting {remaining:.0f}s before health checks"
+                    )
+                    await asyncio.sleep(self.config.check_interval)
+                    continue
+
+                # Perform health checks
+                await self._check_health()
+
+                # Wait for next check
+                await asyncio.sleep(self.config.check_interval)
+
+        except asyncio.CancelledError:
+            logger.info("Health monitor loop cancelled")
+            raise
+        except Exception as e:
+            logger.error(f"Health monitor loop error: {e}", exc_info=True)
+
+    async def _check_health(self):
+        """Perform all health checks and restart failed workers"""
+        try:
+            # Check individual worker health
+            worker_health = self._check_worker_health()
+
+            # Check RQ worker registration count
+            rq_health = self._check_rq_worker_registration()
+
+            # Restart failed workers
+            self._restart_failed_workers()
+
+            # Log summary
+            if not worker_health or not rq_health:
+                logger.warning(
+                    f"Health check: worker_health={worker_health}, rq_health={rq_health}"
+                )
+
+        except Exception as e:
+            logger.error(f"Error during health check: {e}", exc_info=True)
+
+    def _check_worker_health(self) -> bool:
+        """
+        Check individual worker health.
+
+        Returns:
+            True if all workers are healthy
+        """
+        all_healthy = True
+
+        for worker in self.process_manager.get_all_workers():
+            try:
+                is_healthy = worker.check_health()
+                if not is_healthy:
+                    all_healthy = False
+                    logger.warning(
+                        f"{worker.name}: Health check failed (state={worker.state.value})"
+                    )
+            except Exception as e:
+                all_healthy = False
+                logger.error(f"{worker.name}: Health check raised exception: {e}")
+
+        return all_healthy
+
+    def _check_rq_worker_registration(self) -> bool:
+        """
+        Check RQ worker registration count in Redis.
+
+        This replicates the bash script's logic:
+        - Query Redis for all registered RQ workers
+        - Check if count >= min_rq_workers
+
+        Returns:
+            True if RQ worker count is sufficient
+        """
+        try:
+            workers = Worker.all(connection=self.redis)
+            worker_count = len(workers)
+
+            if worker_count < self.config.min_rq_workers:
+                logger.warning(
+                    f"RQ worker registration: {worker_count} workers "
+                    f"(expected >= {self.config.min_rq_workers})"
+                )
+                return False
+
+            logger.debug(f"RQ worker registration: {worker_count} workers registered")
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to check RQ worker registration: {e}")
+            return False
+
+    def _restart_failed_workers(self):
+        """Restart workers that have failed and should be restarted"""
+        for worker in self.process_manager.get_all_workers():
+            # Only restart if:
+            # 1. Worker state is FAILED
+            # 2. Worker definition has restart_on_failure=True
+            if (
+                worker.state == WorkerState.FAILED
+                and worker.definition.restart_on_failure
+            ):
+                logger.warning(
+                    f"{worker.name}: Worker failed, initiating restart "
+                    f"(restart count: {worker.restart_count})"
+                )
+
+                success = self.process_manager.restart_worker(worker.name)
+
+                if success:
+                    logger.info(
+                        f"{worker.name}: Restart successful "
+                        f"(total restarts: {worker.restart_count})"
+                    )
+                else:
+                    logger.error(f"{worker.name}: Restart failed")
+
+    def get_health_status(self) -> dict:
+        """
+        Get current health status summary.
+
+        Returns:
+            Dictionary with health status information
+        """
+        worker_status = self.process_manager.get_status()
+
+        # Count workers by state
+        state_counts = {}
+        for status in worker_status.values():
+            state = status["state"]
+            state_counts[state] = state_counts.get(state, 0) + 1
+
+        # Check RQ worker registration
+        try:
+            rq_workers = Worker.all(connection=self.redis)
+            rq_worker_count = len(rq_workers)
+        except Exception:
+            rq_worker_count = -1  # Error indicator
+
+        return {
+            "running": self.running,
+            "uptime": time.time() - self.start_time if self.running else 0,
+            "total_workers": len(worker_status),
+            "state_counts": state_counts,
+            "rq_worker_count": rq_worker_count,
+            "min_rq_workers": self.config.min_rq_workers,
+            "rq_healthy": rq_worker_count >= self.config.min_rq_workers,
+        }
diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/process_manager.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/process_manager.py
new file mode 100644
index 00000000..d90ecc00
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/process_manager.py
@@ -0,0 +1,296 @@
+"""
+Process Manager
+
+Manages lifecycle of all worker processes with state tracking.
+Handles process creation, monitoring, and graceful shutdown.
+"""
+
+import logging
+import subprocess
+import time
+from enum import Enum
+from typing import Dict, List, Optional
+
+from .config import WorkerDefinition
+
+logger = logging.getLogger(__name__)
+
+
+class WorkerState(Enum):
+    """Worker process lifecycle states"""
+
+    PENDING = "pending"  # Not yet started
+    STARTING = "starting"  # Process started, waiting for health check
+    RUNNING = "running"  # Healthy and running
+    UNHEALTHY = "unhealthy"  # Running but health check failed
+    STOPPING = "stopping"  # Shutdown initiated
+    STOPPED = "stopped"  # Cleanly stopped
+    FAILED = "failed"  # Crashed or failed to start
+
+
+class ManagedWorker:
+    """
+    Wraps a single worker process with state tracking.
+
+    Attributes:
+        definition: Worker definition
+        process: Subprocess.Popen object (None if not started)
+        state: Current worker state
+        start_time: Timestamp when worker was started
+        restart_count: Number of times worker has been restarted
+        last_health_check: Timestamp of last health check
+    """
+
+    def __init__(self, definition: WorkerDefinition):
+        self.definition = definition
+        self.process: Optional[subprocess.Popen] = None
+        self.state = WorkerState.PENDING
+        self.start_time: Optional[float] = None
+        self.restart_count = 0
+        self.last_health_check: Optional[float] = None
+
+    @property
+    def name(self) -> str:
+        """Worker name"""
+        return self.definition.name
+
+    @property
+    def pid(self) -> Optional[int]:
+        """Process ID (None if not started)"""
+        return self.process.pid if self.process else None
+
+    @property
+    def is_alive(self) -> bool:
+        """Check if process is alive"""
+        if not self.process:
+            return False
+        return self.process.poll() is None
+
+    def start(self) -> bool:
+        """
+        Start the worker process.
+
+        Returns:
+            True if started successfully, False otherwise
+        """
+        if self.process and self.is_alive:
+            logger.warning(f"{self.name}: Already running (PID {self.pid})")
+            return False
+
+        try:
+            logger.info(f"{self.name}: Starting worker...")
+            logger.debug(f"{self.name}: Command: {' '.join(self.definition.command)}")
+
+            self.process = subprocess.Popen(
+                self.definition.command,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True,
+                bufsize=1,  # Line buffered
+            )
+
+            self.state = WorkerState.STARTING
+            self.start_time = time.time()
+
+            logger.info(f"{self.name}: Started with PID {self.pid}")
+            return True
+
+        except Exception as e:
+            logger.error(f"{self.name}: Failed to start: {e}")
+            self.state = WorkerState.FAILED
+            return False
+
+    def stop(self, timeout: int = 30) -> bool:
+        """
+        Gracefully stop the worker process.
+
+        Args:
+            timeout: Maximum wait time in seconds
+
+        Returns:
+            True if stopped successfully, False otherwise
+        """
+        if not self.process or not self.is_alive:
+            logger.debug(f"{self.name}: Already stopped")
+            self.state = WorkerState.STOPPED
+            return True
+
+        try:
+            logger.info(f"{self.name}: Stopping worker (PID {self.pid})...")
+            self.state = WorkerState.STOPPING
+
+            # Send SIGTERM for graceful shutdown
+            self.process.terminate()
+
+            # Wait for process to exit
+            try:
+                self.process.wait(timeout=timeout)
+                logger.info(f"{self.name}: Stopped gracefully")
+                self.state = WorkerState.STOPPED
+                return True
+
+            except subprocess.TimeoutExpired:
+                # Force kill if timeout exceeded
+                logger.warning(
+                    f"{self.name}: Timeout expired, force killing (SIGKILL)..."
+                )
+                self.process.kill()
+                self.process.wait(timeout=5)
+                logger.warning(f"{self.name}: Force killed")
+                self.state = WorkerState.STOPPED
+                return True
+
+        except Exception as e:
+            logger.error(f"{self.name}: Error during shutdown: {e}")
+            self.state = WorkerState.FAILED
+            return False
+
+    def check_health(self) -> bool:
+        """
+        Check worker health.
+
+        Returns:
+            True if healthy, False otherwise
+        """
+        self.last_health_check = time.time()
+
+        # Basic liveness check
+        if not self.is_alive:
+            logger.warning(f"{self.name}: Process is not alive")
+            self.state = WorkerState.FAILED
+            return False
+
+        # Custom health check if defined
+        if self.definition.health_check:
+            try:
+                if not self.definition.health_check():
+                    logger.warning(f"{self.name}: Custom health check failed")
+                    self.state = WorkerState.UNHEALTHY
+                    return False
+            except Exception as e:
+                logger.error(f"{self.name}: Health check raised exception: {e}")
+                self.state = WorkerState.UNHEALTHY
+                return False
+
+        # Update state if currently starting
+        if self.state == WorkerState.STARTING:
+            self.state = WorkerState.RUNNING
+
+        return True
+
+
+class ProcessManager:
+    """
+    Manages all worker processes.
+
+    Provides high-level API for starting, stopping, and monitoring workers.
+    """
+
+    def __init__(self, worker_definitions: List[WorkerDefinition]):
+        self.workers: Dict[str, ManagedWorker] = {
+            defn.name: ManagedWorker(defn) for defn in worker_definitions
+        }
+        logger.info(f"ProcessManager initialized with {len(self.workers)} workers")
+
+    def start_all(self) -> bool:
+        """
+        Start all workers.
+
+        Returns:
+            True if all workers started successfully
+        """
+        logger.info("Starting all workers...")
+        success = True
+
+        for worker in self.workers.values():
+            if not worker.start():
+                success = False
+
+        if success:
+            logger.info("All workers started successfully")
+        else:
+            logger.warning("Some workers failed to start")
+
+        return success
+
+    def stop_all(self, timeout: int = 30) -> bool:
+        """
+        Stop all workers gracefully.
+
+        Args:
+            timeout: Maximum wait time per worker in seconds
+
+        Returns:
+            True if all workers stopped successfully
+        """
+        logger.info("Stopping all workers...")
+        success = True
+
+        for worker in self.workers.values():
+            if not worker.stop(timeout=timeout):
+                success = False
+
+        if success:
+            logger.info("All workers stopped successfully")
+        else:
+            logger.warning("Some workers failed to stop cleanly")
+
+        return success
+
+    def restart_worker(self, name: str, timeout: int = 30) -> bool:
+        """
+        Restart a specific worker.
+
+        Args:
+            name: Worker name
+            timeout: Maximum wait time for shutdown in seconds
+
+        Returns:
+            True if restarted successfully
+        """
+        worker = self.workers.get(name)
+        if not worker:
+            logger.error(f"Worker '{name}' not found")
+            return False
+
+        logger.info(f"Restarting worker: {name}")
+        worker.stop(timeout=timeout)
+        success = worker.start()
+
+        if success:
+            worker.restart_count += 1
+            logger.info(f"{name}: Restart #{worker.restart_count} successful")
+        else:
+            logger.error(f"{name}: Restart failed")
+
+        return success
+
+    def get_status(self) -> Dict[str, Dict]:
+        """
+        Get detailed status of all workers.
+
+        Returns:
+            Dictionary mapping worker name to status info
+        """
+        status = {}
+
+        for name, worker in self.workers.items():
+            status[name] = {
+                "pid": worker.pid,
+                "state": worker.state.value,
+                "is_alive": worker.is_alive,
+                "restart_count": worker.restart_count,
+                "start_time": worker.start_time,
+                "last_health_check": worker.last_health_check,
+                "queues": worker.definition.queues,
+            }
+
+        return status
+
+    def get_worker(self, name: str) -> Optional[ManagedWorker]:
+        """Get worker by name"""
+        return self.workers.get(name)
+
+    def get_all_workers(self) -> List[ManagedWorker]:
+        """Get all workers"""
+        return list(self.workers.values())
diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py
new file mode 100644
index 00000000..512f4a9a
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py
@@ -0,0 +1,170 @@
+"""
+Worker Registry
+
+Builds the complete list of worker definitions with conditional logic.
+Reuses model_registry.py for config.yml parsing.
+"""
+
+import os
+import logging
+from typing import List
+
+from .config import WorkerDefinition, WorkerType
+
+logger = logging.getLogger(__name__)
+
+
+def get_default_stt_provider() -> str:
+    """
+    Query config.yml for the default STT provider.
+
+    Returns:
+        Provider name (e.g., "deepgram", "parakeet") or empty string if not configured
+    """
+    try:
+        from advanced_omi_backend.model_registry import get_models_registry
+
+        registry = get_models_registry()
+        if registry and registry.defaults:
+            stt_model = registry.get_default("stt")
+            if stt_model:
+                return stt_model.model_provider or ""
+    except Exception as e:
+        logger.warning(f"Failed to read STT provider from config.yml: {e}")
+
+    return ""
+
+
+def should_start_deepgram_batch() -> bool:
+    """
+    Check if Deepgram batch worker should start.
+
+    Conditions:
+    - DEFAULT_STT provider is "deepgram" (from config.yml)
+    - DEEPGRAM_API_KEY is set in environment
+    """
+    stt_provider = get_default_stt_provider()
+    has_api_key = bool(os.getenv("DEEPGRAM_API_KEY"))
+
+    enabled = stt_provider == "deepgram" and has_api_key
+
+    if stt_provider == "deepgram" and not has_api_key:
+        logger.warning(
+            "Deepgram configured as default STT but DEEPGRAM_API_KEY not set - worker disabled"
+        )
+
+    return enabled
+
+
+def should_start_parakeet() -> bool:
+    """
+    Check if Parakeet stream worker should start.
+
+    Conditions:
+    - DEFAULT_STT provider is "parakeet" (from config.yml)
+    """
+    stt_provider = get_default_stt_provider()
+    return stt_provider == "parakeet"
+
+
+def build_worker_definitions() -> List[WorkerDefinition]:
+    """
+    Build the complete list of worker definitions.
+
+    Returns:
+        List of WorkerDefinition objects, including conditional workers
+    """
+    workers = []
+
+    # 6x RQ Workers - Multi-queue workers (transcription, memory, default)
+    for i in range(1, 7):
+        workers.append(
+            WorkerDefinition(
+                name=f"rq-worker-{i}",
+                command=[
+                    "uv",
+                    "run",
+                    "python",
+                    "-m",
+                    "advanced_omi_backend.workers.rq_worker_entry",
+                    "transcription",
+                    "memory",
+                    "default",
+                ],
+                worker_type=WorkerType.RQ_WORKER,
+                queues=["transcription", "memory", "default"],
+                restart_on_failure=True,
+            )
+        )
+
+    # Audio Persistence Worker - Single-queue worker (audio queue)
+    workers.append(
+        WorkerDefinition(
+            name="audio-persistence",
+            command=[
+                "uv",
+                "run",
+                "python",
+                "-m",
+                "advanced_omi_backend.workers.rq_worker_entry",
+                "audio",
+            ],
+            worker_type=WorkerType.RQ_WORKER,
+            queues=["audio"],
+            restart_on_failure=True,
+        )
+    )
+
+    # Deepgram Batch Worker - Conditional (if DEFAULT_STT=deepgram + API key)
+    workers.append(
+        WorkerDefinition(
+            name="deepgram-batch",
+            command=[
+                "uv",
+                "run",
+                "python",
+                "-m",
+                "advanced_omi_backend.workers.audio_stream_deepgram_worker",
+            ],
+            worker_type=WorkerType.STREAM_CONSUMER,
+            enabled_check=should_start_deepgram_batch,
+            restart_on_failure=True,
+        )
+    )
+
+    # Parakeet Stream Worker - Conditional (if DEFAULT_STT=parakeet)
+    workers.append(
+        WorkerDefinition(
+            name="parakeet-stream",
+            command=[
+                "uv",
+                "run",
+                "python",
+                "-m",
+                "advanced_omi_backend.workers.audio_stream_parakeet_worker",
+            ],
+            worker_type=WorkerType.STREAM_CONSUMER,
+            enabled_check=should_start_parakeet,
+            restart_on_failure=True,
+        )
+    )
+
+    # Log worker configuration
+    stt_provider = get_default_stt_provider()
+    logger.info(f"STT Provider from config.yml: {stt_provider or 'none'}")
+
+    enabled_workers = [w for w in workers if w.is_enabled()]
+    disabled_workers = [w for w in workers if not w.is_enabled()]
+
+    logger.info(f"Total workers configured: {len(workers)}")
+    logger.info(f"Enabled workers: {len(enabled_workers)}")
+    logger.info(
+        f"Enabled worker names: {', '.join([w.name for w in enabled_workers])}"
+    )
+
+    if disabled_workers:
+        logger.info(
+            f"Disabled workers: {', '.join([w.name for w in disabled_workers])}"
+        )
+
+    return enabled_workers
diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh
deleted file mode 100755
index 8715da4b..00000000
--- a/backends/advanced/start-workers.sh
+++ /dev/null
@@ -1,208 +0,0 @@
-#!/bin/bash
-# Unified worker startup script
-# Starts all workers in a single container for efficiency
-
-set -e
-
-echo "🚀 Starting Chronicle Workers..."
-
-# Clean up any stale worker registrations from previous runs
-echo "🧹 Cleaning up stale worker registrations from Redis..."
-# Use RQ's cleanup command to remove dead workers
-uv run python -c "
-from rq import Worker
-from redis import Redis
-import os
-import socket
-
-redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0')
-redis_conn = Redis.from_url(redis_url)
-hostname = socket.gethostname()
-
-# Only clean up workers from THIS hostname (pod)
-workers = Worker.all(connection=redis_conn)
-cleaned = 0
-for worker in workers:
-    if worker.hostname == hostname:
-        worker.register_death()
-        cleaned += 1
-print(f'Cleaned up {cleaned} stale workers from {hostname}')
-" 2>/dev/null || echo "No stale workers to clean"
-
-sleep 1
-
-# Function to start all workers
-start_workers() {
-    echo "🔧 Starting RQ workers (6 workers, all queues: transcription, memory, default)..."
-    uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default &
-    RQ_WORKER_1_PID=$!
-    uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default &
-    RQ_WORKER_2_PID=$!
-    uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default &
-    RQ_WORKER_3_PID=$!
-    uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default &
-    RQ_WORKER_4_PID=$!
-    uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default &
-    RQ_WORKER_5_PID=$!
-    uv run python -m advanced_omi_backend.workers.rq_worker_entry transcription memory default &
-    RQ_WORKER_6_PID=$!
-
-    echo "💾 Starting audio persistence worker (1 worker for audio queue)..."
-    uv run python -m advanced_omi_backend.workers.rq_worker_entry audio &
-    AUDIO_PERSISTENCE_WORKER_PID=$!
-
-    # Determine which STT provider to use from config.yml
-    echo "📋 Checking config.yml for default STT provider..."
-    DEFAULT_STT=$(uv run python -c "
-from advanced_omi_backend.model_registry import get_models_registry
-registry = get_models_registry()
-if registry and registry.defaults:
-    stt_model = registry.get_default('stt')
-    if stt_model:
-        print(stt_model.model_provider or '')
-" 2>/dev/null || echo "")
-
-    echo "📋 Configured STT provider: ${DEFAULT_STT:-none}"
-
-    # Batch Deepgram worker - uses consumer group "deepgram_workers"
-    # Runs alongside deepgram-streaming-worker container (consumer group "streaming-transcription")
-    # Both workers process same streams via Redis consumer groups (fan-out architecture)
-    # - Batch worker: High-quality transcription with diarization (~6s latency)
-    # - Streaming worker: Fast wake-word detection with plugins (~1-2s latency)
-    if [[ "$DEFAULT_STT" == "deepgram" ]] && [ -n "$DEEPGRAM_API_KEY" ]; then
-        echo "🎵 Starting audio stream Deepgram batch worker (consumer group: deepgram_workers)..."
-        uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_worker &
-        AUDIO_STREAM_DEEPGRAM_WORKER_PID=$!
-    else
-        echo "⏭️  Skipping Deepgram batch worker (not configured as default STT or API key missing)"
-        AUDIO_STREAM_DEEPGRAM_WORKER_PID=""
-    fi
-
-    # Only start Parakeet worker if configured as default STT
-    if [[ "$DEFAULT_STT" == "parakeet" ]]; then
-        echo "🎵 Starting audio stream Parakeet worker (1 worker for sequential processing)..."
-        uv run python -m advanced_omi_backend.workers.audio_stream_parakeet_worker &
-        AUDIO_STREAM_PARAKEET_WORKER_PID=$!
-    else
-        echo "⏭️  Skipping Parakeet stream worker (not configured as default STT)"
-        AUDIO_STREAM_PARAKEET_WORKER_PID=""
-    fi
-
-    echo "✅ All workers started:"
-    echo "  - RQ worker 1: PID $RQ_WORKER_1_PID (transcription, memory, default)"
-    echo "  - RQ worker 2: PID $RQ_WORKER_2_PID (transcription, memory, default)"
-    echo "  - RQ worker 3: PID $RQ_WORKER_3_PID (transcription, memory, default)"
-    echo "  - RQ worker 4: PID $RQ_WORKER_4_PID (transcription, memory, default)"
-    echo "  - RQ worker 5: PID $RQ_WORKER_5_PID (transcription, memory, default)"
-    echo "  - RQ worker 6: PID $RQ_WORKER_6_PID (transcription, memory, default)"
-    echo "  - Audio persistence worker: PID $AUDIO_PERSISTENCE_WORKER_PID (audio queue - file rotation)"
-    [ -n "$AUDIO_STREAM_DEEPGRAM_WORKER_PID" ] && echo "  - Audio stream Deepgram worker: PID $AUDIO_STREAM_DEEPGRAM_WORKER_PID (Redis Streams consumer)" || true
-    [ -n "$AUDIO_STREAM_PARAKEET_WORKER_PID" ] && echo "  - Audio stream Parakeet worker: PID $AUDIO_STREAM_PARAKEET_WORKER_PID (Redis Streams consumer)" || true
-}
-
-# Function to check worker registration health
-check_worker_health() {
-    WORKER_COUNT=$(uv run python -c "
-from rq import Worker
-from redis import Redis
-import os
-import sys
-
-try:
-    redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0')
-    r = Redis.from_url(redis_url)
-    workers = Worker.all(connection=r)
-    print(len(workers))
-except Exception as e:
-    print('0', file=sys.stderr)
-    sys.exit(1)
-" 2>/dev/null || echo "0")
-    echo "$WORKER_COUNT"
-}
-
-# Self-healing monitoring function
-monitor_worker_health() {
-    local CHECK_INTERVAL=10  # Check every 10 seconds
-    local MIN_WORKERS=6      # Expect at least 6 RQ workers
-
-    echo "🩺 Starting self-healing monitor (check interval: ${CHECK_INTERVAL}s, min workers: ${MIN_WORKERS})"
-
-    while true; do
-        sleep $CHECK_INTERVAL
-
-        WORKER_COUNT=$(check_worker_health)
-
-        if [ "$WORKER_COUNT" -lt "$MIN_WORKERS" ]; then
-            echo "⚠️ Self-healing: Only $WORKER_COUNT workers registered (expected >= $MIN_WORKERS)"
-            echo "🔧 Self-healing: Restarting all workers to restore registration..."
-
-            # Kill all workers
-            kill $RQ_WORKER_1_PID $RQ_WORKER_2_PID $RQ_WORKER_3_PID $RQ_WORKER_4_PID $RQ_WORKER_5_PID $RQ_WORKER_6_PID $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true
-            [ -n "$AUDIO_STREAM_DEEPGRAM_WORKER_PID" ] && kill $AUDIO_STREAM_DEEPGRAM_WORKER_PID 2>/dev/null || true
-            [ -n "$AUDIO_STREAM_PARAKEET_WORKER_PID" ] && kill $AUDIO_STREAM_PARAKEET_WORKER_PID 2>/dev/null || true
-            wait 2>/dev/null || true
-
-            # Restart workers
-            start_workers
-
-            # Verify recovery
-            sleep 3
-            NEW_WORKER_COUNT=$(check_worker_health)
-            echo "✅ Self-healing: Workers restarted - new count: $NEW_WORKER_COUNT"
-        fi
-    done
-}
-
-# Function to handle shutdown
-shutdown() {
-    echo "🛑 Shutting down workers..."
-    kill $MONITOR_PID 2>/dev/null || true
-    kill $RQ_WORKER_1_PID 2>/dev/null || true
-    kill $RQ_WORKER_2_PID 2>/dev/null || true
-    kill $RQ_WORKER_3_PID 2>/dev/null || true
-    kill $RQ_WORKER_4_PID 2>/dev/null || true
-    kill $RQ_WORKER_5_PID 2>/dev/null || true
-    kill $RQ_WORKER_6_PID 2>/dev/null || true
-    kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true
-    [ -n "$AUDIO_STREAM_DEEPGRAM_WORKER_PID" ] && kill $AUDIO_STREAM_DEEPGRAM_WORKER_PID 2>/dev/null || true
-    [ -n "$AUDIO_STREAM_PARAKEET_WORKER_PID" ] && kill $AUDIO_STREAM_PARAKEET_WORKER_PID 2>/dev/null || true
-    wait
-    echo "✅ All workers stopped"
-    exit 0
-}
-
-# Set up signal handlers
-trap shutdown SIGTERM SIGINT
-
-# Configure Python logging for RQ workers
-export PYTHONUNBUFFERED=1
-
-# Start all workers
-start_workers
-
-# Start self-healing monitor in background
-monitor_worker_health &
-MONITOR_PID=$!
-echo "🩺 Self-healing monitor started: PID $MONITOR_PID"
-
-# Keep the script running and let the self-healing monitor handle worker failures
-# Don't use wait -n (fail-fast on first worker exit) - this kills all workers when one fails
-# Instead, wait for the monitor process or explicit shutdown signal
-echo "⏳ Workers running - self-healing monitor will restart failed workers automatically"
-wait $MONITOR_PID
-
-# If monitor exits (should only happen on SIGTERM/SIGINT), shut down gracefully
-echo "🛑 Monitor exited, shutting down all workers..."
-kill $RQ_WORKER_1_PID 2>/dev/null || true
-kill $RQ_WORKER_2_PID 2>/dev/null || true
-kill $RQ_WORKER_3_PID 2>/dev/null || true
-kill $RQ_WORKER_4_PID 2>/dev/null || true
-kill $RQ_WORKER_5_PID 2>/dev/null || true
-kill $RQ_WORKER_6_PID 2>/dev/null || true
-kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true
-[ -n "$AUDIO_STREAM_DEEPGRAM_WORKER_PID" ] && kill $AUDIO_STREAM_DEEPGRAM_WORKER_PID 2>/dev/null || true
-[ -n "$AUDIO_STREAM_PARAKEET_WORKER_PID" ] && kill $AUDIO_STREAM_PARAKEET_WORKER_PID 2>/dev/null || true
-wait
-
-echo "✅ All workers stopped gracefully"
-exit 0

From 5cffe17cf25b48c2adad34b6796a233bd84142fb Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 6 Jan 2026 06:00:03 +0000
Subject: [PATCH 13/25] oops

---
 backends/advanced/worker_orchestrator.py | 245 +++++++++++++++++++++++
 1 file changed, 245 insertions(+)
 create mode 100755 backends/advanced/worker_orchestrator.py

diff --git a/backends/advanced/worker_orchestrator.py b/backends/advanced/worker_orchestrator.py
new file mode 100755
index 00000000..0929bdd0
--- /dev/null
+++ b/backends/advanced/worker_orchestrator.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+"""
+Worker Orchestrator
+
+Main entrypoint for Chronicle worker orchestration system.
+Replaces start-workers.sh bash script with Python-based orchestration.
+
+Usage:
+    python worker_orchestrator.py
+    # Or via Docker: docker compose up workers
+
+Environment Variables:
+    REDIS_URL                    Redis connection URL (default: redis://localhost:6379/0)
+    WORKER_CHECK_INTERVAL        Health check interval in seconds (default: 10)
+    MIN_RQ_WORKERS               Minimum expected RQ workers (default: 6)
+    WORKER_STARTUP_GRACE_PERIOD  Grace period before health checks (default: 30)
+    WORKER_SHUTDOWN_TIMEOUT      Max wait for graceful shutdown (default: 30)
+    LOG_LEVEL                    Logging level (default: INFO)
+"""
+
+import asyncio
+import logging
+import os
+import signal
+import socket
+import sys
+from typing import Optional
+
+from redis import Redis
+from rq import Worker
+
+# Import orchestrator components
+from src.advanced_omi_backend.workers.orchestrator import (
+    OrchestratorConfig,
+    ProcessManager,
+    HealthMonitor,
+    build_worker_definitions,
+)
+
+# Configure logging
+LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
+logging.basicConfig(
+    level=LOG_LEVEL,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    stream=sys.stdout,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class WorkerOrchestrator:
+    """
+    Main orchestrator that coordinates all components.
+
+    Handles:
+    - Startup sequence (Redis cleanup, worker startup)
+    - Signal handling (SIGTERM, SIGINT)
+    - Health monitoring
+    - Graceful shutdown
+    """
+
+    def __init__(self):
+        self.config: Optional[OrchestratorConfig] = None
+        self.redis: Optional[Redis] = None
+        self.process_manager: Optional[ProcessManager] = None
+        self.health_monitor: Optional[HealthMonitor] = None
+        self.shutdown_event = asyncio.Event()
+
+    async def startup(self):
+        """
+        Startup sequence.
+
+        1. Load configuration
+        2. Connect to Redis
+        3. Clean up stale worker registrations
+        4. Build worker definitions
+        5. Start all workers
+        6. Setup signal handlers
+        7. Start health monitor
+        """
+        logger.info("🚀 Starting Chronicle Worker Orchestrator...")
+
+        # 1. Load configuration
+        logger.info("Loading configuration...")
+        self.config = OrchestratorConfig()
+        logger.info(f"Redis URL: {self.config.redis_url}")
+        logger.info(f"Check interval: {self.config.check_interval}s")
+        logger.info(f"Min RQ workers: {self.config.min_rq_workers}")
+        logger.info(f"Startup grace period: {self.config.startup_grace_period}s")
+
+        # 2. Connect to Redis
+        logger.info("Connecting to Redis...")
+        self.redis = Redis.from_url(self.config.redis_url)
+        try:
+            self.redis.ping()
+            logger.info("✅ Redis connection successful")
+        except Exception as e:
+            logger.error(f"❌ Failed to connect to Redis: {e}")
+            raise
+
+        # 3. Clean up stale worker registrations
+        logger.info("🧹 Cleaning up stale worker registrations from Redis...")
+        cleaned_count = self._cleanup_stale_workers()
+        if cleaned_count > 0:
+            logger.info(f"Cleaned up {cleaned_count} stale workers")
+        else:
+            logger.info("No stale workers to clean")
+
+        # 4. Build worker definitions
+        logger.info("Building worker definitions...")
+        worker_definitions = build_worker_definitions()
+        logger.info(f"Total enabled workers: {len(worker_definitions)}")
+
+        # 5. Create process manager and start all workers
+        logger.info("Starting all workers...")
+        self.process_manager = ProcessManager(worker_definitions)
+        success = self.process_manager.start_all()
+
+        if not success:
+            logger.error("❌ Some workers failed to start")
+            raise RuntimeError("Worker startup failed")
+
+        # Log worker status
+        logger.info("✅ All workers started:")
+        for worker in self.process_manager.get_all_workers():
+            logger.info(
+                f"  - {worker.name}: PID {worker.pid} "
+                f"(queues: {', '.join(worker.definition.queues) if worker.definition.queues else 'stream consumer'})"
+            )
+
+        # 6. Setup signal handlers
+        loop = asyncio.get_running_loop()
+        for sig in (signal.SIGTERM, signal.SIGINT):
+            loop.add_signal_handler(sig, lambda s=sig: asyncio.create_task(self._signal_handler(s)))
+
+        logger.info("✅ Signal handlers configured (SIGTERM, SIGINT)")
+
+        # 7. Start health monitor
+        logger.info("Starting health monitor...")
+        self.health_monitor = HealthMonitor(
+            self.process_manager, self.config, self.redis
+        )
+        await self.health_monitor.start()
+        logger.info("✅ Health monitor started")
+
+        logger.info("⏳ Workers running - health monitor will auto-restart failed workers")
+
+    def _cleanup_stale_workers(self) -> int:
+        """
+        Clean up stale worker registrations from Redis.
+
+        This replicates the bash script's logic:
+        - Only clean up workers from THIS hostname (pod-aware)
+        - Use RQ's register_death() to properly clean up
+
+        Returns:
+            Number of workers cleaned up
+        """
+        try:
+            hostname = socket.gethostname()
+            workers = Worker.all(connection=self.redis)
+            cleaned = 0
+
+            for worker in workers:
+                if worker.hostname == hostname:
+                    worker.register_death()
+                    cleaned += 1
+
+            return cleaned
+
+        except Exception as e:
+            logger.warning(f"Failed to clean up stale workers: {e}")
+            return 0
+
+    async def _signal_handler(self, sig: signal.Signals):
+        """Handle shutdown signals"""
+        logger.info(f"Received signal: {sig.name}")
+        self.shutdown_event.set()
+
+    async def shutdown(self):
+        """
+        Graceful shutdown sequence.
+
+        1. Stop health monitor
+        2. Stop all workers
+        3. Close Redis connection
+        """
+        logger.info("🛑 Initiating graceful shutdown...")
+
+        # 1. Stop health monitor
+        if self.health_monitor:
+            await self.health_monitor.stop()
+
+        # 2. Stop all workers
+        if self.process_manager:
+            logger.info("Stopping all workers...")
+            self.process_manager.stop_all(timeout=self.config.shutdown_timeout)
+
+        # 3. Close Redis connection
+        if self.redis:
+            logger.info("Closing Redis connection...")
+            self.redis.close()
+
+        logger.info("✅ All workers stopped gracefully")
+
+    async def run(self):
+        """Main run loop - wait for shutdown signal"""
+        try:
+            # Perform startup
+            await self.startup()
+
+            # Wait for shutdown signal
+            await self.shutdown_event.wait()
+
+        except Exception as e:
+            logger.error(f"❌ Orchestrator error: {e}", exc_info=True)
+            raise
+        finally:
+            # Always perform shutdown
+            await self.shutdown()
+
+
+async def main():
+    """Main entrypoint"""
+    orchestrator = WorkerOrchestrator()
+
+    try:
+        await orchestrator.run()
+        sys.exit(0)
+
+    except KeyboardInterrupt:
+        logger.info("Interrupted by user")
+        sys.exit(0)
+
+    except Exception as e:
+        logger.error(f"Fatal error: {e}", exc_info=True)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    # Ensure unbuffered output for Docker logs
+    os.environ["PYTHONUNBUFFERED"] = "1"
+
+    # Run the orchestrator
+    asyncio.run(main())

From 8f44c4b393bc3971bcc8e74a06e7b5b8f9ed974e Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 6 Jan 2026 06:02:33 +0000
Subject: [PATCH 14/25] oops2

---
 backends/advanced/docker-compose-test.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml
index 812d29b9..134e6687 100644
--- a/backends/advanced/docker-compose-test.yml
+++ b/backends/advanced/docker-compose-test.yml
@@ -154,9 +154,10 @@ services:
     build:
       context: .
       dockerfile: Dockerfile
-    command: ./start-workers.sh
+    command: ["uv", "run", "python", "worker_orchestrator.py"]
     volumes:
       - ./src:/app/src
+      - ./worker_orchestrator.py:/app/worker_orchestrator.py
       - ./data/test_audio_chunks:/app/audio_chunks
       - ./data/test_debug_dir:/app/debug_dir
       - ./data/test_data:/app/data

From 7e05de967d25a3700d4170a3e87e0ce77334e584 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 6 Jan 2026 06:20:46 +0000
Subject: [PATCH 15/25] Remove legacy test runner script and update worker
 orchestration

- Deleted the `run-test.sh` script, which was used for local test execution.
- Updated Docker configurations to replace the `start-workers.sh` script with `worker_orchestrator.py` for improved worker management.
- Enhanced health monitoring and process management in the orchestrator to ensure better reliability and logging.
- Adjusted deployment configurations to reflect the new orchestrator setup.
---
 backends/advanced/.dockerignore               |   2 +-
 backends/advanced/Dockerfile.k8s              |   6 +-
 .../workers/orchestrator/health_monitor.py    |   2 +
 .../workers/orchestrator/process_manager.py   |  19 ++-
 backends/advanced/start-k8s.sh                |   6 +-
 .../templates/deployment.yaml                 |   2 +-
 .../templates/workers-deployment.yaml         |   2 +-
 run-test.sh                                   | 113 ------------------
 8 files changed, 25 insertions(+), 127 deletions(-)
 delete mode 100755 run-test.sh

diff --git a/backends/advanced/.dockerignore b/backends/advanced/.dockerignore
index 2dd9b44f..f0f7f05c 100644
--- a/backends/advanced/.dockerignore
+++ b/backends/advanced/.dockerignore
@@ -17,5 +17,5 @@
 !nginx.conf.template
 !start.sh
 !start-k8s.sh
-!start-workers.sh
+!worker_orchestrator.py
 !Caddyfile
\ No newline at end of file
diff --git a/backends/advanced/Dockerfile.k8s b/backends/advanced/Dockerfile.k8s
index b746752a..6500ccf5 100644
--- a/backends/advanced/Dockerfile.k8s
+++ b/backends/advanced/Dockerfile.k8s
@@ -36,9 +36,9 @@ COPY . .
 # Copy memory config (created by init.sh from template)
 
 
-# Copy and make K8s startup scripts executable
-COPY start-k8s.sh start-workers.sh ./
-RUN chmod +x start-k8s.sh start-workers.sh
+# Copy and make K8s startup script executable
+COPY start-k8s.sh ./
+RUN chmod +x start-k8s.sh
 
 # Activate virtual environment in PATH
 ENV PATH="/app/.venv/bin:$PATH"
diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py
index afd8b7cd..80c83cbd 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py
@@ -100,6 +100,8 @@ async def _monitor_loop(self):
             raise
         except Exception as e:
             logger.error(f"Health monitor loop error: {e}", exc_info=True)
+            self.running = False  # Mark monitor as stopped so callers know it's not active
+            raise  # Re-raise to ensure the monitor task fails properly
 
     async def _check_health(self):
         """Perform all health checks and restart failed workers"""
diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/process_manager.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/process_manager.py
index d90ecc00..21b7f23e 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/process_manager.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/process_manager.py
@@ -81,12 +81,13 @@ def start(self) -> bool:
             logger.info(f"{self.name}: Starting worker...")
             logger.debug(f"{self.name}: Command: {' '.join(self.definition.command)}")
 
+            # Don't capture stdout/stderr - let it flow to container logs (Docker captures it)
+            # This prevents buffer overflow and blocking when worker output exceeds 64KB
+            # Worker logs will be visible via 'docker logs' command
             self.process = subprocess.Popen(
                 self.definition.command,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.STDOUT,
-                text=True,
-                bufsize=1,  # Line buffered
+                stdout=None,  # Inherit from parent (goes to container stdout)
+                stderr=None,  # Inherit from parent (goes to container stderr)
             )
 
             self.state = WorkerState.STARTING
@@ -254,7 +255,15 @@ def restart_worker(self, name: str, timeout: int = 30) -> bool:
             return False
 
         logger.info(f"Restarting worker: {name}")
-        worker.stop(timeout=timeout)
+
+        # Ensure worker is fully stopped before attempting restart
+        stop_success = worker.stop(timeout=timeout)
+        if not stop_success:
+            logger.error(f"{name}: Failed to stop cleanly, restart aborted")
+            worker.state = WorkerState.FAILED
+            return False
+
+        # Attempt to start the worker
         success = worker.start()
 
         if success:
diff --git a/backends/advanced/start-k8s.sh b/backends/advanced/start-k8s.sh
index a2f3d817..4235b16c 100755
--- a/backends/advanced/start-k8s.sh
+++ b/backends/advanced/start-k8s.sh
@@ -80,7 +80,7 @@ sleep 1
 # Function to start all workers
 start_workers() {
     # NEW WORKERS - Redis Streams multi-provider architecture
-    # Single worker ensures sequential processing of audio chunks (matching start-workers.sh)
+    # Single worker ensures sequential processing of audio chunks (matching worker_orchestrator.py)
     echo "🎵 Starting audio stream Deepgram worker (1 worker for sequential processing)..."
     if python3 -m advanced_omi_backend.workers.audio_stream_deepgram_worker &
     then
@@ -91,7 +91,7 @@ start_workers() {
         exit 1
     fi
 
-    # Start 3 RQ workers listening to ALL queues (matching start-workers.sh)
+    # Start 3 RQ workers listening to ALL queues (matching worker_orchestrator.py)
     echo "🔧 Starting RQ workers (3 workers, all queues: transcription, memory, default)..."
     if python3 -m advanced_omi_backend.workers.rq_worker_entry transcription memory default &
     then
@@ -123,7 +123,7 @@ start_workers() {
         exit 1
     fi
 
-    # Start 1 dedicated audio persistence worker (matching start-workers.sh)
+    # Start 1 dedicated audio persistence worker (matching worker_orchestrator.py)
     echo "💾 Starting audio persistence worker (1 worker for audio queue)..."
     if python3 -m advanced_omi_backend.workers.rq_worker_entry audio &
     then
diff --git a/backends/charts/advanced-backend/templates/deployment.yaml b/backends/charts/advanced-backend/templates/deployment.yaml
index 0e40a7fb..2eb3425d 100644
--- a/backends/charts/advanced-backend/templates/deployment.yaml
+++ b/backends/charts/advanced-backend/templates/deployment.yaml
@@ -67,7 +67,7 @@ spec:
         - name: {{ .Chart.Name }}-workers
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
           imagePullPolicy: {{ .Values.image.pullPolicy }}
-          command: ["./start-workers.sh"]
+          command: ["uv", "run", "python", "worker_orchestrator.py"]
           envFrom:
             - configMapRef:
                 name: chronicle-config
diff --git a/backends/charts/advanced-backend/templates/workers-deployment.yaml b/backends/charts/advanced-backend/templates/workers-deployment.yaml
index 22751d31..48add12a 100644
--- a/backends/charts/advanced-backend/templates/workers-deployment.yaml
+++ b/backends/charts/advanced-backend/templates/workers-deployment.yaml
@@ -21,7 +21,7 @@ spec:
         - name: {{ .Chart.Name }}-workers
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
           imagePullPolicy: {{ .Values.image.pullPolicy }}
-          command: ["./start-workers.sh"]
+          command: ["uv", "run", "python", "worker_orchestrator.py"]
           envFrom:
             - configMapRef:
                 name: chronicle-config
diff --git a/run-test.sh b/run-test.sh
deleted file mode 100755
index ebc39a07..00000000
--- a/run-test.sh
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/bin/bash
-
-# Chronicle Local Test Runner
-# Runs the same tests as GitHub CI but configured for local development
-# Usage: ./run-test.sh [advanced-backend|speaker-recognition|all]
-
-set -e
-
-# Colors for output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m' # No Color
-
-# Print colored output
-print_info() {
-    echo -e "${BLUE}[INFO]${NC} $1"
-}
-
-print_success() {
-    echo -e "${GREEN}[SUCCESS]${NC} $1"
-}
-
-print_warning() {
-    echo -e "${YELLOW}[WARNING]${NC} $1"
-}
-
-print_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
-}
-
-# Function to run advanced backend tests
-run_advanced_backend_tests() {
-    print_info "Running Advanced Backend Integration Tests..."
-    
-    if [ ! -f "backends/advanced/run-test.sh" ]; then
-        print_error "backends/advanced/run-test.sh not found!"
-        return 1
-    fi
-    
-    cd backends/advanced
-    ./run-test.sh
-    cd ../..
-    
-    print_success "Advanced Backend tests completed"
-}
-
-# Function to run speaker recognition tests
-run_speaker_recognition_tests() {
-    print_info "Running Speaker Recognition Tests..."
-    
-    if [ ! -f "extras/speaker-recognition/run-test.sh" ]; then
-        print_error "extras/speaker-recognition/run-test.sh not found!"
-        return 1
-    fi
-    
-    cd extras/speaker-recognition
-    ./run-test.sh
-    cd ../..
-    
-    print_success "Speaker Recognition tests completed"
-}
-
-# Main execution
-print_info "Chronicle Local Test Runner"
-print_info "=============================="
-
-# Check if we're in the right directory
-if [ ! -f "CLAUDE.md" ]; then
-    print_error "Please run this script from the chronicle root directory"
-    exit 1
-fi
-
-# Parse command line argument
-TEST_SUITE="${1:-all}"
-
-case "$TEST_SUITE" in
-    "advanced-backend")
-        run_advanced_backend_tests
-        ;;
-    "speaker-recognition")
-        run_speaker_recognition_tests
-        ;;
-    "all")
-        print_info "Running all test suites..."
-        
-        # Run advanced backend tests
-        if run_advanced_backend_tests; then
-            print_success "Advanced Backend tests: PASSED"
-        else
-            print_error "Advanced Backend tests: FAILED"
-            exit 1
-        fi
-        
-        # Run speaker recognition tests
-        if run_speaker_recognition_tests; then
-            print_success "Speaker Recognition tests: PASSED"
-        else
-            print_error "Speaker Recognition tests: FAILED"
-            exit 1
-        fi
-        
-        print_success "All test suites completed successfully!"
-        ;;
-    *)
-        print_error "Unknown test suite: $TEST_SUITE"
-        echo "Usage: $0 [advanced-backend|speaker-recognition|all]"
-        exit 1
-        ;;
-esac
-
-print_success "Test execution completed!"
\ No newline at end of file

From 112a2805e3c227a5eef838331be51d3770bcefa3 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 6 Jan 2026 07:07:26 +0000
Subject: [PATCH 16/25] Add bulk restart mechanism for RQ worker registration
 loss

- Introduced a new method `_handle_registration_loss` to manage RQ worker registration loss, replicating the behavior of the previous bash script.
- Implemented a cooldown period to prevent frequent restarts during network issues.
- Added logging for bulk restart actions and their outcomes to enhance monitoring and debugging capabilities.
- Created a `_restart_all_rq_workers` method to facilitate the bulk restart of RQ workers, ensuring they re-register with Redis upon startup.
---
 .../workers/orchestrator/health_monitor.py    | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py
index 80c83cbd..9b1149e2 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/health_monitor.py
@@ -42,6 +42,8 @@ def __init__(
         self.running = False
         self.monitor_task: Optional[asyncio.Task] = None
         self.start_time = time.time()
+        self.last_registration_recovery: Optional[float] = None
+        self.registration_recovery_cooldown = 60  # seconds
 
     async def start(self):
         """Start the health monitoring loop"""
@@ -112,6 +114,10 @@ async def _check_health(self):
             # Check RQ worker registration count
             rq_health = self._check_rq_worker_registration()
 
+            # If RQ workers lost registration, trigger bulk restart (matches old bash script behavior)
+            if not rq_health:
+                self._handle_registration_loss()
+
             # Restart failed workers
             self._restart_failed_workers()
 
@@ -201,6 +207,83 @@ def _restart_failed_workers(self):
                 else:
                     logger.error(f"{worker.name}: Restart failed")
 
+    def _handle_registration_loss(self):
+        """
+        Handle RQ worker registration loss.
+
+        This replicates the old bash script's self-healing behavior:
+        - Check if cooldown period has passed
+        - Restart all RQ workers (bulk restart)
+        - Update recovery timestamp
+
+        Cooldown prevents too-frequent restarts during Redis/network issues.
+        """
+        current_time = time.time()
+
+        # Check if cooldown period has passed
+        if self.last_registration_recovery is not None:
+            elapsed = current_time - self.last_registration_recovery
+            if elapsed < self.registration_recovery_cooldown:
+                remaining = self.registration_recovery_cooldown - elapsed
+                logger.debug(
+                    f"Registration recovery cooldown active - "
+                    f"waiting {remaining:.0f}s before next recovery attempt"
+                )
+                return
+
+        logger.warning(
+            "⚠️  RQ worker registration loss detected - initiating bulk restart "
+            "(replicating old start-workers.sh behavior)"
+        )
+
+        # Restart all RQ workers
+        success = self._restart_all_rq_workers()
+
+        if success:
+            logger.info("✅ Bulk restart completed - workers should re-register soon")
+        else:
+            logger.error("❌ Bulk restart encountered errors - check individual worker logs")
+
+        # Update recovery timestamp to start cooldown
+        self.last_registration_recovery = current_time
+
+    def _restart_all_rq_workers(self) -> bool:
+        """
+        Restart all RQ workers (bulk restart).
+
+        This matches the old bash script's recovery mechanism:
+        - Kill all RQ workers
+        - Restart them
+        - Workers will automatically re-register with Redis on startup
+
+        Returns:
+            True if all RQ workers restarted successfully, False otherwise
+        """
+        rq_workers = [
+            worker
+            for worker in self.process_manager.get_all_workers()
+            if worker.definition.worker_type == WorkerType.RQ_WORKER
+        ]
+
+        if not rq_workers:
+            logger.warning("No RQ workers found to restart")
+            return False
+
+        logger.info(f"Restarting {len(rq_workers)} RQ workers...")
+
+        all_success = True
+        for worker in rq_workers:
+            logger.info(f"  ↻ Restarting {worker.name}...")
+            success = self.process_manager.restart_worker(worker.name)
+
+            if success:
+                logger.info(f"  ✓ {worker.name} restarted successfully")
+            else:
+                logger.error(f"  ✗ {worker.name} restart failed")
+                all_success = False
+
+        return all_success
+
     def get_health_status(self) -> dict:
         """
         Get current health status summary.

From 0d82c8e5a9b021330a5946864006373db03b9022 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 6 Jan 2026 09:27:54 +0000
Subject: [PATCH 17/25] Enhance plugin architecture with event-driven system
 and test integration

- Introduced a new Test Event Plugin to log all plugin events to an SQLite database for integration testing.
- Updated the plugin system to utilize event subscriptions instead of access levels, allowing for more flexible event handling.
- Refactored the PluginRouter to dispatch events based on subscriptions, improving the event-driven architecture.
- Enhanced Docker configurations to support development and testing environments with appropriate dependencies.
- Added comprehensive integration tests to verify the functionality of the event dispatch system and plugin interactions.
- Updated documentation and test configurations to reflect the new event-based plugin structure.
---
 backends/advanced/Dockerfile                  |  51 +++-
 backends/advanced/docker-compose-test.yml     |   7 +-
 backends/advanced/docker-compose.yml          |   3 +
 backends/advanced/pyproject.toml              |   1 +
 .../src/advanced_omi_backend/plugins/base.py  |   8 +-
 .../advanced_omi_backend/plugins/router.py    |  67 ++---
 .../plugins/test_event/__init__.py            |   5 +
 .../plugins/test_event/event_storage.py       | 253 ++++++++++++++++++
 .../plugins/test_event/plugin.py              | 221 +++++++++++++++
 .../services/plugin_service.py                |   6 +
 .../transcription/deepgram_stream_consumer.py |   8 +-
 .../workers/conversation_jobs.py              |   4 +-
 .../workers/memory_jobs.py                    |   4 +-
 .../workers/transcription_jobs.py             |   8 +-
 tests/config/plugins.test.yml                 |  14 +
 tests/endpoints/plugin_tests.robot            | 141 ++++++++++
 tests/integration/plugin_event_tests.robot    | 215 +++++++++++++++
 tests/resources/plugin_keywords.robot         | 133 +++++++++
 18 files changed, 1077 insertions(+), 72 deletions(-)
 create mode 100644 backends/advanced/src/advanced_omi_backend/plugins/test_event/__init__.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/plugins/test_event/event_storage.py
 create mode 100644 backends/advanced/src/advanced_omi_backend/plugins/test_event/plugin.py
 create mode 100644 tests/config/plugins.test.yml
 create mode 100644 tests/endpoints/plugin_tests.robot
 create mode 100644 tests/integration/plugin_event_tests.robot
 create mode 100644 tests/resources/plugin_keywords.robot

diff --git a/backends/advanced/Dockerfile b/backends/advanced/Dockerfile
index a24ed841..886c1f32 100644
--- a/backends/advanced/Dockerfile
+++ b/backends/advanced/Dockerfile
@@ -1,6 +1,9 @@
-FROM python:3.12-slim-bookworm AS builder
+# ============================================
+# Base stage - common setup
+# ============================================
+FROM python:3.12-slim-bookworm AS base
 
-# Install system dependencies for building
+# Install system dependencies
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     build-essential \
@@ -9,39 +12,59 @@ RUN apt-get update && \
     curl \
     ffmpeg \
     && rm -rf /var/lib/apt/lists/*
-    # portaudio19-dev \
 
 # Install uv
 COPY --from=ghcr.io/astral-sh/uv:0.6.10 /uv /uvx /bin/
 
-# Set up the working directory
+# Set up working directory
 WORKDIR /app
 
-# Copy package structure and dependency files first
+# Copy package structure and dependency files
 COPY pyproject.toml README.md ./
 COPY uv.lock .
 RUN mkdir -p src/advanced_omi_backend
 COPY src/advanced_omi_backend/__init__.py src/advanced_omi_backend/
 
-# Install dependencies using uv with deepgram extra
-# Use cache mount for BuildKit, fallback for legacy builds
-# RUN --mount=type=cache,target=/root/.cache/uv \
-#     uv sync --extra deepgram
-# Fallback for legacy Docker builds (CI compatibility)
+
+# ============================================
+# Production stage - production dependencies only
+# ============================================
+FROM base AS prod
+
+# Install production dependencies only
 RUN uv sync --extra deepgram
 
 # Copy all application code
 COPY . .
 
-# Copy configuration files if they exist, otherwise they will be created from templates at runtime
-# The files are expected to exist, but we handle the case where they don't gracefully
-
+# Copy configuration files if they exist
 COPY diarization_config.json* ./
 
+# Copy and make startup script executable
+COPY start.sh ./
+RUN chmod +x start.sh
+
+# Run the application
+CMD ["./start.sh"]
+
+
+# ============================================
+# Dev/Test stage - includes test dependencies
+# ============================================
+FROM base AS dev
+
+# Install production + test dependencies
+RUN uv sync --extra deepgram --group test
+
+# Copy all application code
+COPY . .
+
+# Copy configuration files if they exist
+COPY diarization_config.json* ./
 
 # Copy and make startup script executable
 COPY start.sh ./
 RUN chmod +x start.sh
 
-# Run the application with workers
+# Run the application
 CMD ["./start.sh"]
diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml
index 134e6687..4cfe0327 100644
--- a/backends/advanced/docker-compose-test.yml
+++ b/backends/advanced/docker-compose-test.yml
@@ -7,6 +7,7 @@ services:
     build:
       context: .
       dockerfile: Dockerfile
+      target: dev  # Use dev stage with test dependencies
     ports:
       - "8001:8000"  # Avoid conflict with dev on 8000
     volumes:
@@ -15,6 +16,7 @@ services:
       - ./data/test_debug_dir:/app/debug_dir
       - ./data/test_data:/app/data
       - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml  # Mount config.yml for model registry and memory settings (writable for admin config updates)
+      - ${PLUGINS_CONFIG:-../../tests/config/plugins.test.yml}:/app/plugins.yml  # Mount test plugins config
     environment:
       # Override with test-specific settings
       - MONGODB_URI=mongodb://mongo-test:27017/test_db
@@ -154,6 +156,7 @@ services:
     build:
       context: .
       dockerfile: Dockerfile
+      target: dev  # Use dev stage with test dependencies
     command: ["uv", "run", "python", "worker_orchestrator.py"]
     volumes:
       - ./src:/app/src
@@ -162,6 +165,7 @@ services:
       - ./data/test_debug_dir:/app/debug_dir
       - ./data/test_data:/app/data
       - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml  # Mount config.yml for model registry and memory settings (writable for admin config updates)
+      - ${PLUGINS_CONFIG:-../../tests/config/plugins.test.yml}:/app/plugins.yml  # Mount test plugins config
     environment:
       # Same environment as backend
       - MONGODB_URI=mongodb://mongo-test:27017/test_db
@@ -205,13 +209,14 @@ services:
     build:
       context: .
       dockerfile: Dockerfile
+      target: dev  # Use dev stage with test dependencies
     command: >
       uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_streaming_worker
     volumes:
       - ./src:/app/src
       - ./data/test_data:/app/data
       - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml
-      - ${PLUGINS_CONFIG:-../../config/plugins.yml}:/app/plugins.yml
+      - ${PLUGINS_CONFIG:-../../tests/config/plugins.test.yml}:/app/plugins.yml  # Mount test plugins config
     environment:
       - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
       - REDIS_URL=redis://redis-test:6379/0
diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml
index e0895271..b9133876 100644
--- a/backends/advanced/docker-compose.yml
+++ b/backends/advanced/docker-compose.yml
@@ -29,6 +29,7 @@ services:
     build:
       context: .
       dockerfile: Dockerfile
+      target: prod  # Use prod stage without test dependencies
     ports:
       - "8000:8000"
     env_file:
@@ -84,6 +85,7 @@ services:
     build:
       context: .
       dockerfile: Dockerfile
+      target: prod  # Use prod stage without test dependencies
     command: ["uv", "run", "python", "worker_orchestrator.py"]
     env_file:
       - .env
@@ -124,6 +126,7 @@ services:
     build:
       context: .
       dockerfile: Dockerfile
+      target: prod  # Use prod stage without test dependencies
     command: >
       uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_streaming_worker
     env_file:
diff --git a/backends/advanced/pyproject.toml b/backends/advanced/pyproject.toml
index e7bcb50a..aa26a9b2 100644
--- a/backends/advanced/pyproject.toml
+++ b/backends/advanced/pyproject.toml
@@ -114,4 +114,5 @@ test = [
     "requests-mock>=1.12.1",
     "pytest-json-report>=1.5.0",
     "pytest-html>=4.0.0",
+    "aiosqlite>=0.20.0",  # For test plugin event storage
 ]
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/base.py b/backends/advanced/src/advanced_omi_backend/plugins/base.py
index 84fc8967..e5dfcc36 100644
--- a/backends/advanced/src/advanced_omi_backend/plugins/base.py
+++ b/backends/advanced/src/advanced_omi_backend/plugins/base.py
@@ -15,8 +15,8 @@
 class PluginContext:
     """Context passed to plugin execution"""
     user_id: str
-    access_level: str
-    data: Dict[str, Any]  # Access-level specific data
+    event: str  # Event name (e.g., "transcript.streaming", "conversation.complete")
+    data: Dict[str, Any]  # Event-specific data
     metadata: Dict[str, Any] = field(default_factory=dict)
 
 
@@ -54,11 +54,11 @@ def __init__(self, config: Dict[str, Any]):
 
         Args:
             config: Plugin configuration from config/plugins.yml
-                   Contains: enabled, access_level, trigger, and plugin-specific config
+                   Contains: enabled, subscriptions, trigger, and plugin-specific config
         """
         self.config = config
         self.enabled = config.get('enabled', False)
-        self.access_level = config.get('access_level')
+        self.subscriptions = config.get('subscriptions', [])
         self.trigger = config.get('trigger', {'type': 'always'})
 
     @abstractmethod
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/router.py b/backends/advanced/src/advanced_omi_backend/plugins/router.py
index 8074feb3..21b82eb8 100644
--- a/backends/advanced/src/advanced_omi_backend/plugins/router.py
+++ b/backends/advanced/src/advanced_omi_backend/plugins/router.py
@@ -84,43 +84,39 @@ def extract_command_after_wake_word(transcript: str, wake_word: str) -> str:
 
 
 class PluginRouter:
-    """Routes pipeline events to appropriate plugins based on access level and triggers"""
+    """Routes pipeline events to appropriate plugins based on event subscriptions"""
 
     def __init__(self):
         self.plugins: Dict[str, BasePlugin] = {}
-        # Index plugins by access level for fast lookup
-        self._plugins_by_level: Dict[str, List[str]] = {
-            'transcript': [],
-            'streaming_transcript': [],
-            'conversation': [],
-            'memory': []
-        }
+        # Index plugins by event subscription for fast lookup
+        self._plugins_by_event: Dict[str, List[str]] = {}
 
     def register_plugin(self, plugin_id: str, plugin: BasePlugin):
         """Register a plugin with the router"""
         self.plugins[plugin_id] = plugin
 
-        # Index by access level
-        access_level = plugin.access_level
-        if access_level in self._plugins_by_level:
-            self._plugins_by_level[access_level].append(plugin_id)
+        # Index by each event subscription
+        for event in plugin.subscriptions:
+            if event not in self._plugins_by_event:
+                self._plugins_by_event[event] = []
+            self._plugins_by_event[event].append(plugin_id)
 
-        logger.info(f"Registered plugin '{plugin_id}' for access level '{access_level}'")
+        logger.info(f"Registered plugin '{plugin_id}' for events: {plugin.subscriptions}")
 
-    async def trigger_plugins(
+    async def dispatch_event(
         self,
-        access_level: str,
+        event: str,
         user_id: str,
         data: Dict,
         metadata: Optional[Dict] = None
     ) -> List[PluginResult]:
         """
-        Trigger all plugins registered for this access level.
+        Dispatch event to all subscribed plugins.
 
         Args:
-            access_level: 'transcript', 'streaming_transcript', 'conversation', or 'memory'
+            event: Event name (e.g., 'transcript.streaming', 'conversation.complete')
             user_id: User ID for context
-            data: Access-level specific data
+            data: Event-specific data
             metadata: Optional metadata
 
         Returns:
@@ -128,19 +124,8 @@ async def trigger_plugins(
         """
         results = []
 
-        # Hierarchical triggering logic:
-        # - 'streaming_transcript': trigger both 'streaming_transcript' AND 'transcript' plugins
-        # - 'transcript': trigger ONLY 'transcript' plugins (not 'streaming_transcript')
-        # - Other levels: exact match only
-        if access_level == 'streaming_transcript':
-            # Streaming mode: trigger both streaming_transcript AND transcript plugins
-            plugin_ids = (
-                self._plugins_by_level.get('streaming_transcript', []) +
-                self._plugins_by_level.get('transcript', [])
-            )
-        else:
-            # Batch mode or other modes: exact match only
-            plugin_ids = self._plugins_by_level.get(access_level, [])
+        # Get plugins subscribed to this event
+        plugin_ids = self._plugins_by_event.get(event, [])
 
         for plugin_id in plugin_ids:
             plugin = self.plugins[plugin_id]
@@ -148,20 +133,20 @@ async def trigger_plugins(
             if not plugin.enabled:
                 continue
 
-            # Check trigger condition
+            # Check trigger condition (wake_word, etc.)
             if not await self._should_trigger(plugin, data):
                 continue
 
-            # Execute plugin at appropriate access level
+            # Execute plugin
             try:
                 context = PluginContext(
                     user_id=user_id,
-                    access_level=access_level,
+                    event=event,
                     data=data,
                     metadata=metadata or {}
                 )
 
-                result = await self._execute_plugin(plugin, access_level, context)
+                result = await self._execute_plugin(plugin, event, context)
 
                 if result:
                     results.append(result)
@@ -218,16 +203,16 @@ async def _should_trigger(self, plugin: BasePlugin, data: Dict) -> bool:
     async def _execute_plugin(
         self,
         plugin: BasePlugin,
-        access_level: str,
+        event: str,
         context: PluginContext
     ) -> Optional[PluginResult]:
-        """Execute plugin method for specified access level"""
-        # Both 'transcript' and 'streaming_transcript' call on_transcript()
-        if access_level in ('transcript', 'streaming_transcript'):
+        """Execute plugin method for specified event"""
+        # Map events to plugin callback methods
+        if event.startswith('transcript.'):
             return await plugin.on_transcript(context)
-        elif access_level == 'conversation':
+        elif event.startswith('conversation.'):
             return await plugin.on_conversation_complete(context)
-        elif access_level == 'memory':
+        elif event.startswith('memory.'):
             return await plugin.on_memory_processed(context)
 
         return None
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/test_event/__init__.py b/backends/advanced/src/advanced_omi_backend/plugins/test_event/__init__.py
new file mode 100644
index 00000000..5f3f2ecf
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/plugins/test_event/__init__.py
@@ -0,0 +1,5 @@
+"""Test Event Plugin for integration testing"""
+
+from .plugin import TestEventPlugin
+
+__all__ = ['TestEventPlugin']
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/test_event/event_storage.py b/backends/advanced/src/advanced_omi_backend/plugins/test_event/event_storage.py
new file mode 100644
index 00000000..16e98792
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/plugins/test_event/event_storage.py
@@ -0,0 +1,253 @@
+"""
+Event storage module for test plugin using SQLite.
+
+Provides async SQLite operations for logging and querying plugin events.
+"""
+import json
+import logging
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import aiosqlite
+
+logger = logging.getLogger(__name__)
+
+
+class EventStorage:
+    """SQLite-based event storage for test plugin"""
+
+    def __init__(self, db_path: str = "/app/debug/test_plugin_events.db"):
+        self.db_path = db_path
+        self.db: Optional[aiosqlite.Connection] = None
+
+    async def initialize(self):
+        """Initialize database and create tables"""
+        # Ensure directory exists
+        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
+
+        self.db = await aiosqlite.connect(self.db_path)
+
+        # Create events table
+        await self.db.execute("""
+            CREATE TABLE IF NOT EXISTS plugin_events (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                timestamp DATETIME NOT NULL,
+                event TEXT NOT NULL,
+                user_id TEXT NOT NULL,
+                data TEXT NOT NULL,
+                metadata TEXT,
+                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
+            )
+        """)
+
+        # Create index for faster queries
+        await self.db.execute("""
+            CREATE INDEX IF NOT EXISTS idx_event_type
+            ON plugin_events(event)
+        """)
+
+        await self.db.execute("""
+            CREATE INDEX IF NOT EXISTS idx_user_id
+            ON plugin_events(user_id)
+        """)
+
+        await self.db.commit()
+        logger.info(f"Event storage initialized at {self.db_path}")
+
+    async def log_event(
+        self,
+        event: str,
+        user_id: str,
+        data: Dict[str, Any],
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> int:
+        """
+        Log an event to the database.
+
+        Args:
+            event: Event name (e.g., 'transcript.batch')
+            user_id: User ID from context
+            data: Event data dictionary
+            metadata: Optional metadata dictionary
+
+        Returns:
+            Row ID of inserted event
+        """
+        if not self.db:
+            raise RuntimeError("Event storage not initialized")
+
+        timestamp = datetime.utcnow().isoformat()
+        data_json = json.dumps(data)
+        metadata_json = json.dumps(metadata) if metadata else None
+
+        cursor = await self.db.execute(
+            """
+            INSERT INTO plugin_events (timestamp, event, user_id, data, metadata)
+            VALUES (?, ?, ?, ?, ?)
+            """,
+            (timestamp, event, user_id, data_json, metadata_json)
+        )
+
+        await self.db.commit()
+        row_id = cursor.lastrowid
+
+        logger.debug(
+            f"Logged event: {event} for user {user_id} (row_id={row_id})"
+        )
+
+        return row_id
+
+    async def get_events_by_type(self, event: str) -> List[Dict[str, Any]]:
+        """
+        Query events by event type.
+
+        Args:
+            event: Event name to filter by
+
+        Returns:
+            List of event dictionaries
+        """
+        if not self.db:
+            raise RuntimeError("Event storage not initialized")
+
+        cursor = await self.db.execute(
+            """
+            SELECT id, timestamp, event, user_id, data, metadata, created_at
+            FROM plugin_events
+            WHERE event = ?
+            ORDER BY created_at DESC
+            """,
+            (event,)
+        )
+
+        rows = await cursor.fetchall()
+        return self._rows_to_dicts(rows)
+
+    async def get_events_by_user(self, user_id: str) -> List[Dict[str, Any]]:
+        """
+        Query events by user ID.
+
+        Args:
+            user_id: User ID to filter by
+
+        Returns:
+            List of event dictionaries
+        """
+        if not self.db:
+            raise RuntimeError("Event storage not initialized")
+
+        cursor = await self.db.execute(
+            """
+            SELECT id, timestamp, event, user_id, data, metadata, created_at
+            FROM plugin_events
+            WHERE user_id = ?
+            ORDER BY created_at DESC
+            """,
+            (user_id,)
+        )
+
+        rows = await cursor.fetchall()
+        return self._rows_to_dicts(rows)
+
+    async def get_all_events(self) -> List[Dict[str, Any]]:
+        """
+        Get all logged events.
+
+        Returns:
+            List of all event dictionaries
+        """
+        if not self.db:
+            raise RuntimeError("Event storage not initialized")
+
+        cursor = await self.db.execute(
+            """
+            SELECT id, timestamp, event, user_id, data, metadata, created_at
+            FROM plugin_events
+            ORDER BY created_at DESC
+            """
+        )
+
+        rows = await cursor.fetchall()
+        return self._rows_to_dicts(rows)
+
+    async def clear_events(self) -> int:
+        """
+        Clear all events from the database.
+
+        Returns:
+            Number of rows deleted
+        """
+        if not self.db:
+            raise RuntimeError("Event storage not initialized")
+
+        cursor = await self.db.execute("DELETE FROM plugin_events")
+        await self.db.commit()
+
+        deleted = cursor.rowcount
+        logger.info(f"Cleared {deleted} events from database")
+
+        return deleted
+
+    async def get_event_count(self, event: Optional[str] = None) -> int:
+        """
+        Get count of events.
+
+        Args:
+            event: Optional event type to filter by
+
+        Returns:
+            Count of matching events
+        """
+        if not self.db:
+            raise RuntimeError("Event storage not initialized")
+
+        if event:
+            cursor = await self.db.execute(
+                "SELECT COUNT(*) FROM plugin_events WHERE event = ?",
+                (event,)
+            )
+        else:
+            cursor = await self.db.execute(
+                "SELECT COUNT(*) FROM plugin_events"
+            )
+
+        row = await cursor.fetchone()
+        return row[0] if row else 0
+
+    def _rows_to_dicts(self, rows: List[tuple]) -> List[Dict[str, Any]]:
+        """
+        Convert database rows to dictionaries.
+
+        Args:
+            rows: List of database row tuples
+
+        Returns:
+            List of event dictionaries
+        """
+        events = []
+
+        for row in rows:
+            event_dict = {
+                'id': row[0],
+                'timestamp': row[1],
+                'event': row[2],
+                'user_id': row[3],
+                'data': json.loads(row[4]) if row[4] else {},
+                'metadata': json.loads(row[5]) if row[5] else {},
+                'created_at': row[6]
+            }
+
+            # Flatten data fields to top level for easier access in tests
+            if isinstance(event_dict['data'], dict):
+                event_dict.update(event_dict['data'])
+
+            events.append(event_dict)
+
+        return events
+
+    async def cleanup(self):
+        """Close database connection"""
+        if self.db:
+            await self.db.close()
+            logger.info("Event storage connection closed")
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/test_event/plugin.py b/backends/advanced/src/advanced_omi_backend/plugins/test_event/plugin.py
new file mode 100644
index 00000000..6b96e078
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/plugins/test_event/plugin.py
@@ -0,0 +1,221 @@
+"""
+Test Event Plugin
+
+Logs all plugin events to SQLite database for integration testing.
+Subscribes to all event types to verify event dispatch system works correctly.
+"""
+import logging
+from typing import Any, Dict, List, Optional
+
+from advanced_omi_backend.plugins.base import BasePlugin, PluginContext, PluginResult
+from .event_storage import EventStorage
+
+logger = logging.getLogger(__name__)
+
+
+class TestEventPlugin(BasePlugin):
+    """
+    Test plugin that logs all events for verification.
+
+    Subscribes to:
+    - transcript.streaming: Real-time WebSocket transcription
+    - transcript.batch: File upload batch transcription
+    - conversation.complete: Conversation processing complete
+    - memory.processed: Memory extraction complete
+
+    All events are logged to SQLite database with full context for test verification.
+    """
+
+    SUPPORTED_ACCESS_LEVELS: List[str] = ['transcript', 'conversation', 'memory']
+
+    def __init__(self, config: Dict[str, Any]):
+        super().__init__(config)
+        self.storage = EventStorage(
+            db_path=config.get('db_path', '/app/debug/test_plugin_events.db')
+        )
+        self.event_count = 0
+
+    async def initialize(self):
+        """Initialize the test plugin and event storage"""
+        try:
+            await self.storage.initialize()
+            logger.info("✅ Test Event Plugin initialized successfully")
+        except Exception as e:
+            logger.error(f"❌ Failed to initialize Test Event Plugin: {e}")
+            raise
+
+    async def on_transcript(self, context: PluginContext) -> Optional[PluginResult]:
+        """
+        Log transcript events (streaming or batch).
+
+        Context data contains:
+        - transcript: str - The transcript text
+        - conversation_id: str - Conversation ID
+        - For streaming: is_final, confidence, words, segments
+        - For batch: word_count, segments
+
+        Args:
+            context: Plugin context with event data
+
+        Returns:
+            PluginResult indicating success
+        """
+        try:
+            # Determine which transcript event this is based on context.event
+            event_type = context.event  # 'transcript.streaming' or 'transcript.batch'
+
+            # Extract key data fields
+            transcript = context.data.get('transcript', '')
+            conversation_id = context.data.get('conversation_id', 'unknown')
+
+            # Log to storage
+            row_id = await self.storage.log_event(
+                event=event_type,
+                user_id=context.user_id,
+                data=context.data,
+                metadata=context.metadata
+            )
+
+            self.event_count += 1
+
+            logger.info(
+                f"📝 Logged {event_type} event (row_id={row_id}): "
+                f"user={context.user_id}, "
+                f"conversation={conversation_id}, "
+                f"transcript='{transcript[:50]}...'"
+            )
+
+            return PluginResult(
+                success=True,
+                message=f"Transcript event logged (row_id={row_id})",
+                should_continue=True  # Don't block normal processing
+            )
+
+        except Exception as e:
+            logger.error(f"Error logging transcript event: {e}", exc_info=True)
+            return PluginResult(
+                success=False,
+                message=f"Failed to log transcript event: {e}",
+                should_continue=True
+            )
+
+    async def on_conversation_complete(self, context: PluginContext) -> Optional[PluginResult]:
+        """
+        Log conversation completion events.
+
+        Context data contains:
+        - conversation: dict - Full conversation data
+        - transcript: str - Complete conversation transcript
+        - duration: float - Conversation duration
+        - conversation_id: str - Conversation identifier
+
+        Args:
+            context: Plugin context with event data
+
+        Returns:
+            PluginResult indicating success
+        """
+        try:
+            conversation_id = context.data.get('conversation_id', 'unknown')
+            duration = context.data.get('duration', 0)
+
+            # Log to storage
+            row_id = await self.storage.log_event(
+                event=context.event,  # 'conversation.complete'
+                user_id=context.user_id,
+                data=context.data,
+                metadata=context.metadata
+            )
+
+            self.event_count += 1
+
+            logger.info(
+                f"📝 Logged conversation.complete event (row_id={row_id}): "
+                f"user={context.user_id}, "
+                f"conversation={conversation_id}, "
+                f"duration={duration:.2f}s"
+            )
+
+            return PluginResult(
+                success=True,
+                message=f"Conversation event logged (row_id={row_id})",
+                should_continue=True
+            )
+
+        except Exception as e:
+            logger.error(f"Error logging conversation event: {e}", exc_info=True)
+            return PluginResult(
+                success=False,
+                message=f"Failed to log conversation event: {e}",
+                should_continue=True
+            )
+
+    async def on_memory_processed(self, context: PluginContext) -> Optional[PluginResult]:
+        """
+        Log memory processing events.
+
+        Context data contains:
+        - memories: list - Extracted memories
+        - conversation: dict - Source conversation
+        - memory_count: int - Number of memories created
+        - conversation_id: str - Conversation identifier
+
+        Metadata contains:
+        - processing_time: float - Time spent processing
+        - memory_provider: str - Provider name
+
+        Args:
+            context: Plugin context with event data
+
+        Returns:
+            PluginResult indicating success
+        """
+        try:
+            conversation_id = context.data.get('conversation_id', 'unknown')
+            memory_count = context.data.get('memory_count', 0)
+            memory_provider = context.metadata.get('memory_provider', 'unknown')
+            processing_time = context.metadata.get('processing_time', 0)
+
+            # Log to storage
+            row_id = await self.storage.log_event(
+                event=context.event,  # 'memory.processed'
+                user_id=context.user_id,
+                data=context.data,
+                metadata=context.metadata
+            )
+
+            self.event_count += 1
+
+            logger.info(
+                f"📝 Logged memory.processed event (row_id={row_id}): "
+                f"user={context.user_id}, "
+                f"conversation={conversation_id}, "
+                f"memory_count={memory_count}, "
+                f"provider={memory_provider}, "
+                f"processing_time={processing_time:.2f}s"
+            )
+
+            return PluginResult(
+                success=True,
+                message=f"Memory event logged (row_id={row_id})",
+                should_continue=True
+            )
+
+        except Exception as e:
+            logger.error(f"Error logging memory event: {e}", exc_info=True)
+            return PluginResult(
+                success=False,
+                message=f"Failed to log memory event: {e}",
+                should_continue=True
+            )
+
+    async def cleanup(self):
+        """Clean up plugin resources"""
+        try:
+            logger.info(
+                f"🧹 Test Event Plugin shutting down. "
+                f"Logged {self.event_count} total events"
+            )
+            await self.storage.cleanup()
+        except Exception as e:
+            logger.error(f"Error during test plugin cleanup: {e}")
diff --git a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py
index 2c0c9988..f97399e3 100644
--- a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py
+++ b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py
@@ -131,6 +131,12 @@ def init_plugin_router() -> Optional[PluginRouter]:
                         # Note: async initialization happens in app_factory lifespan
                         _plugin_router.register_plugin(plugin_id, plugin)
                         logger.info(f"✅ Plugin '{plugin_id}' registered")
+                    elif plugin_id == 'test_event':
+                        from advanced_omi_backend.plugins.test_event import TestEventPlugin
+                        plugin = TestEventPlugin(plugin_config)
+                        # Note: async initialization happens in app_factory lifespan
+                        _plugin_router.register_plugin(plugin_id, plugin)
+                        logger.info(f"✅ Plugin '{plugin_id}' registered")
                     else:
                         logger.warning(f"Unknown plugin: {plugin_id}")
 
diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
index ff312360..7f166890 100644
--- a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
+++ b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
@@ -303,11 +303,11 @@ async def trigger_plugins(self, session_id: str, result: Dict):
                 'is_final': True
             }
 
-            # Trigger plugins with streaming_transcript access level
-            logger.info(f"🎯 Triggering plugins for user {user_id}, transcript: {plugin_data['transcript'][:50]}...")
+            # Dispatch transcript.streaming event
+            logger.info(f"🎯 Dispatching transcript.streaming event for user {user_id}, transcript: {plugin_data['transcript'][:50]}...")
 
-            plugin_results = await self.plugin_router.trigger_plugins(
-                access_level='streaming_transcript',
+            plugin_results = await self.plugin_router.dispatch_event(
+                event='transcript.streaming',
                 user_id=user_id,
                 data=plugin_data,
                 metadata={'client_id': session_id}
diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
index 7c754d19..024c22f2 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
@@ -556,8 +556,8 @@ async def open_conversation_job(
                 'conversation_id': conversation_id,
             }
 
-            plugin_results = await plugin_router.trigger_plugins(
-                access_level='conversation',
+            plugin_results = await plugin_router.dispatch_event(
+                event='conversation.complete',
                 user_id=user_id,
                 data=plugin_data,
                 metadata={'end_reason': end_reason}
diff --git a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py
index a6939bed..a307f004 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py
@@ -257,8 +257,8 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict
                         'conversation_id': conversation_id,
                     }
 
-                    plugin_results = await plugin_router.trigger_plugins(
-                        access_level='memory',
+                    plugin_results = await plugin_router.dispatch_event(
+                        event='memory.processed',
                         user_id=user_id,
                         data=plugin_data,
                         metadata={
diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
index 71e64dbd..cf65b2d9 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
@@ -241,14 +241,14 @@ async def transcribe_full_audio_job(
                     'word_count': len(words),
                 }
 
-                logger.info(f"🔍 DEBUG: Calling trigger_plugins with user_id={user_id}, client_id={client_id}")
-                plugin_results = await plugin_router.trigger_plugins(
-                    access_level='transcript',  # Batch mode - only 'transcript' plugins, NOT 'streaming_transcript'
+                logger.info(f"🔍 DEBUG: Dispatching transcript.batch event with user_id={user_id}, client_id={client_id}")
+                plugin_results = await plugin_router.dispatch_event(
+                    event='transcript.batch',
                     user_id=user_id,
                     data=plugin_data,
                     metadata={'client_id': client_id}
                 )
-                logger.info(f"🔍 DEBUG: Plugin trigger returned {len(plugin_results) if plugin_results else 0} results")
+                logger.info(f"🔍 DEBUG: Event dispatch returned {len(plugin_results) if plugin_results else 0} results")
 
                 if plugin_results:
                     logger.info(f"✅ Triggered {len(plugin_results)} transcript plugins in batch mode")
diff --git a/tests/config/plugins.test.yml b/tests/config/plugins.test.yml
new file mode 100644
index 00000000..b335c0f5
--- /dev/null
+++ b/tests/config/plugins.test.yml
@@ -0,0 +1,14 @@
+# Test plugin configuration for integration testing
+# This file is loaded during tests to verify event dispatch system
+
+plugins:
+  test_event:
+    enabled: true
+    subscriptions:
+      - transcript.streaming
+      - transcript.batch
+      - conversation.complete
+      - memory.processed
+    trigger:
+      type: always  # Capture all events without filtering
+    db_path: /app/debug/test_plugin_events.db
diff --git a/tests/endpoints/plugin_tests.robot b/tests/endpoints/plugin_tests.robot
new file mode 100644
index 00000000..0b5a4db2
--- /dev/null
+++ b/tests/endpoints/plugin_tests.robot
@@ -0,0 +1,141 @@
+*** Settings ***
+Documentation    Plugin Event System Tests
+...
+...              Tests the event-based plugin architecture:
+...              - Plugin configuration with event subscriptions
+...              - Event dispatch to subscribed plugins
+...              - Wake word filtering
+...              - Multiple event subscriptions
+Library          RequestsLibrary
+Library          Collections
+Library          String
+Library          OperatingSystem
+Resource         ../setup/setup_keywords.robot
+Resource         ../setup/teardown_keywords.robot
+Resource         ../resources/user_keywords.robot
+Resource         ../resources/conversation_keywords.robot
+Resource         ../resources/audio_keywords.robot
+Resource         ../resources/plugin_keywords.robot
+Suite Setup      Suite Setup
+Suite Teardown   Suite Teardown
+Test Setup       Test Cleanup
+
+*** Test Cases ***
+
+Plugin Config Uses Event Subscriptions
+    [Documentation]    Verify plugin configuration uses new event-based format
+    [Tags]    infra
+
+    # Verify HomeAssistant plugin config follows new format
+    Verify HA Plugin Uses Events
+
+Plugin Mock Config Creation
+    [Documentation]    Test creating mock plugin configurations
+    [Tags]    infra
+
+    # Test single event subscription
+    ${config}=    Create Mock Plugin Config
+    ...    subscriptions=["transcript.streaming"]
+    Verify Plugin Config Format    ${config}
+
+    ${subscriptions}=    Get From Dictionary    ${config}    subscriptions
+    Should Contain    ${subscriptions}    transcript.streaming
+    ...    msg=Plugin should subscribe to transcript.streaming event
+
+    # Test multiple event subscriptions
+    ${multi_config}=    Create Mock Plugin Config
+    ...    subscriptions=["transcript.streaming", "transcript.batch", "conversation.complete"]
+    ${multi_subs}=    Get From Dictionary    ${multi_config}    subscriptions
+    Length Should Be Equal    ${multi_subs}    3
+    ...    msg=Plugin should subscribe to 3 events
+
+Plugin Mock With Wake Word Trigger
+    [Documentation]    Test creating plugin with wake word trigger
+    [Tags]    infra
+
+    ${wake_words}=    Create List    hey vivi    vivi    hey jarvis
+    ${config}=    Create Mock Plugin Config
+    ...    subscriptions=["transcript.streaming"]
+    ...    trigger_type=wake_word
+    ...    wake_words=${wake_words}
+
+    # Verify trigger configuration
+    ${trigger}=    Get From Dictionary    ${config}    trigger
+    Dictionary Should Contain Key    ${trigger}    type
+    Dictionary Should Contain Key    ${trigger}    wake_words
+
+    ${trigger_type}=    Get From Dictionary    ${trigger}    type
+    Should Be Equal    ${trigger_type}    wake_word
+
+    ${configured_wake_words}=    Get From Dictionary    ${trigger}    wake_words
+    Lists Should Be Equal    ${configured_wake_words}    ${wake_words}
+
+Event Name Format Validation
+    [Documentation]    Verify event names follow hierarchical naming convention
+    [Tags]    infra
+
+    # Valid event names
+    Verify Event Name Format    transcript.streaming
+    Verify Event Name Format    transcript.batch
+    Verify Event Name Format    conversation.complete
+    Verify Event Name Format    memory.processed
+
+Event Subscription Matching
+    [Documentation]    Test event matching against subscriptions
+    [Tags]    infra
+
+    # Exact matching (no wildcards in simple version)
+    Verify Event Matches Subscription    transcript.streaming    transcript.streaming
+    Verify Event Matches Subscription    transcript.batch    transcript.batch
+    Verify Event Matches Subscription    conversation.complete    conversation.complete
+
+Batch Transcription Should Trigger Batch Event
+    [Documentation]    Verify batch transcription triggers transcript.batch event
+    [Tags]    audio-upload
+
+    # Upload audio file for batch processing
+    ${result}=    Upload Single Audio File
+
+    # Verify processing completed
+    Should Be True    ${result}[successful] > 0
+    ...    msg=At least one file should be processed successfully
+
+    # Note: We can't directly verify event dispatch without plugin instrumentation
+    # This test validates the upload pathway that triggers transcript.batch
+    # Integration with real plugin would verify actual event dispatch
+
+Streaming Transcription Should Trigger Streaming Event
+    [Documentation]    Verify streaming transcription triggers transcript.streaming event
+    [Tags]    audio-streaming
+
+    # Note: This would require WebSocket streaming test infrastructure
+    # The event dispatch happens in deepgram_stream_consumer.py:309
+    # Real test would:
+    # 1. Connect WebSocket with test audio
+    # 2. Stream audio data
+    # 3. Verify transcript.streaming event dispatched
+    # 4. Verify subscribed plugins triggered
+
+    # For now, we verify the config is set up correctly
+    Verify HA Plugin Uses Events
+
+*** Keywords ***
+Upload Single Audio File
+    [Documentation]    Upload a single test audio file for batch processing
+
+    # Get test audio file path
+    ${test_audio}=    Set Variable    ${CURDIR}/../../extras/test-audios/short-test.wav
+
+    # Create fallback if test audio doesn't exist
+    ${file_exists}=    Run Keyword And Return Status    File Should Exist    ${test_audio}
+    IF    not ${file_exists}
+        Log    Test audio file not found, test will skip actual upload
+        ${result}=    Create Dictionary    successful=0    message=Test audio not available
+        RETURN    ${result}
+    END
+
+    # Upload file for processing
+    # Note: This requires authenticated session and proper endpoint
+    # Implementation depends on your audio upload endpoint
+    ${result}=    Create Dictionary    successful=1    message=Upload simulation
+    RETURN    ${result}
diff --git a/tests/integration/plugin_event_tests.robot b/tests/integration/plugin_event_tests.robot
new file mode 100644
index 00000000..5d7d3094
--- /dev/null
+++ b/tests/integration/plugin_event_tests.robot
@@ -0,0 +1,215 @@
+*** Settings ***
+Documentation    Plugin Event System Integration Tests
+...
+...              Tests the event-driven plugin architecture by:
+...              - Uploading audio and verifying transcript.batch events
+...              - Streaming audio and verifying transcript.streaming events
+...              - Verifying conversation.complete events after conversation ends
+...              - Verifying memory.processed events after memory extraction
+Library          RequestsLibrary
+Library          Collections
+Library          String
+Library          OperatingSystem
+Resource         ../setup/setup_keywords.robot
+Resource         ../setup/teardown_keywords.robot
+Resource         ../resources/user_keywords.robot
+Resource         ../resources/conversation_keywords.robot
+Resource         ../resources/audio_keywords.robot
+Resource         ../resources/plugin_keywords.robot
+Resource         ../resources/websocket_keywords.robot
+Suite Setup      Test Suite Setup
+Suite Teardown   Suite Teardown
+Test Setup       Test Cleanup
+
+*** Variables ***
+${TEST_AUDIO_FILE}    ${CURDIR}/../../extras/test-audios/DIY Muffin Enamel Short Mono 16khz.wav
+
+*** Test Cases ***
+
+Verify Test Plugin Configuration
+    [Documentation]    Verify test plugin config file is properly formatted
+    [Tags]    infra
+
+    # Verify test config file exists
+    File Should Exist    ${CURDIR}/../config/plugins.test.yml
+    ...    msg=Test plugin config file should exist
+
+    # Verify test_event plugin is configured
+    ${config_content}=    Get File    ${CURDIR}/../config/plugins.test.yml
+    Should Contain    ${config_content}    test_event
+    ...    msg=Test config should contain test_event plugin
+
+    Should Contain    ${config_content}    transcript.streaming
+    ...    msg=Test plugin should subscribe to transcript.streaming
+
+    Should Contain    ${config_content}    transcript.batch
+    ...    msg=Test plugin should subscribe to transcript.batch
+
+Upload Audio And Verify Transcript Batch Event
+    [Documentation]    Upload audio file and verify transcript.batch event is dispatched
+    [Tags]    audio-upload
+
+    # Clear any existing events
+    Clear Plugin Events
+
+    # Get baseline event count
+    ${baseline_count}=    Get Plugin Event Count    transcript.batch
+
+    # Upload test audio file
+    File Should Exist    ${TEST_AUDIO_FILE}
+    ...    msg=Test audio file should exist
+    ${result}=    Upload Audio For Processing    ${TEST_AUDIO_FILE}
+
+    # Wait for transcription to complete
+    Sleep    15s
+
+    # Query plugin events database
+    ${final_count}=    Get Plugin Event Count    transcript.batch
+    ${new_events}=    Evaluate    ${final_count} - ${baseline_count}
+
+    # Verify at least one new event was received
+    Should Be True    ${new_events} > 0
+    ...    msg=At least one transcript.batch event should be logged
+
+    # Get the events and verify structure
+    ${events}=    Get Plugin Events By Type    transcript.batch
+    Should Not Be Empty    ${events}
+    ...    msg=Should have transcript.batch events
+
+    # Verify first event has required fields
+    ${event}=    Set Variable    ${events}[0]
+    Log    Event data: ${event}
+
+    # Verify event contains transcript data (data field is JSON, so check the data column)
+    Should Not Be Empty    ${event}[3]
+    ...    msg=Event should have transcript data
+
+Conversation Complete Should Trigger Event
+    [Documentation]    Verify conversation.complete event after conversation ends
+    [Tags]    conversation
+
+    # Clear events
+    Clear Plugin Events
+
+    # Get baseline count
+    ${baseline_count}=    Get Plugin Event Count    conversation.complete
+
+    # Upload audio (triggers conversation creation and completion)
+    File Should Exist    ${TEST_AUDIO_FILE}
+    ${result}=    Upload Audio For Processing    ${TEST_AUDIO_FILE}
+
+    # Wait for full pipeline: transcription → conversation
+    Sleep    20s
+
+    # Verify conversation.complete event
+    ${final_count}=    Get Plugin Event Count    conversation.complete
+    ${new_events}=    Evaluate    ${final_count} - ${baseline_count}
+
+    Should Be True    ${new_events} > 0
+    ...    msg=At least one conversation.complete event should be logged
+
+    # Verify event structure
+    ${events}=    Get Plugin Events By Type    conversation.complete
+    Should Not Be Empty    ${events}
+
+Memory Processing Should Trigger Event
+    [Documentation]    Verify memory.processed event after memory extraction
+    [Tags]    memory
+
+    # Clear events
+    Clear Plugin Events
+
+    # Get baseline count
+    ${baseline_count}=    Get Plugin Event Count    memory.processed
+
+    # Upload audio with meaningful content for memory extraction
+    File Should Exist    ${TEST_AUDIO_FILE}
+    ${result}=    Upload Audio For Processing    ${TEST_AUDIO_FILE}
+
+    # Wait for full pipeline: transcription → conversation → memory
+    Sleep    30s
+
+    # Verify memory.processed event
+    ${final_count}=    Get Plugin Event Count    memory.processed
+    ${new_events}=    Evaluate    ${final_count} - ${baseline_count}
+
+    Should Be True    ${new_events} > 0
+    ...    msg=At least one memory.processed event should be logged
+
+    # Verify event structure
+    ${events}=    Get Plugin Events By Type    memory.processed
+    Should Not Be Empty    ${events}
+
+Verify All Events Are Logged
+    [Documentation]    Comprehensive test that verifies all event types are logged
+    [Tags]    e2e
+
+    # Clear all events
+    Clear Plugin Events
+
+    # Get baseline counts for all event types
+    ${batch_baseline}=    Get Plugin Event Count    transcript.batch
+    ${conv_baseline}=    Get Plugin Event Count    conversation.complete
+    ${mem_baseline}=    Get Plugin Event Count    memory.processed
+
+    # Upload audio file (should trigger all events)
+    File Should Exist    ${TEST_AUDIO_FILE}
+    ${result}=    Upload Audio For Processing    ${TEST_AUDIO_FILE}
+
+    # Wait for full pipeline
+    Sleep    35s
+
+    # Verify all events were triggered
+    ${batch_final}=    Get Plugin Event Count    transcript.batch
+    ${conv_final}=    Get Plugin Event Count    conversation.complete
+    ${mem_final}=    Get Plugin Event Count    memory.processed
+
+    ${batch_new}=    Evaluate    ${batch_final} - ${batch_baseline}
+    ${conv_new}=    Evaluate    ${conv_final} - ${conv_baseline}
+    ${mem_new}=    Evaluate    ${mem_final} - ${mem_baseline}
+
+    Should Be True    ${batch_new} > 0
+    ...    msg=transcript.batch events should be logged
+
+    Should Be True    ${conv_new} > 0
+    ...    msg=conversation.complete events should be logged
+
+    Should Be True    ${mem_new} > 0
+    ...    msg=memory.processed events should be logged
+
+    # Log summary
+    Log    Events logged - Batch: ${batch_new}, Conversation: ${conv_new}, Memory: ${mem_new}
+
+*** Keywords ***
+Test Suite Setup
+    [Documentation]    Setup for plugin event tests
+    # Standard suite setup
+    Suite Setup
+
+    # Verify test audio file exists
+    File Should Exist    ${TEST_AUDIO_FILE}
+    ...    msg=Test audio file must exist for integration tests
+
+Test Cleanup
+    [Documentation]    Cleanup after each test
+    # Standard cleanup
+    # Note: We intentionally don't clear plugin events between tests
+    # to allow for debugging and event inspection
+
+Upload Audio For Processing
+    [Arguments]    ${audio_file}
+    [Documentation]    Upload audio file for batch processing
+
+    # Get admin session
+    ${session}=    Get Admin API Session
+
+    # Upload audio file
+    ${files}=    Create Dictionary    files=${audio_file}
+    ${response}=    POST On Session    ${session}    /api/process-audio-files
+    ...    files=${files}
+    ...    expected_status=200
+
+    ${result}=    Set Variable    ${response.json()}
+    Log    Upload result: ${result}
+
+    RETURN    ${result}
diff --git a/tests/resources/plugin_keywords.robot b/tests/resources/plugin_keywords.robot
new file mode 100644
index 00000000..aa63df9a
--- /dev/null
+++ b/tests/resources/plugin_keywords.robot
@@ -0,0 +1,133 @@
+*** Settings ***
+Documentation    Plugin testing resource file
+...
+...              This file contains keywords for plugin testing.
+...              Keywords in this file should handle:
+...              - Mock plugin creation and registration
+...              - Plugin event subscription verification
+...              - Event dispatch testing
+...              - Wake word trigger testing
+...
+Library          Collections
+Library          OperatingSystem
+Library          Process
+Library          DatabaseLibrary
+
+*** Keywords ***
+Create Mock Plugin Config
+    [Documentation]    Create a mock plugin configuration for testing
+    [Arguments]    ${subscriptions}    ${trigger_type}=always    ${wake_words}=${NONE}
+
+    ${config}=    Create Dictionary
+    ...    enabled=True
+    ...    subscriptions=${subscriptions}
+
+    ${trigger}=    Create Dictionary    type=${trigger_type}
+    IF    '${wake_words}' != 'None'
+        Set To Dictionary    ${trigger}    wake_words=${wake_words}
+    END
+    Set To Dictionary    ${config}    trigger=${trigger}
+
+    RETURN    ${config}
+
+Verify Plugin Config Format
+    [Documentation]    Verify plugin config follows new event-based format
+    [Arguments]    ${config}
+
+    Dictionary Should Contain Key    ${config}    subscriptions
+    ...    msg=Plugin config should have 'subscriptions' field
+
+    ${subscriptions}=    Get From Dictionary    ${config}    subscriptions
+    Should Be True    isinstance(${subscriptions}, list)
+    ...    msg=Subscriptions should be a list
+
+    Length Should Be Greater Than    ${subscriptions}    0
+    ...    msg=Plugin should subscribe to at least one event
+
+Verify Event Name Format
+    [Documentation]    Verify event name follows hierarchical naming convention
+    [Arguments]    ${event}
+
+    Should Contain    ${event}    .
+    ...    msg=Event name should contain dot separator (e.g., 'transcript.streaming')
+
+    ${parts}=    Split String    ${event}    .
+    Length Should Be Greater Than    ${parts}    1
+    ...    msg=Event should have domain and type (e.g., 'transcript.streaming')
+
+Verify Event Matches Subscription
+    [Documentation]    Verify an event would match a subscription
+    [Arguments]    ${event}    ${subscription}
+
+    Should Be Equal    ${event}    ${subscription}
+    ...    msg=Event '${event}' should match subscription '${subscription}'
+
+Get Test Plugins Config Path
+    [Documentation]    Get path to test plugins configuration
+    RETURN    ${CURDIR}/../../config/plugins.yml
+
+Verify HA Plugin Uses Events
+    [Documentation]    Verify HomeAssistant plugin config uses event subscriptions
+
+    ${plugins_yml}=    Get Test Plugins Config Path
+    ${config_content}=    Get File    ${plugins_yml}
+
+    Should Contain    ${config_content}    subscriptions:
+    ...    msg=Plugin config should use 'subscriptions' field
+
+    Should Contain    ${config_content}    transcript.streaming
+    ...    msg=HA plugin should subscribe to 'transcript.streaming' event
+
+    Should Not Contain    ${config_content}    access_level:
+    ...    msg=Plugin config should NOT use old 'access_level' field
+
+# Test Plugin Event Database Keywords
+
+Clear Plugin Events
+    [Documentation]    Clear all events from test plugin database
+    Connect To Database    sqlite3    /app/debug/test_plugin_events.db
+    Execute SQL String    DELETE FROM plugin_events
+    Disconnect From Database
+
+Get Plugin Events By Type
+    [Arguments]    ${event_type}
+    [Documentation]    Query plugin events by event type
+    Connect To Database    sqlite3    /app/debug/test_plugin_events.db
+    ${query}=    Query    SELECT * FROM plugin_events WHERE event = '${event_type}' ORDER BY created_at DESC
+    Disconnect From Database
+    RETURN    ${query}
+
+Get Plugin Events By User
+    [Arguments]    ${user_id}
+    [Documentation]    Query plugin events by user_id
+    Connect To Database    sqlite3    /app/debug/test_plugin_events.db
+    ${query}=    Query    SELECT * FROM plugin_events WHERE user_id = '${user_id}' ORDER BY created_at DESC
+    Disconnect From Database
+    RETURN    ${query}
+
+Get All Plugin Events
+    [Documentation]    Get all events from test plugin database
+    Connect To Database    sqlite3    /app/debug/test_plugin_events.db
+    ${query}=    Query    SELECT * FROM plugin_events ORDER BY created_at DESC
+    Disconnect From Database
+    RETURN    ${query}
+
+Get Plugin Event Count
+    [Arguments]    ${event_type}=${NONE}
+    [Documentation]    Get count of events, optionally filtered by type
+    Connect To Database    sqlite3    /app/debug/test_plugin_events.db
+    IF    '${event_type}' != 'None'
+        ${count}=    Row Count    SELECT COUNT(*) FROM plugin_events WHERE event = '${event_type}'
+    ELSE
+        ${count}=    Row Count    SELECT COUNT(*) FROM plugin_events
+    END
+    Disconnect From Database
+    RETURN    ${count}
+
+Verify Event Contains Data
+    [Arguments]    ${event}    @{required_fields}
+    [Documentation]    Verify event contains required data fields
+    FOR    ${field}    IN    @{required_fields}
+        Dictionary Should Contain Key    ${event}    ${field}
+        ...    msg=Event should contain field '${field}'
+    END

From df79524db8a880715e4a9403b3e29d2d9f263995 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 6 Jan 2026 09:51:43 +0000
Subject: [PATCH 18/25] Enhance Docker configurations and startup script for
 test mode

- Updated `docker-compose-test.yml` to include a test command for services, enabling a dedicated test mode.
- Modified `start.sh` to support a `--test` flag, allowing the FastAPI backend to run with test-specific configurations.
- Adjusted worker commands to utilize the `--group test` option in test mode for improved orchestration and management.
---
 backends/advanced/docker-compose-test.yml |  5 +++--
 backends/advanced/start.sh                | 15 ++++++++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml
index 4cfe0327..467a321e 100644
--- a/backends/advanced/docker-compose-test.yml
+++ b/backends/advanced/docker-compose-test.yml
@@ -8,6 +8,7 @@ services:
       context: .
       dockerfile: Dockerfile
       target: dev  # Use dev stage with test dependencies
+    command: ["./start.sh", "--test"]
     ports:
       - "8001:8000"  # Avoid conflict with dev on 8000
     volumes:
@@ -157,7 +158,7 @@ services:
       context: .
       dockerfile: Dockerfile
       target: dev  # Use dev stage with test dependencies
-    command: ["uv", "run", "python", "worker_orchestrator.py"]
+    command: ["uv", "run", "--group", "test", "python", "worker_orchestrator.py"]
     volumes:
       - ./src:/app/src
       - ./worker_orchestrator.py:/app/worker_orchestrator.py
@@ -211,7 +212,7 @@ services:
       dockerfile: Dockerfile
       target: dev  # Use dev stage with test dependencies
     command: >
-      uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_streaming_worker
+      uv run --group test python -m advanced_omi_backend.workers.audio_stream_deepgram_streaming_worker
     volumes:
       - ./src:/app/src
       - ./data/test_data:/app/data
diff --git a/backends/advanced/start.sh b/backends/advanced/start.sh
index 5cc79635..feb8d57a 100755
--- a/backends/advanced/start.sh
+++ b/backends/advanced/start.sh
@@ -2,9 +2,17 @@
 
 # Chronicle Backend Startup Script
 # Starts both the FastAPI backend and RQ workers
+# Usage: ./start.sh [--test]
 
 set -e
 
+# Check for test mode flag
+TEST_MODE=false
+if [[ "$1" == "--test" ]]; then
+    TEST_MODE=true
+    echo "🧪 Running in TEST mode (with test dependencies)"
+fi
+
 echo "🚀 Starting Chronicle Backend..."
 
 # Function to handle shutdown
@@ -53,7 +61,12 @@ sleep 2
 
 # Start the main FastAPI application
 echo "🌐 Starting FastAPI backend..."
-uv run --extra deepgram python3 src/advanced_omi_backend/main.py &
+# Use --group test in test mode
+if [ "$TEST_MODE" = true ]; then
+    uv run --extra deepgram --group test python3 src/advanced_omi_backend/main.py &
+else
+    uv run --extra deepgram python3 src/advanced_omi_backend/main.py &
+fi
 BACKEND_PID=$!
 
 # Wait for any process to exit

From 668dfea77d079487a766882a6c797ee2d5ae57a5 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Wed, 7 Jan 2026 03:41:22 +0000
Subject: [PATCH 19/25] Refactor test scripts for improved reliability and
 clarity

- Updated `run-robot-tests.sh` to enhance the verification of the Deepgram batch worker process, ensuring non-numeric characters are removed from the check.
- Modified `plugin_tests.robot` to use a more explicit method for checking the length of subscriptions and added a skip condition for unavailable audio files.
- Adjusted `plugin_event_tests.robot` to load the test audio file from a variable, improving test data management.
- Refactored `plugin_keywords.robot` to utilize clearer length checks for subscriptions and event parts, enhancing readability and maintainability.
---
 tests/endpoints/plugin_tests.robot         | 6 +++++-
 tests/integration/plugin_event_tests.robot | 3 ++-
 tests/resources/plugin_keywords.robot      | 8 +++++---
 tests/run-robot-tests.sh                   | 7 ++++---
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/tests/endpoints/plugin_tests.robot b/tests/endpoints/plugin_tests.robot
index 0b5a4db2..7e5ae0f9 100644
--- a/tests/endpoints/plugin_tests.robot
+++ b/tests/endpoints/plugin_tests.robot
@@ -46,7 +46,8 @@ Plugin Mock Config Creation
     ${multi_config}=    Create Mock Plugin Config
     ...    subscriptions=["transcript.streaming", "transcript.batch", "conversation.complete"]
     ${multi_subs}=    Get From Dictionary    ${multi_config}    subscriptions
-    Length Should Be Equal    ${multi_subs}    3
+    ${length}=    Get Length    ${multi_subs}
+    Should Be Equal As Integers    ${length}    3
     ...    msg=Plugin should subscribe to 3 events
 
 Plugin Mock With Wake Word Trigger
@@ -96,6 +97,9 @@ Batch Transcription Should Trigger Batch Event
     # Upload audio file for batch processing
     ${result}=    Upload Single Audio File
 
+    # Skip test if audio file not available
+    Skip If    ${result}[successful] == 0    Test audio file not available
+
     # Verify processing completed
     Should Be True    ${result}[successful] > 0
     ...    msg=At least one file should be processed successfully
diff --git a/tests/integration/plugin_event_tests.robot b/tests/integration/plugin_event_tests.robot
index 5d7d3094..4bdd49d1 100644
--- a/tests/integration/plugin_event_tests.robot
+++ b/tests/integration/plugin_event_tests.robot
@@ -17,12 +17,13 @@ Resource         ../resources/conversation_keywords.robot
 Resource         ../resources/audio_keywords.robot
 Resource         ../resources/plugin_keywords.robot
 Resource         ../resources/websocket_keywords.robot
+Variables        ../setup/test_data.py
 Suite Setup      Test Suite Setup
 Suite Teardown   Suite Teardown
 Test Setup       Test Cleanup
 
 *** Variables ***
-${TEST_AUDIO_FILE}    ${CURDIR}/../../extras/test-audios/DIY Muffin Enamel Short Mono 16khz.wav
+# TEST_AUDIO_FILE is loaded from test_data.py
 
 *** Test Cases ***
 
diff --git a/tests/resources/plugin_keywords.robot b/tests/resources/plugin_keywords.robot
index aa63df9a..a7c2cd8b 100644
--- a/tests/resources/plugin_keywords.robot
+++ b/tests/resources/plugin_keywords.robot
@@ -23,7 +23,7 @@ Create Mock Plugin Config
     ...    subscriptions=${subscriptions}
 
     ${trigger}=    Create Dictionary    type=${trigger_type}
-    IF    '${wake_words}' != 'None'
+    IF    $wake_words is not None
         Set To Dictionary    ${trigger}    wake_words=${wake_words}
     END
     Set To Dictionary    ${config}    trigger=${trigger}
@@ -41,7 +41,8 @@ Verify Plugin Config Format
     Should Be True    isinstance(${subscriptions}, list)
     ...    msg=Subscriptions should be a list
 
-    Length Should Be Greater Than    ${subscriptions}    0
+    ${length}=    Get Length    ${subscriptions}
+    Should Be True    ${length} > 0
     ...    msg=Plugin should subscribe to at least one event
 
 Verify Event Name Format
@@ -52,7 +53,8 @@ Verify Event Name Format
     ...    msg=Event name should contain dot separator (e.g., 'transcript.streaming')
 
     ${parts}=    Split String    ${event}    .
-    Length Should Be Greater Than    ${parts}    1
+    ${length}=    Get Length    ${parts}
+    Should Be True    ${length} > 1
     ...    msg=Event should have domain and type (e.g., 'transcript.streaming')
 
 Verify Event Matches Subscription
diff --git a/tests/run-robot-tests.sh b/tests/run-robot-tests.sh
index 04787825..ea7fa949 100755
--- a/tests/run-robot-tests.sh
+++ b/tests/run-robot-tests.sh
@@ -275,12 +275,13 @@ done
 
 # Verify batch Deepgram worker is running
 print_info "Verifying Deepgram batch worker process..."
-BATCH_WORKER_CHECK=$(docker compose -f docker-compose-test.yml exec -T workers-test ps aux | grep -c "audio_stream_deepgram_worker" || echo "0")
-if [ "$BATCH_WORKER_CHECK" -gt 0 ]; then
+BATCH_WORKER_CHECK=$(docker compose -f docker-compose-test.yml exec -T workers-test ps aux | grep -c "audio_stream_deepgram_worker" || echo "0" | tr -d '\n\r')
+BATCH_WORKER_CHECK=${BATCH_WORKER_CHECK//[^0-9]/}  # Remove non-numeric characters
+if [ -n "$BATCH_WORKER_CHECK" ] && [ "$BATCH_WORKER_CHECK" -gt 0 ]; then
     print_success "Deepgram batch worker process is running"
 else
     print_warning "Deepgram batch worker process not found - checking logs..."
-    docker compose -f docker-compose-test.yml logs --tail=30 workers-test | grep -i "deepgram"
+    docker compose -f docker-compose-test.yml logs --tail=30 workers-test | grep -i "deepgram" || true
 fi
 
 # Check Redis consumer groups registration

From 197a6108d0c4e67082f20863b3316121730a213b Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Wed, 7 Jan 2026 04:26:38 +0000
Subject: [PATCH 20/25] remove mistral deadcode; notebooks untouched

---
 .env.template                                 |  6 +-----
 CLAUDE.md                                     | 15 +++++--------
 Docs/getting-started.md                       | 21 ++++++-------------
 .../Docs/memory-configuration-guide.md        |  2 +-
 backends/advanced/Docs/quickstart.md          | 21 ++++++-------------
 backends/advanced/README.md                   |  2 +-
 backends/advanced/SETUP_SCRIPTS.md            |  9 ++++----
 .../models/conversation.py                    |  3 +--
 .../services/audio_stream/producer.py         |  2 +-
 .../services/transcription/base.py            |  1 -
 .../tests/test_conversation_models.py         |  5 ++---
 config.env.template                           |  6 +-----
 tests/configs/README.md                       |  4 ++--
 13 files changed, 31 insertions(+), 66 deletions(-)

diff --git a/.env.template b/.env.template
index c2a4d8a2..388edbf5 100644
--- a/.env.template
+++ b/.env.template
@@ -90,16 +90,12 @@ CHAT_TEMPERATURE=0.7
 # SPEECH-TO-TEXT CONFIGURATION
 # ========================================
 
-# Primary transcription provider: deepgram, mistral, or parakeet
+# Primary transcription provider: deepgram or parakeet
 TRANSCRIPTION_PROVIDER=deepgram
 
 # Deepgram configuration
 DEEPGRAM_API_KEY=your-deepgram-key-here
 
-# Mistral configuration (when TRANSCRIPTION_PROVIDER=mistral)
-MISTRAL_API_KEY=your-mistral-key-here
-MISTRAL_MODEL=voxtral-mini-2507
-
 # Parakeet ASR configuration (when TRANSCRIPTION_PROVIDER=parakeet)
 PARAKEET_ASR_URL=http://host.docker.internal:8767
 
diff --git a/CLAUDE.md b/CLAUDE.md
index abe20db6..dfd92196 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -18,7 +18,7 @@ This supports a comprehensive web dashboard for management.
 Chronicle includes an **interactive setup wizard** for easy configuration. The wizard guides you through:
 - Service selection (backend + optional services)
 - Authentication setup (admin account, JWT secrets)
-- Transcription provider configuration (Deepgram, Mistral, or offline ASR)
+- Transcription provider configuration (Deepgram or offline ASR)
 - LLM provider setup (OpenAI or Ollama)
 - Memory provider selection (Chronicle Native with Qdrant or OpenMemory MCP)
 - Network configuration and HTTPS setup
@@ -184,12 +184,12 @@ docker compose up --build
 ## Architecture Overview
 
 ### Key Components
-- **Audio Pipeline**: Real-time Opus/PCM → Application-level processing → Deepgram/Mistral transcription → memory extraction
+- **Audio Pipeline**: Real-time Opus/PCM → Application-level processing → Deepgram transcription → memory extraction
 - **Wyoming Protocol**: WebSocket communication uses Wyoming protocol (JSONL + binary) for structured audio sessions
 - **Unified Pipeline**: Job-based tracking system for all audio processing (WebSocket and file uploads)
 - **Job Tracker**: Tracks pipeline jobs with stage events (audio → transcription → memory) and completion status
 - **Task Management**: BackgroundTaskManager tracks all async tasks to prevent orphaned processes
-- **Unified Transcription**: Deepgram/Mistral transcription with fallback to offline ASR services
+- **Unified Transcription**: Deepgram transcription with fallback to offline ASR services
 - **Memory System**: Pluggable providers (Chronicle native or OpenMemory MCP)
 - **Authentication**: Email-based login with MongoDB ObjectId user system
 - **Client Management**: Auto-generated client IDs as `{user_id_suffix}-{device_name}`, centralized ClientManager
@@ -205,7 +205,7 @@ Required:
 
 Recommended:
   - Vector Storage: Qdrant (Chronicle provider) or OpenMemory MCP server
-  - Transcription: Deepgram, Mistral, or offline ASR services
+  - Transcription: Deepgram or offline ASR services
 
 Optional:
   - Parakeet ASR: Offline transcription service
@@ -329,12 +329,7 @@ Chronicle supports multiple transcription services:
 TRANSCRIPTION_PROVIDER=deepgram
 DEEPGRAM_API_KEY=your-deepgram-key-here
 
-# Option 2: Mistral (Voxtral models)
-TRANSCRIPTION_PROVIDER=mistral
-MISTRAL_API_KEY=your-mistral-key-here
-MISTRAL_MODEL=voxtral-mini-2507
-
-# Option 3: Local ASR (Parakeet)
+# Option 2: Local ASR (Parakeet)
 PARAKEET_ASR_URL=http://host.docker.internal:8767
 ```
 
diff --git a/Docs/getting-started.md b/Docs/getting-started.md
index a923c99c..b8115ff6 100644
--- a/Docs/getting-started.md
+++ b/Docs/getting-started.md
@@ -36,7 +36,7 @@ cd backends/advanced
 
 **The setup wizard will guide you through:**
 - **Authentication**: Admin email/password setup
-- **Transcription Provider**: Choose Deepgram, Mistral, or Offline (Parakeet)
+- **Transcription Provider**: Choose Deepgram or Offline (Parakeet)
 - **LLM Provider**: Choose OpenAI or Ollama for memory extraction
 - **Memory Provider**: Choose Chronicle Native or OpenMemory MCP
 - **Optional Services**: Speaker Recognition and other extras
@@ -52,14 +52,13 @@ cd backends/advanced
 Admin email [admin@example.com]: john@company.com
 Admin password (min 8 chars): ********
 
-► Speech-to-Text Configuration  
+► Speech-to-Text Configuration
 -------------------------------
 Choose your transcription provider:
   1) Deepgram (recommended - high quality, requires API key)
-  2) Mistral (Voxtral models - requires API key) 
-  3) Offline (Parakeet ASR - requires GPU, runs locally)
-  4) None (skip transcription setup)
-Enter choice (1-4) [1]: 1
+  2) Offline (Parakeet ASR - requires GPU, runs locally)
+  3) None (skip transcription setup)
+Enter choice (1-3) [1]: 1
 
 Get your API key from: https://console.deepgram.com/
 Deepgram API key: dg_xxxxxxxxxxxxx
@@ -154,12 +153,7 @@ OLLAMA_BASE_URL=http://ollama:11434
 TRANSCRIPTION_PROVIDER=deepgram
 DEEPGRAM_API_KEY=your-deepgram-api-key-here
 
-# Option 2: Mistral (Voxtral models for transcription)
-TRANSCRIPTION_PROVIDER=mistral
-MISTRAL_API_KEY=your-mistral-api-key-here
-MISTRAL_MODEL=voxtral-mini-2507
-
-# Option 3: Local ASR service  
+# Option 2: Local ASR service
 PARAKEET_ASR_URL=http://host.docker.internal:8080
 ```
 
@@ -167,7 +161,6 @@ PARAKEET_ASR_URL=http://host.docker.internal:8080
 - **OpenAI is strongly recommended** for LLM processing as it provides much better memory extraction and eliminates JSON parsing errors
 - **TRANSCRIPTION_PROVIDER** determines which service to use:
   - `deepgram`: Uses Deepgram's Nova-3 model for high-quality transcription
-  - `mistral`: Uses Mistral's Voxtral models for transcription
   - If not set, system falls back to offline ASR service
 - The system requires either online API keys or offline ASR service configuration
 
@@ -312,7 +305,6 @@ curl -X POST "http://localhost:8000/api/process-audio-files" \
 
 ### Transcription Options
 - **Deepgram API**: Cloud-based batch processing, high accuracy (recommended)
-- **Mistral API**: Voxtral models for transcription with REST API processing
 - **Self-hosted ASR**: Local Wyoming protocol services with real-time processing
 - **Collection timeout**: 1.5 minute collection for optimal online processing quality
 
@@ -407,7 +399,6 @@ uv sync --group (whatever group you want to sync)
 
 **Transcription Issues:**
 - **Deepgram**: Verify API key is valid and `TRANSCRIPTION_PROVIDER=deepgram`
-- **Mistral**: Verify API key is valid and `TRANSCRIPTION_PROVIDER=mistral`
 - **Self-hosted**: Ensure ASR service is running on port 8765
 - Check transcription service connection in health endpoint
 
diff --git a/backends/advanced/Docs/memory-configuration-guide.md b/backends/advanced/Docs/memory-configuration-guide.md
index 12796e13..66244003 100644
--- a/backends/advanced/Docs/memory-configuration-guide.md
+++ b/backends/advanced/Docs/memory-configuration-guide.md
@@ -65,7 +65,7 @@ memory:
 - **Embeddings**: `text-embedding-3-small`, `text-embedding-3-large`
 
 #### Ollama Models (Local)
-- **LLM**: `llama3`, `mistral`, `qwen2.5`
+- **LLM**: `llama3`, `qwen2.5`
 - **Embeddings**: `nomic-embed-text`, `all-minilm`
 
 ## Hot Reload
diff --git a/backends/advanced/Docs/quickstart.md b/backends/advanced/Docs/quickstart.md
index 0d681978..96a66421 100644
--- a/backends/advanced/Docs/quickstart.md
+++ b/backends/advanced/Docs/quickstart.md
@@ -34,7 +34,7 @@ cd backends/advanced
 
 **The setup wizard will guide you through:**
 - **Authentication**: Admin email/password setup
-- **Transcription Provider**: Choose Deepgram, Mistral, or Offline (Parakeet)
+- **Transcription Provider**: Choose Deepgram or Offline (Parakeet)
 - **LLM Provider**: Choose OpenAI or Ollama for memory extraction
 - **Memory Provider**: Choose Chronicle Native or OpenMemory MCP
 - **Optional Services**: Speaker Recognition and other extras
@@ -50,14 +50,13 @@ cd backends/advanced
 Admin email [admin@example.com]: john@company.com
 Admin password (min 8 chars): ********
 
-► Speech-to-Text Configuration  
+► Speech-to-Text Configuration
 -------------------------------
 Choose your transcription provider:
   1) Deepgram (recommended - high quality, requires API key)
-  2) Mistral (Voxtral models - requires API key) 
-  3) Offline (Parakeet ASR - requires GPU, runs locally)
-  4) None (skip transcription setup)
-Enter choice (1-4) [1]: 1
+  2) Offline (Parakeet ASR - requires GPU, runs locally)
+  3) None (skip transcription setup)
+Enter choice (1-3) [1]: 1
 
 Get your API key from: https://console.deepgram.com/
 Deepgram API key: dg_xxxxxxxxxxxxx
@@ -152,12 +151,7 @@ OLLAMA_BASE_URL=http://ollama:11434
 TRANSCRIPTION_PROVIDER=deepgram
 DEEPGRAM_API_KEY=your-deepgram-api-key-here
 
-# Option 2: Mistral (Voxtral models for transcription)
-TRANSCRIPTION_PROVIDER=mistral
-MISTRAL_API_KEY=your-mistral-api-key-here
-MISTRAL_MODEL=voxtral-mini-2507
-
-# Option 3: Local ASR service  
+# Option 2: Local ASR service
 PARAKEET_ASR_URL=http://host.docker.internal:8080
 ```
 
@@ -165,7 +159,6 @@ PARAKEET_ASR_URL=http://host.docker.internal:8080
 - **OpenAI is strongly recommended** for LLM processing as it provides much better memory extraction and eliminates JSON parsing errors
 - **TRANSCRIPTION_PROVIDER** determines which service to use:
   - `deepgram`: Uses Deepgram's Nova-3 model for high-quality transcription
-  - `mistral`: Uses Mistral's Voxtral models for transcription
   - If not set, system falls back to offline ASR service
 - The system requires either online API keys or offline ASR service configuration
 
@@ -310,7 +303,6 @@ curl -X POST "http://localhost:8000/api/audio/upload" \
 
 ### Transcription Options
 - **Deepgram API**: Cloud-based batch processing, high accuracy (recommended)
-- **Mistral API**: Voxtral models for transcription with REST API processing
 - **Self-hosted ASR**: Local Wyoming protocol services with real-time processing
 - **Collection timeout**: 1.5 minute collection for optimal online processing quality
 
@@ -405,7 +397,6 @@ uv sync --group (whatever group you want to sync)
 
 **Transcription Issues:**
 - **Deepgram**: Verify API key is valid and `TRANSCRIPTION_PROVIDER=deepgram`
-- **Mistral**: Verify API key is valid and `TRANSCRIPTION_PROVIDER=mistral`
 - **Self-hosted**: Ensure ASR service is running on port 8765
 - Check transcription service connection in health endpoint
 
diff --git a/backends/advanced/README.md b/backends/advanced/README.md
index d493241c..60c832f0 100644
--- a/backends/advanced/README.md
+++ b/backends/advanced/README.md
@@ -31,7 +31,7 @@ Modern React-based web dashboard located in `./webui/` with:
 
 **The setup wizard guides you through:**
 - **Authentication**: Admin email/password setup with secure keys
-- **Transcription Provider**: Choose between Deepgram, Mistral, or Offline (Parakeet)
+- **Transcription Provider**: Choose between Deepgram or Offline (Parakeet)
 - **LLM Provider**: Choose between OpenAI (recommended) or Ollama for memory extraction
 - **Memory Provider**: Choose between Friend-Lite Native or OpenMemory MCP
 - **Optional Services**: Speaker Recognition, network configuration
diff --git a/backends/advanced/SETUP_SCRIPTS.md b/backends/advanced/SETUP_SCRIPTS.md
index b45c8910..7103e220 100644
--- a/backends/advanced/SETUP_SCRIPTS.md
+++ b/backends/advanced/SETUP_SCRIPTS.md
@@ -15,7 +15,7 @@ This document explains the different setup scripts available in Friend-Lite and
 
 ### What it does:
 - ✅ **Authentication Setup**: Admin email/password with secure key generation
-- ✅ **Transcription Provider Selection**: Choose between Deepgram, Mistral, or Offline (Parakeet)
+- ✅ **Transcription Provider Selection**: Choose between Deepgram or Offline (Parakeet)
 - ✅ **LLM Provider Configuration**: Choose between OpenAI (recommended) or Ollama
 - ✅ **Memory Provider Setup**: Choose between Friend-Lite Native or OpenMemory MCP
 - ✅ **API Key Collection**: Prompts for required keys with helpful links to obtain them
@@ -43,10 +43,9 @@ Admin password (min 8 chars): ********
 -------------------------------
 Choose your transcription provider:
   1) Deepgram (recommended - high quality, requires API key)
-  2) Mistral (Voxtral models - requires API key)
-  3) Offline (Parakeet ASR - requires GPU, runs locally)
-  4) None (skip transcription setup)
-Enter choice (1-4) [1]: 1
+  2) Offline (Parakeet ASR - requires GPU, runs locally)
+  3) None (skip transcription setup)
+Enter choice (1-3) [1]: 1
 
 Get your API key from: https://console.deepgram.com/
 Deepgram API key: dg_xxxxxxxxxxxxx
diff --git a/backends/advanced/src/advanced_omi_backend/models/conversation.py b/backends/advanced/src/advanced_omi_backend/models/conversation.py
index 01dd5d96..735a8be5 100644
--- a/backends/advanced/src/advanced_omi_backend/models/conversation.py
+++ b/backends/advanced/src/advanced_omi_backend/models/conversation.py
@@ -21,7 +21,6 @@ class Conversation(Document):
     class TranscriptProvider(str, Enum):
         """Supported transcription providers."""
         DEEPGRAM = "deepgram"
-        MISTRAL = "mistral"
         PARAKEET = "parakeet"
         SPEECH_DETECTION = "speech_detection"  # Legacy value
         UNKNOWN = "unknown"  # Fallback value
@@ -63,7 +62,7 @@ class TranscriptVersion(BaseModel):
         transcript: Optional[str] = Field(None, description="Full transcript text")
         segments: List["Conversation.SpeakerSegment"] = Field(default_factory=list, description="Speaker segments")
         provider: Optional["Conversation.TranscriptProvider"] = Field(None, description="Transcription provider used")
-        model: Optional[str] = Field(None, description="Model used (e.g., nova-3, voxtral-mini-2507)")
+        model: Optional[str] = Field(None, description="Model used (e.g., nova-3, parakeet)")
         created_at: datetime = Field(description="When this version was created")
         processing_time_seconds: Optional[float] = Field(None, description="Time taken to process")
         metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional provider-specific metadata")
diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py
index 66b0acf7..f7299cda 100644
--- a/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py
+++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py
@@ -52,7 +52,7 @@ async def init_session(
             user_id: User identifier
             client_id: Client identifier
             mode: Processing mode (streaming/batch)
-            provider: Transcription provider ("deepgram", "mistral", etc.)
+            provider: Transcription provider ("deepgram", "parakeet", etc.)
         """
         # Client-specific stream naming (one stream per client for isolation)
         stream_name = f"audio:stream:{client_id}"
diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/base.py b/backends/advanced/src/advanced_omi_backend/services/transcription/base.py
index 13893a68..7d0f2306 100644
--- a/backends/advanced/src/advanced_omi_backend/services/transcription/base.py
+++ b/backends/advanced/src/advanced_omi_backend/services/transcription/base.py
@@ -36,7 +36,6 @@ class TranscriptionProvider(Enum):
     """Available transcription providers for audio stream routing."""
     DEEPGRAM = "deepgram"
     PARAKEET = "parakeet"
-    MISTRAL = "mistral"
 
 
 class BaseTranscriptionProvider(abc.ABC):
diff --git a/backends/advanced/tests/test_conversation_models.py b/backends/advanced/tests/test_conversation_models.py
index e4387c89..c2c27dd0 100644
--- a/backends/advanced/tests/test_conversation_models.py
+++ b/backends/advanced/tests/test_conversation_models.py
@@ -134,7 +134,7 @@ def test_add_transcript_version(self):
             version_id="v2",
             transcript="Updated transcript",
             segments=segments,
-            provider=TranscriptProvider.MISTRAL,
+            provider=TranscriptProvider.PARAKEET,
             set_as_active=False
         )
 
@@ -170,7 +170,7 @@ def test_set_active_versions(self):
         segments2 = [SpeakerSegment(start=0.0, end=5.0, text="Version 2", speaker="Speaker A")]
 
         conversation.add_transcript_version("v1", "Transcript 1", segments1, TranscriptProvider.DEEPGRAM)
-        conversation.add_transcript_version("v2", "Transcript 2", segments2, TranscriptProvider.MISTRAL, set_as_active=False)
+        conversation.add_transcript_version("v2", "Transcript 2", segments2, TranscriptProvider.PARAKEET, set_as_active=False)
 
         # Should be v1 active
         assert conversation.active_transcript_version == "v1"
@@ -213,7 +213,6 @@ def test_provider_enums(self):
         """Test that provider enums work correctly."""
         # Test TranscriptProvider enum
         assert TranscriptProvider.DEEPGRAM == "deepgram"
-        assert TranscriptProvider.MISTRAL == "mistral"
         assert TranscriptProvider.PARAKEET == "parakeet"
 
         # Test MemoryProvider enum
diff --git a/config.env.template b/config.env.template
index 3312dfae..bc7d0ca4 100644
--- a/config.env.template
+++ b/config.env.template
@@ -65,16 +65,12 @@ OPENAI_API_KEY = sk-xxxxx
 # SPEECH-TO-TEXT CONFIGURATION
 # ========================================
 
-# Primary transcription provider: deepgram, mistral, or parakeet
+# Primary transcription provider: deepgram or parakeet
 TRANSCRIPTION_PROVIDER = deepgram
 
 # Deepgram configuration
 DEEPGRAM_API_KEY = 90xxxxxx
 
-# Mistral configuration (when TRANSCRIPTION_PROVIDER=mistral)
-MISTRAL_API_KEY = 
-MISTRAL_MODEL = voxtral-mini-2507
-
 # Parakeet ASR configuration (when TRANSCRIPTION_PROVIDER=parakeet)
 PARAKEET_ASR_URL = http://host.docker.internal:8767
 
diff --git a/tests/configs/README.md b/tests/configs/README.md
index 8b1e196f..0b6ff73d 100644
--- a/tests/configs/README.md
+++ b/tests/configs/README.md
@@ -60,7 +60,7 @@ done
 
 When creating a new test configuration:
 
-1. **Name it descriptively**: `{stt}-{llm}.yml` (e.g., `mistral-openai.yml`)
+1. **Name it descriptively**: `{stt}-{llm}.yml` (e.g., `deepgram-openai.yml`)
 2. **Use environment variables**: Always use `${VAR:-default}` pattern for secrets
 3. **Set appropriate defaults**: Update the `defaults:` section to match your provider combo
 4. **Include only required models**: Don't include models that aren't used
@@ -124,7 +124,7 @@ Test configs use environment variable substitution to avoid hardcoding secrets:
 
 As you add support for new providers, create corresponding test configs:
 
-- `mistral-openai.yml` - Mistral Voxtral STT + OpenAI LLM
+- `deepgram-openai.yml` - Deepgram STT + OpenAI LLM
 - `deepgram-ollama.yml` - Deepgram STT + Local Ollama LLM
 - `parakeet-openai.yml` - Local Parakeet STT + OpenAI LLM
 - etc.

From a65b1bfc9fe6ba797242f2917943bcabb97b6518 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sat, 10 Jan 2026 08:30:20 +0000
Subject: [PATCH 21/25] Refactor audio streaming endpoints and improve
 documentation

- Updated WebSocket endpoints to use a unified format with codec parameters (`/ws?codec=pcm` and `/ws?codec=opus`) for audio streaming, replacing the previous `/ws_pcm` and `/ws_omi` endpoints.
- Enhanced documentation to reflect the new endpoint structure and clarify audio processing capabilities.
- Removed deprecated audio cropping functionality and related configurations to streamline the audio processing workflow.
- Updated various components and scripts to align with the new endpoint structure, ensuring consistent usage across the application.
---
 CLAUDE.md                                     |   2 +-
 app/README.md                                 |  20 +-
 app/app/components/BackendStatus.tsx          |   4 +-
 app/app/index.tsx                             |  14 +-
 backends/advanced/Docs/architecture.md        |  10 +-
 backends/advanced/Docs/auth.md                |   8 +-
 backends/advanced/docker-compose-test.yml     |  42 +---
 backends/advanced/docker-compose.yml          |  30 ---
 backends/advanced/scripts/laptop_client.py    |   2 +-
 .../src/advanced_omi_backend/app_config.py    |   5 -
 .../clients/audio_stream_client.py            |  12 +-
 .../controllers/audio_controller.py           |  46 +---
 .../controllers/conversation_controller.py    |  37 +---
 .../controllers/queue_controller.py           |  58 +++--
 .../controllers/websocket_controller.py       |  56 +++--
 .../advanced/src/advanced_omi_backend/main.py |   2 +-
 .../middleware/app_middleware.py              |   2 -
 .../advanced_omi_backend/models/audio_file.py |   3 -
 .../models/conversation.py                    |  13 +-
 .../routers/modules/audio_routes.py           |   5 +-
 .../routers/modules/conversation_routes.py    |   8 -
 .../routers/modules/health_routes.py          |   1 -
 .../routers/modules/websocket_routes.py       |  41 ++--
 .../services/audio_stream/producer.py         |  94 +++++++-
 .../services/transcription/deepgram.py        |  92 --------
 .../transcription/parakeet_stream_consumer.py |  90 --------
 ...ream_consumer.py => streaming_consumer.py} |  42 ++--
 .../advanced_omi_backend/utils/audio_utils.py | 206 ------------------
 .../advanced_omi_backend/workers/__init__.py  |   8 +-
 .../workers/audio_jobs.py                     | 201 -----------------
 .../workers/audio_stream_deepgram_worker.py   |  80 -------
 .../workers/audio_stream_parakeet_worker.py   |  95 --------
 ...aming_worker.py => audio_stream_worker.py} |  30 ++-
 .../workers/orchestrator/worker_registry.py   |  91 +++-----
 .../workers/transcription_jobs.py             |   6 +-
 backends/advanced/start-k8s.sh                |  11 +-
 backends/advanced/uv.lock                     |  11 +
 .../webui/src/components/audio/DebugPanel.tsx |   2 +-
 .../src/components/audio/RecordingStatus.tsx  |   2 +-
 .../webui/src/hooks/useAudioRecording.ts      |   6 +-
 .../src/hooks/useSimpleAudioRecording.ts      |   6 +-
 .../webui/src/pages/Conversations.tsx         |  38 +---
 .../webui/src/pages/ConversationsTimeline.tsx |  21 +-
 backends/advanced/webui/src/pages/Queue.tsx   |  34 +--
 .../advanced-backend/ingress-values.yaml      |   4 -
 extras/havpe-relay/README.md                  |  14 +-
 extras/havpe-relay/docker-compose.yml         |   2 +-
 extras/havpe-relay/main.py                    |   4 +-
 extras/local-omi-bt/send_to_adv.py            |   2 +-
 tests/configs/deepgram-openai.yml             |  32 +++
 tests/endpoints/health_tests.robot            |   2 -
 tests/endpoints/plugin_tests.robot            |   9 +-
 tests/infrastructure/infra_tests.robot        |   7 +-
 .../audio_streaming_integration_tests.robot   | 187 ++++++++++++++++
 tests/integration/conversation_queue.robot    |   2 +-
 tests/integration/integration_test.robot      |  19 +-
 tests/integration/plugin_event_tests.robot    |   3 +-
 .../websocket_streaming_tests.robot           |  85 +-------
 tests/resources/audio_keywords.robot          |   8 -
 tests/resources/conversation_keywords.robot   |   7 -
 tests/resources/plugin_keywords.robot         |  10 +-
 tests/resources/queue_keywords.robot          |   4 +-
 tests/resources/redis_keywords.robot          |  75 +++++++
 tests/resources/websocket_keywords.robot      |  26 ++-
 tests/run-robot-tests.sh                      |  34 +--
 tests/setup/test_data.py                      |   2 +-
 tests/test-requirements.txt                   |   1 +
 67 files changed, 726 insertions(+), 1400 deletions(-)
 delete mode 100644 backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py
 delete mode 100644 backends/advanced/src/advanced_omi_backend/services/transcription/parakeet_stream_consumer.py
 rename backends/advanced/src/advanced_omi_backend/services/transcription/{deepgram_stream_consumer.py => streaming_consumer.py} (92%)
 delete mode 100644 backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py
 delete mode 100644 backends/advanced/src/advanced_omi_backend/workers/audio_stream_parakeet_worker.py
 rename backends/advanced/src/advanced_omi_backend/workers/{audio_stream_deepgram_streaming_worker.py => audio_stream_worker.py} (76%)
 create mode 100644 tests/integration/audio_streaming_integration_tests.robot
 create mode 100644 tests/resources/redis_keywords.robot

diff --git a/CLAUDE.md b/CLAUDE.md
index d88ba1b9..88c901be 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -340,7 +340,7 @@ SPEAKER_SERVICE_URL=http://speaker-recognition:8085
 ### Common Endpoints
 - **GET /health**: Basic application health check
 - **GET /readiness**: Service dependency validation
-- **WS /ws_pcm**: Primary audio streaming endpoint (Wyoming protocol + raw PCM fallback)
+- **WS /ws**: Audio streaming endpoint with codec parameter (Wyoming protocol, supports pcm and opus codecs)
 - **GET /api/conversations**: User's conversations with transcripts
 - **GET /api/memories/search**: Semantic memory search with relevance scoring
 - **POST /auth/jwt/login**: Email-based login (returns JWT token)
diff --git a/app/README.md b/app/README.md
index d73dd748..e85e83e5 100644
--- a/app/README.md
+++ b/app/README.md
@@ -120,14 +120,14 @@ The app connects to any backend that accepts OPUS audio streams:
 2. **Advanced Backend** (`backends/advanced/`)
    - Full transcription and memory features
    - Real-time processing with speaker recognition
-   - WebSocket endpoint: `/ws_pcm`
+   - WebSocket endpoint: `/ws?codec=pcm`
 
 ### Connection Setup
 
 #### Local Development
 ```
-Backend URL: ws://[machine-ip]:8000/ws_pcm
-Example: ws://192.168.1.100:8000/ws_pcm
+Backend URL: ws://[machine-ip]:8000/ws?codec=pcm
+Example: ws://192.168.1.100:8000/ws?codec=pcm
 ```
 
 #### Public Access (Production)
@@ -138,7 +138,7 @@ Use ngrok or similar tunneling service:
 ngrok http 8000
 
 # Use provided URL in app
-Backend URL: wss://[ngrok-subdomain].ngrok.io/ws_pcm
+Backend URL: wss://[ngrok-subdomain].ngrok.io/ws?codec=pcm
 ```
 
 ### Configuration Steps
@@ -147,8 +147,8 @@ Backend URL: wss://[ngrok-subdomain].ngrok.io/ws_pcm
 2. **Open the mobile app**
 3. **Navigate to Settings**
 4. **Enter Backend URL**:
-   - Local: `ws://[your-ip]:8000/ws_pcm`
-   - Public: `wss://[your-domain]/ws_pcm`
+   - Local: `ws://[your-ip]:8000/ws?codec=pcm`
+   - Public: `wss://[your-domain]/ws?codec=pcm`
 5. **Save configuration**
 
 ## Phone Audio Streaming (NEW)
@@ -176,7 +176,7 @@ Stream audio directly from your phone's microphone to Chronicle backend, bypassi
 - **iOS**: iOS 13+ with microphone permissions
 - **Android**: Android API 21+ with microphone permissions  
 - **Network**: Stable connection to Chronicle backend
-- **Backend**: Advanced backend running with `/ws_pcm` endpoint
+- **Backend**: Advanced backend running with `/ws?codec=pcm` endpoint
 
 #### Switching Audio Sources
 - **Mutual Exclusion**: Cannot use Bluetooth and phone audio simultaneously
@@ -187,7 +187,7 @@ Stream audio directly from your phone's microphone to Chronicle backend, bypassi
 
 #### Audio Not Streaming
 - **Check Permissions**: Ensure microphone access granted
-- **Verify Backend URL**: Confirm `ws://[ip]:8000/ws_pcm` format
+- **Verify Backend URL**: Confirm `ws://[ip]:8000/ws?codec=pcm` format
 - **Network Connection**: Test backend connectivity
 - **Authentication**: Verify JWT token is valid
 
@@ -292,7 +292,7 @@ curl -i -N -H "Connection: Upgrade" \
      -H "Upgrade: websocket" \
      -H "Sec-WebSocket-Key: test" \
      -H "Sec-WebSocket-Version: 13" \
-     http://[backend-ip]:8000/ws_pcm
+     http://[backend-ip]:8000/ws?codec=pcm
 ```
 
 ## Development
@@ -338,7 +338,7 @@ npx expo build:android
 ### WebSocket Communication
 ```javascript
 // Connect to backend
-const ws = new WebSocket('ws://backend-url:8000/ws_pcm');
+const ws = new WebSocket('ws://backend-url:8000/ws?codec=pcm');
 
 // Send audio data
 ws.send(audioBuffer);
diff --git a/app/app/components/BackendStatus.tsx b/app/app/components/BackendStatus.tsx
index 75fdd7a8..4f55d37f 100644
--- a/app/app/components/BackendStatus.tsx
+++ b/app/app/components/BackendStatus.tsx
@@ -208,9 +208,9 @@ export const BackendStatus: React.FC<BackendStatusProps> = ({
       </TouchableOpacity>
 
       <Text style={styles.helpText}>
-        Enter the WebSocket URL of your backend server. Simple backend: http://localhost:8000/ (no auth). 
+        Enter the WebSocket URL of your backend server. Simple backend: http://localhost:8000/ (no auth).
         Advanced backend: http://localhost:8080/ (requires login). Status is automatically checked.
-        The websocket URL can be different or the same as the HTTP URL, with /ws_omi suffix
+        The websocket URL can be different or the same as the HTTP URL, with /ws endpoint and codec parameter (e.g., /ws?codec=pcm)
       </Text>
     </View>
   );
diff --git a/app/app/index.tsx b/app/app/index.tsx
index fc924d92..649a2e2b 100644
--- a/app/app/index.tsx
+++ b/app/app/index.tsx
@@ -322,10 +322,16 @@ export default function App() {
       // Convert HTTP/HTTPS to WS/WSS protocol
       finalWebSocketUrl = finalWebSocketUrl.replace(/^http:/, 'ws:').replace(/^https:/, 'wss:');
       
-      // Ensure /ws_pcm endpoint is included
-      if (!finalWebSocketUrl.includes('/ws_pcm')) {
-        // Remove trailing slash if present, then add /ws_pcm
-        finalWebSocketUrl = finalWebSocketUrl.replace(/\/$/, '') + '/ws_pcm';
+      // Ensure /ws endpoint is included
+      if (!finalWebSocketUrl.includes('/ws')) {
+        // Remove trailing slash if present, then add /ws
+        finalWebSocketUrl = finalWebSocketUrl.replace(/\/$/, '') + '/ws';
+      }
+
+      // Add codec parameter if not present
+      if (!finalWebSocketUrl.includes('codec=')) {
+        const separator = finalWebSocketUrl.includes('?') ? '&' : '?';
+        finalWebSocketUrl = finalWebSocketUrl + separator + 'codec=pcm';
       }
       
       // Check if this is the advanced backend (requires authentication) or simple backend
diff --git a/backends/advanced/Docs/architecture.md b/backends/advanced/Docs/architecture.md
index 7c6427bb..739f0ed7 100644
--- a/backends/advanced/Docs/architecture.md
+++ b/backends/advanced/Docs/architecture.md
@@ -22,7 +22,7 @@ graph TB
 
     %% Main WebSocket Server
     subgraph "WebSocket Server"
-        WS["/ws_pcm endpoint"]
+        WS["/ws?codec=pcm endpoint"]
         AUTH[JWT Auth]
     end
 
@@ -237,13 +237,13 @@ Wyoming is a peer-to-peer protocol for voice assistants that combines JSONL (JSO
 
 #### Backend Implementation
 
-**Advanced Backend (`/ws_pcm`)**:
+**Advanced Backend (`/ws?codec=pcm`)**:
 - **Full Wyoming Protocol Support**: Parses all Wyoming events for comprehensive session management
 - **Session State Tracking**: Only processes audio chunks when session is active (after receiving audio-start)
 - **Conversation Boundaries**: Uses Wyoming audio-start/stop events to define precise conversation segments
 - **PCM Audio Processing**: Direct processing of PCM audio data from all apps
 
-**Advanced Backend (`/ws_omi`)**:
+**Advanced Backend (`/ws?codec=opus`)**:
 - **Wyoming Protocol + Opus Decoding**: Combines Wyoming session management with OMI Opus decoding
 - **Continuous Streaming**: OMI devices stream continuously, audio-start/stop events are optional
 - **Timestamp Preservation**: Uses timestamps from Wyoming headers when provided
@@ -1006,8 +1006,8 @@ src/advanced_omi_backend/
 - `POST /api/conversations/{conversation_id}/activate-transcript` - Switch transcript version
 - `POST /api/conversations/{conversation_id}/activate-memory` - Switch memory version
 - `POST /api/audio/upload` - Batch audio file upload and processing
-- WebSocket `/ws_omi` - Real-time Opus audio streaming with Wyoming protocol (OMI devices)
-- WebSocket `/ws_pcm` - Real-time PCM audio streaming with Wyoming protocol (all apps)
+- WebSocket `/ws?codec=opus` - Real-time Opus audio streaming with Wyoming protocol (OMI devices)
+- WebSocket `/ws?codec=pcm` - Real-time PCM audio streaming with Wyoming protocol (all apps)
 
 ### Authentication & Authorization
 - **JWT Tokens**: All API endpoints require valid JWT authentication
diff --git a/backends/advanced/Docs/auth.md b/backends/advanced/Docs/auth.md
index acbf8df4..7998750e 100644
--- a/backends/advanced/Docs/auth.md
+++ b/backends/advanced/Docs/auth.md
@@ -100,13 +100,13 @@ curl -X POST "http://localhost:8000/auth/jwt/login" \
 
 #### Token-based (Recommended)
 ```javascript
-const ws = new WebSocket('ws://localhost:8000/ws_pcm?token=JWT_TOKEN&device_name=phone');
+const ws = new WebSocket('ws://localhost:8000/ws?codec=pcm?token=JWT_TOKEN&device_name=phone');
 ```
 
 #### Cookie-based
 ```javascript
 // Requires existing cookie from web login
-const ws = new WebSocket('ws://localhost:8000/ws_pcm?device_name=phone');
+const ws = new WebSocket('ws://localhost:8000/ws?codec=pcm?device_name=phone');
 ```
 
 ## Client ID Management
@@ -183,8 +183,8 @@ COOKIE_SECURE=false
 - `PATCH /api/users/me` - Update user profile
 
 ### WebSocket Endpoints
-- `ws://host/ws` - Opus audio stream with auth
-- `ws://host/ws_pcm` - PCM audio stream with auth
+- `ws://host/ws?codec=opus` - Opus audio stream with auth
+- `ws://host/ws?codec=pcm` - PCM audio stream with auth (default)
 
 ## Error Handling
 
diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml
index 36c2cf0f..d4eb6504 100644
--- a/backends/advanced/docker-compose-test.yml
+++ b/backends/advanced/docker-compose-test.yml
@@ -14,7 +14,7 @@ services:
     volumes:
       - ./src:/app/src  # Mount source code for easier development
       - ./data/test_audio_chunks:/app/audio_chunks
-      - ./data/test_debug_dir:/app/debug_dir
+      - ./data/test_debug_dir:/app/debug  # Fixed: mount to /app/debug for plugin database
       - ./data/test_data:/app/data
       - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml  # Mount config.yml for model registry and memory settings (writable for admin config updates)
       - ${PLUGINS_CONFIG:-../../tests/config/plugins.test.yml}:/app/plugins.yml  # Mount test plugins config
@@ -24,7 +24,7 @@ services:
       - QDRANT_BASE_URL=qdrant-test
       - QDRANT_PORT=6333
       - REDIS_URL=redis://redis-test:6379/0
-      - DEBUG_DIR=/app/debug_dir
+      - DEBUG_DIR=/app/debug  # Fixed: match plugin database mount path
       # Import API keys from environment
       - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
       - OPENAI_API_KEY=${OPENAI_API_KEY}
@@ -163,7 +163,7 @@ services:
       - ./src:/app/src
       - ./worker_orchestrator.py:/app/worker_orchestrator.py
       - ./data/test_audio_chunks:/app/audio_chunks
-      - ./data/test_debug_dir:/app/debug_dir
+      - ./data/test_debug_dir:/app/debug  # Fixed: mount to /app/debug for plugin database
       - ./data/test_data:/app/data
       - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml  # Mount config.yml for model registry and memory settings (writable for admin config updates)
       - ${PLUGINS_CONFIG:-../../tests/config/plugins.test.yml}:/app/plugins.yml  # Mount test plugins config
@@ -173,7 +173,7 @@ services:
       - QDRANT_BASE_URL=qdrant-test
       - QDRANT_PORT=6333
       - REDIS_URL=redis://redis-test:6379/0
-      - DEBUG_DIR=/app/debug_dir
+      - DEBUG_DIR=/app/debug  # Fixed: match plugin database mount path
       - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
       - OPENAI_API_KEY=${OPENAI_API_KEY}
       - GROQ_API_KEY=${GROQ_API_KEY}
@@ -206,40 +206,6 @@ services:
         condition: service_healthy
     restart: unless-stopped
 
-  deepgram-streaming-worker-test:
-    build:
-      context: .
-      dockerfile: Dockerfile
-      target: dev  # Use dev stage with test dependencies
-    command: >
-      uv run --group test python -m advanced_omi_backend.workers.audio_stream_deepgram_streaming_worker
-    volumes:
-      - ./src:/app/src
-      - ./data/test_data:/app/data
-      - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml
-      - ${PLUGINS_CONFIG:-../../tests/config/plugins.test.yml}:/app/plugins.yml  # Mount test plugins config
-    environment:
-      - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
-      - REDIS_URL=redis://redis-test:6379/0
-      - HA_TOKEN=${HA_TOKEN}
-      - MONGODB_URI=mongodb://mongo-test:27017/test_db
-      - QDRANT_BASE_URL=qdrant-test
-      - QDRANT_PORT=6333
-      - DEBUG_DIR=/app/debug_dir
-      - OPENAI_API_KEY=${OPENAI_API_KEY}
-      - GROQ_API_KEY=${GROQ_API_KEY}
-      - AUTH_SECRET_KEY=test-jwt-signing-key-for-integration-tests
-      - ADMIN_PASSWORD=test-admin-password-123
-      - ADMIN_EMAIL=test-admin@example.com
-      - TRANSCRIPTION_PROVIDER=${TRANSCRIPTION_PROVIDER:-deepgram}
-      - MEMORY_PROVIDER=${MEMORY_PROVIDER:-chronicle}
-    depends_on:
-      redis-test:
-        condition: service_started
-      mongo-test:
-        condition: service_healthy
-    restart: unless-stopped
-
   # Mycelia - AI memory and timeline service (test environment)
   # mycelia-backend-test:
   #   build:
diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml
index b9133876..ceaaf6a8 100644
--- a/backends/advanced/docker-compose.yml
+++ b/backends/advanced/docker-compose.yml
@@ -117,36 +117,6 @@ services:
         condition: service_started
     restart: unless-stopped
 
-  # Deepgram WebSocket streaming worker
-  # Real-time transcription worker that processes audio via Deepgram's WebSocket API
-  # Publishes interim results to Redis Pub/Sub for client display
-  # Publishes final results to Redis Streams for storage
-  # Triggers plugins on final results only
-  deepgram-streaming-worker:
-    build:
-      context: .
-      dockerfile: Dockerfile
-      target: prod  # Use prod stage without test dependencies
-    command: >
-      uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_streaming_worker
-    env_file:
-      - .env
-    volumes:
-      - ./src:/app/src
-      - ./data:/app/data
-      - ../../config/config.yml:/app/config.yml
-      - ../../config/plugins.yml:/app/plugins.yml
-    environment:
-      - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
-      - REDIS_URL=redis://redis:6379/0
-      - HA_TOKEN=${HA_TOKEN}
-    depends_on:
-      redis:
-        condition: service_healthy
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-    restart: unless-stopped
-
   webui:
     build:
       context: ./webui
diff --git a/backends/advanced/scripts/laptop_client.py b/backends/advanced/scripts/laptop_client.py
index 385a4a1b..a0047f3b 100644
--- a/backends/advanced/scripts/laptop_client.py
+++ b/backends/advanced/scripts/laptop_client.py
@@ -15,7 +15,7 @@
 # Default WebSocket settings
 DEFAULT_HOST = "localhost"
 DEFAULT_PORT = 8000
-DEFAULT_ENDPOINT = "/ws_pcm"
+DEFAULT_ENDPOINT = "/ws?codec=pcm"
 
 # Audio format will be determined from the InputMicStream instance
 
diff --git a/backends/advanced/src/advanced_omi_backend/app_config.py b/backends/advanced/src/advanced_omi_backend/app_config.py
index 1e24fb54..15e825ec 100644
--- a/backends/advanced/src/advanced_omi_backend/app_config.py
+++ b/backends/advanced/src/advanced_omi_backend/app_config.py
@@ -47,11 +47,6 @@ def __init__(self):
             os.getenv("NEW_CONVERSATION_TIMEOUT_MINUTES", "1.5")
         )
 
-        # Audio cropping configuration
-        self.audio_cropping_enabled = os.getenv("AUDIO_CROPPING_ENABLED", "true").lower() == "true"
-        self.min_speech_segment_duration = float(os.getenv("MIN_SPEECH_SEGMENT_DURATION", "1.0"))
-        self.cropping_context_padding = float(os.getenv("CROPPING_CONTEXT_PADDING", "0.1"))
-
         # Transcription Configuration (registry-based)
         self.transcription_provider = get_transcription_provider(None)
         if self.transcription_provider:
diff --git a/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py b/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py
index af89fd51..edddd914 100644
--- a/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py
+++ b/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py
@@ -65,7 +65,7 @@ def __init__(
         base_url: str,
         token: str,
         device_name: str = "python-client",
-        endpoint: str = "ws_pcm",
+        endpoint: str = "ws?codec=pcm",
     ):
         """Initialize the audio stream client.
 
@@ -73,7 +73,7 @@ def __init__(
             base_url: Base URL of the backend (e.g., "http://localhost:8000")
             token: JWT authentication token
             device_name: Device name for client identification
-            endpoint: WebSocket endpoint ("ws_pcm" or "ws_omi")
+            endpoint: WebSocket endpoint ("ws?codec=pcm" or "ws?codec=opus")
         """
         self.base_url = base_url
         self.token = token
@@ -87,7 +87,9 @@ def __init__(
     def ws_url(self) -> str:
         """Build WebSocket URL from base URL."""
         url = self.base_url.replace("http://", "ws://").replace("https://", "wss://")
-        return f"{url}/{self.endpoint}?token={self.token}&device_name={self.device_name}"
+        # Check if endpoint already has query params
+        separator = "&" if "?" in self.endpoint else "?"
+        return f"{url}/{self.endpoint}{separator}token={self.token}&device_name={self.device_name}"
 
     async def connect(self, wait_for_ready: bool = True) -> WebSocketClientProtocol:
         """Connect to the WebSocket endpoint.
@@ -105,8 +107,8 @@ async def connect(self, wait_for_ready: bool = True) -> WebSocketClientProtocol:
         self.ws = await websockets.connect(self.ws_url)
         logger.info("WebSocket connected")
 
-        if wait_for_ready and self.endpoint == "ws_pcm":
-            # PCM endpoint sends "ready" message after auth (line 261-268 in websocket_controller.py)
+        if wait_for_ready and "codec=pcm" in self.endpoint:
+            # PCM codec sends "ready" message after auth (line 261-268 in websocket_controller.py)
             ready_msg = await self.ws.recv()
             ready = json.loads(ready_msg.strip() if isinstance(ready_msg, str) else ready_msg.decode().strip())
             if ready.get("type") != "ready":
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py
index 4810810d..e63dd883 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py
@@ -219,14 +219,13 @@ async def upload_and_process_audio_files(
         )
 
 
-async def get_conversation_audio_path(conversation_id: str, user: User, cropped: bool = False) -> Path:
+async def get_conversation_audio_path(conversation_id: str, user: User) -> Path:
     """
     Get the file path for a conversation's audio file.
 
     Args:
         conversation_id: The conversation ID
         user: The authenticated user
-        cropped: If True, return cropped audio path; if False, return original audio path
 
     Returns:
         Path object for the audio file
@@ -244,12 +243,11 @@ async def get_conversation_audio_path(conversation_id: str, user: User, cropped:
     if not user.is_superuser and conversation.user_id != str(user.user_id):
         raise ValueError("Access denied")
 
-    # Get the appropriate audio path
-    audio_path = conversation.cropped_audio_path if cropped else conversation.audio_path
+    # Get the audio path
+    audio_path = conversation.audio_path
 
     if not audio_path:
-        audio_type = "cropped" if cropped else "original"
-        raise ValueError(f"No {audio_type} audio file available for this conversation")
+        raise ValueError(f"No audio file available for this conversation")
 
     # Build full file path
     from advanced_omi_backend.app_config import get_audio_chunk_dir
@@ -261,39 +259,3 @@ async def get_conversation_audio_path(conversation_id: str, user: User, cropped:
         raise ValueError("Audio file not found on disk")
 
     return file_path
-
-
-async def get_cropped_audio_info(audio_uuid: str, user: User):
-    """
-    Get audio cropping metadata from the conversation.
-
-    This is an audio service operation that retrieves cropping-related metadata
-    such as speech segments, cropped audio path, and cropping timestamps.
-
-    Used for: Checking cropping status and retrieving audio processing details.
-    Works with: Conversation model.
-    """
-    try:
-        # Find the conversation
-        conversation = await Conversation.find_one(Conversation.audio_uuid == audio_uuid)
-        if not conversation:
-            return JSONResponse(status_code=404, content={"error": "Conversation not found"})
-
-        # Check ownership for non-admin users
-        if not user.is_superuser:
-            if conversation.user_id != str(user.user_id):
-                return JSONResponse(status_code=404, content={"error": "Conversation not found"})
-
-        return {
-            "audio_uuid": audio_uuid,
-            "cropped_audio_path": conversation.cropped_audio_path,
-            "speech_segments": conversation.speech_segments if hasattr(conversation, 'speech_segments') else [],
-            "cropped_duration": conversation.cropped_duration if hasattr(conversation, 'cropped_duration') else None,
-            "cropped_at": conversation.cropped_at if hasattr(conversation, 'cropped_at') else None,
-            "original_audio_path": conversation.audio_path,
-        }
-
-    except Exception as e:
-        # Database or unexpected errors when fetching audio metadata
-        audio_logger.exception("Error fetching cropped audio info")
-        return JSONResponse(status_code=500, content={"error": "Error fetching cropped audio info"})
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py
index b9533391..943d86bd 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py
@@ -103,7 +103,6 @@ async def get_conversation(conversation_id: str, user: User):
             "user_id": conversation.user_id,
             "client_id": conversation.client_id,
             "audio_path": conversation.audio_path,
-            "cropped_audio_path": conversation.cropped_audio_path,
             "created_at": conversation.created_at.isoformat() if conversation.created_at else None,
             "deleted": conversation.deleted,
             "deletion_reason": conversation.deletion_reason,
@@ -154,7 +153,6 @@ async def get_conversations(user: User):
                 "user_id": conv.user_id,
                 "client_id": conv.client_id,
                 "audio_path": conv.audio_path,
-                "cropped_audio_path": conv.cropped_audio_path,
                 "created_at": conv.created_at.isoformat() if conv.created_at else None,
                 "deleted": conv.deleted,
                 "deletion_reason": conv.deletion_reason,
@@ -210,7 +208,6 @@ async def delete_conversation(conversation_id: str, user: User):
 
         # Get file paths before deletion
         audio_path = conversation.audio_path
-        cropped_audio_path = conversation.cropped_audio_path
         audio_uuid = conversation.audio_uuid
         client_id = conversation.client_id
 
@@ -237,17 +234,6 @@ async def delete_conversation(conversation_id: str, user: User):
             except Exception as e:
                 logger.warning(f"Failed to delete audio file {audio_path}: {e}")
 
-        if cropped_audio_path:
-            try:
-                # Construct full path to cropped audio file
-                full_cropped_path = Path("/app/audio_chunks") / cropped_audio_path
-                if full_cropped_path.exists():
-                    full_cropped_path.unlink()
-                    deleted_files.append(str(full_cropped_path))
-                    logger.info(f"Deleted cropped audio file: {full_cropped_path}")
-            except Exception as e:
-                logger.warning(f"Failed to delete cropped audio file {cropped_audio_path}: {e}")
-
         logger.info(f"Successfully deleted conversation {conversation_id} for user {user.user_id}")
 
         # Prepare response message
@@ -321,10 +307,9 @@ async def reprocess_transcript(conversation_id: str, user: User):
         import uuid
         version_id = str(uuid.uuid4())
 
-        # Enqueue job chain with RQ (transcription -> speaker recognition -> cropping -> memory)
+        # Enqueue job chain with RQ (transcription -> speaker recognition -> memory)
         from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job
         from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job
-        from advanced_omi_backend.workers.audio_jobs import process_cropping_job
         from advanced_omi_backend.workers.memory_jobs import process_memory_job
         from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL
 
@@ -361,33 +346,19 @@ async def reprocess_transcript(conversation_id: str, user: User):
         )
         logger.info(f"📥 RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {transcript_job.id})")
 
-        # Job 3: Audio cropping (depends on speaker recognition)
-        cropping_job = default_queue.enqueue(
-            process_cropping_job,
-            conversation_id,
-            str(full_audio_path),
-            depends_on=speaker_job,
-            job_timeout=300,
-            result_ttl=JOB_RESULT_TTL,
-            job_id=f"crop_{conversation_id[:8]}",
-            description=f"Crop audio for {conversation_id[:8]}",
-            meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id}
-        )
-        logger.info(f"📥 RQ: Enqueued audio cropping job {cropping_job.id} (depends on {speaker_job.id})")
-
-        # Job 4: Extract memories (depends on cropping)
+        # Job 3: Extract memories (depends on speaker recognition)
         # Note: redis_client is injected by @async_job decorator, don't pass it directly
         memory_job = memory_queue.enqueue(
             process_memory_job,
             conversation_id,
-            depends_on=cropping_job,
+            depends_on=speaker_job,
             job_timeout=1800,
             result_ttl=JOB_RESULT_TTL,
             job_id=f"memory_{conversation_id[:8]}",
             description=f"Extract memories for {conversation_id[:8]}",
             meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id}
         )
-        logger.info(f"📥 RQ: Enqueued memory job {memory_job.id} (depends on {cropping_job.id})")
+        logger.info(f"📥 RQ: Enqueued memory job {memory_job.id} (depends on {speaker_job.id})")
 
         job = transcript_job  # For backward compatibility with return value
         logger.info(f"Created transcript reprocessing job {job.id} (version: {version_id}) for conversation {conversation_id}")
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
index 91773756..f1944c7e 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
@@ -113,9 +113,12 @@ def get_jobs(
     Returns:
         Dict with jobs list and pagination metadata matching frontend expectations
     """
+    logger.info(f"🔍 DEBUG get_jobs: Filtering - queue_name={queue_name}, job_type={job_type}, client_id={client_id}")
     all_jobs = []
+    seen_job_ids = set()  # Track which job IDs we've already processed to avoid duplicates
 
     queues_to_check = [queue_name] if queue_name else QUEUE_NAMES
+    logger.info(f"🔍 DEBUG get_jobs: Checking queues: {queues_to_check}")
 
     for qname in queues_to_check:
         queue = get_queue(qname)
@@ -131,6 +134,11 @@ def get_jobs(
 
         for job_ids, status in registries:
             for job_id in job_ids:
+                # Skip if we've already processed this job_id (prevents duplicates across registries)
+                if job_id in seen_job_ids:
+                    continue
+                seen_job_ids.add(job_id)
+
                 try:
                     job = Job.fetch(job_id, connection=redis_conn)
 
@@ -140,16 +148,23 @@ def get_jobs(
                     # Extract just the function name (e.g., "listen_for_speech_job" from "module.listen_for_speech_job")
                     func_name = job.func_name.split('.')[-1] if job.func_name else "unknown"
 
+                    # Debug: Log job details before filtering
+                    logger.debug(f"🔍 DEBUG get_jobs: Job {job_id} - func_name={func_name}, full_func_name={job.func_name}, meta_client_id={job.meta.get('client_id', '') if job.meta else ''}, status={status}")
+
                     # Apply job_type filter
                     if job_type and job_type not in func_name:
+                        logger.debug(f"🔍 DEBUG get_jobs: Filtered out {job_id} - job_type '{job_type}' not in func_name '{func_name}'")
                         continue
 
                     # Apply client_id filter (partial match in meta)
                     if client_id:
                         job_client_id = job.meta.get("client_id", "") if job.meta else ""
                         if client_id not in job_client_id:
+                            logger.debug(f"🔍 DEBUG get_jobs: Filtered out {job_id} - client_id '{client_id}' not in job_client_id '{job_client_id}'")
                             continue
 
+                    logger.debug(f"🔍 DEBUG get_jobs: Including job {job_id} in results")
+
                     all_jobs.append({
                         "job_id": job.id,
                         "job_type": func_name,
@@ -182,6 +197,8 @@ def get_jobs(
     paginated_jobs = all_jobs[offset:offset + limit]
     has_more = (offset + limit) < total_jobs
 
+    logger.info(f"🔍 DEBUG get_jobs: Found {total_jobs} matching jobs (returning {len(paginated_jobs)} after pagination)")
+
     return {
         "jobs": paginated_jobs,
         "pagination": {
@@ -296,6 +313,7 @@ def start_streaming_jobs(
         meta={'audio_uuid': session_id, 'client_id': client_id, 'session_level': True}
     )
     logger.info(f"📥 RQ: Enqueued speech detection job {speech_job.id}")
+    logger.info(f"🔍 DEBUG: Created job - ID={speech_job.id}, func_name={speech_job.func_name}, client_id={client_id}, meta={speech_job.meta}")
 
     # Store job ID for cleanup (keyed by client_id for easy WebSocket cleanup)
     try:
@@ -319,6 +337,7 @@ def start_streaming_jobs(
         meta={'audio_uuid': session_id, 'session_level': True}  # Mark as session-level job
     )
     logger.info(f"📥 RQ: Enqueued audio persistence job {audio_job.id} on audio queue")
+    logger.info(f"🔍 DEBUG: Created audio job - ID={audio_job.id}, func_name={audio_job.func_name}, client_id={client_id}, meta={audio_job.meta}")
 
     return {
         'speech_detection': speech_job.id,
@@ -341,10 +360,9 @@ def start_post_conversation_jobs(
 
     This creates the standard processing chain after a conversation is created:
     1. [Optional] Transcription job - Batch transcription (if post_transcription=True)
-    2. Audio cropping job - Removes silence from audio
-    3. Speaker recognition job - Identifies speakers in audio
-    4. Memory extraction job - Extracts memories from conversation (parallel)
-    5. Title/summary generation job - Generates title and summary (parallel)
+    2. Speaker recognition job - Identifies speakers in audio
+    3. Memory extraction job - Extracts memories from conversation (parallel)
+    4. Title/summary generation job - Generates title and summary (parallel)
 
     Args:
         conversation_id: Conversation identifier
@@ -354,14 +372,13 @@ def start_post_conversation_jobs(
         post_transcription: If True, run batch transcription step (for uploads)
                            If False, skip transcription (streaming already has it)
         transcript_version_id: Transcript version ID (auto-generated if None)
-        depends_on_job: Optional job dependency for cropping job
+        depends_on_job: Optional job dependency for first job
 
     Returns:
         Dict with job IDs (transcription will be None if post_transcription=False)
     """
     from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job
     from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job
-    from advanced_omi_backend.workers.audio_jobs import process_cropping_job
     from advanced_omi_backend.workers.memory_jobs import process_memory_job
     from advanced_omi_backend.workers.conversation_jobs import generate_title_summary_job
 
@@ -392,29 +409,11 @@ def start_post_conversation_jobs(
         meta=job_meta
     )
     logger.info(f"📥 RQ: Enqueued transcription job {transcription_job.id}, meta={transcription_job.meta}")
-    crop_depends_on = transcription_job
-
-    # Step 2: Audio cropping job (depends on transcription if it ran, otherwise depends_on_job)
-    crop_job_id = f"crop_{conversation_id[:12]}"
-    logger.info(f"🔍 DEBUG: Creating crop job with job_id={crop_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}")
-
-    cropping_job = default_queue.enqueue(
-        process_cropping_job,
-        conversation_id,
-        audio_file_path,
-        job_timeout=300,  # 5 minutes
-        result_ttl=JOB_RESULT_TTL,
-        depends_on=crop_depends_on,
-        job_id=crop_job_id,
-        description=f"Crop audio for conversation {conversation_id[:8]}",
-        meta=job_meta
-    )
-    logger.info(f"📥 RQ: Enqueued cropping job {cropping_job.id}, meta={cropping_job.meta}")
 
-    # Speaker recognition depends on cropping
-    speaker_depends_on = cropping_job
+    # Speaker recognition depends on transcription (no cropping step)
+    speaker_depends_on = transcription_job
 
-    # Step 3: Speaker recognition job
+    # Step 2: Speaker recognition job
     speaker_job_id = f"speaker_{conversation_id[:12]}"
     logger.info(f"🔍 DEBUG: Creating speaker job with job_id={speaker_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}")
 
@@ -434,7 +433,7 @@ def start_post_conversation_jobs(
     )
     logger.info(f"📥 RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (depends on {speaker_depends_on.id})")
 
-    # Step 4: Memory extraction job (parallel with title/summary)
+    # Step 3: Memory extraction job (parallel with title/summary)
     memory_job_id = f"memory_{conversation_id[:12]}"
     logger.info(f"🔍 DEBUG: Creating memory job with job_id={memory_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}")
 
@@ -450,7 +449,7 @@ def start_post_conversation_jobs(
     )
     logger.info(f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on {speaker_job.id})")
 
-    # Step 5: Title/summary generation job (parallel with memory, independent)
+    # Step 4: Title/summary generation job (parallel with memory, independent)
     # This ensures conversations always get titles/summaries even if memory job fails
     title_job_id = f"title_summary_{conversation_id[:12]}"
     logger.info(f"🔍 DEBUG: Creating title/summary job with job_id={title_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}")
@@ -468,7 +467,6 @@ def start_post_conversation_jobs(
     logger.info(f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on {speaker_job.id})")
 
     return {
-        'cropping': cropping_job.id,
         'transcription': transcription_job.id if transcription_job else None,
         'speaker_recognition': speaker_job.id,
         'memory': memory_job.id,
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
index 2b98bcbb..28e9924f 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py
@@ -381,10 +381,9 @@ async def _initialize_streaming_session(
         application_logger.debug(f"Session already initialized for {client_id}")
         return None
 
-    # Initialize stream session
-    client_state.stream_session_id = str(uuid.uuid4())
-    client_state.stream_chunk_count = 0
-    client_state.stream_audio_format = audio_format
+    # Initialize stream session - use client_id as session_id for predictable lookup
+    # All other session metadata goes to Redis (single source of truth)
+    client_state.stream_session_id = client_state.client_id
     application_logger.info(f"🆔 Created stream session: {client_state.stream_session_id}")
 
     # Determine transcription provider from config.yml
@@ -398,21 +397,31 @@ async def _initialize_streaming_session(
     if not stt_model:
         raise ValueError("No default STT model configured in config.yml (defaults.stt)")
 
-    provider = stt_model.model_provider.lower()
-    if provider not in ["deepgram", "parakeet"]:
-        raise ValueError(f"Unsupported STT provider: {provider}. Expected: deepgram or parakeet")
+    # Use model_provider for session tracking (generic, not validated against hardcoded list)
+    provider = stt_model.model_provider.lower() if stt_model.model_provider else stt_model.name
 
     application_logger.info(f"📋 Using STT provider: {provider} (model: {stt_model.name})")
-    
-    # Initialize session tracking in Redis
+
+    # Initialize session tracking in Redis (SINGLE SOURCE OF TRUTH for session metadata)
+    # This includes user_email, connection info, audio format, chunk counters, job IDs, etc.
+    connection_id = f"ws_{client_id}_{int(time.time())}"
     await audio_stream_producer.init_session(
         session_id=client_state.stream_session_id,
         user_id=user_id,
         client_id=client_id,
+        user_email=user_email,
+        connection_id=connection_id,
         mode="streaming",
         provider=provider
     )
 
+    # Store audio format in Redis session (not in ClientState)
+    from advanced_omi_backend.services.audio_stream.producer import get_audio_stream_producer
+    import json
+    session_key = f"audio:session:{client_state.stream_session_id}"
+    redis_client = audio_stream_producer.redis_client
+    await redis_client.hset(session_key, "audio_format", json.dumps(audio_format))
+
     # Enqueue streaming jobs (speech detection + audio persistence)
     from advanced_omi_backend.controllers.queue_controller import start_streaming_jobs
 
@@ -422,8 +431,12 @@ async def _initialize_streaming_session(
         client_id=client_id
     )
 
-    client_state.speech_detection_job_id = job_ids['speech_detection']
-    client_state.audio_persistence_job_id = job_ids['audio_persistence']
+    # Store job IDs in Redis session (not in ClientState)
+    await audio_stream_producer.update_session_job_ids(
+        session_id=client_state.stream_session_id,
+        speech_detection_job_id=job_ids['speech_detection'],
+        audio_persistence_job_id=job_ids['audio_persistence']
+    )
 
     # Launch interim results subscriber if WebSocket provided
     subscriber_task = None
@@ -494,11 +507,10 @@ async def _finalize_streaming_session(
             f"✅ Session {session_id[:12]} marked as finalizing - open_conversation_job will handle cleanup"
         )
 
-        # Clear session state
-        for attr in ['stream_session_id', 'stream_chunk_count', 'stream_audio_format',
-                     'speech_detection_job_id', 'audio_persistence_job_id']:
-            if hasattr(client_state, attr):
-                delattr(client_state, attr)
+        # Clear session state from ClientState (only stream_session_id is stored there now)
+        # All other session metadata lives in Redis (single source of truth)
+        if hasattr(client_state, 'stream_session_id'):
+            delattr(client_state, 'stream_session_id')
 
     except Exception as finalize_error:
         application_logger.error(
@@ -534,14 +546,18 @@ async def _publish_audio_to_stream(
         application_logger.warning(f"⚠️ Received audio chunk before session initialized for {client_id}")
         return
 
-    # Increment chunk count and format chunk ID
-    client_state.stream_chunk_count += 1
-    chunk_id = f"{client_state.stream_chunk_count:05d}"
+    session_id = client_state.stream_session_id
+
+    # Increment chunk count in Redis (single source of truth) and format chunk ID
+    session_key = f"audio:session:{session_id}"
+    redis_client = audio_stream_producer.redis_client
+    chunk_count = await redis_client.hincrby(session_key, "chunks_published", 1)
+    chunk_id = f"{chunk_count:05d}"
 
     # Publish to Redis Stream using producer
     await audio_stream_producer.add_audio_chunk(
         audio_data=audio_data,
-        session_id=client_state.stream_session_id,
+        session_id=session_id,
         chunk_id=chunk_id,
         user_id=user_id,
         client_id=client_id,
diff --git a/backends/advanced/src/advanced_omi_backend/main.py b/backends/advanced/src/advanced_omi_backend/main.py
index df51e1cc..5160c230 100644
--- a/backends/advanced/src/advanced_omi_backend/main.py
+++ b/backends/advanced/src/advanced_omi_backend/main.py
@@ -2,7 +2,7 @@
 """
 Unified Omi-audio service
 
- * Accepts Opus packets over a WebSocket (`/ws`) or PCM over a WebSocket (`/ws_pcm`).
+ * Accepts audio over a unified WebSocket endpoint (`/ws`) with codec parameter (pcm or opus).
  * Uses a central queue to decouple audio ingestion from processing.
  * A saver consumer buffers PCM and writes 30-second WAV chunks to `./data/audio_chunks/`.
  * A transcription consumer sends each chunk to a Wyoming ASR service.
diff --git a/backends/advanced/src/advanced_omi_backend/middleware/app_middleware.py b/backends/advanced/src/advanced_omi_backend/middleware/app_middleware.py
index eafeffec..4cff21eb 100644
--- a/backends/advanced/src/advanced_omi_backend/middleware/app_middleware.py
+++ b/backends/advanced/src/advanced_omi_backend/middleware/app_middleware.py
@@ -56,8 +56,6 @@ class RequestLoggingMiddleware(BaseHTTPMiddleware):
         "/auth/jwt/logout",
         "/auth/cookie/logout",
         "/ws",
-        "/ws_omi",
-        "/ws_pcm",
         "/mcp",
         "/health",
         "/auth/health",
diff --git a/backends/advanced/src/advanced_omi_backend/models/audio_file.py b/backends/advanced/src/advanced_omi_backend/models/audio_file.py
index e1e2c09a..ca154500 100644
--- a/backends/advanced/src/advanced_omi_backend/models/audio_file.py
+++ b/backends/advanced/src/advanced_omi_backend/models/audio_file.py
@@ -41,9 +41,6 @@ class AudioFile(Document):
     user_id: Indexed(str) = Field(description="User who owns this audio")
     user_email: Optional[str] = Field(None, description="User email")
 
-    # Audio processing
-    cropped_audio_path: Optional[str] = Field(None, description="Path to cropped audio (speech only)")
-
     # Speech-driven conversation linking
     conversation_id: Optional[str] = Field(
         None,
diff --git a/backends/advanced/src/advanced_omi_backend/models/conversation.py b/backends/advanced/src/advanced_omi_backend/models/conversation.py
index 735a8be5..00178f10 100644
--- a/backends/advanced/src/advanced_omi_backend/models/conversation.py
+++ b/backends/advanced/src/advanced_omi_backend/models/conversation.py
@@ -19,11 +19,15 @@ class Conversation(Document):
 
     # Nested Enums
     class TranscriptProvider(str, Enum):
-        """Supported transcription providers."""
+        """
+        Transcription provider identifiers.
+
+        Note: Actual providers are configured in config.yml.
+        Any provider name from config.yml is valid - this enum is for common values only.
+        """
         DEEPGRAM = "deepgram"
-        PARAKEET = "parakeet"
-        SPEECH_DETECTION = "speech_detection"  # Legacy value
-        UNKNOWN = "unknown"  # Fallback value
+        SPEECH_DETECTION = "speech_detection"
+        UNKNOWN = "unknown"
 
     class MemoryProvider(str, Enum):
         """Supported memory providers."""
@@ -86,7 +90,6 @@ class MemoryVersion(BaseModel):
 
     # Audio file reference
     audio_path: Optional[str] = Field(None, description="Path to audio file (relative to CHUNK_DIR)")
-    cropped_audio_path: Optional[str] = Field(None, description="Path to cropped audio file (relative to CHUNK_DIR)")
 
     # Creation metadata
     created_at: Indexed(datetime) = Field(default_factory=datetime.utcnow, description="When the conversation was created")
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py
index 056e7667..58a33ff5 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py
@@ -37,7 +37,6 @@ async def upload_audio_from_drive_folder(
 @router.get("/get_audio/{conversation_id}")
 async def get_conversation_audio(
     conversation_id: str,
-    cropped: bool = Query(default=False, description="Serve cropped (speech-only) audio instead of original"),
     token: Optional[str] = Query(default=None, description="JWT token for audio element access"),
     current_user: Optional[User] = Depends(current_active_user_optional),
 ):
@@ -52,7 +51,6 @@ async def get_conversation_audio(
 
     Args:
         conversation_id: The conversation ID
-        cropped: If True, serve cropped audio; if False, serve original audio
         token: Optional JWT token as query param (for audio elements)
         current_user: Authenticated user (from header)
 
@@ -75,8 +73,7 @@ async def get_conversation_audio(
     try:
         file_path = await audio_controller.get_conversation_audio_path(
             conversation_id=conversation_id,
-            user=current_user,
-            cropped=cropped
+            user=current_user
         )
     except ValueError as e:
         # Map ValueError messages to appropriate HTTP status codes
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py
index 8da0f5b0..2fc05425 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/conversation_routes.py
@@ -42,14 +42,6 @@ async def get_conversation_detail(
     return await conversation_controller.get_conversation(conversation_id, current_user)
 
 
-@router.get("/{audio_uuid}/cropped")
-async def get_cropped_audio_info(
-    audio_uuid: str, current_user: User = Depends(current_active_user)
-):
-    """Get cropped audio information for a conversation. Users can only access their own conversations."""
-    return await audio_controller.get_cropped_audio_info(audio_uuid, current_user)
-
-
 # New reprocessing endpoints
 @router.post("/{conversation_id}/reprocess-transcript")
 async def reprocess_transcript(
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py
index d7a62ba9..96ee72fe 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py
@@ -139,7 +139,6 @@ async def health_check():
             "chunk_dir": str(os.getenv("CHUNK_DIR", "./audio_chunks")),
             "active_clients": get_client_manager().get_client_count(),
             "new_conversation_timeout_minutes": float(os.getenv("NEW_CONVERSATION_TIMEOUT_MINUTES", "1.5")),
-            "audio_cropping_enabled": os.getenv("AUDIO_CROPPING_ENABLED", "true").lower() == "true",
             "llm_provider": (_llm_def.model_provider if _llm_def else None),
             "llm_model": (_llm_def.model_name if _llm_def else None),
             "llm_base_url": (_llm_def.model_url if _llm_def else None),
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/websocket_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/websocket_routes.py
index d9754a87..2671d7f6 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/modules/websocket_routes.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/websocket_routes.py
@@ -18,21 +18,34 @@
 # Create router
 router = APIRouter(tags=["websocket"])
 
-@router.websocket("/ws_omi")
-async def ws_endpoint_omi(
+@router.websocket("/ws")
+async def ws_endpoint(
     ws: WebSocket,
+    codec: str = Query("pcm"),
     token: Optional[str] = Query(None),
     device_name: Optional[str] = Query(None),
 ):
-    """Accepts WebSocket connections with Wyoming protocol, decodes OMI Opus audio, and processes per-client."""
-    await handle_omi_websocket(ws, token, device_name)
-
-
-@router.websocket("/ws_pcm")
-async def ws_endpoint_pcm(
-    ws: WebSocket,
-    token: Optional[str] = Query(None),
-    device_name: Optional[str] = Query(None)
-):
-    """Accepts WebSocket connections, processes PCM audio per-client."""
-    await handle_pcm_websocket(ws, token, device_name)
\ No newline at end of file
+    """
+    WebSocket endpoint for audio streaming with multiple codec support.
+
+    Args:
+        codec: Audio codec (pcm, opus). Default: pcm
+        token: JWT auth token
+        device_name: Device identifier
+
+    Examples:
+        /ws?codec=pcm&token=xxx&device_name=laptop
+        /ws?codec=opus&token=xxx&device_name=omi-device
+    """
+    # Validate and normalize codec
+    codec = codec.lower()
+    if codec not in ["pcm", "opus"]:
+        logger.warning(f"Unsupported codec requested: {codec}")
+        await ws.close(code=1008, reason=f"Unsupported codec: {codec}. Supported: pcm, opus")
+        return
+
+    # Route to appropriate handler
+    if codec == "opus":
+        await handle_omi_websocket(ws, token, device_name)
+    else:
+        await handle_pcm_websocket(ws, token, device_name)
\ No newline at end of file
diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py
index f7299cda..1fa06011 100644
--- a/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py
+++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py
@@ -41,32 +41,57 @@ async def init_session(
         session_id: str,
         user_id: str,
         client_id: str,
+        user_email: str = "",
+        connection_id: str = "",
         mode: str = "streaming",
         provider: str = "deepgram"
     ):
         """
-        Initialize session tracking metadata.
+        Initialize session tracking metadata in Redis.
+
+        This is the SINGLE SOURCE OF TRUTH for session state.
+        All session metadata is stored here instead of in-memory ClientState.
 
         Args:
-            session_id: Session identifier
-            user_id: User identifier
-            client_id: Client identifier
+            session_id: Unique session identifier
+            user_id: User identifier (MongoDB ObjectId)
+            client_id: Client identifier (objectid_suffix-device_name)
+            user_email: User email for debugging/tracking
+            connection_id: WebSocket connection identifier
             mode: Processing mode (streaming/batch)
-            provider: Transcription provider ("deepgram", "parakeet", etc.)
+            provider: Transcription provider from config.yml
         """
         # Client-specific stream naming (one stream per client for isolation)
         stream_name = f"audio:stream:{client_id}"
         session_key = f"audio:session:{session_id}"
 
         await self.redis_client.hset(session_key, mapping={
+            # User & Client tracking
             "user_id": user_id,
+            "user_email": user_email,
             "client_id": client_id,
+            "connection_id": connection_id,
+
+            # Stream configuration
             "stream_name": stream_name,
             "provider": provider,
             "mode": mode,
+
+            # Timestamps
             "started_at": str(time.time()),
-            "chunks_published": "0",
             "last_chunk_at": str(time.time()),
+
+            # Counters
+            "chunks_published": "0",
+
+            # Job tracking (populated by queue_controller when jobs start)
+            "speech_detection_job_id": "",
+            "audio_persistence_job_id": "",
+
+            # Connection state
+            "websocket_connected": "true",
+
+            # Session status
             "status": "active"
         })
 
@@ -134,6 +159,63 @@ async def send_session_end_signal(self, session_id: str):
         )
         logger.info(f"📡 Sent end-of-session signal for {session_id} to {stream_name}")
 
+    async def get_session(self, session_id: str) -> dict:
+        """
+        Get session metadata from Redis.
+
+        Args:
+            session_id: Session identifier
+
+        Returns:
+            Dictionary with session metadata, empty dict if not found
+        """
+        session_key = f"audio:session:{session_id}"
+        session_data = await self.redis_client.hgetall(session_key)
+
+        # Convert bytes to strings for easier handling
+        return {k.decode() if isinstance(k, bytes) else k: v.decode() if isinstance(v, bytes) else v
+                for k, v in session_data.items()} if session_data else {}
+
+    async def update_session_job_ids(
+        self,
+        session_id: str,
+        speech_detection_job_id: str = None,
+        audio_persistence_job_id: str = None
+    ):
+        """
+        Update job IDs in session metadata.
+
+        Args:
+            session_id: Session identifier
+            speech_detection_job_id: Speech detection job ID (optional)
+            audio_persistence_job_id: Audio persistence job ID (optional)
+        """
+        session_key = f"audio:session:{session_id}"
+        updates = {}
+
+        if speech_detection_job_id:
+            updates["speech_detection_job_id"] = speech_detection_job_id
+        if audio_persistence_job_id:
+            updates["audio_persistence_job_id"] = audio_persistence_job_id
+
+        if updates:
+            await self.redis_client.hset(session_key, mapping=updates)
+            logger.debug(f"📊 Updated job IDs for session {session_id}: {updates}")
+
+    async def mark_websocket_disconnected(self, session_id: str):
+        """
+        Mark session's websocket as disconnected.
+
+        Args:
+            session_id: Session identifier
+        """
+        session_key = f"audio:session:{session_id}"
+        await self.redis_client.hset(session_key, mapping={
+            "websocket_connected": "false",
+            "disconnected_at": str(time.time())
+        })
+        logger.info(f"🔌 Marked websocket disconnected for session {session_id}")
+
     async def finalize_session(self, session_id: str):
         """
         Mark session as finalizing and clean up buffer.
diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py
deleted file mode 100644
index ef54a3d9..00000000
--- a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py
+++ /dev/null
@@ -1,92 +0,0 @@
-"""
-Deepgram transcription consumer for Redis Streams architecture.
-
-Uses the registry-driven transcription provider for Deepgram batch transcription.
-"""
-
-import logging
-
-from advanced_omi_backend.services.audio_stream.consumer import BaseAudioStreamConsumer
-from advanced_omi_backend.services.transcription import get_transcription_provider
-
-logger = logging.getLogger(__name__)
-
-
-class DeepgramStreamConsumer:
-    """
-    Deepgram consumer for Redis Streams architecture.
-
-    Reads from: specified stream (client-specific or provider-specific)
-    Writes to: transcription:results:{session_id}
-
-    Uses RegistryBatchTranscriptionProvider configured via config.yml for
-    Deepgram transcription. This ensures consistent behavior with batch
-    transcription jobs.
-    """
-
-    def __init__(self, redis_client, buffer_chunks: int = 30):
-        """
-        Initialize Deepgram consumer.
-
-        Dynamically discovers all audio:stream:* streams and claims them using Redis locks.
-        Uses config.yml stt-deepgram configuration for transcription.
-
-        Args:
-            redis_client: Connected Redis client
-            buffer_chunks: Number of chunks to buffer before transcribing (default: 30 = ~7.5s)
-        """
-
-        # Get registry-driven transcription provider
-        self.provider = get_transcription_provider(mode="batch")
-        if not self.provider:
-            raise RuntimeError(
-                "Failed to load transcription provider. Ensure config.yml has a default 'stt' model configured."
-            )
-
-        # Create a concrete subclass that implements transcribe_audio
-        class _ConcreteConsumer(BaseAudioStreamConsumer):
-            def __init__(inner_self, provider_name: str, redis_client, buffer_chunks: int):
-                super().__init__(provider_name, redis_client, buffer_chunks)
-                inner_self._transcription_provider = self.provider
-
-            async def transcribe_audio(inner_self, audio_data: bytes, sample_rate: int) -> dict:
-                """Transcribe using registry-driven transcription provider."""
-                try:
-                    result = await inner_self._transcription_provider.transcribe(
-                        audio_data=audio_data,
-                        sample_rate=sample_rate,
-                        diarize=True
-                    )
-
-                    # Calculate confidence
-                    confidence = 0.0
-                    if result.get("words"):
-                        confidences = [
-                            w.get("confidence", 0)
-                            for w in result["words"]
-                            if "confidence" in w
-                        ]
-                        if confidences:
-                            confidence = sum(confidences) / len(confidences)
-
-                    return {
-                        "text": result.get("text", ""),
-                        "words": result.get("words", []),
-                        "segments": result.get("segments", []),
-                        "confidence": confidence
-                    }
-
-                except Exception as e:
-                    logger.error(f"Deepgram transcription failed: {e}", exc_info=True)
-                    raise
-
-        # Instantiate the concrete consumer
-        self._consumer = _ConcreteConsumer("deepgram", redis_client, buffer_chunks)
-
-    async def start_consuming(self):
-        """Delegate to base consumer."""
-        return await self._consumer.start_consuming()
-
-    async def stop(self):
-        """Delegate to base consumer."""
-        return await self._consumer.stop()
diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet_stream_consumer.py b/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet_stream_consumer.py
deleted file mode 100644
index f629cefd..00000000
--- a/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet_stream_consumer.py
+++ /dev/null
@@ -1,90 +0,0 @@
-"""
-Parakeet stream consumer for Redis Streams architecture.
-
-Reads from: audio:stream:* streams
-Writes to: transcription:results:{session_id}
-"""
-
-import logging
-
-from advanced_omi_backend.services.audio_stream.consumer import BaseAudioStreamConsumer
-from advanced_omi_backend.services.transcription import get_transcription_provider
-
-logger = logging.getLogger(__name__)
-
-
-class ParakeetStreamConsumer:
-    """
-    Parakeet consumer for Redis Streams architecture.
-
-    Reads from: specified stream (client-specific or provider-specific)
-    Writes to: transcription:results:{session_id}
-
-    This inherits from BaseAudioStreamConsumer and implements transcribe_audio().
-    """
-
-    def __init__(self, redis_client, buffer_chunks: int = 30):
-        """
-        Initialize Parakeet consumer.
-
-        Dynamically discovers all audio:stream:* streams and claims them using Redis locks.
-        Uses config.yml stt-parakeet-batch configuration for transcription.
-
-        Args:
-            redis_client: Connected Redis client
-            buffer_chunks: Number of chunks to buffer before transcribing (default: 30 = ~7.5s)
-        """
-        # Get registry-driven transcription provider
-        self.provider = get_transcription_provider(mode="batch")
-        if not self.provider:
-            raise RuntimeError(
-                "Failed to load transcription provider. Ensure config.yml has a default 'stt' model configured."
-            )
-
-        # Create a concrete subclass that implements transcribe_audio
-        class _ConcreteConsumer(BaseAudioStreamConsumer):
-            def __init__(inner_self, provider_name: str, redis_client, buffer_chunks: int):
-                super().__init__(provider_name, redis_client, buffer_chunks)
-                inner_self._parakeet_provider = self.provider
-
-            async def transcribe_audio(inner_self, audio_data: bytes, sample_rate: int) -> dict:
-                """Transcribe using ParakeetProvider."""
-                try:
-                    result = await inner_self._parakeet_provider.transcribe(
-                        audio_data=audio_data,
-                        sample_rate=sample_rate
-                    )
-
-                    # Calculate confidence (Parakeet may not provide confidence, default to 0.9)
-                    confidence = 0.9
-                    if result.get("words"):
-                        confidences = [
-                            w.get("confidence", 0.9)
-                            for w in result["words"]
-                            if "confidence" in w
-                        ]
-                        if confidences:
-                            confidence = sum(confidences) / len(confidences)
-
-                    return {
-                        "text": result.get("text", ""),
-                        "words": result.get("words", []),
-                        "segments": result.get("segments", []),
-                        "confidence": confidence
-                    }
-
-                except Exception as e:
-                    logger.error(f"Parakeet transcription failed: {e}", exc_info=True)
-                    raise
-
-        # Instantiate the concrete consumer
-        self._consumer = _ConcreteConsumer("parakeet", redis_client, buffer_chunks)
-
-    async def start_consuming(self):
-        """Delegate to base consumer."""
-        return await self._consumer.start_consuming()
-
-    async def stop(self):
-        """Delegate to base consumer."""
-        return await self._consumer.stop()
-
diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py b/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py
similarity index 92%
rename from backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
rename to backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py
index 7f166890..2f986c5a 100644
--- a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram_stream_consumer.py
+++ b/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py
@@ -1,5 +1,7 @@
 """
-Deepgram WebSocket streaming consumer for real-time transcription.
+Generic streaming transcription consumer for real-time audio processing.
+
+Uses registry-driven transcription provider from config.yml (supports any streaming provider).
 
 Reads from: audio:stream:* streams
 Publishes interim to: Redis Pub/Sub channel transcription:interim:{session_id}
@@ -24,22 +26,24 @@
 logger = logging.getLogger(__name__)
 
 
-class DeepgramStreamingConsumer:
+class StreamingTranscriptionConsumer:
     """
-    Deepgram streaming consumer for real-time WebSocket transcription.
+    Generic streaming transcription consumer using registry-driven providers.
 
     - Discovers audio:stream:* streams dynamically
     - Uses Redis consumer groups for fan-out (allows batch workers to process same stream)
-    - Starts WebSocket connections to Deepgram per stream
+    - Starts WebSocket connections using configured provider (from config.yml)
     - Sends audio immediately (no buffering)
     - Publishes interim results to Redis Pub/Sub for client display
     - Publishes final results to Redis Streams for storage
     - Triggers plugins only on final results
+
+    Supported providers (via config.yml): Any streaming STT service with WebSocket API
     """
 
     def __init__(self, redis_client: redis.Redis, plugin_router: Optional[PluginRouter] = None):
         """
-        Initialize Deepgram streaming consumer.
+        Initialize streaming transcription consumer.
 
         Args:
             redis_client: Connected Redis client
@@ -235,22 +239,30 @@ async def store_final_result(self, session_id: str, result: Dict, chunk_id: str
         try:
             stream_name = f"transcription:results:{session_id}"
 
-            # Prepare result entry
+            # Prepare result entry - MUST match aggregator's expected schema
+            # All keys and values must be bytes to match consumer.py format
             entry = {
-                "message_id": chunk_id or f"final_{int(time.time() * 1000)}",
-                "text": result.get("text", ""),
-                "confidence": result.get("confidence", 0.0),
-                "provider": "deepgram-stream",
-                "timestamp": time.time(),
-                "words": json.dumps(result.get("words", [])),
-                "segments": json.dumps(result.get("segments", [])),
-                "is_final": "true"
+                b"text": result.get("text", "").encode(),
+                b"chunk_id": (chunk_id or f"final_{int(time.time() * 1000)}").encode(),
+                b"provider": b"deepgram-stream",
+                b"confidence": str(result.get("confidence", 0.0)).encode(),
+                b"processing_time": b"0.0",  # Streaming has minimal processing time
+                b"timestamp": str(time.time()).encode(),
             }
 
+            # Add optional JSON fields
+            words = result.get("words", [])
+            if words:
+                entry[b"words"] = json.dumps(words).encode()
+
+            segments = result.get("segments", [])
+            if segments:
+                entry[b"segments"] = json.dumps(segments).encode()
+
             # Write to Redis Stream
             await self.redis_client.xadd(stream_name, entry)
 
-            logger.info(f"💾 Stored final result to {stream_name}: {entry['text'][:50]}...")
+            logger.info(f"💾 Stored final result to {stream_name}: {result.get('text', '')[:50]}...")
 
         except Exception as e:
             logger.error(f"Error storing final result for {session_id}: {e}", exc_info=True)
diff --git a/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py
index 3a3b554d..4d3fa0ae 100644
--- a/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py
+++ b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py
@@ -275,73 +275,6 @@ async def process_audio_chunk(
         client_state.update_audio_received(chunk)
 
 
-async def _process_audio_cropping_with_relative_timestamps(
-    original_path: str,
-    speech_segments: list[tuple[float, float]],
-    output_path: str,
-    audio_uuid: str,
-    _deprecated_chunk_repo=None,  # Deprecated - kept for backward compatibility
-) -> tuple[bool, list[dict]]:
-    """
-    Process audio cropping with speech segments already in relative format.
-
-    The segments are expected to be in relative format (seconds from audio start),
-    as provided by Deepgram transcription. No timestamp conversion is needed.
-
-    Note: Database updates are now handled by the caller (audio_jobs.py).
-
-    Returns:
-        Tuple of (success: bool, segment_mapping: list[dict])
-    """
-    try:
-        # Validate input segments
-        validated_segments = []
-        for start_rel, end_rel in speech_segments:
-            # Validate input timestamps
-            if start_rel >= end_rel:
-                logger.warning(
-                    f"⚠️ Invalid speech segment: start={start_rel} >= end={end_rel}, skipping"
-                )
-                continue
-
-            # Ensure timestamps are positive (sanity check)
-            if start_rel < 0:
-                logger.warning(
-                    f"⚠️ Negative start timestamp: {start_rel}, clamping to 0.0"
-                )
-                start_rel = 0.0
-            if end_rel < 0:
-                logger.warning(
-                    f"⚠️ Negative end timestamp: {end_rel}, skipping segment"
-                )
-                continue
-
-            validated_segments.append((start_rel, end_rel))
-
-        logger.info(f"🕐 Processing cropping for {audio_uuid}")
-        logger.info(f"🕐 Input segments (relative timestamps): {speech_segments}")
-        logger.info(f"🕐 Validated segments: {validated_segments}")
-
-        # Validate that we have valid segments
-        if not validated_segments:
-            logger.warning(
-                f"No valid segments for cropping {audio_uuid}"
-            )
-            return False, []
-
-        success, segment_mapping = await _crop_audio_with_ffmpeg(original_path, validated_segments, output_path)
-        if success:
-            cropped_filename = output_path.split("/")[-1]
-            logger.info(f"Successfully processed cropped audio: {cropped_filename}")
-            return True, segment_mapping
-        else:
-            logger.error(f"Failed to crop audio for {audio_uuid}")
-            return False, segment_mapping
-    except Exception as e:
-        logger.error(f"Error in audio cropping task for {audio_uuid}: {e}", exc_info=True)
-        return False, []
-
-
 def write_pcm_to_wav(
     pcm_data: bytes,
     output_path: str,
@@ -383,142 +316,3 @@ def write_pcm_to_wav(
     except Exception as e:
         logger.error(f"❌ Failed to write PCM to WAV: {e}")
         raise
-
-
-async def _crop_audio_with_ffmpeg(
-    original_path: str, speech_segments: list[tuple[float, float]], output_path: str
-) -> tuple[bool, list[dict]]:
-    """
-    Use ffmpeg to crop audio - runs as async subprocess, no GIL issues.
-
-    Returns:
-        Tuple of (success: bool, segment_mapping: list[dict])
-
-        segment_mapping contains one entry per input segment with:
-        - original_index: Index in input speech_segments
-        - original_start/end: Original timestamps in source audio
-        - cropped_start/end: Where the speech starts/ends in cropped file (None if filtered)
-        - kept: Whether segment was kept (True) or filtered out (False)
-    """
-    logger.info(f"Cropping audio {original_path} with {len(speech_segments)} speech segments")
-
-    if not speech_segments:
-        logger.warning(f"No speech segments to crop for {original_path}")
-        return False, []
-
-    # Check if the original file exists
-    if not os.path.exists(original_path):
-        logger.error(f"Original audio file does not exist: {original_path}")
-        return False, []
-
-    # Filter out segments that are too short and build mapping
-    filtered_segments = []
-    segment_mapping = []
-    current_cropped_offset = 0.0
-
-    for idx, (start, end) in enumerate(speech_segments):
-        duration = end - start
-        if duration >= MIN_SPEECH_SEGMENT_DURATION:
-            # Add padding around speech segments
-            padded_start = max(0, start - CROPPING_CONTEXT_PADDING)
-            padded_end = end + CROPPING_CONTEXT_PADDING
-            padded_duration = padded_end - padded_start
-
-            filtered_segments.append((padded_start, padded_end))
-
-            # Calculate where the speech (not padding) appears in cropped file
-            # The cropped file will have: [padding_before][speech][padding_after]
-            padding_before = start - padded_start
-            speech_start_in_cropped = current_cropped_offset + padding_before
-            speech_end_in_cropped = speech_start_in_cropped + duration
-
-            segment_mapping.append({
-                "original_index": idx,
-                "original_start": start,
-                "original_end": end,
-                "cropped_start": speech_start_in_cropped,
-                "cropped_end": speech_end_in_cropped,
-                "kept": True
-            })
-
-            # Move offset by the full padded duration
-            current_cropped_offset += padded_duration
-        else:
-            # Segment filtered out
-            segment_mapping.append({
-                "original_index": idx,
-                "original_start": start,
-                "original_end": end,
-                "cropped_start": None,
-                "cropped_end": None,
-                "kept": False
-            })
-            logger.debug(
-                f"Skipping short segment: {start}-{end} ({duration:.2f}s < {MIN_SPEECH_SEGMENT_DURATION}s)"
-            )
-
-    if not filtered_segments:
-        logger.warning(
-            f"No segments meet minimum duration ({MIN_SPEECH_SEGMENT_DURATION}s) for {original_path}"
-        )
-        return False, segment_mapping
-
-    logger.info(
-        f"Cropping audio {original_path} with {len(filtered_segments)} speech segments (filtered from {len(speech_segments)})"
-    )
-
-    try:
-        # Build ffmpeg filter for concatenating speech segments
-        filter_parts = []
-        for i, (start, end) in enumerate(filtered_segments):
-            duration = end - start
-            filter_parts.append(
-                f"[0:a]atrim=start={start}:duration={duration},asetpts=PTS-STARTPTS[seg{i}]"
-            )
-
-        # Concatenate all segments
-        inputs = "".join(f"[seg{i}]" for i in range(len(filtered_segments)))
-        concat_filter = f"{inputs}concat=n={len(filtered_segments)}:v=0:a=1[out]"
-
-        full_filter = ";".join(filter_parts + [concat_filter])
-
-        # Run ffmpeg as async subprocess
-        cmd = [
-            "ffmpeg",
-            "-y",  # -y = overwrite output
-            "-i",
-            original_path,
-            "-filter_complex",
-            full_filter,
-            "-map",
-            "[out]",
-            "-c:a",
-            "pcm_s16le",  # Keep same format as original
-            output_path,
-        ]
-
-        logger.info(f"Running ffmpeg command: {' '.join(cmd)}")
-
-        process = await asyncio.create_subprocess_exec(
-            *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
-        )
-
-        stdout, stderr = await process.communicate()
-        if stdout:
-            logger.debug(f"FFMPEG stdout: {stdout.decode()}")
-
-        if process.returncode == 0:
-            # Calculate cropped duration
-            cropped_duration = sum(end - start for start, end in filtered_segments)
-            logger.info(
-                f"Successfully cropped {original_path} -> {output_path} ({cropped_duration:.1f}s from {len(filtered_segments)} segments)"
-            )
-            return True, segment_mapping
-        else:
-            error_msg = stderr.decode() if stderr else "Unknown ffmpeg error"
-            logger.error(f"ffmpeg failed for {original_path}: {error_msg}")
-            return False, segment_mapping
-
-    except Exception as e:
-        logger.error(f"Error running ffmpeg on {original_path}: {e}", exc_info=True)
-        return False, segment_mapping
diff --git a/backends/advanced/src/advanced_omi_backend/workers/__init__.py b/backends/advanced/src/advanced_omi_backend/workers/__init__.py
index fb32797d..ea82056b 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/__init__.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/__init__.py
@@ -6,7 +6,7 @@
 - speaker_jobs: Speaker recognition and identification
 - conversation_jobs: Conversation management and updates
 - memory_jobs: Memory extraction and processing
-- audio_jobs: Audio file processing and cropping
+- audio_jobs: Audio file processing
 
 Queue configuration and utilities are in controllers/queue_controller.py
 """
@@ -36,9 +36,7 @@
 
 # Import from audio_jobs
 from .audio_jobs import (
-    process_cropping_job,
     audio_streaming_persistence_job,
-    enqueue_cropping,
 )
 
 # Import from queue_controller
@@ -78,10 +76,6 @@
     "process_memory_job",
     "enqueue_memory_processing",
 
-    # Audio jobs
-    "process_cropping_job",
-    "enqueue_cropping",
-
     # Queue utils
     "get_queue",
     "get_job_stats",
diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
index 56df7149..fa75cd40 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
@@ -21,170 +21,6 @@
 logger = logging.getLogger(__name__)
 
 
-@async_job(redis=True, beanie=True)
-async def process_cropping_job(
-    conversation_id: str,
-    audio_path: str,
-    *,
-    redis_client=None
-) -> Dict[str, Any]:
-    """
-    RQ job function for audio cropping - removes silent segments from audio.
-
-    This job:
-    1. Reads transcript segments from conversation
-    2. Extracts speech timestamps
-    3. Creates cropped audio file with only speech segments
-    4. Updates conversation with cropped file path
-
-    Args:
-        conversation_id: Conversation ID
-        audio_path: Path to original audio file
-        redis_client: Redis client (injected by decorator)
-
-    Returns:
-        Dict with processing results
-    """
-    from pathlib import Path
-    from advanced_omi_backend.utils.audio_utils import _process_audio_cropping_with_relative_timestamps
-    from advanced_omi_backend.models.conversation import Conversation
-    from advanced_omi_backend.config import CHUNK_DIR
-
-    try:
-        logger.info(f"🔄 RQ: Starting audio cropping for conversation {conversation_id}")
-
-        # Get conversation to access segments
-        conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
-        if not conversation:
-            raise ValueError(f"Conversation {conversation_id} not found")
-
-        # Extract speech segments from transcript (property returns data from active version)
-        segments = conversation.segments
-        if not segments or len(segments) == 0:
-            logger.warning(f"⚠️ No segments found for conversation {conversation_id}, skipping cropping")
-            return {
-                "success": False,
-                "conversation_id": conversation_id,
-                "reason": "no_segments"
-            }
-
-        # Convert segments to (start, end) tuples
-        speech_segments = [(seg.start, seg.end) for seg in segments]
-        logger.info(f"Found {len(speech_segments)} speech segments for cropping")
-
-        # Generate output path for cropped audio
-        audio_uuid = conversation.audio_uuid
-
-        # Build full path from conversation.audio_path (which may include folder prefix)
-        # conversation.audio_path is like "fixtures/filename.wav" or just "filename.wav"
-        full_audio_path = CHUNK_DIR / conversation.audio_path
-        original_path = Path(full_audio_path)
-        cropped_filename = f"cropped_{original_path.name}"
-
-        # If the conversation's audio_path contains a folder prefix, use the same folder for cropped audio
-        if conversation.audio_path and "/" in conversation.audio_path:
-            folder = conversation.audio_path.split("/")[0]
-            output_dir = CHUNK_DIR / folder
-            output_dir.mkdir(parents=True, exist_ok=True)
-            output_path = output_dir / cropped_filename
-            cropped_path_for_db = f"{folder}/{cropped_filename}"
-        else:
-            output_path = CHUNK_DIR / cropped_filename
-            cropped_path_for_db = cropped_filename
-
-        # Process cropping (no repository needed - we update conversation directly)
-        success, segment_mapping = await _process_audio_cropping_with_relative_timestamps(
-            str(original_path),
-            speech_segments,
-            str(output_path),
-            audio_uuid,
-            None  # No repository - we update conversation model directly
-        )
-
-        if not success:
-            logger.error(f"❌ RQ: Audio cropping failed for conversation {conversation_id}")
-            return {
-                "success": False,
-                "conversation_id": conversation_id,
-                "reason": "cropping_failed"
-            }
-
-        # Calculate actual cropped duration from kept segments
-        kept_segments = [m for m in segment_mapping if m["kept"]]
-        if kept_segments:
-            # Duration is end of last kept segment
-            cropped_duration_seconds = kept_segments[-1]["cropped_end"]
-        else:
-            cropped_duration_seconds = 0.0
-
-        # Update segment timestamps using the mapping
-        # Only keep segments that weren't filtered out
-        updated_segments = []
-        for i, seg in enumerate(segments):
-            if i >= len(segment_mapping):
-                logger.warning(f"⚠️ Segment {i} not in mapping, skipping")
-                continue
-
-            mapping = segment_mapping[i]
-            if mapping["kept"]:
-                # Segment was kept - use the cropped timestamps
-                updated_seg = seg.model_copy()
-                updated_seg.start = mapping["cropped_start"]
-                updated_seg.end = mapping["cropped_end"]
-                updated_segments.append(updated_seg)
-                logger.debug(
-                    f"Segment {i}: {seg.start:.2f}-{seg.end:.2f}s → "
-                    f"{updated_seg.start:.2f}-{updated_seg.end:.2f}s (in cropped audio)"
-                )
-            else:
-                # Segment was filtered out (too short)
-                logger.debug(
-                    f"Segment {i} filtered out (duration {seg.end - seg.start:.2f}s < MIN_SPEECH_SEGMENT_DURATION)"
-                )
-
-        # Update conversation with cropped audio path and adjusted segments
-        conversation.cropped_audio_path = cropped_path_for_db
-
-        # Update the active transcript version segments
-        # Find and update the version directly in the list to ensure Beanie detects the change
-        if conversation.active_transcript_version:
-            for i, version in enumerate(conversation.transcript_versions):
-                if version.version_id == conversation.active_transcript_version:
-                    conversation.transcript_versions[i].segments = updated_segments
-                    logger.info(f"📝 Updated segments in transcript version {version.version_id[:12]}")
-                    break
-
-        await conversation.save()
-        logger.info(f"💾 Updated conversation {conversation_id[:12]} with cropped_audio_path and adjusted {len(updated_segments)} segment timestamps")
-
-        logger.info(f"✅ RQ: Completed audio cropping for conversation {conversation_id} ({cropped_duration_seconds:.1f}s)")
-
-        # Update job metadata with cropped duration
-        from rq import get_current_job
-        current_job = get_current_job()
-        if current_job:
-            if not current_job.meta:
-                current_job.meta = {}
-            current_job.meta['cropped_duration_seconds'] = round(cropped_duration_seconds, 1)
-            current_job.meta['segments_cropped'] = len(speech_segments)
-            current_job.save_meta()
-
-        return {
-            "success": True,
-            "conversation_id": conversation_id,
-            "audio_uuid": audio_uuid,
-            "original_path": str(original_path),
-            "cropped_path": str(output_path),
-            "cropped_filename": cropped_filename,
-            "segments_count": len(speech_segments),
-            "cropped_duration_seconds": cropped_duration_seconds
-        }
-
-    except Exception as e:
-        logger.error(f"❌ RQ: Audio cropping failed for conversation {conversation_id}: {e}")
-        raise
-
-
 @async_job(redis=True, beanie=True)
 async def audio_streaming_persistence_job(
     session_id: str,
@@ -480,40 +316,3 @@ async def audio_streaming_persistence_job(
 
 
 # Enqueue wrapper functions
-
-def enqueue_cropping(
-    conversation_id: str,
-    audio_path: str,
-    priority: JobPriority = JobPriority.NORMAL
-):
-    """
-    Enqueue an audio cropping job.
-
-    Args:
-        conversation_id: Conversation ID
-        audio_path: Path to audio file
-        priority: Job priority level
-
-    Returns:
-        RQ Job object for tracking.
-    """
-    timeout_mapping = {
-        JobPriority.URGENT: 300,  # 5 minutes
-        JobPriority.HIGH: 240,    # 4 minutes
-        JobPriority.NORMAL: 180,  # 3 minutes
-        JobPriority.LOW: 120      # 2 minutes
-    }
-
-    job = default_queue.enqueue(
-        process_cropping_job,
-        conversation_id,
-        audio_path,
-        job_timeout=timeout_mapping.get(priority, 180),
-        result_ttl=JOB_RESULT_TTL,
-        job_id=f"crop_{conversation_id[:12]}",
-        description=f"Crop audio for conversation {conversation_id[:12]}",
-        meta={'conversation_id': conversation_id}
-    )
-
-    logger.info(f"📥 RQ: Enqueued cropping job {job.id} for conversation {conversation_id}")
-    return job
diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py
deleted file mode 100644
index a58682c1..00000000
--- a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python3
-"""
-Deepgram audio stream worker.
-
-Starts a consumer that reads from audio:stream:deepgram and transcribes audio.
-"""
-
-import asyncio
-import logging
-import os
-import signal
-import sys
-
-import redis.asyncio as redis
-
-from advanced_omi_backend.services.transcription.deepgram import DeepgramStreamConsumer
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
-)
-
-logger = logging.getLogger(__name__)
-
-
-async def main():
-    """Main worker entry point."""
-    logger.info("🚀 Starting Deepgram audio stream worker")
-
-    # Check that config.yml has Deepgram configured
-    # The registry provider will load configuration from config.yml
-    api_key = os.getenv("DEEPGRAM_API_KEY")
-    if not api_key:
-        logger.warning("DEEPGRAM_API_KEY environment variable not set")
-        logger.warning("Ensure config.yml has a default 'stt' model configured for Deepgram")
-        logger.warning("Audio transcription will use alternative providers if configured in config.yml")
-
-    redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0")
-
-    # Create Redis client
-    redis_client = await redis.from_url(
-        redis_url,
-        encoding="utf-8",
-        decode_responses=False
-    )
-    logger.info("Connected to Redis")
-
-    # Create consumer with balanced buffer size
-    # 20 chunks = ~5 seconds of audio
-    # Balance between transcription accuracy and latency
-    # Consumer uses registry-driven provider from config.yml
-    consumer = DeepgramStreamConsumer(
-        redis_client=redis_client,
-        buffer_chunks=20  # 5 seconds - good context without excessive delay
-    )
-
-    # Setup signal handlers for graceful shutdown
-    def signal_handler(signum, frame):
-        logger.info(f"Received signal {signum}, shutting down...")
-        asyncio.create_task(consumer.stop())
-
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-
-    try:
-        logger.info("✅ Deepgram worker ready")
-
-        # This blocks until consumer is stopped
-        await consumer.start_consuming()
-
-    except Exception as e:
-        logger.error(f"Worker error: {e}", exc_info=True)
-        sys.exit(1)
-    finally:
-        await redis_client.aclose()
-        logger.info("👋 Deepgram worker stopped")
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_parakeet_worker.py b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_parakeet_worker.py
deleted file mode 100644
index 56f2f26b..00000000
--- a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_parakeet_worker.py
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env python3
-"""
-Parakeet audio stream worker.
-
-Starts a consumer that reads from audio:stream:* and transcribes audio using Parakeet.
-"""
-
-import asyncio
-import logging
-import os
-import signal
-import sys
-
-import redis.asyncio as redis
-
-from advanced_omi_backend.services.transcription.parakeet_stream_consumer import ParakeetStreamConsumer
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
-)
-
-logger = logging.getLogger(__name__)
-
-
-async def main():
-    """Main worker entry point."""
-    logger.info("🚀 Starting Parakeet audio stream worker")
-
-    # Check that config.yml has Parakeet configured
-    # The registry provider will load configuration from config.yml
-    service_url = os.getenv("PARAKEET_ASR_URL")
-    if not service_url:
-        logger.warning("PARAKEET_ASR_URL environment variable not set")
-        logger.warning("Ensure config.yml has a default 'stt' model configured for Parakeet")
-        logger.warning("Audio transcription will use alternative providers if configured in config.yml")
-
-    redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0")
-
-    # Create Redis client
-    redis_client = await redis.from_url(
-        redis_url,
-        encoding="utf-8",
-        decode_responses=False
-    )
-    logger.info("Connected to Redis")
-
-    # Create consumer with balanced buffer size
-    # 20 chunks = ~5 seconds of audio
-    # Balance between transcription accuracy and latency
-    # Consumer uses registry-driven provider from config.yml
-    consumer = ParakeetStreamConsumer(
-        redis_client=redis_client,
-        buffer_chunks=20  # 5 seconds - good context without excessive delay
-    )
-
-    # Setup signal handlers for graceful shutdown
-    shutdown_event = asyncio.Event()
-
-    def signal_handler(signum, _frame):
-        logger.info(f"Received signal {signum}, shutting down...")
-        shutdown_event.set()
-
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-
-    try:
-        logger.info("✅ Parakeet worker ready")
-
-        # This blocks until consumer is stopped or shutdown signaled
-        consume_task = asyncio.create_task(consumer.start_consuming())
-        shutdown_task = asyncio.create_task(shutdown_event.wait())
-
-        done, pending = await asyncio.wait(
-            [consume_task, shutdown_task],
-            return_when=asyncio.FIRST_COMPLETED
-        )
-
-        # Cancel pending tasks
-        for task in pending:
-            task.cancel()
-
-        await consumer.stop()
-
-    except Exception as e:
-        logger.error(f"Worker error: {e}", exc_info=True)
-        sys.exit(1)
-    finally:
-        await redis_client.aclose()
-        logger.info("👋 Parakeet worker stopped")
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
-
diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_worker.py
similarity index 76%
rename from backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py
rename to backends/advanced/src/advanced_omi_backend/workers/audio_stream_worker.py
index 0a893e6a..df133de4 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_streaming_worker.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_worker.py
@@ -1,8 +1,9 @@
 #!/usr/bin/env python3
 """
-Deepgram WebSocket streaming audio worker.
+Generic streaming transcription worker using registry-driven providers.
 
-Starts a consumer that reads from audio:stream:* streams and transcribes via Deepgram WebSocket API.
+Starts a consumer that reads from audio:stream:* streams and transcribes via configured provider.
+Provider configuration is loaded from config.yml (supports any streaming STT service).
 Publishes interim results to Redis Pub/Sub for real-time client display.
 Publishes final results to Redis Streams for storage.
 Triggers plugins on final results only.
@@ -17,7 +18,7 @@
 import redis.asyncio as redis
 
 from advanced_omi_backend.services.plugin_service import init_plugin_router
-from advanced_omi_backend.services.transcription.deepgram_stream_consumer import DeepgramStreamingConsumer
+from advanced_omi_backend.services.transcription.streaming_consumer import StreamingTranscriptionConsumer
 from advanced_omi_backend.client_manager import initialize_redis_for_client_manager
 
 logging.basicConfig(
@@ -30,14 +31,8 @@
 
 async def main():
     """Main worker entry point."""
-    logger.info("🚀 Starting Deepgram WebSocket streaming worker")
-
-    # Validate DEEPGRAM_API_KEY
-    api_key = os.getenv("DEEPGRAM_API_KEY")
-    if not api_key:
-        logger.error("DEEPGRAM_API_KEY environment variable not set")
-        logger.error("Cannot start Deepgram streaming worker without API key")
-        sys.exit(1)
+    logger.info("🚀 Starting streaming transcription worker")
+    logger.info("📋 Provider configuration loaded from config.yml (defaults.stt_stream)")
 
     redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0")
 
@@ -76,15 +71,16 @@ async def main():
         logger.error(f"Failed to initialize plugin router: {e}", exc_info=True)
         plugin_router = None
 
-    # Create Deepgram streaming consumer
+    # Create streaming transcription consumer (uses registry-driven provider from config.yml)
     try:
-        consumer = DeepgramStreamingConsumer(
+        consumer = StreamingTranscriptionConsumer(
             redis_client=redis_client,
             plugin_router=plugin_router
         )
-        logger.info("✅ Deepgram streaming consumer created")
+        logger.info("✅ Streaming transcription consumer created")
     except Exception as e:
-        logger.error(f"Failed to create Deepgram streaming consumer: {e}", exc_info=True)
+        logger.error(f"Failed to create streaming transcription consumer: {e}", exc_info=True)
+        logger.error("Ensure config.yml has defaults.stt_stream configured with valid provider")
         await redis_client.aclose()
         sys.exit(1)
 
@@ -97,7 +93,7 @@ def signal_handler(signum, frame):
     signal.signal(signal.SIGTERM, signal_handler)
 
     try:
-        logger.info("✅ Deepgram streaming worker ready")
+        logger.info("✅ Streaming transcription worker ready")
         logger.info("📡 Listening for audio streams on audio:stream:* pattern")
         logger.info("📢 Publishing interim results to transcription:interim:{session_id}")
         logger.info("💾 Publishing final results to transcription:results:{session_id}")
@@ -112,7 +108,7 @@ def signal_handler(signum, frame):
         sys.exit(1)
     finally:
         await redis_client.aclose()
-        logger.info("👋 Deepgram streaming worker stopped")
+        logger.info("👋 Streaming transcription worker stopped")
 
 
 if __name__ == "__main__":
diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py
index 512f4a9a..c5f3942f 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py
@@ -14,57 +14,27 @@
 logger = logging.getLogger(__name__)
 
 
-def get_default_stt_provider() -> str:
+def has_streaming_stt_configured() -> bool:
     """
-    Query config.yml for the default STT provider.
+    Check if streaming STT provider is configured in config.yml.
 
     Returns:
-        Provider name (e.g., "deepgram", "parakeet") or empty string if not configured
+        True if defaults.stt_stream is configured, False otherwise
+
+    Note: Batch STT is handled by RQ workers in transcription_jobs.py,
+          no separate worker needed.
     """
     try:
         from advanced_omi_backend.model_registry import get_models_registry
 
         registry = get_models_registry()
         if registry and registry.defaults:
-            stt_model = registry.get_default("stt")
-            if stt_model:
-                return stt_model.model_provider or ""
+            stt_stream_model = registry.get_default("stt_stream")
+            return stt_stream_model is not None
     except Exception as e:
-        logger.warning(f"Failed to read STT provider from config.yml: {e}")
-
-    return ""
-
-
-def should_start_deepgram_batch() -> bool:
-    """
-    Check if Deepgram batch worker should start.
-
-    Conditions:
-    - DEFAULT_STT provider is "deepgram" (from config.yml)
-    - DEEPGRAM_API_KEY is set in environment
-    """
-    stt_provider = get_default_stt_provider()
-    has_api_key = bool(os.getenv("DEEPGRAM_API_KEY"))
-
-    enabled = stt_provider == "deepgram" and has_api_key
+        logger.warning(f"Failed to read streaming STT config from config.yml: {e}")
 
-    if stt_provider == "deepgram" and not has_api_key:
-        logger.warning(
-            "Deepgram configured as default STT but DEEPGRAM_API_KEY not set - worker disabled"
-        )
-
-    return enabled
-
-
-def should_start_parakeet() -> bool:
-    """
-    Check if Parakeet stream worker should start.
-
-    Conditions:
-    - DEFAULT_STT provider is "parakeet" (from config.yml)
-    """
-    stt_provider = get_default_stt_provider()
-    return stt_provider == "parakeet"
+    return False
 
 
 def build_worker_definitions() -> List[WorkerDefinition]:
@@ -115,43 +85,38 @@ def build_worker_definitions() -> List[WorkerDefinition]:
         )
     )
 
-    # Deepgram Batch Worker - Conditional (if DEFAULT_STT=deepgram + API key)
-    workers.append(
-        WorkerDefinition(
-            name="deepgram-batch",
-            command=[
-                "uv",
-                "run",
-                "python",
-                "-m",
-                "advanced_omi_backend.workers.audio_stream_deepgram_worker",
-            ],
-            worker_type=WorkerType.STREAM_CONSUMER,
-            enabled_check=should_start_deepgram_batch,
-            restart_on_failure=True,
-        )
-    )
-
-    # Parakeet Stream Worker - Conditional (if DEFAULT_STT=parakeet)
+    # Streaming STT Worker - Conditional (if streaming STT is configured in config.yml)
+    # This worker uses the registry-driven streaming provider (RegistryStreamingTranscriptionProvider)
+    # Batch transcription happens via RQ jobs in transcription_jobs.py (already uses registry provider)
     workers.append(
         WorkerDefinition(
-            name="parakeet-stream",
+            name="streaming-stt",
             command=[
                 "uv",
                 "run",
                 "python",
                 "-m",
-                "advanced_omi_backend.workers.audio_stream_parakeet_worker",
+                "advanced_omi_backend.workers.audio_stream_worker",
             ],
             worker_type=WorkerType.STREAM_CONSUMER,
-            enabled_check=should_start_parakeet,
+            enabled_check=has_streaming_stt_configured,
             restart_on_failure=True,
         )
     )
 
     # Log worker configuration
-    stt_provider = get_default_stt_provider()
-    logger.info(f"STT Provider from config.yml: {stt_provider or 'none'}")
+    try:
+        from advanced_omi_backend.model_registry import get_models_registry
+        registry = get_models_registry()
+        if registry:
+            stt_stream = registry.get_default("stt_stream")
+            stt_batch = registry.get_default("stt")
+            if stt_stream:
+                logger.info(f"Streaming STT configured: {stt_stream.name} ({stt_stream.model_provider})")
+            if stt_batch:
+                logger.info(f"Batch STT configured: {stt_batch.name} ({stt_batch.model_provider}) - handled by RQ workers")
+    except Exception as e:
+        logger.warning(f"Failed to log STT configuration: {e}")
 
     enabled_workers = [w for w in workers if w.is_enabled()]
     disabled_workers = [w for w in workers if not w.is_enabled()]
diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
index cf65b2d9..f25e468f 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
@@ -344,7 +344,7 @@ async def transcribe_full_audio_job(
         for seg in segments:
             # Use identified_as if available (from speaker recognition), otherwise use speaker label
             speaker_id = seg.get("identified_as") or seg.get("speaker", "Unknown")
-            # Convert speaker ID to string if it's an integer (Deepgram returns int speaker IDs)
+            # Convert speaker ID to string if it's an integer (some providers return int speaker IDs)
             speaker_name = f"Speaker {speaker_id}" if isinstance(speaker_id, int) else speaker_id
 
             speaker_segments.append(
@@ -357,8 +357,8 @@ async def transcribe_full_audio_job(
                 )
             )
     elif transcript_text:
-        # NOTE: Parakeet falls here.
-        # If no segments but we have text, create a single segment from the full transcript
+        # Fallback: If no segments but we have text, create a single segment from the full transcript
+        # This handles providers that don't support segmentation
         # Calculate duration from words if available, otherwise estimate from audio
         start_time_seg = 0.0
         end_time_seg = 0.0
diff --git a/backends/advanced/start-k8s.sh b/backends/advanced/start-k8s.sh
index 4235b16c..847e3a6e 100755
--- a/backends/advanced/start-k8s.sh
+++ b/backends/advanced/start-k8s.sh
@@ -79,15 +79,16 @@ sleep 1
 
 # Function to start all workers
 start_workers() {
-    # NEW WORKERS - Redis Streams multi-provider architecture
+    # NEW WORKERS - Registry-driven streaming transcription architecture
     # Single worker ensures sequential processing of audio chunks (matching worker_orchestrator.py)
-    echo "🎵 Starting audio stream Deepgram worker (1 worker for sequential processing)..."
-    if python3 -m advanced_omi_backend.workers.audio_stream_deepgram_worker &
+    # Uses config.yml for provider selection (Deepgram, Parakeet, etc.)
+    echo "🎵 Starting streaming transcription worker (registry-driven provider from config.yml)..."
+    if python3 -m advanced_omi_backend.workers.audio_stream_worker &
     then
         AUDIO_WORKER_1_PID=$!
-        echo "  ✅ Deepgram stream worker started with PID: $AUDIO_WORKER_1_PID"
+        echo "  ✅ Streaming transcription worker started with PID: $AUDIO_WORKER_1_PID"
     else
-        echo "  ❌ Failed to start Deepgram stream worker"
+        echo "  ❌ Failed to start streaming transcription worker"
         exit 1
     fi
 
diff --git a/backends/advanced/uv.lock b/backends/advanced/uv.lock
index c73386c8..afd88ad2 100644
--- a/backends/advanced/uv.lock
+++ b/backends/advanced/uv.lock
@@ -56,6 +56,7 @@ dev = [
     { name = "pre-commit-uv" },
 ]
 test = [
+    { name = "aiosqlite" },
     { name = "pytest" },
     { name = "pytest-asyncio" },
     { name = "pytest-cov" },
@@ -108,6 +109,7 @@ dev = [
     { name = "pre-commit-uv", specifier = ">=4.1.4" },
 ]
 test = [
+    { name = "aiosqlite", specifier = ">=0.20.0" },
     { name = "pytest", specifier = ">=8.4.1" },
     { name = "pytest-asyncio", specifier = ">=1.0.0" },
     { name = "pytest-cov", specifier = ">=6.0.0" },
@@ -226,6 +228,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
 ]
 
+[[package]]
+name = "aiosqlite"
+version = "0.22.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4e/8a/64761f4005f17809769d23e518d915db74e6310474e733e3593cfc854ef1/aiosqlite-0.22.1.tar.gz", hash = "sha256:043e0bd78d32888c0a9ca90fc788b38796843360c855a7262a532813133a0650", size = 14821, upload-time = "2025-12-23T19:25:43.997Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/00/b7/e3bf5133d697a08128598c8d0abc5e16377b51465a33756de24fa7dee953/aiosqlite-0.22.1-py3-none-any.whl", hash = "sha256:21c002eb13823fad740196c5a2e9d8e62f6243bd9e7e4a1f87fb5e44ecb4fceb", size = 17405, upload-time = "2025-12-23T19:25:42.139Z" },
+]
+
 [[package]]
 name = "annotated-doc"
 version = "0.0.4"
diff --git a/backends/advanced/webui/src/components/audio/DebugPanel.tsx b/backends/advanced/webui/src/components/audio/DebugPanel.tsx
index 4a82d380..a3785f1d 100644
--- a/backends/advanced/webui/src/components/audio/DebugPanel.tsx
+++ b/backends/advanced/webui/src/components/audio/DebugPanel.tsx
@@ -65,7 +65,7 @@ export default function DebugPanel({ recording }: DebugPanelProps) {
       )}
 
       <div className="mt-3 text-xs text-gray-500 dark:text-gray-500">
-        <p>• WebSocket URL: {recording.hasValidWebSocket ? 'ws_pcm endpoint' : 'Not connected'}</p>
+        <p>• WebSocket URL: {recording.hasValidWebSocket ? '/ws?codec=pcm endpoint' : 'Not connected'}</p>
         <p>• Audio Format: 16kHz, Mono, PCM Int16</p>
         <p>• Protocol: Wyoming (JSON headers + binary payloads)</p>
         <p>• Direct Checks: WS={recording.hasValidWebSocket ? '✅' : '❌'} Mic={recording.hasValidMicrophone ? '✅' : '❌'} Ctx={recording.hasValidAudioContext ? '✅' : '❌'}</p>
diff --git a/backends/advanced/webui/src/components/audio/RecordingStatus.tsx b/backends/advanced/webui/src/components/audio/RecordingStatus.tsx
index d8ad608e..b208beaa 100644
--- a/backends/advanced/webui/src/components/audio/RecordingStatus.tsx
+++ b/backends/advanced/webui/src/components/audio/RecordingStatus.tsx
@@ -57,7 +57,7 @@ export default function RecordingStatus({ recording }: RecordingStatusProps) {
               User: {user?.name || user?.email}
             </p>
             <p className="text-sm text-gray-600 dark:text-gray-400">
-              Endpoint: /ws_pcm
+              Endpoint: /ws?codec=pcm
             </p>
           </div>
         </div>
diff --git a/backends/advanced/webui/src/hooks/useAudioRecording.ts b/backends/advanced/webui/src/hooks/useAudioRecording.ts
index dbb29889..164fa9d5 100644
--- a/backends/advanced/webui/src/hooks/useAudioRecording.ts
+++ b/backends/advanced/webui/src/hooks/useAudioRecording.ts
@@ -141,13 +141,13 @@ export const useAudioRecording = (): UseAudioRecordingReturn => {
         if (BACKEND_URL && BACKEND_URL.startsWith('http')) {
           // BACKEND_URL is a full URL (e.g., http://localhost:8000)
           const backendHost = BACKEND_URL.replace(/^https?:\/\//, '')
-          wsUrl = `${wsProtocol}//${backendHost}/ws_pcm?token=${token}&device_name=webui-recorder`
+          wsUrl = `${wsProtocol}//${backendHost}/ws?codec=pcm&token=${token}&device_name=webui-recorder`
         } else if (BACKEND_URL && BACKEND_URL !== '') {
           // BACKEND_URL is a path (e.g., /prod)
-          wsUrl = `${wsProtocol}//${window.location.host}${BACKEND_URL}/ws_pcm?token=${token}&device_name=webui-recorder`
+          wsUrl = `${wsProtocol}//${window.location.host}${BACKEND_URL}/ws?codec=pcm&token=${token}&device_name=webui-recorder`
         } else {
           // BACKEND_URL is empty (same origin)
-          wsUrl = `${wsProtocol}//${window.location.host}/ws_pcm?token=${token}&device_name=webui-recorder`
+          wsUrl = `${wsProtocol}//${window.location.host}/ws?codec=pcm&token=${token}&device_name=webui-recorder`
         }
       const ws = new WebSocket(wsUrl)
       // Note: Don't set binaryType yet - will cause protocol violations with text messages
diff --git a/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts b/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts
index cb3e3eee..91f394c9 100644
--- a/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts
+++ b/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts
@@ -168,13 +168,13 @@ export const useSimpleAudioRecording = (): SimpleAudioRecordingReturn => {
     if (BACKEND_URL && BACKEND_URL.startsWith('http')) {
       // BACKEND_URL is a full URL (e.g., http://localhost:8000)
       const backendHost = BACKEND_URL.replace(/^https?:\/\//, '')
-      wsUrl = `${wsProtocol}//${backendHost}/ws_pcm?token=${token}&device_name=webui-simple-recorder`
+      wsUrl = `${wsProtocol}//${backendHost}/ws?codec=pcm&token=${token}&device_name=webui-recorder`
     } else if (BACKEND_URL && BACKEND_URL !== '') {
       // BACKEND_URL is a path (e.g., /prod)
-      wsUrl = `${wsProtocol}//${window.location.host}${BACKEND_URL}/ws_pcm?token=${token}&device_name=webui-simple-recorder`
+      wsUrl = `${wsProtocol}//${window.location.host}${BACKEND_URL}/ws?codec=pcm&token=${token}&device_name=webui-recorder`
     } else {
       // BACKEND_URL is empty (same origin)
-      wsUrl = `${wsProtocol}//${window.location.host}/ws_pcm?token=${token}&device_name=webui-simple-recorder`
+      wsUrl = `${wsProtocol}//${window.location.host}/ws?codec=pcm&token=${token}&device_name=webui-recorder`
     }
     
     return new Promise<WebSocket>((resolve, reject) => {
diff --git a/backends/advanced/webui/src/pages/Conversations.tsx b/backends/advanced/webui/src/pages/Conversations.tsx
index d4b76ed3..cd60dc08 100644
--- a/backends/advanced/webui/src/pages/Conversations.tsx
+++ b/backends/advanced/webui/src/pages/Conversations.tsx
@@ -15,7 +15,6 @@ interface Conversation {
   segment_count?: number  // From list endpoint
   memory_count?: number  // From list endpoint
   audio_path?: string
-  cropped_audio_path?: string
   duration_seconds?: number
   has_memory?: boolean
   transcript?: string  // From detail endpoint
@@ -295,10 +294,9 @@ export default function Conversations() {
     }
   }
 
-  const handleSegmentPlayPause = (conversationId: string, segmentIndex: number, segment: any, useCropped: boolean) => {
+  const handleSegmentPlayPause = (conversationId: string, segmentIndex: number, segment: any) => {
     const segmentId = `${conversationId}-${segmentIndex}`;
-    // Include cropped flag in cache key to handle mode switches
-    const audioKey = `${conversationId}-${useCropped ? 'cropped' : 'original'}`;
+    const audioKey = conversationId; // Use conversation ID as cache key
 
     // If this segment is already playing, pause it
     if (playingSegment === segmentId) {
@@ -316,7 +314,7 @@ export default function Conversations() {
 
     // Stop any currently playing segment
     if (playingSegment) {
-      // Stop all audio elements (could be playing from different mode)
+      // Stop all audio elements
       Object.values(audioRefs.current).forEach(audio => {
         audio.pause();
       });
@@ -326,13 +324,13 @@ export default function Conversations() {
       }
     }
 
-    // Get or create audio element for this conversation + mode combination
+    // Get or create audio element for this conversation
     let audio = audioRefs.current[audioKey];
 
     // Check if we need to create a new audio element (none exists or previous had error)
     if (!audio || audio.error) {
       const token = localStorage.getItem(getStorageKey('token')) || '';
-      const audioUrl = `${BACKEND_URL}/api/audio/get_audio/${conversationId}?cropped=${useCropped}&token=${token}`;
+      const audioUrl = `${BACKEND_URL}/api/audio/get_audio/${conversationId}?token=${token}`;
       console.log('Creating audio element with URL:', audioUrl);
       console.log('Token present:', !!token, 'Token length:', token.length);
       audio = new Audio(audioUrl);
@@ -635,12 +633,11 @@ export default function Conversations() {
               {/* Audio Player */}
               <div className="mb-4">
                 <div className="space-y-2">
-                  {(conversation.audio_path || conversation.cropped_audio_path) && (
+                  {conversation.audio_path && (
                     <>
                       <div className="flex items-center space-x-2 text-sm text-gray-700 dark:text-gray-300">
                         <span className="font-medium">
-                          {debugMode ? '🔧 Original Audio' : '🎵 Audio'}
-                          {debugMode && conversation.cropped_audio_path && ' (Debug Mode)'}
+                          🎵 Audio
                         </span>
                       </div>
                       <audio
@@ -648,20 +645,10 @@ export default function Conversations() {
                         className="w-full h-10"
                         preload="metadata"
                         style={{ minWidth: '300px' }}
-                        src={`${BACKEND_URL}/api/audio/get_audio/${conversation.conversation_id}?cropped=${!debugMode}&token=${localStorage.getItem(getStorageKey('token')) || ''}`}
+                        src={`${BACKEND_URL}/api/audio/get_audio/${conversation.conversation_id}?token=${localStorage.getItem(getStorageKey('token')) || ''}`}
                       >
                         Your browser does not support the audio element.
                       </audio>
-                      {debugMode && conversation.cropped_audio_path && (
-                        <div className="text-xs text-gray-500 dark:text-gray-400">
-                          💡 Cropped version available: {conversation.cropped_audio_path}
-                        </div>
-                      )}
-                      {!debugMode && conversation.cropped_audio_path && (
-                        <div className="text-xs text-gray-500 dark:text-gray-400">
-                          💡 Enable debug mode to hear original with silence
-                        </div>
-                      )}
                     </>
                   )}
                 </div>
@@ -724,9 +711,7 @@ export default function Conversations() {
                           const conversationKey = conversation.conversation_id || conversation.audio_uuid
                           const segmentId = `${conversationKey}-${index}`
                           const isPlaying = playingSegment === segmentId
-                          const hasAudio = conversation.cropped_audio_path || conversation.audio_path
-                          // Use cropped audio only if available and not in debug mode
-                          const useCropped = !debugMode && !!conversation.cropped_audio_path
+                          const hasAudio = !!conversation.audio_path
 
                           return (
                             <div
@@ -738,7 +723,7 @@ export default function Conversations() {
                               {/* Play/Pause Button */}
                               {hasAudio && (
                                 <button
-                                  onClick={() => handleSegmentPlayPause(conversationKey, index, segment, useCropped)}
+                                  onClick={() => handleSegmentPlayPause(conversationKey, index, segment)}
                                   className={`flex-shrink-0 w-5 h-5 rounded-full flex items-center justify-center transition-colors mt-0.5 ${
                                     isPlaying
                                       ? 'bg-blue-600 text-white hover:bg-blue-700'
@@ -815,8 +800,7 @@ export default function Conversations() {
                   <div className="text-xs text-gray-600 dark:text-gray-400 space-y-1">
                     <div>Conversation ID: {conversation.conversation_id || 'N/A'}</div>
                     <div>Audio UUID: {conversation.audio_uuid}</div>
-                    <div>Original Audio: {conversation.audio_path || 'N/A'}</div>
-                    <div>Cropped Audio: {conversation.cropped_audio_path || 'N/A'}</div>
+                    <div>Audio Path: {conversation.audio_path || 'N/A'}</div>
                     <div>Transcript Version Count: {conversation.transcript_version_count || 0}</div>
                     <div>Memory Version Count: {conversation.memory_version_count || 0}</div>
                     <div>Segment Count: {conversation.segment_count || 0}</div>
diff --git a/backends/advanced/webui/src/pages/ConversationsTimeline.tsx b/backends/advanced/webui/src/pages/ConversationsTimeline.tsx
index 5c3f748f..40626f5c 100644
--- a/backends/advanced/webui/src/pages/ConversationsTimeline.tsx
+++ b/backends/advanced/webui/src/pages/ConversationsTimeline.tsx
@@ -15,7 +15,6 @@ interface Conversation {
   segment_count?: number
   memory_count?: number
   audio_path?: string
-  cropped_audio_path?: string
   duration_seconds?: number
   has_memory?: boolean
   transcript?: string
@@ -170,21 +169,11 @@ function ConversationCard({ conversation, formatDuration }: ConversationCardProp
             )}
           </div>
 
-          {/* Audio Paths */}
-          {(conversation.audio_path || conversation.cropped_audio_path) && (
-            <div className="text-xs space-y-1">
-              {conversation.audio_path && (
-                <div>
-                  <span className="font-medium text-gray-700">Audio:</span>{' '}
-                  <span className="text-gray-600 font-mono">{conversation.audio_path}</span>
-                </div>
-              )}
-              {conversation.cropped_audio_path && (
-                <div>
-                  <span className="font-medium text-gray-700">Cropped:</span>{' '}
-                  <span className="text-gray-600 font-mono">{conversation.cropped_audio_path}</span>
-                </div>
-              )}
+          {/* Audio Path */}
+          {conversation.audio_path && (
+            <div className="text-xs">
+              <span className="font-medium text-gray-700">Audio:</span>{' '}
+              <span className="text-gray-600 font-mono">{conversation.audio_path}</span>
             </div>
           )}
         </div>
diff --git a/backends/advanced/webui/src/pages/Queue.tsx b/backends/advanced/webui/src/pages/Queue.tsx
index 3dc774f3..fcacb817 100644
--- a/backends/advanced/webui/src/pages/Queue.tsx
+++ b/backends/advanced/webui/src/pages/Queue.tsx
@@ -516,7 +516,7 @@ const Queue: React.FC = () => {
       borderColor = 'border-green-600';
     }
     // Audio processing - orange shades
-    else if (type.includes('audio') || type.includes('persist') || type.includes('cropping')) {
+    else if (type.includes('audio') || type.includes('persist')) {
       bgColor = 'bg-orange-500';
       borderColor = 'border-orange-600';
     }
@@ -639,22 +639,6 @@ const Queue: React.FC = () => {
     return `${Math.floor(durationMs / 3600000)}h ${Math.floor((durationMs % 3600000) / 60000)}m`;
   };
 
-  // Format seconds to readable time format (e.g., 3m34s or 1h22m32s)
-  const formatSeconds = (seconds: number): string => {
-    if (seconds < 60) {
-      return `${Math.floor(seconds)}s`;
-    } else if (seconds < 3600) {
-      const mins = Math.floor(seconds / 60);
-      const secs = Math.floor(seconds % 60);
-      return `${mins}m${secs}s`;
-    } else {
-      const hours = Math.floor(seconds / 3600);
-      const mins = Math.floor((seconds % 3600) / 60);
-      const secs = Math.floor(seconds % 60);
-      return `${hours}h${mins}m${secs}s`;
-    }
-  };
-
   const toggleSessionExpansion = (sessionId: string) => {
     const newExpanded = new Set(expandedSessions);
 
@@ -2483,22 +2467,6 @@ const Queue: React.FC = () => {
                       </div>
                     )}
 
-                    {/* process_cropping_job formatted metadata */}
-                    {selectedJob.func_name?.includes('process_cropping_job') && (
-                      <div className="bg-green-50 p-3 rounded mb-3 space-y-2">
-                        {selectedJob.meta.cropped_duration_seconds !== undefined && (
-                          <div className="text-sm">
-                            <span className="font-medium">Cropped Duration:</span> {formatSeconds(selectedJob.meta.cropped_duration_seconds)}
-                          </div>
-                        )}
-                        {selectedJob.meta.segments_cropped !== undefined && (
-                          <div className="text-sm">
-                            <span className="font-medium">Segments Cropped:</span> {selectedJob.meta.segments_cropped}
-                          </div>
-                        )}
-                      </div>
-                    )}
-
                     {/* Raw JSON metadata (collapsible) */}
                     <details className="mt-2">
                       <summary className="text-sm font-medium text-gray-700 cursor-pointer hover:text-gray-900">
diff --git a/backends/charts/advanced-backend/ingress-values.yaml b/backends/charts/advanced-backend/ingress-values.yaml
index b20786cd..4f47ca43 100644
--- a/backends/charts/advanced-backend/ingress-values.yaml
+++ b/backends/charts/advanced-backend/ingress-values.yaml
@@ -7,10 +7,6 @@ ingress:
       pathType: Prefix
     - path: /users
       pathType: Prefix
-    - path: /ws_pcm
-      pathType: Prefix
-    - path: /ws_omi
-      pathType: Prefix
     - path: /ws
       pathType: Prefix
     - path: /health
diff --git a/extras/havpe-relay/README.md b/extras/havpe-relay/README.md
index 2793b36d..f5a6b5db 100644
--- a/extras/havpe-relay/README.md
+++ b/extras/havpe-relay/README.md
@@ -6,7 +6,7 @@ TCP-to-WebSocket relay for ESPHome Voice-PE that connects to the Omi advanced ba
 
 - **TCP Server**: Listens on port 8989 for ESP32 Voice-PE connections
 - **Audio Format Conversion**: Converts 32-bit PCM to 16-bit PCM using easy-audio-interfaces
-- **WebSocket Client**: Forwards converted audio to backend at `/ws_pcm` endpoint
+- **WebSocket Client**: Forwards converted audio to backend at `/ws?codec=pcm` endpoint
 - **Graceful Handling**: Supports reconnections and proper cleanup
 - **Configurable**: Command-line options for ports and endpoints
 
@@ -42,7 +42,7 @@ uv run main.py
 
 This will:
 - Listen for TCP connections on port 8989
-- Forward to WebSocket at `ws://127.0.0.1:8000/ws_pcm`
+- Forward to WebSocket at `ws://127.0.0.1:8000/ws?codec=pcm`
 
 ### Advanced Usage
 
@@ -51,14 +51,14 @@ This will:
 uv run main.py --tcp-port 9090
 
 # Custom WebSocket URL
-uv run main.py --ws-url "ws://192.168.1.100:8000/ws_pcm"
+uv run main.py --ws-url "ws://192.168.1.100:8000/ws?codec=pcm"
 
 # Verbose logging
 uv run main.py -v    # INFO level
 uv run main.py -vv   # DEBUG level
 
 # Full configuration example
-uv run main.py --tcp-port 8989 --ws-url "ws://localhost:8000/ws_pcm" -v
+uv run main.py --tcp-port 8989 --ws-url "ws://localhost:8000/ws?codec=pcm" -v
 ```
 
 ### Command Line Options
@@ -66,13 +66,13 @@ uv run main.py --tcp-port 8989 --ws-url "ws://localhost:8000/ws_pcm" -v
 | Option | Default | Description |
 |--------|---------|-------------|
 | `--tcp-port` | 8989 | TCP port to listen on for ESP32 connections |
-| `--ws-url` | `ws://127.0.0.1:8000/ws_pcm` | WebSocket URL to forward audio to |
+| `--ws-url` | `ws://127.0.0.1:8000/ws?codec=pcm` | WebSocket URL to forward audio to |
 | `-v` / `--verbose` | WARNING | Increase verbosity (-v: INFO, -vv: DEBUG) |
 
 ## Architecture
 
 ```
-ESP32 Voice-PE → TCP:8989 → HAVPE Relay → WebSocket:/ws_pcm → Omi Backend
+ESP32 Voice-PE → TCP:8989 → HAVPE Relay → WebSocket:/ws?codec=pcm → Omi Backend
      (32-bit PCM)                    (16-bit PCM)
 ```
 
@@ -88,7 +88,7 @@ The relay automatically includes the following WebSocket parameters when connect
 
 Example WebSocket URL sent to backend:
 ```
-ws://127.0.0.1:8000/ws_pcm?user_id=esp32_voice_pe&rate=16000&width=2&channels=2&src=voice_pe
+ws://127.0.0.1:8000/ws?codec=pcm?user_id=esp32_voice_pe&rate=16000&width=2&channels=2&src=voice_pe
 ```
 
 ## Development
diff --git a/extras/havpe-relay/docker-compose.yml b/extras/havpe-relay/docker-compose.yml
index a5c0aa10..055f6492 100644
--- a/extras/havpe-relay/docker-compose.yml
+++ b/extras/havpe-relay/docker-compose.yml
@@ -7,7 +7,7 @@ services:
       - "${TCP_PORT:-8989}:8989"
     environment:
       # Connect to backend running on host (adjust as needed)
-      - WS_URL=${WS_URL:-ws://host.docker.internal:8000/ws_pcm}
+      - WS_URL=${WS_URL:-ws://host.docker.internal:8000/ws?codec=pcm}
       - TCP_PORT=${TCP_PORT:-8989}
       # Authentication credentials for backend
       - AUTH_USERNAME=${AUTH_USERNAME}
diff --git a/extras/havpe-relay/main.py b/extras/havpe-relay/main.py
index eac6d58b..36002be3 100644
--- a/extras/havpe-relay/main.py
+++ b/extras/havpe-relay/main.py
@@ -122,7 +122,7 @@ def create_authenticated_websocket_uri(base_ws_url: str, client_id: str, jwt_tok
     Returns:
         Authenticated WebSocket URI
     """
-    return f"{base_ws_url}/ws_pcm?token={jwt_token}&device_name={DEVICE_NAME}"
+    return f"{base_ws_url}/ws?codec=pcm&token={jwt_token}&device_name={DEVICE_NAME}"
 
 
 async def get_authenticated_socket_client(
@@ -151,7 +151,7 @@ async def get_authenticated_socket_client(
     
     # Create authenticated WebSocket URI (client_id will be generated by backend)
     ws_uri = create_authenticated_websocket_uri(backend_ws_url, "", jwt_token)
-    logger.info(f"🔗 Creating WebSocket connection to: {backend_ws_url}/ws_pcm?token={jwt_token[:20]}...&device_name={DEVICE_NAME}")
+    logger.info(f"🔗 Creating WebSocket connection to: {backend_ws_url}/ws?codec=pcm&token={jwt_token[:20]}...&device_name={DEVICE_NAME}")
     
     # Create socket client
     return SocketClient(uri=ws_uri)
diff --git a/extras/local-omi-bt/send_to_adv.py b/extras/local-omi-bt/send_to_adv.py
index 1705a355..cae774cb 100644
--- a/extras/local-omi-bt/send_to_adv.py
+++ b/extras/local-omi-bt/send_to_adv.py
@@ -9,7 +9,7 @@
 from wyoming.audio import AudioChunk
 
 # Configuration
-websocket_uri = "ws://100.83.66.30:8000/ws_pcm"
+websocket_uri = "ws://100.83.66.30:8000/ws?codec=pcm"
 backend_url = "http://100.83.66.30:8000"
 env_path = ".env"
 load_dotenv(env_path)
diff --git a/tests/configs/deepgram-openai.yml b/tests/configs/deepgram-openai.yml
index 6a2a11bd..e92bcbc5 100644
--- a/tests/configs/deepgram-openai.yml
+++ b/tests/configs/deepgram-openai.yml
@@ -5,6 +5,7 @@ defaults:
   embedding: openai-embed
   llm: openai-llm
   stt: stt-deepgram
+  stt_stream: stt-deepgram-stream
   vector_store: vs-qdrant
 memory:
   extraction:
@@ -82,6 +83,37 @@ models:
           text: results.channels[0].alternatives[0].transcript
           words: results.channels[0].alternatives[0].words
         type: json
+- api_family: websocket
+  api_key: ${DEEPGRAM_API_KEY:-}
+  description: Deepgram Nova 3 (streaming)
+  model_provider: deepgram
+  model_type: stt_stream
+  model_url: wss://api.deepgram.com/v1/listen
+  name: stt-deepgram-stream
+  operations:
+    chunk_header:
+      message: {}
+    end:
+      message:
+        type: CloseStream
+    expect:
+      extract:
+        segments: []
+        text: channel.alternatives[0].transcript
+        words: channel.alternatives[0].words
+      final_type: Results
+      interim_type: Results
+    query:
+      channels: 1
+      encoding: linear16
+      interim_results: true
+      language: multi
+      model: nova-3
+      punctuate: true
+      sample_rate: 16000
+      smart_format: true
+    start:
+      message: {}
 speaker_recognition:
   enabled: false
   timeout: 60
diff --git a/tests/endpoints/health_tests.robot b/tests/endpoints/health_tests.robot
index b0eaa5e8..e8ee45ec 100644
--- a/tests/endpoints/health_tests.robot
+++ b/tests/endpoints/health_tests.robot
@@ -57,7 +57,6 @@ Health Check Test
     Dictionary Should Contain Key    ${config}    chunk_dir
     Dictionary Should Contain Key    ${config}    active_clients
     Dictionary Should Contain Key    ${config}    new_conversation_timeout_minutes
-    Dictionary Should Contain Key    ${config}    audio_cropping_enabled
     Dictionary Should Contain Key    ${config}    llm_provider
     Dictionary Should Contain Key    ${config}    llm_model
     Dictionary Should Contain Key    ${config}    llm_base_url
@@ -71,7 +70,6 @@ Health Check Test
     Should Not Be Empty    ${config}[chunk_dir]
     Should Be True        isinstance(${config}[active_clients], int)
     Should Be True        ${config}[new_conversation_timeout_minutes] > 0
-    Should Be True        isinstance(${config}[audio_cropping_enabled], bool)
     Should Not Be Empty    ${config}[llm_provider]
     Should Not Be Empty    ${config}[llm_model]
     Should Not Be Empty    ${config}[llm_base_url]
diff --git a/tests/endpoints/plugin_tests.robot b/tests/endpoints/plugin_tests.robot
index 7e5ae0f9..893cd9fb 100644
--- a/tests/endpoints/plugin_tests.robot
+++ b/tests/endpoints/plugin_tests.robot
@@ -34,8 +34,9 @@ Plugin Mock Config Creation
     [Tags]    infra
 
     # Test single event subscription
+    ${single_subscription}=    Create List    transcript.streaming
     ${config}=    Create Mock Plugin Config
-    ...    subscriptions=["transcript.streaming"]
+    ...    subscriptions=${single_subscription}
     Verify Plugin Config Format    ${config}
 
     ${subscriptions}=    Get From Dictionary    ${config}    subscriptions
@@ -43,8 +44,9 @@ Plugin Mock Config Creation
     ...    msg=Plugin should subscribe to transcript.streaming event
 
     # Test multiple event subscriptions
+    ${subscriptions_list}=    Create List    transcript.streaming    transcript.batch    conversation.complete
     ${multi_config}=    Create Mock Plugin Config
-    ...    subscriptions=["transcript.streaming", "transcript.batch", "conversation.complete"]
+    ...    subscriptions=${subscriptions_list}
     ${multi_subs}=    Get From Dictionary    ${multi_config}    subscriptions
     ${length}=    Get Length    ${multi_subs}
     Should Be Equal As Integers    ${length}    3
@@ -55,8 +57,9 @@ Plugin Mock With Wake Word Trigger
     [Tags]    infra
 
     ${wake_words}=    Create List    hey vivi    vivi    hey jarvis
+    ${wake_word_subscriptions}=    Create List    transcript.streaming
     ${config}=    Create Mock Plugin Config
-    ...    subscriptions=["transcript.streaming"]
+    ...    subscriptions=${wake_word_subscriptions}
     ...    trigger_type=wake_word
     ...    wake_words=${wake_words}
 
diff --git a/tests/infrastructure/infra_tests.robot b/tests/infrastructure/infra_tests.robot
index c0d401db..1cbc920d 100644
--- a/tests/infrastructure/infra_tests.robot
+++ b/tests/infrastructure/infra_tests.robot
@@ -272,19 +272,20 @@ WebSocket Disconnect Conversation End Reason Test
 
     # Start audio stream and send chunks to trigger conversation
     ${device_name}=    Set Variable    disconnect
+    ${client_id}=    Get Client ID From Device Name    ${device_name}
     ${stream_id}=    Open Audio Stream    device_name=${device_name}
 
     # Send audio fast (no realtime pacing) to simulate disconnect before END signal
-    Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=200 
+    Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=200
 
     # Wait for conversation job to be created and conversation_id to be populated
     # Transcription + speech analysis takes time (30-60s with queue)
     ${conv_jobs}=    Wait Until Keyword Succeeds    60s    3s
-    ...    Job Type Exists For Client    open_conversation    ${device_name}
+    ...    Job Type Exists For Client    open_conversation    ${client_id}
 
     # Wait for conversation_id in job meta (created asynchronously)
     ${conversation_id}=    Wait Until Keyword Succeeds    10s    0.5s
-    ...    Get Conversation ID From Job Meta    open_conversation    ${device_name}
+    ...    Get Conversation ID From Job Meta    open_conversation    ${client_id}
 
     # Simulate WebSocket disconnect (Bluetooth dropout)
     Close Audio Stream    ${stream_id}
diff --git a/tests/integration/audio_streaming_integration_tests.robot b/tests/integration/audio_streaming_integration_tests.robot
new file mode 100644
index 00000000..3e70c718
--- /dev/null
+++ b/tests/integration/audio_streaming_integration_tests.robot
@@ -0,0 +1,187 @@
+*** Settings ***
+Documentation    Audio Streaming Integration Tests
+...              Tests for streaming transcription provider (Phase 1) and Redis session state (Phase 2)
+...
+...              This test suite validates:
+...              - Phase 1: Registry-driven transcription provider works
+...              - Phase 2: Redis sessions as single source of truth (user_email, job IDs, chunk tracking)
+...              - Phase 2: Session lifecycle management (init, update, cleanup)
+Resource         ../resources/websocket_keywords.robot
+Resource         ../resources/queue_keywords.robot
+Resource         ../resources/redis_keywords.robot
+Resource         ../setup/setup_keywords.robot
+Resource         ../setup/teardown_keywords.robot
+
+Suite Setup      Suite Setup
+Suite Teardown   Suite Teardown
+Test Setup       Test Cleanup
+
+*** Variables ***
+
+
+*** Test Cases ***
+
+Redis Session Schema Contains All Required Fields
+    [Documentation]    Verify Redis session has all Phase 2 fields after stream initialization
+    [Tags]    infra	audio-streaming
+
+    ${device_name}=    Set Variable    redis-schema-test
+    ${stream_id}=    Open Audio Stream    device_name=${device_name}
+
+    # Send a few chunks to trigger session initialization
+    Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=5
+
+    # Allow time for async session initialization to complete
+    Sleep    2s
+
+    # Get session data from Redis using client_id (not stream_id)
+    ${client_id}=    Get Client ID From Device Name    ${device_name}
+    ${session_data}=    Get Redis Session Data    ${client_id}
+
+    # Verify required fields exist
+    Should Not Be Empty    ${session_data}[user_id]    Session missing user_id
+    Should Not Be Empty    ${session_data}[user_email]    Session missing user_email
+    Should Not Be Empty    ${session_data}[client_id]    Session missing client_id
+    Should Not Be Empty    ${session_data}[connection_id]    Session missing connection_id
+    Should Not Be Empty    ${session_data}[stream_name]    Session missing stream_name
+    Should Not Be Empty    ${session_data}[provider]    Session missing provider
+    Should Not Be Empty    ${session_data}[mode]    Session should have mode
+
+    # Verify job IDs are tracked
+    Dictionary Should Contain Key    ${session_data}    speech_detection_job_id
+    Dictionary Should Contain Key    ${session_data}    audio_persistence_job_id
+
+    # Verify connection state
+    Should Be Equal    ${session_data}[websocket_connected]    true
+    Should Be Equal    ${session_data}[status]    active
+
+    Log    ✅ Redis session schema verified
+    [Teardown]    Close Audio Stream    ${stream_id}
+
+
+Chunk Count Increments In Redis Session
+    [Documentation]    Verify chunk count is tracked in Redis (not ClientState)
+    [Tags]    infra	audio-streaming
+
+    ${device_name}=    Set Variable    chunk-count-test
+    ${stream_id}=    Open Audio Stream    device_name=${device_name}
+    ${client_id}=    Get Client ID From Device Name    ${device_name}
+
+    # Send chunks and verify count increases
+    Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=3
+    Sleep    1s    # Allow chunk counter to update
+    ${session1}=    Get Redis Session Data    ${client_id}
+    ${count1}=    Convert To Integer    ${session1}[chunks_published]
+
+    Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=5
+    Sleep    1s    # Allow chunk counter to update
+    ${session2}=    Get Redis Session Data    ${client_id}
+    ${count2}=    Convert To Integer    ${session2}[chunks_published]
+
+    # Verify count increased (should be at least 8)
+    Should Be True    ${count2} > ${count1}
+    Should Be True    ${count2} >= 8
+
+    Log    ✅ Chunk count tracked in Redis: ${count1} → ${count2}
+    [Teardown]    Close Audio Stream    ${stream_id}
+
+
+Job IDs Stored In Redis Session
+    [Documentation]    Verify job IDs are stored in Redis session (not ClientState)
+    [Tags]    infra	audio-streaming	queue
+
+    ${device_name}=    Set Variable    job-ids-test
+    ${stream_id}=    Open Audio Stream    device_name=${device_name}
+    ${client_id}=    Get Client ID From Device Name    ${device_name}
+
+    # Send audio to trigger jobs
+    Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=10
+    Sleep    2s
+
+    # Get session data
+    ${session_data}=    Get Redis Session Data    ${client_id}
+
+    # Verify job IDs are populated (not empty strings)
+    Should Not Be Empty    ${session_data}[speech_detection_job_id]
+    Should Not Be Empty    ${session_data}[audio_persistence_job_id]
+
+    Log    ✅ Speech detection job: ${session_data}[speech_detection_job_id]
+    Log    ✅ Audio persistence job: ${session_data}[audio_persistence_job_id]
+
+    [Teardown]    Close Audio Stream    ${stream_id}
+
+
+Generic Transcription Provider Works
+    [Documentation]    Verify streaming transcription works with registry-driven provider
+    ...                This tests Phase 1 provider consolidation
+    [Tags]    audio-streaming	queue	e2e
+
+    ${device_name}=    Set Variable    provider-test
+    ${stream_id}=    Open Audio Stream    device_name=${device_name}
+    ${client_id}=    Get Client ID From Device Name    ${device_name}
+
+    # Send sufficient audio for transcription
+    Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=50
+
+    # Wait for speech detection job to process
+    Wait Until Keyword Succeeds    30s    2s
+    ...    Job Type Exists For Client    stream_speech_detection_job    ${client_id}
+
+    # Verify provider is set in Redis session
+    ${session_data}=    Get Redis Session Data    ${client_id}
+    Should Not Be Empty    ${session_data}[provider]
+    Log    ✅ Transcription provider: ${session_data}[provider]
+
+    [Teardown]    Close Audio Stream    ${stream_id}
+
+
+Session Cleaned Up After Stream Close
+    [Documentation]    Verify session status is updated when stream closes
+    [Tags]    infra	audio-streaming
+
+    ${device_name}=    Set Variable    cleanup-test
+    ${stream_id}=    Open Audio Stream    device_name=${device_name}
+    ${client_id}=    Get Client ID From Device Name    ${device_name}
+
+    # Send some audio
+    Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=5
+    Sleep    1s    # Allow session to be initialized
+
+    # Verify session is active
+    ${session_before}=    Get Redis Session Data    ${client_id}
+    Should Be Equal    ${session_before}[status]    active
+    Should Be Equal    ${session_before}[websocket_connected]    true
+
+    # Close stream
+    Close Audio Stream    ${stream_id}
+
+    # Wait for finalization
+    Sleep    2s
+
+    # Verify session is finalized or complete (jobs may finish quickly for short streams)
+    ${session_after}=    Get Redis Session Data    ${client_id}
+    Should Be True    '${session_after}[status]' in ['finalizing', 'complete']
+    ...    Session status should be finalizing or complete, got: ${session_after}[status]
+
+    Log    ✅ Session status updated to ${session_after}[status]
+
+
+User Email Tracked In Session
+    [Documentation]    Verify user_email is stored in Redis session for debugging
+    [Tags]    infra	audio-streaming
+
+    ${device_name}=    Set Variable    email-test
+    ${stream_id}=    Open Audio Stream    device_name=${device_name}
+    ${client_id}=    Get Client ID From Device Name    ${device_name}
+
+    # Send a chunk
+    Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=1
+    Sleep    1s    # Allow session to be initialized
+
+    # Get session and verify email
+    ${session_data}=    Get Redis Session Data    ${client_id}
+    Should Not Be Empty    ${session_data}[user_email]
+    Should Contain    ${session_data}[user_email]    @    Email should contain @
+
+    Log    ✅ User email tracked: ${session_data}[user_email]
+    [Teardown]    Close Audio Stream    ${stream_id}
diff --git a/tests/integration/conversation_queue.robot b/tests/integration/conversation_queue.robot
index bde80392..3cda6639 100644
--- a/tests/integration/conversation_queue.robot
+++ b/tests/integration/conversation_queue.robot
@@ -78,7 +78,7 @@ Test Reprocess Conversation Job Queue
 
     # Wait for initial upload processing to complete (transcription job chain)
     Log    Waiting for initial conversation processing to complete...    INFO
-    Sleep    10s    # Give time for initial job chain (transcription -> speaker -> cropping -> memory)
+    Sleep    10s    # Give time for initial job chain (transcription -> speaker -> memory)
 
     # Get conversation to verify initial state
     ${initial_conversation}=    Get Conversation By ID    ${conversation_id}
diff --git a/tests/integration/integration_test.robot b/tests/integration/integration_test.robot
index 4b08381b..3d561616 100644
--- a/tests/integration/integration_test.robot
+++ b/tests/integration/integration_test.robot
@@ -60,8 +60,8 @@ Audio Playback And Segment Timing Test
 
     Log    Conversation created: ${conversation_id}    INFO
 
-    # Wait for cropping job to complete (depends on transcription)
-    Sleep    10s    Wait for post-processing jobs to complete
+    # Wait for post-processing jobs to complete
+    Sleep    10s    Wait for post-processing jobs
 
     # Refresh conversation data
     ${conversation}=    Get Conversation By ID    ${conversation_id}
@@ -73,21 +73,6 @@ Audio Playback And Segment Timing Test
     Should Be True    ${original_audio_size} > 1000    Original audio file too small: ${original_audio_size} bytes
     Log    Original audio accessible: ${original_audio_size} bytes    INFO
 
-    # Verify cropped audio is accessible (if available)
-    &{params}=    Create Dictionary    cropped=true
-    ${cropped_response}=    GET On Session    api    /api/audio/get_audio/${conversation_id}    params=${params}    expected_status=any
-    IF    ${cropped_response.status_code} == 200
-        Should Be Equal As Strings    ${cropped_response.headers}[content-type]    audio/wav
-        ${cropped_audio_size}=    Get Length    ${cropped_response.content}
-        Should Be True    ${cropped_audio_size} > 0    Cropped audio file is empty
-        Log    Cropped audio accessible: ${cropped_audio_size} bytes    INFO
-
-        # Cropped audio should be smaller or equal to original (silence removed)
-        Should Be True    ${cropped_audio_size} <= ${original_audio_size}    Cropped audio larger than original
-    ELSE
-        Log    Cropped audio not yet available (cropping job may still be running)    WARN
-    END
-
     # Verify segments exist and have valid timestamps
     Dictionary Should Contain Key    ${conversation}    segments
     ${segments}=    Set Variable    ${conversation}[segments]
diff --git a/tests/integration/plugin_event_tests.robot b/tests/integration/plugin_event_tests.robot
index 4bdd49d1..28c78f10 100644
--- a/tests/integration/plugin_event_tests.robot
+++ b/tests/integration/plugin_event_tests.robot
@@ -18,9 +18,8 @@ Resource         ../resources/audio_keywords.robot
 Resource         ../resources/plugin_keywords.robot
 Resource         ../resources/websocket_keywords.robot
 Variables        ../setup/test_data.py
-Suite Setup      Test Suite Setup
+Suite Setup      Suite Setup
 Suite Teardown   Suite Teardown
-Test Setup       Test Cleanup
 
 *** Variables ***
 # TEST_AUDIO_FILE is loaded from test_data.py
diff --git a/tests/integration/websocket_streaming_tests.robot b/tests/integration/websocket_streaming_tests.robot
index 01e0a533..fecc86aa 100644
--- a/tests/integration/websocket_streaming_tests.robot
+++ b/tests/integration/websocket_streaming_tests.robot
@@ -60,7 +60,9 @@ Conversation Job Created After Speech Detection
     [Tags]    audio-streaming	queue	conversation
 
     # Open stream
-    ${stream_id}=    Open Audio Stream    device_name=ws-conv
+    ${device_name}=    Set Variable    ws-conv
+    ${stream_id}=    Open Audio Stream    device_name=${device_name}
+    ${client_id}=    Get Client ID From Device Name    ${device_name}
 
     # Send enough audio to trigger speech detection (test audio has speech)
     # Test audio is 4 minutes long at 16kHz, sending 200 chunks ensures enough speech
@@ -69,13 +71,13 @@ Conversation Job Created After Speech Detection
     # Wait for open_conversation job to be created (transcription + speech analysis takes time)
     # Deepgram/OpenAI API calls + job processing can take 30-60s with queue
     Wait Until Keyword Succeeds    60s    3s
-    ...    Job Type Exists For Client    open_conversation    ws-conv
+    ...    Job Type Exists For Client    open_conversation    ${client_id}
 
     Log To Console    Open conversation job created after speech detection
 
     # Then verify speech detection job has conversation_job_id linked
     ${speech_jobs}=    Wait Until Keyword Succeeds    15s    2s
-    ...    Job Type Exists For Client    speech_detection    ws-conv
+    ...    Job Type Exists For Client    speech_detection    ${client_id}
     Job Has Conversation ID    ${speech_jobs}[0]
     [Teardown]    Close Audio Stream    ${stream_id}
 
@@ -90,6 +92,7 @@ Conversation Closes On Inactivity Timeout And Restarts Speech Detection
     [Tags]    audio-streaming	queue	conversation
 
     ${device_name}=    Set Variable    test-post
+    ${client_id}=    Get Client ID From Device Name    ${device_name}
 
     # Open stream and send enough audio to trigger speech detection and conversation
     ${stream_id}=    Open Audio Stream    device_name=${device_name}
@@ -97,7 +100,7 @@ Conversation Closes On Inactivity Timeout And Restarts Speech Detection
 
     # Wait for conversation job to be created (transcription + speech analysis takes time)
     ${conv_jobs}=    Wait Until Keyword Succeeds    60s    3s
-    ...    Job Type Exists For Client    open_conversation    ${device_name}
+    ...    Job Type Exists For Client    open_conversation    ${client_id}
     ${conv_job}=    Set Variable    ${conv_jobs}[0]
     ${conv_job_id}=    Set Variable    ${conv_job}[job_id]
     ${conv_meta}=    Set Variable    ${conv_job}[meta]
@@ -105,7 +108,7 @@ Conversation Closes On Inactivity Timeout And Restarts Speech Detection
     Log To Console    Conversation job created: ${conv_job_id}, conversation_id: ${conversation_id}
 
     # Record the initial speech detection job (will be replaced after timeout)
-    ${initial_speech_jobs}=    Get Jobs By Type And Client    speech_detection    ${device_name}
+    ${initial_speech_jobs}=    Get Jobs By Type And Client    speech_detection    ${client_id}
     ${initial_speech_count}=    Get Length    ${initial_speech_jobs}
     Log To Console    Initial speech detection jobs: ${initial_speech_count}
 
@@ -121,7 +124,7 @@ Conversation Closes On Inactivity Timeout And Restarts Speech Detection
     # Verify a NEW speech detection job (2nd one) was created for next conversation
     # The handle_end_of_conversation function creates a new speech_detection job
     ${new_speech_jobs}=    Wait Until Keyword Succeeds    30s    2s
-    ...    Job Type Exists For Client    speech_detection    ${device_name}    2
+    ...    Job Type Exists For Client    speech_detection    ${client_id}    2
     ${new_speech_count}=    Get Length    ${new_speech_jobs}
     Should Be True    ${new_speech_count} >= ${initial_speech_count}
     ...    Expected new speech detection job but count is ${new_speech_count} (was ${initial_speech_count})
@@ -137,10 +140,6 @@ Conversation Closes On Inactivity Timeout And Restarts Speech Detection
     ${speaker_jobs}=    Get Jobs By Type And Conversation    recognise_speakers_job    ${conversation_id}
     Log To Console    Speaker recognition jobs found: ${speaker_jobs.__len__()}
 
-    # Audio cropping job should be created
-    ${cropping_jobs}=    Get Jobs By Type And Conversation    process_cropping_job    ${conversation_id}
-    Log To Console    Cropping jobs found: ${cropping_jobs.__len__()}
-
     # Title/summary generation job should be created
     ${title_jobs}=    Get Jobs By Type And Conversation    generate_title_summary_job    ${conversation_id}
     Log To Console    Title/summary jobs found: ${title_jobs.__len__()}
@@ -150,70 +149,4 @@ Conversation Closes On Inactivity Timeout And Restarts Speech Detection
     Log To Console    Memory jobs found: ${memory_jobs.__len__()}
 
 
-Segment Timestamps Match Cropped Audio
-    [Documentation]    Verify that after conversation closes and cropping completes,
-    ...                segment timestamps are adjusted to match the cropped audio file.
-    [Tags]    audio-streaming	audio-upload
-
-    ${device_name}=    Set Variable    seg-test
-
-    # # Open stream
-    ${stream_id}=    Open Audio Stream    device_name=${device_name}
-
-    # Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=250
-
-    # Wait for conversation job to be created (transcription + speech analysis takes time)
-    # ${conv_jobs}=    Wait Until Keyword Succeeds    60s    3s
-    # ...    Job Type Exists For Client    open_conversation    ${device_name}
-
-    # conversation 1
-    ${conversation_id_1}=    Stream And Wait For Conversation    ${stream_id}    ${TEST_AUDIO_FILE}    ${device_name}    num_chunks=250
-    Log To Console    Conversation 1 completed: ${conversation_id_1}
-
-    # conversation 2, with 500 chunks (enough for 8 segments to match expected timestamps)
-    ${conversation_id}=    Stream And Wait For Conversation    ${stream_id}    ${TEST_AUDIO_FILE}    ${device_name}    num_chunks=500
-    Log To Console    Conversation 2 completed: ${conversation_id}
-
-    # Wait for cropping job to complete
-    ${cropping_jobs}=    Wait Until Keyword Succeeds    30s    2s
-    ...    Job Type Exists For Conversation    process_cropping_job    ${conversation_id}
-    ${cropping_job}=    Set Variable    ${cropping_jobs}[0]
-    Wait For Job Status    ${cropping_job}[job_id]    completed    timeout=30s    interval=2s
-    Log To Console    Cropping job completed
-
-    # Wait for database updates
-    Sleep    2s
-
-    # Fetch the conversation with updated segments
-    ${conversation}=    get conversation by id    ${conversation_id}
-
-    # Verify cropped audio path exists
-    Should Not Be Empty    ${conversation}[cropped_audio_path]
-    Log To Console    Cropped audio: ${conversation}[cropped_audio_path]
-
-    # Get segments
-    ${segments}=    Set Variable    ${conversation}[segments]
-
-    ${segment_count}=    Get Length    ${segments}
-    Should Be True    ${segment_count} > 0    No segments found
-    Log To Console    Found ${segment_count} segments
-
-    # Verify timestamps are adjusted to cropped audio (should start from 0)
-    ${first_segment}=    Set Variable    ${segments}[0]
-    Should Be True    ${first_segment}[start] == 0.0    First segment should start at 0.0s after cropping
-
-    # Verify last segment timing is reasonable (should be within the audio duration)
-    ${last_segment}=    Set Variable    ${segments}[-1]
-    # Should Be True    ${last_segment}[end] > 50    Last segment should extend beyond 50s for 100s audio
-    Should Be True    ${last_segment}[end] < 110    Last segment should be within 110s
-
-    # Verify segments match expected test data timestamps
-    # Uses default EXPECTED_SEGMENT_TIMES from test_data.py
-    # To use a different dataset: Verify Segments Match Expected Timestamps    ${segments}    ${EXPECTED_SEGMENT_TIMES_SHORT}
-    # To use custom tolerance: Verify Segments Match Expected Timestamps    ${segments}    ${EXPECTED_SEGMENT_TIMES}    ${tolerance}=1.0
-    Verify Segments Match Expected Timestamps    ${segments}    expected_segments=${EXPECTED_SEGMENT_TIMES}   
-
-    Log To Console    ✓ Validated ${segment_count} segments with proper cropped timestamps matching expected data
-
-
 
diff --git a/tests/resources/audio_keywords.robot b/tests/resources/audio_keywords.robot
index 2d37fcbc..aa045ae1 100644
--- a/tests/resources/audio_keywords.robot
+++ b/tests/resources/audio_keywords.robot
@@ -111,11 +111,3 @@ Upload Audio File And Wait For Memory
     Log    Successfully extracted ${memory_count} memories
 
     RETURN    ${conversation}    ${memories}
-
-
-Get Cropped Audio Info
-    [Documentation]    Get cropped audio information for a conversation
-    [Arguments]     ${audio_uuid}
-
-    ${response}=    GET On Session    api    /api/conversations/${audio_uuid}/cropped    headers=${headers}
-    RETURN    ${response.json()}[cropped_audios]    
diff --git a/tests/resources/conversation_keywords.robot b/tests/resources/conversation_keywords.robot
index 3b8e4632..36496646 100644
--- a/tests/resources/conversation_keywords.robot
+++ b/tests/resources/conversation_keywords.robot
@@ -95,13 +95,6 @@ Close Current Conversation
     ${response}=    POST On Session    api    /api/conversations/${client_id}/close    headers=${headers}
     RETURN    ${response.json()}
 
-Get Cropped Audio Info
-    [Documentation]    Get cropped audio information for a conversation
-    [Arguments]     ${audio_uuid}
-
-    ${response}=    GET On Session    api    /api/conversations/${audio_uuid}/cropped    headers=${headers}
-    RETURN    ${response.json()}[cropped_audios]    
-
 Add Speaker To Conversation
     [Documentation]    Add a speaker to the speakers_identified list
     [Arguments]    ${audio_uuid}    ${speaker_id}
diff --git a/tests/resources/plugin_keywords.robot b/tests/resources/plugin_keywords.robot
index a7c2cd8b..2b1b19e7 100644
--- a/tests/resources/plugin_keywords.robot
+++ b/tests/resources/plugin_keywords.robot
@@ -87,14 +87,14 @@ Verify HA Plugin Uses Events
 
 Clear Plugin Events
     [Documentation]    Clear all events from test plugin database
-    Connect To Database    sqlite3    /app/debug/test_plugin_events.db
+    Connect To Database    sqlite3    ${CURDIR}/../../backends/advanced/data/test_debug_dir/test_plugin_events.db
     Execute SQL String    DELETE FROM plugin_events
     Disconnect From Database
 
 Get Plugin Events By Type
     [Arguments]    ${event_type}
     [Documentation]    Query plugin events by event type
-    Connect To Database    sqlite3    /app/debug/test_plugin_events.db
+    Connect To Database    sqlite3    ${CURDIR}/../../backends/advanced/data/test_debug_dir/test_plugin_events.db
     ${query}=    Query    SELECT * FROM plugin_events WHERE event = '${event_type}' ORDER BY created_at DESC
     Disconnect From Database
     RETURN    ${query}
@@ -102,14 +102,14 @@ Get Plugin Events By Type
 Get Plugin Events By User
     [Arguments]    ${user_id}
     [Documentation]    Query plugin events by user_id
-    Connect To Database    sqlite3    /app/debug/test_plugin_events.db
+    Connect To Database    sqlite3    ${CURDIR}/../../backends/advanced/data/test_debug_dir/test_plugin_events.db
     ${query}=    Query    SELECT * FROM plugin_events WHERE user_id = '${user_id}' ORDER BY created_at DESC
     Disconnect From Database
     RETURN    ${query}
 
 Get All Plugin Events
     [Documentation]    Get all events from test plugin database
-    Connect To Database    sqlite3    /app/debug/test_plugin_events.db
+    Connect To Database    sqlite3    ${CURDIR}/../../backends/advanced/data/test_debug_dir/test_plugin_events.db
     ${query}=    Query    SELECT * FROM plugin_events ORDER BY created_at DESC
     Disconnect From Database
     RETURN    ${query}
@@ -117,7 +117,7 @@ Get All Plugin Events
 Get Plugin Event Count
     [Arguments]    ${event_type}=${NONE}
     [Documentation]    Get count of events, optionally filtered by type
-    Connect To Database    sqlite3    /app/debug/test_plugin_events.db
+    Connect To Database    sqlite3    ${CURDIR}/../../backends/advanced/data/test_debug_dir/test_plugin_events.db
     IF    '${event_type}' != 'None'
         ${count}=    Row Count    SELECT COUNT(*) FROM plugin_events WHERE event = '${event_type}'
     ELSE
diff --git a/tests/resources/queue_keywords.robot b/tests/resources/queue_keywords.robot
index 3d709661..734d7857 100644
--- a/tests/resources/queue_keywords.robot
+++ b/tests/resources/queue_keywords.robot
@@ -309,9 +309,9 @@ Get Most Recent Job
 
 Get Conversation ID From Job Meta
     [Documentation]    Extract conversation_id from job meta, fails if not present
-    [Arguments]    ${job_type}    ${device_name}
+    [Arguments]    ${job_type}    ${client_id}
 
-    ${conv_jobs}=    Get Jobs By Type And Client    ${job_type}    ${device_name}
+    ${conv_jobs}=    Get Jobs By Type And Client    ${job_type}    ${client_id}
     ${conv_job}=    Get Most Recent Job    ${conv_jobs}
     ${conv_meta}=    Set Variable    ${conv_job}[meta]
     ${conversation_id}=    Evaluate    $conv_meta.get('conversation_id', '')
diff --git a/tests/resources/redis_keywords.robot b/tests/resources/redis_keywords.robot
new file mode 100644
index 00000000..1aa85e9b
--- /dev/null
+++ b/tests/resources/redis_keywords.robot
@@ -0,0 +1,75 @@
+*** Settings ***
+Documentation    Redis session management and verification keywords
+...
+...              This file contains keywords for interacting with Redis sessions
+...              and verifying session state during tests.
+...
+...              Keywords in this file handle:
+...              - Reading Redis session data
+...              - Verifying session schema
+...              - Session state checks
+...
+...              Keywords that should NOT be in this file:
+...              - Verification/assertion keywords (belong in tests)
+...              - API session management (belong in session_resources.robot)
+Library          Process
+Library          Collections
+Variables        ../setup/test_env.py
+
+*** Keywords ***
+
+Get Redis Session Data
+    [Documentation]    Get session data from Redis for a given stream/session ID
+    [Arguments]    ${session_id}
+
+    # Use redis-cli to get session hash
+    ${redis_key}=    Set Variable    audio:session:${session_id}
+    ${result}=    Run Process    docker    exec    ${REDIS_CONTAINER}
+    ...    redis-cli    HGETALL    ${redis_key}
+
+    Should Be Equal As Integers    ${result.rc}    0
+    ...    Redis command failed: ${result.stderr}
+
+    # Parse output (HGETALL returns: field1 value1 field2 value2 ...)
+    @{lines}=    Split String    ${result.stdout}    \n
+    &{session_data}=    Create Dictionary
+
+    # Process pairs
+    ${length}=    Get Length    ${lines}
+    FOR    ${i}    IN RANGE    0    ${length}    2
+        ${key}=    Get From List    ${lines}    ${i}
+        ${value_index}=    Evaluate    ${i} + 1
+        IF    ${value_index} < ${length}
+            ${value}=    Get From List    ${lines}    ${value_index}
+            Set To Dictionary    ${session_data}    ${key}=${value}
+        END
+    END
+
+    RETURN    ${session_data}
+
+
+Verify Session Has Field
+    [Documentation]    Verify a Redis session has a specific field
+    [Arguments]    ${session_id}    ${field_name}
+
+    ${session}=    Get Redis Session Data    ${session_id}
+    Dictionary Should Contain Key    ${session}    ${field_name}
+    ...    Session ${session_id} missing field: ${field_name}
+
+
+Get Session Field Value
+    [Documentation]    Get a specific field value from Redis session
+    [Arguments]    ${session_id}    ${field_name}
+
+    ${session}=    Get Redis Session Data    ${session_id}
+    ${value}=    Get From Dictionary    ${session}    ${field_name}
+    RETURN    ${value}
+
+
+Session Field Should Equal
+    [Documentation]    Verify a session field has a specific value
+    [Arguments]    ${session_id}    ${field_name}    ${expected_value}
+
+    ${actual}=    Get Session Field Value    ${session_id}    ${field_name}
+    Should Be Equal    ${actual}    ${expected_value}
+    ...    Session field ${field_name} mismatch: expected ${expected_value}, got ${actual}
diff --git a/tests/resources/websocket_keywords.robot b/tests/resources/websocket_keywords.robot
index f1ee54b4..6d318f38 100644
--- a/tests/resources/websocket_keywords.robot
+++ b/tests/resources/websocket_keywords.robot
@@ -2,12 +2,31 @@
 Documentation    WebSocket audio streaming keywords using the shared AudioStreamClient
 Library          Collections
 Library          OperatingSystem
+Library          String
 Library          ../libs/audio_stream_library.py
 Variables        ../setup/test_env.py
 Resource         session_keywords.robot
 Resource         queue_keywords.robot
 
 *** Keywords ***
+Get Client ID From Device Name
+    [Documentation]    Construct client_id from device_name for test admin user
+    ...                Format: {last_6_chars_of_user_id}-{first_10_chars_of_device_name}
+    ...                Matches backend logic in client_manager.py:generate_client_id()
+    [Arguments]    ${device_name}
+
+    # Test admin user ID: 695f6e8595eae00281d26432 (actual ID from test environment)
+    ${user_suffix}=    Set Variable    d26432
+
+    # Sanitize and truncate device name to 10 chars (matches backend: [:10])
+    # Backend sanitizes: lowercase, alphanumeric + hyphens only
+    ${device_lower}=    Convert To Lower Case    ${device_name}
+    ${device_truncated}=    Get Substring    ${device_lower}    0    10
+
+    ${client_id}=    Set Variable    ${user_suffix}-${device_truncated}
+    RETURN    ${client_id}
+
+
 Stream Audio File Via WebSocket
     [Documentation]    Stream a WAV file via WebSocket using Wyoming protocol
     ...                Uses the shared AudioStreamClient from advanced_omi_backend.clients
@@ -117,8 +136,11 @@ Stream And Wait For Conversation
     ...                Works correctly even with existing conversations by tracking new conversation creation.
     [Arguments]    ${stream_id}    ${audio_file_path}    ${device_name}    ${num_chunks}=100
 
+    # Construct client_id from device_name for job lookups
+    ${client_id}=    Get Client ID From Device Name    ${device_name}
+
     # Get baseline conversation IDs before streaming to detect new conversation
-    ${baseline_jobs}=    Get Jobs By Type And Client    open_conversation    ${device_name}
+    ${baseline_jobs}=    Get Jobs By Type And Client    open_conversation    ${client_id}
     ${existing_conv_ids}=    Create List
     FOR    ${job}    IN    @{baseline_jobs}
         ${meta}=    Set Variable    ${job}[meta]
@@ -134,7 +156,7 @@ Stream And Wait For Conversation
 
     # Wait for NEW conversation job to be created (not in baseline)
     ${new_job}=    Wait Until Keyword Succeeds    60s    3s
-    ...    Wait For New Conversation Job    open_conversation    ${device_name}    ${existing_conv_ids}
+    ...    Wait For New Conversation Job    open_conversation    ${client_id}    ${existing_conv_ids}
 
     ${conv_meta}=    Set Variable    ${new_job}[meta]
     ${conversation_id}=    Evaluate    $conv_meta.get('conversation_id', '')
diff --git a/tests/run-robot-tests.sh b/tests/run-robot-tests.sh
index ea7fa949..14644d90 100755
--- a/tests/run-robot-tests.sh
+++ b/tests/run-robot-tests.sh
@@ -236,8 +236,8 @@ done
 print_info "Building workers..."
 docker compose -f docker-compose-test.yml build workers-test
 
-print_info "Starting RQ workers and Deepgram streaming worker..."
-docker compose -f docker-compose-test.yml up -d workers-test deepgram-streaming-worker-test
+print_info "Starting RQ workers..."
+docker compose -f docker-compose-test.yml up -d workers-test
 
 # Wait for workers container
 print_info "Waiting for workers container (up to 30s)..."
@@ -273,35 +273,21 @@ for i in {1..30}; do
     sleep 2
 done
 
-# Verify batch Deepgram worker is running
-print_info "Verifying Deepgram batch worker process..."
-BATCH_WORKER_CHECK=$(docker compose -f docker-compose-test.yml exec -T workers-test ps aux | grep -c "audio_stream_deepgram_worker" || echo "0" | tr -d '\n\r')
-BATCH_WORKER_CHECK=${BATCH_WORKER_CHECK//[^0-9]/}  # Remove non-numeric characters
-if [ -n "$BATCH_WORKER_CHECK" ] && [ "$BATCH_WORKER_CHECK" -gt 0 ]; then
-    print_success "Deepgram batch worker process is running"
+# Verify unified audio stream worker is running
+print_info "Verifying unified audio stream worker process..."
+STREAM_WORKER_CHECK=$(docker compose -f docker-compose-test.yml exec -T workers-test ps aux | grep -c "audio_stream_worker" || echo "0" | tr -d '\n\r')
+STREAM_WORKER_CHECK=${STREAM_WORKER_CHECK//[^0-9]/}  # Remove non-numeric characters
+if [ -n "$STREAM_WORKER_CHECK" ] && [ "$STREAM_WORKER_CHECK" -gt 0 ]; then
+    print_success "Unified audio stream worker process is running"
 else
-    print_warning "Deepgram batch worker process not found - checking logs..."
-    docker compose -f docker-compose-test.yml logs --tail=30 workers-test | grep -i "deepgram" || true
+    print_warning "Audio stream worker process not found - checking logs..."
+    docker compose -f docker-compose-test.yml logs --tail=30 workers-test | grep -i "audio.*stream.*worker" || true
 fi
 
 # Check Redis consumer groups registration
 print_info "Checking Redis Streams consumer groups..."
 docker compose -f docker-compose-test.yml exec -T redis-test redis-cli KEYS "audio:stream:*" 2>/dev/null || true
 
-# Wait for streaming worker to start
-print_info "Waiting for Deepgram streaming worker (up to 30s)..."
-for i in {1..15}; do
-    if docker compose -f docker-compose-test.yml ps deepgram-streaming-worker-test | grep -q "Up"; then
-        print_success "Deepgram streaming worker is running"
-        break
-    fi
-    if [ $i -eq 15 ]; then
-        print_warning "Deepgram streaming worker not detected (may still start async)"
-        break
-    fi
-    sleep 2
-done
-
 print_success "All services ready!"
 
 # Return to tests directory
diff --git a/tests/setup/test_data.py b/tests/setup/test_data.py
index 6d73b265..e1821796 100644
--- a/tests/setup/test_data.py
+++ b/tests/setup/test_data.py
@@ -51,7 +51,7 @@
 ]
 
 # Expected segment timestamps for DIY Glass Blowing audio (4-minute version, 500 chunks)
-# These are the cropped audio timestamps after silence removal
+# These are the audio segment timestamps with speech detection
 # Updated 2025-01-22 based on actual test output with streaming websocket processing
 EXPECTED_SEGMENT_TIMES = [
     {"start": 0.0, "end": 10.08},
diff --git a/tests/test-requirements.txt b/tests/test-requirements.txt
index 4efaf39b..ba6fb687 100644
--- a/tests/test-requirements.txt
+++ b/tests/test-requirements.txt
@@ -2,6 +2,7 @@ robotframework
 robotframework-tidy
 robotframework-requests
 robotframework-browser
+robotframework-databaselibrary
 python-dotenv
 websockets
  
\ No newline at end of file

From 60aa8e1eb0270469f5f46bdc06d25bac727f9485 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sun, 11 Jan 2026 23:16:18 +0000
Subject: [PATCH 22/25] Enhance testing infrastructure and API routes for
 plugin events

- Updated `docker-compose-test.yml` to introduce low speech detection thresholds for testing, improving the accuracy of speech detection during tests.
- Added new test-only API routes in `test_routes.py` for clearing and retrieving plugin events, ensuring a clean state between tests.
- Refactored existing test scripts to utilize the new API endpoints for event management, enhancing test reliability and clarity.
- Improved logging and error handling in various components to facilitate debugging during test execution.
- Adjusted environment variable handling in test setup scripts to streamline configuration and improve flexibility.
---
 backends/advanced/docker-compose-test.yml     |  10 +-
 .../controllers/queue_controller.py           |  53 +++++-
 .../plugins/test_event/event_storage.py       |  50 +++++-
 .../routers/api_router.py                     |   9 +
 .../routers/modules/queue_routes.py           |   6 +
 .../routers/modules/test_routes.py            | 121 +++++++++++++
 .../services/audio_stream/aggregator.py       |  55 ++----
 .../services/plugin_service.py                |   6 +
 .../transcription/streaming_consumer.py       |  20 +++
 .../utils/conversation_utils.py               |  66 +++----
 .../advanced_omi_backend/utils/job_utils.py   |  17 +-
 .../workers/audio_jobs.py                     |   2 +-
 .../workers/conversation_jobs.py              | 151 ++++++++++++++--
 .../workers/memory_jobs.py                    |  16 +-
 .../workers/transcription_jobs.py             |  57 +++++-
 tests/.env.test                               |   9 +-
 tests/integration/plugin_event_tests.robot    |  73 ++------
 tests/libs/auth_helpers.py                    |  41 +++++
 tests/resources/plugin_keywords.robot         |  92 +++++++---
 tests/resources/session_keywords.robot        |  12 ++
 tests/resources/websocket_keywords.robot      |  10 +-
 tests/run-custom.sh                           | 152 ++++++++++++++--
 tests/run-robot-tests.sh                      | 169 +-----------------
 tests/setup/setup_keywords.robot              |  35 +++-
 tests/setup/test_env.py                       |   5 +-
 tests/setup/test_manager_keywords.robot       |   6 +-
 26 files changed, 867 insertions(+), 376 deletions(-)
 create mode 100644 backends/advanced/src/advanced_omi_backend/routers/modules/test_routes.py
 create mode 100644 tests/libs/auth_helpers.py

diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml
index d4eb6504..e01a75f6 100644
--- a/backends/advanced/docker-compose-test.yml
+++ b/backends/advanced/docker-compose-test.yml
@@ -47,6 +47,9 @@ services:
       - CORS_ORIGINS=http://localhost:3001,http://localhost:8001,https://localhost:3001,https://localhost:8001
       # Set low inactivity timeout for tests (2 seconds instead of 60)
       - SPEECH_INACTIVITY_THRESHOLD_SECONDS=2
+      # Set low speech detection thresholds for tests
+      - SPEECH_DETECTION_MIN_DURATION=2.0  # 2 seconds instead of 10
+      - SPEECH_DETECTION_MIN_WORDS=5  # 5 words instead of 10
       # Wait for audio queue to drain before timing out (test mode)
       - WAIT_FOR_AUDIO_QUEUE_DRAIN=true
     depends_on:
@@ -56,8 +59,6 @@ services:
         condition: service_healthy
       redis-test:
         condition: service_started
-      speaker-service-test:
-        condition: service_healthy
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8000/readiness"]
       interval: 10s
@@ -191,6 +192,9 @@ services:
       - SPEAKER_SERVICE_URL=http://speaker-service-test:8085
       # Set low inactivity timeout for tests (2 seconds instead of 60)
       - SPEECH_INACTIVITY_THRESHOLD_SECONDS=2
+      # Set low speech detection thresholds for tests
+      - SPEECH_DETECTION_MIN_DURATION=2.0  # 2 seconds instead of 10
+      - SPEECH_DETECTION_MIN_WORDS=5  # 5 words instead of 10
       # Wait for audio queue to drain before timing out (test mode)
       - WAIT_FOR_AUDIO_QUEUE_DRAIN=true
     depends_on:
@@ -202,8 +206,6 @@ services:
         condition: service_started
       qdrant-test:
         condition: service_started
-      speaker-service-test:
-        condition: service_healthy
     restart: unless-stopped
 
   # Mycelia - AI memory and timeline service (test environment)
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
index f1944c7e..cd4f7455 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
@@ -307,13 +307,22 @@ def start_streaming_jobs(
         user_id,
         client_id,
         job_timeout=86400,  # 24 hours for all-day sessions
-        result_ttl=JOB_RESULT_TTL,
+        ttl=None,  # No pre-run expiry (job can wait indefinitely in queue)
+        result_ttl=JOB_RESULT_TTL,  # Cleanup AFTER completion
+        failure_ttl=86400,  # Cleanup failed jobs after 24h
         job_id=f"speech-detect_{session_id[:12]}",
         description=f"Listening for speech...",
         meta={'audio_uuid': session_id, 'client_id': client_id, 'session_level': True}
     )
+    # Log job enqueue with TTL information for debugging
+    actual_ttl = redis_conn.ttl(f"rq:job:{speech_job.id}")
     logger.info(f"📥 RQ: Enqueued speech detection job {speech_job.id}")
-    logger.info(f"🔍 DEBUG: Created job - ID={speech_job.id}, func_name={speech_job.func_name}, client_id={client_id}, meta={speech_job.meta}")
+    logger.info(
+        f"🔍 Job enqueue details: ID={speech_job.id}, "
+        f"job_timeout={speech_job.timeout}, result_ttl={speech_job.result_ttl}, "
+        f"failure_ttl={speech_job.failure_ttl}, redis_key_ttl={actual_ttl}, "
+        f"queue_length={transcription_queue.count}, client_id={client_id}"
+    )
 
     # Store job ID for cleanup (keyed by client_id for easy WebSocket cleanup)
     try:
@@ -331,13 +340,22 @@ def start_streaming_jobs(
         user_id,
         client_id,
         job_timeout=86400,  # 24 hours for all-day sessions
-        result_ttl=JOB_RESULT_TTL,
+        ttl=None,  # No pre-run expiry (job can wait indefinitely in queue)
+        result_ttl=JOB_RESULT_TTL,  # Cleanup AFTER completion
+        failure_ttl=86400,  # Cleanup failed jobs after 24h
         job_id=f"audio-persist_{session_id[:12]}",
         description=f"Audio persistence for session {session_id[:12]}",
         meta={'audio_uuid': session_id, 'session_level': True}  # Mark as session-level job
     )
+    # Log job enqueue with TTL information for debugging
+    actual_ttl = redis_conn.ttl(f"rq:job:{audio_job.id}")
     logger.info(f"📥 RQ: Enqueued audio persistence job {audio_job.id} on audio queue")
-    logger.info(f"🔍 DEBUG: Created audio job - ID={audio_job.id}, func_name={audio_job.func_name}, client_id={client_id}, meta={audio_job.meta}")
+    logger.info(
+        f"🔍 Job enqueue details: ID={audio_job.id}, "
+        f"job_timeout={audio_job.timeout}, result_ttl={audio_job.result_ttl}, "
+        f"failure_ttl={audio_job.failure_ttl}, redis_key_ttl={actual_ttl}, "
+        f"queue_length={audio_queue.count}, client_id={client_id}"
+    )
 
     return {
         'speech_detection': speech_job.id,
@@ -380,7 +398,7 @@ def start_post_conversation_jobs(
     from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job
     from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job
     from advanced_omi_backend.workers.memory_jobs import process_memory_job
-    from advanced_omi_backend.workers.conversation_jobs import generate_title_summary_job
+    from advanced_omi_backend.workers.conversation_jobs import generate_title_summary_job, dispatch_conversation_complete_event_job
 
     version_id = transcript_version_id or str(uuid.uuid4())
 
@@ -466,11 +484,34 @@ def start_post_conversation_jobs(
     )
     logger.info(f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on {speaker_job.id})")
 
+    # Step 5: Dispatch conversation.complete event (runs after both memory and title/summary complete)
+    # This ensures plugins receive the event after all processing is done
+    event_job_id = f"event_complete_{conversation_id[:12]}"
+    logger.info(f"🔍 DEBUG: Creating conversation complete event job with job_id={event_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}")
+
+    # Event job depends on both memory and title/summary jobs completing
+    # Use RQ's depends_on list to wait for both
+    event_dispatch_job = default_queue.enqueue(
+        dispatch_conversation_complete_event_job,
+        conversation_id,
+        audio_uuid,
+        client_id or "",
+        user_id,
+        job_timeout=120,  # 2 minutes
+        result_ttl=JOB_RESULT_TTL,
+        depends_on=[memory_job, title_summary_job],  # Wait for both parallel jobs
+        job_id=event_job_id,
+        description=f"Dispatch conversation complete event for {conversation_id[:8]}",
+        meta=job_meta
+    )
+    logger.info(f"📥 RQ: Enqueued conversation complete event job {event_dispatch_job.id}, meta={event_dispatch_job.meta} (depends on {memory_job.id} and {title_summary_job.id})")
+
     return {
         'transcription': transcription_job.id if transcription_job else None,
         'speaker_recognition': speaker_job.id,
         'memory': memory_job.id,
-        'title_summary': title_summary_job.id
+        'title_summary': title_summary_job.id,
+        'event_dispatch': event_dispatch_job.id
     }
 
 
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/test_event/event_storage.py b/backends/advanced/src/advanced_omi_backend/plugins/test_event/event_storage.py
index 16e98792..00bc674d 100644
--- a/backends/advanced/src/advanced_omi_backend/plugins/test_event/event_storage.py
+++ b/backends/advanced/src/advanced_omi_backend/plugins/test_event/event_storage.py
@@ -5,6 +5,7 @@
 """
 import json
 import logging
+import os
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List, Optional
@@ -24,9 +25,52 @@ def __init__(self, db_path: str = "/app/debug/test_plugin_events.db"):
     async def initialize(self):
         """Initialize database and create tables"""
         # Ensure directory exists
-        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
-
-        self.db = await aiosqlite.connect(self.db_path)
+        logger.info(f"🔍 DEBUG: Initializing event storage with db_path={self.db_path}")
+
+        db_dir = Path(self.db_path).parent
+        logger.info(f"🔍 DEBUG: Database directory: {db_dir}")
+        logger.info(f"🔍 DEBUG: Directory exists before mkdir: {db_dir.exists()}")
+
+        try:
+            db_dir.mkdir(parents=True, exist_ok=True)
+            logger.info(f"🔍 DEBUG: Directory created/verified: {db_dir}")
+            logger.info(f"🔍 DEBUG: Directory permissions: {oct(db_dir.stat().st_mode)}")
+        except Exception as e:
+            logger.error(f"🔍 DEBUG: Failed to create directory: {e}")
+            raise
+
+        logger.info(f"🔍 DEBUG: Attempting to connect to SQLite database...")
+        try:
+            self.db = await aiosqlite.connect(self.db_path)
+            logger.info(f"🔍 DEBUG: Successfully connected to database")
+
+            # Enable WAL mode for better concurrent access (allows concurrent reads/writes)
+            # This fixes the "readonly database" error when Robot tests access from host
+            await self.db.execute("PRAGMA journal_mode=WAL")
+            await self.db.execute("PRAGMA busy_timeout=5000")  # Wait up to 5s for locks
+            logger.info(f"✓ Enabled WAL mode for concurrent access")
+
+            # Set file permissions to 666 so host user can write (container runs as root)
+            # Robot tests run as host user and need write access to the database
+            try:
+                os.chmod(self.db_path, 0o666)
+                # Also set permissions on WAL and SHM files if they exist
+                wal_file = f"{self.db_path}-wal"
+                shm_file = f"{self.db_path}-shm"
+                if os.path.exists(wal_file):
+                    os.chmod(wal_file, 0o666)
+                if os.path.exists(shm_file):
+                    os.chmod(shm_file, 0o666)
+                logger.info(f"✓ Set database file permissions to 666 for host access")
+            except Exception as perm_error:
+                logger.warning(f"Could not set database permissions: {perm_error}")
+
+        except Exception as e:
+            logger.error(f"🔍 DEBUG: Failed to connect to database: {e}")
+            logger.error(f"🔍 DEBUG: Database file exists: {Path(self.db_path).exists()}")
+            if Path(self.db_path).exists():
+                logger.error(f"🔍 DEBUG: Database file permissions: {oct(Path(self.db_path).stat().st_mode)}")
+            raise
 
         # Create events table
         await self.db.execute("""
diff --git a/backends/advanced/src/advanced_omi_backend/routers/api_router.py b/backends/advanced/src/advanced_omi_backend/routers/api_router.py
index 9e761f8e..80c03eae 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/api_router.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/api_router.py
@@ -6,6 +6,7 @@
 """
 
 import logging
+import os
 
 from fastapi import APIRouter
 
@@ -40,5 +41,13 @@
 router.include_router(queue_router)
 router.include_router(health_router)  # Also include under /api for frontend compatibility
 
+# Conditionally include test routes (only in test environments)
+if os.getenv("DEBUG_DIR"):
+    try:
+        from .modules.test_routes import router as test_router
+        router.include_router(test_router)
+        logger.info("✅ Test routes loaded (test environment detected)")
+    except Exception as e:
+        logger.error(f"Error loading test routes: {e}", exc_info=True)
 
 logger.info("API router initialized with all sub-modules")
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py
index 2da3767b..38bafa9a 100644
--- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py
@@ -635,6 +635,12 @@ async def flush_all_jobs(
                         # Try to fetch the job
                         job = Job.fetch(job_id, connection=redis_conn)
 
+                        # Skip session-level jobs (e.g., speech_detection, audio_persistence)
+                        # These run for the entire session and should not be killed by test cleanup
+                        if job.meta and job.meta.get("session_level"):
+                            logger.info(f"Skipping session-level job {job_id} ({job.description})")
+                            continue
+
                         # Handle running jobs differently to avoid worker deadlock
                         if job.is_started:
                             # Send stop command to worker instead of canceling/deleting immediately
diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/test_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/test_routes.py
new file mode 100644
index 00000000..6255b6d6
--- /dev/null
+++ b/backends/advanced/src/advanced_omi_backend/routers/modules/test_routes.py
@@ -0,0 +1,121 @@
+"""
+Test-only API routes for integration testing.
+
+These routes are ONLY loaded when DEBUG_DIR environment variable is set,
+which happens in test environments. They should never be available in production.
+"""
+
+import logging
+from typing import Optional
+from fastapi import APIRouter, HTTPException
+
+from advanced_omi_backend.services.plugin_service import get_plugin_router
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/test", tags=["testing"])
+
+
+@router.delete("/plugins/events")
+async def clear_test_plugin_events():
+    """
+    Clear all test plugin events.
+
+    This endpoint is ONLY available in test environments and provides a clean
+    way to reset plugin event state between tests without direct database access.
+
+    Returns:
+        dict: Confirmation message with number of events cleared
+    """
+    plugin_router = get_plugin_router()
+
+    if not plugin_router:
+        return {"message": "No plugin router initialized", "events_cleared": 0}
+
+    total_cleared = 0
+
+    # Clear events from all plugins that have storage
+    for plugin_id, plugin in plugin_router.plugins.items():
+        if hasattr(plugin, 'storage') and plugin.storage:
+            try:
+                cleared = await plugin.storage.clear_events()
+                total_cleared += cleared
+                logger.info(f"Cleared {cleared} events from plugin '{plugin_id}'")
+            except Exception as e:
+                logger.error(f"Error clearing events from plugin '{plugin_id}': {e}")
+
+    return {
+        "message": "Test plugin events cleared",
+        "events_cleared": total_cleared
+    }
+
+
+@router.get("/plugins/events/count")
+async def get_test_plugin_event_count(event_type: Optional[str] = None):
+    """
+    Get count of test plugin events.
+
+    Args:
+        event_type: Optional event type to filter by (e.g., 'transcript.batch')
+
+    Returns:
+        dict: Event count and event type filter
+    """
+    plugin_router = get_plugin_router()
+
+    if not plugin_router:
+        return {"count": 0, "event_type": event_type, "message": "No plugin router initialized"}
+
+    # Get count from first plugin with storage (usually test_event plugin)
+    for plugin_id, plugin in plugin_router.plugins.items():
+        if hasattr(plugin, 'storage') and plugin.storage:
+            try:
+                count = await plugin.storage.get_event_count(event_type)
+                return {
+                    "count": count,
+                    "event_type": event_type,
+                    "plugin_id": plugin_id
+                }
+            except Exception as e:
+                logger.error(f"Error getting event count from plugin '{plugin_id}': {e}")
+                raise HTTPException(status_code=500, detail=str(e))
+
+    return {"count": 0, "event_type": event_type, "message": "No plugin with storage found"}
+
+
+@router.get("/plugins/events")
+async def get_test_plugin_events(event_type: Optional[str] = None):
+    """
+    Get test plugin events.
+
+    Args:
+        event_type: Optional event type to filter by
+
+    Returns:
+        dict: List of events
+    """
+    plugin_router = get_plugin_router()
+
+    if not plugin_router:
+        return {"events": [], "message": "No plugin router initialized"}
+
+    # Get events from first plugin with storage
+    for plugin_id, plugin in plugin_router.plugins.items():
+        if hasattr(plugin, 'storage') and plugin.storage:
+            try:
+                if event_type:
+                    events = await plugin.storage.get_events_by_type(event_type)
+                else:
+                    events = await plugin.storage.get_all_events()
+
+                return {
+                    "events": events,
+                    "count": len(events),
+                    "event_type": event_type,
+                    "plugin_id": plugin_id
+                }
+            except Exception as e:
+                logger.error(f"Error getting events from plugin '{plugin_id}': {e}")
+                raise HTTPException(status_code=500, detail=str(e))
+
+    return {"events": [], "message": "No plugin with storage found"}
diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py
index 26b985ab..f31f7453 100644
--- a/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py
+++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py
@@ -49,8 +49,8 @@ async def get_session_results(self, session_id: str) -> list[dict]:
                     "text": fields[b"text"].decode(),
                     "confidence": float(fields[b"confidence"].decode()),
                     "provider": fields[b"provider"].decode(),
-                    "chunk_id": fields[b"chunk_id"].decode(),
-                    "processing_time": float(fields[b"processing_time"].decode()),
+                    "chunk_id": fields.get(b"chunk_id", b"unknown").decode(),  # Handle missing chunk_id gracefully
+                    "processing_time": float(fields.get(b"processing_time", b"0.0").decode()),
                     "timestamp": float(fields[b"timestamp"].decode()),
                 }
 
@@ -82,8 +82,6 @@ async def get_combined_results(self, session_id: str) -> dict:
         """
         Get all transcription results combined into a single aggregated result.
 
-        This is what an aggregator should do - combine multiple chunks into one.
-
         Args:
             session_id: Session identifier
 
@@ -109,43 +107,24 @@ async def get_combined_results(self, session_id: str) -> dict:
                 "provider": None
             }
 
-        # Combine text
-        full_text = " ".join([r.get("text", "") for r in results if r.get("text")])
-
-        # Combine words
-        all_words = []
-        for r in results:
-            if "words" in r and r["words"]:
-                all_words.extend(r["words"])
-
-        # Combine segments
-        all_segments = []
-        for r in results:
-            if "segments" in r and r["segments"]:
-                all_segments.extend(r["segments"])
-
-        # Sort segments by start time
-        all_segments.sort(key=lambda s: s.get("start", 0.0))
-
-        # Calculate average confidence
-        confidences = [r.get("confidence", 0.0) for r in results]
-        avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0
-
-        # Get provider (assume all chunks from same provider)
-        provider = results[0].get("provider") if results else None
+        # For streaming providers (Deepgram), use ONLY the latest final result
+        # Each is_final=true result supersedes interim results for the same speech segment
+        # The latest result contains the most accurate transcription with best timing/confidence
+        latest_result = results[-1]
 
         combined = {
-            "text": full_text,
-            "words": all_words,
-            "segments": all_segments,
-            "chunk_count": len(results),
-            "total_confidence": avg_confidence,
-            "provider": provider
+            "text": latest_result.get("text", ""),
+            "words": latest_result.get("words", []),
+            "segments": latest_result.get("segments", []),
+            "chunk_count": len(results),  # Track how many results were received
+            "total_confidence": latest_result.get("confidence", 0.0),
+            "provider": latest_result.get("provider")
         }
 
-        logger.debug(
-            f"📦 Combined {len(results)} chunks for session {session_id}: "
-            f"{len(full_text)} chars, {len(all_words)} words, {len(all_segments)} segments"
+        logger.info(
+            f"🔤 TRANSCRIPT [AGGREGATOR] session={session_id}, "
+            f"total_results={len(results)}, words={len(combined['words'])}, "
+            f"text=\"{combined['text']}\""
         )
 
         return combined
@@ -188,7 +167,7 @@ async def get_realtime_results(
                             "text": fields[b"text"].decode(),
                             "confidence": float(fields[b"confidence"].decode()),
                             "provider": fields[b"provider"].decode(),
-                            "chunk_id": fields[b"chunk_id"].decode(),
+                            "chunk_id": fields.get(b"chunk_id", b"unknown").decode(),  # Handle missing chunk_id gracefully
                         }
 
                         # Optional fields
diff --git a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py
index f97399e3..0dc693d6 100644
--- a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py
+++ b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py
@@ -112,6 +112,9 @@ def init_plugin_router() -> Optional[PluginRouter]:
 
         # Load plugin configuration
         plugins_yml = Path("/app/plugins.yml")
+        logger.info(f"🔍 Looking for plugins config at: {plugins_yml}")
+        logger.info(f"🔍 File exists: {plugins_yml.exists()}")
+
         if plugins_yml.exists():
             with open(plugins_yml, 'r') as f:
                 plugins_config = yaml.safe_load(f)
@@ -119,8 +122,11 @@ def init_plugin_router() -> Optional[PluginRouter]:
                 plugins_config = expand_env_vars(plugins_config)
                 plugins_data = plugins_config.get('plugins', {})
 
+            logger.info(f"🔍 Loaded plugins config with {len(plugins_data)} plugin(s): {list(plugins_data.keys())}")
+
             # Initialize each enabled plugin
             for plugin_id, plugin_config in plugins_data.items():
+                logger.info(f"🔍 Processing plugin '{plugin_id}', enabled={plugin_config.get('enabled', False)}")
                 if not plugin_config.get('enabled', False):
                     continue
 
diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py b/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py
index 2f986c5a..579bc195 100644
--- a/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py
+++ b/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py
@@ -131,6 +131,15 @@ async def start_session_stream(self, session_id: str, sample_rate: int = 16000):
 
         except Exception as e:
             logger.error(f"Failed to start Deepgram stream for {session_id}: {e}", exc_info=True)
+
+            # Set error flag in Redis so speech detection can detect failure early
+            session_key = f"audio:session:{session_id}"
+            try:
+                await self.redis_client.hset(session_key, "transcription_error", str(e))
+                logger.info(f"Set transcription error flag for {session_id}")
+            except Exception as redis_error:
+                logger.warning(f"Failed to set error flag in Redis: {redis_error}")
+
             raise
 
     async def end_session_stream(self, session_id: str):
@@ -182,12 +191,23 @@ async def process_audio_chunk(self, session_id: str, audio_chunk: bytes, chunk_i
             # Deepgram returns None if no response yet, or a dict with results
             if result:
                 is_final = result.get("is_final", False)
+                text = result.get("text", "")
+                word_count = len(result.get("words", []))
+
+                # Track transcript at each step
+                logger.info(
+                    f"🔤 TRANSCRIPT [DEEPGRAM] session={session_id}, is_final={is_final}, "
+                    f"words={word_count}, text=\"{text}\""
+                )
 
                 # Always publish to clients (interim + final) for real-time display
                 await self.publish_to_client(session_id, result, is_final=is_final)
 
                 # If final result, also store and trigger plugins
                 if is_final:
+                    logger.info(
+                        f"🔤 TRANSCRIPT [STORE] session={session_id}, words={word_count}, text=\"{text}\""
+                    )
                     await self.store_final_result(session_id, result, chunk_id=chunk_id)
 
                     # Trigger plugins on final results only
diff --git a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
index b2cddf4c..3acba204 100644
--- a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
+++ b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
@@ -87,37 +87,41 @@ def analyze_speech(transcript_data: dict) -> dict:
         valid_words = [w for w in words if w.get("confidence", 0) >= settings["min_confidence"]]
 
         if len(valid_words) < settings["min_words"]:
-            return {
-                "has_speech": False,
-                "reason": f"Not enough valid words ({len(valid_words)} < {settings['min_words']})",
-                "word_count": len(valid_words),
-                "duration": 0.0,
-            }
-
-        # Calculate speech duration from word timing
-        if valid_words:
-            speech_start = valid_words[0].get("start", 0)
-            speech_end = valid_words[-1].get("end", 0)
-            speech_duration = speech_end - speech_start
-
-            # Check minimum duration threshold
-            min_duration = settings.get("min_duration", 10.0)
-            if speech_duration < min_duration:
-                return {
-                    "has_speech": False,
-                    "reason": f"Speech too short ({speech_duration:.1f}s < {min_duration}s)",
-                    "word_count": len(valid_words),
-                    "duration": speech_duration,
-                }
-
-            return {
-                "has_speech": True,
-                "word_count": len(valid_words),
-                "speech_start": speech_start,
-                "speech_end": speech_end,
-                "duration": speech_duration,
-                "reason": f"Valid speech detected ({len(valid_words)} words, {speech_duration:.1f}s)",
-            }
+            # Not enough valid words in word-level data - fall through to text-only analysis
+            # This handles cases where word-level data is incomplete or low confidence
+            logger.debug(f"Only {len(valid_words)} valid words, falling back to text-only analysis")
+            # Continue to Method 2 (don't return early)
+        else:
+            # Calculate speech duration from word timing
+            if valid_words:
+                speech_start = valid_words[0].get("start", 0)
+                speech_end = valid_words[-1].get("end", 0)
+                speech_duration = speech_end - speech_start
+
+                # If no timing data (duration = 0), fall back to text-only analysis
+                # This happens with some streaming transcription services
+                if speech_duration == 0:
+                    logger.debug("Word timing data missing, falling back to text-only analysis")
+                    # Continue to Method 2 (text-only fallback)
+                else:
+                    # Check minimum duration threshold when we have timing data
+                    min_duration = settings.get("min_duration", 10.0)
+                    if speech_duration < min_duration:
+                        return {
+                            "has_speech": False,
+                            "reason": f"Speech too short ({speech_duration:.1f}s < {min_duration}s)",
+                            "word_count": len(valid_words),
+                            "duration": speech_duration,
+                        }
+
+                    return {
+                        "has_speech": True,
+                        "word_count": len(valid_words),
+                        "speech_start": speech_start,
+                        "speech_end": speech_end,
+                        "duration": speech_duration,
+                        "reason": f"Valid speech detected ({len(valid_words)} words, {speech_duration:.1f}s)",
+                    }
 
     # Method 2: Text-only fallback (when no word-level data available)
     text = transcript_data.get("text", "").strip()
diff --git a/backends/advanced/src/advanced_omi_backend/utils/job_utils.py b/backends/advanced/src/advanced_omi_backend/utils/job_utils.py
index 6200af82..ba9fcc74 100644
--- a/backends/advanced/src/advanced_omi_backend/utils/job_utils.py
+++ b/backends/advanced/src/advanced_omi_backend/utils/job_utils.py
@@ -10,7 +10,7 @@
 logger = logging.getLogger(__name__)
 
 
-async def check_job_alive(redis_client, current_job) -> bool:
+async def check_job_alive(redis_client, current_job, session_id: Optional[str] = None) -> bool:
     """
     Check if current RQ job still exists in Redis.
 
@@ -20,6 +20,7 @@ async def check_job_alive(redis_client, current_job) -> bool:
     Args:
         redis_client: Async Redis client
         current_job: RQ job instance from get_current_job()
+        session_id: Optional session ID to check if session has ended naturally
 
     Returns:
         False if job is zombie (caller should exit), True otherwise
@@ -32,13 +33,23 @@ async def check_job_alive(redis_client, current_job) -> bool:
 
         while True:
             # Check for zombie state each iteration
-            if not await check_job_alive(redis_client, current_job):
+            if not await check_job_alive(redis_client, current_job, session_id):
                 break
             # ... do work ...
     """
     if current_job:
         job_exists = await redis_client.exists(f"rq:job:{current_job.id}")
         if not job_exists:
-            logger.error(f"🧟 Zombie job detected - job {current_job.id} deleted from Redis, exiting")
+            # Check if this is a natural exit (session ended) vs true zombie
+            if session_id:
+                session_key = f"audio:session:{session_id}"
+                session_status = await redis_client.hget(session_key, "status")
+                if session_status and session_status.decode() in ["finalizing", "complete", "closed"]:
+                    # Session ended naturally - not a zombie, just natural cleanup
+                    logger.debug(f"📋 Job {current_job.id} ending naturally (session closed)")
+                    return False
+
+            # True zombie - job deleted while session still active
+            logger.error(f"🧟 Zombie job detected - job {current_job.id} deleted from Redis while session still active, exiting")
             return False
     return True
diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
index fa75cd40..34900898 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
@@ -103,7 +103,7 @@ async def audio_streaming_persistence_job(
 
     while True:
         # Check if job still exists in Redis (detect zombie state)
-        if not await check_job_alive(redis_client, current_job):
+        if not await check_job_alive(redis_client, current_job, session_id):
             if file_sink:
                 await file_sink.close()
             break
diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
index 024c22f2..1d3f81f3 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
@@ -10,11 +10,12 @@
 from datetime import datetime
 from typing import Dict, Any
 from rq.job import Job
+from rq.exceptions import NoSuchJobError
 
 from advanced_omi_backend.models.job import async_job
 from advanced_omi_backend.controllers.queue_controller import redis_conn
 from advanced_omi_backend.controllers.session_controller import mark_session_complete
-from advanced_omi_backend.services.plugin_service import get_plugin_router
+from advanced_omi_backend.services.plugin_service import get_plugin_router, init_plugin_router
 
 from advanced_omi_backend.utils.conversation_utils import (
     analyze_speech,
@@ -238,14 +239,35 @@ async def open_conversation_job(
     # Link job metadata to conversation (cascading updates)
     current_job.meta["conversation_id"] = conversation_id
     current_job.save_meta()
-    speech_job = Job.fetch(speech_job_id, connection=redis_conn)
-    speech_job.meta["conversation_id"] = conversation_id
-    speech_job.save_meta()
-    speaker_check_job_id = speech_job.meta.get("speaker_check_job_id")
-    if speaker_check_job_id:
-        speaker_check_job = Job.fetch(speaker_check_job_id, connection=redis_conn)
-        speaker_check_job.meta["conversation_id"] = conversation_id
-        speaker_check_job.save_meta()
+
+    try:
+        speech_job = Job.fetch(speech_job_id, connection=redis_conn)
+        speech_job.meta["conversation_id"] = conversation_id
+        speech_job.save_meta()
+        speaker_check_job_id = speech_job.meta.get("speaker_check_job_id")
+        if speaker_check_job_id:
+            try:
+                speaker_check_job = Job.fetch(speaker_check_job_id, connection=redis_conn)
+                speaker_check_job.meta["conversation_id"] = conversation_id
+                speaker_check_job.save_meta()
+            except Exception as e:
+                if isinstance(e, NoSuchJobError):
+                    logger.error(
+                        f"❌ Missing job hash for speaker_check job {speaker_check_job_id}: "
+                        f"Job was linked to speech_job {speech_job_id} but hash key disappeared. "
+                        f"This may indicate TTL expiry or job collision."
+                    )
+                else:
+                    raise
+    except Exception as e:
+        if isinstance(e, NoSuchJobError):
+            logger.error(
+                f"❌ Missing job hash for speech_job {speech_job_id}: "
+                f"Job was created for session {session_id} but hash key disappeared before metadata link. "
+                f"This may indicate TTL expiry or job collision."
+            )
+        else:
+            raise
     
     # Signal audio persistence job to rotate to this conversation's file
     rotation_signal_key = f"conversation:current:{session_id}"
@@ -286,7 +308,7 @@ async def open_conversation_job(
     while True:
         # Check if job still exists in Redis (detect zombie state)
         from advanced_omi_backend.utils.job_utils import check_job_alive
-        if not await check_job_alive(redis_client, current_job):
+        if not await check_job_alive(redis_client, current_job, session_id):
             break
 
         # Check if session is finalizing (set by producer when recording stops)
@@ -711,3 +733,112 @@ async def generate_title_summary_job(conversation_id: str, *, redis_client=None)
         "detailed_summary": conversation.detailed_summary,
         "processing_time_seconds": processing_time,
     }
+
+
+@async_job(redis=True, beanie=True)
+async def dispatch_conversation_complete_event_job(
+    conversation_id: str,
+    audio_uuid: str,
+    client_id: str,
+    user_id: str,
+    *,
+    redis_client=None
+) -> Dict[str, Any]:
+    """
+    Dispatch conversation.complete plugin event for file upload processing.
+
+    This job runs at the end of the post-conversation job chain to ensure
+    plugins receive the conversation.complete event for uploaded audio files.
+    WebSocket streaming dispatches this event in open_conversation_job instead.
+
+    Args:
+        conversation_id: Conversation ID
+        audio_uuid: Audio UUID
+        client_id: Client ID
+        user_id: User ID
+        redis_client: Redis client (injected by decorator)
+
+    Returns:
+        Dict with success status and plugin results
+    """
+    from advanced_omi_backend.models.conversation import Conversation
+
+    logger.info(f"📌 Dispatching conversation.complete event for conversation {conversation_id}")
+
+    start_time = time.time()
+
+    # Get the conversation to include in event data
+    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
+    if not conversation:
+        logger.error(f"Conversation {conversation_id} not found")
+        return {"success": False, "error": "Conversation not found"}
+
+    # Get user email for event data
+    from advanced_omi_backend.models.user import User
+    user = await User.get(user_id)
+    user_email = user.email if user else ""
+
+    # Prepare plugin event data (same format as open_conversation_job)
+    try:
+        # Get or initialize plugin router (same pattern as transcription_jobs.py)
+        plugin_router = get_plugin_router()
+        if not plugin_router:
+            logger.info("🔧 Initializing plugin router in worker process...")
+            plugin_router = init_plugin_router()
+
+            # Initialize all plugins asynchronously (same as app_factory.py)
+            if plugin_router:
+                for plugin_id, plugin in plugin_router.plugins.items():
+                    try:
+                        await plugin.initialize()
+                        logger.info(f"✅ Plugin '{plugin_id}' initialized")
+                    except Exception as e:
+                        logger.error(f"Failed to initialize plugin '{plugin_id}': {e}")
+
+        if not plugin_router:
+            logger.warning("⚠️ Plugin router could not be initialized, skipping event dispatch")
+            return {"success": True, "skipped": True, "reason": "No plugin router"}
+
+        plugin_data = {
+            'conversation': {
+                'audio_uuid': audio_uuid,
+                'client_id': client_id,
+                'user_id': user_id,
+            },
+            'transcript': conversation.transcript if conversation else "",
+            'duration': 0,  # Duration not tracked for file uploads
+            'conversation_id': conversation_id,
+        }
+
+        plugin_results = await plugin_router.dispatch_event(
+            event='conversation.complete',
+            user_id=user_id,
+            data=plugin_data,
+            metadata={'end_reason': 'file_upload'}
+        )
+
+        if plugin_results:
+            logger.info(f"📌 Triggered {len(plugin_results)} conversation-level plugins")
+            for result in plugin_results:
+                if result.message:
+                    logger.info(f"  Plugin result: {result.message}")
+
+        processing_time = time.time() - start_time
+        logger.info(
+            f"✅ Conversation complete event dispatched for {conversation_id} in {processing_time:.2f}s"
+        )
+
+        return {
+            "success": True,
+            "conversation_id": conversation_id,
+            "plugin_count": len(plugin_results) if plugin_results else 0,
+            "processing_time_seconds": processing_time,
+        }
+
+    except Exception as e:
+        logger.warning(f"⚠️ Error dispatching conversation complete event: {e}")
+        return {
+            "success": False,
+            "error": str(e),
+            "conversation_id": conversation_id,
+        }
diff --git a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py
index a307f004..ee02b065 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py
@@ -16,7 +16,7 @@
 )
 from advanced_omi_backend.models.job import BaseRQJob, JobPriority, async_job
 from advanced_omi_backend.services.memory.base import MemoryEntry
-from advanced_omi_backend.services.plugin_service import get_plugin_router
+from advanced_omi_backend.services.plugin_service import get_plugin_router, init_plugin_router
 
 logger = logging.getLogger(__name__)
 
@@ -243,7 +243,21 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict
 
             # Trigger memory-level plugins
             try:
+                # Get or initialize plugin router (same pattern as conversation_jobs.py)
                 plugin_router = get_plugin_router()
+                if not plugin_router:
+                    logger.info("🔧 Initializing plugin router in worker process...")
+                    plugin_router = init_plugin_router()
+
+                    # Initialize all plugins asynchronously (same as app_factory.py)
+                    if plugin_router:
+                        for plugin_id, plugin in plugin_router.plugins.items():
+                            try:
+                                await plugin.initialize()
+                                logger.info(f"✅ Plugin '{plugin_id}' initialized")
+                            except Exception as e:
+                                logger.error(f"Failed to initialize plugin '{plugin_id}': {e}")
+
                 if plugin_router:
                     plugin_data = {
                         'memories': created_memory_ids,
diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
index f25e468f..b37f6454 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
@@ -9,6 +9,7 @@
 import logging
 import time
 from typing import Dict, Any
+from rq.exceptions import NoSuchJobError
 
 from advanced_omi_backend.models.job import JobPriority, BaseRQJob, async_job
 
@@ -216,9 +217,12 @@ async def transcribe_full_audio_job(
 
             # Initialize plugin router if not already initialized (worker context)
             plugin_router = get_plugin_router()
+            logger.info(f"🔍 DEBUG: Plugin router from service: {plugin_router is not None}")
+
             if not plugin_router:
                 logger.info("🔧 Initializing plugin router in worker process...")
                 plugin_router = init_plugin_router()
+                logger.info(f"🔧 After init, plugin_router: {plugin_router is not None}, plugins count: {len(plugin_router.plugins) if plugin_router else 0}")
 
                 # Initialize async plugins
                 if plugin_router:
@@ -229,7 +233,7 @@ async def transcribe_full_audio_job(
                         except Exception as e:
                             logger.exception(f"Failed to initialize plugin '{plugin_id}' in worker: {e}")
 
-            logger.info(f"🔍 DEBUG: Plugin router retrieved: {plugin_router is not None}")
+            logger.info(f"🔍 DEBUG: Plugin router final check: {plugin_router is not None}, has {len(plugin_router.plugins) if plugin_router else 0} plugins")
 
             if plugin_router:
                 logger.info(f"🔍 DEBUG: Preparing to trigger transcript plugins for conversation {conversation_id}")
@@ -308,7 +312,10 @@ async def transcribe_full_audio_job(
                             cancelled_jobs.append(job_id)
                             logger.info(f"✅ Cancelled dependent job: {job_id}")
                     except Exception as e:
-                        logger.debug(f"Job {job_id} not found or already completed: {e}")
+                        if isinstance(e, NoSuchJobError):
+                            logger.debug(f"Job {job_id} hash not found (likely already completed or expired)")
+                        else:
+                            logger.debug(f"Job {job_id} not found or already completed: {e}")
 
                 if cancelled_jobs:
                     logger.info(
@@ -584,18 +591,30 @@ async def stream_speech_detection_job(
         )
         current_job.save_meta()
 
+    # Track when session closes for graceful shutdown
+    session_closed_at = None
+    final_check_grace_period = 15  # Wait up to 15 seconds for final transcription after session closes
+
     # Main loop: Listen for speech
     while True:
         # Check if job still exists in Redis (detect zombie state)
         from advanced_omi_backend.utils.job_utils import check_job_alive
 
-        if not await check_job_alive(redis_client, current_job):
+        if not await check_job_alive(redis_client, current_job, session_id):
             break
 
-        # Exit conditions
+        # Check if session has closed
         session_status = await redis_client.hget(session_key, "status")
-        if session_status and session_status.decode() in ["complete", "closed"]:
-            logger.info(f"🛑 Session ended, exiting")
+        session_closed = session_status and session_status.decode() in ["complete", "closed"]
+
+        if session_closed and session_closed_at is None:
+            # Session just closed - start grace period for final transcription
+            session_closed_at = time.time()
+            logger.info(f"🛑 Session closed, waiting up to {final_check_grace_period}s for final transcription results...")
+
+        # Exit if grace period expired without speech
+        if session_closed_at and (time.time() - session_closed_at) > final_check_grace_period:
+            logger.info(f"✅ Session ended without speech (grace period expired)")
             break
 
         if time.time() - start_time > max_runtime:
@@ -605,11 +624,35 @@ async def stream_speech_detection_job(
         # Get transcription results
         combined = await aggregator.get_combined_results(session_id)
         if not combined["text"]:
+            # Health check: detect transcription errors early during grace period
+            if session_closed_at:
+                # Check for streaming consumer errors in session metadata
+                error_status = await redis_client.hget(session_key, "transcription_error")
+                if error_status:
+                    error_msg = error_status.decode()
+                    logger.warning(f"❌ Transcription error detected: {error_msg}")
+                    logger.info(f"✅ Session ended without speech (transcription error)")
+                    break
+
+                # Check if we've been waiting too long with no results at all
+                grace_elapsed = time.time() - session_closed_at
+                if grace_elapsed > 5 and not combined.get("chunk_count", 0):
+                    # 5+ seconds with no transcription activity at all - likely API key issue
+                    logger.warning(f"⚠️ No transcription activity after {grace_elapsed:.1f}s - possible API key or connectivity issue")
+                    logger.info(f"✅ Session ended without speech (no transcription activity)")
+                    break
+
             await asyncio.sleep(2)
             continue
 
         # Step 1: Check for meaningful speech
         transcript_data = {"text": combined["text"], "words": combined.get("words", [])}
+
+        logger.info(
+            f"🔤 TRANSCRIPT [SPEECH_DETECT] session={session_id}, "
+            f"words={len(combined.get('words', []))}, text=\"{combined['text']}\""
+        )
+
         speech_analysis = analyze_speech(transcript_data)
 
         logger.info(
@@ -668,8 +711,6 @@ async def stream_speech_detection_job(
                 try:
                     speaker_check_job.refresh()
                 except Exception as e:
-                    from rq.exceptions import NoSuchJobError
-
                     if isinstance(e, NoSuchJobError):
                         logger.warning(
                             f"⚠️ Speaker check job disappeared from Redis (likely completed quickly), assuming not enrolled"
diff --git a/tests/.env.test b/tests/.env.test
index 974dcee2..5d981b68 100644
--- a/tests/.env.test
+++ b/tests/.env.test
@@ -15,10 +15,5 @@ TEST_DEVICE_NAME=robot-test
 
 MEMORY_PROVIDER=chronicle
 
-# Docker container names (test environment)
-BACKEND_CONTAINER=advanced-chronicle-backend-test-1
-WORKERS_CONTAINER=advanced-workers-test-1
-MONGO_CONTAINER=advanced-mongo-test-1
-REDIS_CONTAINER=advanced-redis-test-1
-QDRANT_CONTAINER=advanced-qdrant-test-1
-WEBUI_CONTAINER=advanced-webui-test-1
\ No newline at end of file
+# Docker Compose Project Name (used by test_env.py to generate container names)
+COMPOSE_PROJECT_NAME=advanced-backend-test
\ No newline at end of file
diff --git a/tests/integration/plugin_event_tests.robot b/tests/integration/plugin_event_tests.robot
index 28c78f10..82fe00d2 100644
--- a/tests/integration/plugin_event_tests.robot
+++ b/tests/integration/plugin_event_tests.robot
@@ -58,14 +58,10 @@ Upload Audio And Verify Transcript Batch Event
     # Upload test audio file
     File Should Exist    ${TEST_AUDIO_FILE}
     ...    msg=Test audio file should exist
-    ${result}=    Upload Audio For Processing    ${TEST_AUDIO_FILE}
+    ${conversation}=    Upload Audio File    ${TEST_AUDIO_FILE}
 
-    # Wait for transcription to complete
-    Sleep    15s
-
-    # Query plugin events database
-    ${final_count}=    Get Plugin Event Count    transcript.batch
-    ${new_events}=    Evaluate    ${final_count} - ${baseline_count}
+    # Wait for transcription to complete (polls every 2s, max 30s)
+    ${new_events}=    Wait For Plugin Event    transcript.batch    ${baseline_count}    timeout=30s
 
     # Verify at least one new event was received
     Should Be True    ${new_events} > 0
@@ -80,9 +76,11 @@ Upload Audio And Verify Transcript Batch Event
     ${event}=    Set Variable    ${events}[0]
     Log    Event data: ${event}
 
-    # Verify event contains transcript data (data field is JSON, so check the data column)
-    Should Not Be Empty    ${event}[3]
-    ...    msg=Event should have transcript data
+    # Verify event contains required fields (API returns dictionaries)
+    Dictionary Should Contain Key    ${event}    data
+    ...    msg=Event should have data field
+    Dictionary Should Contain Key    ${event}    user_id
+    ...    msg=Event should have user_id field
 
 Conversation Complete Should Trigger Event
     [Documentation]    Verify conversation.complete event after conversation ends
@@ -96,14 +94,10 @@ Conversation Complete Should Trigger Event
 
     # Upload audio (triggers conversation creation and completion)
     File Should Exist    ${TEST_AUDIO_FILE}
-    ${result}=    Upload Audio For Processing    ${TEST_AUDIO_FILE}
-
-    # Wait for full pipeline: transcription → conversation
-    Sleep    20s
+    ${conversation}=    Upload Audio File    ${TEST_AUDIO_FILE}
 
-    # Verify conversation.complete event
-    ${final_count}=    Get Plugin Event Count    conversation.complete
-    ${new_events}=    Evaluate    ${final_count} - ${baseline_count}
+    # Wait for full pipeline: transcription → conversation (polls every 2s, max 40s)
+    ${new_events}=    Wait For Plugin Event    conversation.complete    ${baseline_count}    timeout=40s
 
     Should Be True    ${new_events} > 0
     ...    msg=At least one conversation.complete event should be logged
@@ -124,14 +118,10 @@ Memory Processing Should Trigger Event
 
     # Upload audio with meaningful content for memory extraction
     File Should Exist    ${TEST_AUDIO_FILE}
-    ${result}=    Upload Audio For Processing    ${TEST_AUDIO_FILE}
-
-    # Wait for full pipeline: transcription → conversation → memory
-    Sleep    30s
+    ${conversation}=    Upload Audio File    ${TEST_AUDIO_FILE}
 
-    # Verify memory.processed event
-    ${final_count}=    Get Plugin Event Count    memory.processed
-    ${new_events}=    Evaluate    ${final_count} - ${baseline_count}
+    # Wait for full pipeline: transcription → conversation → memory (polls every 2s, max 60s)
+    ${new_events}=    Wait For Plugin Event    memory.processed    ${baseline_count}    timeout=60s
 
     Should Be True    ${new_events} > 0
     ...    msg=At least one memory.processed event should be logged
@@ -154,19 +144,12 @@ Verify All Events Are Logged
 
     # Upload audio file (should trigger all events)
     File Should Exist    ${TEST_AUDIO_FILE}
-    ${result}=    Upload Audio For Processing    ${TEST_AUDIO_FILE}
-
-    # Wait for full pipeline
-    Sleep    35s
-
-    # Verify all events were triggered
-    ${batch_final}=    Get Plugin Event Count    transcript.batch
-    ${conv_final}=    Get Plugin Event Count    conversation.complete
-    ${mem_final}=    Get Plugin Event Count    memory.processed
+    ${conversation}=    Upload Audio File    ${TEST_AUDIO_FILE}
 
-    ${batch_new}=    Evaluate    ${batch_final} - ${batch_baseline}
-    ${conv_new}=    Evaluate    ${conv_final} - ${conv_baseline}
-    ${mem_new}=    Evaluate    ${mem_final} - ${mem_baseline}
+    # Wait for events in pipeline order (polls every 2s for each)
+    ${batch_new}=    Wait For Plugin Event    transcript.batch    ${batch_baseline}    timeout=30s
+    ${conv_new}=    Wait For Plugin Event    conversation.complete    ${conv_baseline}    timeout=30s
+    ${mem_new}=    Wait For Plugin Event    memory.processed    ${mem_baseline}    timeout=60s
 
     Should Be True    ${batch_new} > 0
     ...    msg=transcript.batch events should be logged
@@ -195,21 +178,3 @@ Test Cleanup
     # Standard cleanup
     # Note: We intentionally don't clear plugin events between tests
     # to allow for debugging and event inspection
-
-Upload Audio For Processing
-    [Arguments]    ${audio_file}
-    [Documentation]    Upload audio file for batch processing
-
-    # Get admin session
-    ${session}=    Get Admin API Session
-
-    # Upload audio file
-    ${files}=    Create Dictionary    files=${audio_file}
-    ${response}=    POST On Session    ${session}    /api/process-audio-files
-    ...    files=${files}
-    ...    expected_status=200
-
-    ${result}=    Set Variable    ${response.json()}
-    Log    Upload result: ${result}
-
-    RETURN    ${result}
diff --git a/tests/libs/auth_helpers.py b/tests/libs/auth_helpers.py
new file mode 100644
index 00000000..e9625d85
--- /dev/null
+++ b/tests/libs/auth_helpers.py
@@ -0,0 +1,41 @@
+"""Robot Framework library for authentication helpers.
+
+Provides utilities for working with JWT tokens and user authentication.
+"""
+
+import base64
+import json
+
+
+def get_user_id_from_token(jwt_token: str) -> str:
+    """Extract user ID from JWT token.
+
+    Args:
+        jwt_token: JWT token string (format: header.payload.signature)
+
+    Returns:
+        User ID from the 'sub' field in the token payload
+
+    Example:
+        ${user_id}=    Get User ID From Token    ${token}
+    """
+    # Split token into parts
+    parts = jwt_token.split('.')
+    if len(parts) != 3:
+        raise ValueError(f"Invalid JWT token format: expected 3 parts, got {len(parts)}")
+
+    # Decode payload (add padding if needed)
+    payload_b64 = parts[1]
+    padding = (4 - len(payload_b64) % 4) % 4
+    payload_b64_padded = payload_b64 + ('=' * padding)
+
+    # Base64 decode and parse JSON
+    payload_bytes = base64.urlsafe_b64decode(payload_b64_padded)
+    payload = json.loads(payload_bytes.decode('utf-8'))
+
+    # Extract user ID from 'sub' field
+    user_id = payload.get('sub')
+    if not user_id:
+        raise ValueError("Token payload does not contain 'sub' field")
+
+    return user_id
diff --git a/tests/resources/plugin_keywords.robot b/tests/resources/plugin_keywords.robot
index 2b1b19e7..98b2f7e8 100644
--- a/tests/resources/plugin_keywords.robot
+++ b/tests/resources/plugin_keywords.robot
@@ -5,13 +5,13 @@ Documentation    Plugin testing resource file
 ...              Keywords in this file should handle:
 ...              - Mock plugin creation and registration
 ...              - Plugin event subscription verification
-...              - Event dispatch testing
+...              - Event dispatch testing via API
 ...              - Wake word trigger testing
 ...
 Library          Collections
 Library          OperatingSystem
 Library          Process
-Library          DatabaseLibrary
+Library          RequestsLibrary
 
 *** Keywords ***
 Create Mock Plugin Config
@@ -86,45 +86,50 @@ Verify HA Plugin Uses Events
 # Test Plugin Event Database Keywords
 
 Clear Plugin Events
-    [Documentation]    Clear all events from test plugin database
-    Connect To Database    sqlite3    ${CURDIR}/../../backends/advanced/data/test_debug_dir/test_plugin_events.db
-    Execute SQL String    DELETE FROM plugin_events
-    Disconnect From Database
+    [Documentation]    Clear all events from test plugin database via API
+    ${response}=    DELETE On Session    api    /api/test/plugins/events
+    Should Be Equal As Integers    ${response.status_code}    200
+    Log    Cleared ${response.json()}[events_cleared] plugin events
 
 Get Plugin Events By Type
     [Arguments]    ${event_type}
-    [Documentation]    Query plugin events by event type
-    Connect To Database    sqlite3    ${CURDIR}/../../backends/advanced/data/test_debug_dir/test_plugin_events.db
-    ${query}=    Query    SELECT * FROM plugin_events WHERE event = '${event_type}' ORDER BY created_at DESC
-    Disconnect From Database
-    RETURN    ${query}
+    [Documentation]    Query plugin events by event type via API
+    ${response}=    GET On Session    api    /api/test/plugins/events    params=event_type=${event_type}
+    Should Be Equal As Integers    ${response.status_code}    200
+    RETURN    ${response.json()}[events]
 
 Get Plugin Events By User
     [Arguments]    ${user_id}
     [Documentation]    Query plugin events by user_id
-    Connect To Database    sqlite3    ${CURDIR}/../../backends/advanced/data/test_debug_dir/test_plugin_events.db
-    ${query}=    Query    SELECT * FROM plugin_events WHERE user_id = '${user_id}' ORDER BY created_at DESC
-    Disconnect From Database
-    RETURN    ${query}
+    # Note: Not implemented in API yet, keeping for backward compatibility
+    ${response}=    GET On Session    api    /api/test/plugins/events
+    Should Be Equal As Integers    ${response.status_code}    200
+    ${all_events}=    Set Variable    ${response.json()}[events]
+    # Filter by user_id in Robot Framework
+    ${filtered}=    Create List
+    FOR    ${event}    IN    @{all_events}
+        IF    '${event}[user_id]' == '${user_id}'
+            Append To List    ${filtered}    ${event}
+        END
+    END
+    RETURN    ${filtered}
 
 Get All Plugin Events
-    [Documentation]    Get all events from test plugin database
-    Connect To Database    sqlite3    ${CURDIR}/../../backends/advanced/data/test_debug_dir/test_plugin_events.db
-    ${query}=    Query    SELECT * FROM plugin_events ORDER BY created_at DESC
-    Disconnect From Database
-    RETURN    ${query}
+    [Documentation]    Get all events from test plugin database via API
+    ${response}=    GET On Session    api    /api/test/plugins/events
+    Should Be Equal As Integers    ${response.status_code}    200
+    RETURN    ${response.json()}[events]
 
 Get Plugin Event Count
     [Arguments]    ${event_type}=${NONE}
-    [Documentation]    Get count of events, optionally filtered by type
-    Connect To Database    sqlite3    ${CURDIR}/../../backends/advanced/data/test_debug_dir/test_plugin_events.db
+    [Documentation]    Get count of events via API, optionally filtered by type
     IF    '${event_type}' != 'None'
-        ${count}=    Row Count    SELECT COUNT(*) FROM plugin_events WHERE event = '${event_type}'
+        ${response}=    GET On Session    api    /api/test/plugins/events/count    params=event_type=${event_type}
     ELSE
-        ${count}=    Row Count    SELECT COUNT(*) FROM plugin_events
+        ${response}=    GET On Session    api    /api/test/plugins/events/count
     END
-    Disconnect From Database
-    RETURN    ${count}
+    Should Be Equal As Integers    ${response.status_code}    200
+    RETURN    ${response.json()}[count]
 
 Verify Event Contains Data
     [Arguments]    ${event}    @{required_fields}
@@ -133,3 +138,38 @@ Verify Event Contains Data
         Dictionary Should Contain Key    ${event}    ${field}
         ...    msg=Event should contain field '${field}'
     END
+
+Wait For Plugin Event
+    [Documentation]    Wait for at least one new plugin event of the specified type
+    ...
+    ...    Polls the database until the event count increases above the baseline.
+    ...    Uses configurable timeout and retry interval for efficient polling.
+    ...
+    ...    Arguments:
+    ...    - event_type: The event type to wait for (e.g., 'transcript.batch')
+    ...    - baseline_count: The event count before the operation started
+    ...    - timeout: Maximum time to wait (default: 30s)
+    ...    - retry_interval: Time between polling attempts (default: 2s)
+    [Arguments]    ${event_type}    ${baseline_count}    ${timeout}=30s    ${retry_interval}=2s
+
+    Wait Until Keyword Succeeds    ${timeout}    ${retry_interval}
+    ...    Plugin Event Count Should Be Greater Than    ${event_type}    ${baseline_count}
+
+    # After successful wait, get the final count
+    ${current_count}=    Get Plugin Event Count    ${event_type}
+    ${new_events}=    Evaluate    ${current_count} - ${baseline_count}
+    RETURN    ${new_events}
+
+Plugin Event Count Should Be Greater Than
+    [Documentation]    Assert that the current event count is greater than baseline
+    ...
+    ...    This keyword is used by Wait For Plugin Event for polling.
+    ...    It will fail (causing a retry) until the condition is met.
+    [Arguments]    ${event_type}    ${baseline_count}
+
+    ${current_count}=    Get Plugin Event Count    ${event_type}
+    ${new_events}=    Evaluate    ${current_count} - ${baseline_count}
+    Should Be True    ${new_events} > 0
+    ...    msg=Expected new ${event_type} events, but count is still ${current_count} (baseline: ${baseline_count})
+
+    RETURN    ${new_events}
diff --git a/tests/resources/session_keywords.robot b/tests/resources/session_keywords.robot
index b7c21619..bb4d4444 100644
--- a/tests/resources/session_keywords.robot
+++ b/tests/resources/session_keywords.robot
@@ -42,6 +42,18 @@ Get Anonymous Session
 
     Create Session    ${session_name}    ${base_url}    verify=True
 
+Get Admin API Session
+    [Documentation]    Get an authenticated admin API session (creates if doesn't exist)
+    [Arguments]    ${session_name}=api
+
+    # Try to use existing session, create if needed
+    ${session_exists}=    Run Keyword And Return Status    GET On Session    ${session_name}    /health    expected_status=any
+    IF    not ${session_exists}
+        Create API Session    ${session_name}
+    END
+
+    RETURN    ${session_name}
+
 
 # Core Authentication
 Get Authentication Token
diff --git a/tests/resources/websocket_keywords.robot b/tests/resources/websocket_keywords.robot
index 6d318f38..1db7a212 100644
--- a/tests/resources/websocket_keywords.robot
+++ b/tests/resources/websocket_keywords.robot
@@ -4,6 +4,7 @@ Library          Collections
 Library          OperatingSystem
 Library          String
 Library          ../libs/audio_stream_library.py
+Library          ../libs/auth_helpers.py
 Variables        ../setup/test_env.py
 Resource         session_keywords.robot
 Resource         queue_keywords.robot
@@ -15,8 +16,13 @@ Get Client ID From Device Name
     ...                Matches backend logic in client_manager.py:generate_client_id()
     [Arguments]    ${device_name}
 
-    # Test admin user ID: 695f6e8595eae00281d26432 (actual ID from test environment)
-    ${user_suffix}=    Set Variable    d26432
+    # Get admin user ID dynamically from JWT token (changes on each database reset)
+    ${admin_session}=    Get Admin API Session
+    ${token}=    Get Authentication Token    ${admin_session}    ${ADMIN_EMAIL}    ${ADMIN_PASSWORD}
+    ${user_id}=    Get User ID From Token    ${token}
+
+    # Extract last 6 characters of user ID (matches backend logic)
+    ${user_suffix}=    Get Substring    ${user_id}    -6
 
     # Sanitize and truncate device name to 10 chars (matches backend: [:10])
     # Backend sanitizes: lowercase, alphanumeric + hyphens only
diff --git a/tests/run-custom.sh b/tests/run-custom.sh
index c1ce1317..8ed2f4cb 100755
--- a/tests/run-custom.sh
+++ b/tests/run-custom.sh
@@ -1,20 +1,144 @@
 #!/bin/bash
-# Quick wrapper for running Robot tests with custom configs
-# Usage: ./run-custom.sh <config-name> [parakeet-url]
-#
-# Examples:
-#   ./run-custom.sh parakeet-openai http://host.docker.internal:8767
-#   ./run-custom.sh deepgram-openai
-#   ./run-custom.sh parakeet-ollama http://host.docker.internal:8767
+# Custom test runner for debugging - runs specific test files or tags
+# Usage:
+#   ./run-custom.sh integration/phase1_phase2_tests.robot  # Run specific file
+#   ./run-custom.sh --tag audio-streaming                  # Run by tag
+#   ./run-custom.sh --test "Generic Transcription Provider Works"  # Run specific test
 
 set -e
 
-CONFIG_NAME="${1:-parakeet-openai}"
-PARAKEET_URL="${2:-http://host.docker.internal:8767}"
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
 
-echo "Running Robot tests with config: ${CONFIG_NAME}"
-echo "Parakeet ASR URL: ${PARAKEET_URL}"
+print_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
 
-CONFIG_FILE="../tests/configs/${CONFIG_NAME}.yml" \
-  PARAKEET_ASR_URL="${PARAKEET_URL}" \
-  ./run-robot-tests.sh
+print_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+print_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Check if we're in the right directory
+if [ ! -f "Makefile" ] || [ ! -d "endpoints" ]; then
+    print_error "Please run this script from the tests/ directory"
+    exit 1
+fi
+
+# Parse arguments
+TEST_FILE=""
+TAG=""
+TEST_NAME=""
+OUTPUTDIR="${OUTPUTDIR:-results-custom}"
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --tag)
+            TAG="$2"
+            shift 2
+            ;;
+        --test)
+            TEST_NAME="$2"
+            shift 2
+            ;;
+        --output)
+            OUTPUTDIR="$2"
+            shift 2
+            ;;
+        -h|--help)
+            echo "Usage: $0 [OPTIONS] [TEST_FILE]"
+            echo ""
+            echo "Options:"
+            echo "  --tag TAG          Run tests with specific tag"
+            echo "  --test NAME        Run specific test by name"
+            echo "  --output DIR       Output directory (default: results-custom)"
+            echo "  -h, --help         Show this help message"
+            echo ""
+            echo "Examples:"
+            echo "  $0 integration/phase1_phase2_tests.robot"
+            echo "  $0 --tag audio-streaming"
+            echo "  $0 --test \"Generic Transcription Provider Works\""
+            exit 0
+            ;;
+        *)
+            TEST_FILE="$1"
+            shift
+            ;;
+    esac
+done
+
+# Load environment variables
+if [ -f "setup/.env.test" ]; then
+    print_info "Loading environment from setup/.env.test..."
+    set -a
+    source setup/.env.test
+    set +a
+else
+    print_error "setup/.env.test not found. Run ./run-robot-tests.sh first to create test environment."
+    exit 1
+fi
+
+# Verify services are running
+print_info "Checking if test services are running..."
+if ! curl -s http://localhost:8001/health > /dev/null 2>&1; then
+    print_error "Backend test service is not running on port 8001"
+    print_info "Start services with: ./setup-test-containers.sh"
+    print_info "Or let this script start them (will take time)..."
+    read -p "Start test services now? [y/N] " -n 1 -r
+    echo
+    if [[ $REPLY =~ ^[Yy]$ ]]; then
+        print_info "Starting test infrastructure..."
+        # Get the script directory to find setup script
+        SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+        "$SCRIPT_DIR/setup-test-containers.sh"
+    else
+        exit 1
+    fi
+fi
+
+print_success "Backend is ready"
+
+# Build robot command
+ROBOT_CMD="uv run --with-requirements test-requirements.txt robot"
+ROBOT_CMD="$ROBOT_CMD --outputdir $OUTPUTDIR"
+ROBOT_CMD="$ROBOT_CMD --loglevel DEBUG"  # Enable debug logging
+
+if [ -n "$TAG" ]; then
+    print_info "Running tests with tag: $TAG"
+    ROBOT_CMD="$ROBOT_CMD --include $TAG"
+fi
+
+if [ -n "$TEST_NAME" ]; then
+    print_info "Running test: $TEST_NAME"
+    ROBOT_CMD="$ROBOT_CMD --test \"$TEST_NAME\""
+fi
+
+if [ -n "$TEST_FILE" ]; then
+    print_info "Running test file: $TEST_FILE"
+    ROBOT_CMD="$ROBOT_CMD $TEST_FILE"
+else
+    # Run all tests if no specific file/tag/test specified
+    print_info "Running all tests (no filter specified)"
+    ROBOT_CMD="$ROBOT_CMD endpoints integration infrastructure"
+fi
+
+print_info "Command: $ROBOT_CMD"
+print_info "Output directory: $OUTPUTDIR"
+echo ""
+
+# Run the tests
+if eval $ROBOT_CMD; then
+    print_success "Tests completed successfully!"
+    exit 0
+else
+    print_error "Tests failed!"
+    print_info "View results: $OUTPUTDIR/log.html"
+    exit 1
+fi
diff --git a/tests/run-robot-tests.sh b/tests/run-robot-tests.sh
index 14644d90..e7885d10 100755
--- a/tests/run-robot-tests.sh
+++ b/tests/run-robot-tests.sh
@@ -43,7 +43,7 @@ print_info "Robot Framework Test Runner"
 print_info "============================"
 
 # Configuration
-CLEANUP_CONTAINERS="${CLEANUP_CONTAINERS:-true}"
+CLEANUP_CONTAINERS="${CLEANUP_CONTAINERS:-false}"  # Changed default: keep containers running for faster re-runs
 OUTPUTDIR="${OUTPUTDIR:-results}"
 
 # Set default CONFIG_FILE if not provided
@@ -138,160 +138,15 @@ DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
 # Test Configuration
 TEST_TIMEOUT=120
 TEST_DEVICE_NAME=robot-test
+
+# Docker Compose Project Name (defaults to advanced-backend-test if not set)
+COMPOSE_PROJECT_NAME=${COMPOSE_PROJECT_NAME:-advanced-backend-test}
 EOF
     print_success "Created setup/.env.test"
 fi
 
-# Navigate to backend directory for docker compose
-cd "$BACKEND_DIR"
-
-print_info "Starting test infrastructure..."
-
-# Use unique project name to avoid conflicts with development environment
-export COMPOSE_PROJECT_NAME="advanced-backend-test"
-
-# Ensure required config files exist
-# memory_config.yaml no longer used; memory settings live in config.yml
-
-# Clean up any existing test containers and volumes for fresh start
-print_info "Cleaning up any existing test environment..."
-
-# Try cleanup with current project name
-docker compose -f docker-compose-test.yml down -v 2>/dev/null || true
-
-# Also try cleanup with default project name (in case containers were started without COMPOSE_PROJECT_NAME)
-COMPOSE_PROJECT_NAME=advanced docker compose -f docker-compose-test.yml down -v 2>/dev/null || true
-
-# Force remove any stuck containers with both naming patterns
-print_info "Removing any stuck test containers..."
-TEST_SERVICES=(mongo-test redis-test qdrant-test chronicle-backend-test workers-test webui-test speaker-service-test)
-
-# Remove containers with new project name (advanced-backend-test)
-for service in "${TEST_SERVICES[@]}"; do
-    docker rm -f "advanced-backend-test-${service}-1" 2>/dev/null || true
-done
-
-# Remove containers with old/default project name (advanced)
-for service in "${TEST_SERVICES[@]}"; do
-    docker rm -f "advanced-${service}-1" 2>/dev/null || true
-done
-
-# Start infrastructure services (MongoDB, Redis, Qdrant)
-print_info "Starting MongoDB, Redis, and Qdrant (fresh containers)..."
-docker compose -f docker-compose-test.yml up -d --quiet-pull mongo-test redis-test qdrant-test
-
-# Wait for MongoDB
-print_info "Waiting for MongoDB (up to 60s)..."
-for i in {1..30}; do
-    if docker compose -f docker-compose-test.yml exec -T mongo-test mongosh --eval "db.adminCommand({ping: 1})" > /dev/null 2>&1; then
-        print_success "MongoDB is ready"
-        break
-    fi
-    if [ $i -eq 30 ]; then
-        print_error "MongoDB failed to start"
-        docker compose -f docker-compose-test.yml logs mongo-test
-        exit 1
-    fi
-    sleep 2
-done
-
-# Wait for Qdrant
-print_info "Waiting for Qdrant (up to 60s)..."
-for i in {1..30}; do
-    if curl -s http://localhost:6337/healthz > /dev/null 2>&1; then
-        print_success "Qdrant is ready"
-        break
-    fi
-    if [ $i -eq 30 ]; then
-        print_error "Qdrant failed to start"
-        docker compose -f docker-compose-test.yml logs qdrant-test
-        exit 1
-    fi
-    sleep 2
-done
-
-# Build and start backend
-print_info "Building backend..."
-docker compose -f docker-compose-test.yml build chronicle-backend-test
-
-print_info "Starting backend..."
-docker compose -f docker-compose-test.yml up -d chronicle-backend-test
-
-# Wait for backend
-print_info "Waiting for backend (up to 120s)..."
-for i in {1..40}; do
-    if curl -s http://localhost:8001/health > /dev/null 2>&1; then
-        print_success "Backend is ready"
-        break
-    fi
-    if [ $i -eq 40 ]; then
-        print_error "Backend failed to start"
-        docker compose -f docker-compose-test.yml logs chronicle-backend-test
-        exit 1
-    fi
-    sleep 3
-done
-
-# Build and start workers
-print_info "Building workers..."
-docker compose -f docker-compose-test.yml build workers-test
-
-print_info "Starting RQ workers..."
-docker compose -f docker-compose-test.yml up -d workers-test
-
-# Wait for workers container
-print_info "Waiting for workers container (up to 30s)..."
-for i in {1..15}; do
-    if docker compose -f docker-compose-test.yml ps workers-test | grep -q "Up"; then
-        print_success "Workers container is running"
-        break
-    fi
-    if [ $i -eq 15 ]; then
-        print_error "Workers container failed to start"
-        docker compose -f docker-compose-test.yml logs workers-test
-        exit 1
-    fi
-    sleep 2
-done
-
-# Verify workers are registered
-print_info "Waiting for workers to register with Redis (up to 60s)..."
-for i in {1..30}; do
-    WORKER_COUNT=$(docker compose -f docker-compose-test.yml exec -T workers-test uv run python -c 'from rq import Worker; from redis import Redis; import os; r = Redis.from_url(os.getenv("REDIS_URL", "redis://redis-test:6379/0")); print(len(Worker.all(connection=r)))' 2>/dev/null || echo "0")
-
-    if [ "$WORKER_COUNT" -ge 6 ]; then
-        print_success "Found $WORKER_COUNT RQ workers registered"
-        break
-    fi
-
-    if [ $i -eq 30 ]; then
-        print_error "Workers failed to register after 60s"
-        docker compose -f docker-compose-test.yml logs --tail=50 workers-test
-        exit 1
-    fi
-
-    sleep 2
-done
-
-# Verify unified audio stream worker is running
-print_info "Verifying unified audio stream worker process..."
-STREAM_WORKER_CHECK=$(docker compose -f docker-compose-test.yml exec -T workers-test ps aux | grep -c "audio_stream_worker" || echo "0" | tr -d '\n\r')
-STREAM_WORKER_CHECK=${STREAM_WORKER_CHECK//[^0-9]/}  # Remove non-numeric characters
-if [ -n "$STREAM_WORKER_CHECK" ] && [ "$STREAM_WORKER_CHECK" -gt 0 ]; then
-    print_success "Unified audio stream worker process is running"
-else
-    print_warning "Audio stream worker process not found - checking logs..."
-    docker compose -f docker-compose-test.yml logs --tail=30 workers-test | grep -i "audio.*stream.*worker" || true
-fi
-
-# Check Redis consumer groups registration
-print_info "Checking Redis Streams consumer groups..."
-docker compose -f docker-compose-test.yml exec -T redis-test redis-cli KEYS "audio:stream:*" 2>/dev/null || true
-
-print_success "All services ready!"
-
-# Return to tests directory
-cd "$TESTS_DIR"
+# Start test containers using dedicated startup script
+FRESH_BUILD=true "$TESTS_DIR/setup-test-containers.sh"
 
 # Run Robot Framework tests via Makefile
 # Dependencies are handled automatically by 'uv run' in Makefile
@@ -405,16 +260,10 @@ cd "$TESTS_DIR"
 
 # Cleanup test containers
 if [ "$CLEANUP_CONTAINERS" = "true" ]; then
-    print_info "Cleaning up test containers..."
-    cd "$BACKEND_DIR"
-    docker compose -f docker-compose-test.yml down -v
-    # Also cleanup with default project name
-    COMPOSE_PROJECT_NAME=advanced docker compose -f docker-compose-test.yml down -v 2>/dev/null || true
-    cd "$TESTS_DIR"
-    print_success "Cleanup complete"
+    REMOVE_VOLUMES=true "$TESTS_DIR/teardown-test-containers.sh"
 else
-    print_warning "Skipping container cleanup (CLEANUP_CONTAINERS=false)"
-    print_info "To cleanup manually: cd $BACKEND_DIR && docker compose -f docker-compose-test.yml down -v"
+    print_warning "Keeping containers running for next test (CLEANUP_CONTAINERS=false)"
+    print_info "To cleanup manually: REMOVE_VOLUMES=true ./teardown-test-containers.sh"
 fi
 
 if [ $TEST_EXIT_CODE -eq 0 ]; then
diff --git a/tests/setup/setup_keywords.robot b/tests/setup/setup_keywords.robot
index 656a082d..b0cb87b7 100644
--- a/tests/setup/setup_keywords.robot
+++ b/tests/setup/setup_keywords.robot
@@ -104,15 +104,26 @@ Start Docker Services
         RETURN
     END
 
+    # Get HF_TOKEN from environment if available
+    ${hf_token}=    Get Environment Variable    HF_TOKEN    default=${EMPTY}
+
     # Clean up any stopped/stuck containers first
     Run Process    docker    compose    -f    ${compose_file}    down    -v    cwd=${working_dir}    shell=True
     Run Process    docker    rm    -f    ${MONGO_CONTAINER}    ${REDIS_CONTAINER}    ${QDRANT_CONTAINER}    ${BACKEND_CONTAINER}    ${WORKERS_CONTAINER}    ${WEBUI_CONTAINER}    shell=True
 
-    # Start containers
+    # Start containers with HF_TOKEN passed through if available
     IF    ${build}
-        Run Process    docker    compose    -f    ${compose_file}    up    -d    --build    cwd=${working_dir}    shell=True
+        IF    '${hf_token}' != '${EMPTY}'
+            Run Process    docker    compose    -f    ${compose_file}    up    -d    --build    cwd=${working_dir}    shell=True    env:HF_TOKEN=${hf_token}
+        ELSE
+            Run Process    docker    compose    -f    ${compose_file}    up    -d    --build    cwd=${working_dir}    shell=True
+        END
     ELSE
-        Run Process    docker    compose    -f    ${compose_file}    up    -d    cwd=${working_dir}    shell=True
+        IF    '${hf_token}' != '${EMPTY}'
+            Run Process    docker    compose    -f    ${compose_file}    up    -d    cwd=${working_dir}    shell=True    env:HF_TOKEN=${hf_token}
+        ELSE
+            Run Process    docker    compose    -f    ${compose_file}    up    -d    cwd=${working_dir}    shell=True
+        END
     END
 
     Log    Waiting for services to be ready...
@@ -132,8 +143,15 @@ Rebuild Docker Services
     [Documentation]    Rebuild and restart Docker services
     [Arguments]    ${compose_file}=docker-compose-test.yml    ${working_dir}=${BACKEND_DIR}
 
+    # Get HF_TOKEN from environment if available
+    ${hf_token}=    Get Environment Variable    HF_TOKEN    default=${EMPTY}
+
     Log To Console    Rebuilding containers with latest code...
-    Run Process    docker    compose    -f    ${compose_file}    up    -d    --build    cwd=${working_dir}    shell=True
+    IF    '${hf_token}' != '${EMPTY}'
+        Run Process    docker    compose    -f    ${compose_file}    up    -d    --build    cwd=${working_dir}    shell=True    env:HF_TOKEN=${hf_token}
+    ELSE
+        Run Process    docker    compose    -f    ${compose_file}    up    -d    --build    cwd=${working_dir}    shell=True
+    END
 
     Log To Console    Waiting for services to be ready...
     Wait Until Keyword Succeeds    60s    5s    Check Services Ready    ${API_URL}
@@ -154,8 +172,15 @@ Start Speaker Recognition Service
         RETURN
     END
 
+    # Get HF_TOKEN from environment if available
+    ${hf_token}=    Get Environment Variable    HF_TOKEN    default=${EMPTY}
+
     Log    Starting speaker-recognition-service
-    Run Process    docker    compose    -f    docker-compose-test.yml    up    -d    --build    cwd=${SPEAKER_RECOGNITION_DIR}    shell=True
+    IF    '${hf_token}' != '${EMPTY}'
+        Run Process    docker    compose    -f    docker-compose-test.yml    up    -d    --build    cwd=${SPEAKER_RECOGNITION_DIR}    shell=True    env:HF_TOKEN=${hf_token}
+    ELSE
+        Run Process    docker    compose    -f    docker-compose-test.yml    up    -d    --build    cwd=${SPEAKER_RECOGNITION_DIR}    shell=True
+    END
 
     Log    Waiting for speaker recognition service to start...
     Wait Until Keyword Succeeds    60s    5s    Check Services Ready    ${SPEAKER_RECOGNITION_URL}
diff --git a/tests/setup/test_env.py b/tests/setup/test_env.py
index 94956a14..a7cdc363 100644
--- a/tests/setup/test_env.py
+++ b/tests/setup/test_env.py
@@ -11,6 +11,7 @@
 # Find repository root (tests/setup/test_env.py -> go up 2 levels)
 REPO_ROOT = Path(__file__).parent.parent.parent
 backend_dir = REPO_ROOT / "backends" / "advanced"
+tests_dir = REPO_ROOT / "tests"
 
 # Export absolute paths for Robot Framework keywords
 BACKEND_DIR = str(backend_dir.absolute())
@@ -19,7 +20,9 @@
 
 # Load in reverse order of precedence (since override=False won't overwrite existing vars)
 # Load .env.test first (will set test-specific values)
-load_dotenv(backend_dir / ".env.test", override=False)
+# Try tests/setup/.env.test first, then fall back to tests/.env.test
+load_dotenv(Path(__file__).parent / ".env.test", override=False)
+load_dotenv(tests_dir / ".env.test", override=False)
 
 # Load .env second (will only fill in missing values, won't override .env.test or existing env vars)
 load_dotenv(backend_dir / ".env", override=False)
diff --git a/tests/setup/test_manager_keywords.robot b/tests/setup/test_manager_keywords.robot
index 8927994a..3de2728c 100644
--- a/tests/setup/test_manager_keywords.robot
+++ b/tests/setup/test_manager_keywords.robot
@@ -63,12 +63,14 @@ Clear Test Databases
 
     # Clear audio files (except fixtures subfolder)
     Run Process    bash    -c    find ${BACKEND_DIR}/data/test_audio_chunks -maxdepth 1 -name "*.wav" -delete || true    shell=True
-    Run Process    bash    -c    rm -rf ${BACKEND_DIR}/data/test_debug_dir/* || true    shell=True
+    # Don't delete plugin database - just clear its contents later via Clear Plugin Events keyword
+    # Run Process    bash    -c    rm -rf ${BACKEND_DIR}/data/test_debug_dir/* || true    shell=True
     Log To Console    Audio files cleared (fixtures/ subfolder preserved)
 
     # Clear container audio files (except fixtures subfolder)
     Run Process    bash    -c    docker exec ${BACKEND_CONTAINER} find /app/audio_chunks -maxdepth 1 -name "*.wav" -delete || true    shell=True
-    Run Process    bash    -c    docker exec ${BACKEND_CONTAINER} find /app/debug_dir -name "*" -type f -delete || true    shell=True
+    # Don't delete plugin database files - database is cleared via Clear Plugin Events keyword
+    # Run Process    bash    -c    docker exec ${BACKEND_CONTAINER} find /app/debug_dir -name "*" -type f -delete || true    shell=True
 
     # Clear Redis queues and job registries (preserve worker registrations, failed and completed jobs)
     # Delete all rq:* keys except worker registrations (rq:worker:*), failed jobs (rq:failed:*), and completed jobs (rq:finished:*)

From 61d72a5d5c747afcc5f2050d35eb3d2f3ec2eb9e Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sun, 11 Jan 2026 23:40:56 +0000
Subject: [PATCH 23/25] Add audio pipeline architecture documentation and
 improve audio persistence worker configuration

- Introduced a comprehensive documentation file detailing the audio pipeline architecture, covering data flow, processing stages, and key components.
- Enhanced the audio persistence worker setup by implementing multiple concurrent workers to improve audio processing efficiency.
- Adjusted sleep intervals in the audio streaming persistence job for better responsiveness and event loop yielding.
- Updated test script to run the full suite of integration tests from the specified directory, ensuring thorough testing coverage.
---
 Docs/audio-pipeline-architecture.md           | 1244 +++++++++++++++++
 backends/advanced/run-test.sh                 |    3 +-
 .../workers/audio_jobs.py                     |    6 +-
 .../workers/orchestrator/worker_registry.py   |   34 +-
 4 files changed, 1267 insertions(+), 20 deletions(-)
 create mode 100644 Docs/audio-pipeline-architecture.md

diff --git a/Docs/audio-pipeline-architecture.md b/Docs/audio-pipeline-architecture.md
new file mode 100644
index 00000000..f36f6e40
--- /dev/null
+++ b/Docs/audio-pipeline-architecture.md
@@ -0,0 +1,1244 @@
+# Audio Pipeline Architecture
+
+This document explains how audio flows through the Chronicle system from initial capture to final storage, including all intermediate processing stages, Redis streams, and data storage locations.
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Architecture Diagram](#architecture-diagram)
+- [Data Sources](#data-sources)
+- [Redis Streams: The Central Pipeline](#redis-streams-the-central-pipeline)
+- [Producer: AudioStreamProducer](#producer-audiostreamproducer)
+- [Dual-Consumer Architecture](#dual-consumer-architecture)
+- [Transcription Results Aggregator](#transcription-results-aggregator)
+- [Job Queue Orchestration (RQ)](#job-queue-orchestration-rq)
+- [Data Storage](#data-storage)
+- [Complete End-to-End Flow](#complete-end-to-end-flow)
+- [Key Design Patterns](#key-design-patterns)
+- [Failure Handling](#failure-handling)
+
+## Overview
+
+Chronicle's audio pipeline is built on three core technologies:
+
+- **Redis Streams**: Distributed message queues for audio chunks and transcription results
+- **Background Tasks**: Async consumers that process streams independently
+- **RQ Job Queue**: Orchestrates session-level and conversation-level workflows
+
+**Key Insight**: Multiple workers can independently consume the **same audio stream** using Redis Consumer Groups, enabling parallel processing paths (transcription + disk persistence) without duplication.
+
+## Architecture Diagram
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                        AUDIO INPUT                              │
+│  WebSocket (/ws) │ File Upload (/audio/upload) │ Google Drive  │
+└────────────────────────────────┬────────────────────────────────┘
+                                 ↓
+                    ┌────────────────────────┐
+                    │  AudioStreamProducer   │
+                    │  - Chunk audio (0.25s) │
+                    │  - Session metadata    │
+                    └────────────┬───────────┘
+                                 ↓
+                    ┌────────────────────────────────┐
+                    │  Redis Stream (Per Client)     │
+                    │  audio:stream:{client_id}      │
+                    └─────┬──────────────────┬───────┘
+                          ↓                  ↓
+          ┌───────────────────────┐  ┌──────────────────────┐
+          │ Transcription Consumer│  │ Audio Persistence    │
+          │ Group (streaming/batch)│  │ Consumer Group       │
+          │                       │  │                      │
+          │ → Deepgram WebSocket  │  │ → Writes WAV files   │
+          │ → Batch buffering     │  │ → Monitors rotation  │
+          │ → Publish results     │  │ → Stores file paths  │
+          └───────────┬───────────┘  └──────────┬───────────┘
+                      ↓                          ↓
+          ┌───────────────────────┐  ┌──────────────────────┐
+          │ transcription:results │  │ Disk Storage         │
+          │ :{session_id}         │  │ data/chunks/*.wav    │
+          └───────────┬───────────┘  └──────────────────────┘
+                      ↓
+          ┌───────────────────────┐
+          │ TranscriptionResults  │
+          │ Aggregator            │
+          │ - Combines chunks     │
+          │ - Merges timestamps   │
+          └───────────┬───────────┘
+                      ↓
+          ┌───────────────────────┐
+          │   RQ Job Pipeline     │
+          ├───────────────────────┤
+          │ speech_detection_job  │ ← Session-level
+          │         ↓             │
+          │ open_conversation_job │ ← Conversation-level
+          │         ↓             │
+          │ Post-Conversation:    │
+          │ • transcribe_full     │
+          │ • speaker_recognition │
+          │ • memory_extraction   │
+          │ • title_generation    │
+          └───────────┬───────────┘
+                      ↓
+          ┌───────────────────────┐
+          │   Final Storage       │
+          ├───────────────────────┤
+          │ MongoDB: conversations│
+          │ Disk: WAV files       │
+          │ Qdrant: Memories      │
+          └───────────────────────┘
+```
+
+## Data Sources
+
+### 1. WebSocket Streaming (`/ws`)
+
+**Endpoint**: `/ws?codec=pcm|opus&token=xxx&device_name=xxx`
+
+**Handlers**:
+- `handle_pcm_websocket()` - Raw PCM audio
+- `handle_omi_websocket()` - Opus-encoded audio (compressed, used by OMI devices)
+
+**Protocol**: Wyoming Protocol (JSON lines + binary frames)
+
+**Authentication**: JWT token required
+
+**Location**: `backends/advanced/src/advanced_omi_backend/routers/websocket_routes.py`
+
+**Container**: `chronicle-backend`
+
+### 2. File Upload (`/audio/upload`)
+
+**Endpoint**: `POST /api/audio/upload`
+
+**Accepts**: Multiple WAV files (multipart form data)
+
+**Authentication**: Admin only
+
+**Device ID**: Auto-generated as `{user_id_suffix}-upload` or custom `device_name`
+
+**Location**: `backends/advanced/src/advanced_omi_backend/routers/api_router.py`
+
+**Container**: `chronicle-backend`
+
+### 3. Google Drive Upload
+
+**Endpoint**: `POST /api/audio/upload_audio_from_gdrive`
+
+**Source**: Google Drive folder ID
+
+**Processing**: Downloads files and enqueues for processing
+
+**Container**: `chronicle-backend`
+
+## Redis Streams: The Central Pipeline
+
+### Stream Naming Convention
+
+```
+audio:stream:{client_id}
+```
+
+**Examples**:
+- `audio:stream:user01-phone`
+- `audio:stream:user01-omi-device`
+- `audio:stream:user01-upload`
+
+**Characteristics**:
+- **Client-specific isolation**: Each device has its own stream
+- **Fan-out pattern**: Multiple consumer groups read the same stream
+- **MAXLEN constraint**: Keeps last 25,000 entries (auto-trimming)
+- **No TTL**: Streams persist until manually deleted
+- **Container**: `redis` service
+
+### Session Metadata Storage
+
+```
+audio:session:{session_id}
+```
+
+**Type**: Redis Hash
+
+**Fields**:
+- `user_id`: MongoDB ObjectId
+- `client_id`: Device identifier
+- `connection_id`: WebSocket connection ID
+- `stream_name`: `audio:stream:{client_id}`
+- `status`: `"active"` → `"finalizing"` → `"complete"`
+- `chunks_published`: Integer count
+- `speech_detection_job_id`: RQ job ID
+- `audio_persistence_job_id`: RQ job ID
+- `websocket_connected`: `true|false`
+- `transcription_error`: Error message (if any)
+
+**TTL**: 1 hour
+
+**Container**: `redis`
+
+### Transcription Results Stream
+
+```
+transcription:results:{session_id}
+```
+
+**Type**: Redis Stream
+
+**Written by**: Transcription consumers (streaming or batch)
+
+**Read by**: `TranscriptionResultsAggregator`
+
+**Message Fields**:
+- `text`: Transcribed text for this chunk
+- `chunk_id`: Redis message ID from audio stream
+- `provider`: `"deepgram"` or `"parakeet"`
+- `confidence`: Float (0.0-1.0)
+- `words`: JSON array of word-level timestamps
+- `segments`: JSON array of speaker segments
+
+**Lifecycle**: Deleted when conversation completes
+
+**Container**: `redis`
+
+### Conversation Tracking
+
+```
+conversation:current:{session_id}
+```
+
+**Type**: Redis String
+
+**Value**: Current `conversation_id` (UUID)
+
+**Purpose**: Signals audio persistence job to rotate WAV file
+
+**TTL**: 24 hours
+
+**Container**: `redis`
+
+### Audio File Path Mapping
+
+```
+audio:file:{conversation_id}
+```
+
+**Type**: Redis String
+
+**Value**: File path (e.g., `1704067200000_user01-phone_convid.wav`)
+
+**Purpose**: Links conversation to its audio file on disk
+
+**TTL**: 24 hours
+
+**Container**: `redis`
+
+## Producer: AudioStreamProducer
+
+**File**: `backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py`
+
+**Container**: `chronicle-backend` (in-memory, no persistence)
+
+### Responsibilities
+
+#### 1. Session Initialization
+
+```python
+async def init_session(
+    session_id: str,
+    user_id: str,
+    client_id: str,
+    provider: str,
+    mode: str
+) -> None
+```
+
+**Actions**:
+- Creates `audio:session:{session_id}` hash in Redis
+- Initializes in-memory buffer for chunking
+- Stores session metadata (user, client, provider)
+
+#### 2. Audio Chunking
+
+```python
+async def add_audio_chunk(
+    session_id: str,
+    audio_data: bytes
+) -> list[str]
+```
+
+**Process**:
+1. Buffers incoming audio (arbitrary size from WebSocket)
+2. Creates **fixed-size chunks**: 0.25 seconds = 8,000 bytes
+   - Assumes: 16kHz sample rate, 16-bit mono PCM
+3. Prevents cutting audio mid-word (aligned chunks)
+4. Publishes each chunk to `audio:stream:{client_id}` via `XADD`
+5. Returns Redis message IDs for tracking
+
+**In-Memory Storage**: Session buffers stored in `AudioStreamProducer._session_buffers` dict
+
+#### 3. Session End Signal
+
+```python
+async def send_session_end_signal(session_id: str) -> None
+```
+
+**Actions**:
+- Publishes special `{"type": "END"}` message to stream
+- Signals all consumers to flush buffers and finalize
+- Updates session status to `"finalizing"`
+
+### Data Location
+
+**Memory**: `chronicle-backend` container (in-memory buffers)
+
+**Redis**: Published chunks in `audio:stream:{client_id}` (redis container)
+
+## Dual-Consumer Architecture
+
+Chronicle uses **Redis Consumer Groups** to enable multiple independent consumers to read the **same audio stream** without message duplication.
+
+### Consumer Group 1: Transcription
+
+Two implementations available:
+
+#### A. Streaming Transcription Consumer
+
+**File**: `backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py`
+
+**Class**: `StreamingTranscriptionConsumer`
+
+**Consumer Group**: `streaming-transcription`
+
+**Provider**: Deepgram (WebSocket-based)
+
+**Process**:
+1. Discovers `audio:stream:*` streams dynamically using `SCAN`
+2. Opens persistent WebSocket connection to Deepgram per stream
+3. Sends audio chunks **immediately** (no buffering)
+4. Publishes **interim results** to `transcription:interim:{session_id}` (Redis Pub/Sub)
+5. Publishes **final results** to `transcription:results:{session_id}` (Redis Stream)
+6. Triggers plugins on final results only
+7. ACKs messages with `XACK` to prevent reprocessing
+8. Handles END signal: closes WebSocket, cleans up
+
+**Container**: `chronicle-backend` (Background Task via `BackgroundTaskManager`)
+
+**Real-time Updates**: Interim results pushed to WebSocket clients via Pub/Sub
+
+#### B. Batch Transcription Consumer
+
+**File**: `backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py`
+
+**Class**: `BaseAudioStreamConsumer`
+
+**Consumer Group**: `{provider_name}_workers` (e.g., `deepgram_workers`, `parakeet_workers`)
+
+**Providers**: Deepgram (batch), Parakeet ASR (offline)
+
+**Process**:
+1. Reads from `audio:stream:{client_id}` using `XREADGROUP`
+2. Buffers chunks per session (default: 30 chunks = ~7.5 seconds)
+3. When buffer full:
+   - Combines chunks into single audio buffer
+   - Transcribes using provider API
+   - Adjusts word/segment timestamps relative to session start
+   - Publishes result to `transcription:results:{session_id}`
+4. Flushes remaining buffer on END signal
+5. ACKs all buffered messages with `XACK`
+6. Trims stream to keep only last 1,000 entries (`XTRIM MAXLEN`)
+
+**Container**: `chronicle-backend` (Background Task)
+
+**Batching Benefits**: Reduces API calls, improves transcription accuracy (more context)
+
+### Consumer Group 2: Audio Persistence
+
+**File**: `backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py`
+
+**Function**: `audio_streaming_persistence_job()`
+
+**Consumer Group**: `audio_persistence`
+
+**Consumer Name**: `persistence-worker-{session_id}`
+
+**Process**:
+1. Reads audio chunks from `audio:stream:{client_id}` using `XREADGROUP`
+2. Monitors `conversation:current:{session_id}` for rotation signals
+3. On conversation rotation:
+   - Closes current WAV file
+   - Opens new WAV file with new conversation ID
+4. Writes chunks immediately to disk (real-time persistence)
+5. Stores file path in `audio:file:{conversation_id}` (Redis)
+6. Handles END signal: closes file, returns statistics
+7. ACKs messages after writing to disk
+
+**Container**: `chronicle-backend` (RQ Worker)
+
+**Output Location**: `backends/advanced/data/chunks/` (volume-mounted)
+
+**File Format**: `{timestamp_ms}_{client_id}_{conversation_id}.wav`
+
+### Fan-Out Pattern Visualization
+
+```
+audio:stream:user01-phone
+    ↓
+    ├─ Consumer Group: "streaming-transcription"
+    │  └─ Worker: streaming-worker-12345
+    │     → Reads: chunks → Deepgram WS → Results stream
+    │
+    ├─ Consumer Group: "deepgram_workers"
+    │  ├─ Worker: deepgram-worker-67890
+    │  ├─ Worker: deepgram-worker-67891
+    │  └─ Reads: chunks → Buffer (30) → Batch API → Results stream
+    │
+    └─ Consumer Group: "audio_persistence"
+       └─ Worker: persistence-worker-sessionXYZ
+          → Reads: chunks → WAV file (disk)
+```
+
+**Key Benefits**:
+- **Horizontal scaling**: Multiple workers per group
+- **Independent processing**: Each group processes all messages
+- **No message loss**: Messages ACKed only after processing
+- **Decoupled**: Producer doesn't know about consumers
+
+## Transcription Results Aggregator
+
+**File**: `backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py`
+
+**Class**: `TranscriptionResultsAggregator`
+
+**Container**: `chronicle-backend` (in-memory, stateless)
+
+### Methods
+
+#### Get Combined Results
+
+```python
+async def get_combined_results(session_id: str) -> dict
+```
+
+**Returns**:
+```python
+{
+    "text": "Full transcript...",
+    "segments": [SpeakerSegment, ...],
+    "words": [Word, ...],
+    "provider": "deepgram",
+    "chunk_count": 42
+}
+```
+
+**Process**:
+- Reads all entries from `transcription:results:{session_id}`
+- For **streaming mode**: Uses latest final result only (supersedes interim)
+- For **batch mode**: Combines all chunks sequentially
+- Adjusts timestamps across chunks (adds audio offset)
+- Merges speaker segments, words
+
+#### Get Session Results (Raw)
+
+```python
+async def get_session_results(session_id: str) -> list[dict]
+```
+
+**Returns**: Raw list of transcription result messages
+
+#### Get Real-time Results
+
+```python
+async def get_realtime_results(
+    session_id: str,
+    last_id: str = "0-0"
+) -> tuple[list[dict], str]
+```
+
+**Returns**: `(new_results, new_last_id)`
+
+**Purpose**: Incremental polling for live UI updates
+
+### Data Location
+
+**Input**: `transcription:results:{session_id}` stream (redis container)
+
+**Processing**: In-memory (chronicle-backend container)
+
+**Output**: Returned to caller (no persistence)
+
+## Job Queue Orchestration (RQ)
+
+**Library**: Python RQ (Redis Queue)
+
+**File**: `backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py`
+
+**Containers**:
+- `chronicle-backend` (enqueues jobs)
+- `rq-worker` (executes jobs)
+
+### Job Pipeline
+
+```
+Session Starts
+    ↓
+┌─────────────────────────────────┐
+│ stream_speech_detection_job     │ ← Session-level (long-running)
+│ - Polls transcription results   │
+│ - Analyzes speech content       │
+│ - Checks speaker filters        │
+└─────────────┬───────────────────┘
+              ↓ (when speech detected)
+┌─────────────────────────────────┐
+│ open_conversation_job           │ ← Conversation-level (long-running)
+│ - Creates conversation          │
+│ - Signals file rotation         │
+│ - Monitors activity             │
+│ - Detects end conditions        │
+└─────────────┬───────────────────┘
+              ↓ (when conversation ends)
+┌─────────────────────────────────┐
+│ Post-Conversation Pipeline      │ ← Parallel batch jobs
+├─────────────────────────────────┤
+│ • transcribe_full_audio_job     │
+│ • recognize_speakers_job        │
+│ • memory_extraction_job         │
+│ • generate_title_summary_job    │
+│ • dispatch_conversation_complete│
+└─────────────────────────────────┘
+```
+
+### Session-Level Jobs
+
+#### Speech Detection Job
+
+**File**: `backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py`
+
+**Function**: `stream_speech_detection_job()`
+
+**Scope**: Entire session (can handle multiple conversations)
+
+**Max Duration**: 24 hours
+
+**Process**:
+1. Polls `TranscriptionResultsAggregator.get_combined_results()` (1-second intervals)
+2. Analyzes speech content:
+   - Word count > 10
+   - Duration > 5 seconds
+   - Confidence > threshold
+3. If speaker filter enabled: checks for enrolled speakers
+4. When speech detected:
+   - Creates conversation in MongoDB
+   - Enqueues `open_conversation_job`
+   - **Exits** (restarts when conversation completes)
+5. Handles transcription errors (marks session with error flag)
+
+**RQ Queue**: `speech_detection_queue` (dedicated queue)
+
+**Container**: `rq-worker`
+
+### Conversation-Level Jobs
+
+#### Open Conversation Job
+
+**File**: `backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py`
+
+**Function**: `open_conversation_job()`
+
+**Scope**: Single conversation
+
+**Max Duration**: 3 hours
+
+**Process**:
+1. Creates conversation document in MongoDB `conversations` collection
+2. Sets `conversation:current:{session_id}` = `conversation_id` (Redis)
+   - **Triggers audio persistence job to rotate WAV file**
+3. Polls for transcription updates (1-second intervals)
+4. Tracks speech activity (inactivity timeout = 60 seconds default)
+5. Detects end conditions:
+   - WebSocket disconnect
+   - User manual stop
+   - Inactivity timeout
+6. Waits for audio file path from persistence job
+7. Saves `audio_path` to conversation document
+8. Triggers conversation-level plugins
+9. Enqueues post-conversation jobs
+10. Calls `handle_end_of_conversation()` for cleanup + restart
+
+**RQ Queue**: `default`
+
+**Container**: `rq-worker`
+
+#### Audio Persistence Job
+
+**File**: `backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py`
+
+**Function**: `audio_streaming_persistence_job()`
+
+**Scope**: Entire session (parallel with open_conversation_job)
+
+**Max Duration**: 24 hours
+
+**Process**:
+1. Monitors `conversation:current:{session_id}` for rotation signals
+2. For each conversation:
+   - Opens new WAV file: `{timestamp}_{client_id}_{conversation_id}.wav`
+   - Writes chunks immediately as they arrive from stream
+   - Stores file path in `audio:file:{conversation_id}`
+3. On rotation signal:
+   - Closes current file
+   - Opens new file for next conversation
+4. On END signal:
+   - Closes file
+   - Returns statistics (chunk count, bytes, duration)
+
+**Output**: WAV files in `backends/advanced/data/chunks/`
+
+**Container**: `rq-worker`
+
+### Post-Conversation Pipeline
+
+All jobs run **in parallel** after conversation completes:
+
+#### 1. Transcribe Full Audio Job
+
+**File**: `backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py`
+
+**Function**: `transcribe_full_audio_job()`
+
+**Input**: Audio file from disk (`data/chunks/*.wav`)
+
+**Process**:
+- Batch transcribes entire conversation audio
+- Validates meaningful speech
+- Marks conversation `deleted` if no speech detected
+- Stores transcript, segments, words in MongoDB
+
+**Container**: `rq-worker`
+
+#### 2. Recognize Speakers Job
+
+**File**: `backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py`
+
+**Function**: `recognize_speakers_job()`
+
+**Prerequisite**: `transcribe_full_audio_job` completes
+
+**Process**:
+- Sends audio + segments to speaker recognition service
+- Identifies speakers using voice embeddings
+- Updates segment speaker labels in MongoDB
+
+**Optional**: Only runs if `DISABLE_SPEAKER_RECOGNITION=false`
+
+**Container**: `rq-worker`
+
+**External Service**: `speaker-recognition` container (if enabled)
+
+#### 3. Memory Extraction Job
+
+**File**: `backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py`
+
+**Function**: `memory_extraction_job()`
+
+**Prerequisite**: `transcribe_full_audio_job` completes
+
+**Process**:
+- Uses LLM (OpenAI/Ollama) to extract semantic facts
+- Stores embeddings in vector database:
+  - **Chronicle provider**: Qdrant
+  - **OpenMemory MCP provider**: External OpenMemory server
+
+**Container**: `rq-worker`
+
+**External Services**:
+- `ollama` or OpenAI API (LLM)
+- `qdrant` or OpenMemory MCP (vector storage)
+
+#### 4. Generate Title Summary Job
+
+**File**: `backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py`
+
+**Function**: `generate_title_summary_job()`
+
+**Prerequisite**: `transcribe_full_audio_job` completes
+
+**Process**:
+- Uses LLM to generate:
+  - Title (short summary)
+  - Summary (1-2 sentences)
+  - Detailed summary (paragraph)
+- Updates conversation document in MongoDB
+
+**Container**: `rq-worker`
+
+#### 5. Dispatch Conversation Complete Event
+
+**File**: `backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py`
+
+**Function**: `dispatch_conversation_complete_event_job()`
+
+**Process**:
+- Triggers `conversation.complete` plugin event
+- Only runs for **file uploads** (not streaming sessions)
+
+**Container**: `rq-worker`
+
+### Session Restart
+
+**File**: `backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py`
+
+**Function**: `handle_end_of_conversation()`
+
+**Process**:
+1. Deletes transcription results stream: `transcription:results:{session_id}`
+2. Increments `session:conversation_count:{session_id}`
+3. Checks if session still active (WebSocket connected)
+4. If active: Re-enqueues `stream_speech_detection_job` for next conversation
+5. Cleans up consumer groups and pending messages
+
+**Purpose**: Allows continuous recording with multiple conversations per session
+
+## Data Storage
+
+### MongoDB Collections
+
+**Database**: `chronicle`
+
+**Container**: `mongo`
+
+**Volume**: `mongodb_data` (persistent)
+
+#### `conversations` Collection
+
+**Schema**:
+```python
+{
+    "_id": ObjectId,
+    "conversation_id": "uuid-string",
+    "audio_uuid": "session_id",
+    "user_id": ObjectId,
+    "client_id": "user01-phone",
+
+    # Content
+    "title": "Meeting notes",
+    "summary": "Discussion about...",
+    "detailed_summary": "Longer summary...",
+    "transcript": "Full transcript text",
+    "audio_path": "1704067200000_user01-phone_convid.wav",
+
+    # Versioned Transcripts
+    "active_transcript_version": "v1",
+    "transcript_versions": {
+        "v1": {
+            "text": "Full transcript",
+            "segments": [SpeakerSegment],
+            "words": [Word],
+            "provider": "deepgram",
+            "processing_time_seconds": 45.2,
+            "created_at": "2025-01-11T12:00:00Z"
+        }
+    },
+    "segments": [SpeakerSegment],  # From active version
+
+    # Metadata
+    "created_at": "2025-01-11T12:00:00Z",
+    "completed_at": "2025-01-11T12:15:00Z",
+    "end_reason": "user_stopped|inactivity_timeout|websocket_disconnect",
+    "deleted": false
+}
+```
+
+**Indexes**:
+- `user_id` (for user-scoped queries)
+- `client_id` (for device filtering)
+- `conversation_id` (unique)
+
+#### `audio_chunks` Collection
+
+**Purpose**: Stores raw audio session data
+
+**Schema**:
+```python
+{
+    "_id": ObjectId,
+    "audio_uuid": "session_id",
+    "user_id": ObjectId,
+    "client_id": "user01-phone",
+    "created_at": "2025-01-11T12:00:00Z",
+    "metadata": { ... }
+}
+```
+
+**Use Case**: Speech-driven architecture (sessions without conversations)
+
+#### `users` Collection
+
+**Purpose**: User accounts, authentication, preferences
+
+**Schema**:
+```python
+{
+    "_id": ObjectId,
+    "email": "user@example.com",
+    "hashed_password": "...",
+    "is_active": true,
+    "is_superuser": false,
+    "created_at": "2025-01-11T12:00:00Z"
+}
+```
+
+### Disk Storage
+
+**Location**: `backends/advanced/data/chunks/`
+
+**Container**: `chronicle-backend` (volume-mounted)
+
+**Volume**: `./backends/advanced/data/chunks:/app/data/chunks`
+
+**File Format**: WAV files
+
+**Naming Convention**: `{timestamp_ms}_{client_id}_{conversation_id}.wav`
+
+**Example**: `1704067200000_user01-phone_550e8400-e29b-41d4-a716-446655440000.wav`
+
+**Created by**: `audio_streaming_persistence_job()`
+
+**Read by**: Post-conversation transcription jobs
+
+**Retention**: Manual cleanup (no automatic deletion)
+
+### Redis Storage
+
+**Container**: `redis`
+
+**Volume**: `redis_data` (persistent)
+
+| Key Pattern | Type | Purpose | TTL | Created By |
+|-------------|------|---------|-----|------------|
+| `audio:stream:{client_id}` | Stream | Audio chunks for transcription | None (MAXLEN=25k) | AudioStreamProducer |
+| `audio:session:{session_id}` | Hash | Session metadata | 1 hour | AudioStreamProducer |
+| `transcription:results:{session_id}` | Stream | Transcription results | Manual delete | Transcription consumers |
+| `transcription:interim:{session_id}` | Pub/Sub | Real-time interim results | N/A (ephemeral) | Streaming consumer |
+| `conversation:current:{session_id}` | String | Current conversation ID | 24 hours | open_conversation_job |
+| `audio:file:{conversation_id}` | String | Audio file path | 24 hours | audio_persistence_job |
+| `session:conversation_count:{session_id}` | Counter | Conversation count | 1 hour | handle_end_of_conversation |
+| `speech_detection_job:{client_id}` | String | Job ID for cleanup | 1 hour | speech_detection_job |
+| `rq:job:{job_id}` | Hash | RQ job metadata | 24 hours (default) | RQ |
+
+### Vector Storage (Memory)
+
+#### Option A: Qdrant (Chronicle Native Provider)
+
+**Container**: `qdrant`
+
+**Volume**: `qdrant_data` (persistent)
+
+**Ports**: 6333 (HTTP), 6334 (gRPC)
+
+**Collections**: User-specific collections for semantic embeddings
+
+**Written by**: `memory_extraction_job()`
+
+**Read by**: Memory search API (`/api/memories/search`)
+
+#### Option B: OpenMemory MCP
+
+**Container**: `openmemory-mcp` (external service)
+
+**Port**: 8765
+
+**Protocol**: MCP (Model Context Protocol)
+
+**Collections**: Cross-client memory storage
+
+**Written by**: `memory_extraction_job()` (via MCP provider)
+
+**Read by**: Memory search API (via MCP provider)
+
+## Complete End-to-End Flow
+
+### Step-by-Step Data Journey
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│ 1. AUDIO INPUT                                                  │
+└─────────────────────────────────────────────────────────────────┘
+  WebSocket (/ws) or File Upload (/audio/upload)
+  ↓
+  Container: chronicle-backend
+  ↓
+  AudioStreamProducer.init_session()
+  - Creates: audio:session:{session_id} (Redis)
+  - Initializes: In-memory buffer (chronicle-backend container)
+  ↓
+  AudioStreamProducer.add_audio_chunk()
+  - Buffers: In-memory (chronicle-backend)
+  - Chunks: Fixed 0.25s chunks (8,000 bytes)
+  - Publishes: audio:stream:{client_id} (Redis)
+  - Returns: Redis message IDs
+
+┌─────────────────────────────────────────────────────────────────┐
+│ 2. SESSION-LEVEL JOB (RQ)                                       │
+└─────────────────────────────────────────────────────────────────┘
+  stream_speech_detection_job
+  Container: rq-worker
+  ↓
+  Polls: TranscriptionResultsAggregator.get_combined_results()
+  Reads: transcription:results:{session_id} (Redis)
+  ↓
+  Analyzes: Word count, duration, confidence
+  ↓
+  When speech detected:
+    - Creates: Conversation document (MongoDB)
+    - Enqueues: open_conversation_job (RQ)
+    - Exits (restarts when conversation ends)
+
+┌─────────────────────────────────────────────────────────────────┐
+│ 3a. TRANSCRIPTION CONSUMER (Background Task)                    │
+└─────────────────────────────────────────────────────────────────┘
+  StreamingTranscriptionConsumer (or BaseAudioStreamConsumer)
+  Container: chronicle-backend (Background Task)
+  ↓
+  Reads: audio:stream:{client_id} (Redis, via XREADGROUP)
+  Consumer Group: streaming-transcription (or batch provider)
+  ↓
+  STREAMING PATH:
+    • Opens: WebSocket to Deepgram
+    • Sends: Chunks immediately (no buffering)
+    • Publishes Interim: transcription:interim:{session_id} (Redis Pub/Sub)
+    • Publishes Final: transcription:results:{session_id} (Redis Stream)
+    • Triggers: Plugins on final results
+
+  BATCH PATH:
+    • Buffers: 30 chunks (~7.5s) in memory (chronicle-backend)
+    • Combines: All buffered chunks
+    • Transcribes: Via provider API (Deepgram/Parakeet)
+    • Adjusts: Timestamps relative to session start
+    • Publishes: transcription:results:{session_id} (Redis Stream)
+
+┌─────────────────────────────────────────────────────────────────┐
+│ 3b. AUDIO PERSISTENCE CONSUMER (RQ Job)                         │
+└─────────────────────────────────────────────────────────────────┘
+  audio_streaming_persistence_job
+  Container: rq-worker
+  ↓
+  Reads: audio:stream:{client_id} (Redis, via XREADGROUP)
+  Consumer Group: audio_persistence
+  ↓
+  Monitors: conversation:current:{session_id} (Redis)
+  ↓
+  For each conversation:
+    • Opens: New WAV file (data/chunks/, chronicle-backend volume)
+    • Writes: Chunks immediately (real-time)
+    • Stores: audio:file:{conversation_id} = path (Redis)
+  ↓
+  On rotation signal:
+    • Closes: Current file
+    • Opens: New file for next conversation
+  ↓
+  On END signal:
+    • Closes: File
+    • Returns: Statistics (chunks, bytes, duration)
+
+┌─────────────────────────────────────────────────────────────────┐
+│ 4. CONVERSATION-LEVEL JOB (RQ)                                  │
+└─────────────────────────────────────────────────────────────────┘
+  open_conversation_job
+  Container: rq-worker
+  ↓
+  Creates: Conversation document (MongoDB conversations collection)
+  ↓
+  Sets: conversation:current:{session_id} = conversation_id (Redis)
+    → Triggers audio persistence job to rotate WAV file
+  ↓
+  Polls: TranscriptionResultsAggregator for updates (1s intervals)
+  Reads: transcription:results:{session_id} (Redis)
+  ↓
+  Tracks: Speech activity (inactivity timeout = 60s)
+  ↓
+  Detects End:
+    - Inactivity (60s)
+    - User manual stop
+    - WebSocket disconnect
+  ↓
+  Waits: For audio file path from persistence job
+  Reads: audio:file:{conversation_id} (Redis)
+  ↓
+  Saves: audio_path to conversation document (MongoDB)
+  ↓
+  Enqueues: POST-CONVERSATION PIPELINE (RQ)
+
+┌─────────────────────────────────────────────────────────────────┐
+│ 5. POST-CONVERSATION PIPELINE (RQ - Parallel Jobs)              │
+└─────────────────────────────────────────────────────────────────┘
+  All jobs run in parallel
+  Container: rq-worker
+  ↓
+  Reads: Audio file from disk (data/chunks/*.wav)
+
+  ┌─ transcribe_full_audio_job
+  │  - Batch transcribes: Complete audio file
+  │  - Validates: Meaningful speech
+  │  - Marks deleted: If no speech
+  │  - Stores: MongoDB (transcript, segments, words)
+  │
+  │  └─ recognize_speakers_job (if enabled)
+  │     - Sends: Audio + segments to speaker-recognition service
+  │     - Identifies: Speakers via voice embeddings
+  │     - Updates: MongoDB (segment speaker labels)
+  │
+  │  └─ memory_extraction_job
+  │     - Uses: LLM (OpenAI/Ollama) to extract facts
+  │     - Stores: Qdrant (Chronicle) or OpenMemory MCP (vector DB)
+  │
+  └─ generate_title_summary_job
+     - Uses: LLM (OpenAI/Ollama)
+     - Generates: Title, summary, detailed_summary
+     - Stores: MongoDB (conversation document)
+
+  └─ dispatch_conversation_complete_event_job
+     - Triggers: conversation.complete plugins
+     - Only for: File uploads (not streaming)
+
+  All results stored: MongoDB conversations collection
+
+┌─────────────────────────────────────────────────────────────────┐
+│ 6. SESSION RESTART                                              │
+└─────────────────────────────────────────────────────────────────┘
+  handle_end_of_conversation()
+  Container: chronicle-backend
+  ↓
+  Deletes: transcription:results:{session_id} (Redis)
+  ↓
+  Increments: session:conversation_count:{session_id} (Redis)
+  ↓
+  Checks: Session still active? (WebSocket connected)
+  ↓
+  If active:
+    - Re-enqueues: stream_speech_detection_job (RQ)
+    - Session remains: "active" for next conversation
+```
+
+### Data Locations Summary
+
+| Stage | Data Type | Location | Container |
+|-------|-----------|----------|-----------|
+| Input | Audio bytes | In-memory buffers | chronicle-backend |
+| Producer | Fixed chunks | `audio:stream:{client_id}` | redis |
+| Session metadata | Hash | `audio:session:{session_id}` | redis |
+| Transcription consumer | Interim results | `transcription:interim:{session_id}` (Pub/Sub) | redis |
+| Transcription consumer | Final results | `transcription:results:{session_id}` (Stream) | redis |
+| Audio persistence | WAV files | `data/chunks/*.wav` (disk volume) | chronicle-backend (volume) |
+| Audio persistence | File paths | `audio:file:{conversation_id}` | redis |
+| Conversation job | Conversation doc | MongoDB `conversations` | mongo |
+| Post-processing | Transcript | MongoDB `conversations` | mongo |
+| Post-processing | Memories | Qdrant or OpenMemory MCP | qdrant / openmemory-mcp |
+| Post-processing | Title/summary | MongoDB `conversations` | mongo |
+
+## Key Design Patterns
+
+### 1. Speech-Driven Architecture
+
+**Principle**: Conversations only created when speech is detected
+
+**Benefits**:
+- Clean user experience (no noise-only sessions in UI)
+- Reduced memory processing load
+- Automatic quality filtering
+
+**Implementation**:
+- `audio_chunks` collection: Always stores sessions
+- `conversations` collection: Only created with speech
+- Speech detection: Analyzes word count, duration, confidence
+
+### 2. Versioned Processing
+
+**Principle**: Store multiple versions of transcripts/memories
+
+**Benefits**:
+- Reprocess without losing originals
+- A/B testing different providers
+- Rollback to previous versions
+
+**Implementation**:
+- `transcript_versions` dict with version IDs (v1, v2, ...)
+- `active_transcript_version` pointer
+- `segments` field mirrors active version (quick access)
+
+### 3. Session-Level vs Conversation-Level
+
+**Session**: WebSocket connection lifetime (multiple conversations)
+- Duration: Up to 24 hours
+- Job: `stream_speech_detection_job`
+- Purpose: Continuous monitoring for speech
+
+**Conversation**: Speech burst between silence periods
+- Duration: Typically minutes
+- Job: `open_conversation_job`
+- Purpose: Process single meaningful exchange
+
+**Benefits**:
+- Continuous recording without manual start/stop
+- Automatic conversation segmentation
+- Efficient resource usage (one session, many conversations)
+
+### 4. Job Metadata Cascading
+
+**Pattern**: Parent jobs link to child jobs
+
+**Example**:
+```
+speech_detection_job
+  ↓ job_id stored in
+audio:session:{session_id}
+  ↓ creates
+open_conversation_job
+  ↓ job_id stored in
+conversation document
+  ↓ creates
+post-conversation jobs (parallel)
+```
+
+**Benefits**:
+- Job grouping and cleanup
+- Dependency tracking
+- Debugging (trace job lineage)
+
+### 5. Real-Time + Batch Hybrid
+
+**Real-Time Path** (Streaming Consumer):
+- Low latency (interim results in <1 second)
+- WebSocket to Deepgram
+- Publishes to Pub/Sub for live UI updates
+
+**Batch Path** (Batch Consumer):
+- High accuracy (more context)
+- Buffers 7.5 seconds
+- API-based transcription
+
+**Both paths** write to same `transcription:results:{session_id}` stream
+
+**Benefits**:
+- Live UI updates (interim results)
+- Accurate final results (batch processing)
+- Provider flexibility (switch between streaming/batch)
+
+### 6. Fan-Out via Redis Consumer Groups
+
+**Pattern**: Multiple consumer groups read same stream
+
+**Example**: `audio:stream:{client_id}` consumed by:
+- Transcription consumer group
+- Audio persistence consumer group
+
+**Benefits**:
+- Parallel processing paths
+- Horizontal scaling (multiple workers per group)
+- No message duplication (each group processes independently)
+
+### 7. File Rotation via Redis Signals
+
+**Pattern**: Conversation job signals persistence job via Redis key
+
+**Implementation**:
+```python
+# Conversation job
+redis.set(f"conversation:current:{session_id}", conversation_id)
+
+# Persistence job (monitors key)
+current_conv = redis.get(f"conversation:current:{session_id}")
+if current_conv != last_conv:
+    close_current_file()
+    open_new_file(current_conv)
+```
+
+**Benefits**:
+- Decoupled jobs (no direct communication)
+- Real-time file rotation
+- Multiple files per session (one per conversation)
+
+## Failure Handling
+
+### Transcription Errors
+
+**Detection**: `stream_speech_detection_job` polls results
+
+**Action**:
+- Sets `transcription_error` field in `audio:session:{session_id}`
+- Logs error for debugging
+- Session remains active (can recover)
+
+### No Meaningful Speech
+
+**Detection**: `transcribe_full_audio_job` validates transcript
+
+**Criteria**:
+- Word count < 10
+- Duration < 5 seconds
+- All words low confidence
+
+**Action**:
+- Marks conversation `deleted=True`
+- Sets `end_reason="no_meaningful_speech"`
+- Conversation hidden from UI
+
+### Audio File Not Ready
+
+**Detection**: `open_conversation_job` waits for file path
+
+**Timeout**: 30 seconds (configurable)
+
+**Action**:
+- Marks conversation `deleted=True`
+- Sets `end_reason="audio_file_not_ready"`
+- Logs error for debugging
+
+### Job Zombies (Stuck Jobs)
+
+**Detection**: `check_job_alive()` utility
+
+**Method**: Checks Redis for job existence
+
+**Action**:
+- Returns `False` if job missing
+- Caller can retry or fail gracefully
+
+### Dead Consumers
+
+**Detection**: Consumer group lag monitoring
+
+**Cleanup**:
+- Removes idle consumers (>30 seconds)
+- Claims pending messages from dead consumers
+- Redistributes to active workers
+
+### Stream Trimming
+
+**Prevention**: Streams don't grow unbounded
+
+**Implementation**:
+- `XTRIM MAXLEN 25000` on `audio:stream:{client_id}`
+- Keeps last 25k messages (~104 minutes @ 0.25s chunks)
+- Deletes `transcription:results:{session_id}` after conversation ends
+
+### Session Timeout
+
+**Max Duration**: 24 hours
+
+**Action**:
+- Jobs exit gracefully
+- Session marked `"complete"`
+- Resources cleaned up (streams deleted, consumer groups removed)
+
+---
+
+## Conclusion
+
+Chronicle's audio pipeline is designed for:
+- **Real-time processing**: Low-latency transcription and live UI updates
+- **Horizontal scalability**: Redis Consumer Groups enable multiple workers
+- **Fault tolerance**: Decoupled components, job retries, graceful error handling
+- **Resource efficiency**: Speech-driven architecture filters noise automatically
+- **Flexibility**: Pluggable providers (Deepgram/Parakeet, OpenAI/Ollama, Qdrant/OpenMemory)
+
+All coordinated through **Redis Streams** for data flow and **RQ** for orchestration, with **MongoDB** for final storage and **disk** for audio archives.
diff --git a/backends/advanced/run-test.sh b/backends/advanced/run-test.sh
index a18dc895..c68a30ea 100755
--- a/backends/advanced/run-test.sh
+++ b/backends/advanced/run-test.sh
@@ -256,8 +256,9 @@ export TEST_MODE=dev
 
 # Run the Robot Framework integration tests with extended timeout (mem0 needs time for comprehensive extraction)
 # IMPORTANT: Robot tests must be run from the repository root where backends/ and tests/ are siblings
+# Run full test suite from tests/integration/ directory (includes all test files)
 print_info "Starting Robot Framework integration tests (timeout: 15 minutes)..."
-if (cd ../.. && timeout 900 robot --outputdir test-results --loglevel INFO tests/integration/integration_test.robot); then
+if (cd ../.. && timeout 900 uv run --with-requirements tests/test-requirements.txt robot --outputdir test-results --loglevel INFO tests/integration/); then
     print_success "Integration tests completed successfully!"
 else
     TEST_EXIT_CODE=$?
diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
index 34900898..99f6dd53 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py
@@ -216,7 +216,7 @@ async def audio_streaming_persistence_job(
 
         # If no file open yet, wait for conversation to be created
         if not file_sink:
-            await asyncio.sleep(0.5)
+            await asyncio.sleep(0.0001)  # Minimal sleep to yield to event loop
             continue
 
         # Read audio chunks from stream (non-blocking)
@@ -226,7 +226,7 @@ async def audio_streaming_persistence_job(
                 audio_consumer_name,
                 {audio_stream_name: ">"},
                 count=20,  # Read up to 20 chunks at a time for efficiency
-                block=500  # 500ms timeout
+                block=100  # 100ms timeout - more responsive
             )
 
             if audio_messages:
@@ -279,7 +279,7 @@ async def audio_streaming_persistence_job(
             # Stream might not exist yet or other transient errors
             logger.debug(f"Audio stream read error (non-fatal): {audio_error}")
 
-        await asyncio.sleep(0.1)  # Check every 100ms for responsiveness
+        await asyncio.sleep(0.0001)  # Minimal sleep to yield to event loop
 
     # Job complete - calculate final stats
     runtime_seconds = time.time() - start_time
diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py
index c5f3942f..a5cf4b74 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py
@@ -67,23 +67,25 @@ def build_worker_definitions() -> List[WorkerDefinition]:
             )
         )
 
-    # Audio Persistence Worker - Single-queue worker (audio queue)
-    workers.append(
-        WorkerDefinition(
-            name="audio-persistence",
-            command=[
-                "uv",
-                "run",
-                "python",
-                "-m",
-                "advanced_omi_backend.workers.rq_worker_entry",
-                "audio",
-            ],
-            worker_type=WorkerType.RQ_WORKER,
-            queues=["audio"],
-            restart_on_failure=True,
+    # Audio Persistence Workers - Single-queue workers (audio queue)
+    # Multiple workers allow concurrent audio persistence for multiple sessions
+    for i in range(1, 4):  # 3 audio workers
+        workers.append(
+            WorkerDefinition(
+                name=f"audio-persistence-{i}",
+                command=[
+                    "uv",
+                    "run",
+                    "python",
+                    "-m",
+                    "advanced_omi_backend.workers.rq_worker_entry",
+                    "audio",
+                ],
+                worker_type=WorkerType.RQ_WORKER,
+                queues=["audio"],
+                restart_on_failure=True,
+            )
         )
-    )
 
     # Streaming STT Worker - Conditional (if streaming STT is configured in config.yml)
     # This worker uses the registry-driven streaming provider (RegistryStreamingTranscriptionProvider)

From 9ab28ed01bbb1e4de2d0920ff69df651a39e651a Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Sun, 11 Jan 2026 23:53:43 +0000
Subject: [PATCH 24/25] Add test container setup and teardown scripts

- Introduced `setup-test-containers.sh` for streamlined startup of test containers, including health checks and environment variable loading.
- Added `teardown-test-containers.sh` for simplified container shutdown, with options to remove volumes.
- Enhanced user feedback with color-coded messages for better visibility during test setup and teardown processes.
---
 tests/setup-test-containers.sh    | 86 +++++++++++++++++++++++++++++++
 tests/teardown-test-containers.sh | 33 ++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100755 tests/setup-test-containers.sh
 create mode 100755 tests/teardown-test-containers.sh

diff --git a/tests/setup-test-containers.sh b/tests/setup-test-containers.sh
new file mode 100755
index 00000000..5d0bf5db
--- /dev/null
+++ b/tests/setup-test-containers.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# Test Container Startup Script
+# Smart startup - checks if already running, handles port conflicts automatically
+
+set -e
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+print_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
+print_success() { echo -e "${GREEN}[SUCCESS]${NC} $1"; }
+print_warning() { echo -e "${YELLOW}[WARNING]${NC} $1"; }
+print_error() { echo -e "${RED}[ERROR]${NC} $1"; }
+
+# Navigate to backend directory
+SCRIPT_DIR="$(dirname "$0")"
+cd "$SCRIPT_DIR/../backends/advanced" || exit 1
+
+# Load environment variables for tests
+if [ -f "$SCRIPT_DIR/setup/.env.test" ]; then
+    print_info "Loading test environment..."
+    set -a
+    source "$SCRIPT_DIR/setup/.env.test"
+    set +a
+fi
+
+# Load HF_TOKEN from speaker-recognition service if available
+SPEAKER_ENV="$SCRIPT_DIR/../extras/speaker-recognition/.env"
+if [ -f "$SPEAKER_ENV" ] && [ -z "$HF_TOKEN" ]; then
+    print_info "Loading HF_TOKEN from speaker-recognition..."
+    set -a
+    source "$SPEAKER_ENV"
+    set +a
+fi
+
+# Use unique project name
+export COMPOSE_PROJECT_NAME="advanced-backend-test"
+
+# Configuration
+FRESH_BUILD="${FRESH_BUILD:-false}"  # Set to true for clean rebuild with volume removal
+
+# Check if containers are already running and healthy
+if [ "$FRESH_BUILD" = "false" ]; then
+    if curl -s http://localhost:8001/health > /dev/null 2>&1; then
+        print_success "Test containers already running and healthy"
+        print_info "Backend: http://localhost:8001"
+        print_info "To force rebuild: FRESH_BUILD=true ./setup-test-containers.sh"
+        exit 0
+    fi
+fi
+
+# Clean up any existing test containers from ANY project name to avoid port conflicts
+print_info "Cleaning up any existing test containers..."
+docker compose -f docker-compose-test.yml down 2>/dev/null || true
+COMPOSE_PROJECT_NAME=advanced docker compose -f docker-compose-test.yml down 2>/dev/null || true
+
+# Remove any stale "Created" containers that might be holding ports
+docker ps -a --filter "name=backend-test" --filter "status=created" --format "{{.Names}}" | xargs -r docker rm -f 2>/dev/null || true
+
+# Fresh build - remove everything and rebuild
+if [ "$FRESH_BUILD" = "true" ]; then
+    print_info "Fresh build requested - removing volumes and rebuilding images..."
+    docker compose -f docker-compose-test.yml down -v 2>/dev/null || true
+
+    # Start with build flag
+    print_info "Building and starting test containers..."
+    docker compose -f docker-compose-test.yml up -d --build --wait
+
+    print_success "Fresh build complete!"
+else
+    # Normal startup
+    print_info "Starting test containers..."
+    docker compose -f docker-compose-test.yml up -d --wait
+
+    print_success "Containers started!"
+fi
+
+print_success "All services ready!"
+print_info "Backend: http://localhost:8001"
+print_info "MongoDB: localhost:27018"
+print_info "Redis: localhost:6380"
+print_info "Qdrant: localhost:6337"
diff --git a/tests/teardown-test-containers.sh b/tests/teardown-test-containers.sh
new file mode 100755
index 00000000..4df129ee
--- /dev/null
+++ b/tests/teardown-test-containers.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Test Container Teardown Script
+# Simplified - just uses docker compose down
+
+set -e
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+print_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
+print_success() { echo -e "${GREEN}[SUCCESS]${NC} $1"; }
+print_warning() { echo -e "${YELLOW}[WARNING]${NC} $1"; }
+
+# Navigate to backend directory
+cd "$(dirname "$0")/../backends/advanced" || exit 1
+
+# Use unique project name
+export COMPOSE_PROJECT_NAME="advanced-backend-test"
+
+if [ "${REMOVE_VOLUMES:-false}" = "true" ]; then
+    print_info "Stopping containers and removing volumes..."
+    docker compose -f docker-compose-test.yml down -v
+    print_success "Containers and volumes removed"
+else
+    print_info "Stopping containers (keeping volumes)..."
+    docker compose -f docker-compose-test.yml down
+    print_success "Containers stopped (volumes preserved)"
+    print_warning "To remove volumes: REMOVE_VOLUMES=true ./teardown-test-containers.sh"
+fi

From 387385f905dc4c5d6f2b9cfd6b31ed2f76707fde Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Mon, 12 Jan 2026 00:24:00 +0000
Subject: [PATCH 25/25] Update worker count validation and websocket disconnect
 tests

- Adjusted worker count expectations in the Worker Count Validation Test to reflect an increase from 7 to 9 workers, accounting for additional audio persistence workers.
- Enhanced the WebSocket Disconnect Conversation End Reason Test by adding steps to maintain audio streaming during disconnection, ensuring accurate simulation of network dropout scenarios.
- Improved comments for clarity and added critical notes regarding inactivity timeout handling.
---
 tests/infrastructure/infra_tests.robot        | 19 +++++++++++-------
 .../websocket_streaming_tests.robot           | 20 ++++++++++++++++---
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/tests/infrastructure/infra_tests.robot b/tests/infrastructure/infra_tests.robot
index 1cbc920d..a5b84996 100644
--- a/tests/infrastructure/infra_tests.robot
+++ b/tests/infrastructure/infra_tests.robot
@@ -180,7 +180,7 @@ Worker Count Validation Test
     ...
     ...                This test validates that:
     ...                - Health endpoint includes worker_count field
-    ...                - Worker count matches expected number (7 workers: 6 RQ + 1 audio)
+    ...                - Worker count matches expected number (9 workers: 6 RQ + 3 audio)
     ...                - Worker state information is accurate
     [Tags]    health	queue
 
@@ -217,10 +217,10 @@ Worker Count Validation Test
     Log To Console    Idle workers: ${idle_workers}
 
     # Verify exact worker count
-    # Expected: 7 RQ workers (6 general workers + 1 audio persistence worker)
+    # Expected: 9 RQ workers (6 general workers + 3 audio persistence workers)
     # Note: Audio stream workers (Deepgram/Parakeet) are NOT RQ workers - they don't register
     # We wait up to 20s for registration, so all workers should be present
-    Should Be Equal As Integers    ${worker_count}    7    msg=Expected exactly 7 RQ workers (6 general + 1 audio persistence)
+    Should Be Equal As Integers    ${worker_count}    9    msg=Expected exactly 9 RQ workers (6 general + 3 audio persistence)
 
     # Verify active + idle = total
     ${sum}=    Evaluate    ${active_workers} + ${idle_workers}
@@ -265,9 +265,10 @@ WebSocket Disconnect Conversation End Reason Test
     ...
     ...                This test simulates a Bluetooth/network dropout scenario:
     ...                1. Start streaming audio and create conversation
-    ...                2. Abruptly close WebSocket (simulating disconnect)
-    ...                3. Verify job exits gracefully (no 3600s timeout)
-    ...                4. Verify conversation has end_reason='websocket_disconnect'
+    ...                2. Keep sending audio to prevent inactivity timeout
+    ...                3. Abruptly close WebSocket (simulating disconnect)
+    ...                4. Verify job exits gracefully (no 3600s timeout)
+    ...                5. Verify conversation has end_reason='websocket_disconnect'
     [Tags]    infra	audio-streaming
 
     # Start audio stream and send chunks to trigger conversation
@@ -275,7 +276,7 @@ WebSocket Disconnect Conversation End Reason Test
     ${client_id}=    Get Client ID From Device Name    ${device_name}
     ${stream_id}=    Open Audio Stream    device_name=${device_name}
 
-    # Send audio fast (no realtime pacing) to simulate disconnect before END signal
+    # Send audio fast (no realtime pacing) to trigger conversation creation
     Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=200
 
     # Wait for conversation job to be created and conversation_id to be populated
@@ -287,6 +288,10 @@ WebSocket Disconnect Conversation End Reason Test
     ${conversation_id}=    Wait Until Keyword Succeeds    10s    0.5s
     ...    Get Conversation ID From Job Meta    open_conversation    ${client_id}
 
+    # CRITICAL: Keep sending audio to prevent inactivity timeout (SPEECH_INACTIVITY_THRESHOLD_SECONDS=2)
+    # Send a few more chunks to keep the conversation alive before disconnect
+    Send Audio Chunks To Stream    ${stream_id}    ${TEST_AUDIO_FILE}    num_chunks=50
+
     # Simulate WebSocket disconnect (Bluetooth dropout)
     Close Audio Stream    ${stream_id}
 
diff --git a/tests/integration/websocket_streaming_tests.robot b/tests/integration/websocket_streaming_tests.robot
index fecc86aa..cfc79799 100644
--- a/tests/integration/websocket_streaming_tests.robot
+++ b/tests/integration/websocket_streaming_tests.robot
@@ -76,9 +76,23 @@ Conversation Job Created After Speech Detection
     Log To Console    Open conversation job created after speech detection
 
     # Then verify speech detection job has conversation_job_id linked
-    ${speech_jobs}=    Wait Until Keyword Succeeds    15s    2s
-    ...    Job Type Exists For Client    speech_detection    ${client_id}
-    Job Has Conversation ID    ${speech_jobs}[0]
+    # Note: After conversation completes, a NEW speech_detection job is created for the next conversation
+    # So we need to get the jobs and find the one with conversation_job_id set
+    ${speech_jobs}=    Get Jobs By Type And Client    speech_detection    ${client_id}
+    Should Not Be Empty    ${speech_jobs}    msg=No speech_detection jobs found for ${client_id}
+
+    # Find the job with conversation_job_id (the original one that created the conversation)
+    ${found_linked_job}=    Set Variable    ${False}
+    FOR    ${job}    IN    @{speech_jobs}
+        ${meta}=    Set Variable    ${job}[meta]
+        ${conv_job_id}=    Evaluate    $meta.get('conversation_job_id')
+        IF    '${conv_job_id}' != 'None'
+            ${found_linked_job}=    Set Variable    ${True}
+            Log To Console    Found speech_detection job with conversation_job_id: ${conv_job_id}
+            BREAK
+        END
+    END
+    Should Be True    ${found_linked_job}    msg=No speech_detection job has conversation_job_id set
     [Teardown]    Close Audio Stream    ${stream_id}