7 changes: 7 additions & 0 deletions backends/advanced/.env.template
@@ -165,6 +165,13 @@ DEBUG_DIR=./data/debug_dir
# HF_TOKEN=
# SPEAKER_SERVICE_URL=http://speaker-recognition:8001

# Speaker recognition chunking configuration (for large files)
# Files longer than SPEAKER_CHUNK_THRESHOLD will be split into smaller segments
# for processing to avoid memory issues
SPEAKER_CHUNK_THRESHOLD=1500 # 25 minutes - chunk files larger than this (seconds)
SPEAKER_CHUNK_SIZE=900 # 15 minutes - size of each chunk (seconds)
SPEAKER_CHUNK_OVERLAP=30 # 30 seconds - overlap between chunks for continuity

# Audio processing settings
# NEW_CONVERSATION_TIMEOUT_MINUTES=1.5
# AUDIO_CROPPING_ENABLED=true
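For orientation, a minimal sketch of the chunking arithmetic these three settings imply (hypothetical helper; the real splitting logic lives in the speaker-recognition service and may compute boundaries differently):

```python
# Hypothetical illustration of the chunking settings above; the real
# speaker-recognition service may compute boundaries differently.
CHUNK_THRESHOLD = 1500  # seconds; files longer than this get split
CHUNK_SIZE = 900        # seconds per chunk
CHUNK_OVERLAP = 30      # seconds shared between consecutive chunks


def chunk_bounds(duration: float) -> list[tuple[float, float]]:
    """Return (start, end) pairs covering `duration` seconds of audio."""
    if duration <= CHUNK_THRESHOLD:
        return [(0.0, duration)]
    step = CHUNK_SIZE - CHUNK_OVERLAP  # advance 870 s per chunk
    bounds, start = [], 0.0
    while start < duration:
        bounds.append((start, min(start + CHUNK_SIZE, duration)))
        start += step
    return bounds


# A 30-minute (1800 s) upload yields three overlapping chunks:
# [(0.0, 900.0), (870.0, 1770.0), (1740.0, 1800.0)]
print(chunk_bounds(1800.0))
```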
4 changes: 2 additions & 2 deletions backends/advanced/Docs/memories.md
@@ -98,7 +98,7 @@ MEM0_CONFIG = {
"vector_store": {
"provider": "qdrant",
"config": {
"collection_name": "omi_memories",
"collection_name": "chronicle_memories",
"embedding_model_dims": 768,
"host": QDRANT_BASE_URL,
"port": 6333,
@@ -499,7 +499,7 @@ This will:
3. **Search Not Working**
- Ensure embedding model is available in Ollama
- Check vector dimensions match between embedder and Qdrant
- Verify collection has vectors: `curl http://localhost:6333/collections/omi_memories`
- Verify collection has vectors: `curl http://localhost:6333/collections/chronicle_memories`
⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

rg -n "omi_memories" --type md --type py --type yaml --type json

Repository: SimpleOpenSoftware/chronicle

Length of output: 417


Update collection name references in configuration files.

The old `omi_memories` collection name still appears in multiple configuration files that need to be updated to match the new `chronicle_memories` name:

  • config/defaults.yml:198
  • tests/configs/deepgram-openai.yml:50
  • tests/configs/parakeet-openai.yml:44
  • tests/configs/parakeet-ollama.yml:44
  • tests/configs/mock-services.yml:40

Left unchanged, these configuration files will fail at runtime if the old `omi_memories` collection no longer exists in Qdrant.

🤖 Prompt for AI Agents
In `@backends/advanced/Docs/memories.md` at line 502: search the repo for the
string "omi_memories" and replace it with "chronicle_memories" in all
configuration and test YAML files (the defaults and test configs referenced in
the review), update any docs examples (e.g., curl/verification lines) to use
"chronicle_memories", and run the tests to confirm that no references to
"omi_memories" remain.


### Required Ollama Models

6 changes: 4 additions & 2 deletions backends/advanced/docker-compose-test.yml
@@ -16,7 +16,8 @@ services:
- ./data/test_audio_chunks:/app/audio_chunks
- ./data/test_debug_dir:/app/debug # Fixed: mount to /app/debug for plugin database
- ./data/test_data:/app/data
- ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml # Mount config.yml for model registry and memory settings (writable for admin config updates)
- ../../config:/app/config # Mount config directory with defaults.yml
- ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml # Override main config (for test-specific configs)
- ${PLUGINS_CONFIG:-../../tests/config/plugins.test.yml}:/app/plugins.yml # Mount test plugins config
environment:
# Override with test-specific settings
@@ -168,7 +169,8 @@ services:
- ./data/test_audio_chunks:/app/audio_chunks
- ./data/test_debug_dir:/app/debug # Fixed: mount to /app/debug for plugin database
- ./data/test_data:/app/data
- ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml # Mount config.yml for model registry and memory settings (writable for admin config updates)
- ../../config:/app/config # Mount config directory with defaults.yml
- ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml # Override main config (for test-specific configs)
- ${PLUGINS_CONFIG:-../../tests/config/plugins.test.yml}:/app/plugins.yml # Mount test plugins config
environment:
# Same environment as backend
6 changes: 2 additions & 4 deletions backends/advanced/docker-compose.yml
@@ -39,8 +39,7 @@ services:
- ./data/audio_chunks:/app/audio_chunks
- ./data/debug_dir:/app/debug_dir
- ./data:/app/data
- ../../config/config.yml:/app/config.yml # Main config file
- ../../config/plugins.yml:/app/plugins.yml # Plugin configuration
- ../../config:/app/config # Mount entire config directory (includes config.yml, defaults.yml, plugins.yml)
environment:
- DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
- PARAKEET_ASR_URL=${PARAKEET_ASR_URL}
@@ -94,8 +93,7 @@ services:
- ./worker_orchestrator.py:/app/worker_orchestrator.py
- ./data/audio_chunks:/app/audio_chunks
- ./data:/app/data
- ../../config/config.yml:/app/config.yml
- ../../config/plugins.yml:/app/plugins.yml
- ../../config:/app/config # Mount entire config directory (includes config.yml, defaults.yml, plugins.yml)
environment:
- DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY}
- PARAKEET_ASR_URL=${PARAKEET_ASR_URL}
3 changes: 2 additions & 1 deletion backends/advanced/src/advanced_omi_backend/app_factory.py
@@ -56,10 +56,11 @@ async def lifespan(app: FastAPI):
from advanced_omi_backend.models.conversation import Conversation
from advanced_omi_backend.models.audio_chunk import AudioChunkDocument
from advanced_omi_backend.models.user import User
from advanced_omi_backend.models.waveform import WaveformData

await init_beanie(
database=config.db,
document_models=[User, Conversation, AudioChunkDocument],
document_models=[User, Conversation, AudioChunkDocument, WaveformData],
)
application_logger.info("Beanie initialized for all document models")
except Exception as e:
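Beanie only creates query support for models passed to `init_beanie`, which is why the new waveform document type is registered here. For illustration only, a minimal sketch of what a Beanie document of this kind looks like — the field names below are assumptions; the real `WaveformData` definition lives in `models/waveform.py` and is not shown in this diff:

```python
# Illustrative sketch only: the actual WaveformData fields are defined in
# advanced_omi_backend/models/waveform.py and may differ.
from datetime import datetime, timezone

from beanie import Document
from pydantic import Field


class WaveformData(Document):
    conversation_id: str                               # hypothetical field
    peaks: list[float] = Field(default_factory=list)   # hypothetical field
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))

    class Settings:
        name = "waveform_data"  # MongoDB collection name (assumed)
```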
137 changes: 135 additions & 2 deletions backends/advanced/src/advanced_omi_backend/config.py
@@ -9,6 +9,7 @@
import logging
import os
import shutil
import yaml
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Optional
@@ -60,17 +61,149 @@ def get_diarization_config_path():
data_path = Path("/app/data/diarization_config.json")
if data_path.parent.exists():
return data_path

# 2. App root directory
app_path = Path("/app/diarization_config.json")
if app_path.parent.exists():
return app_path

# 3. Local development path
local_path = Path("diarization_config.json")
return local_path


# ============================================================================
# Configuration Merging System (for defaults.yml + config.yml)
# ============================================================================

def get_config_dir() -> Path:
"""
Get config directory path. Single source of truth for config location.
Matches root config_manager.py logic.

Returns:
Path to config directory
"""
config_dir = os.getenv("CONFIG_DIR", "/app/config")
return Path(config_dir)


def get_config_yml_path() -> Path:
"""Get path to config.yml file."""
return get_config_dir() / "config.yml"


def get_defaults_yml_path() -> Path:
"""Get path to defaults.yml file."""
return get_config_dir() / "defaults.yml"


def get_defaults_config_path():
"""
Get the path to the defaults config file.

DEPRECATED: Use get_defaults_yml_path() instead.
Kept for backward compatibility.
"""
defaults_path = get_defaults_yml_path()
return defaults_path if defaults_path.exists() else None


def merge_configs(defaults: dict, overrides: dict) -> dict:
"""
Deep merge two configuration dictionaries.

Override values take precedence over defaults.
Lists are replaced (not merged).

Args:
defaults: Default configuration values
overrides: User-provided overrides

Returns:
Merged configuration dictionary
"""
result = defaults.copy()

for key, value in overrides.items():
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
# Recursively merge dictionaries
result[key] = merge_configs(result[key], value)
else:
# Override (lists, scalars, new keys)
result[key] = value

return result


# Global cache for merged config
_config_cache: Optional[dict] = None


def get_config(force_reload: bool = False) -> dict:
"""
Get merged configuration from defaults.yml + config.yml.

Priority order: config.yml > environment variables > defaults.yml

Args:
force_reload: If True, reload from disk even if cached

Returns:
Merged configuration dictionary with all settings
"""
global _config_cache

if _config_cache is not None and not force_reload:
return _config_cache

# Load defaults
defaults_path = get_defaults_yml_path()
defaults = {}
if defaults_path.exists():
try:
with open(defaults_path, 'r') as f:
defaults = yaml.safe_load(f) or {}
logger.info(f"Loaded defaults from {defaults_path}")
except Exception as e:
logger.warning(f"Could not load defaults from {defaults_path}: {e}")

# Load user config
config_path = get_config_yml_path()
user_config = {}
if config_path.exists():
try:
with open(config_path, 'r') as f:
user_config = yaml.safe_load(f) or {}
logger.info(f"Loaded config from {config_path}")
except Exception as e:
logger.error(f"Error loading config from {config_path}: {e}")

# Merge configurations
merged = merge_configs(defaults, user_config)

# Resolve environment variables (lazy import to avoid circular dependency)
try:
from advanced_omi_backend.model_registry import _deep_resolve_env
merged = _deep_resolve_env(merged)
except ImportError:
# If model_registry not available, skip env resolution
# (will be resolved when model_registry loads the config)
logger.warning("Could not import _deep_resolve_env, environment variables may not be resolved")

# Cache result
_config_cache = merged

return merged


def reload_config():
"""Reload configuration from disk (invalidate cache)."""
global _config_cache
_config_cache = None
return get_config(force_reload=True)


def load_diarization_settings_from_file():
"""Load diarization settings from file or create from template."""
global _diarization_settings
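A quick usage sketch of the merge behaviour introduced above, assuming the module is importable as `advanced_omi_backend.config` (the keys are illustrative, not the real schema): nested dictionaries are merged key by key, while lists and scalars from `config.yml` replace the defaults outright.

```python
from advanced_omi_backend.config import merge_configs

defaults = {
    "memory": {"provider": "qdrant", "collection_name": "chronicle_memories"},
    "models": ["default-model"],          # illustrative keys only
}
overrides = {
    "memory": {"collection_name": "my_memories"},  # nested key wins over the default
    "models": ["my-model"],                        # lists are replaced, not merged
}

merged = merge_configs(defaults, overrides)
assert merged["memory"] == {"provider": "qdrant", "collection_name": "my_memories"}
assert merged["models"] == ["my-model"]
```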
@@ -144,11 +144,30 @@ async def upload_and_process_audio_files(
f"📦 Converted uploaded file to {num_chunks} MongoDB chunks "
f"(conversation {conversation_id[:12]})"
)
except ValueError as val_error:
# Handle validation errors (e.g., file too long)
audio_logger.error(f"Audio validation failed: {val_error}")
processed_files.append({
"filename": file.filename,
"status": "error",
"error": str(val_error),
})
# Delete the conversation since it won't have audio chunks
await conversation.delete()
continue
except Exception as chunk_error:
audio_logger.error(
f"Failed to convert uploaded file to chunks: {chunk_error}",
exc_info=True
)
processed_files.append({
"filename": file.filename,
"status": "error",
"error": f"Audio conversion failed: {str(chunk_error)}",
})
# Delete the conversation since it won't have audio chunks
await conversation.delete()
continue

# Enqueue batch transcription job first (file uploads need transcription)
from advanced_omi_backend.controllers.queue_controller import (
@@ -130,18 +130,26 @@ async def get_conversation(conversation_id: str, user: User):
return JSONResponse(status_code=500, content={"error": "Error fetching conversation"})


async def get_conversations(user: User):
async def get_conversations(user: User, include_deleted: bool = False):
"""Get conversations with speech only (speech-driven architecture)."""
try:
# Build query based on user permissions using Beanie
if not user.is_superuser:
# Regular users can only see their own conversations
user_conversations = await Conversation.find(
Conversation.user_id == str(user.user_id)
).sort(-Conversation.created_at).to_list()
# Filter by deleted status
query = Conversation.user_id == str(user.user_id)
if not include_deleted:
query = query & (Conversation.deleted == False)
user_conversations = await Conversation.find(query).sort(-Conversation.created_at).to_list()
else:
# Admins see all conversations
user_conversations = await Conversation.find_all().sort(-Conversation.created_at).to_list()
# Filter by deleted status
if not include_deleted:
user_conversations = await Conversation.find(
Conversation.deleted == False
).sort(-Conversation.created_at).to_list()
else:
user_conversations = await Conversation.find_all().sort(-Conversation.created_at).to_list()

# Build response with explicit curated fields - minimal for list view
conversations = []
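For context, Beanie's `find()` accepts any number of filter conditions, so the same soft-delete logic could also be composed in one expression for both branches — a sketch under the assumption that the `Conversation` fields are as used above:

```python
from advanced_omi_backend.models.conversation import Conversation


def conversation_query(user, include_deleted: bool = False):
    """Compose the endpoint's filters in a single Beanie query (illustrative sketch)."""
    conditions = []
    if not user.is_superuser:
        conditions.append(Conversation.user_id == str(user.user_id))
    if not include_deleted:
        conditions.append(Conversation.deleted == False)  # noqa: E712 — Beanie expression
    # With no conditions this is equivalent to find_all()
    return Conversation.find(*conditions).sort(-Conversation.created_at)


# usage inside the handler:
# user_conversations = await conversation_query(user, include_deleted).to_list()
```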