diff --git a/Cargo.lock b/Cargo.lock index d2f8619636..eb7ec932e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1053,6 +1053,9 @@ dependencies = [ "cidre", "cpal 0.15.3 (git+https://github.com/CapSoftware/cpal?rev=3cc779a7b4ca)", "ffmpeg-next", + "serde", + "serde_json", + "tempfile", "tokio", "tracing", "workspace-hack", @@ -1529,6 +1532,7 @@ dependencies = [ "cap-mediafoundation-ffmpeg", "cap-mediafoundation-utils", "cap-project", + "cap-rendering", "cap-timestamp", "cap-utils", "chrono", diff --git a/apps/desktop/src-tauri/src/export.rs b/apps/desktop/src-tauri/src/export.rs index 1786561fb7..d881b35766 100644 --- a/apps/desktop/src-tauri/src/export.rs +++ b/apps/desktop/src-tauri/src/export.rs @@ -225,7 +225,7 @@ pub async fn generate_export_preview( RenderVideoConstants::new( &recordings.segments, recording_meta.clone(), - studio_meta.clone(), + (**studio_meta).clone(), ) .await .map_err(|e| format!("Failed to create render constants: {e}"))?, diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index c6ce3257a9..532328d7c6 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -1456,7 +1456,7 @@ async fn get_video_metadata(path: PathBuf) -> Result { vec![recording_meta.path(&segment.display.path)] } @@ -1888,15 +1888,13 @@ impl RecordingMetaWithMetadata { RecordingMetaInner::Instant(_) => RecordingMode::Instant, }, status: match &inner.inner { - RecordingMetaInner::Studio(StudioRecordingMeta::MultipleSegments { inner }) => { - inner + RecordingMetaInner::Studio(meta) => match &**meta { + StudioRecordingMeta::MultipleSegments { inner } => inner .status .clone() - .unwrap_or(StudioRecordingStatus::Complete) - } - RecordingMetaInner::Studio(StudioRecordingMeta::SingleSegment { .. }) => { - StudioRecordingStatus::Complete - } + .unwrap_or(StudioRecordingStatus::Complete), + StudioRecordingMeta::SingleSegment { .. 
} => StudioRecordingStatus::Complete, + }, RecordingMetaInner::Instant(InstantRecordingMeta::InProgress { .. }) => { StudioRecordingStatus::InProgress } @@ -3072,8 +3070,10 @@ async fn resume_uploads(app: AppHandle) -> Result<(), String> { // Check if recording is still marked as in-progress and if so mark as failed // This should only happen if the application crashes while recording match &mut meta.inner { - RecordingMetaInner::Studio(StudioRecordingMeta::MultipleSegments { inner }) => { - if let Some(StudioRecordingStatus::InProgress) = &inner.status { + RecordingMetaInner::Studio(meta_box) => { + if let StudioRecordingMeta::MultipleSegments { inner } = &mut **meta_box + && let Some(StudioRecordingStatus::InProgress) = &inner.status + { inner.status = Some(StudioRecordingStatus::Failed { error: "Recording crashed".to_string(), }); diff --git a/apps/desktop/src-tauri/src/recording.rs b/apps/desktop/src-tauri/src/recording.rs index 54f83b8789..92cce11b78 100644 --- a/apps/desktop/src-tauri/src/recording.rs +++ b/apps/desktop/src-tauri/src/recording.rs @@ -543,13 +543,13 @@ pub async fn start_recording( pretty_name: project_name.clone(), inner: match inputs.mode { RecordingMode::Studio => { - RecordingMetaInner::Studio(StudioRecordingMeta::MultipleSegments { + RecordingMetaInner::Studio(Box::new(StudioRecordingMeta::MultipleSegments { inner: MultipleSegments { segments: Default::default(), cursors: Default::default(), status: Some(StudioRecordingStatus::InProgress), }, - }) + })) } RecordingMode::Instant => { RecordingMetaInner::Instant(InstantRecordingMeta::InProgress { recording: true }) @@ -1213,6 +1213,7 @@ pub async fn take_screenshot( path: relative_path, fps: 0, start_time: Some(0.0), + device_id: None, }; let segment = cap_project::SingleSegment { @@ -1227,9 +1228,9 @@ pub async fn take_screenshot( project_path: project_file_path.clone(), pretty_name: project_name, sharing: None, - inner: cap_project::RecordingMetaInner::Studio( + inner: 
cap_project::RecordingMetaInner::Studio(Box::new( cap_project::StudioRecordingMeta::SingleSegment { segment }, - ), + )), upload: None, }; @@ -1331,7 +1332,7 @@ async fn handle_recording_end( { match &mut project_meta.inner { RecordingMetaInner::Studio(meta) => { - if let StudioRecordingMeta::MultipleSegments { inner } = meta { + if let StudioRecordingMeta::MultipleSegments { inner } = &mut **meta { inner.status = Some(StudioRecordingStatus::Failed { error }); } } @@ -1396,7 +1397,7 @@ async fn handle_recording_finish( let (meta_inner, sharing) = match completed_recording { CompletedRecording::Studio { recording, .. } => { - let meta_inner = RecordingMetaInner::Studio(recording.meta.clone()); + let meta_inner = RecordingMetaInner::Studio(Box::new(recording.meta.clone())); if let Ok(mut meta) = RecordingMeta::load_for_project(&recording_dir).map_err(|err| { error!("Failed to load recording meta while saving finished recording: {err}") @@ -1508,7 +1509,10 @@ async fn handle_recording_finish( config.write(&recording_dir).map_err(|e| e.to_string())?; - (RecordingMetaInner::Studio(updated_studio_meta), None) + ( + RecordingMetaInner::Studio(Box::new(updated_studio_meta)), + None, + ) } CompletedRecording::Instant { recording, @@ -1910,7 +1914,7 @@ pub fn generate_zoom_segments_from_clicks( project_path: recording.project_path.clone(), pretty_name: String::new(), sharing: None, - inner: RecordingMetaInner::Studio(recording.meta.clone()), + inner: RecordingMetaInner::Studio(Box::new(recording.meta.clone())), upload: None, }; @@ -1930,7 +1934,7 @@ pub fn generate_zoom_segments_for_project( let mut all_clicks = Vec::new(); let mut all_moves = Vec::new(); - match studio_meta { + match &**studio_meta { StudioRecordingMeta::SingleSegment { segment } => { if let Some(cursor_path) = &segment.cursor { let mut events = CursorEvents::load_from_file(&recording_meta.path(cursor_path)) diff --git a/apps/desktop/src-tauri/src/screenshot_editor.rs 
b/apps/desktop/src-tauri/src/screenshot_editor.rs index 90bffe7b60..8ffcd06dd5 100644 --- a/apps/desktop/src-tauri/src/screenshot_editor.rs +++ b/apps/desktop/src-tauri/src/screenshot_editor.rs @@ -214,6 +214,7 @@ impl ScreenshotEditorInstances { path: relative_path.clone(), fps: 30, start_time: Some(0.0), + device_id: None, }; let segment = SingleSegment { display: video_meta.clone(), @@ -227,7 +228,7 @@ impl ScreenshotEditorInstances { project_path: path.parent().unwrap().to_path_buf(), pretty_name: "Screenshot".to_string(), sharing: None, - inner: RecordingMetaInner::Studio(studio_meta.clone()), + inner: RecordingMetaInner::Studio(Box::new(studio_meta.clone())), upload: None, } }; @@ -283,7 +284,7 @@ impl ScreenshotEditorInstances { queue: (*queue).clone(), device: (*device).clone(), options, - meta: studio_meta, + meta: *studio_meta, recording_meta: recording_meta.clone(), background_textures: Arc::new(tokio::sync::RwLock::new(HashMap::new())), is_software_adapter, diff --git a/apps/desktop/src/utils/tauri.ts b/apps/desktop/src/utils/tauri.ts index 8b937880ab..76ce95e4af 100644 --- a/apps/desktop/src/utils/tauri.ts +++ b/apps/desktop/src/utils/tauri.ts @@ -374,7 +374,7 @@ export type AspectRatio = "wide" | "vertical" | "square" | "classic" | "tall" export type Audio = { duration: number; sample_rate: number; channels: number; start_time: number } export type AudioConfiguration = { mute: boolean; improve: boolean; micVolumeDb?: number; micStereoMode?: StereoMode; systemVolumeDb?: number } export type AudioInputLevelChange = number -export type AudioMeta = { path: string; start_time?: number | null } +export type AudioMeta = { path: string; start_time?: number | null; device_id?: string | null } export type AuthSecret = { api_key: string } | { token: string; expires: number } export type AuthStore = { secret: AuthSecret; user_id: string | null; plan: Plan | null; intercom_hash: string | null; organizations?: Organization[] } export type BackgroundConfiguration = 
{ source: BackgroundSource; blur: number; padding: number; rounding: number; roundingType?: CornerStyle; inset: number; crop: Crop | null; shadow?: number; advancedShadow?: ShadowConfiguration | null; border?: BorderConfiguration | null } @@ -520,7 +520,7 @@ export type UploadProgress = { progress: number } export type UploadProgressEvent = { video_id: string; uploaded: string; total: string } export type UploadResult = { Success: string } | "NotAuthenticated" | "PlanCheckFailed" | "UpgradeRequired" export type Video = { duration: number; width: number; height: number; fps: number; start_time: number } -export type VideoMeta = { path: string; fps?: number; start_time?: number | null } +export type VideoMeta = { path: string; fps?: number; start_time?: number | null; device_id?: string | null } export type VideoRecordingMetadata = { duration: number; size: number } export type VideoUploadInfo = { id: string; link: string; config: S3UploadMeta } export type WindowExclusion = { bundleIdentifier?: string | null; ownerName?: string | null; windowTitle?: string | null } diff --git a/crates/audio/Cargo.toml b/crates/audio/Cargo.toml index 39099118e1..3f27ef5cde 100644 --- a/crates/audio/Cargo.toml +++ b/crates/audio/Cargo.toml @@ -9,10 +9,15 @@ ffmpeg = { workspace = true } cpal = { workspace = true } tokio.workspace = true tracing = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } workspace-hack = { version = "0.1", path = "../workspace-hack" } [target.'cfg(target_os = "macos")'.dependencies] cidre = { workspace = true } +[dev-dependencies] +tempfile = "3" + [lints] workspace = true diff --git a/crates/audio/src/calibration_store.rs b/crates/audio/src/calibration_store.rs new file mode 100644 index 0000000000..0e84b691a2 --- /dev/null +++ b/crates/audio/src/calibration_store.rs @@ -0,0 +1,189 @@ +use crate::sync_analysis::DeviceSyncCalibration; +use serde::{Deserialize, Serialize}; +use 
std::collections::HashMap; +use std::path::Path; + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct CalibrationStore { + calibrations: HashMap, + #[serde(default)] + version: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StoredCalibration { + pub camera_id: String, + pub microphone_id: String, + pub offset_secs: f64, + pub confidence: f64, + pub measurement_count: u32, + #[serde(default)] + pub last_updated_ms: u64, +} + +impl CalibrationStore { + const CURRENT_VERSION: u32 = 1; + const FILENAME: &'static str = "sync_calibrations.json"; + + pub fn new() -> Self { + Self { + calibrations: HashMap::new(), + version: Self::CURRENT_VERSION, + } + } + + pub fn load(data_dir: &Path) -> Self { + let path = data_dir.join(Self::FILENAME); + + if !path.exists() { + return Self::new(); + } + + match std::fs::read_to_string(&path) { + Ok(contents) => serde_json::from_str(&contents).unwrap_or_else(|e| { + tracing::warn!("Failed to parse calibration store: {}, creating new", e); + Self::new() + }), + Err(e) => { + tracing::warn!("Failed to read calibration store: {}, creating new", e); + Self::new() + } + } + } + + pub fn save(&self, data_dir: &Path) -> Result<(), std::io::Error> { + let path = data_dir.join(Self::FILENAME); + + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + + let contents = serde_json::to_string_pretty(self) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + + std::fs::write(&path, contents) + } + + fn make_key(camera_id: &str, microphone_id: &str) -> String { + format!("{camera_id}|{microphone_id}") + } + + pub fn get_calibration( + &self, + camera_id: &str, + microphone_id: &str, + ) -> Option<&StoredCalibration> { + let key = Self::make_key(camera_id, microphone_id); + self.calibrations.get(&key) + } + + pub fn get_offset(&self, camera_id: &str, microphone_id: &str) -> Option { + self.get_calibration(camera_id, microphone_id) + .filter(|c| c.confidence >= 
0.5) + .map(|c| c.offset_secs) + } + + pub fn update_calibration(&mut self, calibration: &DeviceSyncCalibration) { + let key = Self::make_key(&calibration.camera_id, &calibration.microphone_id); + + let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + + let stored = StoredCalibration { + camera_id: calibration.camera_id.clone(), + microphone_id: calibration.microphone_id.clone(), + offset_secs: calibration.measured_offset_secs, + confidence: calibration.confidence, + measurement_count: calibration.measurement_count, + last_updated_ms: now_ms, + }; + + self.calibrations.insert(key, stored); + } + + pub fn remove_calibration(&mut self, camera_id: &str, microphone_id: &str) { + let key = Self::make_key(camera_id, microphone_id); + self.calibrations.remove(&key); + } + + pub fn list_calibrations(&self) -> impl Iterator { + self.calibrations.values() + } + + pub fn clear(&mut self) { + self.calibrations.clear(); + } +} + +pub fn apply_calibration_to_offset( + base_offset: f64, + camera_id: Option<&str>, + microphone_id: Option<&str>, + store: &CalibrationStore, +) -> f64 { + match (camera_id, microphone_id) { + (Some(cam), Some(mic)) => { + if let Some(cal_offset) = store.get_offset(cam, mic) { + base_offset + cal_offset + } else { + base_offset + } + } + _ => base_offset, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_calibration_store_save_load() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().to_path_buf(); + + let mut store = CalibrationStore::new(); + let cal = DeviceSyncCalibration { + camera_id: "camera1".into(), + microphone_id: "mic1".into(), + measured_offset_secs: 0.042, + confidence: 0.85, + measurement_count: 5, + }; + store.update_calibration(&cal); + store.save(&path).unwrap(); + + let loaded = CalibrationStore::load(&path); + let retrieved = loaded.get_calibration("camera1", "mic1").unwrap(); + 
assert!((retrieved.offset_secs - 0.042).abs() < 0.0001); + assert_eq!(retrieved.measurement_count, 5); + } + + #[test] + fn test_get_offset_requires_confidence() { + let mut store = CalibrationStore::new(); + + let low_conf = DeviceSyncCalibration { + camera_id: "cam".into(), + microphone_id: "mic".into(), + measured_offset_secs: 0.05, + confidence: 0.3, + measurement_count: 1, + }; + store.update_calibration(&low_conf); + assert!(store.get_offset("cam", "mic").is_none()); + + let high_conf = DeviceSyncCalibration { + camera_id: "cam".into(), + microphone_id: "mic".into(), + measured_offset_secs: 0.05, + confidence: 0.8, + measurement_count: 3, + }; + store.update_calibration(&high_conf); + assert!(store.get_offset("cam", "mic").is_some()); + } +} diff --git a/crates/audio/src/latency.rs b/crates/audio/src/latency.rs index c948e57e51..e6c4d52cf0 100644 --- a/crates/audio/src/latency.rs +++ b/crates/audio/src/latency.rs @@ -448,6 +448,60 @@ pub fn default_output_latency_hint( } } +#[derive(Debug, Clone, Copy)] +pub struct InputLatencyInfo { + pub device_latency_secs: f64, + pub buffer_latency_secs: f64, + pub total_latency_secs: f64, + pub transport: OutputTransportKind, +} + +impl InputLatencyInfo { + pub fn new( + device_latency_secs: f64, + buffer_latency_secs: f64, + transport: OutputTransportKind, + ) -> Self { + Self { + device_latency_secs, + buffer_latency_secs, + total_latency_secs: device_latency_secs + buffer_latency_secs, + transport, + } + } + + pub fn from_buffer_only(sample_rate: u32, buffer_size_frames: u32) -> Self { + let buffer_latency = if sample_rate > 0 { + buffer_size_frames as f64 / sample_rate as f64 + } else { + 0.0 + }; + Self::new(0.0, buffer_latency, OutputTransportKind::Unknown) + } +} + +pub fn estimate_input_latency( + sample_rate: u32, + buffer_size_frames: u32, + device_name: Option<&str>, +) -> InputLatencyInfo { + if sample_rate == 0 { + return InputLatencyInfo::new(0.0, 0.0, OutputTransportKind::Unknown); + } + + #[cfg(target_os 
= "macos")] + { + macos::estimate_input_latency(sample_rate, buffer_size_frames, device_name) + .unwrap_or_else(|| InputLatencyInfo::from_buffer_only(sample_rate, buffer_size_frames)) + } + + #[cfg(not(target_os = "macos"))] + { + let _ = device_name; + InputLatencyInfo::from_buffer_only(sample_rate, buffer_size_frames) + } +} + fn time_based_alpha(dt_secs: f64, tau_secs: f64) -> f64 { if tau_secs <= 0.0 { return 1.0; @@ -489,8 +543,8 @@ mod macos { #[cfg(target_os = "macos")] use super::AIRPLAY_MIN_LATENCY_SECS; use super::{ - MAX_LATENCY_SECS, OutputLatencyHint, OutputTransportKind, WIRELESS_FALLBACK_LATENCY_SECS, - WIRELESS_MIN_LATENCY_SECS, transport_constraints, + InputLatencyInfo, MAX_LATENCY_SECS, OutputLatencyHint, OutputTransportKind, + WIRELESS_FALLBACK_LATENCY_SECS, WIRELESS_MIN_LATENCY_SECS, transport_constraints, }; use cidre::{ core_audio::{ @@ -508,6 +562,74 @@ mod macos { compute_latency_hint(&device, sample_rate, fallback_buffer_frames).ok() } + pub(super) fn estimate_input_latency( + sample_rate: u32, + buffer_size_frames: u32, + _device_name: Option<&str>, + ) -> Option { + let device = System::default_input_device().ok()?; + compute_input_latency(&device, sample_rate, buffer_size_frames).ok() + } + + fn compute_input_latency( + device: &Device, + sample_rate: u32, + fallback_buffer_frames: u32, + ) -> os::Result { + let transport = device + .transport_type() + .unwrap_or(DeviceTransportType::UNKNOWN); + let transport_kind = transport_kind(transport); + + let device_latency_frames = + scoped_u32(device, PropSelector::DEVICE_LATENCY, PropScope::INPUT).unwrap_or(0); + let safety_offset_frames = + scoped_u32(device, PropSelector::DEVICE_SAFETY_OFFSET, PropScope::INPUT).unwrap_or(0); + let buffer_frames = device + .prop(&PropSelector::DEVICE_BUF_FRAME_SIZE.global_addr()) + .unwrap_or(fallback_buffer_frames); + let stream_latency_frames = max_input_stream_latency(device).unwrap_or(0); + + let device_sample_rate = 
device.nominal_sample_rate().unwrap_or(sample_rate as f64); + let effective_rate = if device_sample_rate > 0.0 { + device_sample_rate + } else { + sample_rate as f64 + }; + + let device_latency_total_frames = device_latency_frames as u64 + + safety_offset_frames as u64 + + stream_latency_frames as u64; + + let device_latency_secs = device_latency_total_frames as f64 / effective_rate; + let buffer_latency_secs = buffer_frames as f64 / effective_rate; + + Ok(InputLatencyInfo::new( + device_latency_secs, + buffer_latency_secs, + transport_kind, + )) + } + + fn max_input_stream_latency(device: &Device) -> os::Result { + let streams = device.streams()?; + let mut max_latency = 0u32; + + for stream in streams { + if is_input_stream(&stream)? + && let Ok(latency) = stream.latency() + { + max_latency = max_latency.max(latency); + } + } + + Ok(max_latency) + } + + fn is_input_stream(stream: &Stream) -> os::Result { + stream.direction().map(|dir| dir == 1) + } + fn compute_latency_hint( device: &Device, sample_rate: u32, diff --git a/crates/audio/src/lib.rs b/crates/audio/src/lib.rs index f07a0ff9bc..2a9c60e8b5 100644 --- a/crates/audio/src/lib.rs +++ b/crates/audio/src/lib.rs @@ -1,10 +1,14 @@ mod audio_data; +mod calibration_store; mod latency; mod renderer; +mod sync_analysis; pub use audio_data::*; +pub use calibration_store::*; pub use latency::*; pub use renderer::*; +pub use sync_analysis::*; pub trait FromSampleBytes: cpal::SizedSample + std::fmt::Debug + Send + 'static { const BYTE_SIZE: usize; diff --git a/crates/audio/src/renderer.rs b/crates/audio/src/renderer.rs index bb22ae7ea3..16e9803a81 100644 --- a/crates/audio/src/renderer.rs +++ b/crates/audio/src/renderer.rs @@ -13,7 +13,6 @@ pub struct AudioRendererTrack<'a> { pub offset: isize, } -// Renders a combination of audio tracks into a single stereo buffer pub fn render_audio( tracks: &[AudioRendererTrack], offset: usize, @@ -24,7 +23,15 @@ pub fn render_audio( let samples = samples.min( tracks .iter() - 
.flat_map(|t| (t.data.samples().len() / t.data.channels() as usize).checked_sub(offset)) + .filter_map(|t| { + let track_samples = t.data.samples().len() / t.data.channels() as usize; + let available = track_samples as isize - offset as isize - t.offset; + if available > 0 { + Some(available as usize) + } else { + None + } + }) .max() .unwrap_or(0), ); diff --git a/crates/audio/src/sync_analysis.rs b/crates/audio/src/sync_analysis.rs new file mode 100644 index 0000000000..1daef4183d --- /dev/null +++ b/crates/audio/src/sync_analysis.rs @@ -0,0 +1,318 @@ +use std::collections::VecDeque; + +#[derive(Debug, Clone)] +pub struct SyncAnalysisResult { + pub offset_secs: f64, + pub confidence: f64, + pub detected_events: Vec, +} + +#[derive(Debug, Clone)] +pub struct SyncEvent { + pub audio_time_secs: f64, + pub video_time_secs: f64, + pub offset_secs: f64, + pub confidence: f64, +} + +pub struct SyncAnalyzer { + sample_rate: u32, + #[allow(dead_code)] + fps: f64, + audio_buffer: Vec, + video_motion_scores: Vec<(f64, f64)>, + detected_events: Vec, +} + +impl SyncAnalyzer { + pub fn new(sample_rate: u32, fps: f64) -> Self { + Self { + sample_rate, + fps, + audio_buffer: Vec::new(), + video_motion_scores: Vec::new(), + detected_events: Vec::new(), + } + } + + pub fn add_audio_samples(&mut self, samples: &[f32], _start_time_secs: f64) { + self.audio_buffer.extend_from_slice(samples); + } + + pub fn add_video_frame_motion(&mut self, time_secs: f64, motion_score: f64) { + self.video_motion_scores.push((time_secs, motion_score)); + } + + pub fn detect_audio_transients(&self) -> Vec<(f64, f64)> { + let mut transients = Vec::new(); + + if self.audio_buffer.len() < 1024 { + return transients; + } + + let window_size = (self.sample_rate as usize) / 100; + let hop_size = window_size / 4; + + let mut energies: VecDeque = VecDeque::with_capacity(10); + + for (i, chunk) in self.audio_buffer.chunks(hop_size).enumerate() { + let energy: f64 = + chunk.iter().map(|s| (*s as 
f64).powi(2)).sum::() / chunk.len() as f64; + let energy_db = if energy > 1e-10 { + 10.0 * energy.log10() + } else { + -100.0 + }; + + energies.push_back(energy_db); + if energies.len() > 10 { + energies.pop_front(); + } + + if energies.len() >= 5 { + let avg: f64 = energies.iter().take(energies.len() - 1).sum::() + / (energies.len() - 1) as f64; + let current = *energies.back().unwrap(); + + let onset_threshold = 15.0; + if current - avg > onset_threshold && current > -30.0 { + let time_secs = (i * hop_size) as f64 / self.sample_rate as f64; + let strength = (current - avg) / onset_threshold; + transients.push((time_secs, strength.min(3.0))); + } + } + } + + transients + } + + pub fn detect_video_motion_peaks(&self) -> Vec<(f64, f64)> { + let mut peaks = Vec::new(); + + if self.video_motion_scores.len() < 3 { + return peaks; + } + + let motion_threshold = 0.3; + + for i in 1..self.video_motion_scores.len() - 1 { + let (time, score) = self.video_motion_scores[i]; + let prev_score = self.video_motion_scores[i - 1].1; + let next_score = self.video_motion_scores[i + 1].1; + + if score > prev_score && score > next_score && score > motion_threshold { + peaks.push((time, score)); + } + } + + peaks + } + + pub fn correlate_events(&mut self) -> Vec { + let audio_transients = self.detect_audio_transients(); + let video_peaks = self.detect_video_motion_peaks(); + + let max_offset_secs = 0.5; + let mut events = Vec::new(); + + for (audio_time, audio_strength) in &audio_transients { + let mut best_match: Option<(f64, f64, f64)> = None; + + for (video_time, video_strength) in &video_peaks { + let offset = audio_time - video_time; + + if offset.abs() <= max_offset_secs { + let combined_strength = audio_strength * video_strength; + + if best_match.is_none() || combined_strength > best_match.unwrap().2 { + best_match = Some((*video_time, offset, combined_strength)); + } + } + } + + if let Some((video_time, offset, strength)) = best_match { + events.push(SyncEvent { + 
audio_time_secs: *audio_time, + video_time_secs: video_time, + offset_secs: offset, + confidence: (strength / 3.0).min(1.0), + }); + } + } + + self.detected_events = events.clone(); + events + } + + pub fn calculate_sync_offset(&mut self) -> Option { + let events = self.correlate_events(); + + if events.is_empty() { + return None; + } + + let high_confidence_events: Vec<_> = events.iter().filter(|e| e.confidence > 0.5).collect(); + + if high_confidence_events.is_empty() { + return None; + } + + let total_weight: f64 = high_confidence_events.iter().map(|e| e.confidence).sum(); + let weighted_offset: f64 = high_confidence_events + .iter() + .map(|e| e.offset_secs * e.confidence) + .sum::() + / total_weight; + + let variance: f64 = high_confidence_events + .iter() + .map(|e| (e.offset_secs - weighted_offset).powi(2) * e.confidence) + .sum::() + / total_weight; + + let std_dev = variance.sqrt(); + let consistency = 1.0 / (1.0 + std_dev * 10.0); + + let avg_confidence: f64 = high_confidence_events + .iter() + .map(|e| e.confidence) + .sum::() + / high_confidence_events.len() as f64; + + let overall_confidence = (avg_confidence * consistency).min(1.0); + + Some(SyncAnalysisResult { + offset_secs: weighted_offset, + confidence: overall_confidence, + detected_events: events, + }) + } + + pub fn reset(&mut self) { + self.audio_buffer.clear(); + self.video_motion_scores.clear(); + self.detected_events.clear(); + } +} + +pub fn calculate_frame_motion_score( + current_frame: &[u8], + previous_frame: &[u8], + width: u32, + height: u32, +) -> f64 { + if current_frame.len() != previous_frame.len() || current_frame.is_empty() { + return 0.0; + } + + let sample_step = 16; + let mut diff_sum = 0u64; + let mut sample_count = 0u64; + + let stride = (width * 4) as usize; + + for y in (0..height as usize).step_by(sample_step) { + for x in (0..width as usize).step_by(sample_step) { + let idx = y * stride + x * 4; + if idx + 2 < current_frame.len() { + let curr_luma = (current_frame[idx] 
as u32 * 299 + + current_frame[idx + 1] as u32 * 587 + + current_frame[idx + 2] as u32 * 114) + / 1000; + let prev_luma = (previous_frame[idx] as u32 * 299 + + previous_frame[idx + 1] as u32 * 587 + + previous_frame[idx + 2] as u32 * 114) + / 1000; + + diff_sum += (curr_luma as i32 - prev_luma as i32).unsigned_abs() as u64; + sample_count += 1; + } + } + } + + if sample_count == 0 { + return 0.0; + } + + (diff_sum as f64 / sample_count as f64) / 255.0 +} + +#[derive(Debug, Clone, Default)] +pub struct DeviceSyncCalibration { + pub camera_id: String, + pub microphone_id: String, + pub measured_offset_secs: f64, + pub confidence: f64, + pub measurement_count: u32, +} + +impl DeviceSyncCalibration { + pub fn new(camera_id: String, microphone_id: String) -> Self { + Self { + camera_id, + microphone_id, + measured_offset_secs: 0.0, + confidence: 0.0, + measurement_count: 0, + } + } + + pub fn update_with_measurement(&mut self, offset_secs: f64, confidence: f64) { + if confidence < 0.3 { + return; + } + + let decay = 0.7f64.powi(self.measurement_count as i32); + let new_weight = confidence * (1.0 - decay) + decay; + + if self.measurement_count == 0 { + self.measured_offset_secs = offset_secs; + self.confidence = confidence; + } else { + let total_weight = self.confidence + new_weight; + self.measured_offset_secs = (self.measured_offset_secs * self.confidence + + offset_secs * new_weight) + / total_weight; + self.confidence = (self.confidence + confidence) / 2.0; + } + + self.measurement_count += 1; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sync_analyzer_creation() { + let analyzer = SyncAnalyzer::new(48000, 30.0); + assert_eq!(analyzer.sample_rate, 48000); + } + + #[test] + fn test_motion_score_identical_frames() { + let frame = vec![128u8; 1920 * 1080 * 4]; + let score = calculate_frame_motion_score(&frame, &frame, 1920, 1080); + assert_eq!(score, 0.0); + } + + #[test] + fn test_motion_score_different_frames() { + let frame1 = vec![0u8; 
1920 * 1080 * 4]; + let frame2 = vec![255u8; 1920 * 1080 * 4]; + let score = calculate_frame_motion_score(&frame1, &frame2, 1920, 1080); + assert!(score > 0.9); + } + + #[test] + fn test_calibration_update() { + let mut cal = DeviceSyncCalibration::new("cam1".into(), "mic1".into()); + cal.update_with_measurement(0.05, 0.8); + assert!((cal.measured_offset_secs - 0.05).abs() < 0.001); + + cal.update_with_measurement(0.06, 0.9); + assert!(cal.measured_offset_secs > 0.05); + assert!(cal.measured_offset_secs < 0.06); + } +} diff --git a/crates/editor/src/audio.rs b/crates/editor/src/audio.rs index 215018ff09..16fc33d7a6 100644 --- a/crates/editor/src/audio.rs +++ b/crates/editor/src/audio.rs @@ -132,8 +132,9 @@ impl AudioRenderer { }; let cursor_diff = new_cursor.samples as isize - self.cursor.samples as isize; + let frame_samples = (AudioData::SAMPLE_RATE as usize) / 30; if new_cursor.clip_index != self.cursor.clip_index - || cursor_diff.unsigned_abs() > (AudioData::SAMPLE_RATE as usize) / 5 + || cursor_diff.unsigned_abs() > frame_samples { self.cursor = new_cursor; } @@ -184,9 +185,22 @@ impl AudioRenderer { return None; } + let start = self.cursor; + + let offsets = project + .clips + .iter() + .find(|c| c.index == start.clip_index) + .map(|c| c.offsets) + .unwrap_or_default(); + let max_samples = tracks .iter() - .map(|t| t.data().sample_count()) + .map(|t| { + let track_offset_samples = (t.offset(&offsets) * Self::SAMPLE_RATE as f32) as isize; + let available = t.data().sample_count() as isize - track_offset_samples; + available.max(0) as usize + }) .max() .unwrap(); @@ -197,31 +211,20 @@ impl AudioRenderer { let samples = samples.min(max_samples - self.cursor.samples); - let start = self.cursor; - let mut ret = vec![0.0; samples * 2]; let track_datas = tracks .iter() - .map(|t| { - let offsets = project - .clips - .iter() - .find(|c| c.index == start.clip_index) - .map(|c| c.offsets) - .unwrap_or_default(); - - AudioRendererTrack { - data: t.data().as_ref(), - 
gain: if project.audio.mute { - f32::NEG_INFINITY - } else { - let g = t.gain(&project.audio); - if g < -30.0 { f32::NEG_INFINITY } else { g } - }, - stereo_mode: t.stereo_mode(&project.audio), - offset: (t.offset(&offsets) * Self::SAMPLE_RATE as f32) as isize, - } + .map(|t| AudioRendererTrack { + data: t.data().as_ref(), + gain: if project.audio.mute { + f32::NEG_INFINITY + } else { + let g = t.gain(&project.audio); + if g < -30.0 { f32::NEG_INFINITY } else { g } + }, + stereo_mode: t.stereo_mode(&project.audio), + offset: (t.offset(&offsets) * Self::SAMPLE_RATE as f32) as isize, }) .collect::>(); @@ -277,10 +280,29 @@ impl AudioPlaybackBuffer { self.frame_buffer.set_playhead(playhead, project); } + #[allow(dead_code)] pub fn current_playhead(&self) -> f64 { self.frame_buffer.elapsed_samples_to_playhead() } + pub fn current_audible_playhead( + &self, + device_sample_rate: u32, + device_latency_secs: f64, + ) -> f64 { + let generated_secs = self.frame_buffer.elapsed_samples_to_playhead(); + let channels = self.resampler.output.channels; + let buffered_elements = self.resampled_buffer.occupied_len(); + let buffered_frames = buffered_elements / channels; + let buffered_secs = buffered_frames as f64 / device_sample_rate as f64; + let audible = generated_secs - buffered_secs - device_latency_secs.max(0.0); + if audible.is_sign_negative() { + 0.0 + } else { + audible + } + } + pub fn buffer_reaching_limit(&self) -> bool { self.resampled_buffer.vacant_len() <= 2 * (Self::PROCESSING_SAMPLES_COUNT as usize) * self.resampler.output.channels diff --git a/crates/editor/src/editor_instance.rs b/crates/editor/src/editor_instance.rs index da5055931c..a33a5c844a 100644 --- a/crates/editor/src/editor_instance.rs +++ b/crates/editor/src/editor_instance.rs @@ -58,7 +58,7 @@ impl EditorInstance { return Err("Cannot edit non-studio recordings".to_string()); }; - let segment_count = match meta { + let segment_count = match meta.as_ref() { StudioRecordingMeta::SingleSegment { .. 
} => 1, StudioRecordingMeta::MultipleSegments { inner } => inner.segments.len(), }; @@ -73,7 +73,7 @@ impl EditorInstance { if project.timeline.is_none() { warn!("Project config has no timeline, creating one from recording segments"); - let timeline_segments = match meta { + let timeline_segments = match meta.as_ref() { StudioRecordingMeta::SingleSegment { segment } => { let display_path = recording_meta.path(&segment.display.path); let duration = match Video::new(&display_path, 0.0) { @@ -139,17 +139,57 @@ impl EditorInstance { } } + if project.clips.is_empty() { + let calibration_store = load_calibration_store(&recording_meta.project_path); + + match meta.as_ref() { + StudioRecordingMeta::MultipleSegments { inner } => { + project.clips = inner + .segments + .iter() + .enumerate() + .map(|(i, segment)| { + let calibration_offset = get_calibration_offset( + segment.camera_device_id(), + segment.mic_device_id(), + &calibration_store, + ); + cap_project::ClipConfiguration { + index: i as u32, + offsets: segment + .calculate_audio_offsets_with_calibration(calibration_offset), + } + }) + .collect(); + } + StudioRecordingMeta::SingleSegment { .. 
} => { + project.clips = vec![cap_project::ClipConfiguration { + index: 0, + offsets: cap_project::ClipOffsets::default(), + }]; + } + } + + if let Err(e) = project.write(&recording_meta.project_path) { + warn!("Failed to save auto-generated clip offsets: {}", e); + } + } + let recordings = Arc::new(ProjectRecordingsMeta::new( &recording_meta.project_path, - meta, + meta.as_ref(), )?); - let segments = create_segments(&recording_meta, meta).await?; + let segments = create_segments(&recording_meta, meta.as_ref()).await?; let render_constants = Arc::new( - RenderVideoConstants::new(&recordings.segments, recording_meta.clone(), meta.clone()) - .await - .map_err(|e| format!("Failed to create render constants: {e}"))?, + RenderVideoConstants::new( + &recordings.segments, + recording_meta.clone(), + (**meta).clone(), + ) + .await + .map_err(|e| format!("Failed to create render constants: {e}"))?, ); let renderer = Arc::new(editor::Renderer::spawn( @@ -426,7 +466,7 @@ impl EditorInstance { fn get_studio_meta(&self) -> &StudioRecordingMeta { match &self.meta.inner { - RecordingMetaInner::Studio(meta) => meta, + RecordingMetaInner::Studio(meta) => meta.as_ref(), _ => panic!("Not a studio recording"), } } @@ -548,3 +588,24 @@ pub async fn create_segments( } } } + +fn load_calibration_store(project_path: &std::path::Path) -> cap_audio::CalibrationStore { + let calibration_dir = project_path + .parent() + .and_then(|p| p.parent()) + .map(|p| p.to_path_buf()) + .unwrap_or_else(|| project_path.to_path_buf()); + + cap_audio::CalibrationStore::load(&calibration_dir) +} + +fn get_calibration_offset( + camera_id: Option<&str>, + mic_id: Option<&str>, + store: &cap_audio::CalibrationStore, +) -> Option<f32> { + match (camera_id, mic_id) { + (Some(cam), Some(mic)) => store.get_offset(cam, mic).map(|o| o as f32), + _ => None, + } +} diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 4096abfb1b..4c7016462c 100644 --- a/crates/editor/src/playback.rs +++ 
b/crates/editor/src/playback.rs @@ -903,6 +903,7 @@ impl AudioPlayback { let latency_config = LatencyCorrectionConfig::default(); let mut latency_corrector = LatencyCorrector::new(static_latency_hint, latency_config); let initial_compensation_secs = latency_corrector.initial_compensation_secs(); + let device_sample_rate = sample_rate; { let project_snapshot = project.borrow(); @@ -934,18 +935,19 @@ impl AudioPlayback { let headroom_for_stream = headroom_samples; let mut playhead_rx_for_stream = playhead_rx.clone(); let mut last_video_playhead = playhead; - const SYNC_THRESHOLD_SECS: f64 = 0.15; + const SYNC_THRESHOLD_SECS: f64 = 0.05; let stream_result = device.build_output_stream( &config, move |buffer: &mut [T], info| { - let _latency_secs = latency_corrector.update_from_callback(info); + let latency_secs = latency_corrector.update_from_callback(info); let project = project_for_stream.borrow(); if playhead_rx_for_stream.has_changed().unwrap_or(false) { let video_playhead = *playhead_rx_for_stream.borrow_and_update(); - let audio_playhead = audio_renderer.current_playhead(); + let audio_playhead = audio_renderer + .current_audible_playhead(device_sample_rate, latency_secs); let drift = (video_playhead - audio_playhead).abs(); if drift > SYNC_THRESHOLD_SECS diff --git a/crates/project/src/meta.rs b/crates/project/src/meta.rs index 4dcd11aca6..05248b5df4 100644 --- a/crates/project/src/meta.rs +++ b/crates/project/src/meta.rs @@ -22,6 +22,8 @@ pub struct VideoMeta { pub fps: u32, #[serde(default, skip_serializing_if = "Option::is_none")] pub start_time: Option<f64>, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub device_id: Option<String>, } fn legacy_static_video_fps() -> u32 { @@ -34,6 +36,8 @@ pub struct AudioMeta { pub path: RelativePathBuf, #[serde(default, skip_serializing_if = "Option::is_none")] pub start_time: Option<f64>, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub device_id: Option<String>, } #[derive(Debug, Clone, Serialize, Deserialize, 
Type)] @@ -109,7 +113,7 @@ pub enum UploadMeta { #[derive(Debug, Clone, Serialize, Deserialize, Type)] #[serde(untagged, rename_all = "camelCase")] pub enum RecordingMetaInner { - Studio(StudioRecordingMeta), + Studio(Box<StudioRecordingMeta>), Instant(InstantRecordingMeta), } @@ -408,6 +412,57 @@ impl MultipleSegment { Some(value) } + + pub fn calculate_audio_offsets(&self) -> crate::ClipOffsets { + self.calculate_audio_offsets_with_calibration(None) + } + + pub fn calculate_audio_offsets_with_calibration( + &self, + calibration_offset: Option<f32>, + ) -> crate::ClipOffsets { + let latest = match self.latest_start_time() { + Some(t) => t, + None => return crate::ClipOffsets::default(), + }; + + let cal_offset = calibration_offset.unwrap_or(0.0); + + let camera_offset = self + .camera + .as_ref() + .and_then(|c| c.start_time) + .map(|t| (latest - t) as f32) + .unwrap_or(0.0); + + let mic_offset = self + .mic + .as_ref() + .and_then(|m| m.start_time) + .map(|t| (latest - t) as f32 + cal_offset) + .unwrap_or(0.0); + + let system_audio_offset = self + .system_audio + .as_ref() + .and_then(|s| s.start_time) + .map(|t| (latest - t) as f32 + cal_offset) + .unwrap_or(0.0); + + crate::ClipOffsets { + camera: camera_offset, + mic: mic_offset, + system_audio: system_audio_offset, + } + } + + pub fn camera_device_id(&self) -> Option<&str> { + self.camera.as_ref().and_then(|c| c.device_id.as_deref()) + } + + pub fn mic_device_id(&self) -> Option<&str> { + self.mic.as_ref().and_then(|m| m.device_id.as_deref()) + } } #[cfg(test)] diff --git a/crates/recording/Cargo.toml b/crates/recording/Cargo.toml index 6d395e96a2..6d9d796463 100644 --- a/crates/recording/Cargo.toml +++ b/crates/recording/Cargo.toml @@ -15,6 +15,7 @@ cap-audio = { path = "../audio" } cap-fail = { path = "../fail" } cap-project = { path = "../project" } cap-media = { path = "../media" } +cap-rendering = { path = "../rendering" } cap-flags = { path = "../flags" } cap-utils = { path = "../utils" } scap-targets = { path = 
"../scap-targets" } diff --git a/crates/recording/src/feeds/microphone.rs b/crates/recording/src/feeds/microphone.rs index 32ba88df3b..30d34c7bfb 100644 --- a/crates/recording/src/feeds/microphone.rs +++ b/crates/recording/src/feeds/microphone.rs @@ -364,6 +364,7 @@ pub struct MicrophoneFeedLock { audio_info: AudioInfo, buffer_size_frames: Option, drop_tx: Option>, + device_name: String, } impl MicrophoneFeedLock { @@ -378,6 +379,10 @@ impl MicrophoneFeedLock { pub fn buffer_size_frames(&self) -> Option { self.buffer_size_frames } + + pub fn device_name(&self) -> &str { + &self.device_name + } } impl Deref for MicrophoneFeedLock { @@ -715,6 +720,7 @@ impl Message for MicrophoneFeed { let config = attached.config.clone(); let buffer_size_frames = attached.buffer_size_frames; + let device_name = attached.label.clone(); self.state = State::Locked { inner: attached }; @@ -732,6 +738,7 @@ impl Message for MicrophoneFeed { config, buffer_size_frames, drop_tx: Some(drop_tx), + device_name, }) } } diff --git a/crates/recording/src/lib.rs b/crates/recording/src/lib.rs index 965c709204..96f6991a28 100644 --- a/crates/recording/src/lib.rs +++ b/crates/recording/src/lib.rs @@ -10,6 +10,7 @@ pub mod recovery; pub mod screenshot; pub mod sources; pub mod studio_recording; +pub mod sync_calibration; #[cfg(any(test, feature = "test-utils"))] pub mod test_sources; diff --git a/crates/recording/src/recovery.rs b/crates/recording/src/recovery.rs index 7fa3368989..0a8f554158 100644 --- a/crates/recording/src/recovery.rs +++ b/crates/recording/src/recovery.rs @@ -745,7 +745,7 @@ impl RecoveryManager { let meta = Self::build_recovered_meta(recording)?; let mut recording_meta = recording.meta.clone(); - recording_meta.inner = RecordingMetaInner::Studio(meta.clone()); + recording_meta.inner = RecordingMetaInner::Studio(Box::new(meta.clone())); recording_meta .save_for_project() .map_err(|_| RecoveryError::MetaSave)?; @@ -795,6 +795,7 @@ impl RecoveryManager { path: 
RelativePathBuf::from(format!("{segment_base}/display.mp4")), fps, start_time: original_segment.and_then(|s| s.display.start_time), + device_id: original_segment.and_then(|s| s.display.device_id.clone()), }, camera: if camera_path.exists() { Some(VideoMeta { @@ -806,6 +807,9 @@ impl RecoveryManager { start_time: original_segment .and_then(|s| s.camera.as_ref()) .and_then(|c| c.start_time), + device_id: original_segment + .and_then(|s| s.camera.as_ref()) + .and_then(|c| c.device_id.clone()), }) } else { None @@ -816,6 +820,9 @@ impl RecoveryManager { start_time: original_segment .and_then(|s| s.mic.as_ref()) .and_then(|m| m.start_time), + device_id: original_segment + .and_then(|s| s.mic.as_ref()) + .and_then(|m| m.device_id.clone()), }) } else { None @@ -826,6 +833,9 @@ impl RecoveryManager { start_time: original_segment .and_then(|s| s.system_audio.as_ref()) .and_then(|a| a.start_time), + device_id: original_segment + .and_then(|s| s.system_audio.as_ref()) + .and_then(|a| a.device_id.clone()), }) } else { None @@ -986,8 +996,8 @@ impl RecoveryManager { let mut meta = RecordingMeta::load_for_project(project_path).map_err(|_| RecoveryError::MetaSave)?; - if let RecordingMetaInner::Studio(StudioRecordingMeta::MultipleSegments { inner, .. }) = - &mut meta.inner + if let RecordingMetaInner::Studio(studio) = &mut meta.inner + && let StudioRecordingMeta::MultipleSegments { inner, .. } = studio.as_mut() { inner.status = Some(StudioRecordingStatus::NeedsRemux); meta.save_for_project() @@ -1001,11 +1011,15 @@ impl RecoveryManager { let mut updated_meta = meta.clone(); let status_updated = match &mut updated_meta.inner { - RecordingMetaInner::Studio(StudioRecordingMeta::MultipleSegments { inner, .. }) => { - inner.status = Some(StudioRecordingStatus::Failed { - error: "No recoverable segments found".to_string(), - }); - true + RecordingMetaInner::Studio(studio) => { + if let StudioRecordingMeta::MultipleSegments { inner, .. 
} = studio.as_mut() { + inner.status = Some(StudioRecordingStatus::Failed { + error: "No recoverable segments found".to_string(), + }); + true + } else { + false + } } _ => false, }; diff --git a/crates/recording/src/sources/audio_mixer.rs b/crates/recording/src/sources/audio_mixer.rs index 5157aa1374..ea1ecb1b7e 100644 --- a/crates/recording/src/sources/audio_mixer.rs +++ b/crates/recording/src/sources/audio_mixer.rs @@ -359,7 +359,7 @@ impl AudioMixer { } } - fn tick(&mut self, start: Timestamps, now: Timestamp) -> Result<(), ()> { + fn tick(&mut self, _start: Timestamps, now: Timestamp) -> Result<(), ()> { self.buffer_sources(now); let Some(start_timestamp) = self.start_timestamp else { @@ -383,10 +383,10 @@ impl AudioMixer { filtered.set_rate(output_rate_i32 as u32); let elapsed = Duration::from_secs_f64(self.samples_out as f64 / output_rate); - let timestamp = start.instant() + start_timestamp.duration_since(start) + elapsed; + let output_timestamp = start_timestamp + elapsed; let frame_samples = filtered.samples(); - let mut frame = AudioFrame::new(filtered, Timestamp::Instant(timestamp)); + let mut frame = AudioFrame::new(filtered, output_timestamp); loop { match self.output.try_send(frame) { diff --git a/crates/recording/src/studio_recording.rs b/crates/recording/src/studio_recording.rs index 9400991c43..4c4c4a3585 100644 --- a/crates/recording/src/studio_recording.rs +++ b/crates/recording/src/studio_recording.rs @@ -103,10 +103,15 @@ impl Actor { (Default::default(), 0) }; + let camera_device_id = self.segment_factory.camera_device_id(); + let mic_device_id = self.segment_factory.mic_device_id(); + self.segments.push(RecordingSegment { start: segment_start_time, end: segment_stop_time, pipeline, + camera_device_id, + mic_device_id, }); Ok(cursors) @@ -299,6 +304,8 @@ pub struct RecordingSegment { pub start: f64, pub end: f64, pipeline: FinishedPipeline, + pub camera_device_id: Option<String>, + pub mic_device_id: Option<String>, } pub struct ScreenPipelineOutput { @@ 
-697,6 +704,7 @@ async fn stop_recording( DEFAULT_FPS }), start_time: Some(to_start_time(s.pipeline.screen.first_timestamp)), + device_id: None, }, camera: s.pipeline.camera.map(|camera| VideoMeta { path: make_relative(&camera.path), @@ -708,14 +716,17 @@ async fn stop_recording( DEFAULT_FPS }), start_time: Some(to_start_time(camera.first_timestamp)), + device_id: s.camera_device_id.clone(), }), mic: s.pipeline.microphone.map(|mic| AudioMeta { path: make_relative(&mic.path), start_time: Some(to_start_time(mic.first_timestamp)), + device_id: s.mic_device_id.clone(), }), system_audio: s.pipeline.system_audio.map(|audio| AudioMeta { path: make_relative(&audio.path), start_time: Some(to_start_time(audio.first_timestamp)), + device_id: None, }), cursor: s .pipeline @@ -855,6 +866,20 @@ impl SegmentPipelineFactory { pub fn set_camera_feed(&mut self, camera_feed: Option>) { self.base_inputs.camera_feed = camera_feed; } + + pub fn camera_device_id(&self) -> Option<String> { + self.base_inputs + .camera_feed + .as_ref() + .map(|f| f.camera_info().device_id().to_string()) + } + + pub fn mic_device_id(&self) -> Option<String> { + self.base_inputs + .mic_feed + .as_ref() + .map(|f| f.device_name().to_string()) + } } fn completion_rx_to_done_fut( @@ -1141,13 +1166,13 @@ fn write_in_progress_meta(recording_dir: &Path) -> anyhow::Result<()> { project_path: recording_dir.to_path_buf(), pretty_name, sharing: None, - inner: RecordingMetaInner::Studio(StudioRecordingMeta::MultipleSegments { + inner: RecordingMetaInner::Studio(Box::new(StudioRecordingMeta::MultipleSegments { inner: MultipleSegments { segments: Vec::new(), cursors: cap_project::Cursors::default(), status: Some(StudioRecordingStatus::InProgress), }, - }), + })), upload: None, }; diff --git a/crates/recording/src/sync_calibration.rs b/crates/recording/src/sync_calibration.rs new file mode 100644 index 0000000000..f27da19dc0 --- /dev/null +++ b/crates/recording/src/sync_calibration.rs @@ -0,0 +1,154 @@ +use cap_audio::{ 
CalibrationStore, DeviceSyncCalibration, SyncAnalyzer, calculate_frame_motion_score, }; +use std::path::{Path, PathBuf}; +use tracing::{debug, info, warn}; + +pub struct PostRecordingSyncAnalysis { + analyzer: SyncAnalyzer, + camera_device_id: Option<String>, + mic_device_id: Option<String>, + calibration_dir: PathBuf, + previous_frame: Option<Vec<u8>>, + frame_width: u32, + frame_height: u32, +} + +impl PostRecordingSyncAnalysis { + pub fn new( + sample_rate: u32, + fps: f64, + camera_device_id: Option<String>, + mic_device_id: Option<String>, + calibration_dir: PathBuf, + frame_width: u32, + frame_height: u32, + ) -> Self { + Self { + analyzer: SyncAnalyzer::new(sample_rate, fps), + camera_device_id, + mic_device_id, + calibration_dir, + previous_frame: None, + frame_width, + frame_height, + } + } + + pub fn process_video_frame(&mut self, frame_data: &[u8], time_secs: f64) { + if let Some(prev) = &self.previous_frame { + let motion = + calculate_frame_motion_score(frame_data, prev, self.frame_width, self.frame_height); + self.analyzer.add_video_frame_motion(time_secs, motion); + } + self.previous_frame = Some(frame_data.to_vec()); + } + + pub fn process_audio_samples(&mut self, samples: &[f32], start_time_secs: f64) { + self.analyzer.add_audio_samples(samples, start_time_secs); + } + + pub fn finalize_and_save(&mut self) -> Option<f64> { + let (camera_id, mic_id) = match (&self.camera_device_id, &self.mic_device_id) { + (Some(cam), Some(mic)) => (cam.clone(), mic.clone()), + _ => { + debug!("Skipping sync calibration: missing device IDs"); + return None; + } + }; + + let result = self.analyzer.calculate_sync_offset()?; + + if result.confidence < 0.5 { + debug!( + "Sync analysis confidence too low: {:.0}%", + result.confidence * 100.0 + ); + return None; + } + + info!( + "Sync analysis complete: offset={:.1}ms, confidence={:.0}%, events={}", + result.offset_secs * 1000.0, + result.confidence * 100.0, + result.detected_events.len() + ); + + let mut store = CalibrationStore::load(&self.calibration_dir); + 
let mut calibration = DeviceSyncCalibration::new(camera_id, mic_id); + calibration.update_with_measurement(result.offset_secs, result.confidence); + + if let Some(existing) = + store.get_calibration(&calibration.camera_id, &calibration.microphone_id) + { + calibration.measured_offset_secs = existing.offset_secs; + calibration.confidence = existing.confidence; + calibration.measurement_count = existing.measurement_count; + calibration.update_with_measurement(result.offset_secs, result.confidence); + } + + store.update_calibration(&calibration); + + if let Err(e) = store.save(&self.calibration_dir) { + warn!("Failed to save calibration store: {}", e); + } else { + info!( + "Saved calibration: offset={:.1}ms after {} measurements", + calibration.measured_offset_secs * 1000.0, + calibration.measurement_count + ); + } + + Some(calibration.measured_offset_secs) + } +} + +pub fn analyze_recording_for_sync( + audio_path: &Path, + video_path: &Path, + camera_device_id: Option<&str>, + mic_device_id: Option<&str>, + calibration_dir: &Path, +) -> Option { + use cap_audio::AudioData; + use cap_rendering::Video; + + let audio = match AudioData::from_file(audio_path) { + Ok(a) => a, + Err(e) => { + warn!("Failed to load audio for sync analysis: {}", e); + return None; + } + }; + + let video = match Video::new(video_path, 0.0) { + Ok(v) => v, + Err(e) => { + warn!("Failed to load video for sync analysis: {}", e); + return None; + } + }; + + let sample_rate = AudioData::SAMPLE_RATE; + let fps = video.fps as f64; + + let mut analyzer = PostRecordingSyncAnalysis::new( + sample_rate, + fps, + camera_device_id.map(String::from), + mic_device_id.map(String::from), + calibration_dir.to_path_buf(), + video.width, + video.height, + ); + + let samples = audio.samples(); + let chunk_size = (sample_rate as usize) / 10; + for (i, chunk) in samples.chunks(chunk_size).enumerate() { + let time = i as f64 * (chunk_size as f64 / sample_rate as f64); + analyzer.process_audio_samples(chunk, time); + 
} + + analyzer.finalize_and_save() +} diff --git a/crates/recording/tests/recovery.rs b/crates/recording/tests/recovery.rs index 6055099f19..d739821d1a 100644 --- a/crates/recording/tests/recovery.rs +++ b/crates/recording/tests/recovery.rs @@ -103,13 +103,14 @@ impl TestRecording { pretty_name: "Test Recording".to_string(), sharing: None, upload: None, - inner: RecordingMetaInner::Studio(StudioRecordingMeta::MultipleSegments { + inner: RecordingMetaInner::Studio(Box::new(StudioRecordingMeta::MultipleSegments { inner: MultipleSegments { segments: vec![MultipleSegment { display: VideoMeta { path: RelativePathBuf::from("content/segments/segment-0/display.mp4"), fps: 30, start_time: None, + device_id: None, }, camera: None, mic: None, @@ -119,7 +120,7 @@ impl TestRecording { cursors: Cursors::default(), status: Some(status), }, - }), + })), }; let meta_path = self.project_path.join("recording-meta.json"); diff --git a/crates/rendering/src/layers/cursor.rs b/crates/rendering/src/layers/cursor.rs index 2fa10cbbdd..10274197e3 100644 --- a/crates/rendering/src/layers/cursor.rs +++ b/crates/rendering/src/layers/cursor.rs @@ -341,15 +341,18 @@ impl CursorLayer { let mut loaded_cursor = None; let cursor_shape = match &constants.recording_meta.inner { - RecordingMetaInner::Studio(StudioRecordingMeta::MultipleSegments { - inner: - MultipleSegments { - cursors: Cursors::Correct(cursors), - .. - }, - }) => cursors - .get(&interpolated_cursor.cursor_id) - .and_then(|v| v.shape), + RecordingMetaInner::Studio(studio) => match studio.as_ref() { + StudioRecordingMeta::MultipleSegments { + inner: + MultipleSegments { + cursors: Cursors::Correct(cursors), + .. + }, + } => cursors + .get(&interpolated_cursor.cursor_id) + .and_then(|v| v.shape), + _ => None, + }, _ => None, };