//! RTP packetization for H264 video //! //! This module provides H264 RTP packetization using the rtp crate's H264Payloader. //! It handles: //! - NAL unit parsing (Annex B start codes) //! - SPS/PPS collection and STAP-A packetization //! - Single NAL unit mode for small NALs //! - FU-A fragmentation for large NALs //! //! IMPORTANT: Each NAL unit must be sent separately via write_sample(), //! without Annex B start codes. The TrackLocalStaticSample handles //! RTP packetization internally. use bytes::Bytes; use rtp::codecs::h264::H264Payloader; use rtp::packetizer::Payloader; use std::io::Cursor; use std::sync::Arc; use std::time::Duration; use tokio::sync::Mutex; use tracing::{debug, error, trace}; use webrtc::media::io::h264_reader::H264Reader; use webrtc::media::Sample; use webrtc::rtp_transceiver::rtp_codec::RTCRtpCodecCapability; use webrtc::track::track_local::track_local_static_sample::TrackLocalStaticSample; use webrtc::track::track_local::TrackLocal; use crate::error::{AppError, Result}; use crate::video::format::Resolution; /// Default MTU for RTP packets (conservative for most networks) pub const RTP_MTU: usize = 1200; /// H264 clock rate (always 90kHz per RFC 6184) pub const H264_CLOCK_RATE: u32 = 90000; /// H264 video track using TrackLocalStaticSample for proper packetization pub struct H264VideoTrack { /// The underlying WebRTC track with automatic packetization track: Arc, /// Track configuration config: H264VideoTrackConfig, /// H264 payloader for manual packetization (if needed) payloader: Mutex, /// Cached SPS NAL unit for injection before IDR frames /// Some hardware encoders don't repeat SPS/PPS with every keyframe cached_sps: Mutex>, /// Cached PPS NAL unit for injection before IDR frames cached_pps: Mutex>, } /// H264 video track configuration #[derive(Debug, Clone)] pub struct H264VideoTrackConfig { /// Track ID pub track_id: String, /// Stream ID pub stream_id: String, /// Resolution pub resolution: Resolution, /// Target bitrate in kbps pub bitrate_kbps: u32, /// Frames per second pub fps: u32, /// H.264 profile-level-id (e.g., "42001f" for Baseline L3.1, "64001f" for High L3.1) /// If None, uses empty string to let browser negotiate /// Format: PPCCLL where PP=profile_idc, CC=constraint_flags, LL=level_idc pub profile_level_id: Option, } impl Default for H264VideoTrackConfig { fn default() -> Self { Self { track_id: "video0".to_string(), stream_id: "one-kvm-stream".to_string(), resolution: Resolution::HD720, bitrate_kbps: 8000, fps: 30, profile_level_id: None, // Let browser negotiate } } } impl H264VideoTrack { /// Create a new H264 video track /// /// If `config.profile_level_id` is set, it will be used in SDP negotiation. /// Otherwise, uses empty fmtp line to let browser negotiate the best profile. pub fn new(config: H264VideoTrackConfig) -> Self { // Build sdp_fmtp_line based on profile_level_id let sdp_fmtp_line = if let Some(ref profile_level_id) = config.profile_level_id { // Use specified profile-level-id format!( "level-asymmetry-allowed=1;packetization-mode=1;profile-level-id={}", profile_level_id ) } else { // Let browser negotiate - empty string for maximum compatibility String::new() }; let codec = RTCRtpCodecCapability { mime_type: "video/H264".to_string(), clock_rate: H264_CLOCK_RATE, channels: 0, sdp_fmtp_line, rtcp_feedback: vec![], }; let track = Arc::new(TrackLocalStaticSample::new( codec, config.track_id.clone(), config.stream_id.clone(), )); Self { track, config, payloader: Mutex::new(H264Payloader::default()), cached_sps: Mutex::new(None), cached_pps: Mutex::new(None), } } /// Get the underlying WebRTC track for adding to peer connection pub fn track(&self) -> Arc { self.track.clone() } /// Get track as TrackLocal for peer connection pub fn as_track_local(&self) -> Arc { self.track.clone() } /// Write an H264 encoded frame to the track /// /// The frame data should be H264 Annex B format (with start codes 0x00000001 or 0x000001). /// This is the format produced by hwcodec/FFmpeg encoders. /// /// IMPORTANT: Each NAL unit is sent separately via write_sample(), without start codes. /// This is required for proper WebRTC RTP packetization. /// See: https://github.com/webrtc-rs/webrtc/blob/master/examples/examples/play-from-disk-h264/ /// /// # Arguments /// * `data` - H264 Annex B encoded frame data /// * `duration` - Frame duration (typically 1/fps seconds) /// * `is_keyframe` - Whether this is a keyframe (IDR frame) pub async fn write_frame( &self, data: &[u8], _duration: Duration, is_keyframe: bool, ) -> Result<()> { if data.is_empty() { return Ok(()); } // Use H264Reader to parse NAL units from Annex B data let cursor = Cursor::new(data); let mut h264_reader = H264Reader::new(cursor, 1024 * 1024); // Collect all NAL units first to check for SPS/PPS presence let mut nals: Vec = Vec::new(); let mut has_sps = false; let mut has_pps = false; let mut has_idr = false; // Send each NAL unit separately (like official webrtc-rs example) // H264Reader returns NAL data WITHOUT start codes - this is what we need while let Ok(nal) = h264_reader.next_nal() { if nal.data.is_empty() { continue; } let nal_type = nal.data[0] & 0x1F; // Skip AUD NAL units (type 9) - not needed for WebRTC if nal_type == 9 { continue; } // Skip filler data (type 12) if nal_type == 12 { continue; } // Track NAL types match nal_type { 5 => has_idr = true, 7 => { has_sps = true; // Cache SPS for future injection *self.cached_sps.lock().await = Some(nal.data.clone().freeze()); } 8 => { has_pps = true; // Cache PPS for future injection *self.cached_pps.lock().await = Some(nal.data.clone().freeze()); } _ => {} } trace!( "Sending NAL: type={} ({}) size={} bytes", nal_type, match nal_type { 1 => "Non-IDR slice", 5 => "IDR slice", 6 => "SEI", 7 => "SPS", 8 => "PPS", _ => "Other", }, nal.data.len() ); nals.push(nal.data.freeze()); } // Inject cached SPS/PPS before IDR if missing // This is critical for hardware encoders that don't repeat SPS/PPS if has_idr && (!has_sps || !has_pps) { let mut injected_nals: Vec = Vec::new(); if !has_sps { if let Some(sps) = self.cached_sps.lock().await.clone() { debug!("Injecting cached SPS before IDR frame"); injected_nals.push(sps); } } if !has_pps { if let Some(pps) = self.cached_pps.lock().await.clone() { debug!("Injecting cached PPS before IDR frame"); injected_nals.push(pps); } } if !injected_nals.is_empty() { injected_nals.extend(nals); nals = injected_nals; } } let mut nal_count = 0; let mut total_bytes = 0u64; // Send NAL data directly WITHOUT start codes // TrackLocalStaticSample handles RTP packetization internally // Use duration=1s for each NAL like official webrtc-rs example for nal_data in nals { let sample = Sample { data: nal_data.clone(), duration: Duration::from_secs(1), // Like official example ..Default::default() }; if let Err(e) = self.track.write_sample(&sample).await { // Only log periodically to avoid spam when no peer connected if nal_count % 100 == 0 { debug!("Write sample failed (no peer?): {}", e); } } total_bytes += nal_data.len() as u64; nal_count += 1; } trace!( "Sent frame: {} NAL units, {} bytes, keyframe={}", nal_count, total_bytes, is_keyframe ); Ok(()) } /// Write frame with timestamp (for more precise timing control) pub async fn write_frame_with_timestamp( &self, data: &[u8], _pts_ms: u64, is_keyframe: bool, ) -> Result<()> { // Convert pts from milliseconds to frame duration // Assuming constant frame rate from config let duration = Duration::from_millis(1000 / self.config.fps as u64); self.write_frame(data, duration, is_keyframe).await } /// Manually packetize H264 data into RTP payloads /// /// This is useful if you need direct control over RTP packets /// (e.g., for sending via TrackLocalStaticRTP instead of TrackLocalStaticSample) pub async fn packetize(&self, data: &[u8], mtu: usize) -> Result> { let mut payloader = self.payloader.lock().await; let bytes = Bytes::copy_from_slice(data); payloader .payload(mtu, &bytes) .map_err(|e| AppError::VideoError(format!("H264 packetization failed: {}", e))) } /// Get configuration pub fn config(&self) -> &H264VideoTrackConfig { &self.config } } /// Opus audio track using TrackLocalStaticSample pub struct OpusAudioTrack { /// The underlying WebRTC track track: Arc, } impl OpusAudioTrack { /// Create a new Opus audio track pub fn new(track_id: &str, stream_id: &str) -> Self { let codec = RTCRtpCodecCapability { mime_type: "audio/opus".to_string(), clock_rate: 48000, channels: 2, sdp_fmtp_line: "minptime=10;useinbandfec=1".to_string(), rtcp_feedback: vec![], }; let track = Arc::new(TrackLocalStaticSample::new( codec, track_id.to_string(), stream_id.to_string(), )); Self { track, } } /// Get the underlying WebRTC track pub fn track(&self) -> Arc { self.track.clone() } /// Get track as TrackLocal pub fn as_track_local(&self) -> Arc { self.track.clone() } /// Write Opus encoded audio data /// /// # Arguments /// * `data` - Opus encoded packet /// * `samples` - Number of audio samples in this packet (typically 960 for 20ms at 48kHz) pub async fn write_packet(&self, data: &[u8], samples: u32) -> Result<()> { if data.is_empty() { return Ok(()); } // Opus frame duration based on samples // 48000 Hz, so duration = samples / 48000 seconds let duration = Duration::from_micros((samples as u64 * 1_000_000) / 48000); let sample = Sample { data: Bytes::copy_from_slice(data), duration, ..Default::default() }; self.track .write_sample(&sample) .await .map_err(|e| { error!("Failed to write Opus sample: {}", e); AppError::WebRtcError(format!("Failed to write audio sample: {}", e)) }) } } /// Strip AUD (Access Unit Delimiter) NAL units from H264 Annex B data /// AUD (NAL type 9) can cause decoding issues in some browser WebRTC implementations /// Also strips filler data (NAL type 12) and SEI (NAL type 6) for cleaner output pub fn strip_aud_nal_units(data: &[u8]) -> Vec { let mut result = Vec::with_capacity(data.len()); let mut i = 0; while i < data.len() { // Find start code (3 or 4 bytes) let (start_code_pos, start_code_len) = if i + 4 <= data.len() && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 0 && data[i + 3] == 1 { (i, 4) } else if i + 3 <= data.len() && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1 { (i, 3) } else { i += 1; continue; }; let nal_start = start_code_pos + start_code_len; if nal_start >= data.len() { break; } let nal_type = data[nal_start] & 0x1F; // Find next start code to determine NAL unit end let mut nal_end = data.len(); let mut j = nal_start + 1; while j + 3 <= data.len() { if (data[j] == 0 && data[j + 1] == 0 && data[j + 2] == 1) || (j + 4 <= data.len() && data[j] == 0 && data[j + 1] == 0 && data[j + 2] == 0 && data[j + 3] == 1) { nal_end = j; break; } j += 1; } // Skip AUD (9), filler (12), and optionally SEI (6) // Keep SPS (7), PPS (8), IDR (5), non-IDR slice (1) if nal_type != 9 && nal_type != 12 { // Include this NAL unit with start code result.extend_from_slice(&data[start_code_pos..nal_end]); } i = nal_end; } // If nothing was stripped, return original data if result.is_empty() && !data.is_empty() { return data.to_vec(); } result } /// Extract SPS and PPS NAL units from H264 Annex B data /// Returns (SPS data without start code, PPS data without start code) pub fn extract_sps_pps(data: &[u8]) -> (Option>, Option>) { let mut sps: Option> = None; let mut pps: Option> = None; let mut i = 0; while i < data.len() { // Find start code (3 or 4 bytes) let start_code_len = if i + 4 <= data.len() && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 0 && data[i + 3] == 1 { 4 } else if i + 3 <= data.len() && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1 { 3 } else { i += 1; continue; }; let nal_start = i + start_code_len; if nal_start >= data.len() { break; } let nal_type = data[nal_start] & 0x1F; // Find next start code to determine NAL unit end let mut nal_end = data.len(); let mut j = nal_start + 1; while j + 3 <= data.len() { if (data[j] == 0 && data[j + 1] == 0 && data[j + 2] == 1) || (j + 4 <= data.len() && data[j] == 0 && data[j + 1] == 0 && data[j + 2] == 0 && data[j + 3] == 1) { nal_end = j; break; } j += 1; } // Extract SPS (NAL type 7) and PPS (NAL type 8) without start codes match nal_type { 7 => { sps = Some(data[nal_start..nal_end].to_vec()); } 8 => { pps = Some(data[nal_start..nal_end].to_vec()); } _ => {} } i = nal_end; } (sps, pps) } /// Check if H264 Annex B data contains SPS and PPS NAL units pub fn has_sps_pps(data: &[u8]) -> bool { let mut has_sps = false; let mut has_pps = false; let mut i = 0; while i < data.len() { // Find start code (3 or 4 bytes) let start_code_len = if i + 4 <= data.len() && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 0 && data[i + 3] == 1 { 4 } else if i + 3 <= data.len() && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1 { 3 } else { i += 1; continue; }; let nal_start = i + start_code_len; if nal_start >= data.len() { break; } let nal_type = data[nal_start] & 0x1F; match nal_type { 7 => has_sps = true, 8 => has_pps = true, _ => {} } if has_sps && has_pps { return true; } // Move past start code to next position i = nal_start + 1; } has_sps && has_pps } /// Check if H264 data contains a keyframe (IDR NAL unit) pub fn is_h264_keyframe(data: &[u8]) -> bool { // Look for IDR NAL unit (type 5) // NAL units start with 0x00 0x00 0x01 or 0x00 0x00 0x00 0x01 let mut i = 0; while i < data.len() { // Find start code if i + 3 < data.len() && data[i] == 0 && data[i + 1] == 0 { let (nal_start, _start_code_len) = if data[i + 2] == 1 { (i + 3, 3) } else if i + 4 < data.len() && data[i + 2] == 0 && data[i + 3] == 1 { (i + 4, 4) } else { i += 1; continue; }; if nal_start < data.len() { let nal_type = data[nal_start] & 0x1F; // IDR = 5, SPS = 7, PPS = 8 if nal_type == 5 { return true; } } i = nal_start; } else { i += 1; } } false } /// Parse profile-level-id from SPS NAL unit data (without start code) /// /// Returns a 6-character hex string like "42001f" (Baseline L3.1) or "64001f" (High L3.1) /// /// SPS structure (first 4 bytes after NAL header): /// - Byte 0: NAL header (0x67 for SPS) /// - Byte 1: profile_idc (0x42=Baseline, 0x4D=Main, 0x64=High) /// - Byte 2: constraint_set_flags /// - Byte 3: level_idc (0x1f=3.1, 0x28=4.0, 0x33=5.1) pub fn parse_profile_level_id_from_sps(sps: &[u8]) -> Option { // SPS NAL must be at least 4 bytes: NAL header + profile_idc + constraints + level_idc if sps.len() < 4 { return None; } // First byte is NAL header, skip it let profile_idc = sps[1]; let constraint_set_flags = sps[2]; let level_idc = sps[3]; Some(format!( "{:02x}{:02x}{:02x}", profile_idc, constraint_set_flags, level_idc )) } /// Extract profile-level-id from H264 Annex B data (containing SPS) /// /// This function finds the SPS NAL unit and extracts the profile-level-id. /// Useful for determining the actual encoder output profile. /// /// # Example /// ```ignore /// let h264_data = encoder.encode(&yuv)?; /// if let Some(profile_level_id) = extract_profile_level_id(&h264_data) { /// println!("Encoder outputs profile-level-id: {}", profile_level_id); /// // Use this to configure H264VideoTrackConfig /// } /// ``` pub fn extract_profile_level_id(data: &[u8]) -> Option { let (sps, _) = extract_sps_pps(data); sps.and_then(|sps_data| parse_profile_level_id_from_sps(&sps_data)) } /// Common H.264 profile-level-id values pub mod profiles { /// Constrained Baseline Profile Level 3.1 - Maximum browser compatibility pub const CONSTRAINED_BASELINE_31: &str = "42e01f"; /// Baseline Profile Level 3.1 pub const BASELINE_31: &str = "42001f"; /// Main Profile Level 3.1 pub const MAIN_31: &str = "4d001f"; /// High Profile Level 3.1 - Hardware encoders typically output this pub const HIGH_31: &str = "64001f"; /// High Profile Level 4.0 pub const HIGH_40: &str = "640028"; /// High Profile Level 5.1 pub const HIGH_51: &str = "640033"; } #[cfg(test)] mod tests { use super::*; #[test] fn test_is_h264_keyframe() { // IDR frame with 4-byte start code let idr_frame = vec![0x00, 0x00, 0x00, 0x01, 0x65]; // NAL type 5 = IDR assert!(is_h264_keyframe(&idr_frame)); // IDR frame with 3-byte start code let idr_frame_3 = vec![0x00, 0x00, 0x01, 0x65]; assert!(is_h264_keyframe(&idr_frame_3)); // Non-IDR frame (P-frame, NAL type 1) let p_frame = vec![0x00, 0x00, 0x00, 0x01, 0x41]; assert!(!is_h264_keyframe(&p_frame)); // SPS (NAL type 7) - not a keyframe by itself let sps = vec![0x00, 0x00, 0x00, 0x01, 0x67]; assert!(!is_h264_keyframe(&sps)); // Multiple NAL units with IDR let multi_nal = vec![ 0x00, 0x00, 0x00, 0x01, 0x67, 0x42, 0x00, 0x1f, // SPS 0x00, 0x00, 0x00, 0x01, 0x68, 0xce, 0x38, 0x80, // PPS 0x00, 0x00, 0x00, 0x01, 0x65, 0x88, 0x84, // IDR ]; assert!(is_h264_keyframe(&multi_nal)); } #[test] fn test_h264_track_config_default() { let config = H264VideoTrackConfig::default(); assert_eq!(config.fps, 30); assert_eq!(config.bitrate_kbps, 8000); assert_eq!(config.resolution, Resolution::HD720); } }