Files
One-KVM/src/webrtc/rtp.rs
mofeng 1786b7689d feat: 完善架构优化性能
- 调整音视频架构,提升 RKMPP 编码 MJPEG-->H264 性能,同时解决丢帧马赛克问题;
- 删除多用户逻辑,只保留单用户,支持设置 web 单会话;
- 修复删除体验不好的的回退逻辑,前端页面菜单位置微调;
- 增加 OTG USB 设备动态调整功能;
- 修复 mdns 问题,webrtc 视频切换更顺畅。
2026-01-25 16:04:29 +08:00

684 lines
21 KiB
Rust

//! RTP packetization for H264 video
//!
//! This module provides H264 RTP packetization using the rtp crate's H264Payloader.
//! It handles:
//! - NAL unit parsing (Annex B start codes)
//! - SPS/PPS collection and STAP-A packetization
//! - Single NAL unit mode for small NALs
//! - FU-A fragmentation for large NALs
//!
//! IMPORTANT: Each NAL unit must be sent separately via write_sample(),
//! without Annex B start codes. The TrackLocalStaticSample handles
//! RTP packetization internally.
use bytes::Bytes;
use rtp::codecs::h264::H264Payloader;
use rtp::packetizer::Payloader;
use std::io::Cursor;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;
use tracing::{debug, error, trace};
use webrtc::media::io::h264_reader::H264Reader;
use webrtc::media::Sample;
use webrtc::rtp_transceiver::rtp_codec::RTCRtpCodecCapability;
use webrtc::track::track_local::track_local_static_sample::TrackLocalStaticSample;
use webrtc::track::track_local::TrackLocal;
use crate::error::{AppError, Result};
use crate::video::format::Resolution;
/// Default MTU for RTP packets (conservative for most networks)
pub const RTP_MTU: usize = 1200;
/// H264 clock rate (always 90kHz per RFC 6184)
pub const H264_CLOCK_RATE: u32 = 90000;
/// H264 video track using TrackLocalStaticSample for proper packetization
pub struct H264VideoTrack {
/// The underlying WebRTC track with automatic packetization
track: Arc<TrackLocalStaticSample>,
/// Track configuration
config: H264VideoTrackConfig,
/// H264 payloader for manual packetization (if needed)
payloader: Mutex<H264Payloader>,
/// Cached SPS NAL unit for injection before IDR frames
/// Some hardware encoders don't repeat SPS/PPS with every keyframe
cached_sps: Mutex<Option<Bytes>>,
/// Cached PPS NAL unit for injection before IDR frames
cached_pps: Mutex<Option<Bytes>>,
}
/// H264 video track configuration
#[derive(Debug, Clone)]
pub struct H264VideoTrackConfig {
/// Track ID
pub track_id: String,
/// Stream ID
pub stream_id: String,
/// Resolution
pub resolution: Resolution,
/// Target bitrate in kbps
pub bitrate_kbps: u32,
/// Frames per second
pub fps: u32,
/// H.264 profile-level-id (e.g., "42001f" for Baseline L3.1, "64001f" for High L3.1)
/// If None, uses empty string to let browser negotiate
/// Format: PPCCLL where PP=profile_idc, CC=constraint_flags, LL=level_idc
pub profile_level_id: Option<String>,
}
impl Default for H264VideoTrackConfig {
fn default() -> Self {
Self {
track_id: "video0".to_string(),
stream_id: "one-kvm-stream".to_string(),
resolution: Resolution::HD720,
bitrate_kbps: 8000,
fps: 30,
profile_level_id: None, // Let browser negotiate
}
}
}
impl H264VideoTrack {
/// Create a new H264 video track
///
/// If `config.profile_level_id` is set, it will be used in SDP negotiation.
/// Otherwise, uses empty fmtp line to let browser negotiate the best profile.
pub fn new(config: H264VideoTrackConfig) -> Self {
// Build sdp_fmtp_line based on profile_level_id
let sdp_fmtp_line = if let Some(ref profile_level_id) = config.profile_level_id {
// Use specified profile-level-id
format!(
"level-asymmetry-allowed=1;packetization-mode=1;profile-level-id={}",
profile_level_id
)
} else {
// Let browser negotiate - empty string for maximum compatibility
String::new()
};
let codec = RTCRtpCodecCapability {
mime_type: "video/H264".to_string(),
clock_rate: H264_CLOCK_RATE,
channels: 0,
sdp_fmtp_line,
rtcp_feedback: vec![],
};
let track = Arc::new(TrackLocalStaticSample::new(
codec,
config.track_id.clone(),
config.stream_id.clone(),
));
Self {
track,
config,
payloader: Mutex::new(H264Payloader::default()),
cached_sps: Mutex::new(None),
cached_pps: Mutex::new(None),
}
}
/// Get the underlying WebRTC track for adding to peer connection
pub fn track(&self) -> Arc<TrackLocalStaticSample> {
self.track.clone()
}
/// Get track as TrackLocal for peer connection
pub fn as_track_local(&self) -> Arc<dyn TrackLocal + Send + Sync> {
self.track.clone()
}
/// Write an H264 encoded frame to the track
///
/// The frame data should be H264 Annex B format (with start codes 0x00000001 or 0x000001).
/// This is the format produced by hwcodec/FFmpeg encoders.
///
/// IMPORTANT: Each NAL unit is sent separately via write_sample(), without start codes.
/// This is required for proper WebRTC RTP packetization.
/// See: https://github.com/webrtc-rs/webrtc/blob/master/examples/examples/play-from-disk-h264/
///
/// # Arguments
/// * `data` - H264 Annex B encoded frame data
/// * `duration` - Frame duration (typically 1/fps seconds)
/// * `is_keyframe` - Whether this is a keyframe (IDR frame)
pub async fn write_frame(
&self,
data: &[u8],
_duration: Duration,
is_keyframe: bool,
) -> Result<()> {
if data.is_empty() {
return Ok(());
}
// Use H264Reader to parse NAL units from Annex B data
let cursor = Cursor::new(data);
let mut h264_reader = H264Reader::new(cursor, 1024 * 1024);
// Collect all NAL units first to check for SPS/PPS presence
let mut nals: Vec<Bytes> = Vec::new();
let mut has_sps = false;
let mut has_pps = false;
let mut has_idr = false;
// Send each NAL unit separately (like official webrtc-rs example)
// H264Reader returns NAL data WITHOUT start codes - this is what we need
while let Ok(nal) = h264_reader.next_nal() {
if nal.data.is_empty() {
continue;
}
let nal_type = nal.data[0] & 0x1F;
// Skip AUD NAL units (type 9) - not needed for WebRTC
if nal_type == 9 {
continue;
}
// Skip filler data (type 12)
if nal_type == 12 {
continue;
}
// Track NAL types
match nal_type {
5 => has_idr = true,
7 => {
has_sps = true;
// Cache SPS for future injection
*self.cached_sps.lock().await = Some(nal.data.clone().freeze());
}
8 => {
has_pps = true;
// Cache PPS for future injection
*self.cached_pps.lock().await = Some(nal.data.clone().freeze());
}
_ => {}
}
trace!(
"Sending NAL: type={} ({}) size={} bytes",
nal_type,
match nal_type {
1 => "Non-IDR slice",
5 => "IDR slice",
6 => "SEI",
7 => "SPS",
8 => "PPS",
_ => "Other",
},
nal.data.len()
);
nals.push(nal.data.freeze());
}
// Inject cached SPS/PPS before IDR if missing
// This is critical for hardware encoders that don't repeat SPS/PPS
if has_idr && (!has_sps || !has_pps) {
let mut injected_nals: Vec<Bytes> = Vec::new();
if !has_sps {
if let Some(sps) = self.cached_sps.lock().await.clone() {
debug!("Injecting cached SPS before IDR frame");
injected_nals.push(sps);
}
}
if !has_pps {
if let Some(pps) = self.cached_pps.lock().await.clone() {
debug!("Injecting cached PPS before IDR frame");
injected_nals.push(pps);
}
}
if !injected_nals.is_empty() {
injected_nals.extend(nals);
nals = injected_nals;
}
}
let mut nal_count = 0;
let mut total_bytes = 0u64;
// Send NAL data directly WITHOUT start codes
// TrackLocalStaticSample handles RTP packetization internally
// Use duration=1s for each NAL like official webrtc-rs example
for nal_data in nals {
let sample = Sample {
data: nal_data.clone(),
duration: Duration::from_secs(1), // Like official example
..Default::default()
};
if let Err(e) = self.track.write_sample(&sample).await {
// Only log periodically to avoid spam when no peer connected
if nal_count % 100 == 0 {
debug!("Write sample failed (no peer?): {}", e);
}
}
total_bytes += nal_data.len() as u64;
nal_count += 1;
}
trace!(
"Sent frame: {} NAL units, {} bytes, keyframe={}",
nal_count,
total_bytes,
is_keyframe
);
Ok(())
}
/// Write frame with timestamp (for more precise timing control)
pub async fn write_frame_with_timestamp(
&self,
data: &[u8],
_pts_ms: u64,
is_keyframe: bool,
) -> Result<()> {
// Convert pts from milliseconds to frame duration
// Assuming constant frame rate from config
let duration = Duration::from_millis(1000 / self.config.fps as u64);
self.write_frame(data, duration, is_keyframe).await
}
/// Manually packetize H264 data into RTP payloads
///
/// This is useful if you need direct control over RTP packets
/// (e.g., for sending via TrackLocalStaticRTP instead of TrackLocalStaticSample)
pub async fn packetize(&self, data: &[u8], mtu: usize) -> Result<Vec<Bytes>> {
let mut payloader = self.payloader.lock().await;
let bytes = Bytes::copy_from_slice(data);
payloader
.payload(mtu, &bytes)
.map_err(|e| AppError::VideoError(format!("H264 packetization failed: {}", e)))
}
/// Get configuration
pub fn config(&self) -> &H264VideoTrackConfig {
&self.config
}
}
/// Opus audio track using TrackLocalStaticSample
pub struct OpusAudioTrack {
/// The underlying WebRTC track
track: Arc<TrackLocalStaticSample>,
}
impl OpusAudioTrack {
/// Create a new Opus audio track
pub fn new(track_id: &str, stream_id: &str) -> Self {
let codec = RTCRtpCodecCapability {
mime_type: "audio/opus".to_string(),
clock_rate: 48000,
channels: 2,
sdp_fmtp_line: "minptime=10;useinbandfec=1".to_string(),
rtcp_feedback: vec![],
};
let track = Arc::new(TrackLocalStaticSample::new(
codec,
track_id.to_string(),
stream_id.to_string(),
));
Self {
track,
}
}
/// Get the underlying WebRTC track
pub fn track(&self) -> Arc<TrackLocalStaticSample> {
self.track.clone()
}
/// Get track as TrackLocal
pub fn as_track_local(&self) -> Arc<dyn TrackLocal + Send + Sync> {
self.track.clone()
}
/// Write Opus encoded audio data
///
/// # Arguments
/// * `data` - Opus encoded packet
/// * `samples` - Number of audio samples in this packet (typically 960 for 20ms at 48kHz)
pub async fn write_packet(&self, data: &[u8], samples: u32) -> Result<()> {
if data.is_empty() {
return Ok(());
}
// Opus frame duration based on samples
// 48000 Hz, so duration = samples / 48000 seconds
let duration = Duration::from_micros((samples as u64 * 1_000_000) / 48000);
let sample = Sample {
data: Bytes::copy_from_slice(data),
duration,
..Default::default()
};
self.track
.write_sample(&sample)
.await
.map_err(|e| {
error!("Failed to write Opus sample: {}", e);
AppError::WebRtcError(format!("Failed to write audio sample: {}", e))
})
}
}
/// Strip AUD (Access Unit Delimiter) NAL units from H264 Annex B data
/// AUD (NAL type 9) can cause decoding issues in some browser WebRTC implementations
/// Also strips filler data (NAL type 12) and SEI (NAL type 6) for cleaner output
pub fn strip_aud_nal_units(data: &[u8]) -> Vec<u8> {
let mut result = Vec::with_capacity(data.len());
let mut i = 0;
while i < data.len() {
// Find start code (3 or 4 bytes)
let (start_code_pos, start_code_len) = if i + 4 <= data.len()
&& data[i] == 0
&& data[i + 1] == 0
&& data[i + 2] == 0
&& data[i + 3] == 1
{
(i, 4)
} else if i + 3 <= data.len() && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1 {
(i, 3)
} else {
i += 1;
continue;
};
let nal_start = start_code_pos + start_code_len;
if nal_start >= data.len() {
break;
}
let nal_type = data[nal_start] & 0x1F;
// Find next start code to determine NAL unit end
let mut nal_end = data.len();
let mut j = nal_start + 1;
while j + 3 <= data.len() {
if (data[j] == 0 && data[j + 1] == 0 && data[j + 2] == 1)
|| (j + 4 <= data.len()
&& data[j] == 0
&& data[j + 1] == 0
&& data[j + 2] == 0
&& data[j + 3] == 1)
{
nal_end = j;
break;
}
j += 1;
}
// Skip AUD (9), filler (12), and optionally SEI (6)
// Keep SPS (7), PPS (8), IDR (5), non-IDR slice (1)
if nal_type != 9 && nal_type != 12 {
// Include this NAL unit with start code
result.extend_from_slice(&data[start_code_pos..nal_end]);
}
i = nal_end;
}
// If nothing was stripped, return original data
if result.is_empty() && !data.is_empty() {
return data.to_vec();
}
result
}
/// Extract SPS and PPS NAL units from H264 Annex B data
/// Returns (SPS data without start code, PPS data without start code)
pub fn extract_sps_pps(data: &[u8]) -> (Option<Vec<u8>>, Option<Vec<u8>>) {
let mut sps: Option<Vec<u8>> = None;
let mut pps: Option<Vec<u8>> = None;
let mut i = 0;
while i < data.len() {
// Find start code (3 or 4 bytes)
let start_code_len = if i + 4 <= data.len()
&& data[i] == 0
&& data[i + 1] == 0
&& data[i + 2] == 0
&& data[i + 3] == 1
{
4
} else if i + 3 <= data.len() && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1 {
3
} else {
i += 1;
continue;
};
let nal_start = i + start_code_len;
if nal_start >= data.len() {
break;
}
let nal_type = data[nal_start] & 0x1F;
// Find next start code to determine NAL unit end
let mut nal_end = data.len();
let mut j = nal_start + 1;
while j + 3 <= data.len() {
if (data[j] == 0 && data[j + 1] == 0 && data[j + 2] == 1)
|| (j + 4 <= data.len()
&& data[j] == 0
&& data[j + 1] == 0
&& data[j + 2] == 0
&& data[j + 3] == 1)
{
nal_end = j;
break;
}
j += 1;
}
// Extract SPS (NAL type 7) and PPS (NAL type 8) without start codes
match nal_type {
7 => {
sps = Some(data[nal_start..nal_end].to_vec());
}
8 => {
pps = Some(data[nal_start..nal_end].to_vec());
}
_ => {}
}
i = nal_end;
}
(sps, pps)
}
/// Check if H264 Annex B data contains SPS and PPS NAL units
pub fn has_sps_pps(data: &[u8]) -> bool {
let mut has_sps = false;
let mut has_pps = false;
let mut i = 0;
while i < data.len() {
// Find start code (3 or 4 bytes)
let start_code_len = if i + 4 <= data.len()
&& data[i] == 0
&& data[i + 1] == 0
&& data[i + 2] == 0
&& data[i + 3] == 1
{
4
} else if i + 3 <= data.len() && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1 {
3
} else {
i += 1;
continue;
};
let nal_start = i + start_code_len;
if nal_start >= data.len() {
break;
}
let nal_type = data[nal_start] & 0x1F;
match nal_type {
7 => has_sps = true,
8 => has_pps = true,
_ => {}
}
if has_sps && has_pps {
return true;
}
// Move past start code to next position
i = nal_start + 1;
}
has_sps && has_pps
}
/// Check if H264 data contains a keyframe (IDR NAL unit)
pub fn is_h264_keyframe(data: &[u8]) -> bool {
// Look for IDR NAL unit (type 5)
// NAL units start with 0x00 0x00 0x01 or 0x00 0x00 0x00 0x01
let mut i = 0;
while i < data.len() {
// Find start code
if i + 3 < data.len() && data[i] == 0 && data[i + 1] == 0 {
let (nal_start, _start_code_len) = if data[i + 2] == 1 {
(i + 3, 3)
} else if i + 4 < data.len() && data[i + 2] == 0 && data[i + 3] == 1 {
(i + 4, 4)
} else {
i += 1;
continue;
};
if nal_start < data.len() {
let nal_type = data[nal_start] & 0x1F;
// IDR = 5, SPS = 7, PPS = 8
if nal_type == 5 {
return true;
}
}
i = nal_start;
} else {
i += 1;
}
}
false
}
/// Parse profile-level-id from SPS NAL unit data (without start code)
///
/// Returns a 6-character hex string like "42001f" (Baseline L3.1) or "64001f" (High L3.1)
///
/// SPS structure (first 4 bytes after NAL header):
/// - Byte 0: NAL header (0x67 for SPS)
/// - Byte 1: profile_idc (0x42=Baseline, 0x4D=Main, 0x64=High)
/// - Byte 2: constraint_set_flags
/// - Byte 3: level_idc (0x1f=3.1, 0x28=4.0, 0x33=5.1)
pub fn parse_profile_level_id_from_sps(sps: &[u8]) -> Option<String> {
// SPS NAL must be at least 4 bytes: NAL header + profile_idc + constraints + level_idc
if sps.len() < 4 {
return None;
}
// First byte is NAL header, skip it
let profile_idc = sps[1];
let constraint_set_flags = sps[2];
let level_idc = sps[3];
Some(format!(
"{:02x}{:02x}{:02x}",
profile_idc, constraint_set_flags, level_idc
))
}
/// Extract profile-level-id from H264 Annex B data (containing SPS)
///
/// This function finds the SPS NAL unit and extracts the profile-level-id.
/// Useful for determining the actual encoder output profile.
///
/// # Example
/// ```ignore
/// let h264_data = encoder.encode(&yuv)?;
/// if let Some(profile_level_id) = extract_profile_level_id(&h264_data) {
/// println!("Encoder outputs profile-level-id: {}", profile_level_id);
/// // Use this to configure H264VideoTrackConfig
/// }
/// ```
pub fn extract_profile_level_id(data: &[u8]) -> Option<String> {
let (sps, _) = extract_sps_pps(data);
sps.and_then(|sps_data| parse_profile_level_id_from_sps(&sps_data))
}
/// Common H.264 profile-level-id values
pub mod profiles {
/// Constrained Baseline Profile Level 3.1 - Maximum browser compatibility
pub const CONSTRAINED_BASELINE_31: &str = "42e01f";
/// Baseline Profile Level 3.1
pub const BASELINE_31: &str = "42001f";
/// Main Profile Level 3.1
pub const MAIN_31: &str = "4d001f";
/// High Profile Level 3.1 - Hardware encoders typically output this
pub const HIGH_31: &str = "64001f";
/// High Profile Level 4.0
pub const HIGH_40: &str = "640028";
/// High Profile Level 5.1
pub const HIGH_51: &str = "640033";
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_is_h264_keyframe() {
// IDR frame with 4-byte start code
let idr_frame = vec![0x00, 0x00, 0x00, 0x01, 0x65]; // NAL type 5 = IDR
assert!(is_h264_keyframe(&idr_frame));
// IDR frame with 3-byte start code
let idr_frame_3 = vec![0x00, 0x00, 0x01, 0x65];
assert!(is_h264_keyframe(&idr_frame_3));
// Non-IDR frame (P-frame, NAL type 1)
let p_frame = vec![0x00, 0x00, 0x00, 0x01, 0x41];
assert!(!is_h264_keyframe(&p_frame));
// SPS (NAL type 7) - not a keyframe by itself
let sps = vec![0x00, 0x00, 0x00, 0x01, 0x67];
assert!(!is_h264_keyframe(&sps));
// Multiple NAL units with IDR
let multi_nal = vec![
0x00, 0x00, 0x00, 0x01, 0x67, 0x42, 0x00, 0x1f, // SPS
0x00, 0x00, 0x00, 0x01, 0x68, 0xce, 0x38, 0x80, // PPS
0x00, 0x00, 0x00, 0x01, 0x65, 0x88, 0x84, // IDR
];
assert!(is_h264_keyframe(&multi_nal));
}
#[test]
fn test_h264_track_config_default() {
let config = H264VideoTrackConfig::default();
assert_eq!(config.fps, 30);
assert_eq!(config.bitrate_kbps, 8000);
assert_eq!(config.resolution, Resolution::HD720);
}
}