From 59a15ddd1d8d328916656d28c08e3489d81e41bf Mon Sep 17 00:00:00 2001 From: Josh Holmer Date: Thu, 22 Dec 2022 20:10:19 -0500 Subject: [PATCH 1/2] Compute frame pyramid sizes and frame depths --- src/api/internal.rs | 202 +++++++++++++++++++++++++++++------- src/scenechange/mod.rs | 10 +- src/scenechange/standard.rs | 1 + 3 files changed, 170 insertions(+), 43 deletions(-) diff --git a/src/api/internal.rs b/src/api/internal.rs index 7066065cf6..947968f1b1 100644 --- a/src/api/internal.rs +++ b/src/api/internal.rs @@ -28,6 +28,8 @@ use crate::stats::EncoderStats; use crate::tiling::Area; use crate::util::Pixel; use arrayvec::ArrayVec; +use debug_unreachable::debug_unreachable; +use itertools::Itertools; use rust_hawktracer::*; use std::cmp; use std::collections::{BTreeMap, BTreeSet}; @@ -234,11 +236,10 @@ pub(crate) struct ContextInner { pub(super) frame_q: FrameQueue, /// Maps *output_frameno* to frame data pub(super) frame_data: FrameDataQueue, - /// A list of the input_frameno for keyframes in this encode. - /// Needed so that we don't need to keep all of the frame_invariants in - /// memory for the whole life of the encode. - // TODO: Is this needed at all? - keyframes: BTreeSet, + /// A list of the precomputed frame types and pyramid depth for frames within the lookahead. + /// This allows us to have dynamic pyramid depths and widths by computing them before + /// creating the frame invariants. + frame_depths: BTreeMap, // TODO: Is this needed at all? keyframes_forced: BTreeSet, /// A storage space for reordered frames. 
@@ -266,8 +267,8 @@ impl ContextInner { pub fn new(enc: &EncoderConfig) -> Self { // initialize with temporal delimiter let packet_data = TEMPORAL_DELIMITER.to_vec(); - let mut keyframes = BTreeSet::new(); - keyframes.insert(0); + let mut frame_depths = BTreeMap::new(); + frame_depths.insert(0, FrameDepth::Intra); let maybe_ac_qi_max = if enc.quantizer < 255 { Some(enc.quantizer as u8) } else { None }; @@ -284,7 +285,7 @@ impl ContextInner { frames_processed: 0, frame_q: BTreeMap::new(), frame_data: BTreeMap::new(), - keyframes, + frame_depths, keyframes_forced: BTreeSet::new(), packet_data, gop_output_frameno_start: BTreeMap::new(), @@ -362,8 +363,8 @@ impl ContextInner { let lookahead_frames = self .frame_q .range(self.next_lookahead_frame - 1..) - .filter_map(|(&_input_frameno, frame)| frame.as_ref()) - .collect::>>>(); + .filter_map(|(&_input_frameno, frame)| frame.as_ref().map(Arc::clone)) + .collect::>>>(); if is_flushing { // This is the last time send_frame is called, process all the @@ -376,22 +377,10 @@ impl ContextInner { break; } - Self::compute_keyframe_placement( - cur_lookahead_frames, - &self.keyframes_forced, - &mut self.keyframe_detector, - &mut self.next_lookahead_frame, - &mut self.keyframes, - ); + self.compute_frame_placement(cur_lookahead_frames); } } else { - Self::compute_keyframe_placement( - &lookahead_frames, - &self.keyframes_forced, - &mut self.keyframe_detector, - &mut self.next_lookahead_frame, - &mut self.keyframes, - ); + self.compute_frame_placement(&lookahead_frames); } } @@ -439,10 +428,13 @@ impl ContextInner { &self, gop_input_frameno_start: u64, ignore_limit: bool, ) -> u64 { let next_detected = self - .keyframes + .frame_depths .iter() - .find(|&&input_frameno| input_frameno > gop_input_frameno_start) - .cloned(); + .find(|&(&input_frameno, frame_depth)| { + frame_depth == &FrameDepth::Intra + && input_frameno > gop_input_frameno_start + }) + .map(|(input_frameno, _)| *input_frameno); let mut next_limit = 
gop_input_frameno_start + self.config.max_key_frame_interval; if !ignore_limit && self.limit.is_some() { @@ -577,7 +569,8 @@ impl ContextInner { } // Now that we know the input_frameno, look up the correct frame type - let frame_type = if self.keyframes.contains(&input_frameno) { + let frame_type = if self.frame_depths[&input_frameno] == FrameDepth::Intra + { FrameType::KEY } else { FrameType::INTER @@ -862,22 +855,145 @@ impl ContextInner { } #[hawktracer(compute_keyframe_placement)] - pub fn compute_keyframe_placement( - lookahead_frames: &[&Arc>], keyframes_forced: &BTreeSet, - keyframe_detector: &mut SceneChangeDetector, - next_lookahead_frame: &mut u64, keyframes: &mut BTreeSet, + pub fn compute_frame_placement( + &mut self, lookahead_frames: &[Arc>], ) { - if keyframes_forced.contains(next_lookahead_frame) - || keyframe_detector.analyze_next_frame( + if self.keyframes_forced.contains(&self.next_lookahead_frame) { + self.frame_depths.insert(self.next_lookahead_frame, FrameDepth::Intra); + } else { + let is_keyframe = self.keyframe_detector.analyze_next_frame( lookahead_frames, - *next_lookahead_frame, - *keyframes.iter().last().unwrap(), - ) - { - keyframes.insert(*next_lookahead_frame); + self.next_lookahead_frame, + *self.frame_depths.iter().last().unwrap().0, + ); + if is_keyframe { + self.keyframe_detector.inter_costs.remove(&self.next_lookahead_frame); + self.frame_depths.insert(self.next_lookahead_frame, FrameDepth::Intra); + } else if self.frame_depths[&(self.next_lookahead_frame - 1)] + == FrameDepth::Intra + { + // The last frame is a keyframe, so this one must start a new mini-GOP + self.keyframe_detector.inter_costs.remove(&self.next_lookahead_frame); + self + .frame_depths + .insert(self.next_lookahead_frame, FrameDepth::Inter { depth: 0 }); + } else { + self.compute_current_minigop_cost(); + }; } - *next_lookahead_frame += 1; + self.next_lookahead_frame += 1; + } + + fn compute_current_minigop_cost(&mut self) { + let minigop_start_frame = *self + 
.frame_depths + .iter() + .rev() + .find(|(_, d)| **d == FrameDepth::Inter { depth: 0 }) + .unwrap() + .0; + + let current_width = + (self.next_lookahead_frame - minigop_start_frame) as u8; + let max_pyramid_width = + self.config.speed_settings.rdo_lookahead_frames.min(32) as u8; + + let mut need_new_minigop = false; + if current_width == max_pyramid_width { + // Since we hit the max width, we must start a new mini-GOP. + need_new_minigop = true; + } else { + let current_minigop_cost = self + .keyframe_detector + .inter_costs + .range(minigop_start_frame..=self.next_lookahead_frame) + .map(|cost| { + // Adjust the inter cost down to 8-bit scaling + *cost.1 / (1 << (self.config.bit_depth - 8)) as f64 + }) + .sum::(); + let allowance = match current_width + 1 { + // Depth 0 + 1..=2 => 18000.0, + // Depth 1 + 3 => 20000.0, + // Depth 2 + 4 => 20000.0, + // Depth 3 + 5..=8 => 18000.0, + // Depth 4 + 9..=16 => 12000.0, + // Depth 5 + 17..=32 => 10000.0, + // SAFETY: 32 is the max mini-GOP width + _ => unsafe { debug_unreachable!() }, + }; + if current_minigop_cost > allowance { + need_new_minigop = true; + } + } + + if need_new_minigop { + self.compute_minigop_frame_order( + minigop_start_frame, + self.next_lookahead_frame - 1, + ); + self + .frame_depths + .insert(self.next_lookahead_frame, FrameDepth::Inter { depth: 0 }); + for frameno in minigop_start_frame..=self.next_lookahead_frame { + self.keyframe_detector.inter_costs.remove(&frameno); + } + } + } + + // Start and end frame are inclusive + fn compute_minigop_frame_order(&mut self, start_frame: u64, end_frame: u64) { + // By this point, `start_frame` should already be inserted at depth 0 + if start_frame == end_frame { + return; + } + + let mut frames = ((start_frame + 1)..=end_frame).collect::>(); + let mut current_depth = 0; + while !frames.is_empty() { + if current_depth == 0 { + // Special case for depth 0, we generally want the last frame at this depth + self + .frame_depths + 
.insert(frames.pop_last().unwrap(), FrameDepth::Inter { depth: 0 }); + current_depth = 1; + } else { + let max_frames_in_level = 1 << (current_depth - 1); + if frames.len() <= max_frames_in_level { + for frameno in frames.into_iter() { + self + .frame_depths + .insert(frameno, FrameDepth::Inter { depth: current_depth }); + } + break; + } else { + let mut breakpoints = vec![*frames.first().unwrap()]; + let mut prev_val = *frames.first().unwrap(); + for frameno in &frames { + if *frameno > prev_val + 1 { + breakpoints.push(*frameno); + } + prev_val = *frameno; + } + breakpoints.push(*frames.last().unwrap()); + for (start, end) in breakpoints.into_iter().tuple_windows() { + let midpoint = (end - start + 1) / 2; + frames.remove(&midpoint); + self + .frame_depths + .insert(midpoint, FrameDepth::Inter { depth: current_depth }); + } + current_depth += 1; + } + } + } } #[hawktracer(compute_frame_invariants)] @@ -1718,3 +1834,9 @@ impl ContextInner { } } } + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum FrameDepth { + Intra, + Inter { depth: u8 }, +} diff --git a/src/scenechange/mod.rs b/src/scenechange/mod.rs index 7414f09d3b..0b9c463c6e 100644 --- a/src/scenechange/mod.rs +++ b/src/scenechange/mod.rs @@ -92,9 +92,12 @@ pub struct SceneChangeDetector { cpu_feature_level: CpuFeatureLevel, encoder_config: EncoderConfig, sequence: Arc, - /// Calculated intra costs for each input frame. + /// Calculated block-level intra costs for each input frame. /// These are cached for reuse later in rav1e. pub(crate) intra_costs: BTreeMap>, + /// Calculated frame-level inter costs for each input frame compared to its previous frame. + /// These are cached for reuse later in rav1e. + pub(crate) inter_costs: BTreeMap, /// Temporary buffer used by estimate_intra_costs. 
pub(crate) temp_plane: Option>, } @@ -152,6 +155,7 @@ impl SceneChangeDetector { encoder_config, sequence, intra_costs: BTreeMap::new(), + inter_costs: BTreeMap::new(), temp_plane: None, } } @@ -166,7 +170,7 @@ impl SceneChangeDetector { /// This will gracefully handle the first frame in the video as well. #[hawktracer(analyze_next_frame)] pub fn analyze_next_frame( - &mut self, frame_set: &[&Arc>], input_frameno: u64, + &mut self, frame_set: &[Arc>], input_frameno: u64, previous_keyframe: u64, ) -> bool { // Use score deque for adaptive threshold for scene cut @@ -254,7 +258,7 @@ impl SceneChangeDetector { // Initially fill score deque with frame scores fn initialize_score_deque( - &mut self, frame_set: &[&Arc>], input_frameno: u64, + &mut self, frame_set: &[Arc>], input_frameno: u64, init_len: usize, ) { for x in 0..init_len { diff --git a/src/scenechange/standard.rs b/src/scenechange/standard.rs index eb52b1280b..53e7bd7534 100644 --- a/src/scenechange/standard.rs +++ b/src/scenechange/standard.rs @@ -75,6 +75,7 @@ impl SceneChangeDetector { self.sequence.clone(), buffer, ); + self.inter_costs.insert(input_frameno, mv_inter_cost); }); s.spawn(|_| { imp_block_cost = From b1e4e5d133472eb7cf9acc7119dd11fb71bc8236 Mon Sep 17 00:00:00 2001 From: Josh Holmer Date: Fri, 23 Dec 2022 17:08:24 -0500 Subject: [PATCH 2/2] WIP The ref frame code that has slot_idx math I don't understand is at src/encoder.rs:1040-1080 --- src/api/config/encoder.rs | 8 + src/api/internal.rs | 616 +++++++++++++------------------------- src/api/lookahead.rs | 22 +- src/encoder.rs | 127 ++++---- src/me.rs | 6 +- 5 files changed, 301 insertions(+), 478 deletions(-) diff --git a/src/api/config/encoder.rs b/src/api/config/encoder.rs index be33370769..4fd2b501ea 100644 --- a/src/api/config/encoder.rs +++ b/src/api/config/encoder.rs @@ -228,6 +228,14 @@ impl EncoderConfig { !self.speed_settings.transform.tx_domain_distortion } + pub const fn reorder(&self) -> bool { + !self.low_latency + } + + pub 
const fn multiref(&self) -> bool { + self.reorder() || self.speed_settings.multiref + } + /// Describes whether the output is targeted as HDR pub fn is_hdr(&self) -> bool { self diff --git a/src/api/internal.rs b/src/api/internal.rs index 947968f1b1..b94189a82f 100644 --- a/src/api/internal.rs +++ b/src/api/internal.rs @@ -38,173 +38,10 @@ use std::fs; use std::path::PathBuf; use std::sync::Arc; -/// The set of options that controls frame re-ordering and reference picture -/// selection. -/// The options stored here are invariant over the whole encode. #[derive(Debug, Clone, Copy)] -pub struct InterConfig { - /// Whether frame re-ordering is enabled. - reorder: bool, - /// Whether P-frames can use multiple references. - pub(crate) multiref: bool, - /// The depth of the re-ordering pyramid. - /// The current code cannot support values larger than 2. - pub(crate) pyramid_depth: u64, - /// Number of input frames in group. - pub(crate) group_input_len: u64, - /// Number of output frames in group. - /// This includes both hidden frames and "show existing frame" frames. - group_output_len: u64, - /// Interval between consecutive S-frames. - /// Keyframes reset this interval. - /// This MUST be a multiple of group_input_len. - pub(crate) switch_frame_interval: u64, -} - -impl InterConfig { - pub(crate) fn new(enc_config: &EncoderConfig) -> InterConfig { - let reorder = !enc_config.low_latency; - // A group always starts with (group_output_len - group_input_len) hidden - // frames, followed by group_input_len shown frames. - // The shown frames iterate over the input frames in order, with frames - // already encoded as hidden frames now displayed with Show Existing - // Frame. 
- // For example, for a pyramid depth of 2, the group is as follows: - // |TU |TU |TU |TU - // idx_in_group_output: 0 1 2 3 4 5 - // input_frameno: 4 2 1 SEF 3 SEF - // output_frameno: 1 2 3 4 5 6 - // level: 0 1 2 1 2 0 - // ^^^^^ ^^^^^^^^^^^^^ - // hidden shown - // TODO: This only works for pyramid_depth <= 2 --- after that we need - // more hidden frames in the middle of the group. - let pyramid_depth = if reorder { 2 } else { 0 }; - let group_input_len = 1 << pyramid_depth; - let group_output_len = group_input_len + pyramid_depth; - let switch_frame_interval = enc_config.switch_frame_interval; - assert!(switch_frame_interval % group_input_len == 0); - InterConfig { - reorder, - multiref: reorder || enc_config.speed_settings.multiref, - pyramid_depth, - group_input_len, - group_output_len, - switch_frame_interval, - } - } - - /// Get the index of an output frame in its re-ordering group given the output - /// frame number of the frame in the current keyframe gop. - /// When re-ordering is disabled, this always returns 0. - pub(crate) fn get_idx_in_group_output( - &self, output_frameno_in_gop: u64, - ) -> u64 { - // The first frame in the GOP should be a keyframe and is not re-ordered, - // so we should not be calling this function on it. - debug_assert!(output_frameno_in_gop > 0); - (output_frameno_in_gop - 1) % self.group_output_len - } - - /// Get the order-hint of an output frame given the output frame number of the - /// frame in the current keyframe gop and the index of that output frame - /// in its re-ordering gorup. - pub(crate) fn get_order_hint( - &self, output_frameno_in_gop: u64, idx_in_group_output: u64, - ) -> u32 { - // The first frame in the GOP should be a keyframe, but currently this - // function only handles inter frames. - // We could return 0 for keyframes if keyframe support is needed. - debug_assert!(output_frameno_in_gop > 0); - // Which P-frame group in the current gop is this output frame in? 
- // Subtract 1 because the first frame in the gop is always a keyframe. - let group_idx = (output_frameno_in_gop - 1) / self.group_output_len; - // Get the offset to the corresponding input frame. - // TODO: This only works with pyramid_depth <= 2. - let offset = if idx_in_group_output < self.pyramid_depth { - self.group_input_len >> idx_in_group_output - } else { - idx_in_group_output - self.pyramid_depth + 1 - }; - // Construct the final order hint relative to the start of the group. - (self.group_input_len * group_idx + offset) as u32 - } - - /// Get the level of the current frame in the pyramid. - pub(crate) const fn get_level(&self, idx_in_group_output: u64) -> u64 { - if !self.reorder { - 0 - } else if idx_in_group_output < self.pyramid_depth { - // Hidden frames are output first (to be shown in the future). - idx_in_group_output - } else { - // Shown frames - // TODO: This only works with pyramid_depth <= 2. - pos_to_lvl( - idx_in_group_output - self.pyramid_depth + 1, - self.pyramid_depth, - ) - } - } - - pub(crate) const fn get_slot_idx(&self, level: u64, order_hint: u32) -> u32 { - // Frames with level == 0 are stored in slots 0..4, and frames with higher - // values of level in slots 4..8 - if level == 0 { - (order_hint >> self.pyramid_depth) & 3 - } else { - // This only works with pyramid_depth <= 4. - 3 + level as u32 - } - } - - pub(crate) const fn get_show_frame(&self, idx_in_group_output: u64) -> bool { - idx_in_group_output >= self.pyramid_depth - } - - pub(crate) const fn get_show_existing_frame( - &self, idx_in_group_output: u64, - ) -> bool { - // The self.reorder test here is redundant, but short-circuits the rest, - // avoiding a bunch of work when it's false. 
- self.reorder - && self.get_show_frame(idx_in_group_output) - && (idx_in_group_output - self.pyramid_depth + 1).count_ones() == 1 - && idx_in_group_output != self.pyramid_depth - } - - pub(crate) fn get_input_frameno( - &self, output_frameno_in_gop: u64, gop_input_frameno_start: u64, - ) -> u64 { - if output_frameno_in_gop == 0 { - gop_input_frameno_start - } else { - let idx_in_group_output = - self.get_idx_in_group_output(output_frameno_in_gop); - let order_hint = - self.get_order_hint(output_frameno_in_gop, idx_in_group_output); - gop_input_frameno_start + order_hint as u64 - } - } - - const fn max_reordering_latency(&self) -> u64 { - self.group_input_len - } - - pub(crate) const fn keyframe_lookahead_distance(&self) -> u64 { - self.max_reordering_latency() + 1 - } - - pub(crate) const fn allowed_ref_frames(&self) -> &[RefType] { - use crate::partition::RefType::*; - if self.reorder { - &ALL_INTER_REFS - } else if self.multiref { - &[LAST_FRAME, LAST2_FRAME, LAST3_FRAME, GOLDEN_FRAME] - } else { - &[LAST_FRAME] - } - } +pub(crate) struct MiniGopConfig { + pub group_input_len: usize, + pub pyramid_depth: u8, } // Thin wrapper for frame-related data @@ -230,7 +67,7 @@ pub(crate) struct ContextInner { pub(crate) frame_count: u64, pub(crate) limit: Option, pub(crate) output_frameno: u64, - pub(super) inter_cfg: InterConfig, + pub(super) minigop_config: MiniGopConfig, pub(super) frames_processed: u64, /// Maps *input_frameno* to frames pub(super) frame_q: FrameQueue, @@ -248,6 +85,11 @@ pub(crate) struct ContextInner { gop_output_frameno_start: BTreeMap, /// Maps `output_frameno` to `gop_input_frameno_start`. pub(crate) gop_input_frameno_start: BTreeMap, + /// Maps `output_frameno` to `minigop_output_frameno_start`. + minigop_output_frameno_start: BTreeMap, + /// Maps `output_frameno` to `minigop_input_frameno_start`. 
+ pub(crate) minigop_input_frameno_start: BTreeMap, + frame_type_lookahead_distance: usize, keyframe_detector: SceneChangeDetector, pub(crate) config: Arc, seq: Arc, @@ -274,13 +116,12 @@ impl ContextInner { if enc.quantizer < 255 { Some(enc.quantizer as u8) } else { None }; let seq = Arc::new(Sequence::new(enc)); - let inter_cfg = InterConfig::new(enc); - let lookahead_distance = inter_cfg.keyframe_lookahead_distance() as usize; + let lookahead_distance = enc.speed_settings.rdo_lookahead_frames.min(32); ContextInner { frame_count: 0, limit: None, - inter_cfg, + minigop_config: MiniGopConfig { group_input_len: 1, pyramid_depth: 0 }, output_frameno: 0, frames_processed: 0, frame_q: BTreeMap::new(), @@ -290,6 +131,9 @@ impl ContextInner { packet_data, gop_output_frameno_start: BTreeMap::new(), gop_input_frameno_start: BTreeMap::new(), + minigop_output_frameno_start: BTreeMap::new(), + minigop_input_frameno_start: BTreeMap::new(), + frame_type_lookahead_distance: lookahead_distance, keyframe_detector: SceneChangeDetector::new( enc.clone(), CpuFeatureLevel::default(), @@ -317,6 +161,20 @@ impl ContextInner { } } + pub(crate) fn allowed_ref_frames(&self) -> &[RefType] { + use crate::partition::RefType::*; + + let reorder = self.config.reorder(); + let multiref = self.config.multiref(); + if reorder { + &ALL_INTER_REFS + } else if multiref { + &[LAST_FRAME, LAST2_FRAME, LAST3_FRAME, GOLDEN_FRAME] + } else { + &[LAST_FRAME] + } + } + #[hawktracer(send_frame)] pub fn send_frame( &mut self, mut frame: Option>>, @@ -394,7 +252,7 @@ impl ContextInner { fn needs_more_frame_q_lookahead(&self, input_frameno: u64) -> bool { let lookahead_end = self.frame_q.keys().last().cloned().unwrap_or(0); let frames_needed = - input_frameno + self.inter_cfg.keyframe_lookahead_distance() + 1; + input_frameno + self.frame_type_lookahead_distance as u64 + 1; lookahead_end < frames_needed && self.needs_more_frames(lookahead_end) } @@ -424,6 +282,30 @@ impl ContextInner { 
.take(self.config.speed_settings.rdo_lookahead_frames + 1) } + fn next_minigop_input_frameno( + &self, minigop_input_frameno_start: u64, ignore_limit: bool, + ) -> u64 { + let next_detected = self + .frame_depths + .iter() + .find(|&(&input_frameno, frame_depth)| { + (frame_depth == &FrameDepth::Intra + || frame_depth + == &FrameDepth::Inter { depth: 0, is_minigop_start: true }) + && input_frameno > minigop_input_frameno_start + }) + .map(|(input_frameno, _)| *input_frameno); + let mut next_limit = + minigop_input_frameno_start + self.config.max_key_frame_interval; + if !ignore_limit && self.limit.is_some() { + next_limit = next_limit.min(self.limit.unwrap()); + } + if next_detected.is_none() { + return next_limit; + } + cmp::min(next_detected.unwrap(), next_limit) + } + fn next_keyframe_input_frameno( &self, gop_input_frameno_start: u64, ignore_limit: bool, ) -> u64 { @@ -481,6 +363,53 @@ impl ContextInner { data_location } + fn get_input_frameno( + &mut self, output_frameno: u64, minigop_input_frameno_start: u64, + ) -> u64 { + let next_minigop_start = self + .frame_depths + .range((minigop_input_frameno_start + 1)..) 
+ .find(|&(_, depth)| depth.is_minigop_start()) + .map(|(frameno, _)| *frameno); + let minigop_end = next_minigop_start + .unwrap_or_else(|| *self.frame_depths.keys().last().unwrap()); + let minigop_depth = self + .frame_depths + .range(minigop_input_frameno_start..=minigop_end) + .map(|(_, depth)| depth.depth()) + .max() + .unwrap(); + let last_fi = &self.frame_data.last_key_value().unwrap().1.unwrap().fi; + let next_input_frameno = self + .frame_depths + .range(minigop_input_frameno_start..=minigop_end) + .find(|(frameno, depth)| { + depth.depth() == last_fi.pyramid_level + && **frameno > last_fi.input_frameno + }) + .or_else(|| { + self + .frame_depths + .range(minigop_input_frameno_start..=minigop_end) + .find(|(_, depth)| depth.depth() == last_fi.pyramid_level + 1) + }) + .map(|(frameno, _)| *frameno); + if let Some(frameno) = next_input_frameno { + frameno + } else { + // This frame starts a new minigop + let input_frameno = last_fi.input_frameno + 1; + self.minigop_output_frameno_start.insert(output_frameno, output_frameno); + self.minigop_input_frameno_start.insert(output_frameno, input_frameno); + self.minigop_config = MiniGopConfig { + group_input_len: (minigop_end - minigop_input_frameno_start + 1) + as usize, + pyramid_depth: minigop_depth, + }; + input_frameno + } + } + fn build_frame_properties( &mut self, output_frameno: u64, ) -> Result>, EncoderStatus> { @@ -501,11 +430,26 @@ impl ContextInner { .gop_input_frameno_start .insert(output_frameno, prev_gop_input_frameno_start); - let output_frameno_in_gop = - output_frameno - self.gop_output_frameno_start[&output_frameno]; - let mut input_frameno = self.inter_cfg.get_input_frameno( - output_frameno_in_gop, - self.gop_input_frameno_start[&output_frameno], + let (prev_minigop_output_frameno_start, prev_minigop_input_frameno_start) = + if output_frameno == 0 { + (0, 0) + } else { + ( + self.minigop_output_frameno_start[&(output_frameno - 1)], + self.minigop_input_frameno_start[&(output_frameno - 1)], + ) 
+ }; + + self + .minigop_output_frameno_start + .insert(output_frameno, prev_minigop_output_frameno_start); + self + .minigop_input_frameno_start + .insert(output_frameno, prev_minigop_input_frameno_start); + + let mut input_frameno = self.get_input_frameno( + output_frameno, + self.minigop_input_frameno_start[&output_frameno], ); if self.needs_more_frame_q_lookahead(input_frameno) { @@ -518,49 +462,6 @@ impl ContextInner { Box::new([]) }; - if output_frameno_in_gop > 0 { - let next_keyframe_input_frameno = self.next_keyframe_input_frameno( - self.gop_input_frameno_start[&output_frameno], - false, - ); - let prev_input_frameno = - self.get_previous_fi(output_frameno).input_frameno; - if input_frameno >= next_keyframe_input_frameno { - if !self.inter_cfg.reorder - || ((output_frameno_in_gop - 1) % self.inter_cfg.group_output_len - == 0 - && prev_input_frameno == (next_keyframe_input_frameno - 1)) - { - input_frameno = next_keyframe_input_frameno; - - // If we'll return early, do it before modifying the state. 
- match self.frame_q.get(&input_frameno) { - Some(Some(_)) => {} - _ => { - return Err(EncoderStatus::NeedMoreData); - } - } - - *self.gop_output_frameno_start.get_mut(&output_frameno).unwrap() = - output_frameno; - *self.gop_input_frameno_start.get_mut(&output_frameno).unwrap() = - next_keyframe_input_frameno; - } else { - let fi = FrameInvariants::new_inter_frame( - self.get_previous_coded_fi(output_frameno), - &self.inter_cfg, - self.gop_input_frameno_start[&output_frameno], - output_frameno_in_gop, - next_keyframe_input_frameno, - self.config.error_resilient, - t35_metadata, - ); - assert!(fi.is_none()); - return Ok(fi); - } - } - } - match self.frame_q.get(&input_frameno) { Some(Some(_)) => {} _ => { @@ -593,18 +494,44 @@ impl ContextInner { ); Ok(Some(fi)) } else { - let next_keyframe_input_frameno = self.next_keyframe_input_frameno( + let minigop_input_frameno_start = + self.minigop_input_frameno_start[&output_frameno]; + let next_minigop_input_frameno = self.next_minigop_input_frameno( self.gop_input_frameno_start[&output_frameno], false, ); + // Show frame if all previous input frames have already been shown + let show_frame = self + .frame_data + .range(minigop_input_frameno_start..) + .filter(|(_, data)| data.unwrap().fi.show_frame) + .map(|(_, data)| data.unwrap().fi.input_frameno) + .sorted() + .unique() + .filter(|frameno| *frameno < input_frameno) + .count() as u64 + == input_frameno - minigop_input_frameno_start; + let show_existing_frame = self + .frame_data + .range(minigop_input_frameno_start..) 
+ .any(|(_, data)| data.unwrap().fi.input_frameno == input_frameno); + if show_existing_frame { + assert!(show_frame); + } let fi = FrameInvariants::new_inter_frame( self.get_previous_coded_fi(output_frameno), - &self.inter_cfg, + input_frameno, self.gop_input_frameno_start[&output_frameno], output_frameno_in_gop, - next_keyframe_input_frameno, - self.config.error_resilient, + minigop_input_frameno_start, + output_frameno - self.minigop_output_frameno_start[&output_frameno], + next_minigop_input_frameno, + show_frame, + show_existing_frame, + &self.frame_depths, + &self.config, t35_metadata, + &self.minigop_config, ); assert!(fi.is_some()); Ok(fi) @@ -742,7 +669,7 @@ impl ContextInner { // P-frames in this instance. // // Compute the motion vectors. - compute_motion_vectors(fi, fs, &self.inter_cfg); + compute_motion_vectors(fi, fs, self.allowed_ref_frames()); let coded_data = fi.coded_frame_data.as_mut().unwrap(); @@ -871,12 +798,15 @@ impl ContextInner { self.frame_depths.insert(self.next_lookahead_frame, FrameDepth::Intra); } else if self.frame_depths[&(self.next_lookahead_frame - 1)] == FrameDepth::Intra + || self.config.low_latency { - // The last frame is a keyframe, so this one must start a new mini-GOP + // The last frame is a keyframe, so this one must start a new mini-GOP. + // Or, in the case of low latency, every frame is a separate mini-GOP. 
self.keyframe_detector.inter_costs.remove(&self.next_lookahead_frame); - self - .frame_depths - .insert(self.next_lookahead_frame, FrameDepth::Inter { depth: 0 }); + self.frame_depths.insert( + self.next_lookahead_frame, + FrameDepth::Inter { depth: 0, is_minigop_start: true }, + ); } else { self.compute_current_minigop_cost(); }; @@ -890,14 +820,15 @@ impl ContextInner { .frame_depths .iter() .rev() - .find(|(_, d)| **d == FrameDepth::Inter { depth: 0 }) + .find(|(_, d)| { + **d == FrameDepth::Inter { depth: 0, is_minigop_start: true } + }) .unwrap() .0; let current_width = (self.next_lookahead_frame - minigop_start_frame) as u8; - let max_pyramid_width = - self.config.speed_settings.rdo_lookahead_frames.min(32) as u8; + let max_pyramid_width = self.frame_type_lookahead_distance as u8; let mut need_new_minigop = false; if current_width == max_pyramid_width { @@ -925,9 +856,7 @@ impl ContextInner { // Depth 4 9..=16 => 12000.0, // Depth 5 - 17..=32 => 10000.0, - // SAFETY: 32 is the max mini-GOP width - _ => unsafe { debug_unreachable!() }, + 17.. 
=> 10000.0, }; if current_minigop_cost > allowance { need_new_minigop = true; @@ -939,9 +868,10 @@ impl ContextInner { minigop_start_frame, self.next_lookahead_frame - 1, ); - self - .frame_depths - .insert(self.next_lookahead_frame, FrameDepth::Inter { depth: 0 }); + self.frame_depths.insert( + self.next_lookahead_frame, + FrameDepth::Inter { depth: 0, is_minigop_start: true }, + ); for frameno in minigop_start_frame..=self.next_lookahead_frame { self.keyframe_detector.inter_costs.remove(&frameno); } @@ -960,17 +890,23 @@ impl ContextInner { while !frames.is_empty() { if current_depth == 0 { // Special case for depth 0, we generally want the last frame at this depth - self - .frame_depths - .insert(frames.pop_last().unwrap(), FrameDepth::Inter { depth: 0 }); - current_depth = 1; + let frameno = frames.pop_last().unwrap(); + self.frame_depths.insert( + frameno, + FrameDepth::Inter { depth: 0, is_minigop_start: frames.is_empty() }, + ); + current_depth += 1; } else { let max_frames_in_level = 1 << (current_depth - 1); if frames.len() <= max_frames_in_level { for frameno in frames.into_iter() { - self - .frame_depths - .insert(frameno, FrameDepth::Inter { depth: current_depth }); + self.frame_depths.insert( + frameno, + FrameDepth::Inter { + depth: current_depth, + is_minigop_start: false, + }, + ); } break; } else { @@ -986,9 +922,13 @@ impl ContextInner { for (start, end) in breakpoints.into_iter().tuple_windows() { let midpoint = (end - start + 1) / 2; frames.remove(&midpoint); - self - .frame_depths - .insert(midpoint, FrameDepth::Inter { depth: current_depth }); + self.frame_depths.insert( + midpoint, + FrameDepth::Inter { + depth: current_depth, + is_minigop_start: false, + }, + ); } current_depth += 1; } @@ -1402,11 +1342,8 @@ impl ContextInner { ) -> Result, EncoderStatus> { let frame_data = self.frame_data.get_mut(&cur_output_frameno).unwrap().as_mut().unwrap(); - let sef_data = encode_show_existing_frame( - &frame_data.fi, - &mut frame_data.fs, - 
&self.inter_cfg, - ); + let sef_data = + encode_show_existing_frame(&frame_data.fi, &mut frame_data.fs); let bits = (sef_data.len() * 8) as i64; self.packet_data.extend(sef_data); self.rc_state.update_state( @@ -1491,7 +1428,7 @@ impl ContextInner { if self.rc_state.needs_trial_encode(fti) { let mut trial_fs = frame_data.fs.clone(); - let data = encode_frame(&frame_data.fi, &mut trial_fs, &self.inter_cfg); + let data = encode_frame(&frame_data.fi, &mut trial_fs); self.rc_state.update_state( (data.len() * 8) as i64, fti, @@ -1510,8 +1447,7 @@ impl ContextInner { frame_data.fi.set_quantizers(&qps); } - let data = - encode_frame(&frame_data.fi, &mut frame_data.fs, &self.inter_cfg); + let data = encode_frame(&frame_data.fi, &mut frame_data.fs); #[cfg(feature = "dump_lookahead_data")] { let input_frameno = frame_data.fi.input_frameno; @@ -1681,162 +1617,26 @@ impl ContextInner { self.gop_input_frameno_start.remove(&i); } } - - /// Counts the number of output frames of each subtype in the next - /// `reservoir_frame_delay` temporal units (needed for rate control). - /// Returns the number of output frames (excluding SEF frames) and output TUs - /// until the last keyframe in the next `reservoir_frame_delay` temporal units, - /// or the end of the interval, whichever comes first. - /// The former is needed because it indicates the number of rate estimates we - /// will make. - /// The latter is needed because it indicates the number of times new bitrate - /// is added to the buffer. - pub(crate) fn guess_frame_subtypes( - &self, nframes: &mut [i32; FRAME_NSUBTYPES + 1], - reservoir_frame_delay: i32, - ) -> (i32, i32) { - for fti in 0..=FRAME_NSUBTYPES { - nframes[fti] = 0; - } - - // Two-pass calls this function before receive_packet(), and in particular - // before the very first send_frame(), when the following maps are empty. - // In this case, return 0 as the default value. 
- let mut prev_keyframe_input_frameno = *self - .gop_input_frameno_start - .get(&self.output_frameno) - .unwrap_or_else(|| { - assert!(self.output_frameno == 0); - &0 - }); - let mut prev_keyframe_output_frameno = *self - .gop_output_frameno_start - .get(&self.output_frameno) - .unwrap_or_else(|| { - assert!(self.output_frameno == 0); - &0 - }); - - let mut prev_keyframe_ntus = 0; - // Does not include SEF frames. - let mut prev_keyframe_nframes = 0; - let mut acc: [i32; FRAME_NSUBTYPES + 1] = [0; FRAME_NSUBTYPES + 1]; - // Updates the frame counts with the accumulated values when we hit a - // keyframe. - fn collect_counts( - nframes: &mut [i32; FRAME_NSUBTYPES + 1], - acc: &mut [i32; FRAME_NSUBTYPES + 1], - ) { - for fti in 0..=FRAME_NSUBTYPES { - nframes[fti] += acc[fti]; - acc[fti] = 0; - } - acc[FRAME_SUBTYPE_I] += 1; - } - let mut output_frameno = self.output_frameno; - let mut ntus = 0; - // Does not include SEF frames. - let mut nframes_total = 0; - while ntus < reservoir_frame_delay { - let output_frameno_in_gop = - output_frameno - prev_keyframe_output_frameno; - let is_kf = - if let Some(Some(frame_data)) = self.frame_data.get(&output_frameno) { - if frame_data.fi.frame_type == FrameType::KEY { - prev_keyframe_input_frameno = frame_data.fi.input_frameno; - // We do not currently use forward keyframes, so they should always - // end the current TU (thus we always increment ntus below). - debug_assert!(frame_data.fi.show_frame); - true - } else { - false - } - } else { - // It is possible to be invoked for the first time from twopass_out() - // before receive_packet() is called, in which case frame_invariants - // will not be populated. - // Force the first frame in each GOP to be a keyframe in that case. 
- output_frameno_in_gop == 0 - }; - if is_kf { - collect_counts(nframes, &mut acc); - prev_keyframe_output_frameno = output_frameno; - prev_keyframe_ntus = ntus; - prev_keyframe_nframes = nframes_total; - output_frameno += 1; - ntus += 1; - nframes_total += 1; - continue; - } - let idx_in_group_output = - self.inter_cfg.get_idx_in_group_output(output_frameno_in_gop); - let input_frameno = prev_keyframe_input_frameno - + self - .inter_cfg - .get_order_hint(output_frameno_in_gop, idx_in_group_output) - as u64; - // For rate control purposes, ignore any limit on frame count that has - // been set. - // We pretend that we will keep encoding frames forever to prevent the - // control loop from driving us into the rails as we come up against a - // hard stop (with no more chance to correct outstanding errors). - let next_keyframe_input_frameno = - self.next_keyframe_input_frameno(prev_keyframe_input_frameno, true); - // If we are re-ordering, we may skip some output frames in the final - // re-order group of the GOP. - if input_frameno >= next_keyframe_input_frameno { - // If we have encoded enough whole groups to reach the next keyframe, - // then start the next keyframe gop. - if 1 - + (output_frameno - prev_keyframe_output_frameno) - / self.inter_cfg.group_output_len - * self.inter_cfg.group_input_len - >= next_keyframe_input_frameno - prev_keyframe_input_frameno - { - collect_counts(nframes, &mut acc); - prev_keyframe_input_frameno = input_frameno; - prev_keyframe_output_frameno = output_frameno; - prev_keyframe_ntus = ntus; - prev_keyframe_nframes = nframes_total; - // We do not currently use forward keyframes, so they should always - // end the current TU. - output_frameno += 1; - ntus += 1; - } - output_frameno += 1; - continue; - } - if self.inter_cfg.get_show_existing_frame(idx_in_group_output) { - acc[FRAME_SUBTYPE_SEF] += 1; - } else { - // TODO: Implement golden P-frames. 
- let fti = FRAME_SUBTYPE_P - + (self.inter_cfg.get_level(idx_in_group_output) as usize); - acc[fti] += 1; - nframes_total += 1; - } - if self.inter_cfg.get_show_frame(idx_in_group_output) { - ntus += 1; - } - output_frameno += 1; - } - if prev_keyframe_output_frameno <= self.output_frameno { - // If there were no keyframes at all, or only the first frame was a - // keyframe, the accumulators never flushed and still contain counts for - // the entire buffer. - // In both cases, we return these counts. - collect_counts(nframes, &mut acc); - (nframes_total, ntus) - } else { - // Otherwise, we discard what remains in the accumulators as they contain - // the counts from and past the last keyframe. - (prev_keyframe_nframes, prev_keyframe_ntus) - } - } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum FrameDepth { Intra, - Inter { depth: u8 }, + Inter { depth: u8, is_minigop_start: bool }, +} + +impl FrameDepth { + pub fn depth(self) -> u8 { + match self { + FrameDepth::Intra => 0, + FrameDepth::Inter { depth, .. } => depth, + } + } + + pub fn is_minigop_start(self) -> bool { + match self { + FrameDepth::Intra => true, + FrameDepth::Inter { is_minigop_start, .. 
} => is_minigop_start, + } + } } diff --git a/src/api/lookahead.rs b/src/api/lookahead.rs index 3b711883ff..05adf89600 100644 --- a/src/api/lookahead.rs +++ b/src/api/lookahead.rs @@ -1,4 +1,3 @@ -use crate::api::internal::InterConfig; use crate::config::EncoderConfig; use crate::context::{BlockOffset, FrameBlocks, TileBlockOffset}; use crate::cpu_features::CpuFeatureLevel; @@ -8,18 +7,21 @@ use crate::encoder::{ }; use crate::frame::{AsRegion, PlaneOffset}; use crate::me::{estimate_tile_motion, RefMEStats}; -use crate::partition::{get_intra_edges, BlockSize}; +use crate::partition::{get_intra_edges, BlockSize, RefType}; use crate::predict::{IntraParam, PredictionMode}; use crate::tiling::{Area, PlaneRegion, TileRect}; use crate::transform::TxSize; use crate::Pixel; use rayon::iter::*; use rust_hawktracer::*; +use std::collections::BTreeMap; use std::sync::Arc; use v_frame::frame::Frame; use v_frame::pixel::CastFromPrimitive; use v_frame::plane::Plane; +use super::MiniGopConfig; + pub(crate) const IMP_BLOCK_MV_UNITS_PER_PIXEL: i64 = 8; pub(crate) const IMP_BLOCK_SIZE_IN_MV_UNITS: i64 = IMPORTANCE_BLOCK_SIZE as i64 * IMP_BLOCK_MV_UNITS_PER_PIXEL; @@ -183,7 +185,6 @@ pub(crate) fn estimate_inter_costs( ) -> f64 { config.low_latency = true; config.speed_settings.multiref = false; - let inter_cfg = InterConfig::new(&config); let last_fi = FrameInvariants::new_key_frame( Arc::new(config), sequence, @@ -192,12 +193,18 @@ pub(crate) fn estimate_inter_costs( ); let mut fi = FrameInvariants::new_inter_frame( &last_fi, - &inter_cfg, + 1, 0, 1, - 2, + 0, + 1, + 1, + true, false, + &BTreeMap::new(), + &config, Box::new([]), + &MiniGopConfig { group_input_len: 1, pyramid_depth: 1 }, ) .unwrap(); @@ -267,7 +274,8 @@ pub(crate) fn estimate_inter_costs( #[hawktracer(compute_motion_vectors)] pub(crate) fn compute_motion_vectors( - fi: &mut FrameInvariants, fs: &mut FrameState, inter_cfg: &InterConfig, + fi: &mut FrameInvariants, fs: &mut FrameState, + allowed_ref_frames: 
&[RefType], ) { let mut blocks = FrameBlocks::new(fi.w_in_b, fi.h_in_b); fi.sequence @@ -277,6 +285,6 @@ pub(crate) fn compute_motion_vectors( .into_par_iter() .for_each(|mut ctx| { let ts = &mut ctx.ts; - estimate_tile_motion(fi, ts, inter_cfg); + estimate_tile_motion(fi, ts, allowed_ref_frames); }); } diff --git a/src/encoder.rs b/src/encoder.rs index 8eac5c287b..a0c1f797e9 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -44,6 +44,7 @@ use bitstream_io::{BigEndian, BitWrite, BitWriter}; use rayon::iter::*; use rust_hawktracer::*; +use std::collections::BTreeMap; use std::collections::VecDeque; use std::io::Write; use std::mem::MaybeUninit; @@ -663,7 +664,7 @@ pub struct FrameInvariants { pub use_tx_domain_distortion: bool, pub use_tx_domain_rate: bool, pub idx_in_group_output: u64, - pub pyramid_level: u64, + pub pyramid_level: u8, pub enable_early_exit: bool, pub tx_mode_select: bool, pub enable_inter_txfm_split: bool, @@ -816,17 +817,6 @@ pub enum Scales { SpatiotemporalScales, } -pub(crate) const fn pos_to_lvl(pos: u64, pyramid_depth: u64) -> u64 { - // Derive level within pyramid for a frame with a given coding order position - // For example, with a pyramid of depth 2, the 2 least significant bits of the - // position determine the level: - // 00 -> 0 - // 01 -> 2 - // 10 -> 1 - // 11 -> 2 - pyramid_depth - (pos | (1 << pyramid_depth)).trailing_zeros() as u64 -} - impl FrameInvariants { #[allow(clippy::erasing_op, clippy::identity_op)] /// # Panics @@ -964,48 +954,42 @@ impl FrameInvariants { /// Returns the created `FrameInvariants`, or `None` if this should be /// a placeholder frame. 
pub(crate) fn new_inter_frame( - previous_coded_fi: &Self, inter_cfg: &InterConfig, + previous_coded_fi: &Self, input_frameno: u64, gop_input_frameno_start: u64, output_frameno_in_gop: u64, - next_keyframe_input_frameno: u64, error_resilient: bool, - t35_metadata: Box<[T35]>, + minigop_input_frameno_start: u64, output_frameno_in_minigop: u64, + next_golden_frame_input_frameno: u64, show_frame: bool, + show_existing_frame: bool, frame_depths: &BTreeMap, + config: &EncoderConfig, t35_metadata: Box<[T35]>, + minigop_config: &MiniGopConfig, ) -> Option { - let input_frameno = inter_cfg - .get_input_frameno(output_frameno_in_gop, gop_input_frameno_start); - if input_frameno >= next_keyframe_input_frameno { - // This is an invalid frame. We set it as a placeholder in the FI list. - return None; - } - // We have this special thin clone method to avoid cloning the - // quite large lookahead data for SEFs, when it is not needed. + // quite large lookahead data for SEFs when it is not needed. let mut fi = previous_coded_fi.clone_without_coded_data(); fi.intra_only = false; fi.force_integer_mv = 0; // note: should be 1 if fi.intra_only is true - fi.idx_in_group_output = - inter_cfg.get_idx_in_group_output(output_frameno_in_gop); + fi.idx_in_group_output = output_frameno_in_minigop; fi.tx_mode_select = fi.enable_inter_txfm_split; - - let show_existing_frame = - inter_cfg.get_show_existing_frame(fi.idx_in_group_output); if !show_existing_frame { fi.coded_frame_data = previous_coded_fi.coded_frame_data.clone(); } - fi.order_hint = - inter_cfg.get_order_hint(output_frameno_in_gop, fi.idx_in_group_output); + fi.order_hint = output_frameno_in_gop as u32; - fi.pyramid_level = inter_cfg.get_level(fi.idx_in_group_output); + fi.pyramid_level = frame_depths[&input_frameno].depth(); - fi.frame_type = if (inter_cfg.switch_frame_interval > 0) - && (output_frameno_in_gop % inter_cfg.switch_frame_interval == 0) - && (fi.pyramid_level == 0) + fi.frame_type = if config.switch_frame_interval > 
0 + && output_frameno_in_gop % config.switch_frame_interval == 0 + && fi.pyramid_level == 0 { FrameType::SWITCH } else { FrameType::INTER }; - fi.error_resilient = - if fi.frame_type == FrameType::SWITCH { true } else { error_resilient }; + fi.error_resilient = if fi.frame_type == FrameType::SWITCH { + true + } else { + config.error_resilient + }; fi.frame_size_override_flag = if fi.frame_type == FrameType::SWITCH { true @@ -1023,8 +1007,8 @@ impl FrameInvariants { }; // this is the slot that the current frame is going to be saved into - let slot_idx = inter_cfg.get_slot_idx(fi.pyramid_level, fi.order_hint); - fi.show_frame = inter_cfg.get_show_frame(fi.idx_in_group_output); + let slot_idx = output_frameno_in_minigop as u32; + fi.show_frame = show_frame; fi.t35_metadata = if fi.show_frame { t35_metadata } else { Box::new([]) }; fi.frame_to_show_map_idx = slot_idx; fi.refresh_frame_flags = if fi.frame_type == FrameType::SWITCH { @@ -1040,15 +1024,16 @@ impl FrameInvariants { let ref_in_previous_group = LAST3_FRAME; // reuse probability estimates from previous frames only in top level frames - fi.primary_ref_frame = if fi.error_resilient || (fi.pyramid_level > 2) { - PRIMARY_REF_NONE - } else { - (ref_in_previous_group.to_index()) as u32 + if fi.error_resilient { + fi.primary_ref_frame = PRIMARY_REF_NONE; }; - if fi.pyramid_level == 0 { - // level 0 has no forward references + if fi.idx_in_group_output == 0 { + // frame 0 has no forward references // default to last P frame + if !fi.error_resilient { + fi.primary_ref_frame = LAST_FRAME as u32; + } fi.ref_frames = [ // calculations done relative to the slot_idx for this frame. 
// the last four frames can be found by subtracting from the current slot_idx @@ -1057,22 +1042,41 @@ impl FrameInvariants { // this is the previous P frame (slot_idx + 4 - 1) as u8 % 4 ; INTER_REFS_PER_FRAME]; - if inter_cfg.multiref { + if config.multiref() { // use the second-previous p frame as a second reference frame - fi.ref_frames[second_ref_frame.to_index()] = - (slot_idx + 4 - 2) as u8 % 4; + fi.ref_frames[LAST2_FRAME as usize] = (slot_idx + 4 - 2) as u8 % 4; + } + } else if fi.pyramid_level == 0 { + if !fi.error_resilient { + fi.primary_ref_frame = GOLDEN_FRAME as u32; + } + fi.ref_frames = [ + // calculations done relative to the slot_idx for this frame. + // the last four frames can be found by subtracting from the current slot_idx + // add 4 to prevent underflow + // TODO: maybe use order_hint here like in get_slot_idx? + // this is the previous P frame + (slot_idx + 4 - 1) as u8 % 4 + ; INTER_REFS_PER_FRAME]; + if config.multiref() { + // use the previous p frame as a second reference frame + fi.ref_frames[LAST_FRAME as usize] = (slot_idx + 4 - 2) as u8 % 4; } } else { - debug_assert!(inter_cfg.multiref); + debug_assert!(config.multiref()); + + if !fi.error_resilient { + fi.primary_ref_frame = LAST_FRAME as u32; + } // fill in defaults // default to backwards reference in lower level fi.ref_frames = [{ let oh = fi.order_hint - - (inter_cfg.group_input_len as u32 >> fi.pyramid_level); - let lvl1 = pos_to_lvl(oh as u64, inter_cfg.pyramid_depth); + - (minigop_config.group_input_len as u32 >> fi.pyramid_level); + let lvl1 = pos_to_lvl(oh as u64, minigop_config.pyramid_depth); if lvl1 == 0 { - ((oh >> inter_cfg.pyramid_depth) % 4) as u8 + ((oh >> minigop_config.pyramid_depth) % 4) as u8 } else { 3 + lvl1 as u8 } @@ -1080,10 +1084,10 @@ impl FrameInvariants { // use forward reference in lower level as a second reference frame fi.ref_frames[second_ref_frame.to_index()] = { let oh = fi.order_hint - + (inter_cfg.group_input_len as u32 >> fi.pyramid_level); - 
let lvl2 = pos_to_lvl(oh as u64, inter_cfg.pyramid_depth); + + (minigop_config.group_input_len as u32 >> fi.pyramid_level); + let lvl2 = pos_to_lvl(oh as u64, minigop_config.pyramid_depth); if lvl2 == 0 { - ((oh >> inter_cfg.pyramid_depth) % 4) as u8 + ((oh >> minigop_config.pyramid_depth) % 4) as u8 } else { 3 + lvl2 as u8 } @@ -1095,13 +1099,14 @@ impl FrameInvariants { fi.set_ref_frame_sign_bias(); - fi.reference_mode = if inter_cfg.multiref && fi.idx_in_group_output != 0 { + fi.reference_mode = if config.multiref() && fi.idx_in_group_output != 0 { ReferenceMode::SELECT } else { ReferenceMode::SINGLE }; fi.input_frameno = input_frameno; - fi.me_range_scale = (inter_cfg.group_input_len >> fi.pyramid_level) as u8; + fi.me_range_scale = + (minigop_config.group_input_len >> fi.pyramid_level) as u8; if fi.show_frame || fi.showable_frame { let cur_frame_time = fi.frame_timestamp(); @@ -3674,7 +3679,7 @@ fn write_tile_group_header(tile_start_and_end_present_flag: bool) -> Vec { /// - If the frame packets cannot be written #[hawktracer(encode_show_existing_frame)] pub fn encode_show_existing_frame( - fi: &FrameInvariants, fs: &mut FrameState, inter_cfg: &InterConfig, + fi: &FrameInvariants, fs: &mut FrameState, ) -> Vec { debug_assert!(fi.is_show_existing_frame()); let obu_extension = 0; @@ -3697,7 +3702,7 @@ pub fn encode_show_existing_frame( let mut buf2 = Vec::new(); { let mut bw2 = BitWriter::endian(&mut buf2, BigEndian); - bw2.write_frame_header_obu(fi, fs, inter_cfg).unwrap(); + bw2.write_frame_header_obu(fi, fs).unwrap(); } { @@ -3749,7 +3754,7 @@ fn get_initial_segmentation( /// - If the frame packets cannot be written #[hawktracer(encode_frame)] pub fn encode_frame( - fi: &FrameInvariants, fs: &mut FrameState, inter_cfg: &InterConfig, + fi: &FrameInvariants, fs: &mut FrameState, ) -> Vec { debug_assert!(!fi.is_show_existing_frame()); let obu_extension = 0; @@ -3760,7 +3765,7 @@ pub fn encode_frame( fs.segmentation = get_initial_segmentation(fi); 
segmentation_optimize(fi, fs); } - let tile_group = encode_tile_group(fi, fs, inter_cfg); + let tile_group = encode_tile_group(fi, fs); if fi.frame_type == FrameType::KEY { write_key_frame_obus(&mut packet, fi, obu_extension).unwrap(); @@ -3778,7 +3783,7 @@ pub fn encode_frame( let mut buf2 = Vec::new(); { let mut bw2 = BitWriter::endian(&mut buf2, BigEndian); - bw2.write_frame_header_obu(fi, fs, inter_cfg).unwrap(); + bw2.write_frame_header_obu(fi, fs).unwrap(); } { diff --git a/src/me.rs b/src/me.rs index 20fa201cbe..5351313d5d 100644 --- a/src/me.rs +++ b/src/me.rs @@ -25,6 +25,8 @@ use crate::FrameInvariants; use arrayvec::*; use rust_hawktracer::*; + +use crate::util::ILog; use std::ops::{Index, IndexMut}; use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}; @@ -153,7 +155,7 @@ pub enum MVSamplingMode { pub fn estimate_tile_motion( fi: &FrameInvariants, ts: &mut TileStateMut<'_, T>, - inter_cfg: &InterConfig, + allowed_ref_frames: &[RefType], ) { let init_size = MIB_SIZE_LOG2; @@ -179,7 +181,7 @@ pub fn estimate_tile_motion( for sby in 0..ts.sb_height { for sbx in 0..ts.sb_width { let mut tested_frames_flags = 0; - for &ref_frame in inter_cfg.allowed_ref_frames() { + for &ref_frame in allowed_ref_frames { let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()]; if tested_frames_flags & frame_flag == frame_flag { continue;