From ba91dbcb3e55e336a410b7a3438c325d833c6171 Mon Sep 17 00:00:00 2001
From: Danil Chapovalov <danilchap@webrtc.org>
Date: Fri, 15 Jan 2021 16:46:11 +0100
Subject: [PATCH] In SVC controllers add support for frames dropped by encoder

by updating flag that T1 frame can be referenced when it is encoded
rather than when it is sent for encoding.
Otherwise when encoder drops T1 frame, configuration for following T2 frame would
still try to reference that absent T1 frame leading to invalid references.

Bug: None
Change-Id: I6398275971596b0618bcf9c926f0282f74120976
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/202030
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33002}
---
 .../svc/scalability_structure_full_svc.cc     |  5 +++-
 .../svc/scalability_structure_key_svc.cc      |  5 +++-
 .../scalability_structure_key_svc_unittest.cc | 23 ++++++++++++++++++
 .../scalability_structure_l3t3_unittest.cc    | 24 +++++++++++++++++++
 4 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/modules/video_coding/svc/scalability_structure_full_svc.cc b/modules/video_coding/svc/scalability_structure_full_svc.cc
index c489b60502..5454622924 100644
--- a/modules/video_coding/svc/scalability_structure_full_svc.cc
+++ b/modules/video_coding/svc/scalability_structure_full_svc.cc
@@ -188,7 +188,6 @@ ScalabilityStructureFullSvc::NextFrameConfig(bool restart) {
         // No frame reference top layer frame, so no need save it into a buffer.
         if (num_temporal_layers_ > 2 || sid < num_spatial_layers_ - 1) {
           config.Update(BufferIndex(sid, /*tid=*/1));
-          can_reference_t1_frame_for_spatial_id_.set(sid);
         }
         spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/1);
       }
@@ -246,6 +245,10 @@ ScalabilityStructureFullSvc::NextFrameConfig(bool restart) {
 
 GenericFrameInfo ScalabilityStructureFullSvc::OnEncodeDone(
     const LayerFrameConfig& config) {
+  if (config.TemporalId() == 1) {
+    can_reference_t1_frame_for_spatial_id_.set(config.SpatialId());
+  }
+
   GenericFrameInfo frame_info;
   frame_info.spatial_id = config.SpatialId();
   frame_info.temporal_id = config.TemporalId();
diff --git a/modules/video_coding/svc/scalability_structure_key_svc.cc b/modules/video_coding/svc/scalability_structure_key_svc.cc
index cfc89a3794..9399c0cf7e 100644
--- a/modules/video_coding/svc/scalability_structure_key_svc.cc
+++ b/modules/video_coding/svc/scalability_structure_key_svc.cc
@@ -148,7 +148,6 @@ ScalabilityStructureKeySvc::T1Config() {
     config.Id(kDelta).S(sid).T(1).Reference(BufferIndex(sid, /*tid=*/0));
     if (num_temporal_layers_ > 2) {
       config.Update(BufferIndex(sid, /*tid=*/1));
-      can_reference_t1_frame_for_spatial_id_.set(sid);
     }
   }
   return configs;
@@ -223,6 +222,10 @@ ScalabilityStructureKeySvc::NextFrameConfig(bool restart) {
 
 GenericFrameInfo ScalabilityStructureKeySvc::OnEncodeDone(
     const LayerFrameConfig& config) {
+  if (config.TemporalId() == 1) {
+    can_reference_t1_frame_for_spatial_id_.set(config.SpatialId());
+  }
+
   GenericFrameInfo frame_info;
   frame_info.spatial_id = config.SpatialId();
   frame_info.temporal_id = config.TemporalId();
diff --git a/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc b/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
index 752f710eb6..34ec74726d 100644
--- a/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
+++ b/modules/video_coding/svc/scalability_structure_key_svc_unittest.cc
@@ -51,6 +51,29 @@ TEST(ScalabilityStructureL3T3KeyTest,
   EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
 }
 
+TEST(ScalabilityStructureL3T3KeyTest,
+     SkipT1FrameByEncoderKeepsReferencesValid) {
+  std::vector<GenericFrameInfo> frames;
+  ScalabilityStructureL3T3Key structure;
+  ScalabilityStructureWrapper wrapper(structure);
+
+  // 1st 2 temporal units (T0 and T2)
+  wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+  // Simulate T1 frame dropped by the encoder,
+  // i.e. retrieve config, but skip calling OnEncodeDone.
+  structure.NextFrameConfig(/*restart=*/false);
+  // one more temporal units (T2)
+  wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
+
+  ASSERT_THAT(frames, SizeIs(9));
+  EXPECT_EQ(frames[0].temporal_id, 0);
+  EXPECT_EQ(frames[3].temporal_id, 2);
+  // T1 frames were dropped by the encoder.
+  EXPECT_EQ(frames[6].temporal_id, 2);
+
+  EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
+}
+
 TEST(ScalabilityStructureL3T3KeyTest,
      ReenablingSpatialLayerBeforeMissedT0FrameDoesntTriggerAKeyFrame) {
   ScalabilityStructureL3T3Key structure;
diff --git a/modules/video_coding/svc/scalability_structure_l3t3_unittest.cc b/modules/video_coding/svc/scalability_structure_l3t3_unittest.cc
index 1a3dc8b60d..ca66fa8f2b 100644
--- a/modules/video_coding/svc/scalability_structure_l3t3_unittest.cc
+++ b/modules/video_coding/svc/scalability_structure_l3t3_unittest.cc
@@ -9,6 +9,8 @@
  */
 #include "modules/video_coding/svc/scalability_structure_l3t3.h"
 
+#include <vector>
+
 #include "modules/video_coding/svc/scalability_structure_test_helpers.h"
 #include "test/gmock.h"
 #include "test/gtest.h"
@@ -44,6 +46,28 @@ TEST(ScalabilityStructureL3T3Test, SkipS1T1FrameKeepsStructureValid) {
   EXPECT_EQ(frames[0].temporal_id, 2);
 }
 
+TEST(ScalabilityStructureL3T3Test, SkipT1FrameByEncoderKeepsReferencesValid) {
+  std::vector<GenericFrameInfo> frames;
+  ScalabilityStructureL3T3 structure;
+  ScalabilityStructureWrapper wrapper(structure);
+
+  // 1st 2 temporal units (T0 and T2)
+  wrapper.GenerateFrames(/*num_temporal_units=*/2, frames);
+  // Simulate T1 frame dropped by the encoder,
+  // i.e. retrieve config, but skip calling OnEncodeDone.
+  structure.NextFrameConfig(/*restart=*/false);
+  // one more temporal units (T2)
+  wrapper.GenerateFrames(/*num_temporal_units=*/1, frames);
+
+  ASSERT_THAT(frames, SizeIs(9));
+  EXPECT_EQ(frames[0].temporal_id, 0);
+  EXPECT_EQ(frames[3].temporal_id, 2);
+  // T1 frame was dropped by the encoder.
+  EXPECT_EQ(frames[6].temporal_id, 2);
+
+  EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames));
+}
+
 TEST(ScalabilityStructureL3T3Test, SwitchSpatialLayerBeforeT1Frame) {
   ScalabilityStructureL3T3 structure;
   ScalabilityStructureWrapper wrapper(structure);