Merge pull request #158 from jdibenes/pyav-aud-flush

Pyav aud flush
jdibenes · Feb 26, 2025 · dd8c6d1 · dd8c6d1
2 parents b520bc6 + 2acae5e
commit dd8c6d1
Show file tree

Hide file tree

Showing 10 changed files with 91 additions and 67 deletions.
diff --git a/extensions/client_cpp/hl2ss_dp.cpp b/extensions/client_cpp/hl2ss_dp.cpp
@@ -148,7 +148,7 @@ void client::close()
 
 uint64_t gatherer::compute_timestamp(uint64_t ct, uint64_t et, uint32_t tb)
 {
-    return ((ct + et) * hl2ss::time_base::HUNDREDS_OF_NANOSECONDS) / tb;
+    return (uint64_t)(((ct + et) / (double)tb) * hl2ss::time_base::HUNDREDS_OF_NANOSECONDS);
 }
 
 void gatherer::avcc_to_annex_b(uint8_t* sample, uint32_t size)

diff --git a/extensions/client_cpp/main.cpp b/extensions/client_cpp/main.cpp
@@ -827,7 +827,7 @@ void test_dp_mrc(char const* host)
 #ifdef HL2SS_ENABLE_DP
     hl2ss::dp::mrc_configuration configuration{true, true, true, true, true, false, 0};
 
-    std::unique_ptr<hl2ss::dp::rx_mrc> client = hl2ss::lnm::rx_mrc(host, "live", "user", "pass");
+    std::unique_ptr<hl2ss::dp::rx_mrc> client = hl2ss::lnm::rx_mrc(host, hl2ss::dp::stream_port::LIVE, "user", "pass");
 
     client->open();
     for (;;)

diff --git a/hl2ss/hl2ss/Package.appxmanifest b/hl2ss/hl2ss/Package.appxmanifest
@@ -9,7 +9,7 @@
   <Identity
     Name="eaaf3af3-1402-4e5b-b6a1-5d0fbb7c1ba8"
     Publisher="CN=jcds"
-    Version="1.0.34.0" />
+    Version="1.0.35.0" />
   <mp:PhoneIdentity PhoneProductId="eaaf3af3-1402-4e5b-b6a1-5d0fbb7c1ba8" PhonePublisherId="00000000-0000-0000-0000-000000000000"/>
   <Properties>
     <DisplayName>hl2ss</DisplayName>

diff --git a/hl2ss/hl2ss/custom_encoder.cpp b/hl2ss/hl2ss/custom_encoder.cpp
@@ -7,11 +7,10 @@
 //-----------------------------------------------------------------------------
 
 // OK
-CustomEncoder::CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size, bool shift)
+CustomEncoder::CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size)
 {
     m_metadata      = std::make_unique<uint8_t[]>(metadata_size);
     m_metadata_size = metadata_size;
-    m_shift         = shift;
     m_pHookCallback = pHookCallback;
     m_pHookParam    = pHookParam;
     m_pMetadataFree = pMetadataFree;
@@ -21,23 +20,21 @@ CustomEncoder::CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam,
 
 // OK
 CustomEncoder::CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size, AudioSubtype input_subtype, AACFormat  const& format) :
-CustomEncoder(pHookCallback, pHookParam, pMetadataFree, metadata_size, false)
+CustomEncoder(pHookCallback, pHookParam, pMetadataFree, metadata_size)
 {
     m_pSinkWriter = CustomSinkWriter::CreateForAudio(Thunk_Sink, this, input_subtype, format);
 }
 
 // OK
 CustomEncoder::CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size, VideoSubtype input_subtype, H26xFormat const& format, uint32_t stride, std::vector<uint64_t> const& encoder_options) :
-CustomEncoder(pHookCallback, pHookParam, pMetadataFree, metadata_size, format.profile != H26xProfile::H26xProfile_None)
+CustomEncoder(pHookCallback, pHookParam, pMetadataFree, metadata_size)
 {
     m_pSinkWriter = CustomSinkWriter::CreateForVideo(Thunk_Sink, this, input_subtype, format, stride, encoder_options);
 }
 
 // OK
 CustomEncoder::~CustomEncoder()
 {
-    if (!m_shift) { return; }
-    if (m_pMetadataFree) { m_pMetadataFree(m_metadata.get(), m_metadata_size); }
 }
 
 // OK
@@ -54,7 +51,7 @@ void CustomEncoder::ProcessSample(IMFSample* pSample)
     pSample->ConvertToContiguousBuffer(&pBuffer);
     pSample->GetSampleTime(&hnsSampleTime);
 
-    if (!m_shift) { pSample->GetBlob(MF_USER_DATA_PAYLOAD, m_metadata.get(), m_metadata_size, NULL); }
+    pSample->GetBlob(MF_USER_DATA_PAYLOAD, m_metadata.get(), m_metadata_size, NULL);
 
     pBuffer->Lock(&pFrame, NULL, &cbFrameBytes);
 
@@ -64,8 +61,6 @@ void CustomEncoder::ProcessSample(IMFSample* pSample)
     pBuffer->Release();
 
     if (m_pMetadataFree) { m_pMetadataFree(m_metadata.get(), m_metadata_size); }
-
-    if ( m_shift) { pSample->GetBlob(MF_USER_DATA_PAYLOAD, m_metadata.get(), m_metadata_size, NULL); }
 }
 
 // OK

diff --git a/hl2ss/hl2ss/custom_encoder.h b/hl2ss/hl2ss/custom_encoder.h
@@ -14,12 +14,11 @@ class CustomEncoder
     std::unique_ptr<CustomSinkWriter> m_pSinkWriter;
     std::unique_ptr<uint8_t[]> m_metadata;
     uint32_t m_metadata_size;
-    bool m_shift;
     HOOK_ENCODER_PROC m_pHookCallback;
     void* m_pHookParam;
     HOOK_METADATA_PROC m_pMetadataFree;
 
-    CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size, bool shift);
+    CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size);
 
     void ProcessSample(IMFSample* pSample);
 

diff --git a/hl2ss_unity/Assets/Plugins/WSA/hl2ss.dll b/hl2ss_unity/Assets/Plugins/WSA/hl2ss.dll
diff --git a/hl2ss_unreal/Plugins/hl2ss/Binaries/hl2ss/hl2ss.dll b/hl2ss_unreal/Plugins/hl2ss/Binaries/hl2ss/hl2ss.dll
diff --git a/viewer/hl2ss.py b/viewer/hl2ss.py
@@ -919,6 +919,66 @@ def get_audio_codec_bitrate(profile):
     return None
 
 
+class _codec_h264:
+    _aud = b'\x00\x00\x00\x01\x09\x10'
+
+    def __init__(self):
+        self._codec = self._codec = av.CodecContext.create('h264', 'r')
+
+    def decode(self, payload):
+        for packet in self._codec.parse(payload[6:] + _codec_h264._aud):
+            for frame in self._codec.decode(packet):
+                return frame
+
+
+class _codec_hevc:
+    _aud = b'\x00\x00\x00\x01\x46\x01\x03'
+
+    def __init__(self):
+        self._codec = self._codec = av.CodecContext.create('hevc', 'r')
+
+    def decode(self, payload):
+        for packet in self._codec.parse(payload + _codec_hevc._aud):
+            for frame in self._codec.decode(packet):
+                return frame
+
+
+class _codec_aac:
+    def __init__(self):
+        self._codec = av.CodecContext.create('aac', 'r')
+
+    def decode(self, payload):
+        for packet in self._codec.parse(payload):
+            for frame in self._codec.decode(packet):
+                return frame
+
+
+def get_video_codec(profile):
+    if (profile == VideoProfile.H264_BASE):
+        return _codec_h264()
+    if (profile == VideoProfile.H264_MAIN):
+        return _codec_h264()
+    if (profile == VideoProfile.H264_HIGH):
+        return _codec_h264()
+    if (profile == VideoProfile.H265_MAIN):
+        return _codec_hevc()
+
+    return None
+
+
+def get_audio_codec(profile):
+    if (profile == AudioProfile.AAC_12000):
+        return _codec_aac()
+    if (profile == AudioProfile.AAC_16000):
+        return _codec_aac()
+    if (profile == AudioProfile.AAC_20000):
+        return _codec_aac()
+    if (profile == AudioProfile.AAC_24000):
+        return _codec_aac()
+
+    return None
+
+
 #------------------------------------------------------------------------------
 # RM VLC Decoder
 #------------------------------------------------------------------------------
@@ -947,13 +1007,10 @@ def __init__(self, profile):
         self.profile = profile
 
     def create(self):
-        self._codec = av.CodecContext.create(get_video_codec_name(self.profile), 'r')
+        self._codec = get_video_codec(self.profile)
 
     def decode(self, payload):
-        for packet in self._codec.parse(payload):
-            for frame in self._codec.decode(packet):
-                return frame.to_ndarray()[:Parameters_RM_VLC.HEIGHT, :Parameters_RM_VLC.WIDTH]
-        return None
+        return self._codec.decode(payload).to_ndarray()[:Parameters_RM_VLC.HEIGHT, :Parameters_RM_VLC.WIDTH]
 
 
 class _unpack_rm_vlc:
@@ -1016,13 +1073,10 @@ def __init__(self, profile):
         self.profile = profile
 
     def create(self):
-        self._codec = av.CodecContext.create(get_video_codec_name(self.profile), 'r')
+        self._codec = get_video_codec(self.profile)
 
     def decode(self, payload):
-        for packet in self._codec.parse(payload[_Mode0Layout_RM_DEPTH_AHAT_STRUCT.BASE:-8]):
-            for frame in self._codec.decode(packet):
-                return _unpack_rm_depth_ahat_nv12_as_yuv420p(frame.to_ndarray(), np.frombuffer(payload[-8:], dtype=np.uint64, offset=0, count=1))
-        return None
+        return _unpack_rm_depth_ahat_nv12_as_yuv420p(self._codec.decode(payload[_Mode0Layout_RM_DEPTH_AHAT_STRUCT.BASE:-8]).to_ndarray(), np.frombuffer(payload[-8:], dtype=np.uint64, offset=0, count=1))
 
 
 class _unpack_rm_depth_ahat:
@@ -1042,8 +1096,6 @@ def create(self):
         self._codec = pyzdepth.DepthCompressor()
 
     def decode(self, payload):
-        if (len(payload) <= 0):
-            return None
         result, width, height, decompressed = self._codec.Decompress(bytes(payload))
         return np.frombuffer(decompressed, dtype=np.uint16).reshape((height, width))
 
@@ -1053,13 +1105,10 @@ def __init__(self, profile):
         self.profile = profile
 
     def create(self):
-        self._codec = av.CodecContext.create(get_video_codec_name(self.profile), 'r')
+        self._codec = get_video_codec(self.profile)
 
     def decode(self, payload):
-        for packet in self._codec.parse(payload):
-            for frame in self._codec.decode(packet):
-                return np.square(frame.to_ndarray()[:Parameters_RM_DEPTH_AHAT.HEIGHT, :Parameters_RM_DEPTH_AHAT.WIDTH], dtype=np.uint16)
-        return None
+        return np.square(self._codec.decode(payload).to_ndarray()[:Parameters_RM_DEPTH_AHAT.HEIGHT, :Parameters_RM_DEPTH_AHAT.WIDTH], dtype=np.uint16)
 
 
 class _unpack_ab_rm_depth_ahat:
@@ -1205,13 +1254,10 @@ def __init__(self, profile):
         self.profile = profile
 
     def create(self, width, height):
-        self._codec = av.CodecContext.create(get_video_codec_name(self.profile), 'r')
+        self._codec = get_video_codec(self.profile)
 
     def decode(self, payload, format):
-        for packet in self._codec.parse(payload):
-            for frame in self._codec.decode(packet):
-                return frame.to_ndarray(format=format)
-        return None
+        return self._codec.decode(payload).to_ndarray(format=format)
 
 
 class _unpack_pv:
@@ -1262,13 +1308,10 @@ def __init__(self, profile):
         self.profile = profile
 
     def create(self):
-        self._codec = av.CodecContext.create(get_audio_codec_name(self.profile), 'r')
+        self._codec = get_audio_codec(self.profile)
 
     def decode(self, payload):
-        for packet in self._codec.parse(payload):
-            for frame in self._codec.decode(packet):
-                return frame.to_ndarray()
-        return None
+        return self._codec.decode(payload).to_ndarray()
 
 
 class _unpack_microphone:
@@ -1511,7 +1554,6 @@ def __init__(self, host, port, chunk, mode, divisor, profile, level, bitrate, op
     def open(self):
         self._codec.create()
         super().open()
-        self.get_next_packet()
 
     def get_next_packet(self):
         data = super().get_next_packet()
@@ -1531,7 +1573,6 @@ def __init__(self, host, port, chunk, mode, divisor, profile_z, profile_ab, leve
     def open(self):
         self._codec.create()
         super().open()
-        self.get_next_packet()
 
     def get_next_packet(self):
         data = super().get_next_packet()
@@ -1567,7 +1608,6 @@ def __init__(self, host, port, chunk, mode, width, height, framerate, divisor, p
     def open(self):        
         self._codec.create(self.width, self.height)
         super().open()
-        self.get_next_packet()
 
     def get_next_packet(self):
         data = super().get_next_packet()

diff --git a/viewer/hl2ss_dp.py b/viewer/hl2ss_dp.py
@@ -128,6 +128,7 @@ def open(self, host, port, user, password, chunk_size, configuration):
         self._audio_tb = 48000
         self._video_et = 0
         self._audio_et = 0
+        self._video_init = None
 
     def get_next_packet(self):
         packets = []
@@ -165,8 +166,7 @@ def get_next_packet(self):
                                                                     pps_data = stbl_data[133:141]
                                                                     sps_data[0:2] = b'\x00\x00'
                                                                     pps_data[0:2] = b'\x00\x00'
-                                                                    t = _compute_timestamp(self._video_ct, self._video_et, self._video_tb)
-                                                                    packets.append(hl2ss._packet(t, struct.pack('B', StreamKind.VIDEO | 0x04) + _avcc_to_annex_b(sps_data + pps_data), None))
+                                                                    self._video_init = sps_data + pps_data
                                                                 elif (stbl_type == 'mp4a'):
                                                                     self._audio_id = id
                                                                     self._audio_ct = ct * tb
@@ -208,6 +208,9 @@ def get_next_packet(self):
                                 sample = data[offset:(offset+size)]
                                 if (id == self._video_id):
                                     t = _compute_timestamp(self._video_ct, self._video_et, self._video_tb)
+                                    if (self._video_init is not None):
+                                        sample = sample[:6] + self._video_init + sample[6:] # AUD + SPS + PPS + IDR
+                                        self._video_init = None
                                     packets.append(hl2ss._packet(t, struct.pack('B', StreamKind.VIDEO | keyf) + _avcc_to_annex_b(sample), None))
                                     self._video_et += span
                                 elif (id == self._audio_id):
@@ -299,16 +302,12 @@ def unpack_mrc(payload):
 
 
 class decode_mrc:
-    def __init__(self):
-        self._video_codec = hl2ss.decode_pv(hl2ss.VideoProfile.H264_MAIN)
-        self._audio_codec = hl2ss.decode_microphone(hl2ss.AudioProfile.AAC_12000, hl2ss.AACLevel.L2)
-
     def create(self):
-        self._video_codec.create(0, 0)
-        self._audio_codec.create()
+        self._video_codec = hl2ss.get_video_codec(hl2ss.VideoProfile.H264_MAIN)
+        self._audio_codec = hl2ss.get_audio_codec(hl2ss.AudioProfile.AAC_24000)
 
     def decode(self, payload, kind, format):
-        return self._video_codec.decode(payload, format) if (kind == StreamKind.VIDEO) else self._audio_codec.decode(payload) if (kind == StreamKind.AUDIO) else None
+        return self._video_codec.decode(payload).to_ndarray(format=format) if (kind == StreamKind.VIDEO) else self._audio_codec.decode(payload).to_ndarray() if (kind == StreamKind.AUDIO) else None
 
 
 #------------------------------------------------------------------------------
@@ -322,21 +321,14 @@ def __init__(self, host, port, user, password, chunk, configuration, format):
         self._codec = decode_mrc()
 
     def open(self):
-        self._d_t = 0
-        self._d_k = False
         self._codec.create()
         super().open()
 
     def get_next_packet(self):
-        while (True):
-            data = super().get_next_packet()
-            data.payload = unpack_mrc(data.payload)
-            data.payload.sample = self._codec.decode(data.payload.sample, data.payload.kind, self.format)
-            if (data.payload.kind == StreamKind.VIDEO):
-                data.timestamp,         self._d_t = (self._d_t, data.timestamp)
-                data.payload.key_frame, self._d_k = (self._d_k, data.payload.key_frame)
-            if (data.payload.sample is not None):
-                return data
+        data = super().get_next_packet()
+        data.payload = unpack_mrc(data.payload)
+        data.payload.sample = self._codec.decode(data.payload.sample, data.payload.kind, self.format)
+        return data
 
     def close(self):
         super().close()

diff --git a/viewer/hl2ss_utilities.py b/viewer/hl2ss_utilities.py
@@ -580,10 +580,8 @@ def unpack_to_mp4(input_filenames, output_filename):
 
     for stream in streams:
         stream.time_base = time_base
-    for codec in codecs:
-        codec.time_base = time_base
 
-    base = 0#hl2ss._RANGEOF.U64_MAX
+    base = 0
 
     for reader in readers:
         data = reader.get_next_packet()