Skip to content

Commit

Permalink
Merge pull request #158 from jdibenes/pyav-aud-flush
Browse files Browse the repository at this point in the history
Pyav aud flush
  • Loading branch information
jdibenes authored Feb 26, 2025
2 parents b520bc6 + 2acae5e commit dd8c6d1
Show file tree
Hide file tree
Showing 10 changed files with 91 additions and 67 deletions.
2 changes: 1 addition & 1 deletion extensions/client_cpp/hl2ss_dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ void client::close()

// Converts the tick count (ct + et) from a time base of tb into
// hl2ss::time_base::HUNDREDS_OF_NANOSECONDS units, rounding down.
//
// ct: base tick count.
// et: ticks to add on top of ct.
// tb: divisor applied to the tick count (presumably ticks per second of the
//     stream -- confirm against callers). Must be non-zero.
//
// The conversion is done in integer arithmetic, split into whole and
// fractional parts:
//   - multiplying the full tick count first can overflow 64 bits;
//   - going through double is inexact above 2^53 and the final truncation
//     can land one unit low.
uint64_t gatherer::compute_timestamp(uint64_t ct, uint64_t et, uint32_t tb)
{
    uint64_t const units = static_cast<uint64_t>(hl2ss::time_base::HUNDREDS_OF_NANOSECONDS);
    uint64_t const ticks = ct + et;
    uint64_t const whole = ticks / tb;
    uint64_t const rest  = ticks % tb;

    // rest < tb <= 2^32, so rest * units stays within 64 bits as long as the
    // constant itself fits in 32 bits (NOTE(review): expected to be 10,000,000
    // -- confirm in the hl2ss headers).
    return (whole * units) + ((rest * units) / tb);
}

void gatherer::avcc_to_annex_b(uint8_t* sample, uint32_t size)
Expand Down
2 changes: 1 addition & 1 deletion extensions/client_cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -827,7 +827,7 @@ void test_dp_mrc(char const* host)
#ifdef HL2SS_ENABLE_DP
hl2ss::dp::mrc_configuration configuration{true, true, true, true, true, false, 0};

std::unique_ptr<hl2ss::dp::rx_mrc> client = hl2ss::lnm::rx_mrc(host, "live", "user", "pass");
std::unique_ptr<hl2ss::dp::rx_mrc> client = hl2ss::lnm::rx_mrc(host, hl2ss::dp::stream_port::LIVE, "user", "pass");

client->open();
for (;;)
Expand Down
2 changes: 1 addition & 1 deletion hl2ss/hl2ss/Package.appxmanifest
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<Identity
Name="eaaf3af3-1402-4e5b-b6a1-5d0fbb7c1ba8"
Publisher="CN=jcds"
Version="1.0.34.0" />
Version="1.0.35.0" />
<mp:PhoneIdentity PhoneProductId="eaaf3af3-1402-4e5b-b6a1-5d0fbb7c1ba8" PhonePublisherId="00000000-0000-0000-0000-000000000000"/>
<Properties>
<DisplayName>hl2ss</DisplayName>
Expand Down
13 changes: 4 additions & 9 deletions hl2ss/hl2ss/custom_encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
//-----------------------------------------------------------------------------

// OK
CustomEncoder::CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size, bool shift)
CustomEncoder::CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size)
{
m_metadata = std::make_unique<uint8_t[]>(metadata_size);
m_metadata_size = metadata_size;
m_shift = shift;
m_pHookCallback = pHookCallback;
m_pHookParam = pHookParam;
m_pMetadataFree = pMetadataFree;
Expand All @@ -21,23 +20,21 @@ CustomEncoder::CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam,

// OK
CustomEncoder::CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size, AudioSubtype input_subtype, AACFormat const& format) :
CustomEncoder(pHookCallback, pHookParam, pMetadataFree, metadata_size, false)
CustomEncoder(pHookCallback, pHookParam, pMetadataFree, metadata_size)
{
m_pSinkWriter = CustomSinkWriter::CreateForAudio(Thunk_Sink, this, input_subtype, format);
}

// OK
CustomEncoder::CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size, VideoSubtype input_subtype, H26xFormat const& format, uint32_t stride, std::vector<uint64_t> const& encoder_options) :
CustomEncoder(pHookCallback, pHookParam, pMetadataFree, metadata_size, format.profile != H26xProfile::H26xProfile_None)
CustomEncoder(pHookCallback, pHookParam, pMetadataFree, metadata_size)
{
m_pSinkWriter = CustomSinkWriter::CreateForVideo(Thunk_Sink, this, input_subtype, format, stride, encoder_options);
}

// OK
CustomEncoder::~CustomEncoder()
{
if (!m_shift) { return; }
if (m_pMetadataFree) { m_pMetadataFree(m_metadata.get(), m_metadata_size); }
}

// OK
Expand All @@ -54,7 +51,7 @@ void CustomEncoder::ProcessSample(IMFSample* pSample)
pSample->ConvertToContiguousBuffer(&pBuffer);
pSample->GetSampleTime(&hnsSampleTime);

if (!m_shift) { pSample->GetBlob(MF_USER_DATA_PAYLOAD, m_metadata.get(), m_metadata_size, NULL); }
pSample->GetBlob(MF_USER_DATA_PAYLOAD, m_metadata.get(), m_metadata_size, NULL);

pBuffer->Lock(&pFrame, NULL, &cbFrameBytes);

Expand All @@ -64,8 +61,6 @@ void CustomEncoder::ProcessSample(IMFSample* pSample)
pBuffer->Release();

if (m_pMetadataFree) { m_pMetadataFree(m_metadata.get(), m_metadata_size); }

if ( m_shift) { pSample->GetBlob(MF_USER_DATA_PAYLOAD, m_metadata.get(), m_metadata_size, NULL); }
}

// OK
Expand Down
3 changes: 1 addition & 2 deletions hl2ss/hl2ss/custom_encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,11 @@ class CustomEncoder
std::unique_ptr<CustomSinkWriter> m_pSinkWriter;
std::unique_ptr<uint8_t[]> m_metadata;
uint32_t m_metadata_size;
bool m_shift;
HOOK_ENCODER_PROC m_pHookCallback;
void* m_pHookParam;
HOOK_METADATA_PROC m_pMetadataFree;

CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size, bool shift);
CustomEncoder(HOOK_ENCODER_PROC pHookCallback, void* pHookParam, HOOK_METADATA_PROC pMetadataFree, uint32_t metadata_size);

void ProcessSample(IMFSample* pSample);

Expand Down
Binary file modified hl2ss_unity/Assets/Plugins/WSA/hl2ss.dll
Binary file not shown.
Binary file modified hl2ss_unreal/Plugins/hl2ss/Binaries/hl2ss/hl2ss.dll
Binary file not shown.
100 changes: 70 additions & 30 deletions viewer/hl2ss.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,66 @@ def get_audio_codec_bitrate(profile):
return None


class _codec_h264:
_aud = b'\x00\x00\x00\x01\x09\x10'

def __init__(self):
self._codec = self._codec = av.CodecContext.create('h264', 'r')

def decode(self, payload):
for packet in self._codec.parse(payload[6:] + _codec_h264._aud):
for frame in self._codec.decode(packet):
return frame


class _codec_hevc:
_aud = b'\x00\x00\x00\x01\x46\x01\x03'

def __init__(self):
self._codec = self._codec = av.CodecContext.create('hevc', 'r')

def decode(self, payload):
for packet in self._codec.parse(payload + _codec_hevc._aud):
for frame in self._codec.decode(packet):
return frame


class _codec_aac:
def __init__(self):
self._codec = av.CodecContext.create('aac', 'r')

def decode(self, payload):
for packet in self._codec.parse(payload):
for frame in self._codec.decode(packet):
return frame


def get_video_codec(profile):
    """Return a new decoder wrapper for `profile`, or None if no wrapper is mapped to it.

    The three H.264 profiles share `_codec_h264`; H265_MAIN uses `_codec_hevc`.
    """
    h264_profiles = (VideoProfile.H264_BASE, VideoProfile.H264_MAIN, VideoProfile.H264_HIGH)

    if any(profile == candidate for candidate in h264_profiles):
        return _codec_h264()
    if (profile == VideoProfile.H265_MAIN):
        return _codec_hevc()

    return None


def get_audio_codec(profile):
    """Return a new `_codec_aac` wrapper for any of the AAC profiles, or None otherwise."""
    aac_profiles = (
        AudioProfile.AAC_12000,
        AudioProfile.AAC_16000,
        AudioProfile.AAC_20000,
        AudioProfile.AAC_24000,
    )

    if any(profile == candidate for candidate in aac_profiles):
        return _codec_aac()

    return None


#------------------------------------------------------------------------------
# RM VLC Decoder
#------------------------------------------------------------------------------
Expand Down Expand Up @@ -947,13 +1007,10 @@ def __init__(self, profile):
self.profile = profile

def create(self):
self._codec = av.CodecContext.create(get_video_codec_name(self.profile), 'r')
self._codec = get_video_codec(self.profile)

def decode(self, payload):
for packet in self._codec.parse(payload):
for frame in self._codec.decode(packet):
return frame.to_ndarray()[:Parameters_RM_VLC.HEIGHT, :Parameters_RM_VLC.WIDTH]
return None
return self._codec.decode(payload).to_ndarray()[:Parameters_RM_VLC.HEIGHT, :Parameters_RM_VLC.WIDTH]


class _unpack_rm_vlc:
Expand Down Expand Up @@ -1016,13 +1073,10 @@ def __init__(self, profile):
self.profile = profile

def create(self):
self._codec = av.CodecContext.create(get_video_codec_name(self.profile), 'r')
self._codec = get_video_codec(self.profile)

def decode(self, payload):
for packet in self._codec.parse(payload[_Mode0Layout_RM_DEPTH_AHAT_STRUCT.BASE:-8]):
for frame in self._codec.decode(packet):
return _unpack_rm_depth_ahat_nv12_as_yuv420p(frame.to_ndarray(), np.frombuffer(payload[-8:], dtype=np.uint64, offset=0, count=1))
return None
return _unpack_rm_depth_ahat_nv12_as_yuv420p(self._codec.decode(payload[_Mode0Layout_RM_DEPTH_AHAT_STRUCT.BASE:-8]).to_ndarray(), np.frombuffer(payload[-8:], dtype=np.uint64, offset=0, count=1))


class _unpack_rm_depth_ahat:
Expand All @@ -1042,8 +1096,6 @@ def create(self):
self._codec = pyzdepth.DepthCompressor()

def decode(self, payload):
if (len(payload) <= 0):
return None
result, width, height, decompressed = self._codec.Decompress(bytes(payload))
return np.frombuffer(decompressed, dtype=np.uint16).reshape((height, width))

Expand All @@ -1053,13 +1105,10 @@ def __init__(self, profile):
self.profile = profile

def create(self):
self._codec = av.CodecContext.create(get_video_codec_name(self.profile), 'r')
self._codec = get_video_codec(self.profile)

def decode(self, payload):
for packet in self._codec.parse(payload):
for frame in self._codec.decode(packet):
return np.square(frame.to_ndarray()[:Parameters_RM_DEPTH_AHAT.HEIGHT, :Parameters_RM_DEPTH_AHAT.WIDTH], dtype=np.uint16)
return None
return np.square(self._codec.decode(payload).to_ndarray()[:Parameters_RM_DEPTH_AHAT.HEIGHT, :Parameters_RM_DEPTH_AHAT.WIDTH], dtype=np.uint16)


class _unpack_ab_rm_depth_ahat:
Expand Down Expand Up @@ -1205,13 +1254,10 @@ def __init__(self, profile):
self.profile = profile

def create(self, width, height):
self._codec = av.CodecContext.create(get_video_codec_name(self.profile), 'r')
self._codec = get_video_codec(self.profile)

def decode(self, payload, format):
for packet in self._codec.parse(payload):
for frame in self._codec.decode(packet):
return frame.to_ndarray(format=format)
return None
return self._codec.decode(payload).to_ndarray(format=format)


class _unpack_pv:
Expand Down Expand Up @@ -1262,13 +1308,10 @@ def __init__(self, profile):
self.profile = profile

def create(self):
self._codec = av.CodecContext.create(get_audio_codec_name(self.profile), 'r')
self._codec = get_audio_codec(self.profile)

def decode(self, payload):
for packet in self._codec.parse(payload):
for frame in self._codec.decode(packet):
return frame.to_ndarray()
return None
return self._codec.decode(payload).to_ndarray()


class _unpack_microphone:
Expand Down Expand Up @@ -1511,7 +1554,6 @@ def __init__(self, host, port, chunk, mode, divisor, profile, level, bitrate, op
def open(self):
self._codec.create()
super().open()
self.get_next_packet()

def get_next_packet(self):
data = super().get_next_packet()
Expand All @@ -1531,7 +1573,6 @@ def __init__(self, host, port, chunk, mode, divisor, profile_z, profile_ab, leve
def open(self):
self._codec.create()
super().open()
self.get_next_packet()

def get_next_packet(self):
data = super().get_next_packet()
Expand Down Expand Up @@ -1567,7 +1608,6 @@ def __init__(self, host, port, chunk, mode, width, height, framerate, divisor, p
def open(self):
self._codec.create(self.width, self.height)
super().open()
self.get_next_packet()

def get_next_packet(self):
data = super().get_next_packet()
Expand Down
32 changes: 12 additions & 20 deletions viewer/hl2ss_dp.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def open(self, host, port, user, password, chunk_size, configuration):
self._audio_tb = 48000
self._video_et = 0
self._audio_et = 0
self._video_init = None

def get_next_packet(self):
packets = []
Expand Down Expand Up @@ -165,8 +166,7 @@ def get_next_packet(self):
pps_data = stbl_data[133:141]
sps_data[0:2] = b'\x00\x00'
pps_data[0:2] = b'\x00\x00'
t = _compute_timestamp(self._video_ct, self._video_et, self._video_tb)
packets.append(hl2ss._packet(t, struct.pack('B', StreamKind.VIDEO | 0x04) + _avcc_to_annex_b(sps_data + pps_data), None))
self._video_init = sps_data + pps_data
elif (stbl_type == 'mp4a'):
self._audio_id = id
self._audio_ct = ct * tb
Expand Down Expand Up @@ -208,6 +208,9 @@ def get_next_packet(self):
sample = data[offset:(offset+size)]
if (id == self._video_id):
t = _compute_timestamp(self._video_ct, self._video_et, self._video_tb)
if (self._video_init is not None):
sample = sample[:6] + self._video_init + sample[6:] # AUD + SPS + PPS + IDR
self._video_init = None
packets.append(hl2ss._packet(t, struct.pack('B', StreamKind.VIDEO | keyf) + _avcc_to_annex_b(sample), None))
self._video_et += span
elif (id == self._audio_id):
Expand Down Expand Up @@ -299,16 +302,12 @@ def unpack_mrc(payload):


class decode_mrc:
def __init__(self):
self._video_codec = hl2ss.decode_pv(hl2ss.VideoProfile.H264_MAIN)
self._audio_codec = hl2ss.decode_microphone(hl2ss.AudioProfile.AAC_12000, hl2ss.AACLevel.L2)

def create(self):
self._video_codec.create(0, 0)
self._audio_codec.create()
self._video_codec = hl2ss.get_video_codec(hl2ss.VideoProfile.H264_MAIN)
self._audio_codec = hl2ss.get_audio_codec(hl2ss.AudioProfile.AAC_24000)

def decode(self, payload, kind, format):
return self._video_codec.decode(payload, format) if (kind == StreamKind.VIDEO) else self._audio_codec.decode(payload) if (kind == StreamKind.AUDIO) else None
return self._video_codec.decode(payload).to_ndarray(format=format) if (kind == StreamKind.VIDEO) else self._audio_codec.decode(payload).to_ndarray() if (kind == StreamKind.AUDIO) else None


#------------------------------------------------------------------------------
Expand All @@ -322,21 +321,14 @@ def __init__(self, host, port, user, password, chunk, configuration, format):
self._codec = decode_mrc()

def open(self):
self._d_t = 0
self._d_k = False
self._codec.create()
super().open()

def get_next_packet(self):
while (True):
data = super().get_next_packet()
data.payload = unpack_mrc(data.payload)
data.payload.sample = self._codec.decode(data.payload.sample, data.payload.kind, self.format)
if (data.payload.kind == StreamKind.VIDEO):
data.timestamp, self._d_t = (self._d_t, data.timestamp)
data.payload.key_frame, self._d_k = (self._d_k, data.payload.key_frame)
if (data.payload.sample is not None):
return data
data = super().get_next_packet()
data.payload = unpack_mrc(data.payload)
data.payload.sample = self._codec.decode(data.payload.sample, data.payload.kind, self.format)
return data

def close(self):
super().close()
Expand Down
4 changes: 1 addition & 3 deletions viewer/hl2ss_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,10 +580,8 @@ def unpack_to_mp4(input_filenames, output_filename):

for stream in streams:
stream.time_base = time_base
for codec in codecs:
codec.time_base = time_base

base = 0#hl2ss._RANGEOF.U64_MAX
base = 0

for reader in readers:
data = reader.get_next_packet()
Expand Down

0 comments on commit dd8c6d1

Please sign in to comment.