Skip to content

Commit

Permalink
add MPEG-4 video format (#236)
Browse files Browse the repository at this point in the history
  • Loading branch information
aler9 authored Apr 10, 2023
1 parent c0e3ba2 commit 0561810
Show file tree
Hide file tree
Showing 19 changed files with 210 additions and 44 deletions.
25 changes: 13 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,19 +97,20 @@ https://pkg.go.dev/github.com/bluenviron/gortsplib/v3#pkg-index

## Standards

* RTSP 1.0 https://www.rfc-editor.org/rfc/rfc2326
* RTSP 2.0 https://www.rfc-editor.org/rfc/rfc7826
* RTP Profile for Audio and Video Conferences with Minimal Control https://www.rfc-editor.org/rfc/rfc3551
* RTP Payload Format for MPEG1/MPEG2 Video https://www.rfc-editor.org/rfc/rfc2250
* RTP Payload Format for JPEG-compressed Video https://www.rfc-editor.org/rfc/rfc2435
* RTP Payload Format for H.264 Video https://www.rfc-editor.org/rfc/rfc6184
* RTP Payload Format for High Efficiency Video Coding (HEVC) https://www.rfc-editor.org/rfc/rfc7798.html
* RTP Payload Format for VP8 Video https://www.rfc-editor.org/rfc/rfc7741.html
* RTSP 1.0 https://datatracker.ietf.org/doc/html/rfc2326
* RTSP 2.0 https://datatracker.ietf.org/doc/html/rfc7826
* RTP Profile for Audio and Video Conferences with Minimal Control https://datatracker.ietf.org/doc/html/rfc3551
* RTP Payload Format for MPEG1/MPEG2 Video https://datatracker.ietf.org/doc/html/rfc2250
* RTP Payload Format for JPEG-compressed Video https://datatracker.ietf.org/doc/html/rfc2435
* RTP Payload Format for MPEG-4 Audio/Visual Streams https://datatracker.ietf.org/doc/html/rfc6416
* RTP Payload Format for H.264 Video https://datatracker.ietf.org/doc/html/rfc6184
* RTP Payload Format for High Efficiency Video Coding (HEVC) https://datatracker.ietf.org/doc/html/rfc7798
* RTP Payload Format for VP8 Video https://datatracker.ietf.org/doc/html/rfc7741
* RTP Payload Format for VP9 Video https://datatracker.ietf.org/doc/html/draft-ietf-payload-vp9-16
* RTP Payload Format for 12-bit DAT Audio and 20- and 24-bit Linear Sampled Audio https://www.rfc-editor.org/rfc/rfc3190.html
* RTP Payload Format for the Opus Speech and Audio Codec https://www.rfc-editor.org/rfc/rfc7587.html
* RTP Payload Format for MPEG-4 Audio/Visual Streams https://www.rfc-editor.org/rfc/rfc6416
* RTP Payload Format for Transport of MPEG-4 Elementary Streams https://www.rfc-editor.org/rfc/rfc3640.html
* RTP Payload Format for 12-bit DAT Audio and 20- and 24-bit Linear Sampled Audio https://datatracker.ietf.org/doc/html/rfc3190
* RTP Payload Format for Vorbis Encoded Audio https://datatracker.ietf.org/doc/html/rfc5215
* RTP Payload Format for the Opus Speech and Audio Codec https://datatracker.ietf.org/doc/html/rfc7587
* RTP Payload Format for Transport of MPEG-4 Elementary Streams https://datatracker.ietf.org/doc/html/rfc3640
* ITU-T Rec. H.264 (08/2021) https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-H.264-202108-I!!PDF-E&type=items
* ITU-T Rec. H.265 (08/2021) https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-H.265-202108-I!!PDF-E&type=items
* ISO 14496-3, Coding of audio-visual objects, part 3, Audio
Expand Down
7 changes: 5 additions & 2 deletions pkg/formats/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ func decodeFMTP(enc string) map[string]string {
return ret
}

// Format is a format of a media.
// It defines a codec and a payload type used to ship the media.
// Format is an RTP format of a media.
// It defines a codec and a payload type used to transmit the media.
type Format interface {
// String returns a description of the format.
String() string
Expand Down Expand Up @@ -111,6 +111,9 @@ func Unmarshal(md *psdp.MediaDescription, payloadTypeStr string) (Format, error)
case payloadType == 32:
return &MPEG2Video{}

case codec == "mp4v-es" && clock == "90000":
return &MPEG4Video{}

case codec == "h264" && clock == "90000":
return &H264{}

Expand Down
35 changes: 34 additions & 1 deletion pkg/formats/format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,39 @@ func TestNewFromMediaDescription(t *testing.T) {
},
&MPEG2Video{},
},
{
"video mpeg4 video",
&psdp.MediaDescription{
MediaName: psdp.MediaName{
Media: "video",
Protos: []string{"RTP", "AVP"},
Formats: []string{"96"},
},
Attributes: []psdp.Attribute{
{
Key: "rtpmap",
Value: "96 MP4V-ES/90000",
},
{
Key: "fmtp",
Value: "96 profile-level-id=1; " +
"config=000001B001000001B58913000001000000012000C48D8AEE053C04641443000001B24C61766335382E3133342E313030",
},
},
},
&MPEG4Video{
PayloadTyp: 96,
ProfileLevelID: 1,
Config: []byte{
0x00, 0x00, 0x01, 0xb0, 0x01, 0x00, 0x00, 0x01,
0xb5, 0x89, 0x13, 0x00, 0x00, 0x01, 0x00, 0x00,
0x00, 0x01, 0x20, 0x00, 0xc4, 0x8d, 0x8a, 0xee,
0x05, 0x3c, 0x04, 0x64, 0x14, 0x43, 0x00, 0x00,
0x01, 0xb2, 0x4c, 0x61, 0x76, 0x63, 0x35, 0x38,
0x2e, 0x31, 0x33, 0x34, 0x2e, 0x31, 0x30, 0x30,
},
},
},
{
"video h264",
&psdp.MediaDescription{
Expand Down Expand Up @@ -443,7 +476,7 @@ func TestNewFromMediaDescription(t *testing.T) {
},
},
{
"h264 empty sprop-parameter-sets",
"video h264 empty sprop-parameter-sets",
&psdp.MediaDescription{
MediaName: psdp.MediaName{
Media: "video",
Expand Down
3 changes: 2 additions & 1 deletion pkg/formats/g711.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import (
"github.com/bluenviron/gortsplib/v3/pkg/formats/rtpsimpleaudio"
)

// G711 is a format that uses the G711 codec, encoded with mu-law or A-law.
// G711 is an RTP format that uses the G711 codec, encoded with mu-law or A-law.
// Specification: https://datatracker.ietf.org/doc/html/rfc3551
type G711 struct {
// whether to use mu-law. Otherwise, A-law is used.
MULaw bool
Expand Down
3 changes: 2 additions & 1 deletion pkg/formats/g722.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import (
"github.com/bluenviron/gortsplib/v3/pkg/formats/rtpsimpleaudio"
)

// G722 is a format that uses the G722 codec.
// G722 is an RTP format that uses the G722 codec.
// Specification: https://datatracker.ietf.org/doc/html/rfc3551
type G722 struct{}

// String implements Format.
Expand Down
2 changes: 1 addition & 1 deletion pkg/formats/generic.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func findClockRate(payloadType uint8, rtpMap string) (int, error) {
return int(v), nil
}

// Generic is a generic format.
// Generic is a generic RTP format.
type Generic struct {
PayloadTyp uint8
RTPMap string
Expand Down
29 changes: 15 additions & 14 deletions pkg/formats/h264.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ func rtpH264ContainsIDR(pkt *rtp.Packet) bool {
}
}

// H264 is a format that uses the H264 codef.
// H264 is an RTP format that uses the H264 codec, defined in MPEG-4 part 10.
// Specification: https://datatracker.ietf.org/doc/html/rfc6184
type H264 struct {
PayloadTyp uint8
SPS []byte
Expand Down Expand Up @@ -101,14 +102,14 @@ func (f *H264) unmarshal(payloadType uint8, clock string, codec string, rtpmap s
for key, val := range fmtp {
switch key {
case "sprop-parameter-sets":
tmp2 := strings.Split(val, ",")
if len(tmp2) >= 2 {
sps, err := base64.StdEncoding.DecodeString(tmp2[0])
tmp := strings.Split(val, ",")
if len(tmp) >= 2 {
sps, err := base64.StdEncoding.DecodeString(tmp[0])
if err != nil {
return fmt.Errorf("invalid sprop-parameter-sets (%v)", val)
}

pps, err := base64.StdEncoding.DecodeString(tmp2[1])
pps, err := base64.StdEncoding.DecodeString(tmp[1])
if err != nil {
return fmt.Errorf("invalid sprop-parameter-sets (%v)", val)
}
Expand All @@ -118,12 +119,12 @@ func (f *H264) unmarshal(payloadType uint8, clock string, codec string, rtpmap s
}

case "packetization-mode":
tmp2, err := strconv.ParseInt(val, 10, 64)
tmp, err := strconv.ParseInt(val, 10, 64)
if err != nil {
return fmt.Errorf("invalid packetization-mode (%v)", val)
}

f.PacketizationMode = int(tmp2)
f.PacketizationMode = int(tmp)
}
}

Expand All @@ -141,15 +142,15 @@ func (f *H264) Marshal() (string, map[string]string) {
fmtp["packetization-mode"] = strconv.FormatInt(int64(f.PacketizationMode), 10)
}

var tmp2 []string
var tmp []string
if f.SPS != nil {
tmp2 = append(tmp2, base64.StdEncoding.EncodeToString(f.SPS))
tmp = append(tmp, base64.StdEncoding.EncodeToString(f.SPS))
}
if f.PPS != nil {
tmp2 = append(tmp2, base64.StdEncoding.EncodeToString(f.PPS))
tmp = append(tmp, base64.StdEncoding.EncodeToString(f.PPS))
}
if tmp2 != nil {
fmtp["sprop-parameter-sets"] = strings.Join(tmp2, ",")
if tmp != nil {
fmtp["sprop-parameter-sets"] = strings.Join(tmp, ",")
}
if len(f.SPS) >= 4 {
fmtp["profile-level-id"] = strings.ToUpper(hex.EncodeToString(f.SPS[1:4]))
Expand All @@ -163,7 +164,7 @@ func (f *H264) PTSEqualsDTS(pkt *rtp.Packet) bool {
return rtpH264ContainsIDR(pkt)
}

// CreateDecoder creates a decoder able to decode the content of the formaf.
// CreateDecoder creates a decoder able to decode the content of the format.
func (f *H264) CreateDecoder() *rtph264.Decoder {
d := &rtph264.Decoder{
PacketizationMode: f.PacketizationMode,
Expand All @@ -172,7 +173,7 @@ func (f *H264) CreateDecoder() *rtph264.Decoder {
return d
}

// CreateEncoder creates an encoder able to encode the content of the formaf.
// CreateEncoder creates an encoder able to encode the content of the format.
func (f *H264) CreateEncoder() *rtph264.Encoder {
e := &rtph264.Encoder{
PayloadType: f.PayloadTyp,
Expand Down
7 changes: 4 additions & 3 deletions pkg/formats/h265.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ import (
"github.com/bluenviron/gortsplib/v3/pkg/formats/rtph265"
)

// H265 is a format that uses the H265 codef.
// H265 is an RTP format that uses the H265 codec.
// Specification: https://datatracker.ietf.org/doc/html/rfc7798
type H265 struct {
PayloadTyp uint8
VPS []byte
Expand Down Expand Up @@ -102,7 +103,7 @@ func (f *H265) PTSEqualsDTS(*rtp.Packet) bool {
return true
}

// CreateDecoder creates a decoder able to decode the content of the formaf.
// CreateDecoder creates a decoder able to decode the content of the format.
func (f *H265) CreateDecoder() *rtph265.Decoder {
d := &rtph265.Decoder{
MaxDONDiff: f.MaxDONDiff,
Expand All @@ -111,7 +112,7 @@ func (f *H265) CreateDecoder() *rtph265.Decoder {
return d
}

// CreateEncoder creates an encoder able to encode the content of the formaf.
// CreateEncoder creates an encoder able to encode the content of the format.
func (f *H265) CreateEncoder() *rtph265.Encoder {
e := &rtph265.Encoder{
PayloadType: f.PayloadTyp,
Expand Down
3 changes: 2 additions & 1 deletion pkg/formats/lpcm.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import (
"github.com/bluenviron/gortsplib/v3/pkg/formats/rtplpcm"
)

// LPCM is a format that uses the uncompressed, Linear PCM codec.
// LPCM is an RTP format that uses the uncompressed, Linear PCM codec.
// Specification: https://datatracker.ietf.org/doc/html/rfc3190
type LPCM struct {
PayloadTyp uint8
BitDepth int
Expand Down
3 changes: 2 additions & 1 deletion pkg/formats/mjpeg.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import (
"github.com/bluenviron/gortsplib/v3/pkg/formats/rtpmjpeg"
)

// MJPEG is a format that uses the Motion-JPEG codec.
// MJPEG is an RTP format that uses the Motion-JPEG codec.
// Specification: https://datatracker.ietf.org/doc/html/rfc2435
type MJPEG struct{}

// String implements Format.
Expand Down
3 changes: 2 additions & 1 deletion pkg/formats/mpeg2_audio.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import (
"github.com/pion/rtp"
)

// MPEG2Audio is a format that uses a MPEG-1 or MPEG-2 audio codec.
// MPEG2Audio is an RTP format that uses an MPEG-1 or MPEG-2 audio codec.
// Specification: https://datatracker.ietf.org/doc/html/rfc2250
type MPEG2Audio struct{}

// String implements Format.
Expand Down
3 changes: 2 additions & 1 deletion pkg/formats/mpeg2_video.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import (
"github.com/pion/rtp"
)

// MPEG2Video is a format that uses a MPEG-1 or MPEG-2 video codec.
// MPEG2Video is an RTP format that uses an MPEG-1 or MPEG-2 video codec.
// Specification: https://datatracker.ietf.org/doc/html/rfc2250
type MPEG2Video struct{}

// String implements Format.
Expand Down
3 changes: 2 additions & 1 deletion pkg/formats/mpeg4_audio.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ import (
"github.com/bluenviron/mediacommon/pkg/codecs/mpeg4audio"
)

// MPEG4Audio is a format that uses a MPEG-4 audio codec.
// MPEG4Audio is an RTP format that uses an MPEG-4 audio codec.
// Specification: https://datatracker.ietf.org/doc/html/rfc3640
type MPEG4Audio struct {
PayloadTyp uint8
Config *mpeg4audio.Config
Expand Down
81 changes: 81 additions & 0 deletions pkg/formats/mpeg4_video.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package formats

import (
"encoding/hex"
"fmt"
"strconv"
"strings"

"github.com/pion/rtp"
)

// MPEG4Video is an RTP format that uses the video codec defined in MPEG-4 part 2.
// Specification: https://datatracker.ietf.org/doc/html/rfc6416#section-7.1
type MPEG4Video struct {
	// PayloadTyp is the RTP payload type; it is the value returned by PayloadType.
	PayloadTyp uint8
	// ProfileLevelID is the value of the "profile-level-id" fmtp parameter.
	ProfileLevelID int
	// Config holds the decoded bytes of the hex-encoded "config" fmtp parameter.
	Config []byte
}

// String implements Format.
// It returns a fixed, human-readable description of the format.
func (f *MPEG4Video) String() string {
	const description = "MPEG4-video"
	return description
}

// ClockRate implements Format.
// The clock rate is fixed at 90000 and does not depend on the format's fields.
func (f *MPEG4Video) ClockRate() int {
	const clockRate = 90000
	return clockRate
}

// PayloadType implements Format.
// It reports the payload type stored in the PayloadTyp field.
func (f *MPEG4Video) PayloadType() uint8 {
	payloadType := f.PayloadTyp
	return payloadType
}

// unmarshal fills the format from a payload type and a set of decoded fmtp
// parameters.
//
// Two fmtp keys are recognized: "profile-level-id" (a non-negative decimal
// integer) and "config" (a hexadecimal string). Any other key is ignored.
// The clock, codec and rtpmap arguments are not used by this format.
//
// It returns an error when "profile-level-id" is not a non-negative decimal
// number that fits in 31 bits, or when "config" is not valid hexadecimal.
// Config is assigned only after a successful decode, so a malformed value
// never leaves partially decoded bytes in the format.
func (f *MPEG4Video) unmarshal(
	payloadType uint8, clock string, codec string,
	rtpmap string, fmtp map[string]string,
) error {
	f.PayloadTyp = payloadType

	// If this parameter is not specified by
	// the procedure, its default value of 1 (Simple Profile/Level 1) is
	// used.
	f.ProfileLevelID = 1

	// Keys are looked up directly instead of ranging over the map, so that the
	// reported error does not depend on the random map iteration order.
	if val, ok := fmtp["profile-level-id"]; ok {
		// A bit size of 31 rejects negative values and guarantees that the
		// result fits in an int on 32-bit platforms too.
		id, err := strconv.ParseUint(val, 10, 31)
		if err != nil {
			return fmt.Errorf("invalid profile-level-id: %v", val)
		}

		f.ProfileLevelID = int(id)
	}

	if val, ok := fmtp["config"]; ok {
		// hex.DecodeString returns the bytes decoded before a failure;
		// decode into a local so that they are discarded on error.
		config, err := hex.DecodeString(val)
		if err != nil {
			return fmt.Errorf("invalid config: %v", val)
		}

		f.Config = config
	}

	return nil
}

// Marshal implements Format.
// It returns the rtpmap value of the format together with its fmtp
// parameters: the profile-level-id in decimal form and the configuration as
// an upper-case hexadecimal string.
func (f *MPEG4Video) Marshal() (string, map[string]string) {
	params := make(map[string]string, 2)
	params["profile-level-id"] = strconv.Itoa(f.ProfileLevelID)
	params["config"] = strings.ToUpper(hex.EncodeToString(f.Config))

	return "MP4V-ES/90000", params
}

// PTSEqualsDTS implements Format.
// The packet is not inspected: the method reports true for every packet.
func (f *MPEG4Video) PTSEqualsDTS(_ *rtp.Packet) bool {
	return true
}
Loading

0 comments on commit 0561810

Please sign in to comment.