Skip to content

Commit 0561810

Browse files
authored
add MPEG-4 video format (#236)
related to bluenviron/mediamtx#1550
1 parent c0e3ba2 commit 0561810

19 files changed

+210
-44
lines changed

README.md

+13-12
Original file line numberDiff line numberDiff line change
@@ -97,19 +97,20 @@ https://pkg.go.dev/github.com/bluenviron/gortsplib/v3#pkg-index
9797

9898
## Standards
9999

100-
* RTSP 1.0 https://www.rfc-editor.org/rfc/rfc2326
101-
* RTSP 2.0 https://www.rfc-editor.org/rfc/rfc7826
102-
* RTP Profile for Audio and Video Conferences with Minimal Control https://www.rfc-editor.org/rfc/rfc3551
103-
* RTP Payload Format for MPEG1/MPEG2 Video https://www.rfc-editor.org/rfc/rfc2250
104-
* RTP Payload Format for JPEG-compressed Video https://www.rfc-editor.org/rfc/rfc2435
105-
* RTP Payload Format for H.264 Video https://www.rfc-editor.org/rfc/rfc6184
106-
* RTP Payload Format for High Efficiency Video Coding (HEVC) https://www.rfc-editor.org/rfc/rfc7798.html
107-
* RTP Payload Format for VP8 Video https://www.rfc-editor.org/rfc/rfc7741.html
100+
* RTSP 1.0 https://datatracker.ietf.org/doc/html/rfc2326
101+
* RTSP 2.0 https://datatracker.ietf.org/doc/html/rfc7826
102+
* RTP Profile for Audio and Video Conferences with Minimal Control https://datatracker.ietf.org/doc/html/rfc3551
103+
* RTP Payload Format for MPEG1/MPEG2 Video https://datatracker.ietf.org/doc/html/rfc2250
104+
* RTP Payload Format for JPEG-compressed Video https://datatracker.ietf.org/doc/html/rfc2435
105+
* RTP Payload Format for MPEG-4 Audio/Visual Streams https://datatracker.ietf.org/doc/html/rfc6416
106+
* RTP Payload Format for H.264 Video https://datatracker.ietf.org/doc/html/rfc6184
107+
* RTP Payload Format for High Efficiency Video Coding (HEVC) https://datatracker.ietf.org/doc/html/rfc7798
108+
* RTP Payload Format for VP8 Video https://datatracker.ietf.org/doc/html/rfc7741
108109
* RTP Payload Format for VP9 Video https://datatracker.ietf.org/doc/html/draft-ietf-payload-vp9-16
109-
* RTP Payload Format for 12-bit DAT Audio and 20- and 24-bit Linear Sampled Audio https://www.rfc-editor.org/rfc/rfc3190.html
110-
* RTP Payload Format for the Opus Speech and Audio Codec https://www.rfc-editor.org/rfc/rfc7587.html
111-
* RTP Payload Format for MPEG-4 Audio/Visual Streams https://www.rfc-editor.org/rfc/rfc6416
112-
* RTP Payload Format for Transport of MPEG-4 Elementary Streams https://www.rfc-editor.org/rfc/rfc3640.html
110+
* RTP Payload Format for 12-bit DAT Audio and 20- and 24-bit Linear Sampled Audio https://datatracker.ietf.org/doc/html/rfc3190
111+
* RTP Payload Format for Vorbis Encoded Audio https://datatracker.ietf.org/doc/html/rfc5215
112+
* RTP Payload Format for the Opus Speech and Audio Codec https://datatracker.ietf.org/doc/html/rfc7587
113+
* RTP Payload Format for Transport of MPEG-4 Elementary Streams https://datatracker.ietf.org/doc/html/rfc3640
113114
* ITU-T Rec. H.264 (08/2021) https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-H.264-202108-I!!PDF-E&type=items
114115
* ITU-T Rec. H.265 (08/2021) https://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-H.265-202108-I!!PDF-E&type=items
115116
* ISO 14496-3, Coding of audio-visual objects, part 3, Audio

pkg/formats/format.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ func decodeFMTP(enc string) map[string]string {
5757
return ret
5858
}
5959

60-
// Format is a format of a media.
61-
// It defines a codec and a payload type used to ship the media.
60+
// Format is an RTP format of a media stream.
61+
// It defines a codec and a payload type used to transmit the media.
6262
type Format interface {
6363
// String returns a description of the format.
6464
String() string
@@ -111,6 +111,9 @@ func Unmarshal(md *psdp.MediaDescription, payloadTypeStr string) (Format, error)
111111
case payloadType == 32:
112112
return &MPEG2Video{}
113113

114+
case codec == "mp4v-es" && clock == "90000":
115+
return &MPEG4Video{}
116+
114117
case codec == "h264" && clock == "90000":
115118
return &H264{}
116119

pkg/formats/format_test.go

+34-1
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,39 @@ func TestNewFromMediaDescription(t *testing.T) {
322322
},
323323
&MPEG2Video{},
324324
},
325+
{
326+
"video mpeg4 video",
327+
&psdp.MediaDescription{
328+
MediaName: psdp.MediaName{
329+
Media: "video",
330+
Protos: []string{"RTP", "AVP"},
331+
Formats: []string{"96"},
332+
},
333+
Attributes: []psdp.Attribute{
334+
{
335+
Key: "rtpmap",
336+
Value: "96 MP4V-ES/90000",
337+
},
338+
{
339+
Key: "fmtp",
340+
Value: "96 profile-level-id=1; " +
341+
"config=000001B001000001B58913000001000000012000C48D8AEE053C04641443000001B24C61766335382E3133342E313030",
342+
},
343+
},
344+
},
345+
&MPEG4Video{
346+
PayloadTyp: 96,
347+
ProfileLevelID: 1,
348+
Config: []byte{
349+
0x00, 0x00, 0x01, 0xb0, 0x01, 0x00, 0x00, 0x01,
350+
0xb5, 0x89, 0x13, 0x00, 0x00, 0x01, 0x00, 0x00,
351+
0x00, 0x01, 0x20, 0x00, 0xc4, 0x8d, 0x8a, 0xee,
352+
0x05, 0x3c, 0x04, 0x64, 0x14, 0x43, 0x00, 0x00,
353+
0x01, 0xb2, 0x4c, 0x61, 0x76, 0x63, 0x35, 0x38,
354+
0x2e, 0x31, 0x33, 0x34, 0x2e, 0x31, 0x30, 0x30,
355+
},
356+
},
357+
},
325358
{
326359
"video h264",
327360
&psdp.MediaDescription{
@@ -443,7 +476,7 @@ func TestNewFromMediaDescription(t *testing.T) {
443476
},
444477
},
445478
{
446-
"h264 empty sprop-parameter-sets",
479+
"video h264 empty sprop-parameter-sets",
447480
&psdp.MediaDescription{
448481
MediaName: psdp.MediaName{
449482
Media: "video",

pkg/formats/g711.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ import (
66
"github.com/bluenviron/gortsplib/v3/pkg/formats/rtpsimpleaudio"
77
)
88

9-
// G711 is a format that uses the G711 codec, encoded with mu-law or A-law.
9+
// G711 is an RTP format that uses the G711 codec, encoded with mu-law or A-law.
10+
// Specification: https://datatracker.ietf.org/doc/html/rfc3551
1011
type G711 struct {
1112
// whether to use mu-law. Otherwise, A-law is used.
1213
MULaw bool

pkg/formats/g722.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ import (
66
"github.com/bluenviron/gortsplib/v3/pkg/formats/rtpsimpleaudio"
77
)
88

9-
// G722 is a format that uses the G722 codec.
9+
// G722 is an RTP format that uses the G722 codec.
10+
// Specification: https://datatracker.ietf.org/doc/html/rfc3551
1011
type G722 struct{}
1112

1213
// String implements Format.

pkg/formats/generic.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ func findClockRate(payloadType uint8, rtpMap string) (int, error) {
5151
return int(v), nil
5252
}
5353

54-
// Generic is a generic format.
54+
// Generic is a generic RTP format.
5555
type Generic struct {
5656
PayloadTyp uint8
5757
RTPMap string

pkg/formats/h264.go

+15-14
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ func rtpH264ContainsIDR(pkt *rtp.Packet) bool {
7070
}
7171
}
7272

73-
// H264 is a format that uses the H264 codef.
73+
// H264 is an RTP format that uses the H264 codec, defined in MPEG-4 part 10.
74+
// Specification: https://datatracker.ietf.org/doc/html/rfc6184
7475
type H264 struct {
7576
PayloadTyp uint8
7677
SPS []byte
@@ -101,14 +102,14 @@ func (f *H264) unmarshal(payloadType uint8, clock string, codec string, rtpmap s
101102
for key, val := range fmtp {
102103
switch key {
103104
case "sprop-parameter-sets":
104-
tmp2 := strings.Split(val, ",")
105-
if len(tmp2) >= 2 {
106-
sps, err := base64.StdEncoding.DecodeString(tmp2[0])
105+
tmp := strings.Split(val, ",")
106+
if len(tmp) >= 2 {
107+
sps, err := base64.StdEncoding.DecodeString(tmp[0])
107108
if err != nil {
108109
return fmt.Errorf("invalid sprop-parameter-sets (%v)", val)
109110
}
110111

111-
pps, err := base64.StdEncoding.DecodeString(tmp2[1])
112+
pps, err := base64.StdEncoding.DecodeString(tmp[1])
112113
if err != nil {
113114
return fmt.Errorf("invalid sprop-parameter-sets (%v)", val)
114115
}
@@ -118,12 +119,12 @@ func (f *H264) unmarshal(payloadType uint8, clock string, codec string, rtpmap s
118119
}
119120

120121
case "packetization-mode":
121-
tmp2, err := strconv.ParseInt(val, 10, 64)
122+
tmp, err := strconv.ParseInt(val, 10, 64)
122123
if err != nil {
123124
return fmt.Errorf("invalid packetization-mode (%v)", val)
124125
}
125126

126-
f.PacketizationMode = int(tmp2)
127+
f.PacketizationMode = int(tmp)
127128
}
128129
}
129130

@@ -141,15 +142,15 @@ func (f *H264) Marshal() (string, map[string]string) {
141142
fmtp["packetization-mode"] = strconv.FormatInt(int64(f.PacketizationMode), 10)
142143
}
143144

144-
var tmp2 []string
145+
var tmp []string
145146
if f.SPS != nil {
146-
tmp2 = append(tmp2, base64.StdEncoding.EncodeToString(f.SPS))
147+
tmp = append(tmp, base64.StdEncoding.EncodeToString(f.SPS))
147148
}
148149
if f.PPS != nil {
149-
tmp2 = append(tmp2, base64.StdEncoding.EncodeToString(f.PPS))
150+
tmp = append(tmp, base64.StdEncoding.EncodeToString(f.PPS))
150151
}
151-
if tmp2 != nil {
152-
fmtp["sprop-parameter-sets"] = strings.Join(tmp2, ",")
152+
if tmp != nil {
153+
fmtp["sprop-parameter-sets"] = strings.Join(tmp, ",")
153154
}
154155
if len(f.SPS) >= 4 {
155156
fmtp["profile-level-id"] = strings.ToUpper(hex.EncodeToString(f.SPS[1:4]))
@@ -163,7 +164,7 @@ func (f *H264) PTSEqualsDTS(pkt *rtp.Packet) bool {
163164
return rtpH264ContainsIDR(pkt)
164165
}
165166

166-
// CreateDecoder creates a decoder able to decode the content of the formaf.
167+
// CreateDecoder creates a decoder able to decode the content of the format.
167168
func (f *H264) CreateDecoder() *rtph264.Decoder {
168169
d := &rtph264.Decoder{
169170
PacketizationMode: f.PacketizationMode,
@@ -172,7 +173,7 @@ func (f *H264) CreateDecoder() *rtph264.Decoder {
172173
return d
173174
}
174175

175-
// CreateEncoder creates an encoder able to encode the content of the formaf.
176+
// CreateEncoder creates an encoder able to encode the content of the format.
176177
func (f *H264) CreateEncoder() *rtph264.Encoder {
177178
e := &rtph264.Encoder{
178179
PayloadType: f.PayloadTyp,

pkg/formats/h265.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ import (
1111
"github.com/bluenviron/gortsplib/v3/pkg/formats/rtph265"
1212
)
1313

14-
// H265 is a format that uses the H265 codef.
14+
// H265 is an RTP format that uses the H265 codec.
15+
// Specification: https://datatracker.ietf.org/doc/html/rfc7798
1516
type H265 struct {
1617
PayloadTyp uint8
1718
VPS []byte
@@ -102,7 +103,7 @@ func (f *H265) PTSEqualsDTS(*rtp.Packet) bool {
102103
return true
103104
}
104105

105-
// CreateDecoder creates a decoder able to decode the content of the formaf.
106+
// CreateDecoder creates a decoder able to decode the content of the format.
106107
func (f *H265) CreateDecoder() *rtph265.Decoder {
107108
d := &rtph265.Decoder{
108109
MaxDONDiff: f.MaxDONDiff,
@@ -111,7 +112,7 @@ func (f *H265) CreateDecoder() *rtph265.Decoder {
111112
return d
112113
}
113114

114-
// CreateEncoder creates an encoder able to encode the content of the formaf.
115+
// CreateEncoder creates an encoder able to encode the content of the format.
115116
func (f *H265) CreateEncoder() *rtph265.Encoder {
116117
e := &rtph265.Encoder{
117118
PayloadType: f.PayloadTyp,

pkg/formats/lpcm.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ import (
99
"github.com/bluenviron/gortsplib/v3/pkg/formats/rtplpcm"
1010
)
1111

12-
// LPCM is a format that uses the uncompressed, Linear PCM codec.
12+
// LPCM is an RTP format that uses the uncompressed, Linear PCM codec.
13+
// Specification: https://datatracker.ietf.org/doc/html/rfc3190
1314
type LPCM struct {
1415
PayloadTyp uint8
1516
BitDepth int

pkg/formats/mjpeg.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ import (
66
"github.com/bluenviron/gortsplib/v3/pkg/formats/rtpmjpeg"
77
)
88

9-
// MJPEG is a format that uses the Motion-JPEG codec.
9+
// MJPEG is an RTP format that uses the Motion-JPEG codec.
10+
// Specification: https://datatracker.ietf.org/doc/html/rfc2435
1011
type MJPEG struct{}
1112

1213
// String implements Format.

pkg/formats/mpeg2_audio.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ import (
44
"github.com/pion/rtp"
55
)
66

7-
// MPEG2Audio is a format that uses a MPEG-1 or MPEG-2 audio codec.
7+
// MPEG2Audio is an RTP format that uses an MPEG-1 or MPEG-2 audio codec.
8+
// Specification: https://datatracker.ietf.org/doc/html/rfc2250
89
type MPEG2Audio struct{}
910

1011
// String implements Format.

pkg/formats/mpeg2_video.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ import (
44
"github.com/pion/rtp"
55
)
66

7-
// MPEG2Video is a format that uses a MPEG-1 or MPEG-2 video codec.
7+
// MPEG2Video is an RTP format that uses an MPEG-1 or MPEG-2 video codec.
8+
// Specification: https://datatracker.ietf.org/doc/html/rfc2250
89
type MPEG2Video struct{}
910

1011
// String implements Format.

pkg/formats/mpeg4_audio.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ import (
1111
"github.com/bluenviron/mediacommon/pkg/codecs/mpeg4audio"
1212
)
1313

14-
// MPEG4Audio is a format that uses a MPEG-4 audio codec.
14+
// MPEG4Audio is an RTP format that uses an MPEG-4 audio codec.
15+
// Specification: https://datatracker.ietf.org/doc/html/rfc3640
1516
type MPEG4Audio struct {
1617
PayloadTyp uint8
1718
Config *mpeg4audio.Config

pkg/formats/mpeg4_video.go

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package formats
2+
3+
import (
4+
"encoding/hex"
5+
"fmt"
6+
"strconv"
7+
"strings"
8+
9+
"github.com/pion/rtp"
10+
)
11+
12+
// MPEG4Video is an RTP format that uses the video codec defined in MPEG-4 part 2.
13+
// Specification: https://datatracker.ietf.org/doc/html/rfc6416#section-7.1
14+
type MPEG4Video struct {
15+
PayloadTyp uint8
16+
ProfileLevelID int
17+
Config []byte
18+
}
19+
20+
// String implements Format.
21+
func (f *MPEG4Video) String() string {
22+
return "MPEG4-video"
23+
}
24+
25+
// ClockRate implements Format.
26+
func (f *MPEG4Video) ClockRate() int {
27+
return 90000
28+
}
29+
30+
// PayloadType implements Format.
31+
func (f *MPEG4Video) PayloadType() uint8 {
32+
return f.PayloadTyp
33+
}
34+
35+
func (f *MPEG4Video) unmarshal(
36+
payloadType uint8, clock string, codec string,
37+
rtpmap string, fmtp map[string]string,
38+
) error {
39+
f.PayloadTyp = payloadType
40+
41+
// profile-level-id is optional: if it is not specified in the fmtp
42+
// attributes, its default value of 1 (Simple Profile/Level 1) is
43+
// used (RFC 6416, section 7.1).
44+
f.ProfileLevelID = 1
45+
46+
for key, val := range fmtp {
47+
switch key {
48+
case "profile-level-id":
49+
tmp, err := strconv.ParseInt(val, 10, 64)
50+
if err != nil {
51+
return fmt.Errorf("invalid profile-level-id: %v", val)
52+
}
53+
54+
f.ProfileLevelID = int(tmp)
55+
56+
case "config":
57+
var err error
58+
f.Config, err = hex.DecodeString(val)
59+
if err != nil {
60+
return fmt.Errorf("invalid config: %v", val)
61+
}
62+
}
63+
}
64+
65+
return nil
66+
}
67+
68+
// Marshal implements Format.
69+
func (f *MPEG4Video) Marshal() (string, map[string]string) {
70+
fmtp := map[string]string{
71+
"profile-level-id": strconv.FormatInt(int64(f.ProfileLevelID), 10),
72+
"config": strings.ToUpper(hex.EncodeToString(f.Config)),
73+
}
74+
75+
return "MP4V-ES/90000", fmtp
76+
}
77+
78+
// PTSEqualsDTS implements Format.
79+
func (f *MPEG4Video) PTSEqualsDTS(*rtp.Packet) bool {
80+
return true
81+
}

0 commit comments

Comments
 (0)