diff --git a/av/av.go b/av/av.go index df99726..5c03823 100644 --- a/av/av.go +++ b/av/av.go @@ -116,6 +116,9 @@ type CodecType uint32 var ( H264 = MakeVideoCodecType(avCodecTypeMagic + 1) + HEVC = MakeVideoCodecType(avCodecTypeMagic + 2) + VP9 = MakeVideoCodecType(avCodecTypeMagic + 3) + AV1 = MakeVideoCodecType(avCodecTypeMagic + 4) AAC = MakeAudioCodecType(avCodecTypeMagic + 1) PCM_MULAW = MakeAudioCodecType(avCodecTypeMagic + 2) PCM_ALAW = MakeAudioCodecType(avCodecTypeMagic + 3) @@ -126,10 +129,16 @@ var ( const codecTypeAudioBit = 0x1 const codecTypeOtherBits = 1 -func (self CodecType) String() string { - switch self { +func (c CodecType) String() string { + switch c { case H264: return "H264" + case HEVC: + return "HEVC" + case VP9: + return "VP9" + case AV1: + return "AV1" case AAC: return "AAC" case PCM_MULAW: @@ -144,12 +153,12 @@ func (self CodecType) String() string { return "" } -func (self CodecType) IsAudio() bool { - return self&codecTypeAudioBit != 0 +func (c CodecType) IsAudio() bool { + return c&codecTypeAudioBit != 0 } -func (self CodecType) IsVideo() bool { - return self&codecTypeAudioBit == 0 +func (c CodecType) IsVideo() bool { + return c&codecTypeAudioBit == 0 } // Make a new audio codec type. @@ -169,7 +178,7 @@ const avCodecTypeMagic = 233333 // CodecData is some important bytes for initializing audio/video decoder, // can be converted to VideoCodecData or AudioCodecData using: // -// codecdata.(AudioCodecData) or codecdata.(VideoCodecData) +// codecdata.(AudioCodecData) or codecdata.(VideoCodecData) // // for H264, CodecData is AVCDecoderConfigure bytes, includes SPS/PPS. type CodecData interface { diff --git a/av/pktque/filters.go b/av/pktque/filters.go index ff54d3f..4d10e42 100644 --- a/av/pktque/filters.go +++ b/av/pktque/filters.go @@ -2,8 +2,9 @@ package pktque import ( - "github.com/datarhei/joy4/av" "time" + + "github.com/datarhei/joy4/av" ) type Filter interface { diff --git a/codec/aacparser/parser.go b/codec/aacparser/parser.go index a220993..a94c086 100644 --- a/codec/aacparser/parser.go +++ b/codec/aacparser/parser.go @@ -3,10 +3,11 @@ package aacparser import ( "bytes" "fmt" - "github.com/datarhei/joy4/av" - "github.com/datarhei/joy4/utils/bits" "io" "time" + + "github.com/datarhei/joy4/av" + "github.com/datarhei/joy4/utils/bits" ) // copied from libavcodec/mpeg4audio.h diff --git a/codec/h264parser/parser.go b/codec/h264parser/parser.go index 61a3339..e38364f 100644 --- a/codec/h264parser/parser.go +++ b/codec/h264parser/parser.go @@ -3,6 +3,7 @@ package h264parser import ( "bytes" "fmt" + "github.com/datarhei/joy4/av" "github.com/datarhei/joy4/utils/bits" "github.com/datarhei/joy4/utils/bits/pio" diff --git a/codec/h264parser/parser_test.go b/codec/h264parser/parser_test.go index 1565674..99c8903 100644 --- a/codec/h264parser/parser_test.go +++ b/codec/h264parser/parser_test.go @@ -6,16 +6,16 @@ import ( ) func TestParser(t *testing.T) { - var ok bool + var typ int var nalus [][]byte annexbFrame, _ := hex.DecodeString("00000001223322330000000122332233223300000133000001000001") - nalus, ok = SplitNALUs(annexbFrame) - t.Log(ok, len(nalus)) + nalus, typ = SplitNALUs(annexbFrame) + t.Log(typ, len(nalus)) avccFrame, _ := hex.DecodeString( "00000008aabbccaabbccaabb00000001aa", ) - nalus, ok = SplitNALUs(avccFrame) - t.Log(ok, len(nalus)) + nalus, typ = SplitNALUs(avccFrame) + t.Log(typ, len(nalus)) } diff --git a/codec/hevcparser/parser.go b/codec/hevcparser/parser.go new file mode 100644 index 0000000..8ec0238 --- /dev/null +++ b/codec/hevcparser/parser.go @@ -0,0 +1,695 @@ +package hevcparser + +// based on https://github.com/deepch/vdk/blob/v0.0.21/codec/h265parser/parser.go + +import ( + "bytes" + "errors" + "fmt" + + "github.com/datarhei/joy4/av" + "github.com/datarhei/joy4/utils/bits" + "github.com/datarhei/joy4/utils/bits/pio" +) + +type SPSInfo struct { + ProfileIdc uint + LevelIdc uint + CropLeft uint + CropRight uint + CropTop uint + CropBottom uint + Width uint + Height uint + PicWidthInLumaSamples uint + PicHeightInLumaSamples uint + generalProfileSpace uint + generalTierFlag uint + generalProfileIDC uint + generalProfileCompatibilityFlags uint32 + generalConstraintIndicatorFlags uint64 + generalLevelIDC uint +} + +const ( + NAL_UNIT_CODED_SLICE_TRAIL_N = 0 + NAL_UNIT_CODED_SLICE_TRAIL_R = 1 + NAL_UNIT_CODED_SLICE_TSA_N = 2 + NAL_UNIT_CODED_SLICE_TSA_R = 3 + NAL_UNIT_CODED_SLICE_STSA_N = 4 + NAL_UNIT_CODED_SLICE_STSA_R = 5 + NAL_UNIT_CODED_SLICE_RADL_N = 6 + NAL_UNIT_CODED_SLICE_RADL_R = 7 + NAL_UNIT_CODED_SLICE_RASL_N = 8 + NAL_UNIT_CODED_SLICE_RASL_R = 9 + NAL_UNIT_RESERVED_VCL_N10 = 10 + NAL_UNIT_RESERVED_VCL_R11 = 11 + NAL_UNIT_RESERVED_VCL_N12 = 12 + NAL_UNIT_RESERVED_VCL_R13 = 13 + NAL_UNIT_RESERVED_VCL_N14 = 14 + NAL_UNIT_RESERVED_VCL_R15 = 15 + NAL_UNIT_CODED_SLICE_BLA_W_LP = 16 + NAL_UNIT_CODED_SLICE_BLA_W_RADL = 17 + NAL_UNIT_CODED_SLICE_BLA_N_LP = 18 + NAL_UNIT_CODED_SLICE_IDR_W_RADL = 19 + NAL_UNIT_CODED_SLICE_IDR_N_LP = 20 + NAL_UNIT_CODED_SLICE_CRA = 21 + NAL_UNIT_RESERVED_IRAP_VCL22 = 22 + NAL_UNIT_RESERVED_IRAP_VCL23 = 23 + NAL_UNIT_RESERVED_VCL24 = 24 + NAL_UNIT_RESERVED_VCL25 = 25 + NAL_UNIT_RESERVED_VCL26 = 26 + NAL_UNIT_RESERVED_VCL27 = 27 + NAL_UNIT_RESERVED_VCL28 = 28 + NAL_UNIT_RESERVED_VCL29 = 29 + NAL_UNIT_RESERVED_VCL30 = 30 + NAL_UNIT_RESERVED_VCL31 = 31 + NAL_UNIT_VPS = 32 + NAL_UNIT_SPS = 33 + NAL_UNIT_PPS = 34 + NAL_UNIT_ACCESS_UNIT_DELIMITER = 35 + NAL_UNIT_EOS = 36 + NAL_UNIT_EOB = 37 + NAL_UNIT_FILLER_DATA = 38 + NAL_UNIT_PREFIX_SEI = 39 + NAL_UNIT_SUFFIX_SEI = 40 + NAL_UNIT_RESERVED_NVCL41 = 41 + NAL_UNIT_RESERVED_NVCL42 = 42 + NAL_UNIT_RESERVED_NVCL43 = 43 + NAL_UNIT_RESERVED_NVCL44 = 44 + NAL_UNIT_RESERVED_NVCL45 = 45 + NAL_UNIT_RESERVED_NVCL46 = 46 + NAL_UNIT_RESERVED_NVCL47 = 47 + NAL_UNIT_UNSPECIFIED_48 = 48 + NAL_UNIT_UNSPECIFIED_49 = 49 + NAL_UNIT_UNSPECIFIED_50 = 50 + NAL_UNIT_UNSPECIFIED_51 = 51 + NAL_UNIT_UNSPECIFIED_52 = 52 + NAL_UNIT_UNSPECIFIED_53 = 53 + NAL_UNIT_UNSPECIFIED_54 = 54 + NAL_UNIT_UNSPECIFIED_55 = 55 + NAL_UNIT_UNSPECIFIED_56 = 56 + NAL_UNIT_UNSPECIFIED_57 = 57 + NAL_UNIT_UNSPECIFIED_58 = 58 + NAL_UNIT_UNSPECIFIED_59 = 59 + NAL_UNIT_UNSPECIFIED_60 = 60 + NAL_UNIT_UNSPECIFIED_61 = 61 + NAL_UNIT_UNSPECIFIED_62 = 62 + NAL_UNIT_UNSPECIFIED_63 = 63 + NAL_UNIT_INVALID = 64 +) + +const ( + MAX_VPS_COUNT = 16 + MAX_SUB_LAYERS = 7 + MAX_SPS_COUNT = 32 +) + +var ( + ErrorHEVCIncorectUnitSize = errors.New("incorrect unit size") + ErrorHECVIncorectUnitType = errors.New("incorrect unit type") +) + +var StartCodeBytes = []byte{0, 0, 1} +var AUDBytes = []byte{0, 0, 0, 1, 0x9, 0xf0, 0, 0, 0, 1} // AUD + +const ( + NALU_RAW = iota + NALU_AVCC + NALU_ANNEXB +) + +func SplitNALUs(b []byte) (nalus [][]byte, typ int) { + if len(b) < 4 { + return [][]byte{b}, NALU_RAW + } + val3 := pio.U24BE(b) + val4 := pio.U32BE(b) + if val4 <= uint32(len(b)) { + _val4 := val4 + _b := b[4:] + nalus := [][]byte{} + for { + nalus = append(nalus, _b[:_val4]) + _b = _b[_val4:] + if len(_b) < 4 { + break + } + _val4 = pio.U32BE(_b) + _b = _b[4:] + if _val4 > uint32(len(_b)) { + break + } + } + if len(_b) == 0 { + return nalus, NALU_AVCC + } + } + if val3 == 1 || val4 == 1 { + _val3 := val3 + _val4 := val4 + start := 0 + pos := 0 + for { + if start != pos { + nalus = append(nalus, b[start:pos]) + } + if _val3 == 1 { + pos += 3 + } else if _val4 == 1 { + pos += 4 + } + start = pos + if start == len(b) { + break + } + _val3 = 0 + _val4 = 0 + for pos < len(b) { + if pos+2 < len(b) && b[pos] == 0 { + _val3 = pio.U24BE(b[pos:]) + if _val3 == 0 { + if pos+3 < len(b) { + _val4 = uint32(b[pos+3]) + if _val4 == 1 { + break + } + } + } else if _val3 == 1 { + break + } + pos++ + } else { + pos++ + } + } + } + typ = NALU_ANNEXB + return + } + + return [][]byte{b}, NALU_RAW +} + +func ParseSPS(sps []byte) (ctx SPSInfo, err error) { + if len(sps) < 2 { + err = ErrorHEVCIncorectUnitSize + return + } + rbsp := nal2rbsp(sps[2:]) + br := &bits.GolombBitReader{R: bytes.NewReader(rbsp)} + + // sps_video_parameter_set_id + if _, err = br.ReadBits(4); err != nil { + return + } + + // sps_max_sub_layers_minus1 + spsMaxSubLayersMinus1, err := br.ReadBits(3) + if err != nil { + return + } + + // sps_temporal_id_nesting_flag + if _, err = br.ReadBit(); err != nil { + return + } + + // profile_tier_level( 1, sps_max_sub_layers_minus1 ) + if err = parsePTL(br, &ctx, spsMaxSubLayersMinus1); err != nil { + return + } + + // sps_seq_parameter_set_id + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + + // chroma_format_idc + var chroma_format_idc uint + if chroma_format_idc, err = br.ReadExponentialGolombCode(); err != nil { + return + } + if chroma_format_idc == 3 { + // separate_colour_plane_flag + if _, err = br.ReadBit(); err != nil { + return + } + } + + // Table 6-1, Section 6.2 + var subWidthC uint + var subHeightC uint + + switch chroma_format_idc { + case 0: + subWidthC, subHeightC = 1, 1 + case 1: + subWidthC, subHeightC = 2, 2 + case 2: + subWidthC, subHeightC = 2, 1 + case 3: + subWidthC, subHeightC = 1, 1 + } + + // pic_width_in_luma_samples + if ctx.PicWidthInLumaSamples, err = br.ReadExponentialGolombCode(); err != nil { + return + } + + // pic_height_in_luma_samples + if ctx.PicHeightInLumaSamples, err = br.ReadExponentialGolombCode(); err != nil { + return + } + + // conformance_window_flag + conformanceWindowFlag, err := br.ReadBit() + if err != nil { + return + } + + var conf_win_left_offset uint + var conf_win_right_offset uint + var conf_win_top_offset uint + var conf_win_bottom_offset uint + + if conformanceWindowFlag != 0 { + // conf_win_left_offset + conf_win_left_offset, err = br.ReadExponentialGolombCode() + if err != nil { + return + } + ctx.CropLeft = subWidthC * conf_win_left_offset + + // conf_win_right_offset + conf_win_right_offset, err = br.ReadExponentialGolombCode() + if err != nil { + return + } + ctx.CropRight = subWidthC * conf_win_right_offset + + // conf_win_top_offset + conf_win_top_offset, err = br.ReadExponentialGolombCode() + if err != nil { + return + } + ctx.CropTop = subHeightC * conf_win_top_offset + + // conf_win_bottom_offset + conf_win_bottom_offset, err = br.ReadExponentialGolombCode() + if err != nil { + return + } + ctx.CropBottom = subHeightC * conf_win_bottom_offset + } + + ctx.Width = ctx.PicWidthInLumaSamples - ctx.CropLeft - ctx.CropRight + ctx.Height = ctx.PicHeightInLumaSamples - ctx.CropTop - ctx.CropBottom + + // bit_depth_luma_minus8 + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + + // bit_depth_chroma_minus8 + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + + // log2_max_pic_order_cnt_lsb_minus4 + _, err = br.ReadExponentialGolombCode() + if err != nil { + return + } + + // sps_sub_layer_ordering_info_present_flag + spsSubLayerOrderingInfoPresentFlag, err := br.ReadBit() + if err != nil { + return + } + var i uint + if spsSubLayerOrderingInfoPresentFlag != 0 { + i = 0 + } else { + i = spsMaxSubLayersMinus1 + } + for ; i <= spsMaxSubLayersMinus1; i++ { + // sps_max_dec_pic_buffering_minus1[ i ] + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + // sps_max_num_reorder_pics[ i ] + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + // sps_max_latency_increase_plus1[ i ] + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + } + + // log2_min_luma_coding_block_size_minus3 + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + // log2_diff_max_min_luma_coding_block_size + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + // log2_min_luma_transform_block_size_minus2 + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + // log2_diff_max_min_luma_transform_block_size + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + // max_transform_hierarchy_depth_inter + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + // max_transform_hierarchy_depth_intra + if _, err = br.ReadExponentialGolombCode(); err != nil { + return + } + return +} + +func parsePTL(br *bits.GolombBitReader, ctx *SPSInfo, maxSubLayersMinus1 uint) error { + var err error + var ptl SPSInfo + if ptl.generalProfileSpace, err = br.ReadBits(2); err != nil { + return err + } + if ptl.generalTierFlag, err = br.ReadBit(); err != nil { + return err + } + if ptl.generalProfileIDC, err = br.ReadBits(5); err != nil { + return err + } + if ptl.generalProfileCompatibilityFlags, err = br.ReadBits32(32); err != nil { + return err + } + if ptl.generalConstraintIndicatorFlags, err = br.ReadBits64(48); err != nil { + return err + } + if ptl.generalLevelIDC, err = br.ReadBits(8); err != nil { + return err + } + updatePTL(ctx, &ptl) + if maxSubLayersMinus1 == 0 { + return nil + } + subLayerProfilePresentFlag := make([]uint, maxSubLayersMinus1) + subLayerLevelPresentFlag := make([]uint, maxSubLayersMinus1) + for i := uint(0); i < maxSubLayersMinus1; i++ { + if subLayerProfilePresentFlag[i], err = br.ReadBit(); err != nil { + return err + } + if subLayerLevelPresentFlag[i], err = br.ReadBit(); err != nil { + return err + } + } + if maxSubLayersMinus1 > 0 { + for i := maxSubLayersMinus1; i < 8; i++ { + if _, err = br.ReadBits(2); err != nil { + return err + } + } + } + for i := uint(0); i < maxSubLayersMinus1; i++ { + if subLayerProfilePresentFlag[i] != 0 { + if _, err = br.ReadBits32(32); err != nil { + return err + } + if _, err = br.ReadBits32(32); err != nil { + return err + } + if _, err = br.ReadBits32(24); err != nil { + return err + } + } + + if subLayerLevelPresentFlag[i] != 0 { + if _, err = br.ReadBits(8); err != nil { + return err + } + } + } + return nil +} + +func updatePTL(ctx, ptl *SPSInfo) { + ctx.generalProfileSpace = ptl.generalProfileSpace + + if ptl.generalTierFlag > ctx.generalTierFlag { + ctx.generalLevelIDC = ptl.generalLevelIDC + + ctx.generalTierFlag = ptl.generalTierFlag + } else { + if ptl.generalLevelIDC > ctx.generalLevelIDC { + ctx.generalLevelIDC = ptl.generalLevelIDC + } + } + + if ptl.generalProfileIDC > ctx.generalProfileIDC { + ctx.generalProfileIDC = ptl.generalProfileIDC + } + + ctx.generalProfileCompatibilityFlags &= ptl.generalProfileCompatibilityFlags + + ctx.generalConstraintIndicatorFlags &= ptl.generalConstraintIndicatorFlags +} + +func nal2rbsp(nal []byte) []byte { + return bytes.Replace(nal, []byte{0x0, 0x0, 0x3}, []byte{0x0, 0x0}, -1) +} + +type CodecData struct { + Record []byte + RecordInfo HEVCDecoderConfRecord + SPSInfo SPSInfo +} + +func (codec CodecData) Type() av.CodecType { + return av.HEVC +} + +func (codec CodecData) HEVCDecoderConfRecordBytes() []byte { + return codec.Record +} + +func (codec CodecData) SPS() []byte { + return codec.RecordInfo.SPS[0] +} + +func (codec CodecData) PPS() []byte { + return codec.RecordInfo.PPS[0] +} + +func (codec CodecData) VPS() []byte { + return codec.RecordInfo.VPS[0] +} + +func (codec CodecData) Width() int { + return int(codec.SPSInfo.Width) +} + +func (codec CodecData) Height() int { + return int(codec.SPSInfo.Height) +} + +func NewCodecDataFromHEVCDecoderConfRecord(record []byte) (self CodecData, err error) { + self.Record = record + if _, err = (&self.RecordInfo).Unmarshal(record); err != nil { + return + } + if len(self.RecordInfo.SPS) == 0 { + err = fmt.Errorf("hevcparser: no SPS found in HEVCDecoderConfRecord") + return + } + if len(self.RecordInfo.PPS) == 0 { + err = fmt.Errorf("hevcparser: no PPS found in HEVCDecoderConfRecord") + return + } + if len(self.RecordInfo.VPS) == 0 { + err = fmt.Errorf("hevcparser: no VPS found in HEVCDecoderConfRecord") + return + } + if self.SPSInfo, err = ParseSPS(self.RecordInfo.SPS[0]); err != nil { + err = fmt.Errorf("hevcparser: parse SPS failed(%s)", err) + return + } + return +} + +func NewCodecDataFromVPSAndSPSAndPPS(vps, sps, pps []byte) (self CodecData, err error) { + recordinfo := HEVCDecoderConfRecord{} + recordinfo.HEVCProfileIndication = sps[3] + recordinfo.ProfileCompatibility = sps[4] + recordinfo.HEVCLevelIndication = sps[5] + recordinfo.SPS = [][]byte{sps} + recordinfo.PPS = [][]byte{pps} + recordinfo.VPS = [][]byte{vps} + recordinfo.LengthSizeMinusOne = 3 + if self.SPSInfo, err = ParseSPS(sps); err != nil { + return + } + buf := make([]byte, recordinfo.Len()) + recordinfo.Marshal(buf, self.SPSInfo) + self.RecordInfo = recordinfo + self.Record = buf + return +} + +type HEVCDecoderConfRecord struct { + HEVCProfileIndication uint8 + ProfileCompatibility uint8 + HEVCLevelIndication uint8 + LengthSizeMinusOne uint8 + VPS [][]byte + SPS [][]byte + PPS [][]byte +} + +var ErrDecconfInvalid = fmt.Errorf("hevcparser: HEVCDecoderConfRecord invalid") + +func (record *HEVCDecoderConfRecord) Unmarshal(b []byte) (n int, err error) { + if len(b) < 30 { + err = ErrDecconfInvalid + return + } + record.HEVCProfileIndication = b[1] + record.ProfileCompatibility = b[2] + record.HEVCLevelIndication = b[3] + record.LengthSizeMinusOne = b[4] & 0x03 + + vpscount := int(b[25] & 0x1f) + n += 26 + for i := 0; i < vpscount; i++ { + if len(b) < n+2 { + err = ErrDecconfInvalid + return + } + vpslen := int(pio.U16BE(b[n:])) + n += 2 + + if len(b) < n+vpslen { + err = ErrDecconfInvalid + return + } + record.VPS = append(record.VPS, b[n:n+vpslen]) + n += vpslen + } + + if len(b) < n+1 { + err = ErrDecconfInvalid + return + } + + n++ + n++ + + spscount := int(b[n]) + n++ + + for i := 0; i < spscount; i++ { + if len(b) < n+2 { + err = ErrDecconfInvalid + return + } + spslen := int(pio.U16BE(b[n:])) + n += 2 + + if len(b) < n+spslen { + err = ErrDecconfInvalid + return + } + record.SPS = append(record.SPS, b[n:n+spslen]) + n += spslen + } + + n++ + n++ + + ppscount := int(b[n]) + n++ + + for i := 0; i < ppscount; i++ { + if len(b) < n+2 { + err = ErrDecconfInvalid + return + } + ppslen := int(pio.U16BE(b[n:])) + n += 2 + + if len(b) < n+ppslen { + err = ErrDecconfInvalid + return + } + record.PPS = append(record.PPS, b[n:n+ppslen]) + n += ppslen + } + return +} + +func (record HEVCDecoderConfRecord) Len() (n int) { + n = 23 + for _, sps := range record.SPS { + n += 5 + len(sps) + } + for _, pps := range record.PPS { + n += 5 + len(pps) + } + for _, vps := range record.VPS { + n += 5 + len(vps) + } + return +} + +func (record HEVCDecoderConfRecord) Marshal(b []byte, si SPSInfo) (n int) { + b[0] = 1 + b[1] = record.HEVCProfileIndication + b[2] = record.ProfileCompatibility + b[3] = record.HEVCLevelIndication + b[21] = 3 + b[22] = 3 + n += 23 + b[n] = (record.VPS[0][0] >> 1) & 0x3f + n++ + b[n] = byte(len(record.VPS) >> 8) + n++ + b[n] = byte(len(record.VPS)) + n++ + for _, vps := range record.VPS { + pio.PutU16BE(b[n:], uint16(len(vps))) + n += 2 + copy(b[n:], vps) + n += len(vps) + } + b[n] = (record.SPS[0][0] >> 1) & 0x3f + n++ + b[n] = byte(len(record.SPS) >> 8) + n++ + b[n] = byte(len(record.SPS)) + n++ + for _, sps := range record.SPS { + pio.PutU16BE(b[n:], uint16(len(sps))) + n += 2 + copy(b[n:], sps) + n += len(sps) + } + b[n] = (record.PPS[0][0] >> 1) & 0x3f + n++ + b[n] = byte(len(record.PPS) >> 8) + n++ + b[n] = byte(len(record.PPS)) + n++ + for _, pps := range record.PPS { + pio.PutU16BE(b[n:], uint16(len(pps))) + n += 2 + copy(b[n:], pps) + n += len(pps) + } + return +} diff --git a/examples/rtmp_client/main.go b/examples/rtmp_client/main.go new file mode 100644 index 0000000..550262f --- /dev/null +++ b/examples/rtmp_client/main.go @@ -0,0 +1,58 @@ +package main + +import ( + "fmt" + "log" + "os" + + "github.com/datarhei/joy4/av" + "github.com/datarhei/joy4/av/avutil" + "github.com/datarhei/joy4/codec/h264parser" + "github.com/datarhei/joy4/format" +) + +func init() { + format.RegisterAll() +} + +func main() { + if len(os.Args) < 2 { + log.Fatalf("%s [url]", os.Args[0]) + } + + src, err := avutil.Open(os.Args[1]) + if err != nil { + log.Fatalf("error connecting: %s", err.Error()) + } + + defer src.Close() + + var streams []av.CodecData + + if streams, err = src.Streams(); err != nil { + log.Fatalf("error streams: %s", err.Error()) + } + + idx := int8(-1) + for i, s := range streams { + if s.Type().IsVideo() { + fmt.Printf("video: %s\n", s.Type().String()) + v := s.(h264parser.CodecData) + os.Stdout.Write(v.AVCDecoderConfRecordBytes()) + idx = int8(i) + } + } + + for { + p, err := src.ReadPacket() + if err != nil { + log.Fatalf("error reading: %s", err.Error()) + } + + if p.Idx != idx { + continue + } + + os.Stdout.Write(p.Data) + } +} diff --git a/examples/rtmp_server/main.go b/examples/rtmp_server/main.go index b301dea..9b98c66 100644 --- a/examples/rtmp_server/main.go +++ b/examples/rtmp_server/main.go @@ -116,7 +116,7 @@ func (s *server) handlePlay(conn *rtmp.Conn) { s.lock.RUnlock() if ch != nil { - conn.SetMetaData(ch.metadata) + //conn.SetMetaData(ch.metadata) s.log("PLAY", "START", conn.URL.Path, "", client) cursor := ch.que.Oldest() @@ -139,8 +139,6 @@ func (s *server) handlePlay(conn *rtmp.Conn) { } else { s.log("PLAY", "NOTFOUND", conn.URL.Path, "", client) } - - return } func (s *server) handlePublish(conn *rtmp.Conn) { @@ -169,14 +167,14 @@ func (s *server) handlePublish(conn *rtmp.Conn) { return } - metadata := conn.GetMetaData() + //metadata := conn.GetMetaData() s.lock.Lock() ch := s.channels[conn.URL.Path] if ch == nil { ch = &channel{} - ch.metadata = metadata + //ch.metadata = metadata ch.que = pubsub.NewQueue() ch.que.WriteHeader(streams) for _, stream := range streams { @@ -221,8 +219,6 @@ func (s *server) handlePublish(conn *rtmp.Conn) { ch.que.Close() s.log("PUBLISH", "STOP", conn.URL.Path, "", client) - - return } func main() { diff --git a/format/flv/flv.go b/format/flv/flv.go index 742811d..5bac86d 100644 --- a/format/flv/flv.go +++ b/format/flv/flv.go @@ -2,16 +2,19 @@ package flv import ( "bufio" + "encoding/hex" "fmt" + "io" + "github.com/datarhei/joy4/av" "github.com/datarhei/joy4/av/avutil" "github.com/datarhei/joy4/codec" "github.com/datarhei/joy4/codec/aacparser" "github.com/datarhei/joy4/codec/fake" "github.com/datarhei/joy4/codec/h264parser" + "github.com/datarhei/joy4/codec/hevcparser" "github.com/datarhei/joy4/format/flv/flvio" "github.com/datarhei/joy4/utils/bits/pio" - "io" ) var MaxProbePacketCount = 20 @@ -27,6 +30,8 @@ func NewMetadataByStreams(streams []av.CodecData) (metadata flvio.AMFMap, err er switch typ { case av.H264: metadata["videocodecid"] = flvio.VIDEO_H264 + case av.HEVC: + metadata["videocodecid"] = flvio.FourCCToFloat(flvio.FOURCC_HEVC) default: err = fmt.Errorf("flv: metadata: unsupported video codecType=%v", stream.Type()) @@ -83,21 +88,42 @@ func (self *Prober) PushTag(tag flvio.Tag, timestamp int32) (err error) { switch tag.Type { case flvio.TAG_VIDEO: - switch tag.AVCPacketType { - case flvio.AVC_SEQHDR: - if !self.GotVideo { - var stream h264parser.CodecData - if stream, err = h264parser.NewCodecDataFromAVCDecoderConfRecord(tag.Data); err != nil { - err = fmt.Errorf("flv: h264 seqhdr invalid: %s", err.Error()) - return + if tag.IsExHeader { + if tag.FourCC == flvio.FOURCC_HEVC { + if tag.PacketType == flvio.PKTTYPE_SEQUENCE_START { + if !self.GotVideo { + var stream hevcparser.CodecData + fmt.Printf("got HEVC sequence start:\n%s\n", hex.Dump(tag.Data)) + if stream, err = hevcparser.NewCodecDataFromHEVCDecoderConfRecord(tag.Data); err != nil { + err = fmt.Errorf("flv: hevc seqhdr invalid: %s", err.Error()) + return + } + self.VideoStreamIdx = len(self.Streams) + self.Streams = append(self.Streams, stream) + self.GotVideo = true + } + } else if tag.PacketType == flvio.PKTTYPE_CODED_FRAMES || tag.PacketType == flvio.PKTTYPE_CODED_FRAMESX { + self.CacheTag(tag, timestamp) } - self.VideoStreamIdx = len(self.Streams) - self.Streams = append(self.Streams, stream) - self.GotVideo = true } + } else { + switch tag.AVCPacketType { + case flvio.AVC_SEQHDR: + if !self.GotVideo { + var stream h264parser.CodecData + fmt.Printf("got H264 sequence start:\n%s\n", hex.Dump(tag.Data)) + if stream, err = h264parser.NewCodecDataFromAVCDecoderConfRecord(tag.Data); err != nil { + err = fmt.Errorf("flv: h264 seqhdr invalid: %s", err.Error()) + return + } + self.VideoStreamIdx = len(self.Streams) + self.Streams = append(self.Streams, stream) + self.GotVideo = true + } - case flvio.AVC_NALU: - self.CacheTag(tag, timestamp) + case flvio.AVC_NALU: + self.CacheTag(tag, timestamp) + } } case flvio.TAG_AUDIO: @@ -166,8 +192,8 @@ func (self *Prober) TagToPacket(tag flvio.Tag, timestamp int32) (pkt av.Packet, switch tag.Type { case flvio.TAG_VIDEO: pkt.Idx = int8(self.VideoStreamIdx) - switch tag.AVCPacketType { - case flvio.AVC_NALU: + switch tag.PacketType { + case flvio.PKTTYPE_CODED_FRAMES, flvio.PKTTYPE_CODED_FRAMESX: ok = true pkt.Data = tag.Data pkt.CompositionTime = flvio.TsToTime(tag.CompositionTime) @@ -219,6 +245,22 @@ func CodecDataToTag(stream av.CodecData) (_tag flvio.Tag, ok bool, err error) { Data: h264.AVCDecoderConfRecordBytes(), FrameType: flvio.FRAME_KEY, } + fmt.Printf("set H264 sequence start:\n%v\n", hex.Dump(h264.AVCDecoderConfRecordBytes())) + ok = true + _tag = tag + + case av.HEVC: + fmt.Printf("CodecDataToTag for HEVC\n") + hevc := stream.(hevcparser.CodecData) + tag := flvio.Tag{ + Type: flvio.TAG_VIDEO, + IsExHeader: true, + PacketType: flvio.PKTTYPE_SEQUENCE_START, + FourCC: flvio.FOURCC_HEVC, + Data: hevc.HEVCDecoderConfRecordBytes(), + FrameType: flvio.FRAME_KEY, + } + fmt.Printf("set HEVC sequence start:\n%v\n", hex.Dump(hevc.HEVCDecoderConfRecordBytes())) ok = true _tag = tag @@ -272,6 +314,27 @@ func PacketToTag(pkt av.Packet, stream av.CodecData) (tag flvio.Tag, timestamp i tag.FrameType = flvio.FRAME_INTER } + case av.HEVC: + //fmt.Printf("PacketToTag for HEVC\n") + tag = flvio.Tag{ + Type: flvio.TAG_VIDEO, + IsExHeader: true, + PacketType: flvio.PKTTYPE_CODED_FRAMES, + CompositionTime: flvio.TimeToTs(pkt.CompositionTime), + FourCC: flvio.FOURCC_HEVC, + Data: pkt.Data, + } + + if pkt.CompositionTime == 0 { + tag.PacketType = flvio.PKTTYPE_CODED_FRAMESX + } + + if pkt.IsKeyFrame { + tag.FrameType = flvio.FRAME_KEY + } else { + tag.FrameType = flvio.FRAME_INTER + } + case av.AAC: tag = flvio.Tag{ Type: flvio.TAG_AUDIO, diff --git a/format/flv/flvio/flvio.go b/format/flv/flvio/flvio.go index a69bdb5..9b69085 100644 --- a/format/flv/flvio/flvio.go +++ b/format/flv/flvio/flvio.go @@ -2,10 +2,11 @@ package flvio import ( "fmt" - "github.com/datarhei/joy4/av" - "github.com/datarhei/joy4/utils/bits/pio" "io" "time" + + "github.com/datarhei/joy4/av" + "github.com/datarhei/joy4/utils/bits/pio" ) func TsToTime(ts int32) time.Duration { @@ -60,6 +61,27 @@ const ( VIDEO_H264 = 7 ) +const ( + PKTTYPE_SEQUENCE_START = 0 + PKTTYPE_CODED_FRAMES = 1 + PKTTYPE_SEQUENCE_END = 2 + PKTTYPE_CODED_FRAMESX = 3 + PKTTYPE_METADATA = 4 + PKTTYPE_MPEG2TS_SEQUENCE_START = 5 +) + +var ( + FOURCC_AV1 = [4]byte{'a', 'v', '0', '1'} + FOURCC_VP9 = [4]byte{'v', 'p', '0', '9'} + FOURCC_HEVC = [4]byte{'h', 'v', 'c', '1'} +) + +func FourCCToFloat(fourcc [4]byte) float64 { + i := int(fourcc[0])<<24 | int(fourcc[1])<<16 | int(fourcc[2])<<8 | int(fourcc[3]) + + return float64(i) +} + type Tag struct { /* 8 = Audio @@ -126,14 +148,22 @@ type Tag struct { AACPacketType uint8 /* + 0: reserved 1: keyframe (for AVC, a seekable frame) 2: inter frame (for AVC, a non- seekable frame) 3: disposable inter frame (H.263 only) 4: generated keyframe (reserved for server use only) 5: video info/command frame + 6: reserved + 7: reserved */ FrameType uint8 + /* + FrameType & 0b1000 != 0 + */ + IsExHeader bool + /* 1: JPEG (currently unused) 2: Sorenson H.263 @@ -145,6 +175,16 @@ type Tag struct { */ CodecID uint8 + /* + 0: PacketTypeSequenceStart + 1: PacketTypeCodedFrames + 2: PacketTypeSequenceEnd + 3: PacketTypeCodedFramesX + 4: PacketTypeMetadata + 5: PacketTypeMPEG2TSSequenceStart + */ + PacketType uint8 + /* 0: AVC sequence header 1: AVC NALU @@ -154,18 +194,20 @@ type Tag struct { CompositionTime int32 + FourCC [4]byte + Data []byte } -func (self Tag) ChannelLayout() av.ChannelLayout { - if self.SoundType == SOUND_MONO { +func (t Tag) ChannelLayout() av.ChannelLayout { + if t.SoundType == SOUND_MONO { return av.CH_MONO } else { return av.CH_STEREO } } -func (self *Tag) audioParseHeader(b []byte) (n int, err error) { +func (t *Tag) audioParseHeader(b []byte) (n int, err error) { if len(b) < n+1 { err = fmt.Errorf("audiodata: parse invalid") return @@ -173,97 +215,163 @@ func (self *Tag) audioParseHeader(b []byte) (n int, err error) { flags := b[n] n++ - self.SoundFormat = flags >> 4 - self.SoundRate = (flags >> 2) & 0x3 - self.SoundSize = (flags >> 1) & 0x1 - self.SoundType = flags & 0x1 + t.SoundFormat = flags >> 4 + t.SoundRate = (flags >> 2) & 0x3 + t.SoundSize = (flags >> 1) & 0x1 + t.SoundType = flags & 0x1 - switch self.SoundFormat { + switch t.SoundFormat { case SOUND_AAC: if len(b) < n+1 { err = fmt.Errorf("audiodata: parse invalid") return } - self.AACPacketType = b[n] + t.AACPacketType = b[n] n++ } return } -func (self Tag) audioFillHeader(b []byte) (n int) { +func (t Tag) audioFillHeader(b []byte) (n int) { var flags uint8 - flags |= self.SoundFormat << 4 - flags |= self.SoundRate << 2 - flags |= self.SoundSize << 1 - flags |= self.SoundType + flags |= t.SoundFormat << 4 + flags |= t.SoundRate << 2 + flags |= t.SoundSize << 1 + flags |= t.SoundType b[n] = flags n++ - switch self.SoundFormat { + switch t.SoundFormat { case SOUND_AAC: - b[n] = self.AACPacketType + b[n] = t.AACPacketType n++ } return } -func (self *Tag) videoParseHeader(b []byte) (n int, err error) { +func (t *Tag) videoParseHeader(b []byte) (n int, err error) { if len(b) < n+1 { err = fmt.Errorf("videodata: parse invalid") return } flags := b[n] - self.FrameType = flags >> 4 - self.CodecID = flags & 0xf + t.FrameType = flags >> 4 + t.CodecID = flags & 0b1111 + + //fmt.Printf("%#8b\n", flags) n++ - if self.FrameType == FRAME_INTER || self.FrameType == FRAME_KEY { + if (t.FrameType & 0b1000) != 0 { + t.IsExHeader = true + t.PacketType = t.CodecID + t.CodecID = 0 + + if t.PacketType != PKTTYPE_METADATA { + t.FrameType = t.FrameType & 0b0111 + } + } + + if !t.IsExHeader { + if t.FrameType == FRAME_INTER || t.FrameType == FRAME_KEY { + if len(b) < n+4 { + err = fmt.Errorf("videodata: parse invalid: neither interframe nor keyframe") + return + } + t.AVCPacketType = b[n] + switch t.AVCPacketType { + case AVC_SEQHDR: + t.PacketType = PKTTYPE_SEQUENCE_START + case AVC_NALU: + t.PacketType = PKTTYPE_CODED_FRAMES + case AVC_EOS: + t.PacketType = PKTTYPE_SEQUENCE_END + } + n++ + + t.CompositionTime = pio.I24BE(b[n:]) + n += 3 + } + } else { if len(b) < n+4 { - err = fmt.Errorf("videodata: parse invalid") + err = fmt.Errorf("videodata: parse invalid: not enough bytes for the fourCC value") return } - self.AVCPacketType = b[n] - n++ - self.CompositionTime = pio.I24BE(b[n:]) + t.FourCC[0] = b[n] + t.FourCC[1] = b[n+1] + t.FourCC[2] = b[n+2] + t.FourCC[3] = b[n+3] + + n += 4 + + t.CompositionTime = 0 + + if t.FourCC == FOURCC_HEVC { + if t.PacketType == PKTTYPE_CODED_FRAMES { + t.CompositionTime = pio.I24BE(b[n:]) + n += 3 + } + } + } + + //fmt.Printf("parseVideoHeader: PacketType: %d\n%s\n", t.PacketType, hex.Dump(b[:n])) + + return +} + +func (t Tag) videoFillHeader(b []byte) (n int) { + if t.IsExHeader { + flags := t.FrameType<<4 | t.PacketType | 0b10000000 + b[n] = flags + n++ + b[n] = t.FourCC[0] + b[n+1] = t.FourCC[1] + b[n+2] = t.FourCC[2] + b[n+3] = t.FourCC[3] + n += 4 + + if t.FourCC == FOURCC_HEVC { + if t.PacketType == PKTTYPE_CODED_FRAMES { + pio.PutI24BE(b[n:], t.CompositionTime) + n += 3 + } + } + } else { + flags := t.FrameType<<4 | t.CodecID + b[n] = flags + n++ + b[n] = t.AVCPacketType + n++ + pio.PutI24BE(b[n:], t.CompositionTime) n += 3 } + //fmt.Printf("videoFillHeader: PacketType: %d\n%s\n", t.PacketType, hex.Dump(b[:n])) + return } -func (self Tag) videoFillHeader(b []byte) (n int) { - flags := self.FrameType<<4 | self.CodecID - b[n] = flags - n++ - b[n] = self.AVCPacketType - n++ - pio.PutI24BE(b[n:], self.CompositionTime) - n += 3 - return -} - -func (self Tag) FillHeader(b []byte) (n int) { - switch self.Type { +func (t Tag) FillHeader(b []byte) (n int) { + switch t.Type { case TAG_AUDIO: - return self.audioFillHeader(b) + return t.audioFillHeader(b) case TAG_VIDEO: - return self.videoFillHeader(b) + return t.videoFillHeader(b) } return } -func (self *Tag) ParseHeader(b []byte) (n int, err error) { - switch self.Type { +func (t *Tag) ParseHeader(b []byte) (n int, err error) { + switch t.Type { case TAG_AUDIO: - return self.audioParseHeader(b) + return t.audioParseHeader(b) case TAG_VIDEO: - return self.videoParseHeader(b) + return t.videoParseHeader(b) } return diff --git a/format/rtmp/rtmp.go b/format/rtmp/rtmp.go index 8d9dc74..9b8a93f 100644 --- a/format/rtmp/rtmp.go +++ b/format/rtmp/rtmp.go @@ -78,6 +78,7 @@ func (self *Server) handleConn(conn *Conn) (err error) { } if conn.playing { + fmt.Printf("play\n") if self.HandlePlay != nil { self.HandlePlay(conn) } @@ -463,6 +464,8 @@ func (self *Conn) readConnect() (err error) { return } + fmt.Printf("readConnect: %+v\n", self.commandobj) + var ok bool var _app, _tcurl interface{} if _app, ok = self.commandobj["app"]; !ok { @@ -691,6 +694,8 @@ func (self *Conn) writeConnect(path string) (err error) { return } + fmt.Printf("writeConnect: app: %s\n", path) + // > connect("app") if Debug { fmt.Printf("rtmp: > connect('%s') host=%s\n", path, self.URL.Host) @@ -705,6 +710,7 @@ func (self *Conn) writeConnect(path string) (err error) { "audioCodecs": 4071, "videoCodecs": 252, "videoFunction": 1, + "fourCcList": flvio.AMFArray{"av01", "vp09", "hvc1"}, }, ); err != nil { return @@ -984,14 +990,19 @@ func (self *Conn) WriteHeader(streams []av.CodecData) (err error) { var metadata flvio.AMFMap = nil - metadata = self.GetMetaData() + //metadata = self.GetMetaData() + + fmt.Printf("WriteHeader\n") if metadata == nil { if metadata, err = flv.NewMetadataByStreams(streams); err != nil { + fmt.Printf("WriteHeader error: %s\n", err.Error()) return } } + fmt.Printf("WriteHeader: %#v\n", metadata) + // > onMetaData() if err = self.writeDataMsg(5, self.avmsgsid, "onMetaData", metadata); err != nil { return @@ -1547,17 +1558,21 @@ func (self *Conn) handleMsg(timestamp uint32, msgsid uint32, msgtypeid uint8, ms if metaindex != -1 && metaindex < len(self.datamsgvals) { self.metadata = self.datamsgvals[metaindex].(flvio.AMFMap) + fmt.Printf("onMetadata: %+v\n", self.metadata) + fmt.Printf("videocodecid: %#08x (%f)\n", int64(self.metadata["videocodecid"].(float64)), self.metadata["videocodecid"].(float64)) } case msgtypeidVideoMsg: if len(msgdata) == 0 { return } + //fmt.Printf("msgdata: %#08x\n", msgdata[:5]) tag := flvio.Tag{Type: flvio.TAG_VIDEO} var n int if n, err = (&tag).ParseHeader(msgdata); err != nil { return } + //fmt.Printf("tag: %+v\n", tag) if !(tag.FrameType == flvio.FRAME_INTER || tag.FrameType == flvio.FRAME_KEY) { return } diff --git a/utils/bits/golomb_reader.go b/utils/bits/golomb_reader.go index da57cb2..b40978b 100644 --- a/utils/bits/golomb_reader.go +++ b/utils/bits/golomb_reader.go @@ -33,6 +33,30 @@ func (self *GolombBitReader) ReadBits(n int) (res uint, err error) { return } +func (self *GolombBitReader) ReadBits32(n uint) (r uint32, err error) { + var t uint + for i := uint(0); i < n; i++ { + t, err = self.ReadBit() + if err != nil { + return + } + r = (r << 1) | uint32(t) + } + return +} + +func (self *GolombBitReader) ReadBits64(n uint) (r uint64, err error) { + var t uint + for i := uint(0); i < n; i++ { + t, err = self.ReadBit() + if err != nil { + return + } + r = (r << 1) | uint64(t) + } + return +} + func (self *GolombBitReader) ReadExponentialGolombCode() (res uint, err error) { i := 0 for {