joy4/codec/hevcparser/parser.go
2024-01-03 12:59:10 +01:00

696 lines
16 KiB
Go

package hevcparser
// based on https://github.com/deepch/vdk/blob/v0.0.21/codec/h265parser/parser.go
import (
"bytes"
"errors"
"fmt"
"github.com/datarhei/joy4/av"
"github.com/datarhei/joy4/utils/bits"
"github.com/datarhei/joy4/utils/bits/pio"
)
// SPSInfo holds the values ParseSPS extracts from an HEVC sequence parameter set.
//
// The Crop* fields are the conformance-window offsets multiplied by the chroma
// sub-sampling factor chosen in ParseSPS, and Width/Height are the coded
// picture size minus those crops. The unexported general* fields are filled
// from the profile_tier_level structure by parsePTL/updatePTL.
type SPSInfo struct {
// ProfileIdc and LevelIdc are not assigned by any code visible in this file.
ProfileIdc uint
LevelIdc uint
// Conformance-window crops (offset * SubWidthC / SubHeightC).
CropLeft uint
CropRight uint
CropTop uint
CropBottom uint
// Width and Height are the picture dimensions after cropping.
Width uint
Height uint
// pic_width_in_luma_samples / pic_height_in_luma_samples as coded in the SPS.
PicWidthInLumaSamples uint
PicHeightInLumaSamples uint
// general_* values taken from profile_tier_level().
generalProfileSpace uint
generalTierFlag uint
generalProfileIDC uint
generalProfileCompatibilityFlags uint32
generalConstraintIndicatorFlags uint64
generalLevelIDC uint
}
// HEVC NAL unit type values, numbered consecutively from 0 to 63, plus
// NAL_UNIT_INVALID (64) as an out-of-range marker.
// NOTE(review): the names appear to follow the nal_unit_type table of
// ITU-T H.265 — confirm against the specification before relying on them.
const (
// Coded slice types 0-9.
NAL_UNIT_CODED_SLICE_TRAIL_N = 0
NAL_UNIT_CODED_SLICE_TRAIL_R = 1
NAL_UNIT_CODED_SLICE_TSA_N = 2
NAL_UNIT_CODED_SLICE_TSA_R = 3
NAL_UNIT_CODED_SLICE_STSA_N = 4
NAL_UNIT_CODED_SLICE_STSA_R = 5
NAL_UNIT_CODED_SLICE_RADL_N = 6
NAL_UNIT_CODED_SLICE_RADL_R = 7
NAL_UNIT_CODED_SLICE_RASL_N = 8
NAL_UNIT_CODED_SLICE_RASL_R = 9
// Reserved VCL values 10-15.
NAL_UNIT_RESERVED_VCL_N10 = 10
NAL_UNIT_RESERVED_VCL_R11 = 11
NAL_UNIT_RESERVED_VCL_N12 = 12
NAL_UNIT_RESERVED_VCL_R13 = 13
NAL_UNIT_RESERVED_VCL_N14 = 14
NAL_UNIT_RESERVED_VCL_R15 = 15
// BLA / IDR / CRA coded slices 16-21 and reserved IRAP values 22-23.
NAL_UNIT_CODED_SLICE_BLA_W_LP = 16
NAL_UNIT_CODED_SLICE_BLA_W_RADL = 17
NAL_UNIT_CODED_SLICE_BLA_N_LP = 18
NAL_UNIT_CODED_SLICE_IDR_W_RADL = 19
NAL_UNIT_CODED_SLICE_IDR_N_LP = 20
NAL_UNIT_CODED_SLICE_CRA = 21
NAL_UNIT_RESERVED_IRAP_VCL22 = 22
NAL_UNIT_RESERVED_IRAP_VCL23 = 23
// Reserved VCL values 24-31.
NAL_UNIT_RESERVED_VCL24 = 24
NAL_UNIT_RESERVED_VCL25 = 25
NAL_UNIT_RESERVED_VCL26 = 26
NAL_UNIT_RESERVED_VCL27 = 27
NAL_UNIT_RESERVED_VCL28 = 28
NAL_UNIT_RESERVED_VCL29 = 29
NAL_UNIT_RESERVED_VCL30 = 30
NAL_UNIT_RESERVED_VCL31 = 31
// Parameter sets, delimiters, filler and SEI 32-40.
NAL_UNIT_VPS = 32
NAL_UNIT_SPS = 33
NAL_UNIT_PPS = 34
NAL_UNIT_ACCESS_UNIT_DELIMITER = 35
NAL_UNIT_EOS = 36
NAL_UNIT_EOB = 37
NAL_UNIT_FILLER_DATA = 38
NAL_UNIT_PREFIX_SEI = 39
NAL_UNIT_SUFFIX_SEI = 40
// Reserved non-VCL values 41-47.
NAL_UNIT_RESERVED_NVCL41 = 41
NAL_UNIT_RESERVED_NVCL42 = 42
NAL_UNIT_RESERVED_NVCL43 = 43
NAL_UNIT_RESERVED_NVCL44 = 44
NAL_UNIT_RESERVED_NVCL45 = 45
NAL_UNIT_RESERVED_NVCL46 = 46
NAL_UNIT_RESERVED_NVCL47 = 47
// Unspecified values 48-63.
NAL_UNIT_UNSPECIFIED_48 = 48
NAL_UNIT_UNSPECIFIED_49 = 49
NAL_UNIT_UNSPECIFIED_50 = 50
NAL_UNIT_UNSPECIFIED_51 = 51
NAL_UNIT_UNSPECIFIED_52 = 52
NAL_UNIT_UNSPECIFIED_53 = 53
NAL_UNIT_UNSPECIFIED_54 = 54
NAL_UNIT_UNSPECIFIED_55 = 55
NAL_UNIT_UNSPECIFIED_56 = 56
NAL_UNIT_UNSPECIFIED_57 = 57
NAL_UNIT_UNSPECIFIED_58 = 58
NAL_UNIT_UNSPECIFIED_59 = 59
NAL_UNIT_UNSPECIFIED_60 = 60
NAL_UNIT_UNSPECIFIED_61 = 61
NAL_UNIT_UNSPECIFIED_62 = 62
NAL_UNIT_UNSPECIFIED_63 = 63
// One past the largest value listed above.
NAL_UNIT_INVALID = 64
)
// Upper bounds for parameter-set and sub-layer counts.
// None of these constants is referenced by the code visible in this file.
// NOTE(review): presumably these mirror limits from the HEVC specification — confirm.
const (
MAX_VPS_COUNT = 16
MAX_SUB_LAYERS = 7
MAX_SPS_COUNT = 32
)
// Sentinel errors for malformed NAL units.
var (
// ErrorHEVCIncorectUnitSize is returned by ParseSPS when the SPS is shorter
// than the two-byte NAL unit header.
ErrorHEVCIncorectUnitSize = errors.New("incorrect unit size")
// ErrorHECVIncorectUnitType is not returned by any code visible in this file.
ErrorHECVIncorectUnitType = errors.New("incorrect unit type")
)
// StartCodeBytes is the three-byte 0x000001 pattern, the same value SplitNALUs
// recognises as a three-byte Annex-B start code.
var StartCodeBytes = []byte{0, 0, 1}
// AUDBytes is a four-byte start code, the bytes 0x09 0xf0, and another
// four-byte start code.
// NOTE(review): 0x09 0xf0 looks like the H.264 access unit delimiter; this
// file defines NAL_UNIT_ACCESS_UNIT_DELIMITER = 35 for HEVC — confirm these
// bytes are what HEVC consumers expect.
var AUDBytes = []byte{0, 0, 0, 1, 0x9, 0xf0, 0, 0, 0, 1} // AUD
// Packaging formats reported by SplitNALUs.
const (
// NALU_RAW: the buffer was returned unsplit as a single unit.
NALU_RAW = iota
// NALU_AVCC: units were prefixed by four-byte big-endian lengths.
NALU_AVCC
// NALU_ANNEXB: units were separated by 0x000001 / 0x00000001 start codes.
NALU_ANNEXB
)
// SplitNALUs splits b into NAL units and reports which packaging was detected.
//
// Detection order:
//  1. AVCC: b is a sequence of four-byte big-endian lengths, each followed by
//     that many payload bytes, and the sequence consumes b exactly.
//  2. Annex-B: b begins with a 0x000001 or 0x00000001 start code; units are
//     the byte runs between start codes.
//  3. Otherwise (or when b is shorter than four bytes) b is returned unsplit
//     as a single unit with NALU_RAW.
//
// The returned slices alias b; no payload bytes are copied.
func SplitNALUs(b []byte) (nalus [][]byte, typ int) {
	if len(b) < 4 {
		return [][]byte{b}, NALU_RAW
	}

	val3 := pio.U24BE(b)
	val4 := pio.U32BE(b)

	// AVCC. The first length must fit into what remains after its own
	// four-byte prefix; comparing against len(b) instead would let a length
	// in (len(b)-4, len(b)] slice past the end of the buffer and panic.
	if val4 <= uint32(len(b)-4) {
		size := val4
		rest := b[4:]
		avcc := [][]byte{}
		for {
			avcc = append(avcc, rest[:size])
			rest = rest[size:]
			if len(rest) < 4 {
				break
			}
			size = pio.U32BE(rest)
			rest = rest[4:]
			if size > uint32(len(rest)) {
				break
			}
		}
		// Only accept the AVCC interpretation when nothing is left over.
		if len(rest) == 0 {
			return avcc, NALU_AVCC
		}
	}

	// Annex-B.
	if val3 == 1 || val4 == 1 {
		code3, code4 := val3, val4
		start, pos := 0, 0
		for {
			if start != pos {
				nalus = append(nalus, b[start:pos])
			}
			// Step over the start code that terminated the previous scan.
			if code3 == 1 {
				pos += 3
			} else if code4 == 1 {
				pos += 4
			}
			start = pos
			if start == len(b) {
				break
			}
			code3, code4 = 0, 0
			// Advance pos to the next start code, or to the end of b.
			for pos < len(b) {
				if pos+2 < len(b) && b[pos] == 0 {
					code3 = pio.U24BE(b[pos:])
					if code3 == 0 {
						if pos+3 < len(b) {
							code4 = uint32(b[pos+3])
							if code4 == 1 {
								break
							}
						}
					} else if code3 == 1 {
						break
					}
				}
				pos++
			}
		}
		typ = NALU_ANNEXB
		return
	}

	return [][]byte{b}, NALU_RAW
}
// ParseSPS reads the leading syntax elements of an HEVC sequence parameter
// set and returns the picture geometry and profile/tier/level data.
//
// sps must begin with the two-byte NAL unit header; the header is skipped and
// emulation-prevention bytes are removed before bit-level parsing. Fields that
// are not stored in SPSInfo are still read so that later fields line up, and
// any read failure is returned as err.
//
// Errors:
//   - ErrorHEVCIncorectUnitSize when sps is shorter than the NAL header.
//   - an error when the conformance window is larger than the coded picture
//     (the unsigned subtraction for Width/Height would otherwise wrap around).
//   - whatever the bit reader returns when the data ends early.
//
// On error the returned SPSInfo may be partially filled and should be ignored.
func ParseSPS(sps []byte) (ctx SPSInfo, err error) {
	if len(sps) < 2 {
		err = ErrorHEVCIncorectUnitSize
		return
	}

	rbsp := nal2rbsp(sps[2:])
	br := &bits.GolombBitReader{R: bytes.NewReader(rbsp)}

	// sps_video_parameter_set_id
	if _, err = br.ReadBits(4); err != nil {
		return
	}

	// sps_max_sub_layers_minus1
	var spsMaxSubLayersMinus1 uint
	if spsMaxSubLayersMinus1, err = br.ReadBits(3); err != nil {
		return
	}

	// sps_temporal_id_nesting_flag
	if _, err = br.ReadBit(); err != nil {
		return
	}

	// profile_tier_level( 1, sps_max_sub_layers_minus1 )
	if err = parsePTL(br, &ctx, spsMaxSubLayersMinus1); err != nil {
		return
	}

	// sps_seq_parameter_set_id
	if _, err = br.ReadExponentialGolombCode(); err != nil {
		return
	}

	// chroma_format_idc
	var chromaFormatIDC uint
	if chromaFormatIDC, err = br.ReadExponentialGolombCode(); err != nil {
		return
	}
	if chromaFormatIDC == 3 {
		// separate_colour_plane_flag
		if _, err = br.ReadBit(); err != nil {
			return
		}
	}

	// Table 6-1, Section 6.2. Values of chroma_format_idc above 3 leave both
	// factors at zero, which makes every crop evaluate to zero.
	var subWidthC, subHeightC uint
	switch chromaFormatIDC {
	case 0:
		subWidthC, subHeightC = 1, 1
	case 1:
		subWidthC, subHeightC = 2, 2
	case 2:
		subWidthC, subHeightC = 2, 1
	case 3:
		subWidthC, subHeightC = 1, 1
	}

	// pic_width_in_luma_samples
	if ctx.PicWidthInLumaSamples, err = br.ReadExponentialGolombCode(); err != nil {
		return
	}
	// pic_height_in_luma_samples
	if ctx.PicHeightInLumaSamples, err = br.ReadExponentialGolombCode(); err != nil {
		return
	}

	// conformance_window_flag
	var conformanceWindowFlag uint
	if conformanceWindowFlag, err = br.ReadBit(); err != nil {
		return
	}
	if conformanceWindowFlag != 0 {
		var offset uint

		// conf_win_left_offset
		if offset, err = br.ReadExponentialGolombCode(); err != nil {
			return
		}
		ctx.CropLeft = subWidthC * offset

		// conf_win_right_offset
		if offset, err = br.ReadExponentialGolombCode(); err != nil {
			return
		}
		ctx.CropRight = subWidthC * offset

		// conf_win_top_offset
		if offset, err = br.ReadExponentialGolombCode(); err != nil {
			return
		}
		ctx.CropTop = subHeightC * offset

		// conf_win_bottom_offset
		if offset, err = br.ReadExponentialGolombCode(); err != nil {
			return
		}
		ctx.CropBottom = subHeightC * offset
	}

	// Width/Height are unsigned: refuse crops that exceed the picture instead
	// of letting the subtraction wrap to a huge value. The comparisons are
	// arranged so that they cannot overflow themselves.
	if ctx.CropLeft > ctx.PicWidthInLumaSamples ||
		ctx.CropRight > ctx.PicWidthInLumaSamples-ctx.CropLeft ||
		ctx.CropTop > ctx.PicHeightInLumaSamples ||
		ctx.CropBottom > ctx.PicHeightInLumaSamples-ctx.CropTop {
		err = errors.New("hevcparser: conformance window exceeds picture size")
		return
	}
	ctx.Width = ctx.PicWidthInLumaSamples - ctx.CropLeft - ctx.CropRight
	ctx.Height = ctx.PicHeightInLumaSamples - ctx.CropTop - ctx.CropBottom

	// bit_depth_luma_minus8, bit_depth_chroma_minus8,
	// log2_max_pic_order_cnt_lsb_minus4
	for k := 0; k < 3; k++ {
		if _, err = br.ReadExponentialGolombCode(); err != nil {
			return
		}
	}

	// sps_sub_layer_ordering_info_present_flag
	var orderingInfoPresent uint
	if orderingInfoPresent, err = br.ReadBit(); err != nil {
		return
	}

	// When the flag is clear only the entry for the highest sub-layer is coded.
	i := spsMaxSubLayersMinus1
	if orderingInfoPresent != 0 {
		i = 0
	}
	for ; i <= spsMaxSubLayersMinus1; i++ {
		// sps_max_dec_pic_buffering_minus1[ i ], sps_max_num_reorder_pics[ i ],
		// sps_max_latency_increase_plus1[ i ]
		for k := 0; k < 3; k++ {
			if _, err = br.ReadExponentialGolombCode(); err != nil {
				return
			}
		}
	}

	// log2_min_luma_coding_block_size_minus3,
	// log2_diff_max_min_luma_coding_block_size,
	// log2_min_luma_transform_block_size_minus2,
	// log2_diff_max_min_luma_transform_block_size,
	// max_transform_hierarchy_depth_inter,
	// max_transform_hierarchy_depth_intra
	for k := 0; k < 6; k++ {
		if _, err = br.ReadExponentialGolombCode(); err != nil {
			return
		}
	}

	return
}
// parsePTL consumes a profile_tier_level structure from br.
//
// The general_* values are merged into ctx through updatePTL. When
// maxSubLayersMinus1 is non-zero the per-sub-layer presence flags, the
// two-bit padding entries and any signalled sub-layer profile/level data are
// read and discarded so the reader ends up positioned after the structure.
// The first read error is returned unchanged.
func parsePTL(br *bits.GolombBitReader, ctx *SPSInfo, maxSubLayersMinus1 uint) error {
	var (
		general SPSInfo
		err     error
	)

	general.generalProfileSpace, err = br.ReadBits(2)
	if err != nil {
		return err
	}
	general.generalTierFlag, err = br.ReadBit()
	if err != nil {
		return err
	}
	general.generalProfileIDC, err = br.ReadBits(5)
	if err != nil {
		return err
	}
	general.generalProfileCompatibilityFlags, err = br.ReadBits32(32)
	if err != nil {
		return err
	}
	general.generalConstraintIndicatorFlags, err = br.ReadBits64(48)
	if err != nil {
		return err
	}
	general.generalLevelIDC, err = br.ReadBits(8)
	if err != nil {
		return err
	}
	updatePTL(ctx, &general)

	if maxSubLayersMinus1 == 0 {
		return nil
	}

	// One (profile present, level present) flag pair per sub-layer.
	type presence struct{ profile, level uint }
	flags := make([]presence, maxSubLayersMinus1)
	for i := range flags {
		if flags[i].profile, err = br.ReadBit(); err != nil {
			return err
		}
		if flags[i].level, err = br.ReadBit(); err != nil {
			return err
		}
	}

	// Two-bit entries pad the flag list up to eight positions.
	for pad := maxSubLayersMinus1; pad < 8; pad++ {
		if _, err = br.ReadBits(2); err != nil {
			return err
		}
	}

	for _, f := range flags {
		if f.profile != 0 {
			// Sub-layer profile data: 32 + 32 + 24 = 88 bits, skipped.
			if _, err = br.ReadBits32(32); err != nil {
				return err
			}
			if _, err = br.ReadBits32(32); err != nil {
				return err
			}
			if _, err = br.ReadBits32(24); err != nil {
				return err
			}
		}
		if f.level != 0 {
			// Sub-layer level: 8 bits, skipped.
			if _, err = br.ReadBits(8); err != nil {
				return err
			}
		}
	}
	return nil
}
// updatePTL merges the general profile/tier/level values of ptl into ctx.
//
// Profile space is overwritten. A higher tier flag replaces both tier and
// level; otherwise the level is only raised. The profile IDC is only raised.
// The compatibility and constraint flags are intersected with those already
// in ctx, so a ctx whose flags are still zero keeps them at zero.
func updatePTL(ctx, ptl *SPSInfo) {
	ctx.generalProfileSpace = ptl.generalProfileSpace

	switch {
	case ptl.generalTierFlag > ctx.generalTierFlag:
		ctx.generalLevelIDC = ptl.generalLevelIDC
		ctx.generalTierFlag = ptl.generalTierFlag
	case ptl.generalLevelIDC > ctx.generalLevelIDC:
		ctx.generalLevelIDC = ptl.generalLevelIDC
	}

	if ctx.generalProfileIDC < ptl.generalProfileIDC {
		ctx.generalProfileIDC = ptl.generalProfileIDC
	}

	ctx.generalProfileCompatibilityFlags = ctx.generalProfileCompatibilityFlags & ptl.generalProfileCompatibilityFlags
	ctx.generalConstraintIndicatorFlags = ctx.generalConstraintIndicatorFlags & ptl.generalConstraintIndicatorFlags
}
// nal2rbsp returns a copy of nal in which every 0x00 0x00 0x03 sequence has
// been shortened to 0x00 0x00, i.e. with the emulation-prevention bytes
// removed. The input slice is left untouched.
func nal2rbsp(nal []byte) []byte {
	var (
		escaped   = []byte{0x0, 0x0, 0x3}
		unescaped = []byte{0x0, 0x0}
	)
	return bytes.ReplaceAll(nal, escaped, unescaped)
}
// CodecData bundles an HEVC decoder configuration record with its parsed form.
type CodecData struct {
// Record is the serialized configuration record.
Record []byte
// RecordInfo is the decoded view of the record (VPS/SPS/PPS lists and header bytes).
RecordInfo HEVCDecoderConfRecord
// SPSInfo is the result of ParseSPS on the first SPS.
SPSInfo SPSInfo
}
// Type reports the codec type, which is always av.HEVC.
func (codec CodecData) Type() av.CodecType {
return av.HEVC
}
// HEVCDecoderConfRecordBytes returns the serialized configuration record
// stored in Record (the slice itself, not a copy).
func (codec CodecData) HEVCDecoderConfRecordBytes() []byte {
return codec.Record
}
// SPS returns the first sequence parameter set stored in RecordInfo, or nil
// when the record holds none (for example on a zero-value CodecData).
func (codec CodecData) SPS() []byte {
	if len(codec.RecordInfo.SPS) == 0 {
		return nil
	}
	return codec.RecordInfo.SPS[0]
}
// PPS returns the first picture parameter set stored in RecordInfo, or nil
// when the record holds none (for example on a zero-value CodecData).
func (codec CodecData) PPS() []byte {
	if len(codec.RecordInfo.PPS) == 0 {
		return nil
	}
	return codec.RecordInfo.PPS[0]
}
// VPS returns the first video parameter set stored in RecordInfo, or nil
// when the record holds none (for example on a zero-value CodecData).
func (codec CodecData) VPS() []byte {
	if len(codec.RecordInfo.VPS) == 0 {
		return nil
	}
	return codec.RecordInfo.VPS[0]
}
// Width returns SPSInfo.Width (the cropped picture width) converted to int.
func (codec CodecData) Width() int {
return int(codec.SPSInfo.Width)
}
// Height returns SPSInfo.Height (the cropped picture height) converted to int.
func (codec CodecData) Height() int {
return int(codec.SPSInfo.Height)
}
// NewCodecDataFromHEVCDecoderConfRecord builds a CodecData from a serialized
// decoder configuration record.
//
// The record is kept as-is in Record, decoded into RecordInfo, and the first
// SPS is parsed into SPSInfo. An error is returned when decoding fails, when
// the record carries no SPS, PPS or VPS, or when the SPS cannot be parsed.
func NewCodecDataFromHEVCDecoderConfRecord(record []byte) (self CodecData, err error) {
	self.Record = record

	info := &self.RecordInfo
	if _, err = info.Unmarshal(record); err != nil {
		return
	}

	// All three parameter-set kinds are required; report the first one missing.
	switch {
	case len(info.SPS) == 0:
		err = fmt.Errorf("hevcparser: no SPS found in HEVCDecoderConfRecord")
	case len(info.PPS) == 0:
		err = fmt.Errorf("hevcparser: no PPS found in HEVCDecoderConfRecord")
	case len(info.VPS) == 0:
		err = fmt.Errorf("hevcparser: no VPS found in HEVCDecoderConfRecord")
	}
	if err != nil {
		return
	}

	self.SPSInfo, err = ParseSPS(info.SPS[0])
	if err != nil {
		err = fmt.Errorf("hevcparser: parse SPS failed(%s)", err)
	}
	return
}
// NewCodecDataFromVPSAndSPSAndPPS builds a CodecData from one VPS, one SPS and
// one PPS NAL unit and serializes a matching decoder configuration record.
//
// Bytes 3, 4 and 5 of sps are copied into the record header, the SPS is parsed
// into SPSInfo, and the record is marshalled into Record with
// LengthSizeMinusOne set to 3.
//
// ErrorHEVCIncorectUnitSize is returned when sps has fewer than six bytes or
// when vps or pps is empty: the header bytes are read from sps[3..5] and
// Marshal reads the first byte of every parameter set, so shorter inputs
// would otherwise cause an index-out-of-range panic. Errors from ParseSPS are
// returned unchanged.
func NewCodecDataFromVPSAndSPSAndPPS(vps, sps, pps []byte) (self CodecData, err error) {
	if len(sps) < 6 || len(vps) == 0 || len(pps) == 0 {
		err = ErrorHEVCIncorectUnitSize
		return
	}

	recordinfo := HEVCDecoderConfRecord{}
	recordinfo.HEVCProfileIndication = sps[3]
	recordinfo.ProfileCompatibility = sps[4]
	recordinfo.HEVCLevelIndication = sps[5]
	recordinfo.SPS = [][]byte{sps}
	recordinfo.PPS = [][]byte{pps}
	recordinfo.VPS = [][]byte{vps}
	recordinfo.LengthSizeMinusOne = 3

	if self.SPSInfo, err = ParseSPS(sps); err != nil {
		return
	}

	buf := make([]byte, recordinfo.Len())
	recordinfo.Marshal(buf, self.SPSInfo)
	self.RecordInfo = recordinfo
	self.Record = buf
	return
}
// HEVCDecoderConfRecord is the decoded form of a decoder configuration record:
// a few header bytes plus the VPS, SPS and PPS NAL units it carries.
type HEVCDecoderConfRecord struct {
// Header bytes stored at offsets 1, 2 and 3 of the serialized record.
HEVCProfileIndication uint8
ProfileCompatibility uint8
HEVCLevelIndication uint8
// LengthSizeMinusOne is a two-bit value; Marshal does not read it.
// NOTE(review): presumably the NAL length-prefix size minus one — confirm.
LengthSizeMinusOne uint8
// Parameter sets, each entry being one complete NAL unit.
VPS [][]byte
SPS [][]byte
PPS [][]byte
}
// ErrDecconfInvalid is returned by HEVCDecoderConfRecord.Unmarshal when the
// input is too short for the data it declares.
var ErrDecconfInvalid = fmt.Errorf("hevcparser: HEVCDecoderConfRecord invalid")
// Unmarshal decodes a serialized decoder configuration record from b into
// record and returns the number of bytes consumed.
//
// Layout read by this function:
//   - bytes 1..3: HEVCProfileIndication, ProfileCompatibility, HEVCLevelIndication
//   - byte 21, low two bits: LengthSizeMinusOne (the byte Marshal writes it to)
//   - from byte 23: three NAL unit arrays, taken in the order VPS, SPS, PPS.
//     Each array is a type byte, a two-byte count and then, per unit, a
//     two-byte big-endian length followed by the unit bytes.
//
// The array type bytes are not inspected, only the low byte of each count is
// used (masked with 0x1f for the VPS array), and decoded units are appended to
// the existing VPS/SPS/PPS slices. The appended units alias b.
//
// ErrDecconfInvalid is returned when b is shorter than 30 bytes or ends before
// an array header or a declared unit is complete; n then holds the offset
// reached so far.
func (record *HEVCDecoderConfRecord) Unmarshal(b []byte) (n int, err error) {
	if len(b) < 30 {
		err = ErrDecconfInvalid
		return
	}

	record.HEVCProfileIndication = b[1]
	record.ProfileCompatibility = b[2]
	record.HEVCLevelIndication = b[3]
	// The length-size field shares byte 21 with other flags; Marshal writes it
	// there, so read it from the same place rather than from byte 4.
	record.LengthSizeMinusOne = b[21] & 0x03

	// VPS array: header at 23..25, covered by the length check above.
	vpscount := int(b[25] & 0x1f)
	n += 26
	for i := 0; i < vpscount; i++ {
		if len(b) < n+2 {
			err = ErrDecconfInvalid
			return
		}
		vpslen := int(pio.U16BE(b[n:]))
		n += 2
		if len(b) < n+vpslen {
			err = ErrDecconfInvalid
			return
		}
		record.VPS = append(record.VPS, b[n:n+vpslen])
		n += vpslen
	}

	// SPS array header: type byte + two count bytes must all be present.
	if len(b) < n+3 {
		err = ErrDecconfInvalid
		return
	}
	n += 2
	spscount := int(b[n])
	n++
	for i := 0; i < spscount; i++ {
		if len(b) < n+2 {
			err = ErrDecconfInvalid
			return
		}
		spslen := int(pio.U16BE(b[n:]))
		n += 2
		if len(b) < n+spslen {
			err = ErrDecconfInvalid
			return
		}
		record.SPS = append(record.SPS, b[n:n+spslen])
		n += spslen
	}

	// PPS array header: same three bytes, same check.
	if len(b) < n+3 {
		err = ErrDecconfInvalid
		return
	}
	n += 2
	ppscount := int(b[n])
	n++
	for i := 0; i < ppscount; i++ {
		if len(b) < n+2 {
			err = ErrDecconfInvalid
			return
		}
		ppslen := int(pio.U16BE(b[n:]))
		n += 2
		if len(b) < n+ppslen {
			err = ErrDecconfInvalid
			return
		}
		record.PPS = append(record.PPS, b[n:n+ppslen])
		n += ppslen
	}

	return
}
// Len returns the number of bytes Marshal writes for this record.
//
// That is the 23-byte fixed header plus, for each of the VPS, SPS and PPS
// arrays, a three-byte array header (type byte and two-byte count) and a
// two-byte length prefix in front of every NAL unit. Counting five bytes per
// unit instead only matches Marshal when each array holds exactly one unit.
func (record HEVCDecoderConfRecord) Len() (n int) {
	n = 23
	for _, units := range [][][]byte{record.VPS, record.SPS, record.PPS} {
		n += 3
		for _, unit := range units {
			n += 2 + len(unit)
		}
	}
	return
}
// Marshal serializes the record into b and returns the number of bytes written.
//
// Byte 0 is set to 1, bytes 1..3 receive the three header fields, and bytes 21
// and 22 are both set to 3; the bytes in between are left as they are in b.
// The VPS, SPS and PPS arrays follow in that order. Each array starts with a
// type byte derived from the first unit's first byte ((x >> 1) & 0x3f) and a
// two-byte big-endian unit count, then every unit is written with a two-byte
// big-endian length prefix.
//
// The si argument is not used. The function panics when b is too small or
// when any of the three arrays is empty or starts with an empty unit.
// NOTE(review): bytes 21 and 22 presumably encode a four-byte NAL length size
// and an array count of three — confirm against the record specification.
func (record HEVCDecoderConfRecord) Marshal(b []byte, si SPSInfo) (n int) {
	b[0] = 1
	b[1] = record.HEVCProfileIndication
	b[2] = record.ProfileCompatibility
	b[3] = record.HEVCLevelIndication
	b[21] = 3
	b[22] = 3
	n = 23

	for _, units := range [][][]byte{record.VPS, record.SPS, record.PPS} {
		// Array header: NAL unit type, then the unit count.
		b[n] = (units[0][0] >> 1) & 0x3f
		b[n+1] = byte(len(units) >> 8)
		b[n+2] = byte(len(units))
		n += 3

		for _, unit := range units {
			pio.PutU16BE(b[n:], uint16(len(unit)))
			copy(b[n+2:], unit)
			n += 2 + len(unit)
		}
	}
	return
}