567 lines
16 KiB
Go
567 lines
16 KiB
Go
package isom
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"github.com/nareix/bits"
|
|
"io"
|
|
"io/ioutil"
|
|
)
|
|
|
|
// MPEG-4 descriptor tags found inside an esds payload.
// Values copied from libavformat/isom.h.
const (
	// MP4ESDescrTag marks the ES descriptor, the outermost one written here.
	MP4ESDescrTag = 0x03
	// MP4DecConfigDescrTag marks the decoder config descriptor nested in the ES descriptor.
	MP4DecConfigDescrTag = 0x04
	// MP4DecSpecificDescrTag marks the decoder specific info nested in the decoder config.
	MP4DecSpecificDescrTag = 0x05
)
|
|
|
|
// Package-level debug switches, both off by default.
var (
	// debugReader makes the descriptor readers trace what they parse via println.
	debugReader = false
	// debugWriter is the writer-side switch; nothing in the code visible here reads it.
	debugWriter = false
)
|
|
|
|
// MPEG-4 Audio Object Types, copied from libavcodec/mpeg4audio.h.
// The trailing comments are carried over verbatim from that header
// (the Y/N column presumably records libavcodec support).
//
// iota is the position of an entry inside this block, not "previous value
// plus one". After a gap in the numbering the offset therefore has to be
// (wanted value - position), otherwise every later constant is shifted and
// AOT_ESCAPE no longer fits the 5-bit object type field.
const (
	AOT_AAC_MAIN     = 1 + iota ///< Y Main
	AOT_AAC_LC                  ///< Y Low Complexity
	AOT_AAC_SSR                 ///< N (code in SoC repo) Scalable Sample Rate
	AOT_AAC_LTP                 ///< Y Long Term Prediction
	AOT_SBR                     ///< Y Spectral Band Replication
	AOT_AAC_SCALABLE            ///< N Scalable
	AOT_TWINVQ                  ///< N Twin Vector Quantizer
	AOT_CELP                    ///< N Code Excited Linear Prediction
	AOT_HVXC                    ///< N Harmonic Vector eXcitation Coding

	// Numbering jumps from 9 to 12: this entry sits at position 9, so 3 + 9 = 12.
	AOT_TTSI      = 3 + iota ///< N Text-To-Speech Interface
	AOT_MAINSYNTH            ///< N Main Synthesis
	AOT_WAVESYNTH            ///< N Wavetable Synthesis
	AOT_MIDI                 ///< N General MIDI
	AOT_SAFX                 ///< N Algorithmic Synthesis and Audio Effects
	AOT_ER_AAC_LC            ///< N Error Resilient Low Complexity

	// Numbering jumps from 17 to 19: this entry sits at position 15, so 4 + 15 = 19.
	AOT_ER_AAC_LTP      = 4 + iota ///< N Error Resilient Long Term Prediction
	AOT_ER_AAC_SCALABLE            ///< N Error Resilient Scalable
	AOT_ER_TWINVQ                  ///< N Error Resilient Twin Vector Quantizer
	AOT_ER_BSAC                    ///< N Error Resilient Bit-Sliced Arithmetic Coding
	AOT_ER_AAC_LD                  ///< N Error Resilient Low Delay
	AOT_ER_CELP                    ///< N Error Resilient Code Excited Linear Prediction
	AOT_ER_HVXC                    ///< N Error Resilient Harmonic Vector eXcitation Coding
	AOT_ER_HILN                    ///< N Error Resilient Harmonic and Individual Lines plus Noise
	AOT_ER_PARAM                   ///< N Error Resilient Parametric
	AOT_SSC                        ///< N SinuSoidal Coding
	AOT_PS                         ///< N Parametric Stereo
	AOT_SURROUND                   ///< N MPEG Surround
	AOT_ESCAPE                     ///< Y Escape Value
	AOT_L1                         ///< Y Layer 1
	AOT_L2                         ///< Y Layer 2
	AOT_L3                         ///< Y Layer 3
	AOT_DST                        ///< N Direct Stream Transfer
	AOT_ALS                        ///< Y Audio LosslesS
	AOT_SLS                        ///< N Scalable LosslesS
	AOT_SLS_NON_CORE               ///< N Scalable LosslesS (non core)
	AOT_ER_AAC_ELD                 ///< N Error Resilient Enhanced Low Delay
	AOT_SMR_SIMPLE                 ///< N Symbolic Music Representation Simple
	AOT_SMR_MAIN                   ///< N Symbolic Music Representation Main
	AOT_USAC_NOSBR                 ///< N Unified Speech and Audio Coding (no SBR)
	AOT_SAOC                       ///< N Spatial Audio Object Coding
	AOT_LD_SURROUND                ///< N Low Delay MPEG Surround
	AOT_USAC                       ///< N Unified Speech and Audio Coding
)
|
|
|
|
// MPEG4AudioConfig holds the audio parameters parsed from, or written to,
// an ADTS header or an MPEG-4 audio decoder configuration.
type MPEG4AudioConfig struct {
	// SampleRate and ChannelCount are the resolved values; Complete derives
	// them from SampleRateIndex and ChannelConfig through the lookup tables.
	SampleRate, ChannelCount int

	// ObjectType is the audio object type (the AOT_* numbering).
	// SampleRateIndex and ChannelConfig are the raw values carried in the
	// bitstream; readSampleRateIndex stores the explicit 24-bit value in
	// SampleRateIndex when the stream uses the 0xf escape.
	ObjectType, SampleRateIndex, ChannelConfig uint
}
|
|
|
|
// sampleRateTable maps a sampling frequency index (the slice index) to the
// sample rate it stands for.
var sampleRateTable = []int{
	0:  96000,
	1:  88200,
	2:  64000,
	3:  48000,
	4:  44100,
	5:  32000,
	6:  24000,
	7:  22050,
	8:  16000,
	9:  12000,
	10: 11025,
	11: 8000,
	12: 7350,
}
|
|
|
|
// chanConfigTable maps a channel configuration value (the slice index) to a
// channel count. Configuration 7 stands for 8 channels; configuration 0 maps
// to 0 because the count is not given by the index in that case.
var chanConfigTable = []int{
	0: 0,
	1: 1,
	2: 2,
	3: 3,
	4: 4,
	5: 5,
	6: 6,
	7: 8,
}
|
|
|
|
// IsADTSFrame reports whether frames looks like the start of an ADTS frame:
// it must be longer than seven bytes, begin with 0xff, and have the upper
// four bits of the second byte set (together the 12-bit syncword).
func IsADTSFrame(frames []byte) bool {
	if len(frames) <= 7 {
		return false
	}
	const syncNibble = 0xf0
	return frames[0] == 0xff && frames[1]&syncNibble == syncNibble
}
|
|
|
|
func ReadADTSFrame(frame []byte) (config MPEG4AudioConfig, payload []byte, samples int, framelen int, err error) {
|
|
if !IsADTSFrame(frame) {
|
|
err = fmt.Errorf("not adts frame")
|
|
return
|
|
}
|
|
config.ObjectType = uint(frame[2]>>6) + 1
|
|
config.SampleRateIndex = uint(frame[2] >> 2 & 0xf)
|
|
config.ChannelConfig = uint(frame[2]<<2&0x4 | frame[3]>>6&0x3)
|
|
framelen = int(frame[3]&0x3)<<11 | int(frame[4])<<3 | int(frame[5]>>5)
|
|
samples = (int(frame[6]&0x3) + 1) * 1024
|
|
hdrlen := 7
|
|
if frame[1]&0x1 == 0 {
|
|
hdrlen = 9
|
|
}
|
|
if framelen < hdrlen || len(frame) < framelen {
|
|
err = fmt.Errorf("invalid adts header length")
|
|
return
|
|
}
|
|
payload = frame[hdrlen:framelen]
|
|
return
|
|
}
|
|
|
|
func MakeADTSHeader(config MPEG4AudioConfig, samples int, payloadLength int) (header []byte) {
|
|
payloadLength += 7
|
|
//AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP (QQQQQQQQ QQQQQQQQ)
|
|
header = []byte{0xff, 0xf1, 0x50, 0x80, 0x043, 0xff, 0xcd}
|
|
//config.ObjectType = uint(frames[2]>>6)+1
|
|
//config.SampleRateIndex = uint(frames[2]>>2&0xf)
|
|
//config.ChannelConfig = uint(frames[2]<<2&0x4|frames[3]>>6&0x3)
|
|
header[2] = (byte(config.ObjectType-1)&0x3)<<6 | (byte(config.SampleRateIndex)&0xf)<<2 | byte(config.ChannelConfig>>2)&0x1
|
|
header[3] = header[3]&0x3f | byte(config.ChannelConfig&0x3)<<6
|
|
header[3] = header[3]&0xfc | byte(payloadLength>>11)&0x3
|
|
header[4] = byte(payloadLength >> 3)
|
|
header[5] = header[5]&0x1f | (byte(payloadLength)&0x7)<<5
|
|
header[6] = header[6]&0xfc | byte(samples/1024-1)
|
|
return
|
|
}
|
|
|
|
func ExtractADTSFrames(frames []byte) (config MPEG4AudioConfig, payload []byte, samples int, err error) {
|
|
for len(frames) > 0 {
|
|
var n, framelen int
|
|
if config, payload, n, framelen, err = ReadADTSFrame(frames); err != nil {
|
|
return
|
|
}
|
|
frames = frames[framelen:]
|
|
samples += n
|
|
}
|
|
return
|
|
}
|
|
|
|
func ReadADTSHeader(data []byte) (config MPEG4AudioConfig, frameLength int) {
|
|
br := &bits.Reader{R: bytes.NewReader(data)}
|
|
var i uint
|
|
|
|
//Structure
|
|
//AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP (QQQQQQQQ QQQQQQQQ)
|
|
//Header consists of 7 or 9 bytes (without or with CRC).
|
|
|
|
// 2 bytes
|
|
//A 12 syncword 0xFFF, all bits must be 1
|
|
br.ReadBits(12)
|
|
//B 1 MPEG Version: 0 for MPEG-4, 1 for MPEG-2
|
|
br.ReadBits(1)
|
|
//C 2 Layer: always 0
|
|
br.ReadBits(2)
|
|
//D 1 protection absent, Warning, set to 1 if there is no CRC and 0 if there is CRC
|
|
br.ReadBits(1)
|
|
|
|
//E 2 profile, the MPEG-4 Audio Object Type minus 1
|
|
config.ObjectType, _ = br.ReadBits(2)
|
|
config.ObjectType++
|
|
//F 4 MPEG-4 Sampling Frequency Index (15 is forbidden)
|
|
config.SampleRateIndex, _ = br.ReadBits(4)
|
|
//G 1 private bit, guaranteed never to be used by MPEG, set to 0 when encoding, ignore when decoding
|
|
br.ReadBits(1)
|
|
//H 3 MPEG-4 Channel Configuration (in the case of 0, the channel configuration is sent via an inband PCE)
|
|
config.ChannelConfig, _ = br.ReadBits(3)
|
|
//I 1 originality, set to 0 when encoding, ignore when decoding
|
|
br.ReadBits(1)
|
|
//J 1 home, set to 0 when encoding, ignore when decoding
|
|
br.ReadBits(1)
|
|
//K 1 copyrighted id bit, the next bit of a centrally registered copyright identifier, set to 0 when encoding, ignore when decoding
|
|
br.ReadBits(1)
|
|
//L 1 copyright id start, signals that this frame's copyright id bit is the first bit of the copyright id, set to 0 when encoding, ignore when decoding
|
|
br.ReadBits(1)
|
|
|
|
//M 13 frame length, this value must include 7 or 9 bytes of header length: FrameLength = (ProtectionAbsent == 1 ? 7 : 9) + size(AACFrame)
|
|
i, _ = br.ReadBits(13)
|
|
frameLength = int(i)
|
|
//O 11 Buffer fullness
|
|
br.ReadBits(11)
|
|
//P 2 Number of AAC frames (RDBs) in ADTS frame minus 1, for maximum compatibility always use 1 AAC frame per ADTS frame
|
|
br.ReadBits(2)
|
|
|
|
//Q 16 CRC if protection absent is 0
|
|
return
|
|
}
|
|
|
|
func readObjectType(r *bits.Reader) (objectType uint, err error) {
|
|
if objectType, err = r.ReadBits(5); err != nil {
|
|
return
|
|
}
|
|
if objectType == AOT_ESCAPE {
|
|
var i uint
|
|
if i, err = r.ReadBits(6); err != nil {
|
|
return
|
|
}
|
|
objectType = 32 + i
|
|
}
|
|
return
|
|
}
|
|
|
|
func writeObjectType(w *bits.Writer, objectType uint) (err error) {
|
|
if objectType >= 32 {
|
|
if err = w.WriteBits(AOT_ESCAPE, 5); err != nil {
|
|
return
|
|
}
|
|
if err = w.WriteBits(objectType-32, 6); err != nil {
|
|
return
|
|
}
|
|
} else {
|
|
if err = w.WriteBits(objectType, 5); err != nil {
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
func readSampleRateIndex(r *bits.Reader) (index uint, err error) {
|
|
if index, err = r.ReadBits(4); err != nil {
|
|
return
|
|
}
|
|
if index == 0xf {
|
|
if index, err = r.ReadBits(24); err != nil {
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
func writeSampleRateIndex(w *bits.Writer, index uint) (err error) {
|
|
if index >= 0xf {
|
|
if err = w.WriteBits(0xf, 4); err != nil {
|
|
return
|
|
}
|
|
if err = w.WriteBits(index, 24); err != nil {
|
|
return
|
|
}
|
|
} else {
|
|
if err = w.WriteBits(index, 4); err != nil {
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
func (self MPEG4AudioConfig) IsValid() bool {
|
|
return self.ObjectType > 0
|
|
}
|
|
|
|
func (self MPEG4AudioConfig) Complete() (config MPEG4AudioConfig) {
|
|
config = self
|
|
if int(config.SampleRateIndex) < len(sampleRateTable) {
|
|
config.SampleRate = sampleRateTable[config.SampleRateIndex]
|
|
}
|
|
if int(config.ChannelConfig) < len(chanConfigTable) {
|
|
config.ChannelCount = chanConfigTable[config.ChannelConfig]
|
|
}
|
|
return
|
|
}
|
|
|
|
// copied from libavcodec/mpeg4audio.c avpriv_mpeg4audio_get_config()
|
|
func ReadMPEG4AudioConfig(r io.Reader) (config MPEG4AudioConfig, err error) {
|
|
br := &bits.Reader{R: r}
|
|
|
|
if config.ObjectType, err = readObjectType(br); err != nil {
|
|
return
|
|
}
|
|
if config.SampleRateIndex, err = readSampleRateIndex(br); err != nil {
|
|
return
|
|
}
|
|
if config.ChannelConfig, err = br.ReadBits(4); err != nil {
|
|
return
|
|
}
|
|
return
|
|
}
|
|
|
|
func WriteMPEG4AudioConfig(w io.Writer, config MPEG4AudioConfig) (err error) {
|
|
bw := &bits.Writer{W: w}
|
|
|
|
if err = writeObjectType(bw, config.ObjectType); err != nil {
|
|
return
|
|
}
|
|
|
|
if config.SampleRateIndex == 0 {
|
|
for i, rate := range sampleRateTable {
|
|
if rate == config.SampleRate {
|
|
config.SampleRateIndex = uint(i)
|
|
}
|
|
}
|
|
}
|
|
if err = writeSampleRateIndex(bw, config.SampleRateIndex); err != nil {
|
|
return
|
|
}
|
|
|
|
if config.ChannelConfig == 0 {
|
|
for i, count := range chanConfigTable {
|
|
if count == config.ChannelCount {
|
|
config.ChannelConfig = uint(i)
|
|
}
|
|
}
|
|
}
|
|
if err = bw.WriteBits(config.ChannelConfig, 4); err != nil {
|
|
return
|
|
}
|
|
|
|
if err = bw.FlushBits(); err != nil {
|
|
return
|
|
}
|
|
return
|
|
}
|
|
|
|
func readDesc(r io.Reader) (tag uint, data []byte, err error) {
|
|
if tag, err = bits.ReadUIntBE(r, 8); err != nil {
|
|
return
|
|
}
|
|
var length uint
|
|
for i := 0; i < 4; i++ {
|
|
var c uint
|
|
if c, err = bits.ReadUIntBE(r, 8); err != nil {
|
|
return
|
|
}
|
|
length = (length << 7) | (c & 0x7f)
|
|
if c&0x80 == 0 {
|
|
break
|
|
}
|
|
}
|
|
data = make([]byte, length)
|
|
if _, err = r.Read(data); err != nil {
|
|
return
|
|
}
|
|
return
|
|
}
|
|
|
|
func writeDesc(w io.Writer, tag uint, data []byte) (err error) {
|
|
if err = bits.WriteUIntBE(w, tag, 8); err != nil {
|
|
return
|
|
}
|
|
length := uint(len(data))
|
|
for i := 3; i > 0; i-- {
|
|
if err = bits.WriteUIntBE(w, (length>>uint(7*i))&0x7f|0x80, 8); err != nil {
|
|
return
|
|
}
|
|
}
|
|
if err = bits.WriteUIntBE(w, length&0x7f, 8); err != nil {
|
|
return
|
|
}
|
|
if _, err = w.Write(data); err != nil {
|
|
return
|
|
}
|
|
return
|
|
}
|
|
|
|
func readESDesc(r io.Reader) (err error) {
|
|
var ES_ID uint
|
|
// ES_ID
|
|
if ES_ID, err = bits.ReadUIntBE(r, 16); err != nil {
|
|
return
|
|
}
|
|
var flags uint
|
|
if flags, err = bits.ReadUIntBE(r, 8); err != nil {
|
|
return
|
|
}
|
|
//streamDependenceFlag
|
|
if flags&0x80 != 0 {
|
|
if _, err = bits.ReadUIntBE(r, 16); err != nil {
|
|
return
|
|
}
|
|
}
|
|
//URL_Flag
|
|
if flags&0x40 != 0 {
|
|
var length uint
|
|
if length, err = bits.ReadUIntBE(r, 8); err != nil {
|
|
return
|
|
}
|
|
if _, err = io.CopyN(ioutil.Discard, r, int64(length)); err != nil {
|
|
return
|
|
}
|
|
}
|
|
//OCRstreamFlag
|
|
if flags&0x20 != 0 {
|
|
if _, err = bits.ReadUIntBE(r, 16); err != nil {
|
|
return
|
|
}
|
|
}
|
|
if debugReader {
|
|
println("readESDesc:", ES_ID, flags)
|
|
}
|
|
return
|
|
}
|
|
|
|
func writeESDesc(w io.Writer, ES_ID uint) (err error) {
|
|
// ES_ID
|
|
if err = bits.WriteUIntBE(w, ES_ID, 16); err != nil {
|
|
return
|
|
}
|
|
// flags
|
|
if err = bits.WriteUIntBE(w, 0, 8); err != nil {
|
|
return
|
|
}
|
|
return
|
|
}
|
|
|
|
func readDescByTag(r io.Reader, targetTag uint) (data []byte, err error) {
|
|
var found bool
|
|
for {
|
|
if tag, _data, err := readDesc(r); err != nil {
|
|
break
|
|
} else {
|
|
if tag == targetTag {
|
|
data = _data
|
|
found = true
|
|
}
|
|
if debugReader {
|
|
println("readDescByTag:", tag, len(_data))
|
|
}
|
|
}
|
|
}
|
|
if !found {
|
|
err = fmt.Errorf("tag not found")
|
|
return
|
|
}
|
|
return
|
|
}
|
|
|
|
// copied from libavformat/isom.c ff_mp4_read_dec_config_descr()
|
|
func readDecConfDesc(r io.Reader) (decConfig []byte, err error) {
|
|
var objectId uint
|
|
var streamType uint
|
|
var bufSize uint
|
|
var maxBitrate uint
|
|
var avgBitrate uint
|
|
|
|
// objectId
|
|
if objectId, err = bits.ReadUIntBE(r, 8); err != nil {
|
|
return
|
|
}
|
|
// streamType
|
|
if streamType, err = bits.ReadUIntBE(r, 8); err != nil {
|
|
return
|
|
}
|
|
// buffer size db
|
|
if bufSize, err = bits.ReadUIntBE(r, 24); err != nil {
|
|
return
|
|
}
|
|
// max bitrate
|
|
if maxBitrate, err = bits.ReadUIntBE(r, 32); err != nil {
|
|
return
|
|
}
|
|
// avg bitrate
|
|
if avgBitrate, err = bits.ReadUIntBE(r, 32); err != nil {
|
|
return
|
|
}
|
|
|
|
if debugReader {
|
|
println("readDecConfDesc:", objectId, streamType, bufSize, maxBitrate, avgBitrate)
|
|
}
|
|
|
|
if decConfig, err = readDescByTag(r, MP4DecSpecificDescrTag); err != nil {
|
|
return
|
|
}
|
|
return
|
|
}
|
|
|
|
// copied from libavformat/movenc.c mov_write_esds_tag()
|
|
func writeDecConfDesc(w io.Writer, objectId uint, streamType uint, decConfig []byte) (err error) {
|
|
// objectId
|
|
if err = bits.WriteUIntBE(w, objectId, 8); err != nil {
|
|
return
|
|
}
|
|
// streamType
|
|
if err = bits.WriteUIntBE(w, streamType, 8); err != nil {
|
|
return
|
|
}
|
|
// buffer size db
|
|
if err = bits.WriteUIntBE(w, 0, 24); err != nil {
|
|
return
|
|
}
|
|
// max bitrate
|
|
if err = bits.WriteUIntBE(w, 200000, 32); err != nil {
|
|
return
|
|
}
|
|
// avg bitrate
|
|
if err = bits.WriteUIntBE(w, 0, 32); err != nil {
|
|
return
|
|
}
|
|
if err = writeDesc(w, MP4DecSpecificDescrTag, decConfig); err != nil {
|
|
return
|
|
}
|
|
return
|
|
}
|
|
|
|
// copied from libavformat/mov.c ff_mov_read_esds()
|
|
func ReadElemStreamDesc(r io.Reader) (decConfig []byte, err error) {
|
|
if debugReader {
|
|
println("ReadElemStreamDesc: start")
|
|
}
|
|
|
|
var data []byte
|
|
if data, err = readDescByTag(r, MP4ESDescrTag); err != nil {
|
|
return
|
|
}
|
|
r = bytes.NewReader(data)
|
|
|
|
if err = readESDesc(r); err != nil {
|
|
return
|
|
}
|
|
|
|
if data, err = readDescByTag(r, MP4DecConfigDescrTag); err != nil {
|
|
return
|
|
}
|
|
r = bytes.NewReader(data)
|
|
|
|
if decConfig, err = readDecConfDesc(r); err != nil {
|
|
return
|
|
}
|
|
|
|
if debugReader {
|
|
println("ReadElemStreamDesc: end")
|
|
}
|
|
return
|
|
}
|
|
|
|
func ReadElemStreamDescAAC(r io.Reader) (config MPEG4AudioConfig, err error) {
|
|
var data []byte
|
|
if data, err = ReadElemStreamDesc(r); err != nil {
|
|
return
|
|
}
|
|
if debugReader {
|
|
println("decConfig: ", len(data))
|
|
}
|
|
if config, err = ReadMPEG4AudioConfig(bytes.NewReader(data)); err != nil {
|
|
return
|
|
}
|
|
return
|
|
}
|
|
|
|
func WriteElemStreamDescAAC(w io.Writer, config MPEG4AudioConfig, trackId uint) (err error) {
|
|
// MP4ESDescrTag(ESDesc MP4DecConfigDescrTag(objectId streamType bufSize avgBitrate MP4DecSpecificDescrTag(decConfig)))
|
|
|
|
buf := &bytes.Buffer{}
|
|
WriteMPEG4AudioConfig(buf, config)
|
|
data := buf.Bytes()
|
|
|
|
buf = &bytes.Buffer{}
|
|
// 0x40 = ObjectType AAC
|
|
// 0x15 = Audiostream
|
|
writeDecConfDesc(buf, 0x40, 0x15, data)
|
|
data = buf.Bytes()
|
|
|
|
buf = &bytes.Buffer{}
|
|
writeDesc(buf, MP4DecConfigDescrTag, data) // 4
|
|
data = buf.Bytes()
|
|
|
|
buf = &bytes.Buffer{}
|
|
writeESDesc(buf, trackId)
|
|
buf.Write(data)
|
|
writeDesc(buf, 0x06, []byte{0x02})
|
|
data = buf.Bytes()
|
|
|
|
buf = &bytes.Buffer{}
|
|
writeDesc(buf, MP4ESDescrTag, data) // 3
|
|
data = buf.Bytes()
|
|
|
|
if _, err = w.Write(data); err != nil {
|
|
return
|
|
}
|
|
return
|
|
}
|