From 4a7ed9cf72682a456cc483f1eaad2253bdc019b3 Mon Sep 17 00:00:00 2001 From: cfanfrank Date: Mon, 11 Mar 2013 11:02:25 +0800 Subject: [PATCH 01/34] first --- README.md | 65 +++++++++++++++++++++++ aacdec.go | 84 ++++++++++++++++++++++++++++++ aacenc.go | 93 +++++++++++++++++++++++++++++++++ h264dec.go | 90 ++++++++++++++++++++++++++++++++ h264enc.go | 148 +++++++++++++++++++++++++++++++++++++++++++++++++++++ util.go | 51 ++++++++++++++++++ 6 files changed, 531 insertions(+) create mode 100644 README.md create mode 100644 aacdec.go create mode 100644 aacenc.go create mode 100644 h264dec.go create mode 100644 h264enc.go create mode 100644 util.go diff --git a/README.md b/README.md new file mode 100644 index 0000000..fc2c360 --- /dev/null +++ b/README.md @@ -0,0 +1,65 @@ + +codec +==== + +Golang aac/h264 encoder and decoder + +H264 encoding example + + w := 400 + h := 400 + var nal [][]byte + + c, _ := codec.NewH264Encoder(w, h, image.YCbCrSubsampleRatio420) + nal = append(nal, c.Header) + + for i := 0; i < 60; i++ { + img := image.NewYCbCr(image.Rect(0,0,w,h), image.YCbCrSubsampleRatio420) + p, _ := c.Encode(img) + if len(p.Data) > 0 { + nal = append(nal, p.Data) + } + } + for { + // flush encoder + p, err := c.Encode(nil) + if err != nil { + break + } + nal = append(nal, p.Data) + } + +H264 decoding example + + dec, err := codec.NewH264Decoder(nal[0]) + for i, n := range nal[1:] { + img, err := dec.Decode(n) + if err == nil { + fp, _ := os.Create(fmt.Sprintf("/tmp/dec-%d.jpg", i)) + jpeg.Encode(fp, img, nil) + fp.Close() + } + } + +AAC encoding example + + var pkts [][]byte + + c, _ := codec.NewAACEncoder() + pkts = append(pkts, c.Header) + + for i := 0; i < 60; i++ { + var sample [8192]byte + p, _ := c.Encode(sample) + if len(p) > 0 { + pkts = append(pkts, p) + } + } + +AAC decoding example + + dec, _ := codec.NewAACDecoder(pkts[0]) + for _, p := range pkts[1:] { + sample, err := dec.Decode(p) + } + diff --git a/aacdec.go b/aacdec.go new file mode 100644 index 
0000000..47f4ddd --- /dev/null +++ b/aacdec.go @@ -0,0 +1,84 @@ + +package codec + +import ( + /* + #include + #include + #include + #include + + typedef struct { + AVCodec *c; + AVCodecContext *ctx; + AVFrame *f; + int got; + } aacdec_t ; + + static int aacdec_new(aacdec_t *m, uint8_t *buf, int len) { + m->c = avcodec_find_decoder(CODEC_ID_AAC); + m->ctx = avcodec_alloc_context3(m->c); + m->f = avcodec_alloc_frame(); + m->ctx->extradata = buf; + m->ctx->extradata_size = len; + m->ctx->debug = 0x3; + av_log(m->ctx, AV_LOG_DEBUG, "m %p\n", m); + return avcodec_open2(m->ctx, m->c, 0); + } + + static int aacdec_decode(aacdec_t *m, uint8_t *data, int len) { + AVPacket pkt; + av_init_packet(&pkt); + pkt.data = data; + pkt.size = len; + av_log(m->ctx, AV_LOG_DEBUG, "decode %p\n", m); + return avcodec_decode_audio4(m->ctx, m->f, &m->got, &pkt); + } + */ + "C" + "unsafe" + "errors" +) + +type AACDecoder struct { + m C.aacdec_t +} + +func NewAACDecoder(header []byte) (m *AACDecoder, err error) { + m = &AACDecoder{} + r := C.aacdec_new(&m.m, + (*C.uint8_t)(unsafe.Pointer(&header[0])), + (C.int)(len(header)), + ) + if int(r) < 0 { + err = errors.New("open codec failed") + } + return +} + +func (m *AACDecoder) Decode(data []byte) (sample []byte, err error) { + r := C.aacdec_decode( + &m.m, + (*C.uint8_t)(unsafe.Pointer(&data[0])), + (C.int)(len(data)), + ) + if int(r) < 0 { + err = errors.New("decode failed") + return + } + if int(m.m.got) == 0 { + err = errors.New("no data") + return + } + size := int(m.m.f.linesize[0])*2 + sample = make([]byte, size*2) + for i := 0; i < 2; i++ { + C.memcpy( + unsafe.Pointer(&sample[i*size]), + unsafe.Pointer(m.m.f.data[i]), + (C.size_t)(size), + ) + } + return +} + diff --git a/aacenc.go b/aacenc.go new file mode 100644 index 0000000..0f02812 --- /dev/null +++ b/aacenc.go @@ -0,0 +1,93 @@ + +package codec + +import ( + /* + #include + #include + #include + + typedef struct { + AVCodec *c; + AVCodecContext *ctx; + AVFrame *f; + int got; + 
uint8_t buf[1024*10]; int size; + int samplerate; int bitrate; + int channels; + } aacenc_t ; + + static int aacenc_new(aacenc_t *m) { + m->c = avcodec_find_encoder(CODEC_ID_AAC); + m->ctx = avcodec_alloc_context3(m->c); + m->ctx->sample_fmt = AV_SAMPLE_FMT_FLTP; + m->ctx->sample_rate = m->samplerate; + m->ctx->bit_rate = m->bitrate; + m->ctx->channels = m->channels; + m->ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; + m->f = avcodec_alloc_frame(); + int r = avcodec_open2(m->ctx, m->c, 0); + av_log(m->ctx, AV_LOG_DEBUG, "extra %d\n", m->ctx->extradata_size); + return r; + } + + static void aacenc_encode(aacenc_t *m) { + AVPacket pkt; + av_init_packet(&pkt); + pkt.data = m->buf; + pkt.size = sizeof(m->buf); + m->f->nb_samples = 1024; + m->f->extended_data = m->f->data; + m->f->linesize[0] = 4096; + avcodec_encode_audio2(m->ctx, &pkt, m->f, &m->got); + av_log(m->ctx, AV_LOG_DEBUG, "got %d size %d\n", m->got, pkt.size); + m->size = pkt.size; + } + */ + "C" + "unsafe" + "errors" +) + +type AACEncoder struct { + m C.aacenc_t + Header []byte +} + +// only supported fltp,stereo,44100khz. 
If you need other config, it's easy to modify code +func NewAACEncoder() (m *AACEncoder, err error) { + m = &AACEncoder{} + m.m.samplerate = 44100 + m.m.bitrate = 50000 + m.m.channels = 2 + r := C.aacenc_new(&m.m) + if int(r) != 0 { + err = errors.New("open codec failed") + return + } + m.Header = make([]byte, (int)(m.m.ctx.extradata_size)) + C.memcpy( + unsafe.Pointer(&m.Header[0]), + unsafe.Pointer(&m.m.ctx.extradata), + (C.size_t)(len(m.Header)), + ) + return +} + +func (m *AACEncoder) Encode(sample []byte) (ret []byte, err error) { + m.m.f.data[0] = (*C.uint8_t)(unsafe.Pointer(&sample[0])) + m.m.f.data[1] = (*C.uint8_t)(unsafe.Pointer(&sample[4096])) + C.aacenc_encode(&m.m) + if int(m.m.got) == 0 { + err = errors.New("no data") + return + } + ret = make([]byte, (int)(m.m.size)) + C.memcpy( + unsafe.Pointer(&ret[0]), + unsafe.Pointer(&m.m.buf[0]), + (C.size_t)(m.m.size), + ) + return +} + diff --git a/h264dec.go b/h264dec.go new file mode 100644 index 0000000..a4884ca --- /dev/null +++ b/h264dec.go @@ -0,0 +1,90 @@ + +package codec + +import ( + /* + #include + #include + #include + + typedef struct { + AVCodec *c; + AVCodecContext *ctx; + AVFrame *f; + int got; + } h264dec_t ; + + static int h264dec_new(h264dec_t *h, uint8_t *data, int len) { + h->c = avcodec_find_decoder(CODEC_ID_H264); + h->ctx = avcodec_alloc_context3(h->c); + h->f = avcodec_alloc_frame(); + h->ctx->extradata = data; + h->ctx->extradata_size = len; + h->ctx->debug = 0x3; + return avcodec_open2(h->ctx, h->c, 0); + } + + static int h264dec_decode(h264dec_t *h, uint8_t *data, int len) { + AVPacket pkt; + av_init_packet(&pkt); + pkt.data = data; + pkt.size = len; + return avcodec_decode_video2(h->ctx, h->f, &h->got, &pkt); + } + */ + "C" + "unsafe" + "errors" + "image" +) + +type H264Decoder struct { + m C.h264dec_t +} + +func NewH264Decoder(header []byte) (m *H264Decoder, err error) { + m = &H264Decoder{} + r := C.h264dec_new( + &m.m, + (*C.uint8_t)(unsafe.Pointer(&header[0])), + 
(C.int)(len(header)), + ) + if int(r) < 0 { + err = errors.New("open codec failed") + } + return +} + +func (m *H264Decoder) Decode(nal []byte) (f *image.YCbCr, err error) { + r := C.h264dec_decode( + &m.m, + (*C.uint8_t)(unsafe.Pointer(&nal[0])), + (C.int)(len(nal)), + ) + if int(r) < 0 { + err = errors.New("decode failed") + return + } + if m.m.got == 0 { + err = errors.New("no picture") + return + } + + w := int(m.m.f.width) + h := int(m.m.f.height) + ys := int(m.m.f.linesize[0]) + cs := int(m.m.f.linesize[1]) + + f = &image.YCbCr{ + Y: fromCPtr(unsafe.Pointer(m.m.f.data[0]), ys*h), + Cb: fromCPtr(unsafe.Pointer(m.m.f.data[1]), cs*h/2), + Cr: fromCPtr(unsafe.Pointer(m.m.f.data[2]), cs*h/2), + YStride: ys, + CStride: cs, + SubsampleRatio: image.YCbCrSubsampleRatio420, + Rect: image.Rect(0, 0, w, h), + } + + return +} + diff --git a/h264enc.go b/h264enc.go new file mode 100644 index 0000000..1947591 --- /dev/null +++ b/h264enc.go @@ -0,0 +1,148 @@ + +package codec + +import ( + + /* + #include + #include + #include + #include + #include + #include + #include + + typedef struct { + int w, h; + int pixfmt; + char *preset[2]; + char *profile; + int bitrate; + int got; + AVCodec *c; + AVCodecContext *ctx; + AVFrame *f; + AVPacket pkt; + } h264enc_t; + + static int h264enc_new(h264enc_t *m) { + m->c = avcodec_find_encoder(CODEC_ID_H264); + m->ctx = avcodec_alloc_context3(m->c); + m->ctx->width = m->w; + m->ctx->height = m->w; + m->ctx->bit_rate = m->bitrate; + m->ctx->pix_fmt = m->pixfmt; + m->ctx->flags |= CODEC_FLAG_GLOBAL_HEADER; + m->f = avcodec_alloc_frame(); + return avcodec_open2(m->ctx, m->c, NULL); + } + + */ + "C" + "unsafe" + "image" + "errors" + "strings" + //"log" +) + +type H264Encoder struct { + m C.h264enc_t + Header []byte + Pixfmt image.YCbCrSubsampleRatio + W, H int +} + +func NewH264Encoder( + w, h int, + pixfmt image.YCbCrSubsampleRatio, + opts ...string, +) (m *H264Encoder, err error) { + m = &H264Encoder{} + m.m.w = (C.int)(w) + m.m.h = 
(C.int)(h) + m.W = w + m.H = h + m.Pixfmt = pixfmt + switch pixfmt { + case image.YCbCrSubsampleRatio444: + m.m.pixfmt = C.PIX_FMT_YUV444P + case image.YCbCrSubsampleRatio422: + m.m.pixfmt = C.PIX_FMT_YUV422P + case image.YCbCrSubsampleRatio420: + m.m.pixfmt = C.PIX_FMT_YUV420P + } + for _, opt := range opts { + a := strings.Split(opt, ",") + switch { + case a[0] == "preset" && len(a) == 3: + m.m.preset[0] = C.CString(a[1]) + m.m.preset[1] = C.CString(a[2]) + case a[0] == "profile" && len(a) == 2: + m.m.profile = C.CString(a[1]) + } + } + r := C.h264enc_new(&m.m) + if int(r) < 0 { + err = errors.New("open encoder failed") + return + } + m.Header = fromCPtr(unsafe.Pointer(m.m.ctx.extradata), (int)(m.m.ctx.extradata_size)) + //m.Header = fromCPtr(unsafe.Pointer(m.m.pps), (int)(m.m.ppslen)) + return +} + +type h264Out struct { + Data []byte + Key bool +} + +func (m *H264Encoder) Encode(img *image.YCbCr) (out h264Out, err error) { + var f *C.AVFrame + if img == nil { + f = nil + } else { + if img.SubsampleRatio != m.Pixfmt { + err = errors.New("image pixfmt not match") + return + } + if img.Rect.Dx() != m.W || img.Rect.Dy() != m.H { + err = errors.New("image size not match") + return + } + f = m.m.f + f.data[0] = (*C.uint8_t)(unsafe.Pointer(&img.Y[0])); + f.data[1] = (*C.uint8_t)(unsafe.Pointer(&img.Cb[0])); + f.data[2] = (*C.uint8_t)(unsafe.Pointer(&img.Cr[0])); + f.linesize[0] = (C.int)(img.YStride); + f.linesize[1] = (C.int)(img.CStride); + f.linesize[2] = (C.int)(img.CStride); + } + + C.av_init_packet(&m.m.pkt) + r := C.avcodec_encode_video2(m.m.ctx, &m.m.pkt, f, &m.m.got) + defer C.av_free_packet(&m.m.pkt) + if int(r) < 0 { + err = errors.New("encode failed") + return + } + if m.m.got == 0 { + err = errors.New("no picture") + return + } + if (m.m.pkt.size == 0) { + err = errors.New("packet size == 0") + return + } + + out.Data = make([]byte, m.m.pkt.size) + C.memcpy( + unsafe.Pointer(&out.Data[0]), + unsafe.Pointer(m.m.pkt.data), + (C.size_t)(m.m.pkt.size), + ) + 
out.Key = (m.m.pkt.flags & C.AV_PKT_FLAG_KEY) != 0 + + return +} + diff --git a/util.go b/util.go new file mode 100644 index 0000000..ce6b126 --- /dev/null +++ b/util.go @@ -0,0 +1,51 @@ + +/* + +Golang h264,aac decoder/encoder libav wrapper + + d, err = codec.NewAACEncoder() + data, err = d.Encode(samples) + + d, err = codec.NewAACDecoder(aaccfg) + samples, err = d.Decode(data) + + var img *image.YCbCr + d, err = codec.NewH264Encoder(640, 480) + img, err = d.Encode(img) + + d, err = codec.NewH264Decoder(pps) + img, err = d.Decode(nal) +*/ +package codec + +import ( + "unsafe" + "reflect" + + /* + #cgo darwin LDFLAGS: -lavformat -lavutil -lavcodec + + #include + #include + + static void libav_init() { + av_register_all(); + av_log_set_level(AV_LOG_DEBUG); + } + */ + "C" +) + +func init() { + C.libav_init() +} + +func fromCPtr(buf unsafe.Pointer, size int) (ret []uint8) { + hdr := (*reflect.SliceHeader)((unsafe.Pointer(&ret))) + hdr.Cap = size + hdr.Len = size + hdr.Data = uintptr(buf) + return +} + + From 1a3d00af18f3eaa662ba1a8d340a44415e760eaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20Ol=C3=A1h?= Date: Mon, 1 Dec 2014 17:14:01 +0100 Subject: [PATCH 02/34] syntax highlighting in the README --- README.md | 103 +++++++++++++++++++++++++++++------------------------- 1 file changed, 55 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index fc2c360..c306153 100644 --- a/README.md +++ b/README.md @@ -2,64 +2,71 @@ codec ==== -Golang aac/h264 encoder and decoder +Golang aac/h264 encoder and decoder. 
-H264 encoding example +H264 encoding example: - w := 400 - h := 400 - var nal [][]byte +```go +w := 400 +h := 400 +var nal [][]byte - c, _ := codec.NewH264Encoder(w, h, image.YCbCrSubsampleRatio420) - nal = append(nal, c.Header) +c, _ := codec.NewH264Encoder(w, h, image.YCbCrSubsampleRatio420) +nal = append(nal, c.Header) - for i := 0; i < 60; i++ { - img := image.NewYCbCr(image.Rect(0,0,w,h), image.YCbCrSubsampleRatio420) - p, _ := c.Encode(img) - if len(p.Data) > 0 { - nal = append(nal, p.Data) - } - } - for { - // flush encoder - p, err := c.Encode(nil) - if err != nil { - break - } +for i := 0; i < 60; i++ { + img := image.NewYCbCr(image.Rect(0,0,w,h), image.YCbCrSubsampleRatio420) + p, _ := c.Encode(img) + if len(p.Data) > 0 { nal = append(nal, p.Data) } - -H264 decoding example - - dec, err := codec.NewH264Decoder(nal[0]) - for i, n := range nal[1:] { - img, err := dec.Decode(n) - if err == nil { - fp, _ := os.Create(fmt.Sprintf("/tmp/dec-%d.jpg", i)) - jpeg.Encode(fp, img, nil) - fp.Close() - } +} +for { + // flush encoder + p, err := c.Encode(nil) + if err != nil { + break } + nal = append(nal, p.Data) +} +``` -AAC encoding example - - var pkts [][]byte +H264 decoding example: - c, _ := codec.NewAACEncoder() - pkts = append(pkts, c.Header) - - for i := 0; i < 60; i++ { - var sample [8192]byte - p, _ := c.Encode(sample) - if len(p) > 0 { - pkts = append(pkts, p) - } +```go +dec, err := codec.NewH264Decoder(nal[0]) +for i, n := range nal[1:] { + img, err := dec.Decode(n) + if err == nil { + fp, _ := os.Create(fmt.Sprintf("/tmp/dec-%d.jpg", i)) + jpeg.Encode(fp, img, nil) + fp.Close() } +} +``` -AAC decoding example - - dec, _ := codec.NewAACDecoder(pkts[0]) - for _, p := range pkts[1:] { - sample, err := dec.Decode(p) +AAC encoding example: + +```go +var pkts [][]byte + +c, _ := codec.NewAACEncoder() +pkts = append(pkts, c.Header) + +for i := 0; i < 60; i++ { + var sample [8192]byte + p, _ := c.Encode(sample) + if len(p) > 0 { + pkts = append(pkts, p) } +} 
+``` +AAC decoding example: + +```go +dec, _ := codec.NewAACDecoder(pkts[0]) +for _, p := range pkts[1:] { + sample, err := dec.Decode(p) +} +``` From 7f0d898b81bc210d6063db0d1a265b12155cda3d Mon Sep 17 00:00:00 2001 From: Brian Fulkerson Date: Sat, 7 Mar 2015 12:08:56 +0000 Subject: [PATCH 03/34] Use the height parameter when configuring h264enc --- h264enc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h264enc.go b/h264enc.go index 1947591..3f0274d 100644 --- a/h264enc.go +++ b/h264enc.go @@ -29,7 +29,7 @@ import ( m->c = avcodec_find_encoder(CODEC_ID_H264); m->ctx = avcodec_alloc_context3(m->c); m->ctx->width = m->w; - m->ctx->height = m->w; + m->ctx->height = m->h; m->ctx->bit_rate = m->bitrate; m->ctx->pix_fmt = m->pixfmt; m->ctx->flags |= CODEC_FLAG_GLOBAL_HEADER; From 6e08365b9bf9cfd3fc4c21b8a8d94787ac9deb7e Mon Sep 17 00:00:00 2001 From: XieRan Date: Mon, 22 Jun 2015 02:42:45 +0800 Subject: [PATCH 04/34] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index c306153..ac8920c 100644 --- a/README.md +++ b/README.md @@ -70,3 +70,8 @@ for _, p := range pkts[1:] { sample, err := dec.Decode(p) } ``` + +License +---- + +All code is under WTFPL. 
You can use it for everything as you want :) From 0b26955b16d5dab1bcedb3678a19232639070272 Mon Sep 17 00:00:00 2001 From: Paul Kohler Date: Thu, 10 Dec 2015 12:54:21 -0500 Subject: [PATCH 05/34] changed LFLAGS so it works on ubuntu and switched to non-deprecated libav calls --- aacdec.go | 62 +++++++++++++++++++-------------------- aacenc.go | 80 +++++++++++++++++++++++++------------------------- h264dec.go | 66 ++++++++++++++++++++--------------------- h264enc.go | 86 ++++++++++++++++++++++++++---------------------------- util.go | 19 +++++------- 5 files changed, 151 insertions(+), 162 deletions(-) diff --git a/aacdec.go b/aacdec.go index 47f4ddd..7ddeedd 100644 --- a/aacdec.go +++ b/aacdec.go @@ -1,43 +1,42 @@ - package codec import ( /* - #include - #include - #include - #include + #include + #include + #include + #include - typedef struct { - AVCodec *c; - AVCodecContext *ctx; - AVFrame *f; - int got; - } aacdec_t ; + typedef struct { + AVCodec *c; + AVCodecContext *ctx; + AVFrame *f; + int got; + } aacdec_t ; - static int aacdec_new(aacdec_t *m, uint8_t *buf, int len) { - m->c = avcodec_find_decoder(CODEC_ID_AAC); - m->ctx = avcodec_alloc_context3(m->c); - m->f = avcodec_alloc_frame(); - m->ctx->extradata = buf; - m->ctx->extradata_size = len; - m->ctx->debug = 0x3; - av_log(m->ctx, AV_LOG_DEBUG, "m %p\n", m); - return avcodec_open2(m->ctx, m->c, 0); - } + static int aacdec_new(aacdec_t *m, uint8_t *buf, int len) { + m->c = avcodec_find_decoder(CODEC_ID_AAC); + m->ctx = avcodec_alloc_context3(m->c); + m->f = avcodec_alloc_frame(); + m->ctx->extradata = buf; + m->ctx->extradata_size = len; + m->ctx->debug = 0x3; + av_log(m->ctx, AV_LOG_DEBUG, "m %p\n", m); + return avcodec_open2(m->ctx, m->c, 0); + } - static int aacdec_decode(aacdec_t *m, uint8_t *data, int len) { - AVPacket pkt; - av_init_packet(&pkt); - pkt.data = data; - pkt.size = len; - av_log(m->ctx, AV_LOG_DEBUG, "decode %p\n", m); - return avcodec_decode_audio4(m->ctx, m->f, &m->got, &pkt); - } + 
static int aacdec_decode(aacdec_t *m, uint8_t *data, int len) { + AVPacket pkt; + av_init_packet(&pkt); + pkt.data = data; + pkt.size = len; + av_log(m->ctx, AV_LOG_DEBUG, "decode %p\n", m); + return avcodec_decode_audio4(m->ctx, m->f, &m->got, &pkt); + } */ "C" - "unsafe" "errors" + "unsafe" ) type AACDecoder struct { @@ -70,7 +69,7 @@ func (m *AACDecoder) Decode(data []byte) (sample []byte, err error) { err = errors.New("no data") return } - size := int(m.m.f.linesize[0])*2 + size := int(m.m.f.linesize[0]) * 2 sample = make([]byte, size*2) for i := 0; i < 2; i++ { C.memcpy( @@ -81,4 +80,3 @@ func (m *AACDecoder) Decode(data []byte) (sample []byte, err error) { } return } - diff --git a/aacenc.go b/aacenc.go index 0f02812..8386280 100644 --- a/aacenc.go +++ b/aacenc.go @@ -1,56 +1,55 @@ - package codec import ( /* - #include - #include - #include + #include + #include + #include - typedef struct { - AVCodec *c; - AVCodecContext *ctx; - AVFrame *f; - int got; - uint8_t buf[1024*10]; int size; - int samplerate; int bitrate; - int channels; - } aacenc_t ; + typedef struct { + AVCodec *c; + AVCodecContext *ctx; + AVFrame *f; + int got; + uint8_t buf[1024*10]; int size; + int samplerate; int bitrate; + int channels; + } aacenc_t ; - static int aacenc_new(aacenc_t *m) { - m->c = avcodec_find_encoder(CODEC_ID_AAC); - m->ctx = avcodec_alloc_context3(m->c); - m->ctx->sample_fmt = AV_SAMPLE_FMT_FLTP; - m->ctx->sample_rate = m->samplerate; - m->ctx->bit_rate = m->bitrate; - m->ctx->channels = m->channels; - m->ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; - m->f = avcodec_alloc_frame(); - int r = avcodec_open2(m->ctx, m->c, 0); - av_log(m->ctx, AV_LOG_DEBUG, "extra %d\n", m->ctx->extradata_size); - return r; - } + static int aacenc_new(aacenc_t *m) { + m->c = avcodec_find_encoder(CODEC_ID_AAC); + m->ctx = avcodec_alloc_context3(m->c); + m->ctx->sample_fmt = AV_SAMPLE_FMT_FLTP; + m->ctx->sample_rate = m->samplerate; + m->ctx->bit_rate = m->bitrate; + 
m->ctx->channels = m->channels; + m->ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; + m->f = avcodec_alloc_frame(); + int r = avcodec_open2(m->ctx, m->c, 0); + av_log(m->ctx, AV_LOG_DEBUG, "extra %d\n", m->ctx->extradata_size); + return r; + } - static void aacenc_encode(aacenc_t *m) { - AVPacket pkt; - av_init_packet(&pkt); - pkt.data = m->buf; - pkt.size = sizeof(m->buf); - m->f->nb_samples = 1024; - m->f->extended_data = m->f->data; - m->f->linesize[0] = 4096; - avcodec_encode_audio2(m->ctx, &pkt, m->f, &m->got); - av_log(m->ctx, AV_LOG_DEBUG, "got %d size %d\n", m->got, pkt.size); - m->size = pkt.size; - } + static void aacenc_encode(aacenc_t *m) { + AVPacket pkt; + av_init_packet(&pkt); + pkt.data = m->buf; + pkt.size = sizeof(m->buf); + m->f->nb_samples = 1024; + m->f->extended_data = m->f->data; + m->f->linesize[0] = 4096; + avcodec_encode_audio2(m->ctx, &pkt, m->f, &m->got); + av_log(m->ctx, AV_LOG_DEBUG, "got %d size %d\n", m->got, pkt.size); + m->size = pkt.size; + } */ "C" - "unsafe" "errors" + "unsafe" ) type AACEncoder struct { - m C.aacenc_t + m C.aacenc_t Header []byte } @@ -90,4 +89,3 @@ func (m *AACEncoder) Encode(sample []byte) (ret []byte, err error) { ) return } - diff --git a/h264dec.go b/h264dec.go index a4884ca..745f339 100644 --- a/h264dec.go +++ b/h264dec.go @@ -1,41 +1,40 @@ - package codec import ( /* - #include - #include - #include + #include + #include + #include - typedef struct { - AVCodec *c; - AVCodecContext *ctx; - AVFrame *f; - int got; - } h264dec_t ; + typedef struct { + AVCodec *c; + AVCodecContext *ctx; + AVFrame *f; + int got; + } h264dec_t ; - static int h264dec_new(h264dec_t *h, uint8_t *data, int len) { - h->c = avcodec_find_decoder(CODEC_ID_H264); - h->ctx = avcodec_alloc_context3(h->c); - h->f = avcodec_alloc_frame(); - h->ctx->extradata = data; - h->ctx->extradata_size = len; - h->ctx->debug = 0x3; - return avcodec_open2(h->ctx, h->c, 0); - } + static int h264dec_new(h264dec_t *h, uint8_t *data, int len) { + 
h->c = avcodec_find_decoder(CODEC_ID_H264); + h->ctx = avcodec_alloc_context3(h->c); + h->f = avcodec_alloc_frame(); + h->ctx->extradata = data; + h->ctx->extradata_size = len; + h->ctx->debug = 0x3; + return avcodec_open2(h->ctx, h->c, 0); + } - static int h264dec_decode(h264dec_t *h, uint8_t *data, int len) { - AVPacket pkt; - av_init_packet(&pkt); - pkt.data = data; - pkt.size = len; - return avcodec_decode_video2(h->ctx, h->f, &h->got, &pkt); - } + static int h264dec_decode(h264dec_t *h, uint8_t *data, int len) { + AVPacket pkt; + av_init_packet(&pkt); + pkt.data = data; + pkt.size = len; + return avcodec_decode_video2(h->ctx, h->f, &h->got, &pkt); + } */ "C" - "unsafe" "errors" "image" + "unsafe" ) type H264Decoder struct { @@ -76,15 +75,14 @@ func (m *H264Decoder) Decode(nal []byte) (f *image.YCbCr, err error) { cs := int(m.m.f.linesize[1]) f = &image.YCbCr{ - Y: fromCPtr(unsafe.Pointer(m.m.f.data[0]), ys*h), - Cb: fromCPtr(unsafe.Pointer(m.m.f.data[1]), cs*h/2), - Cr: fromCPtr(unsafe.Pointer(m.m.f.data[2]), cs*h/2), - YStride: ys, - CStride: cs, + Y: fromCPtr(unsafe.Pointer(m.m.f.data[0]), ys*h), + Cb: fromCPtr(unsafe.Pointer(m.m.f.data[1]), cs*h/2), + Cr: fromCPtr(unsafe.Pointer(m.m.f.data[2]), cs*h/2), + YStride: ys, + CStride: cs, SubsampleRatio: image.YCbCrSubsampleRatio420, - Rect: image.Rect(0, 0, w, h), + Rect: image.Rect(0, 0, w, h), } return } - diff --git a/h264enc.go b/h264enc.go index 3f0274d..92fe349 100644 --- a/h264enc.go +++ b/h264enc.go @@ -1,56 +1,55 @@ - package codec import ( /* - #include - #include - #include - #include - #include - #include - #include + #include + #include + #include + #include + #include + #include + #include - typedef struct { - int w, h; - int pixfmt; - char *preset[2]; - char *profile; - int bitrate; - int got; - AVCodec *c; - AVCodecContext *ctx; - AVFrame *f; - AVPacket pkt; - } h264enc_t; + typedef struct { + int w, h; + int pixfmt; + char *preset[2]; + char *profile; + int bitrate; + int got; + AVCodec *c; + 
AVCodecContext *ctx; + AVFrame *f; + AVPacket pkt; + } h264enc_t; - static int h264enc_new(h264enc_t *m) { - m->c = avcodec_find_encoder(CODEC_ID_H264); - m->ctx = avcodec_alloc_context3(m->c); - m->ctx->width = m->w; - m->ctx->height = m->h; - m->ctx->bit_rate = m->bitrate; - m->ctx->pix_fmt = m->pixfmt; - m->ctx->flags |= CODEC_FLAG_GLOBAL_HEADER; - m->f = avcodec_alloc_frame(); - return avcodec_open2(m->ctx, m->c, NULL); - } + static int h264enc_new(h264enc_t *m) { + m->c = avcodec_find_encoder(CODEC_ID_H264); + m->ctx = avcodec_alloc_context3(m->c); + m->ctx->width = m->w; + m->ctx->height = m->h; + m->ctx->bit_rate = m->bitrate; + m->ctx->pix_fmt = m->pixfmt; + m->ctx->flags |= CODEC_FLAG_GLOBAL_HEADER; + m->f = avcodec_alloc_frame(); + return avcodec_open2(m->ctx, m->c, NULL); + } */ "C" - "unsafe" - "image" "errors" + "image" "strings" + "unsafe" //"log" ) type H264Encoder struct { - m C.h264enc_t + m C.h264enc_t Header []byte Pixfmt image.YCbCrSubsampleRatio - W, H int + W, H int } func NewH264Encoder( @@ -94,7 +93,7 @@ func NewH264Encoder( type h264Out struct { Data []byte - Key bool + Key bool } func (m *H264Encoder) Encode(img *image.YCbCr) (out h264Out, err error) { @@ -111,12 +110,12 @@ func (m *H264Encoder) Encode(img *image.YCbCr) (out h264Out, err error) { return } f = m.m.f - f.data[0] = (*C.uint8_t)(unsafe.Pointer(&img.Y[0])); - f.data[1] = (*C.uint8_t)(unsafe.Pointer(&img.Cb[0])); - f.data[2] = (*C.uint8_t)(unsafe.Pointer(&img.Cr[0])); - f.linesize[0] = (C.int)(img.YStride); - f.linesize[1] = (C.int)(img.CStride); - f.linesize[2] = (C.int)(img.CStride); + f.data[0] = (*C.uint8_t)(unsafe.Pointer(&img.Y[0])) + f.data[1] = (*C.uint8_t)(unsafe.Pointer(&img.Cb[0])) + f.data[2] = (*C.uint8_t)(unsafe.Pointer(&img.Cr[0])) + f.linesize[0] = (C.int)(img.YStride) + f.linesize[1] = (C.int)(img.CStride) + f.linesize[2] = (C.int)(img.CStride) } C.av_init_packet(&m.m.pkt) @@ -130,7 +129,7 @@ func (m *H264Encoder) Encode(img *image.YCbCr) (out h264Out, err 
error) { err = errors.New("no picture") return } - if (m.m.pkt.size == 0) { + if m.m.pkt.size == 0 { err = errors.New("packet size == 0") return } @@ -145,4 +144,3 @@ func (m *H264Encoder) Encode(img *image.YCbCr) (out h264Out, err error) { return } - diff --git a/util.go b/util.go index ce6b126..58ca3a7 100644 --- a/util.go +++ b/util.go @@ -1,4 +1,3 @@ - /* Golang h264,aac decoder/encoder libav wrapper @@ -19,19 +18,19 @@ Golang h264,aac decoder/encoder libav wrapper package codec import ( - "unsafe" "reflect" + "unsafe" /* - #cgo darwin LDFLAGS: -lavformat -lavutil -lavcodec + #cgo LDFLAGS: -lavformat -lavutil -lavcodec - #include - #include + #include + #include - static void libav_init() { - av_register_all(); - av_log_set_level(AV_LOG_DEBUG); - } + static void libav_init() { + av_register_all(); + av_log_set_level(AV_LOG_DEBUG); + } */ "C" ) @@ -47,5 +46,3 @@ func fromCPtr(buf unsafe.Pointer, size int) (ret []uint8) { hdr.Data = uintptr(buf) return } - - From b51fba84ef40a430a37012b3f8682389c3ec7608 Mon Sep 17 00:00:00 2001 From: Paul Kohler Date: Thu, 10 Dec 2015 14:11:55 -0500 Subject: [PATCH 06/34] corrected avcodec_alloc_frame error... 
thought I already had, though --- aacdec.go | 2 +- aacenc.go | 2 +- h264dec.go | 2 +- h264enc.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/aacdec.go b/aacdec.go index 7ddeedd..efa9653 100644 --- a/aacdec.go +++ b/aacdec.go @@ -17,7 +17,7 @@ import ( static int aacdec_new(aacdec_t *m, uint8_t *buf, int len) { m->c = avcodec_find_decoder(CODEC_ID_AAC); m->ctx = avcodec_alloc_context3(m->c); - m->f = avcodec_alloc_frame(); + m->f = av_frame_alloc(); m->ctx->extradata = buf; m->ctx->extradata_size = len; m->ctx->debug = 0x3; diff --git a/aacenc.go b/aacenc.go index 8386280..7ce6c21 100644 --- a/aacenc.go +++ b/aacenc.go @@ -24,7 +24,7 @@ import ( m->ctx->bit_rate = m->bitrate; m->ctx->channels = m->channels; m->ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; - m->f = avcodec_alloc_frame(); + m->f = av_frame_alloc(); int r = avcodec_open2(m->ctx, m->c, 0); av_log(m->ctx, AV_LOG_DEBUG, "extra %d\n", m->ctx->extradata_size); return r; diff --git a/h264dec.go b/h264dec.go index 745f339..747051a 100644 --- a/h264dec.go +++ b/h264dec.go @@ -16,7 +16,7 @@ import ( static int h264dec_new(h264dec_t *h, uint8_t *data, int len) { h->c = avcodec_find_decoder(CODEC_ID_H264); h->ctx = avcodec_alloc_context3(h->c); - h->f = avcodec_alloc_frame(); + h->f = av_frame_alloc(); h->ctx->extradata = data; h->ctx->extradata_size = len; h->ctx->debug = 0x3; diff --git a/h264enc.go b/h264enc.go index 92fe349..21bdb9f 100644 --- a/h264enc.go +++ b/h264enc.go @@ -32,7 +32,7 @@ import ( m->ctx->bit_rate = m->bitrate; m->ctx->pix_fmt = m->pixfmt; m->ctx->flags |= CODEC_FLAG_GLOBAL_HEADER; - m->f = avcodec_alloc_frame(); + m->f = av_frame_alloc(); return avcodec_open2(m->ctx, m->c, NULL); } From c22c2411f1f08a024080fdcf72737143adc9a70d Mon Sep 17 00:00:00 2001 From: nareix Date: Mon, 4 Apr 2016 14:43:30 +0800 Subject: [PATCH 07/34] add h264 parser --- h264parser/parser.go | 271 ++++++++++++++++++++++++++++++++++++++ h264parser/parser_test.go | 23 ++++ 2 files 
changed, 294 insertions(+) create mode 100644 h264parser/parser.go create mode 100644 h264parser/parser_test.go diff --git a/h264parser/parser.go b/h264parser/parser.go new file mode 100644 index 0000000..2d2d607 --- /dev/null +++ b/h264parser/parser.go @@ -0,0 +1,271 @@ + +package h264parser + +import ( + "github.com/nareix/bits" +) + +/* +From: http://stackoverflow.com/questions/24884827/possible-locations-for-sequence-picture-parameter-sets-for-h-264-stream + +First off, it's important to understand that there is no single standard H.264 elementary bitstream format. The specification document does contain an Annex, specifically Annex B, that describes one possible format, but it is not an actual requirement. The standard specifies how video is encoded into individual packets. How these packets are stored and transmitted is left open to the integrator. + +1. Annex B +Network Abstraction Layer Units +The packets are called Network Abstraction Layer Units. Often abbreviated NALU (or sometimes just NAL) each packet can be individually parsed and processed. The first byte of each NALU contains the NALU type, specifically bits 3 through 7. (bit 0 is always off, and bits 1-2 indicate whether a NALU is referenced by another NALU). + +There are 19 different NALU types defined separated into two categories, VCL and non-VCL: + +VCL, or Video Coding Layer packets contain the actual visual information. +Non-VCLs contain metadata that may or may not be required to decode the video. +A single NALU, or even a VCL NALU is NOT the same thing as a frame. A frame can be ‘sliced’ into several NALUs. Just like you can slice a pizza. One or more slices are then virtually grouped into a Access Units (AU) that contain one frame. Slicing does come at a slight quality cost, so it is not often used. + +Below is a table of all defined NALUs. 
+ +0 Unspecified non-VCL +1 Coded slice of a non-IDR picture VCL +2 Coded slice data partition A VCL +3 Coded slice data partition B VCL +4 Coded slice data partition C VCL +5 Coded slice of an IDR picture VCL +6 Supplemental enhancement information (SEI) non-VCL +7 Sequence parameter set non-VCL +8 Picture parameter set non-VCL +9 Access unit delimiter non-VCL +10 End of sequence non-VCL +11 End of stream non-VCL +12 Filler data non-VCL +13 Sequence parameter set extension non-VCL +14 Prefix NAL unit non-VCL +15 Subset sequence parameter set non-VCL +16 Depth parameter set non-VCL +17..18 Reserved non-VCL +19 Coded slice of an auxiliary coded picture without partitioning non-VCL +20 Coded slice extension non-VCL +21 Coded slice extension for depth view components non-VCL +22..23 Reserved non-VCL +24..31 Unspecified non-VCL +There are a couple of NALU types where having knowledge of may be helpful later. + +Sequence Parameter Set (SPS). This non-VCL NALU contains information required to configure the decoder such as profile, level, resolution, frame rate. +Picture Parameter Set (PPS). Similar to the SPS, this non-VCL contains information on entropy coding mode, slice groups, motion prediction and deblocking filters. +Instantaneous Decoder Refresh (IDR). This VCL NALU is a self contained image slice. That is, an IDR can be decoded and displayed without referencing any other NALU save SPS and PPS. +Access Unit Delimiter (AUD). An AUD is an optional NALU that can be use to delimit frames in an elementary stream. It is not required (unless otherwise stated by the container/protocol, like TS), and is often not included in order to save space, but it can be useful to finds the start of a frame without having to fully parse each NALU. +NALU Start Codes +A NALU does not contain is its size. Therefore simply concatenating the NALUs to create a stream will not work because you will not know where one stops and the next begins. 
+ +The Annex B specification solves this by requiring ‘Start Codes’ to precede each NALU. A start code is 2 or 3 0x00 bytes followed by a 0x01 byte, e.g. 0x000001 or 0x00000001. + +The 4 byte variation is useful for transmission over a serial connection as it is trivial to byte align the stream by looking for 31 zero bits followed by a one. If the next bit is 0 (because every NALU starts with a 0 bit), it is the start of a NALU. The 4 byte variation is usually only used for signaling random access points in the stream such as an SPS, PPS, AUD and IDR, whereas the 3 byte variation is used everywhere else to save space. + +Emulation Prevention Bytes +Start codes work because the four three-byte sequences 0x000000, 0x000001, 0x000002 and 0x000003 are illegal within a non-RBSP NALU. So when creating a NALU, care is taken to escape these values that could otherwise be confused with a start code. This is accomplished by inserting an ‘Emulation Prevention’ byte 0x03, so that 0x000001 becomes 0x00000301. + +When decoding, it is important to look for and ignore emulation prevention bytes. Because emulation prevention bytes can occur almost anywhere within a NALU, it is often more convenient in documentation to assume they have already been removed. A representation without emulation prevention bytes is called a Raw Byte Sequence Payload (RBSP). + +Example +Let's look at a complete example.
+ +0x0000 | 00 00 00 01 67 64 00 0A AC 72 84 44 26 84 00 00 +0x0010 | 03 00 04 00 00 03 00 CA 3C 48 96 11 80 00 00 00 +0x0020 | 01 68 E8 43 8F 13 21 30 00 00 01 65 88 81 00 05 +0x0030 | 4E 7F 87 DF 61 A5 8B 95 EE A4 E9 38 B7 6A 30 6A +0x0040 | 71 B9 55 60 0B 76 2E B5 0E E4 80 59 27 B8 67 A9 +0x0050 | 63 37 5E 82 20 55 FB E4 6A E9 37 35 72 E2 22 91 +0x0060 | 9E 4D FF 60 86 CE 7E 42 B7 95 CE 2A E1 26 BE 87 +0x0070 | 73 84 26 BA 16 36 F4 E6 9F 17 DA D8 64 75 54 B1 +0x0080 | F3 45 0C 0B 3C 74 B3 9D BC EB 53 73 87 C3 0E 62 +0x0090 | 47 48 62 CA 59 EB 86 3F 3A FA 86 B5 BF A8 6D 06 +0x00A0 | 16 50 82 C4 CE 62 9E 4E E6 4C C7 30 3E DE A1 0B +0x00B0 | D8 83 0B B6 B8 28 BC A9 EB 77 43 FC 7A 17 94 85 +0x00C0 | 21 CA 37 6B 30 95 B5 46 77 30 60 B7 12 D6 8C C5 +0x00D0 | 54 85 29 D8 69 A9 6F 12 4E 71 DF E3 E2 B1 6B 6B +0x00E0 | BF 9F FB 2E 57 30 A9 69 76 C4 46 A2 DF FA 91 D9 +0x00F0 | 50 74 55 1D 49 04 5A 1C D6 86 68 7C B6 61 48 6C +0x0100 | 96 E6 12 4C 27 AD BA C7 51 99 8E D0 F0 ED 8E F6 +0x0110 | 65 79 79 A6 12 A1 95 DB C8 AE E3 B6 35 E6 8D BC +0x0120 | 48 A3 7F AF 4A 28 8A 53 E2 7E 68 08 9F 67 77 98 +0x0130 | 52 DB 50 84 D6 5E 25 E1 4A 99 58 34 C7 11 D6 43 +0x0140 | FF C4 FD 9A 44 16 D1 B2 FB 02 DB A1 89 69 34 C2 +0x0150 | 32 55 98 F9 9B B2 31 3F 49 59 0C 06 8C DB A5 B2 +0x0160 | 9D 7E 12 2F D0 87 94 44 E4 0A 76 EF 99 2D 91 18 +0x0170 | 39 50 3B 29 3B F5 2C 97 73 48 91 83 B0 A6 F3 4B +0x0180 | 70 2F 1C 8F 3B 78 23 C6 AA 86 46 43 1D D7 2A 23 +0x0190 | 5E 2C D9 48 0A F5 F5 2C D1 FB 3F F0 4B 78 37 E9 +0x01A0 | 45 DD 72 CF 80 35 C3 95 07 F3 D9 06 E5 4A 58 76 +0x01B0 | 03 6C 81 20 62 45 65 44 73 BC FE C1 9F 31 E5 DB +0x01C0 | 89 5C 6B 79 D8 68 90 D7 26 A8 A1 88 86 81 DC 9A +0x01D0 | 4F 40 A5 23 C7 DE BE 6F 76 AB 79 16 51 21 67 83 +0x01E0 | 2E F3 D6 27 1A 42 C2 94 D1 5D 6C DB 4A 7A E2 CB +0x01F0 | 0B B0 68 0B BE 19 59 00 50 FC C0 BD 9D F5 F5 F8 +0x0200 | A8 17 19 D6 B3 E9 74 BA 50 E5 2C 45 7B F9 93 EA +0x0210 | 5A F9 A9 30 B1 6F 5B 36 24 1E 8D 55 57 F4 CC 67 +0x0220 | B2 65 6A A9 36 
26 D0 06 B8 E2 E3 73 8B D1 C0 1C +0x0230 | 52 15 CA B5 AC 60 3E 36 42 F1 2C BD 99 77 AB A8 +0x0240 | A9 A4 8E 9C 8B 84 DE 73 F0 91 29 97 AE DB AF D6 +0x0250 | F8 5E 9B 86 B3 B3 03 B3 AC 75 6F A6 11 69 2F 3D +0x0260 | 3A CE FA 53 86 60 95 6C BB C5 4E F3 + +This is a complete AU containing 3 NALUs. As you can see, we begin with a Start code followed by an SPS (SPS starts with 67). Within the SPS, you will see two Emulation Prevention bytes. Without these bytes the illegal sequence 0x000000 would occur at these positions. Next you will see a start code followed by a PPS (PPS starts with 68) and one final start code followed by an IDR slice. This is a complete H.264 stream. If you type these values into a hex editor and save the file with a .264 extension, you will be able to convert it to this image: + +Lena + +Annex B is commonly used in live and streaming formats such as transport streams, over the air broadcasts, and DVDs. In these formats it is common to repeat the SPS and PPS periodically, usually preceding every IDR thus creating a random access point for the decoder. This enables the ability to join a stream already in progress. + +2. AVCC +The other common method of storing an H.264 stream is the AVCC format. In this format, each NALU is preceded with its length (in big endian format). This method is easier to parse, but you lose the byte alignment features of Annex B. Just to complicate things, the length may be encoded using 1, 2 or 4 bytes. This value is stored in a header object. This header is often called ‘extradata’ or ‘sequence header’. 
Its basic format is as follows: + +bits +8 version ( always 0x01 ) +8 avc profile ( sps[0][1] ) +8 avc compatibility ( sps[0][2] ) +8 avc level ( sps[0][3] ) +6 reserved ( all bits on ) +2 NALULengthSizeMinusOne +3 reserved ( all bits on ) +5 number of SPS NALUs (usually 1) +repeated once per SPS: + 16 SPS size + variable SPS NALU data +8 number of PPS NALUs (usually 1) +repeated once per PPS: + 16 PPS size + variable PPS NALU data + +Using the same example above, the AVCC extradata will look like this: + +0x0000 | 01 64 00 0A FF E1 00 19 67 64 00 0A AC 72 84 44 +0x0010 | 26 84 00 00 03 00 04 00 00 03 00 CA 3C 48 96 11 +0x0020 | 80 01 00 07 68 E8 43 8F 13 21 30 + +You will notice that the SPS and PPS are now stored out of band. That is, separate from the elementary stream data. Storage and transmission of this data is the job of the file container, and beyond the scope of this document. Notice that even though we are not using start codes, emulation prevention bytes are still inserted. + +Additionally, there is a new variable called NALULengthSizeMinusOne. This confusingly named variable tells us how many bytes to use to store the length of each NALU. So, if NALULengthSizeMinusOne is set to 0, then each NALU is preceded by a single byte indicating its length. Using a single byte to store the size, the max size of a NALU is 255 bytes. That is obviously pretty small. Way too small for an entire key frame. Using 2 bytes gives us 64k per NALU. It would work in our example, but is still a pretty low limit. 3 bytes would be perfect, but for some reason it is not universally supported.
Therefore, 4 bytes is by far the most common, and it is what we used here: + +0x0000 | 00 00 02 41 65 88 81 00 05 4E 7F 87 DF 61 A5 8B +0x0010 | 95 EE A4 E9 38 B7 6A 30 6A 71 B9 55 60 0B 76 2E +0x0020 | B5 0E E4 80 59 27 B8 67 A9 63 37 5E 82 20 55 FB +0x0030 | E4 6A E9 37 35 72 E2 22 91 9E 4D FF 60 86 CE 7E +0x0040 | 42 B7 95 CE 2A E1 26 BE 87 73 84 26 BA 16 36 F4 +0x0050 | E6 9F 17 DA D8 64 75 54 B1 F3 45 0C 0B 3C 74 B3 +0x0060 | 9D BC EB 53 73 87 C3 0E 62 47 48 62 CA 59 EB 86 +0x0070 | 3F 3A FA 86 B5 BF A8 6D 06 16 50 82 C4 CE 62 9E +0x0080 | 4E E6 4C C7 30 3E DE A1 0B D8 83 0B B6 B8 28 BC +0x0090 | A9 EB 77 43 FC 7A 17 94 85 21 CA 37 6B 30 95 B5 +0x00A0 | 46 77 30 60 B7 12 D6 8C C5 54 85 29 D8 69 A9 6F +0x00B0 | 12 4E 71 DF E3 E2 B1 6B 6B BF 9F FB 2E 57 30 A9 +0x00C0 | 69 76 C4 46 A2 DF FA 91 D9 50 74 55 1D 49 04 5A +0x00D0 | 1C D6 86 68 7C B6 61 48 6C 96 E6 12 4C 27 AD BA +0x00E0 | C7 51 99 8E D0 F0 ED 8E F6 65 79 79 A6 12 A1 95 +0x00F0 | DB C8 AE E3 B6 35 E6 8D BC 48 A3 7F AF 4A 28 8A +0x0100 | 53 E2 7E 68 08 9F 67 77 98 52 DB 50 84 D6 5E 25 +0x0110 | E1 4A 99 58 34 C7 11 D6 43 FF C4 FD 9A 44 16 D1 +0x0120 | B2 FB 02 DB A1 89 69 34 C2 32 55 98 F9 9B B2 31 +0x0130 | 3F 49 59 0C 06 8C DB A5 B2 9D 7E 12 2F D0 87 94 +0x0140 | 44 E4 0A 76 EF 99 2D 91 18 39 50 3B 29 3B F5 2C +0x0150 | 97 73 48 91 83 B0 A6 F3 4B 70 2F 1C 8F 3B 78 23 +0x0160 | C6 AA 86 46 43 1D D7 2A 23 5E 2C D9 48 0A F5 F5 +0x0170 | 2C D1 FB 3F F0 4B 78 37 E9 45 DD 72 CF 80 35 C3 +0x0180 | 95 07 F3 D9 06 E5 4A 58 76 03 6C 81 20 62 45 65 +0x0190 | 44 73 BC FE C1 9F 31 E5 DB 89 5C 6B 79 D8 68 90 +0x01A0 | D7 26 A8 A1 88 86 81 DC 9A 4F 40 A5 23 C7 DE BE +0x01B0 | 6F 76 AB 79 16 51 21 67 83 2E F3 D6 27 1A 42 C2 +0x01C0 | 94 D1 5D 6C DB 4A 7A E2 CB 0B B0 68 0B BE 19 59 +0x01D0 | 00 50 FC C0 BD 9D F5 F5 F8 A8 17 19 D6 B3 E9 74 +0x01E0 | BA 50 E5 2C 45 7B F9 93 EA 5A F9 A9 30 B1 6F 5B +0x01F0 | 36 24 1E 8D 55 57 F4 CC 67 B2 65 6A A9 36 26 D0 +0x0200 | 06 B8 E2 E3 73 8B D1 C0 1C 52 15 CA B5 AC 60 3E +0x0210 
| 36 42 F1 2C BD 99 77 AB A8 A9 A4 8E 9C 8B 84 DE +0x0220 | 73 F0 91 29 97 AE DB AF D6 F8 5E 9B 86 B3 B3 03 +0x0230 | B3 AC 75 6F A6 11 69 2F 3D 3A CE FA 53 86 60 95 +0x0240 | 6C BB C5 4E F3 + +An advantage to this format is the ability to configure the decoder at the start and jump into the middle of a stream. This is a common use case where the media is available on a random access medium such as a hard drive, and is therefore used in common container formats such as MP4 and MKV. +*/ + +func WalkNALUsAnnexb(nalus [][]byte, write func([]byte)) { + for i, nalu := range(nalus) { + if i == 0 { + write([]byte{0,0,0,1,0x9,0xf0,0,0,0,1}) // AUD + } else { + write([]byte{0,0,1}) + } + write(nalu) + } + return +} + +func SplitNALUs(b []byte) (ok bool, nalus [][]byte) { + if len(b) < 4 { + return + } + + val3 := bits.GetUIntBE(b, 24) + val4 := bits.GetUIntBE(b, 32) + + // is Annex B + if val3 == 1 || val4 == 1 { + start := 0 + pos := 0 + for { + if start != pos { + nalus = append(nalus, b[start:pos]) + } + if val3 == 1 { + pos += 3 + } else if val4 == 1 { + pos += 4 + } + start = pos + if start == len(b) { + break + } + val3 = 0 + val4 = 0 + for pos < len(b) { + if pos+2 < len(b) && b[pos] == 0 { + val3 = bits.GetUIntBE(b[pos:], 24) + if val3 == 0 { + if pos+3 < len(b) { + val4 = uint(b[pos+3]) + if val4 == 1 { + break + } + } + } else if val3 == 1 { + break + } + pos++ + } else { + pos++ + } + } + } + ok = true + return + } + + // maybe AVCC + if val4 <= uint(len(b)) { + b = b[4:] + for { + nalus = append(nalus, b[:val4]) + b = b[val4:] + if len(b) < 4 { + break + } + val4 = bits.GetUIntBE(b, 32) + b = b[4:] + if val4 > uint(len(b)) { + break + } + } + if len(b) == 0 { + ok = true + } + return + } + + return +} + diff --git a/h264parser/parser_test.go b/h264parser/parser_test.go new file mode 100644 index 0000000..8945692 --- /dev/null +++ b/h264parser/parser_test.go @@ -0,0 +1,23 @@ + +package h264parser + +import ( + "testing" + "encoding/hex" +) + +func TestParser(t 
*testing.T) { + var ok bool + var nalus [][]byte + + annexbFrame, _ := hex.DecodeString("000001223322330000000122332233223300000133000001000001") + ok, nalus = SplitNALUs(annexbFrame) + t.Log(ok, len(nalus)) + + avccFrame, _ := hex.DecodeString( + "00000008aabbccaabbccaabb00000001aa", + ) + ok, nalus = SplitNALUs(avccFrame) + t.Log(ok, len(nalus)) +} + From fbc251923ce8d214f9ee49ed011462d163fae403 Mon Sep 17 00:00:00 2001 From: nareix Date: Tue, 19 Apr 2016 11:40:23 +0800 Subject: [PATCH 08/34] add WalkNALUsAVCC() modify SplitNALUs return params order --- h264parser/parser.go | 19 +++++++++++++++---- h264parser/parser_test.go | 4 ++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/h264parser/parser.go b/h264parser/parser.go index 2d2d607..3f44dc2 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -195,9 +195,18 @@ func WalkNALUsAnnexb(nalus [][]byte, write func([]byte)) { return } -func SplitNALUs(b []byte) (ok bool, nalus [][]byte) { +func WalkNALUsAVCC(nalus [][]byte, write func([]byte)) { + for _, nalu := range(nalus) { + var b [4]byte + bits.PutUIntBE(b[:], uint(len(nalu)), 32) + write(b[:]) + write(nalu) + } +} + +func SplitNALUs(b []byte) (nalus [][]byte, ok bool) { if len(b) < 4 { - return + return [][]byte{b}, false } val3 := bits.GetUIntBE(b, 24) @@ -262,10 +271,12 @@ func SplitNALUs(b []byte) (ok bool, nalus [][]byte) { } if len(b) == 0 { ok = true + return + } else { + return [][]byte{b}, false } - return } - return + return [][]byte{b}, false } diff --git a/h264parser/parser_test.go b/h264parser/parser_test.go index 8945692..52ac18b 100644 --- a/h264parser/parser_test.go +++ b/h264parser/parser_test.go @@ -11,13 +11,13 @@ func TestParser(t *testing.T) { var nalus [][]byte annexbFrame, _ := hex.DecodeString("000001223322330000000122332233223300000133000001000001") - ok, nalus = SplitNALUs(annexbFrame) + nalus, ok = SplitNALUs(annexbFrame) t.Log(ok, len(nalus)) avccFrame, _ := hex.DecodeString( 
"00000008aabbccaabbccaabb00000001aa", ) - ok, nalus = SplitNALUs(avccFrame) + nalus, ok = SplitNALUs(avccFrame) t.Log(ok, len(nalus)) } From b58d9c03fc900832958a46318e657f9cd6e78401 Mon Sep 17 00:00:00 2001 From: nareix Date: Tue, 19 Apr 2016 15:00:21 +0800 Subject: [PATCH 09/34] add aacparser.go --- aacparser/parser.go | 312 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 312 insertions(+) create mode 100644 aacparser/parser.go diff --git a/aacparser/parser.go b/aacparser/parser.go new file mode 100644 index 0000000..db8dc64 --- /dev/null +++ b/aacparser/parser.go @@ -0,0 +1,312 @@ +package aacparser + +import ( + "github.com/nareix/bits" + "fmt" + "bytes" + "io" +) + +// copied from libavcodec/mpeg4audio.h +const ( + AOT_AAC_MAIN = 1 + iota ///< Y Main + AOT_AAC_LC ///< Y Low Complexity + AOT_AAC_SSR ///< N (code in SoC repo) Scalable Sample Rate + AOT_AAC_LTP ///< Y Long Term Prediction + AOT_SBR ///< Y Spectral Band Replication + AOT_AAC_SCALABLE ///< N Scalable + AOT_TWINVQ ///< N Twin Vector Quantizer + AOT_CELP ///< N Code Excited Linear Prediction + AOT_HVXC ///< N Harmonic Vector eXcitation Coding + AOT_TTSI = 12 + iota ///< N Text-To-Speech Interface + AOT_MAINSYNTH ///< N Main Synthesis + AOT_WAVESYNTH ///< N Wavetable Synthesis + AOT_MIDI ///< N General MIDI + AOT_SAFX ///< N Algorithmic Synthesis and Audio Effects + AOT_ER_AAC_LC ///< N Error Resilient Low Complexity + AOT_ER_AAC_LTP = 19 + iota ///< N Error Resilient Long Term Prediction + AOT_ER_AAC_SCALABLE ///< N Error Resilient Scalable + AOT_ER_TWINVQ ///< N Error Resilient Twin Vector Quantizer + AOT_ER_BSAC ///< N Error Resilient Bit-Sliced Arithmetic Coding + AOT_ER_AAC_LD ///< N Error Resilient Low Delay + AOT_ER_CELP ///< N Error Resilient Code Excited Linear Prediction + AOT_ER_HVXC ///< N Error Resilient Harmonic Vector eXcitation Coding + AOT_ER_HILN ///< N Error Resilient Harmonic and Individual Lines plus Noise + AOT_ER_PARAM ///< N Error Resilient Parametric + AOT_SSC ///< 
N SinuSoidal Coding + AOT_PS ///< N Parametric Stereo + AOT_SURROUND ///< N MPEG Surround + AOT_ESCAPE ///< Y Escape Value + AOT_L1 ///< Y Layer 1 + AOT_L2 ///< Y Layer 2 + AOT_L3 ///< Y Layer 3 + AOT_DST ///< N Direct Stream Transfer + AOT_ALS ///< Y Audio LosslesS + AOT_SLS ///< N Scalable LosslesS + AOT_SLS_NON_CORE ///< N Scalable LosslesS (non core) + AOT_ER_AAC_ELD ///< N Error Resilient Enhanced Low Delay + AOT_SMR_SIMPLE ///< N Symbolic Music Representation Simple + AOT_SMR_MAIN ///< N Symbolic Music Representation Main + AOT_USAC_NOSBR ///< N Unified Speech and Audio Coding (no SBR) + AOT_SAOC ///< N Spatial Audio Object Coding + AOT_LD_SURROUND ///< N Low Delay MPEG Surround + AOT_USAC ///< N Unified Speech and Audio Coding +) + +type MPEG4AudioConfig struct { + SampleRate int + ChannelCount int + ObjectType uint + SampleRateIndex uint + ChannelConfig uint +} + +var sampleRateTable = []int{ + 96000, 88200, 64000, 48000, 44100, 32000, + 24000, 22050, 16000, 12000, 11025, 8000, 7350, +} + +var chanConfigTable = []int{ + 0, 1, 2, 3, 4, 5, 6, 8, +} + +func IsADTSFrame(frames []byte) bool { + return len(frames) > 7 && frames[0] == 0xff && frames[1]&0xf0 == 0xf0 +} + +func ReadADTSFrame(frame []byte) (config MPEG4AudioConfig, payload []byte, samples int, framelen int, err error) { + if !IsADTSFrame(frame) { + err = fmt.Errorf("not adts frame") + return + } + config.ObjectType = uint(frame[2]>>6) + 1 + config.SampleRateIndex = uint(frame[2] >> 2 & 0xf) + config.ChannelConfig = uint(frame[2]<<2&0x4 | frame[3]>>6&0x3) + framelen = int(frame[3]&0x3)<<11 | int(frame[4])<<3 | int(frame[5]>>5) + samples = (int(frame[6]&0x3) + 1) * 1024 + hdrlen := 7 + if frame[1]&0x1 == 0 { + hdrlen = 9 + } + if framelen < hdrlen || len(frame) < framelen { + err = fmt.Errorf("invalid adts header length") + return + } + payload = frame[hdrlen:framelen] + return +} + +func MakeADTSHeader(config MPEG4AudioConfig, samples int, payloadLength int) (header []byte) { + payloadLength += 7 + 
//AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP (QQQQQQQQ QQQQQQQQ) + header = []byte{0xff, 0xf1, 0x50, 0x80, 0x043, 0xff, 0xcd} + //config.ObjectType = uint(frames[2]>>6)+1 + //config.SampleRateIndex = uint(frames[2]>>2&0xf) + //config.ChannelConfig = uint(frames[2]<<2&0x4|frames[3]>>6&0x3) + header[2] = (byte(config.ObjectType-1)&0x3)<<6 | (byte(config.SampleRateIndex)&0xf)<<2 | byte(config.ChannelConfig>>2)&0x1 + header[3] = header[3]&0x3f | byte(config.ChannelConfig&0x3)<<6 + header[3] = header[3]&0xfc | byte(payloadLength>>11)&0x3 + header[4] = byte(payloadLength >> 3) + header[5] = header[5]&0x1f | (byte(payloadLength)&0x7)<<5 + header[6] = header[6]&0xfc | byte(samples/1024-1) + return +} + +func ExtractADTSFrames(frames []byte) (config MPEG4AudioConfig, payload []byte, samples int, err error) { + for len(frames) > 0 { + var n, framelen int + if config, payload, n, framelen, err = ReadADTSFrame(frames); err != nil { + return + } + frames = frames[framelen:] + samples += n + } + return +} + +func ReadADTSHeader(data []byte) (config MPEG4AudioConfig, frameLength int) { + br := &bits.Reader{R: bytes.NewReader(data)} + var i uint + + //Structure + //AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP (QQQQQQQQ QQQQQQQQ) + //Header consists of 7 or 9 bytes (without or with CRC). 
+ + // 2 bytes + //A 12 syncword 0xFFF, all bits must be 1 + br.ReadBits(12) + //B 1 MPEG Version: 0 for MPEG-4, 1 for MPEG-2 + br.ReadBits(1) + //C 2 Layer: always 0 + br.ReadBits(2) + //D 1 protection absent, Warning, set to 1 if there is no CRC and 0 if there is CRC + br.ReadBits(1) + + //E 2 profile, the MPEG-4 Audio Object Type minus 1 + config.ObjectType, _ = br.ReadBits(2) + config.ObjectType++ + //F 4 MPEG-4 Sampling Frequency Index (15 is forbidden) + config.SampleRateIndex, _ = br.ReadBits(4) + //G 1 private bit, guaranteed never to be used by MPEG, set to 0 when encoding, ignore when decoding + br.ReadBits(1) + //H 3 MPEG-4 Channel Configuration (in the case of 0, the channel configuration is sent via an inband PCE) + config.ChannelConfig, _ = br.ReadBits(3) + //I 1 originality, set to 0 when encoding, ignore when decoding + br.ReadBits(1) + //J 1 home, set to 0 when encoding, ignore when decoding + br.ReadBits(1) + //K 1 copyrighted id bit, the next bit of a centrally registered copyright identifier, set to 0 when encoding, ignore when decoding + br.ReadBits(1) + //L 1 copyright id start, signals that this frame's copyright id bit is the first bit of the copyright id, set to 0 when encoding, ignore when decoding + br.ReadBits(1) + + //M 13 frame length, this value must include 7 or 9 bytes of header length: FrameLength = (ProtectionAbsent == 1 ? 
7 : 9) + size(AACFrame) + i, _ = br.ReadBits(13) + frameLength = int(i) + //O 11 Buffer fullness + br.ReadBits(11) + //P 2 Number of AAC frames (RDBs) in ADTS frame minus 1, for maximum compatibility always use 1 AAC frame per ADTS frame + br.ReadBits(2) + + //Q 16 CRC if protection absent is 0 + return +} + +func readObjectType(r *bits.Reader) (objectType uint, err error) { + if objectType, err = r.ReadBits(5); err != nil { + return + } + if objectType == AOT_ESCAPE { + var i uint + if i, err = r.ReadBits(6); err != nil { + return + } + objectType = 32 + i + } + return +} + +func writeObjectType(w *bits.Writer, objectType uint) (err error) { + if objectType >= 32 { + if err = w.WriteBits(AOT_ESCAPE, 5); err != nil { + return + } + if err = w.WriteBits(objectType-32, 6); err != nil { + return + } + } else { + if err = w.WriteBits(objectType, 5); err != nil { + return + } + } + return +} + +func readSampleRateIndex(r *bits.Reader) (index uint, err error) { + if index, err = r.ReadBits(4); err != nil { + return + } + if index == 0xf { + if index, err = r.ReadBits(24); err != nil { + return + } + } + return +} + +func writeSampleRateIndex(w *bits.Writer, index uint) (err error) { + if index >= 0xf { + if err = w.WriteBits(0xf, 4); err != nil { + return + } + if err = w.WriteBits(index, 24); err != nil { + return + } + } else { + if err = w.WriteBits(index, 4); err != nil { + return + } + } + return +} + +func (self MPEG4AudioConfig) IsValid() bool { + return self.ObjectType > 0 +} + +func (self MPEG4AudioConfig) Complete() (config MPEG4AudioConfig) { + config = self + if int(config.SampleRateIndex) < len(sampleRateTable) { + config.SampleRate = sampleRateTable[config.SampleRateIndex] + } + if int(config.ChannelConfig) < len(chanConfigTable) { + config.ChannelCount = chanConfigTable[config.ChannelConfig] + } + return +} + +func ReadMPEG4AudioConfig(r io.Reader) (config MPEG4AudioConfig, err error) { + // copied from libavcodec/mpeg4audio.c 
avpriv_mpeg4audio_get_config() + br := &bits.Reader{R: r} + + if config.ObjectType, err = readObjectType(br); err != nil { + return + } + if config.SampleRateIndex, err = readSampleRateIndex(br); err != nil { + return + } + if config.ChannelConfig, err = br.ReadBits(4); err != nil { + return + } + return +} + +func WriteMPEG4AudioConfig(w io.Writer, config MPEG4AudioConfig) (err error) { + bw := &bits.Writer{W: w} + + if err = writeObjectType(bw, config.ObjectType); err != nil { + return + } + + if config.SampleRateIndex == 0 { + for i, rate := range sampleRateTable { + if rate == config.SampleRate { + config.SampleRateIndex = uint(i) + } + } + } + if err = writeSampleRateIndex(bw, config.SampleRateIndex); err != nil { + return + } + + if config.ChannelConfig == 0 { + for i, count := range chanConfigTable { + if count == config.ChannelCount { + config.ChannelConfig = uint(i) + } + } + } + if err = bw.WriteBits(config.ChannelConfig, 4); err != nil { + return + } + + if err = bw.FlushBits(); err != nil { + return + } + return +} + +type CodecInfo struct { + MPEG4AudioConfig +} + +func ParseCodecData(config []byte) (info CodecInfo, err error) { + r := bytes.NewReader(config) + if info.MPEG4AudioConfig, err = ReadMPEG4AudioConfig(r); err != nil { + err = fmt.Errorf("CodecData invalid: parse MPEG4AudioConfig failed(%s)", err) + return + } + info.MPEG4AudioConfig = info.MPEG4AudioConfig.Complete() + return +} + From cb4f07abc0abcaddce452428d6c12f90a635a87a Mon Sep 17 00:00:00 2001 From: nareix Date: Tue, 19 Apr 2016 15:00:36 +0800 Subject: [PATCH 10/34] move ParseAVCDecoderConfRecord() from mp4 package to here --- h264parser/parser.go | 358 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 358 insertions(+) diff --git a/h264parser/parser.go b/h264parser/parser.go index 3f44dc2..97907ff 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -3,6 +3,8 @@ package h264parser import ( "github.com/nareix/bits" + "fmt" + "bytes" ) /* @@ -280,3 +282,359 @@ 
func SplitNALUs(b []byte) (nalus [][]byte, ok bool) { return [][]byte{b}, false } +type SPSInfo struct { + ProfileIdc uint + LevelIdc uint + + MbWidth uint + MbHeight uint + + CropLeft uint + CropRight uint + CropTop uint + CropBottom uint + + Width uint + Height uint +} + +func ParseSPS(data []byte) (self SPSInfo, err error) { + r := &bits.GolombBitReader{ + R: bytes.NewReader(data), + } + + if self.ProfileIdc, err = r.ReadBits(8); err != nil { + return + } + + // constraint_set0_flag-constraint_set6_flag,reserved_zero_2bits + if _, err = r.ReadBits(8); err != nil { + return + } + + // level_idc + if self.LevelIdc, err = r.ReadBits(8); err != nil { + return + } + + // seq_parameter_set_id + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + + if self.ProfileIdc == 100 || self.ProfileIdc == 110 || + self.ProfileIdc == 122 || self.ProfileIdc == 244 || + self.ProfileIdc == 44 || self.ProfileIdc == 83 || + self.ProfileIdc == 86 || self.ProfileIdc == 118 { + + var chroma_format_idc uint + if chroma_format_idc, err = r.ReadExponentialGolombCode(); err != nil { + return + } + + if chroma_format_idc == 3 { + // residual_colour_transform_flag + if _, err = r.ReadBit(); err != nil { + return + } + } + + // bit_depth_luma_minus8 + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + // bit_depth_chroma_minus8 + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + // qpprime_y_zero_transform_bypass_flag + if _, err = r.ReadBit(); err != nil { + return + } + + var seq_scaling_matrix_present_flag uint + if seq_scaling_matrix_present_flag, err = r.ReadBit(); err != nil { + return + } + + if seq_scaling_matrix_present_flag != 0 { + for i := 0; i < 8; i++ { + var seq_scaling_list_present_flag uint + if seq_scaling_list_present_flag, err = r.ReadBit(); err != nil { + return + } + if seq_scaling_list_present_flag != 0 { + var sizeOfScalingList uint + if i < 6 { + sizeOfScalingList = 16 + } else { + sizeOfScalingList = 64 + } + 
lastScale := uint(8) + nextScale := uint(8) + for j := uint(0); j < sizeOfScalingList; j++ { + if nextScale != 0 { + var delta_scale uint + if delta_scale, err = r.ReadSE(); err != nil { + return + } + nextScale = (lastScale + delta_scale + 256) % 256 + } + if nextScale != 0 { + lastScale = nextScale + } + } + } + } + } + } + + // log2_max_frame_num_minus4 + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + + var pic_order_cnt_type uint + if pic_order_cnt_type, err = r.ReadExponentialGolombCode(); err != nil { + return + } + if pic_order_cnt_type == 0 { + // log2_max_pic_order_cnt_lsb_minus4 + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + } else if pic_order_cnt_type == 1 { + // delta_pic_order_always_zero_flag + if _, err = r.ReadBit(); err != nil { + return + } + // offset_for_non_ref_pic + if _, err = r.ReadSE(); err != nil { + return + } + // offset_for_top_to_bottom_field + if _, err = r.ReadSE(); err != nil { + return + } + var num_ref_frames_in_pic_order_cnt_cycle uint + if num_ref_frames_in_pic_order_cnt_cycle, err = r.ReadExponentialGolombCode(); err != nil { + return + } + for i := uint(0); i < num_ref_frames_in_pic_order_cnt_cycle; i++ { + if _, err = r.ReadSE(); err != nil { + return + } + } + } + + // max_num_ref_frames + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + + // gaps_in_frame_num_value_allowed_flag + if _, err = r.ReadBit(); err != nil { + return + } + + if self.MbWidth, err = r.ReadExponentialGolombCode(); err != nil { + return + } + self.MbWidth++ + + if self.MbHeight, err = r.ReadExponentialGolombCode(); err != nil { + return + } + self.MbHeight++ + + var frame_mbs_only_flag uint + if frame_mbs_only_flag, err = r.ReadBit(); err != nil { + return + } + if frame_mbs_only_flag == 0 { + // mb_adaptive_frame_field_flag + if _, err = r.ReadBit(); err != nil { + return + } + } + + // direct_8x8_inference_flag + if _, err = r.ReadBit(); err != nil { + return + } + + var 
frame_cropping_flag uint + if frame_cropping_flag, err = r.ReadBit(); err != nil { + return + } + if frame_cropping_flag != 0 { + if self.CropLeft, err = r.ReadExponentialGolombCode(); err != nil { + return + } + if self.CropRight, err = r.ReadExponentialGolombCode(); err != nil { + return + } + if self.CropTop, err = r.ReadExponentialGolombCode(); err != nil { + return + } + if self.CropBottom, err = r.ReadExponentialGolombCode(); err != nil { + return + } + } + + self.Width = (self.MbWidth * 16) - self.CropLeft*2 - self.CropRight*2 + self.Height = ((2 - frame_mbs_only_flag) * self.MbHeight * 16) - self.CropTop*2 - self.CropBottom*2 + + return +} + +/* +func MakeAVCDecoderConfRecord( + SPS []byte, + PPS []byte, +) (self AVCDecoderConfRecord, err error) { + if len(SPS) < 4 { + err = fmt.Errorf("invalid SPS data") + return + } + self.AVCProfileIndication = int(SPS[1]) + self.ProfileCompatibility = int(SPS[2]) + self.AVCLevelIndication = int(SPS[3]) + self.SPS = [][]byte{SPS} + self.PPS = [][]byte{PPS} + self.LengthSizeMinusOne = 3 + return +} + +func WriteAVCDecoderConfRecord(w io.Writer, self AVCDecoderConfRecord) (err error) { + if err = WriteInt(w, 1, 1); err != nil { + return + } + if err = WriteInt(w, self.AVCProfileIndication, 1); err != nil { + return + } + if err = WriteInt(w, self.ProfileCompatibility, 1); err != nil { + return + } + if err = WriteInt(w, self.AVCLevelIndication, 1); err != nil { + return + } + if err = WriteInt(w, self.LengthSizeMinusOne|0xfc, 1); err != nil { + return + } + + if err = WriteInt(w, len(self.SPS)|0xe0, 1); err != nil { + return + } + for _, data := range self.SPS { + if err = WriteInt(w, len(data), 2); err != nil { + return + } + if err = WriteBytes(w, data, len(data)); err != nil { + return + } + } + + if err = WriteInt(w, len(self.PPS), 1); err != nil { + return + } + for _, data := range self.PPS { + if err = WriteInt(w, len(data), 2); err != nil { + return + } + if err = WriteBytes(w, data, len(data)); err != nil { + 
return + } + } + + return +} +*/ + +type CodecInfo struct { + Record AVCDecoderConfRecord + SPSInfo SPSInfo +} + +// CodecData is AVCDecoderConfRecord +func ParseCodecData(config []byte) (info CodecInfo, err error) { + if info.Record, err = ParseAVCDecoderConfRecord(config); err != nil { + return + } + if len(info.Record.SPS) < 1 { + err = fmt.Errorf("CodecData invalid: no SPS found in AVCDecoderConfRecord") + return + } + if info.SPSInfo, err = ParseSPS(info.Record.SPS[0]); err != nil { + err = fmt.Errorf("CodecData invalid: parse SPS failed(%s)", err) + return + } + return +} + +type AVCDecoderConfRecord struct { + AVCProfileIndication uint + ProfileCompatibility uint + AVCLevelIndication uint + LengthSizeMinusOne uint + SPS [][]byte + PPS [][]byte +} + +func ParseAVCDecoderConfRecord(config []byte) (self AVCDecoderConfRecord, err error) { + r := bytes.NewReader(config) + + if _, err = bits.ReadUIntBE(r, 8); err != nil { + return + } + if self.AVCProfileIndication, err = bits.ReadUIntBE(r, 8); err != nil { + return + } + if self.ProfileCompatibility, err = bits.ReadUIntBE(r, 8); err != nil { + return + } + if self.AVCLevelIndication, err = bits.ReadUIntBE(r, 8); err != nil { + return + } + if self.LengthSizeMinusOne, err = bits.ReadUIntBE(r, 8); err != nil { + return + } + self.LengthSizeMinusOne &= 0x03 + + var u uint + var n, length int + var data []byte + + if u, err = bits.ReadUIntBE(r, 8); err != nil { + return + } + n = int(u&0x1f) + for i := 0; i < n; i++ { + if u, err = bits.ReadUIntBE(r, 16); err != nil { + return + } + length = int(u) + if data, err = bits.ReadBytes(r, length); err != nil { + return + } + self.SPS = append(self.SPS, data) + } + + if u, err = bits.ReadUIntBE(r, 8); err != nil { + return + } + n = int(u) + for i := 0; i < n; i++ { + if u, err = bits.ReadUIntBE(r, 16); err != nil { + return + } + length = int(u) + if data, err = bits.ReadBytes(r, length); err != nil { + return + } + self.PPS = append(self.PPS, data) + } + + return +} + 
From b959f0d973be252756e3371077f654dde692d4d0 Mon Sep 17 00:00:00 2001 From: nareix Date: Tue, 19 Apr 2016 18:47:43 +0800 Subject: [PATCH 11/34] ParseSPS should skip first 1 bit of SPS --- h264parser/parser.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/h264parser/parser.go b/h264parser/parser.go index 97907ff..edf18b6 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -303,6 +303,10 @@ func ParseSPS(data []byte) (self SPSInfo, err error) { R: bytes.NewReader(data), } + if _, err = r.ReadBits(8); err != nil { + return + } + if self.ProfileIdc, err = r.ReadBits(8); err != nil { return } From b4831df77529a30e3105fb1f2ec5252ac8685393 Mon Sep 17 00:00:00 2001 From: nareix Date: Thu, 21 Apr 2016 15:58:46 +0800 Subject: [PATCH 12/34] SplitNALUs check AVCC first --- h264parser/parser.go | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/h264parser/parser.go b/h264parser/parser.go index edf18b6..16b73ba 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -214,6 +214,27 @@ func SplitNALUs(b []byte) (nalus [][]byte, ok bool) { val3 := bits.GetUIntBE(b, 24) val4 := bits.GetUIntBE(b, 32) + // maybe AVCC + if val4 <= uint(len(b)) { + b = b[4:] + nalus := [][]byte{} + for { + nalus = append(nalus, b[:val4]) + b = b[val4:] + if len(b) < 4 { + break + } + val4 = bits.GetUIntBE(b, 32) + b = b[4:] + if val4 > uint(len(b)) { + break + } + } + if len(b) == 0 { + return nalus, true + } + } + // is Annex B if val3 == 1 || val4 == 1 { start := 0 @@ -256,29 +277,6 @@ func SplitNALUs(b []byte) (nalus [][]byte, ok bool) { return } - // maybe AVCC - if val4 <= uint(len(b)) { - b = b[4:] - for { - nalus = append(nalus, b[:val4]) - b = b[val4:] - if len(b) < 4 { - break - } - val4 = bits.GetUIntBE(b, 32) - b = b[4:] - if val4 > uint(len(b)) { - break - } - } - if len(b) == 0 { - ok = true - return - } else { - return [][]byte{b}, false - } - } - return [][]byte{b}, false } From 
2412e77d722194be99b3a6345a7b0bd971f09175 Mon Sep 17 00:00:00 2001 From: nareix Date: Sat, 30 Apr 2016 09:22:33 +0800 Subject: [PATCH 13/34] SplitNALu bugfix --- h264parser/parser.go | 37 ++++++++++++++++++++----------------- h264parser/parser_test.go | 2 +- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/h264parser/parser.go b/h264parser/parser.go index 16b73ba..5b3b9a2 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -216,55 +216,58 @@ func SplitNALUs(b []byte) (nalus [][]byte, ok bool) { // maybe AVCC if val4 <= uint(len(b)) { - b = b[4:] + _val4 := val4 + _b := b[4:] nalus := [][]byte{} for { - nalus = append(nalus, b[:val4]) - b = b[val4:] - if len(b) < 4 { + nalus = append(nalus, _b[:_val4]) + _b = _b[_val4:] + if len(_b) < 4 { break } - val4 = bits.GetUIntBE(b, 32) - b = b[4:] - if val4 > uint(len(b)) { + _val4 = bits.GetUIntBE(_b, 32) + _b = _b[4:] + if _val4 > uint(len(_b)) { break } } - if len(b) == 0 { + if len(_b) == 0 { return nalus, true } } // is Annex B if val3 == 1 || val4 == 1 { + _val3 := val3 + _val4 := val4 start := 0 pos := 0 for { if start != pos { nalus = append(nalus, b[start:pos]) } - if val3 == 1 { + if _val3 == 1 { pos += 3 - } else if val4 == 1 { + } else if _val4 == 1 { pos += 4 } start = pos if start == len(b) { break } - val3 = 0 - val4 = 0 + _val3 = 0 + _val4 = 0 for pos < len(b) { if pos+2 < len(b) && b[pos] == 0 { - val3 = bits.GetUIntBE(b[pos:], 24) - if val3 == 0 { + _val3 = bits.GetUIntBE(b[pos:], 24) + if _val3 == 0 { if pos+3 < len(b) { - val4 = uint(b[pos+3]) - if val4 == 1 { + _val4 = uint(b[pos+3]) + if _val4 == 1 { break } } - } else if val3 == 1 { + } else if _val3 == 1 { break } pos++ diff --git a/h264parser/parser_test.go b/h264parser/parser_test.go index 52ac18b..83f3b9c 100644 --- a/h264parser/parser_test.go +++ b/h264parser/parser_test.go @@ -10,7 +10,7 @@ func TestParser(t *testing.T) { var ok bool var nalus [][]byte - annexbFrame, _ := 
hex.DecodeString("000001223322330000000122332233223300000133000001000001") + annexbFrame, _ := hex.DecodeString("00000001223322330000000122332233223300000133000001000001") nalus, ok = SplitNALUs(annexbFrame) t.Log(ok, len(nalus)) From 6b554c9c5211ecbcc8a6e90ca972f8f2512cc0ab Mon Sep 17 00:00:00 2001 From: nareix Date: Sat, 30 Apr 2016 09:34:45 +0800 Subject: [PATCH 14/34] add CreateCodecDataBySPSAndPPS --- h264parser/parser.go | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/h264parser/parser.go b/h264parser/parser.go index 5b3b9a2..17b5745 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -3,6 +3,7 @@ package h264parser import ( "github.com/nareix/bits" + "io" "fmt" "bytes" ) @@ -510,51 +511,51 @@ func MakeAVCDecoderConfRecord( self.LengthSizeMinusOne = 3 return } +*/ func WriteAVCDecoderConfRecord(w io.Writer, self AVCDecoderConfRecord) (err error) { - if err = WriteInt(w, 1, 1); err != nil { + if err = bits.WriteUIntBE(w, 1, 8); err != nil { return } - if err = WriteInt(w, self.AVCProfileIndication, 1); err != nil { + if err = bits.WriteUIntBE(w, uint(self.AVCProfileIndication), 8); err != nil { return } - if err = WriteInt(w, self.ProfileCompatibility, 1); err != nil { + if err = bits.WriteUIntBE(w, uint(self.ProfileCompatibility), 8); err != nil { return } - if err = WriteInt(w, self.AVCLevelIndication, 1); err != nil { + if err = bits.WriteUIntBE(w, uint(self.AVCLevelIndication), 8); err != nil { return } - if err = WriteInt(w, self.LengthSizeMinusOne|0xfc, 1); err != nil { + if err = bits.WriteUIntBE(w, uint(self.LengthSizeMinusOne|0xfc), 8); err != nil { return } - if err = WriteInt(w, len(self.SPS)|0xe0, 1); err != nil { + if err = bits.WriteUIntBE(w, uint(len(self.SPS)|0xe0), 8); err != nil { return } for _, data := range self.SPS { - if err = WriteInt(w, len(data), 2); err != nil { + if err = bits.WriteUIntBE(w, uint(len(data)), 16); err != nil { return } - if err = 
WriteBytes(w, data, len(data)); err != nil { + if err = bits.WriteBytes(w, data, len(data)); err != nil { return } } - if err = WriteInt(w, len(self.PPS), 1); err != nil { + if err = bits.WriteUIntBE(w, uint(len(self.PPS)), 8); err != nil { return } for _, data := range self.PPS { - if err = WriteInt(w, len(data), 2); err != nil { + if err = bits.WriteUIntBE(w, uint(len(data)), 16); err != nil { return } - if err = WriteBytes(w, data, len(data)); err != nil { + if err = bits.WriteBytes(w, data, len(data)); err != nil { return } } return } -*/ type CodecInfo struct { Record AVCDecoderConfRecord @@ -577,6 +578,24 @@ func ParseCodecData(config []byte) (info CodecInfo, err error) { return } +func CreateCodecDataBySPSAndPPS(SPS, PPS []byte) (codecData []byte, err error) { + self := AVCDecoderConfRecord{} + self.AVCProfileIndication = uint(SPS[1]) + self.ProfileCompatibility = uint(SPS[2]) + self.AVCLevelIndication = uint(SPS[3]) + self.SPS = [][]byte{SPS} + self.PPS = [][]byte{PPS} + self.LengthSizeMinusOne = 3 + + buf := &bytes.Buffer{} + if err = WriteAVCDecoderConfRecord(buf, self); err != nil { + return + } + codecData = buf.Bytes() + + return +} + type AVCDecoderConfRecord struct { AVCProfileIndication uint ProfileCompatibility uint From 2eb65b8fac3ffe0f4e40619d846d53813669c4a0 Mon Sep 17 00:00:00 2001 From: nareix Date: Mon, 16 May 2016 20:53:34 +0800 Subject: [PATCH 15/34] add ParseSliceHeaderFromNALU() --- aacparser/parser.go | 6 ++-- h264parser/parser.go | 70 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/aacparser/parser.go b/aacparser/parser.go index db8dc64..0a3a1f4 100644 --- a/aacparser/parser.go +++ b/aacparser/parser.go @@ -112,12 +112,14 @@ func MakeADTSHeader(config MPEG4AudioConfig, samples int, payloadLength int) (he return } -func ExtractADTSFrames(frames []byte) (config MPEG4AudioConfig, payload []byte, samples int, err error) { +func ExtractADTSFrames(frames []byte) (config 
MPEG4AudioConfig, payload [][]byte, samples int, err error) { for len(frames) > 0 { var n, framelen int - if config, payload, n, framelen, err = ReadADTSFrame(frames); err != nil { + var _payload []byte + if config, _payload, n, framelen, err = ReadADTSFrame(frames); err != nil { return } + payload = append(payload, _payload) frames = frames[framelen:] samples += n } diff --git a/h264parser/parser.go b/h264parser/parser.go index 17b5745..5d05769 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -301,9 +301,7 @@ type SPSInfo struct { } func ParseSPS(data []byte) (self SPSInfo, err error) { - r := &bits.GolombBitReader{ - R: bytes.NewReader(data), - } + r := &bits.GolombBitReader{R: bytes.NewReader(data)} if _, err = r.ReadBits(8); err != nil { return @@ -662,3 +660,69 @@ func ParseAVCDecoderConfRecord(config []byte) (self AVCDecoderConfRecord, err er return } +type SliceType uint + +func (self SliceType) String() string { + switch self { + case P: + return "P" + case B: + return "B" + case I: + return "I" + } + return "" +} + +const ( + P = iota+1 + B + I +) + +func ParseSliceHeaderFromNALU(packet []byte) (sliceType SliceType, err error) { + + if len(packet) <= 1 { + err = fmt.Errorf("packet too short to parse slice header") + return + } + + nal_unit_type := packet[0]&0x1f + switch nal_unit_type { + case 1,2,5,19: + // slice_layer_without_partitioning_rbsp + // slice_data_partition_a_layer_rbsp + + default: + err = fmt.Errorf("nal_unit_type=%d has no slice header", nal_unit_type) + return + } + + r := &bits.GolombBitReader{R: bytes.NewReader(packet[1:])} + + // first_mb_in_slice + if _, err = r.ReadExponentialGolombCode(); err != nil { + return + } + + // slice_type + var u uint + if u, err = r.ReadExponentialGolombCode(); err != nil { + return + } + + switch u { + case 0,3,5,8: + sliceType = P + case 1,6: + sliceType = B + case 2,4,7,9: + sliceType = I + default: + err = fmt.Errorf("slice_type=%d invalid", u) + return + } + + return +} + From 
86e17d8e71cd785c33b1e725a5926ddff4a3e88d Mon Sep 17 00:00:00 2001 From: nareix Date: Thu, 19 May 2016 16:57:25 +0800 Subject: [PATCH 16/34] add ffmpeg audio decoder and encoder --- audio.go | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ ffmpeg.h | 14 +++++ 2 files changed, 191 insertions(+) create mode 100644 audio.go create mode 100644 ffmpeg.h diff --git a/audio.go b/audio.go new file mode 100644 index 0000000..d0b9b04 --- /dev/null +++ b/audio.go @@ -0,0 +1,177 @@ +package codec + +import ( + // #include "ffmpeg.h" + "C" + "unsafe" + "fmt" +) + +const ( + S16 = iota+1 + FLTP +) + +type AudioEncoder struct { + ff C.FFCtx + SampleRate int + BitRate int + ChannelCount int + SampleFormat int + sampleSize int +} + +func (self *AudioEncoder) Setup() (err error) { + ff := &self.ff + + switch self.SampleFormat { + case S16: + ff.codecCtx.sample_fmt = C.AV_SAMPLE_FMT_S16 + self.sampleSize = 2 + case FLTP: + ff.codecCtx.sample_fmt = C.AV_SAMPLE_FMT_FLTP + self.sampleSize = 4 + default: + err = fmt.Errorf("unsupported sample format") + return + } + + if self.BitRate == 0 { + self.BitRate = 50000 + } + + ff.frame = C.av_frame_alloc() + ff.codecCtx.sample_rate = C.int(self.SampleRate) + ff.codecCtx.bit_rate = C.int(self.BitRate) + ff.codecCtx.channels = C.int(self.ChannelCount) + ff.codecCtx.strict_std_compliance = C.FF_COMPLIANCE_EXPERIMENTAL + if C.avcodec_open2(ff.codecCtx, ff.codec, nil) != 0 { + err = fmt.Errorf("avcodec_open2 failed") + return + } + + return +} + +func (self *AudioEncoder) Extradata() (data []byte) { + data = make([]byte, (int)(self.ff.codecCtx.extradata_size)) + C.memcpy( + unsafe.Pointer(&data[0]), + unsafe.Pointer(self.ff.codecCtx.extradata), + (C.size_t)(len(data)), + ) + return +} + +func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt []byte, err error) { + nbSamples := 1024 + expectedSize := nbSamples*self.sampleSize*self.ChannelCount + + if len(sample) != expectedSize { + err = fmt.Errorf("len(sample) 
should be %d", expectedSize) + return + } + + frame := self.ff.frame + frame.nb_samples = C.int(nbSamples) + for i := 0; i < self.ChannelCount; i++ { + frame.data[i] = (*C.uint8_t)(unsafe.Pointer(&sample[i*nbSamples*self.sampleSize])) + frame.linesize[i] = C.int(nbSamples*self.sampleSize) + } + frame.extended_data = &frame.data[0] + + cpkt := C.AVPacket{} + cgotpkt := C.int(0) + cerr := C.avcodec_encode_audio2(self.ff.codecCtx, &cpkt, frame, &cgotpkt) + if cerr < C.int(0) { + err = fmt.Errorf("avcodec_encode_audio2 failed: %d", cerr) + return + } + + if cgotpkt != 0 { + gotPkt = true + pkt = make([]byte, (int)(cpkt.size)) + C.memcpy( + unsafe.Pointer(&pkt[0]), + unsafe.Pointer(cpkt.data), + (C.size_t)(len(pkt)), + ) + } + + return +} + +type AudioDecoder struct { + ff C.FFCtx + Extradata []byte +} + +func (self *AudioDecoder) Setup() (err error) { + ff := &self.ff + + ff.frame = C.av_frame_alloc() + + if len(self.Extradata) > 0 { + ff.codecCtx.extradata = (*C.uint8_t)(unsafe.Pointer(&self.Extradata[0])) + ff.codecCtx.extradata_size = C.int(len(self.Extradata)) + } + + if C.avcodec_open2(ff.codecCtx, ff.codec, nil) != 0 { + err = fmt.Errorf("avcodec_open2 failed") + return + } + + return +} + +func (self *AudioDecoder) Decode(frame []byte) (gotPkt bool, pkt []byte, err error) { + ff := &self.ff + + cpkt := C.AVPacket{ + data: (*C.uint8_t)(unsafe.Pointer(&frame[0])), + size: C.int(len(frame)), + } + cgotpkt := C.int(0) + cerr := C.avcodec_decode_audio4(ff.codecCtx, ff.frame, &cgotpkt, &cpkt); + if cerr < C.int(0) { + err = fmt.Errorf("avcodec_decode_audio4 failed: %d", cerr) + return + } + + if cgotpkt != C.int(0) { + gotPkt = true + pkt = make([]byte, (int)(cpkt.size)) + C.memcpy( + unsafe.Pointer(&pkt[0]), + unsafe.Pointer(cpkt.data), + (C.size_t)(len(pkt)), + ) + } + + return +} + +func FindAudioEncoderByName(name string) (enc *AudioEncoder) { + ff := C.FFCtx{} + ff.codec = C.avcodec_find_encoder_by_name(C.CString(name)) + if ff.codec != nil { + ff.codecCtx = 
C.avcodec_alloc_context3(ff.codec) + if ff.codecCtx != nil { + return &AudioEncoder{ff: ff} + } + } + return nil +} + +func FindAudioDecoderByName(name string) (dec *AudioDecoder) { + ff := C.FFCtx{} + ff.codec = C.avcodec_find_decoder_by_name(C.CString(name)) + if ff.codec != nil { + ff.codecCtx = C.avcodec_alloc_context3(ff.codec) + if ff.codecCtx != nil { + return &AudioDecoder{ff: ff} + } + } + return nil +} + diff --git a/ffmpeg.h b/ffmpeg.h new file mode 100644 index 0000000..1061a87 --- /dev/null +++ b/ffmpeg.h @@ -0,0 +1,14 @@ + +#include +#include +#include + +typedef struct { + AVCodec *codec; + AVCodecContext *codecCtx; + AVFrame *frame; +} FFCtx; + +int FFCtxFindEncoderByName(FFCtx *ff, const char *name); +int FFCtxFindDecoderByName(FFCtx *ff, const char *name); + From d6862125f7ec18bacabc124d192dea1d7cc6a654 Mon Sep 17 00:00:00 2001 From: nareix Date: Thu, 19 May 2016 17:12:02 +0800 Subject: [PATCH 17/34] add flush mode --- audio.go | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/audio.go b/audio.go index d0b9b04..6a5f7df 100644 --- a/audio.go +++ b/audio.go @@ -67,18 +67,22 @@ func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt [] nbSamples := 1024 expectedSize := nbSamples*self.sampleSize*self.ChannelCount - if len(sample) != expectedSize { - err = fmt.Errorf("len(sample) should be %d", expectedSize) - return - } - frame := self.ff.frame - frame.nb_samples = C.int(nbSamples) - for i := 0; i < self.ChannelCount; i++ { - frame.data[i] = (*C.uint8_t)(unsafe.Pointer(&sample[i*nbSamples*self.sampleSize])) - frame.linesize[i] = C.int(nbSamples*self.sampleSize) + if flush { + frame = nil + } else { + if len(sample) != expectedSize { + err = fmt.Errorf("len(sample) should be %d", expectedSize) + return + } + + frame.nb_samples = C.int(nbSamples) + for i := 0; i < self.ChannelCount; i++ { + frame.data[i] = (*C.uint8_t)(unsafe.Pointer(&sample[i*nbSamples*self.sampleSize])) + 
frame.linesize[i] = C.int(nbSamples*self.sampleSize) + } + frame.extended_data = &frame.data[0] } - frame.extended_data = &frame.data[0] cpkt := C.AVPacket{} cgotpkt := C.int(0) From 3b9c06f2e1ec616cbd8f317b1e22df2ded09b5cb Mon Sep 17 00:00:00 2001 From: nareix Date: Thu, 19 May 2016 22:27:15 +0800 Subject: [PATCH 18/34] fix decoder and encoder --- audio.go | 77 +++++++++++++++++++++++++++----------------------------- ffmpeg.h | 3 --- 2 files changed, 37 insertions(+), 43 deletions(-) diff --git a/audio.go b/audio.go index 6a5f7df..215f4ce 100644 --- a/audio.go +++ b/audio.go @@ -1,15 +1,24 @@ package codec import ( - // #include "ffmpeg.h" + /* + #include "ffmpeg.h" + int wrap_avcodec_decode_audio4(AVCodecContext *ctx, AVFrame *frame, void *data, int size, int *got) { + struct AVPacket pkt = {.data = data, .size = size}; + return avcodec_decode_audio4(ctx, frame, got, &pkt); + } + void set_sample_fmt(AVCodecContext *ctx, int sample_fmt) { + ctx->sample_fmt = sample_fmt; + } + */ "C" "unsafe" "fmt" ) const ( - S16 = iota+1 - FLTP + S16 = C.AV_SAMPLE_FMT_S16 + FLTP = C.AV_SAMPLE_FMT_FLTP ) type AudioEncoder struct { @@ -24,23 +33,11 @@ type AudioEncoder struct { func (self *AudioEncoder) Setup() (err error) { ff := &self.ff - switch self.SampleFormat { - case S16: - ff.codecCtx.sample_fmt = C.AV_SAMPLE_FMT_S16 - self.sampleSize = 2 - case FLTP: - ff.codecCtx.sample_fmt = C.AV_SAMPLE_FMT_FLTP - self.sampleSize = 4 - default: - err = fmt.Errorf("unsupported sample format") - return - } - + ff.frame = C.av_frame_alloc() if self.BitRate == 0 { self.BitRate = 50000 } - - ff.frame = C.av_frame_alloc() + C.set_sample_fmt(ff.codecCtx, C.int(self.SampleFormat)) ff.codecCtx.sample_rate = C.int(self.SampleRate) ff.codecCtx.bit_rate = C.int(self.BitRate) ff.codecCtx.channels = C.int(self.ChannelCount) @@ -64,8 +61,11 @@ func (self *AudioEncoder) Extradata() (data []byte) { } func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt []byte, err error) { - 
nbSamples := 1024 - expectedSize := nbSamples*self.sampleSize*self.ChannelCount + ff := &self.ff + nbSamples := int(ff.codecCtx.frame_size) + channelCount := int(ff.codecCtx.channels) + sampleSize := int(C.av_get_bytes_per_sample(ff.codecCtx.sample_fmt)) + expectedSize := nbSamples*sampleSize*channelCount frame := self.ff.frame if flush { @@ -77,11 +77,16 @@ func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt [] } frame.nb_samples = C.int(nbSamples) - for i := 0; i < self.ChannelCount; i++ { - frame.data[i] = (*C.uint8_t)(unsafe.Pointer(&sample[i*nbSamples*self.sampleSize])) - frame.linesize[i] = C.int(nbSamples*self.sampleSize) + if C.av_sample_fmt_is_planar(ff.codecCtx.sample_fmt) != 0 { + for i := 0; i < self.ChannelCount; i++ { + frame.data[i] = (*C.uint8_t)(unsafe.Pointer(&sample[i*nbSamples*sampleSize])) + frame.linesize[i] = C.int(nbSamples*sampleSize) + } + } else { + frame.data[0] = (*C.uint8_t)(unsafe.Pointer(&sample[0])) + frame.linesize[0] = C.int(channelCount*nbSamples*self.sampleSize) } - frame.extended_data = &frame.data[0] + //frame.extended_data = &frame.data[0] } cpkt := C.AVPacket{} @@ -94,12 +99,8 @@ func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt [] if cgotpkt != 0 { gotPkt = true - pkt = make([]byte, (int)(cpkt.size)) - C.memcpy( - unsafe.Pointer(&pkt[0]), - unsafe.Pointer(cpkt.data), - (C.size_t)(len(pkt)), - ) + pkt = C.GoBytes(unsafe.Pointer(cpkt.data), cpkt.size) + C.av_free_packet(&cpkt) } return @@ -107,6 +108,8 @@ func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt [] type AudioDecoder struct { ff C.FFCtx + ChannelCount int + SampleFormat int Extradata []byte } @@ -120,6 +123,7 @@ func (self *AudioDecoder) Setup() (err error) { ff.codecCtx.extradata_size = C.int(len(self.Extradata)) } + ff.codecCtx.channels = C.int(self.ChannelCount) if C.avcodec_open2(ff.codecCtx, ff.codec, nil) != 0 { err = fmt.Errorf("avcodec_open2 failed") return @@ -131,12 +135,8 @@ 
func (self *AudioDecoder) Setup() (err error) { func (self *AudioDecoder) Decode(frame []byte) (gotPkt bool, pkt []byte, err error) { ff := &self.ff - cpkt := C.AVPacket{ - data: (*C.uint8_t)(unsafe.Pointer(&frame[0])), - size: C.int(len(frame)), - } cgotpkt := C.int(0) - cerr := C.avcodec_decode_audio4(ff.codecCtx, ff.frame, &cgotpkt, &cpkt); + cerr := C.wrap_avcodec_decode_audio4(ff.codecCtx, ff.frame, unsafe.Pointer(&frame[0]), C.int(len(frame)), &cgotpkt) if cerr < C.int(0) { err = fmt.Errorf("avcodec_decode_audio4 failed: %d", cerr) return @@ -144,12 +144,9 @@ func (self *AudioDecoder) Decode(frame []byte) (gotPkt bool, pkt []byte, err err if cgotpkt != C.int(0) { gotPkt = true - pkt = make([]byte, (int)(cpkt.size)) - C.memcpy( - unsafe.Pointer(&pkt[0]), - unsafe.Pointer(cpkt.data), - (C.size_t)(len(pkt)), - ) + //pkt = C.GoBytes(unsafe.Pointer(cpkt.data), cpkt.size) + size := C.av_samples_get_buffer_size(nil, ff.codecCtx.channels, ff.frame.nb_samples, ff.codecCtx.sample_fmt, C.int(1)) + pkt = C.GoBytes(unsafe.Pointer(ff.frame.data[0]), size) } return diff --git a/ffmpeg.h b/ffmpeg.h index 1061a87..b336d17 100644 --- a/ffmpeg.h +++ b/ffmpeg.h @@ -9,6 +9,3 @@ typedef struct { AVFrame *frame; } FFCtx; -int FFCtxFindEncoderByName(FFCtx *ff, const char *name); -int FFCtxFindDecoderByName(FFCtx *ff, const char *name); - From 8e14c48c2ad6611352edb711aa278f6b90ebf83d Mon Sep 17 00:00:00 2001 From: nareix Date: Fri, 20 May 2016 23:51:14 +0800 Subject: [PATCH 19/34] update audio.go --- audio.go | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/audio.go b/audio.go index 215f4ce..2456c38 100644 --- a/audio.go +++ b/audio.go @@ -10,15 +10,24 @@ import ( void set_sample_fmt(AVCodecContext *ctx, int sample_fmt) { ctx->sample_fmt = sample_fmt; } + int wrap_av_get_bytes_per_sample(int sample_fmt) { + return av_get_bytes_per_sample(sample_fmt); + } */ "C" "unsafe" "fmt" ) +type SampleFormat int + +func (self 
SampleFormat) BytesPerSample() int { + return int(C.wrap_av_get_bytes_per_sample(C.int(self))) +} + const ( - S16 = C.AV_SAMPLE_FMT_S16 - FLTP = C.AV_SAMPLE_FMT_FLTP + S16 = SampleFormat(C.AV_SAMPLE_FMT_S16) + FLTP = SampleFormat(C.AV_SAMPLE_FMT_FLTP) ) type AudioEncoder struct { @@ -26,8 +35,8 @@ type AudioEncoder struct { SampleRate int BitRate int ChannelCount int - SampleFormat int - sampleSize int + SampleFormat SampleFormat + FrameSampleCount int } func (self *AudioEncoder) Setup() (err error) { @@ -46,28 +55,26 @@ func (self *AudioEncoder) Setup() (err error) { err = fmt.Errorf("avcodec_open2 failed") return } + self.SampleFormat = SampleFormat(int(ff.codecCtx.sample_fmt)) + self.ChannelCount = int(ff.codecCtx.channels) + self.FrameSampleCount = int(ff.codecCtx.frame_size) return } func (self *AudioEncoder) Extradata() (data []byte) { - data = make([]byte, (int)(self.ff.codecCtx.extradata_size)) - C.memcpy( - unsafe.Pointer(&data[0]), - unsafe.Pointer(self.ff.codecCtx.extradata), - (C.size_t)(len(data)), - ) + data = C.GoBytes(unsafe.Pointer(self.ff.codecCtx.extradata), self.ff.codecCtx.extradata_size) return } func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt []byte, err error) { ff := &self.ff - nbSamples := int(ff.codecCtx.frame_size) + nbSamples := self.FrameSampleCount channelCount := int(ff.codecCtx.channels) sampleSize := int(C.av_get_bytes_per_sample(ff.codecCtx.sample_fmt)) expectedSize := nbSamples*sampleSize*channelCount - frame := self.ff.frame + frame := ff.frame if flush { frame = nil } else { @@ -77,6 +84,8 @@ func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt [] } frame.nb_samples = C.int(nbSamples) + frame.format = C.int(ff.codecCtx.sample_fmt) + frame.channel_layout = ff.codecCtx.channel_layout if C.av_sample_fmt_is_planar(ff.codecCtx.sample_fmt) != 0 { for i := 0; i < self.ChannelCount; i++ { frame.data[i] = (*C.uint8_t)(unsafe.Pointer(&sample[i*nbSamples*sampleSize])) @@ -84,7 +93,7 
@@ func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt [] } } else { frame.data[0] = (*C.uint8_t)(unsafe.Pointer(&sample[0])) - frame.linesize[0] = C.int(channelCount*nbSamples*self.sampleSize) + frame.linesize[0] = C.int(channelCount*nbSamples*sampleSize) } //frame.extended_data = &frame.data[0] } @@ -109,7 +118,7 @@ func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt [] type AudioDecoder struct { ff C.FFCtx ChannelCount int - SampleFormat int + SampleFormat SampleFormat Extradata []byte } @@ -128,6 +137,8 @@ func (self *AudioDecoder) Setup() (err error) { err = fmt.Errorf("avcodec_open2 failed") return } + self.SampleFormat = SampleFormat(int(ff.codecCtx.sample_fmt)) + self.ChannelCount = int(ff.codecCtx.channels) return } From 790569a35a7b30211a213775dd878bd43a7106a8 Mon Sep 17 00:00:00 2001 From: nareix Date: Wed, 25 May 2016 07:44:06 +0800 Subject: [PATCH 20/34] update --- aacenc.go | 16 ++--- aacparser/parser.go | 59 +++++++++++++++--- h264parser/parser.go | 145 +++++++++++++++++++++++++++++++------------ 3 files changed, 166 insertions(+), 54 deletions(-) diff --git a/aacenc.go b/aacenc.go index 7ce6c21..b350d1e 100644 --- a/aacenc.go +++ b/aacenc.go @@ -17,7 +17,7 @@ import ( } aacenc_t ; static int aacenc_new(aacenc_t *m) { - m->c = avcodec_find_encoder(CODEC_ID_AAC); + m->c = avcodec_find_encoder_by_name("aac"); m->ctx = avcodec_alloc_context3(m->c); m->ctx->sample_fmt = AV_SAMPLE_FMT_FLTP; m->ctx->sample_rate = m->samplerate; @@ -26,7 +26,7 @@ import ( m->ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; m->f = av_frame_alloc(); int r = avcodec_open2(m->ctx, m->c, 0); - av_log(m->ctx, AV_LOG_DEBUG, "extra %d\n", m->ctx->extradata_size); + //av_log(m->ctx, AV_LOG_DEBUG, "extra %d\n", m->ctx->extradata_size); return r; } @@ -37,7 +37,8 @@ import ( pkt.size = sizeof(m->buf); m->f->nb_samples = 1024; m->f->extended_data = m->f->data; - m->f->linesize[0] = 4096; + m->f->linesize[0] = 1024*4; + 
//m->f->linesize[1] = 1024*4; avcodec_encode_audio2(m->ctx, &pkt, m->f, &m->got); av_log(m->ctx, AV_LOG_DEBUG, "got %d size %d\n", m->got, pkt.size); m->size = pkt.size; @@ -53,12 +54,12 @@ type AACEncoder struct { Header []byte } -// only supported fltp,stereo,44100khz. If you need other config, it's easy to modify code +// only supported fltp,stereo,44100HZ. If you need other config, it's easy to modify code func NewAACEncoder() (m *AACEncoder, err error) { m = &AACEncoder{} m.m.samplerate = 44100 m.m.bitrate = 50000 - m.m.channels = 2 + m.m.channels = 1 r := C.aacenc_new(&m.m) if int(r) != 0 { err = errors.New("open codec failed") @@ -67,7 +68,7 @@ func NewAACEncoder() (m *AACEncoder, err error) { m.Header = make([]byte, (int)(m.m.ctx.extradata_size)) C.memcpy( unsafe.Pointer(&m.Header[0]), - unsafe.Pointer(&m.m.ctx.extradata), + unsafe.Pointer(m.m.ctx.extradata), (C.size_t)(len(m.Header)), ) return @@ -75,7 +76,7 @@ func NewAACEncoder() (m *AACEncoder, err error) { func (m *AACEncoder) Encode(sample []byte) (ret []byte, err error) { m.m.f.data[0] = (*C.uint8_t)(unsafe.Pointer(&sample[0])) - m.m.f.data[1] = (*C.uint8_t)(unsafe.Pointer(&sample[4096])) + //m.m.f.data[1] = (*C.uint8_t)(unsafe.Pointer(&sample[1024*4])) C.aacenc_encode(&m.m) if int(m.m.got) == 0 { err = errors.New("no data") @@ -89,3 +90,4 @@ func (m *AACEncoder) Encode(sample []byte) (ret []byte, err error) { ) return } + diff --git a/aacparser/parser.go b/aacparser/parser.go index 0a3a1f4..b4fab3d 100644 --- a/aacparser/parser.go +++ b/aacparser/parser.go @@ -2,6 +2,7 @@ package aacparser import ( "github.com/nareix/bits" + "github.com/nareix/av" "fmt" "bytes" "io" @@ -247,6 +248,16 @@ func (self MPEG4AudioConfig) Complete() (config MPEG4AudioConfig) { return } +func ParseMPEG4AudioConfig(data []byte) (config MPEG4AudioConfig, err error) { + r := bytes.NewReader(data) + if config, err = ReadMPEG4AudioConfig(r); err != nil { + err = fmt.Errorf("CodecData invalid: parse MPEG4AudioConfig failed(%s)", 
err) + return + } + config = config.Complete() + return +} + func ReadMPEG4AudioConfig(r io.Reader) (config MPEG4AudioConfig, err error) { // copied from libavcodec/mpeg4audio.c avpriv_mpeg4audio_get_config() br := &bits.Reader{R: r} @@ -298,17 +309,51 @@ func WriteMPEG4AudioConfig(w io.Writer, config MPEG4AudioConfig) (err error) { return } -type CodecInfo struct { - MPEG4AudioConfig +type CodecData struct { + Config []byte + ConfigInfo MPEG4AudioConfig } -func ParseCodecData(config []byte) (info CodecInfo, err error) { - r := bytes.NewReader(config) - if info.MPEG4AudioConfig, err = ReadMPEG4AudioConfig(r); err != nil { - err = fmt.Errorf("CodecData invalid: parse MPEG4AudioConfig failed(%s)", err) +func (self CodecData) IsVideo() bool { + return false +} + +func (self CodecData) IsAudio() bool { + return true +} + +func (self CodecData) Type() int { + return av.AAC +} + +func (self CodecData) MPEG4AudioConfigBytes() []byte { + return self.Config +} + +func (self CodecData) ChannelCount() int { + return self.ConfigInfo.ChannelCount +} + +func (self CodecData) SampleRate() int { + return self.ConfigInfo.SampleRate +} + +func (self CodecData) SampleFormat() av.SampleFormat { + return av.FLTP +} + +func (self CodecData) MakeADTSHeader(samples int, payloadLength int) []byte { + return MakeADTSHeader(self.ConfigInfo, samples, payloadLength) +} + +func NewCodecDataFromMPEG4AudioConfigBytes(config []byte) (codec av.AACCodecData, err error) { + self := CodecData{} + self.Config = config + if self.ConfigInfo, err = ParseMPEG4AudioConfig(config); err != nil { + err = fmt.Errorf("parse MPEG4AudioConfig failed(%s)", err) return } - info.MPEG4AudioConfig = info.MPEG4AudioConfig.Complete() + codec = self return } diff --git a/h264parser/parser.go b/h264parser/parser.go index 5d05769..72ef921 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -2,6 +2,7 @@ package h264parser import ( + "github.com/nareix/av" "github.com/nareix/bits" "io" "fmt" @@ -492,25 +493,6 @@ 
func ParseSPS(data []byte) (self SPSInfo, err error) { return } -/* -func MakeAVCDecoderConfRecord( - SPS []byte, - PPS []byte, -) (self AVCDecoderConfRecord, err error) { - if len(SPS) < 4 { - err = fmt.Errorf("invalid SPS data") - return - } - self.AVCProfileIndication = int(SPS[1]) - self.ProfileCompatibility = int(SPS[2]) - self.AVCLevelIndication = int(SPS[3]) - self.SPS = [][]byte{SPS} - self.PPS = [][]byte{PPS} - self.LengthSizeMinusOne = 3 - return -} -*/ - func WriteAVCDecoderConfRecord(w io.Writer, self AVCDecoderConfRecord) (err error) { if err = bits.WriteUIntBE(w, 1, 8); err != nil { return @@ -555,42 +537,85 @@ func WriteAVCDecoderConfRecord(w io.Writer, self AVCDecoderConfRecord) (err erro return } -type CodecInfo struct { - Record AVCDecoderConfRecord +type CodecData struct { + Record []byte + RecordInfo AVCDecoderConfRecord SPSInfo SPSInfo } -// CodecData is AVCDecoderConfRecord -func ParseCodecData(config []byte) (info CodecInfo, err error) { - if info.Record, err = ParseAVCDecoderConfRecord(config); err != nil { +func (self CodecData) Type() int { + return av.H264 +} + +func (self CodecData) IsVideo() bool { + return true +} + +func (self CodecData) AVCDecoderConfRecordBytes() []byte { + return self.Record +} + +func (self CodecData) SPS() []byte { + return self.RecordInfo.SPS[0] +} + +func (self CodecData) PPS() []byte { + return self.RecordInfo.PPS[0] +} + +func (self CodecData) IsAudio() bool { + return false +} + +func (self CodecData) Width() int { + return int(self.SPSInfo.Width) +} + +func (self CodecData) Height() int { + return int(self.SPSInfo.Height) +} + +func NewCodecDataFromAVCDecoderConfRecord(record []byte) (codec av.H264CodecData, err error) { + self := CodecData{} + self.Record = record + if self.RecordInfo, err = ParseAVCDecoderConfRecord(record); err != nil { return } - if len(info.Record.SPS) < 1 { - err = fmt.Errorf("CodecData invalid: no SPS found in AVCDecoderConfRecord") + if len(self.RecordInfo.SPS) == 0 { + err = 
fmt.Errorf("no SPS found in AVCDecoderConfRecord") return } - if info.SPSInfo, err = ParseSPS(info.Record.SPS[0]); err != nil { - err = fmt.Errorf("CodecData invalid: parse SPS failed(%s)", err) + if len(self.RecordInfo.PPS) == 0 { + err = fmt.Errorf("no PPS found in AVCDecoderConfRecord") return } + if self.SPSInfo, err = ParseSPS(self.RecordInfo.SPS[0]); err != nil { + err = fmt.Errorf("parse SPS failed(%s)", err) + return + } + codec = self return } -func CreateCodecDataBySPSAndPPS(SPS, PPS []byte) (codecData []byte, err error) { - self := AVCDecoderConfRecord{} - self.AVCProfileIndication = uint(SPS[1]) - self.ProfileCompatibility = uint(SPS[2]) - self.AVCLevelIndication = uint(SPS[3]) - self.SPS = [][]byte{SPS} - self.PPS = [][]byte{PPS} - self.LengthSizeMinusOne = 3 - +func NewCodecDataFromSPSAndPPS(sps, pps []byte) (codec av.H264CodecData, err error) { + recordinfo := AVCDecoderConfRecord{} + recordinfo.AVCProfileIndication = uint(sps[1]) + recordinfo.ProfileCompatibility = uint(sps[2]) + recordinfo.AVCLevelIndication = uint(sps[3]) + recordinfo.SPS = [][]byte{sps} + recordinfo.PPS = [][]byte{pps} + recordinfo.LengthSizeMinusOne = 3 buf := &bytes.Buffer{} - if err = WriteAVCDecoderConfRecord(buf, self); err != nil { + if err = WriteAVCDecoderConfRecord(buf, recordinfo); err != nil { return } - codecData = buf.Bytes() - + self := CodecData{} + self.RecordInfo = recordinfo + self.Record = buf.Bytes() + if self.SPSInfo, err = ParseSPS(sps); err != nil { + return + } + codec = self return } @@ -726,3 +751,43 @@ func ParseSliceHeaderFromNALU(packet []byte) (sliceType SliceType, err error) { return } +/* +type CodecInfo struct { + Record AVCDecoderConfRecord + SPSInfo SPSInfo +} + +func ParseCodecData(config []byte) (info CodecInfo, err error) { + if info.Record, err = ParseAVCDecoderConfRecord(config); err != nil { + return + } + if len(info.Record.SPS) < 1 { + err = fmt.Errorf("CodecData invalid: no SPS found in AVCDecoderConfRecord") + return + } + if 
info.SPSInfo, err = ParseSPS(info.Record.SPS[0]); err != nil { + err = fmt.Errorf("CodecData invalid: parse SPS failed(%s)", err) + return + } + return +} + +func CreateCodecDataBySPSAndPPS(SPS, PPS []byte) (codecData []byte, err error) { + self := AVCDecoderConfRecord{} + self.AVCProfileIndication = uint(SPS[1]) + self.ProfileCompatibility = uint(SPS[2]) + self.AVCLevelIndication = uint(SPS[3]) + self.SPS = [][]byte{SPS} + self.PPS = [][]byte{PPS} + self.LengthSizeMinusOne = 3 + + buf := &bytes.Buffer{} + if err = WriteAVCDecoderConfRecord(buf, self); err != nil { + return + } + codecData = buf.Bytes() + + return +} +*/ + From 0971d60a78ef76861b4046ae066ccde487bca4c6 Mon Sep 17 00:00:00 2001 From: nareix Date: Wed, 8 Jun 2016 14:36:31 +0800 Subject: [PATCH 21/34] change to ChannelLayout --- aacparser/parser.go | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/aacparser/parser.go b/aacparser/parser.go index b4fab3d..29b28b2 100644 --- a/aacparser/parser.go +++ b/aacparser/parser.go @@ -56,7 +56,7 @@ const ( type MPEG4AudioConfig struct { SampleRate int - ChannelCount int + ChannelLayout av.ChannelLayout ObjectType uint SampleRateIndex uint ChannelConfig uint @@ -67,8 +67,27 @@ var sampleRateTable = []int{ 24000, 22050, 16000, 12000, 11025, 8000, 7350, } -var chanConfigTable = []int{ - 0, 1, 2, 3, 4, 5, 6, 8, +/* +These are the channel configurations: +0: Defined in AOT Specifc Config +1: 1 channel: front-center +2: 2 channels: front-left, front-right +3: 3 channels: front-center, front-left, front-right +4: 4 channels: front-center, front-left, front-right, back-center +5: 5 channels: front-center, front-left, front-right, back-left, back-right +6: 6 channels: front-center, front-left, front-right, back-left, back-right, LFE-channel +7: 8 channels: front-center, front-left, front-right, side-left, side-right, back-left, back-right, LFE-channel +8-15: Reserved +*/ +var chanConfigTable = []av.ChannelLayout{ + 0, + 
av.CH_FRONT_CENTER, + av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT, + av.CH_FRONT_CENTER|av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT, + av.CH_FRONT_CENTER|av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT|av.CH_BACK_CENTER, + av.CH_FRONT_CENTER|av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT|av.CH_BACK_LEFT|av.CH_BACK_RIGHT, + av.CH_FRONT_CENTER|av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT|av.CH_BACK_LEFT|av.CH_BACK_RIGHT|av.CH_LOW_FREQ, + av.CH_FRONT_CENTER|av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT|av.CH_SIDE_LEFT|av.CH_SIDE_RIGHT|av.CH_BACK_LEFT|av.CH_BACK_RIGHT|av.CH_LOW_FREQ, } func IsADTSFrame(frames []byte) bool { @@ -243,7 +262,7 @@ func (self MPEG4AudioConfig) Complete() (config MPEG4AudioConfig) { config.SampleRate = sampleRateTable[config.SampleRateIndex] } if int(config.ChannelConfig) < len(chanConfigTable) { - config.ChannelCount = chanConfigTable[config.ChannelConfig] + config.ChannelLayout = chanConfigTable[config.ChannelConfig] } return } @@ -293,8 +312,8 @@ func WriteMPEG4AudioConfig(w io.Writer, config MPEG4AudioConfig) (err error) { } if config.ChannelConfig == 0 { - for i, count := range chanConfigTable { - if count == config.ChannelCount { + for i, layout := range chanConfigTable { + if layout == config.ChannelLayout { config.ChannelConfig = uint(i) } } @@ -330,8 +349,8 @@ func (self CodecData) MPEG4AudioConfigBytes() []byte { return self.Config } -func (self CodecData) ChannelCount() int { - return self.ConfigInfo.ChannelCount +func (self CodecData) ChannelLayout() av.ChannelLayout { + return self.ConfigInfo.ChannelLayout } func (self CodecData) SampleRate() int { From 7453318fd5af645b1422dece3b712b1e79833563 Mon Sep 17 00:00:00 2001 From: nareix Date: Wed, 8 Jun 2016 14:36:58 +0800 Subject: [PATCH 22/34] change repo to github.com/nareix/ffmpeg --- README.md | 77 ---------------------- aacdec.go | 82 ----------------------- aacenc.go | 93 -------------------------- audio.go | 189 ----------------------------------------------------- ffmpeg.h | 11 ---- h264dec.go | 88 ------------------------- h264enc.go | 
146 ----------------------------------------- util.go | 48 -------------- 8 files changed, 734 deletions(-) delete mode 100644 README.md delete mode 100644 aacdec.go delete mode 100644 aacenc.go delete mode 100644 audio.go delete mode 100644 ffmpeg.h delete mode 100644 h264dec.go delete mode 100644 h264enc.go delete mode 100644 util.go diff --git a/README.md b/README.md deleted file mode 100644 index ac8920c..0000000 --- a/README.md +++ /dev/null @@ -1,77 +0,0 @@ - -codec -==== - -Golang aac/h264 encoder and decoder. - -H264 encoding example: - -```go -w := 400 -h := 400 -var nal [][]byte - -c, _ := codec.NewH264Encoder(w, h, image.YCbCrSubsampleRatio420) -nal = append(nal, c.Header) - -for i := 0; i < 60; i++ { - img := image.NewYCbCr(image.Rect(0,0,w,h), image.YCbCrSubsampleRatio420) - p, _ := c.Encode(img) - if len(p.Data) > 0 { - nal = append(nal, p.Data) - } -} -for { - // flush encoder - p, err := c.Encode(nil) - if err != nil { - break - } - nal = append(nal, p.Data) -} -``` - -H264 decoding example: - -```go -dec, err := codec.NewH264Decoder(nal[0]) -for i, n := range nal[1:] { - img, err := dec.Decode(n) - if err == nil { - fp, _ := os.Create(fmt.Sprintf("/tmp/dec-%d.jpg", i)) - jpeg.Encode(fp, img, nil) - fp.Close() - } -} -``` - -AAC encoding example: - -```go -var pkts [][]byte - -c, _ := codec.NewAACEncoder() -pkts = append(pkts, c.Header) - -for i := 0; i < 60; i++ { - var sample [8192]byte - p, _ := c.Encode(sample) - if len(p) > 0 { - pkts = append(pkts, p) - } -} -``` - -AAC decoding example: - -```go -dec, _ := codec.NewAACDecoder(pkts[0]) -for _, p := range pkts[1:] { - sample, err := dec.Decode(p) -} -``` - -License ----- - -All code is under WTFPL. 
You can use it for everything as you want :) diff --git a/aacdec.go b/aacdec.go deleted file mode 100644 index efa9653..0000000 --- a/aacdec.go +++ /dev/null @@ -1,82 +0,0 @@ -package codec - -import ( - /* - #include - #include - #include - #include - - typedef struct { - AVCodec *c; - AVCodecContext *ctx; - AVFrame *f; - int got; - } aacdec_t ; - - static int aacdec_new(aacdec_t *m, uint8_t *buf, int len) { - m->c = avcodec_find_decoder(CODEC_ID_AAC); - m->ctx = avcodec_alloc_context3(m->c); - m->f = av_frame_alloc(); - m->ctx->extradata = buf; - m->ctx->extradata_size = len; - m->ctx->debug = 0x3; - av_log(m->ctx, AV_LOG_DEBUG, "m %p\n", m); - return avcodec_open2(m->ctx, m->c, 0); - } - - static int aacdec_decode(aacdec_t *m, uint8_t *data, int len) { - AVPacket pkt; - av_init_packet(&pkt); - pkt.data = data; - pkt.size = len; - av_log(m->ctx, AV_LOG_DEBUG, "decode %p\n", m); - return avcodec_decode_audio4(m->ctx, m->f, &m->got, &pkt); - } - */ - "C" - "errors" - "unsafe" -) - -type AACDecoder struct { - m C.aacdec_t -} - -func NewAACDecoder(header []byte) (m *AACDecoder, err error) { - m = &AACDecoder{} - r := C.aacdec_new(&m.m, - (*C.uint8_t)(unsafe.Pointer(&header[0])), - (C.int)(len(header)), - ) - if int(r) < 0 { - err = errors.New("open codec failed") - } - return -} - -func (m *AACDecoder) Decode(data []byte) (sample []byte, err error) { - r := C.aacdec_decode( - &m.m, - (*C.uint8_t)(unsafe.Pointer(&data[0])), - (C.int)(len(data)), - ) - if int(r) < 0 { - err = errors.New("decode failed") - return - } - if int(m.m.got) == 0 { - err = errors.New("no data") - return - } - size := int(m.m.f.linesize[0]) * 2 - sample = make([]byte, size*2) - for i := 0; i < 2; i++ { - C.memcpy( - unsafe.Pointer(&sample[i*size]), - unsafe.Pointer(m.m.f.data[i]), - (C.size_t)(size), - ) - } - return -} diff --git a/aacenc.go b/aacenc.go deleted file mode 100644 index b350d1e..0000000 --- a/aacenc.go +++ /dev/null @@ -1,93 +0,0 @@ -package codec - -import ( - /* - #include - 
#include - #include - - typedef struct { - AVCodec *c; - AVCodecContext *ctx; - AVFrame *f; - int got; - uint8_t buf[1024*10]; int size; - int samplerate; int bitrate; - int channels; - } aacenc_t ; - - static int aacenc_new(aacenc_t *m) { - m->c = avcodec_find_encoder_by_name("aac"); - m->ctx = avcodec_alloc_context3(m->c); - m->ctx->sample_fmt = AV_SAMPLE_FMT_FLTP; - m->ctx->sample_rate = m->samplerate; - m->ctx->bit_rate = m->bitrate; - m->ctx->channels = m->channels; - m->ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; - m->f = av_frame_alloc(); - int r = avcodec_open2(m->ctx, m->c, 0); - //av_log(m->ctx, AV_LOG_DEBUG, "extra %d\n", m->ctx->extradata_size); - return r; - } - - static void aacenc_encode(aacenc_t *m) { - AVPacket pkt; - av_init_packet(&pkt); - pkt.data = m->buf; - pkt.size = sizeof(m->buf); - m->f->nb_samples = 1024; - m->f->extended_data = m->f->data; - m->f->linesize[0] = 1024*4; - //m->f->linesize[1] = 1024*4; - avcodec_encode_audio2(m->ctx, &pkt, m->f, &m->got); - av_log(m->ctx, AV_LOG_DEBUG, "got %d size %d\n", m->got, pkt.size); - m->size = pkt.size; - } - */ - "C" - "errors" - "unsafe" -) - -type AACEncoder struct { - m C.aacenc_t - Header []byte -} - -// only supported fltp,stereo,44100HZ. 
If you need other config, it's easy to modify code -func NewAACEncoder() (m *AACEncoder, err error) { - m = &AACEncoder{} - m.m.samplerate = 44100 - m.m.bitrate = 50000 - m.m.channels = 1 - r := C.aacenc_new(&m.m) - if int(r) != 0 { - err = errors.New("open codec failed") - return - } - m.Header = make([]byte, (int)(m.m.ctx.extradata_size)) - C.memcpy( - unsafe.Pointer(&m.Header[0]), - unsafe.Pointer(m.m.ctx.extradata), - (C.size_t)(len(m.Header)), - ) - return -} - -func (m *AACEncoder) Encode(sample []byte) (ret []byte, err error) { - m.m.f.data[0] = (*C.uint8_t)(unsafe.Pointer(&sample[0])) - //m.m.f.data[1] = (*C.uint8_t)(unsafe.Pointer(&sample[1024*4])) - C.aacenc_encode(&m.m) - if int(m.m.got) == 0 { - err = errors.New("no data") - return - } - ret = make([]byte, (int)(m.m.size)) - C.memcpy( - unsafe.Pointer(&ret[0]), - unsafe.Pointer(&m.m.buf[0]), - (C.size_t)(m.m.size), - ) - return -} - diff --git a/audio.go b/audio.go deleted file mode 100644 index 2456c38..0000000 --- a/audio.go +++ /dev/null @@ -1,189 +0,0 @@ -package codec - -import ( - /* - #include "ffmpeg.h" - int wrap_avcodec_decode_audio4(AVCodecContext *ctx, AVFrame *frame, void *data, int size, int *got) { - struct AVPacket pkt = {.data = data, .size = size}; - return avcodec_decode_audio4(ctx, frame, got, &pkt); - } - void set_sample_fmt(AVCodecContext *ctx, int sample_fmt) { - ctx->sample_fmt = sample_fmt; - } - int wrap_av_get_bytes_per_sample(int sample_fmt) { - return av_get_bytes_per_sample(sample_fmt); - } - */ - "C" - "unsafe" - "fmt" -) - -type SampleFormat int - -func (self SampleFormat) BytesPerSample() int { - return int(C.wrap_av_get_bytes_per_sample(C.int(self))) -} - -const ( - S16 = SampleFormat(C.AV_SAMPLE_FMT_S16) - FLTP = SampleFormat(C.AV_SAMPLE_FMT_FLTP) -) - -type AudioEncoder struct { - ff C.FFCtx - SampleRate int - BitRate int - ChannelCount int - SampleFormat SampleFormat - FrameSampleCount int -} - -func (self *AudioEncoder) Setup() (err error) { - ff := &self.ff - - 
ff.frame = C.av_frame_alloc() - if self.BitRate == 0 { - self.BitRate = 50000 - } - C.set_sample_fmt(ff.codecCtx, C.int(self.SampleFormat)) - ff.codecCtx.sample_rate = C.int(self.SampleRate) - ff.codecCtx.bit_rate = C.int(self.BitRate) - ff.codecCtx.channels = C.int(self.ChannelCount) - ff.codecCtx.strict_std_compliance = C.FF_COMPLIANCE_EXPERIMENTAL - if C.avcodec_open2(ff.codecCtx, ff.codec, nil) != 0 { - err = fmt.Errorf("avcodec_open2 failed") - return - } - self.SampleFormat = SampleFormat(int(ff.codecCtx.sample_fmt)) - self.ChannelCount = int(ff.codecCtx.channels) - self.FrameSampleCount = int(ff.codecCtx.frame_size) - - return -} - -func (self *AudioEncoder) Extradata() (data []byte) { - data = C.GoBytes(unsafe.Pointer(self.ff.codecCtx.extradata), self.ff.codecCtx.extradata_size) - return -} - -func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt []byte, err error) { - ff := &self.ff - nbSamples := self.FrameSampleCount - channelCount := int(ff.codecCtx.channels) - sampleSize := int(C.av_get_bytes_per_sample(ff.codecCtx.sample_fmt)) - expectedSize := nbSamples*sampleSize*channelCount - - frame := ff.frame - if flush { - frame = nil - } else { - if len(sample) != expectedSize { - err = fmt.Errorf("len(sample) should be %d", expectedSize) - return - } - - frame.nb_samples = C.int(nbSamples) - frame.format = C.int(ff.codecCtx.sample_fmt) - frame.channel_layout = ff.codecCtx.channel_layout - if C.av_sample_fmt_is_planar(ff.codecCtx.sample_fmt) != 0 { - for i := 0; i < self.ChannelCount; i++ { - frame.data[i] = (*C.uint8_t)(unsafe.Pointer(&sample[i*nbSamples*sampleSize])) - frame.linesize[i] = C.int(nbSamples*sampleSize) - } - } else { - frame.data[0] = (*C.uint8_t)(unsafe.Pointer(&sample[0])) - frame.linesize[0] = C.int(channelCount*nbSamples*sampleSize) - } - //frame.extended_data = &frame.data[0] - } - - cpkt := C.AVPacket{} - cgotpkt := C.int(0) - cerr := C.avcodec_encode_audio2(self.ff.codecCtx, &cpkt, frame, &cgotpkt) - if cerr < 
C.int(0) { - err = fmt.Errorf("avcodec_encode_audio2 failed: %d", cerr) - return - } - - if cgotpkt != 0 { - gotPkt = true - pkt = C.GoBytes(unsafe.Pointer(cpkt.data), cpkt.size) - C.av_free_packet(&cpkt) - } - - return -} - -type AudioDecoder struct { - ff C.FFCtx - ChannelCount int - SampleFormat SampleFormat - Extradata []byte -} - -func (self *AudioDecoder) Setup() (err error) { - ff := &self.ff - - ff.frame = C.av_frame_alloc() - - if len(self.Extradata) > 0 { - ff.codecCtx.extradata = (*C.uint8_t)(unsafe.Pointer(&self.Extradata[0])) - ff.codecCtx.extradata_size = C.int(len(self.Extradata)) - } - - ff.codecCtx.channels = C.int(self.ChannelCount) - if C.avcodec_open2(ff.codecCtx, ff.codec, nil) != 0 { - err = fmt.Errorf("avcodec_open2 failed") - return - } - self.SampleFormat = SampleFormat(int(ff.codecCtx.sample_fmt)) - self.ChannelCount = int(ff.codecCtx.channels) - - return -} - -func (self *AudioDecoder) Decode(frame []byte) (gotPkt bool, pkt []byte, err error) { - ff := &self.ff - - cgotpkt := C.int(0) - cerr := C.wrap_avcodec_decode_audio4(ff.codecCtx, ff.frame, unsafe.Pointer(&frame[0]), C.int(len(frame)), &cgotpkt) - if cerr < C.int(0) { - err = fmt.Errorf("avcodec_decode_audio4 failed: %d", cerr) - return - } - - if cgotpkt != C.int(0) { - gotPkt = true - //pkt = C.GoBytes(unsafe.Pointer(cpkt.data), cpkt.size) - size := C.av_samples_get_buffer_size(nil, ff.codecCtx.channels, ff.frame.nb_samples, ff.codecCtx.sample_fmt, C.int(1)) - pkt = C.GoBytes(unsafe.Pointer(ff.frame.data[0]), size) - } - - return -} - -func FindAudioEncoderByName(name string) (enc *AudioEncoder) { - ff := C.FFCtx{} - ff.codec = C.avcodec_find_encoder_by_name(C.CString(name)) - if ff.codec != nil { - ff.codecCtx = C.avcodec_alloc_context3(ff.codec) - if ff.codecCtx != nil { - return &AudioEncoder{ff: ff} - } - } - return nil -} - -func FindAudioDecoderByName(name string) (dec *AudioDecoder) { - ff := C.FFCtx{} - ff.codec = C.avcodec_find_decoder_by_name(C.CString(name)) - if 
ff.codec != nil { - ff.codecCtx = C.avcodec_alloc_context3(ff.codec) - if ff.codecCtx != nil { - return &AudioDecoder{ff: ff} - } - } - return nil -} - diff --git a/ffmpeg.h b/ffmpeg.h deleted file mode 100644 index b336d17..0000000 --- a/ffmpeg.h +++ /dev/null @@ -1,11 +0,0 @@ - -#include -#include -#include - -typedef struct { - AVCodec *codec; - AVCodecContext *codecCtx; - AVFrame *frame; -} FFCtx; - diff --git a/h264dec.go b/h264dec.go deleted file mode 100644 index 747051a..0000000 --- a/h264dec.go +++ /dev/null @@ -1,88 +0,0 @@ -package codec - -import ( - /* - #include - #include - #include - - typedef struct { - AVCodec *c; - AVCodecContext *ctx; - AVFrame *f; - int got; - } h264dec_t ; - - static int h264dec_new(h264dec_t *h, uint8_t *data, int len) { - h->c = avcodec_find_decoder(CODEC_ID_H264); - h->ctx = avcodec_alloc_context3(h->c); - h->f = av_frame_alloc(); - h->ctx->extradata = data; - h->ctx->extradata_size = len; - h->ctx->debug = 0x3; - return avcodec_open2(h->ctx, h->c, 0); - } - - static int h264dec_decode(h264dec_t *h, uint8_t *data, int len) { - AVPacket pkt; - av_init_packet(&pkt); - pkt.data = data; - pkt.size = len; - return avcodec_decode_video2(h->ctx, h->f, &h->got, &pkt); - } - */ - "C" - "errors" - "image" - "unsafe" -) - -type H264Decoder struct { - m C.h264dec_t -} - -func NewH264Decoder(header []byte) (m *H264Decoder, err error) { - m = &H264Decoder{} - r := C.h264dec_new( - &m.m, - (*C.uint8_t)(unsafe.Pointer(&header[0])), - (C.int)(len(header)), - ) - if int(r) < 0 { - err = errors.New("open codec failed") - } - return -} - -func (m *H264Decoder) Decode(nal []byte) (f *image.YCbCr, err error) { - r := C.h264dec_decode( - &m.m, - (*C.uint8_t)(unsafe.Pointer(&nal[0])), - (C.int)(len(nal)), - ) - if int(r) < 0 { - err = errors.New("decode failed") - return - } - if m.m.got == 0 { - err = errors.New("no picture") - return - } - - w := int(m.m.f.width) - h := int(m.m.f.height) - ys := int(m.m.f.linesize[0]) - cs := 
int(m.m.f.linesize[1]) - - f = &image.YCbCr{ - Y: fromCPtr(unsafe.Pointer(m.m.f.data[0]), ys*h), - Cb: fromCPtr(unsafe.Pointer(m.m.f.data[1]), cs*h/2), - Cr: fromCPtr(unsafe.Pointer(m.m.f.data[2]), cs*h/2), - YStride: ys, - CStride: cs, - SubsampleRatio: image.YCbCrSubsampleRatio420, - Rect: image.Rect(0, 0, w, h), - } - - return -} diff --git a/h264enc.go b/h264enc.go deleted file mode 100644 index 21bdb9f..0000000 --- a/h264enc.go +++ /dev/null @@ -1,146 +0,0 @@ -package codec - -import ( - - /* - #include - #include - #include - #include - #include - #include - #include - - typedef struct { - int w, h; - int pixfmt; - char *preset[2]; - char *profile; - int bitrate; - int got; - AVCodec *c; - AVCodecContext *ctx; - AVFrame *f; - AVPacket pkt; - } h264enc_t; - - static int h264enc_new(h264enc_t *m) { - m->c = avcodec_find_encoder(CODEC_ID_H264); - m->ctx = avcodec_alloc_context3(m->c); - m->ctx->width = m->w; - m->ctx->height = m->h; - m->ctx->bit_rate = m->bitrate; - m->ctx->pix_fmt = m->pixfmt; - m->ctx->flags |= CODEC_FLAG_GLOBAL_HEADER; - m->f = av_frame_alloc(); - return avcodec_open2(m->ctx, m->c, NULL); - } - - */ - "C" - "errors" - "image" - "strings" - "unsafe" - //"log" -) - -type H264Encoder struct { - m C.h264enc_t - Header []byte - Pixfmt image.YCbCrSubsampleRatio - W, H int -} - -func NewH264Encoder( - w, h int, - pixfmt image.YCbCrSubsampleRatio, - opts ...string, -) (m *H264Encoder, err error) { - m = &H264Encoder{} - m.m.w = (C.int)(w) - m.m.h = (C.int)(h) - m.W = w - m.H = h - m.Pixfmt = pixfmt - switch pixfmt { - case image.YCbCrSubsampleRatio444: - m.m.pixfmt = C.PIX_FMT_YUV444P - case image.YCbCrSubsampleRatio422: - m.m.pixfmt = C.PIX_FMT_YUV422P - case image.YCbCrSubsampleRatio420: - m.m.pixfmt = C.PIX_FMT_YUV420P - } - for _, opt := range opts { - a := strings.Split(opt, ",") - switch { - case a[0] == "preset" && len(a) == 3: - m.m.preset[0] = C.CString(a[1]) - m.m.preset[1] = C.CString(a[2]) - case a[0] == "profile" && len(a) == 2: - 
m.m.profile = C.CString(a[1]) - } - } - r := C.h264enc_new(&m.m) - if int(r) < 0 { - err = errors.New("open encoder failed") - return - } - m.Header = fromCPtr(unsafe.Pointer(m.m.ctx.extradata), (int)(m.m.ctx.extradata_size)) - //m.Header = fromCPtr(unsafe.Pointer(m.m.pps), (int)(m.m.ppslen)) - return -} - -type h264Out struct { - Data []byte - Key bool -} - -func (m *H264Encoder) Encode(img *image.YCbCr) (out h264Out, err error) { - var f *C.AVFrame - if img == nil { - f = nil - } else { - if img.SubsampleRatio != m.Pixfmt { - err = errors.New("image pixfmt not match") - return - } - if img.Rect.Dx() != m.W || img.Rect.Dy() != m.H { - err = errors.New("image size not match") - return - } - f = m.m.f - f.data[0] = (*C.uint8_t)(unsafe.Pointer(&img.Y[0])) - f.data[1] = (*C.uint8_t)(unsafe.Pointer(&img.Cb[0])) - f.data[2] = (*C.uint8_t)(unsafe.Pointer(&img.Cr[0])) - f.linesize[0] = (C.int)(img.YStride) - f.linesize[1] = (C.int)(img.CStride) - f.linesize[2] = (C.int)(img.CStride) - } - - C.av_init_packet(&m.m.pkt) - r := C.avcodec_encode_video2(m.m.ctx, &m.m.pkt, f, &m.m.got) - defer C.av_free_packet(&m.m.pkt) - if int(r) < 0 { - err = errors.New("encode failed") - return - } - if m.m.got == 0 { - err = errors.New("no picture") - return - } - if m.m.pkt.size == 0 { - err = errors.New("packet size == 0") - return - } - - out.Data = make([]byte, m.m.pkt.size) - C.memcpy( - unsafe.Pointer(&out.Data[0]), - unsafe.Pointer(m.m.pkt.data), - (C.size_t)(m.m.pkt.size), - ) - out.Key = (m.m.pkt.flags & C.AV_PKT_FLAG_KEY) != 0 - - return -} diff --git a/util.go b/util.go deleted file mode 100644 index 58ca3a7..0000000 --- a/util.go +++ /dev/null @@ -1,48 +0,0 @@ -/* - -Golang h264,aac decoder/encoder libav wrapper - - d, err = codec.NewAACEncoder() - data, err = d.Encode(samples) - - d, err = codec.NewAACDecoder(aaccfg) - samples, err = d.Decode(data) - - var img *image.YCbCr - d, err = codec.NewH264Encoder(640, 480) - img, err = d.Encode(img) - - d, err = 
codec.NewH264Decoder(pps) - img, err = d.Decode(nal) -*/ -package codec - -import ( - "reflect" - "unsafe" - - /* - #cgo LDFLAGS: -lavformat -lavutil -lavcodec - - #include - #include - - static void libav_init() { - av_register_all(); - av_log_set_level(AV_LOG_DEBUG); - } - */ - "C" -) - -func init() { - C.libav_init() -} - -func fromCPtr(buf unsafe.Pointer, size int) (ret []uint8) { - hdr := (*reflect.SliceHeader)((unsafe.Pointer(&ret))) - hdr.Cap = size - hdr.Len = size - hdr.Data = uintptr(buf) - return -} From 58acf9ee66009d50e95827df7402eff78b61c824 Mon Sep 17 00:00:00 2001 From: nareix Date: Wed, 8 Jun 2016 14:41:32 +0800 Subject: [PATCH 23/34] add codec.go --- codec.go | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 codec.go diff --git a/codec.go b/codec.go new file mode 100644 index 0000000..d4eb2db --- /dev/null +++ b/codec.go @@ -0,0 +1,46 @@ +package codec + +import ( + "github.com/nareix/av" +) + +type AudioCodecData struct { + CodecType int + CodecSampleRate int + CodecChannelLayout av.ChannelLayout + CodecSampleFormat av.SampleFormat +} + +func (self AudioCodecData) Type() int { + return self.CodecType +} + +func (self AudioCodecData) IsAudio() bool { + return true +} + +func (self AudioCodecData) IsVideo() bool { + return false +} + +func (self AudioCodecData) SampleRate() int { + return self.CodecSampleRate +} + +func (self AudioCodecData) ChannelLayout() av.ChannelLayout { + return self.CodecChannelLayout +} + +func (self AudioCodecData) SampleFormat() av.SampleFormat { + return self.CodecSampleFormat +} + +func NewPCMMulawCodecData() av.AudioCodecData { + return AudioCodecData{ + CodecType: av.PCM_MULAW, + CodecSampleFormat: av.S16, + CodecChannelLayout: av.CH_MONO, + CodecSampleRate: 8000, + } +} + From a5e9c7a0966bb943e167e01f807b6614b4d11d12 Mon Sep 17 00:00:00 2001 From: nareix Date: Wed, 8 Jun 2016 17:55:43 +0800 Subject: [PATCH 24/34] remove av.H264CodecData --- aacparser/parser.go | 4 
+--- h264parser/parser.go | 8 ++------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/aacparser/parser.go b/aacparser/parser.go index 29b28b2..1bcf0a4 100644 --- a/aacparser/parser.go +++ b/aacparser/parser.go @@ -365,14 +365,12 @@ func (self CodecData) MakeADTSHeader(samples int, payloadLength int) []byte { return MakeADTSHeader(self.ConfigInfo, samples, payloadLength) } -func NewCodecDataFromMPEG4AudioConfigBytes(config []byte) (codec av.AACCodecData, err error) { - self := CodecData{} +func NewCodecDataFromMPEG4AudioConfigBytes(config []byte) (self CodecData, err error) { self.Config = config if self.ConfigInfo, err = ParseMPEG4AudioConfig(config); err != nil { err = fmt.Errorf("parse MPEG4AudioConfig failed(%s)", err) return } - codec = self return } diff --git a/h264parser/parser.go b/h264parser/parser.go index 72ef921..6da4d53 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -575,8 +575,7 @@ func (self CodecData) Height() int { return int(self.SPSInfo.Height) } -func NewCodecDataFromAVCDecoderConfRecord(record []byte) (codec av.H264CodecData, err error) { - self := CodecData{} +func NewCodecDataFromAVCDecoderConfRecord(record []byte) (self CodecData, err error) { self.Record = record if self.RecordInfo, err = ParseAVCDecoderConfRecord(record); err != nil { return @@ -593,11 +592,10 @@ func NewCodecDataFromAVCDecoderConfRecord(record []byte) (codec av.H264CodecData err = fmt.Errorf("parse SPS failed(%s)", err) return } - codec = self return } -func NewCodecDataFromSPSAndPPS(sps, pps []byte) (codec av.H264CodecData, err error) { +func NewCodecDataFromSPSAndPPS(sps, pps []byte) (self CodecData, err error) { recordinfo := AVCDecoderConfRecord{} recordinfo.AVCProfileIndication = uint(sps[1]) recordinfo.ProfileCompatibility = uint(sps[2]) @@ -609,13 +607,11 @@ func NewCodecDataFromSPSAndPPS(sps, pps []byte) (codec av.H264CodecData, err err if err = WriteAVCDecoderConfRecord(buf, recordinfo); err != nil { return } - self := CodecData{} 
self.RecordInfo = recordinfo self.Record = buf.Bytes() if self.SPSInfo, err = ParseSPS(sps); err != nil { return } - codec = self return } From f8c8f6dc8e24f4e0c08658f58c47bcc54e6033a4 Mon Sep 17 00:00:00 2001 From: nareix Date: Sat, 11 Jun 2016 23:32:15 +0800 Subject: [PATCH 25/34] add pcma --- codec.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/codec.go b/codec.go index d4eb2db..e803d23 100644 --- a/codec.go +++ b/codec.go @@ -44,3 +44,12 @@ func NewPCMMulawCodecData() av.AudioCodecData { } } +func NewPCMAlawCodecData() av.AudioCodecData { + return AudioCodecData{ + CodecType: av.PCM_ALAW, + CodecSampleFormat: av.S16, + CodecChannelLayout: av.CH_MONO, + CodecSampleRate: 8000, + } +} + From 0ee725cc8d23f0dda824b9ad6759bb2bacc3e414 Mon Sep 17 00:00:00 2001 From: nareix Date: Sun, 12 Jun 2016 09:24:20 +0800 Subject: [PATCH 26/34] change error msg --- h264parser/parser.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/h264parser/parser.go b/h264parser/parser.go index 6da4d53..f3ae9b9 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -581,15 +581,15 @@ func NewCodecDataFromAVCDecoderConfRecord(record []byte) (self CodecData, err er return } if len(self.RecordInfo.SPS) == 0 { - err = fmt.Errorf("no SPS found in AVCDecoderConfRecord") + err = fmt.Errorf("h264parser: no SPS found in AVCDecoderConfRecord") return } if len(self.RecordInfo.PPS) == 0 { - err = fmt.Errorf("no PPS found in AVCDecoderConfRecord") + err = fmt.Errorf("h264parser: no PPS found in AVCDecoderConfRecord") return } if self.SPSInfo, err = ParseSPS(self.RecordInfo.SPS[0]); err != nil { - err = fmt.Errorf("parse SPS failed(%s)", err) + err = fmt.Errorf("h264parser: parse SPS failed(%s)", err) return } return @@ -704,7 +704,7 @@ const ( func ParseSliceHeaderFromNALU(packet []byte) (sliceType SliceType, err error) { if len(packet) <= 1 { - err = fmt.Errorf("packet too short to parse slice header") + err = fmt.Errorf("h264parser: packet too short 
to parse slice header") return } @@ -715,7 +715,7 @@ func ParseSliceHeaderFromNALU(packet []byte) (sliceType SliceType, err error) { // slice_data_partition_a_layer_rbsp default: - err = fmt.Errorf("nal_unit_type=%d has no slice header", nal_unit_type) + err = fmt.Errorf("h264parser: nal_unit_type=%d has no slice header", nal_unit_type) return } @@ -740,7 +740,7 @@ func ParseSliceHeaderFromNALU(packet []byte) (sliceType SliceType, err error) { case 2,4,7,9: sliceType = I default: - err = fmt.Errorf("slice_type=%d invalid", u) + err = fmt.Errorf("h264parser: slice_type=%d invalid", u) return } From 323db22c7e67053f5778ab21a51c64c65bc8dfab Mon Sep 17 00:00:00 2001 From: nareix Date: Sun, 12 Jun 2016 11:15:30 +0800 Subject: [PATCH 27/34] rename fields --- aacparser/parser.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/aacparser/parser.go b/aacparser/parser.go index 1bcf0a4..2bd4bf3 100644 --- a/aacparser/parser.go +++ b/aacparser/parser.go @@ -329,8 +329,8 @@ func WriteMPEG4AudioConfig(w io.Writer, config MPEG4AudioConfig) (err error) { } type CodecData struct { - Config []byte - ConfigInfo MPEG4AudioConfig + ConfigBytes []byte + Config MPEG4AudioConfig } func (self CodecData) IsVideo() bool { @@ -346,15 +346,15 @@ func (self CodecData) Type() int { } func (self CodecData) MPEG4AudioConfigBytes() []byte { - return self.Config + return self.ConfigBytes } func (self CodecData) ChannelLayout() av.ChannelLayout { - return self.ConfigInfo.ChannelLayout + return self.Config.ChannelLayout } func (self CodecData) SampleRate() int { - return self.ConfigInfo.SampleRate + return self.Config.SampleRate } func (self CodecData) SampleFormat() av.SampleFormat { @@ -362,12 +362,12 @@ func (self CodecData) SampleFormat() av.SampleFormat { } func (self CodecData) MakeADTSHeader(samples int, payloadLength int) []byte { - return MakeADTSHeader(self.ConfigInfo, samples, payloadLength) + return MakeADTSHeader(self.Config, samples, payloadLength) } 
func NewCodecDataFromMPEG4AudioConfigBytes(config []byte) (self CodecData, err error) { - self.Config = config - if self.ConfigInfo, err = ParseMPEG4AudioConfig(config); err != nil { + self.ConfigBytes = config + if self.Config, err = ParseMPEG4AudioConfig(config); err != nil { err = fmt.Errorf("parse MPEG4AudioConfig failed(%s)", err) return } From a1c38b0cd0c94af21816fcd3086e84212ee9a2a3 Mon Sep 17 00:00:00 2001 From: nareix Date: Wed, 22 Jun 2016 17:58:19 +0800 Subject: [PATCH 28/34] add PacketDuration --- aacparser/parser.go | 16 +++++++-------- codec.go | 46 ++++++++++++++++---------------------------- h264parser/parser.go | 10 +--------- 3 files changed, 25 insertions(+), 47 deletions(-) diff --git a/aacparser/parser.go b/aacparser/parser.go index 2bd4bf3..03733d4 100644 --- a/aacparser/parser.go +++ b/aacparser/parser.go @@ -3,6 +3,7 @@ package aacparser import ( "github.com/nareix/bits" "github.com/nareix/av" + "time" "fmt" "bytes" "io" @@ -333,15 +334,7 @@ type CodecData struct { Config MPEG4AudioConfig } -func (self CodecData) IsVideo() bool { - return false -} - -func (self CodecData) IsAudio() bool { - return true -} - -func (self CodecData) Type() int { +func (self CodecData) Type() av.CodecType { return av.AAC } @@ -361,6 +354,11 @@ func (self CodecData) SampleFormat() av.SampleFormat { return av.FLTP } +func (self CodecData) PacketDuration(data []byte) (dur time.Duration, err error) { + dur = time.Duration(1024) * time.Second / time.Duration(self.Config.SampleRate) + return +} + func (self CodecData) MakeADTSHeader(samples int, payloadLength int) []byte { return MakeADTSHeader(self.Config, samples, payloadLength) } diff --git a/codec.go b/codec.go index e803d23..ab1ad46 100644 --- a/codec.go +++ b/codec.go @@ -2,54 +2,42 @@ package codec import ( "github.com/nareix/av" + "time" ) -type AudioCodecData struct { - CodecType int - CodecSampleRate int - CodecChannelLayout av.ChannelLayout - CodecSampleFormat av.SampleFormat +type PCMUCodecData struct { + 
typ av.CodecType } -func (self AudioCodecData) Type() int { - return self.CodecType +func (self PCMUCodecData) Type() av.CodecType { + return self.typ } -func (self AudioCodecData) IsAudio() bool { - return true +func (self PCMUCodecData) SampleRate() int { + return 8000 } -func (self AudioCodecData) IsVideo() bool { - return false +func (self PCMUCodecData) ChannelLayout() av.ChannelLayout { + return av.CH_MONO } -func (self AudioCodecData) SampleRate() int { - return self.CodecSampleRate +func (self PCMUCodecData) SampleFormat() av.SampleFormat { + return av.S16 } -func (self AudioCodecData) ChannelLayout() av.ChannelLayout { - return self.CodecChannelLayout -} - -func (self AudioCodecData) SampleFormat() av.SampleFormat { - return self.CodecSampleFormat +func (self PCMUCodecData) PacketDuration(data []byte) (time.Duration, error) { + return time.Duration(len(data)) * time.Second / time.Duration(8000), nil } func NewPCMMulawCodecData() av.AudioCodecData { - return AudioCodecData{ - CodecType: av.PCM_MULAW, - CodecSampleFormat: av.S16, - CodecChannelLayout: av.CH_MONO, - CodecSampleRate: 8000, + return PCMUCodecData{ + typ: av.PCM_MULAW, } } func NewPCMAlawCodecData() av.AudioCodecData { - return AudioCodecData{ - CodecType: av.PCM_ALAW, - CodecSampleFormat: av.S16, - CodecChannelLayout: av.CH_MONO, - CodecSampleRate: 8000, + return PCMUCodecData{ + typ: av.PCM_ALAW, } } diff --git a/h264parser/parser.go b/h264parser/parser.go index f3ae9b9..ea3506b 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -543,14 +543,10 @@ type CodecData struct { SPSInfo SPSInfo } -func (self CodecData) Type() int { +func (self CodecData) Type() av.CodecType { return av.H264 } -func (self CodecData) IsVideo() bool { - return true -} - func (self CodecData) AVCDecoderConfRecordBytes() []byte { return self.Record } @@ -563,10 +559,6 @@ func (self CodecData) PPS() []byte { return self.RecordInfo.PPS[0] } -func (self CodecData) IsAudio() bool { - return false -} - func (self 
CodecData) Width() int { return int(self.SPSInfo.Width) } From 32f53b873e88b5292c0988d418c543c161031ee5 Mon Sep 17 00:00:00 2001 From: nareix Date: Wed, 29 Jun 2016 17:34:37 +0800 Subject: [PATCH 29/34] add some codec --- codec.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/codec.go b/codec.go index ab1ad46..c0de8af 100644 --- a/codec.go +++ b/codec.go @@ -41,3 +41,11 @@ func NewPCMAlawCodecData() av.AudioCodecData { } } +func NewNellyMoserCodecData() av.AudioCodecData { + return PCMUCodecData{typ: av.NELLYMOSER} +} + +func NewSpeexCodecData() av.AudioCodecData { + return PCMUCodecData{typ: av.SPEEX} +} + From a3104378041bfb7ededcd2f444e2da7acfdefd1f Mon Sep 17 00:00:00 2001 From: nareix Date: Wed, 29 Jun 2016 17:35:03 +0800 Subject: [PATCH 30/34] add fake --- fake/fake.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 fake/fake.go diff --git a/fake/fake.go b/fake/fake.go new file mode 100644 index 0000000..90c20f3 --- /dev/null +++ b/fake/fake.go @@ -0,0 +1,14 @@ +package fake + +import ( + "github.com/nareix/av" +) + +type CodecData struct { + Typ av.CodecType +} + +func (self CodecData) Type() av.CodecType { + return self.Typ +} + From d4285826d95369aa400f614c3241c9fc759afc40 Mon Sep 17 00:00:00 2001 From: nareix Date: Fri, 1 Jul 2016 17:53:33 +0800 Subject: [PATCH 31/34] rename consts --- aacparser/parser.go | 2 +- h264parser/parser.go | 25 ++++++++++++++++--------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/aacparser/parser.go b/aacparser/parser.go index 03733d4..91e152b 100644 --- a/aacparser/parser.go +++ b/aacparser/parser.go @@ -133,7 +133,7 @@ func MakeADTSHeader(config MPEG4AudioConfig, samples int, payloadLength int) (he return } -func ExtractADTSFrames(frames []byte) (config MPEG4AudioConfig, payload [][]byte, samples int, err error) { +func SplitADTSFrames(frames []byte) (config MPEG4AudioConfig, payload [][]byte, samples int, err error) { for len(frames) > 0 { var n, framelen int var 
_payload []byte diff --git a/h264parser/parser.go b/h264parser/parser.go index ea3506b..b8070ee 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -9,6 +9,13 @@ import ( "bytes" ) +const ( + NALU_SEI = 6 + NALU_PPS = 7 + NALU_SPS = 8 + NALU_AUD = 9 +) + /* From: http://stackoverflow.com/questions/24884827/possible-locations-for-sequence-picture-parameter-sets-for-h-264-stream @@ -677,20 +684,20 @@ type SliceType uint func (self SliceType) String() string { switch self { - case P: + case SLICE_P: return "P" - case B: + case SLICE_B: return "B" - case I: + case SLICE_I: return "I" } return "" } const ( - P = iota+1 - B - I + SLICE_P = iota+1 + SLICE_B + SLICE_I ) func ParseSliceHeaderFromNALU(packet []byte) (sliceType SliceType, err error) { @@ -726,11 +733,11 @@ func ParseSliceHeaderFromNALU(packet []byte) (sliceType SliceType, err error) { switch u { case 0,3,5,8: - sliceType = P + sliceType = SLICE_P case 1,6: - sliceType = B + sliceType = SLICE_B case 2,4,7,9: - sliceType = I + sliceType = SLICE_I default: err = fmt.Errorf("h264parser: slice_type=%d invalid", u) return From 6d4ade40be7a24bca787033e7eae9140f16cf3f1 Mon Sep 17 00:00:00 2001 From: nareix Date: Fri, 1 Jul 2016 17:57:27 +0800 Subject: [PATCH 32/34] add NALU_RAW/AVCC/ANNEXB --- h264parser/parser.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/h264parser/parser.go b/h264parser/parser.go index b8070ee..fd30e7c 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -215,9 +215,15 @@ func WalkNALUsAVCC(nalus [][]byte, write func([]byte)) { } } -func SplitNALUs(b []byte) (nalus [][]byte, ok bool) { +const ( + NALU_RAW = iota + NALU_AVCC + NALU_ANNEXB +) + +func SplitNALUs(b []byte) (nalus [][]byte, typ int) { if len(b) < 4 { - return [][]byte{b}, false + return [][]byte{b}, NALU_RAW } val3 := bits.GetUIntBE(b, 24) @@ -241,7 +247,7 @@ func SplitNALUs(b []byte) (nalus [][]byte, ok bool) { } } if len(_b) == 0 { - return nalus, true + return nalus, 
NALU_AVCC } } @@ -285,11 +291,11 @@ func SplitNALUs(b []byte) (nalus [][]byte, ok bool) { } } } - ok = true + typ = NALU_ANNEXB return } - return [][]byte{b}, false + return [][]byte{b}, NALU_RAW } type SPSInfo struct { From 116764e6c318ff265ecd290a649d090e9267f08a Mon Sep 17 00:00:00 2001 From: nareix Date: Fri, 1 Jul 2016 18:02:43 +0800 Subject: [PATCH 33/34] add CheckNALUsType() --- h264parser/parser.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/h264parser/parser.go b/h264parser/parser.go index fd30e7c..cfceb21 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -215,6 +215,25 @@ func WalkNALUsAVCC(nalus [][]byte, write func([]byte)) { } } +func CheckNALUsType(b []byte) int { + if len(b) < 4 { + return NALU_RAW + } + + val3 := bits.GetUIntBE(b, 24) + val4 := bits.GetUIntBE(b, 32) + + if val4+4 == uint(len(b)) { + return NALU_AVCC + } + + if val4 == 1 || val3 == 1 { + return NALU_ANNEXB + } + + return NALU_RAW +} + const ( NALU_RAW = iota NALU_AVCC From cef54937c5de3d17c5b4d14a489d4c445d613ef8 Mon Sep 17 00:00:00 2001 From: nareix Date: Fri, 1 Jul 2016 19:19:44 +0800 Subject: [PATCH 34/34] add FindDataNALUInAVCCNALUs and IsDataNALU --- h264parser/parser.go | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/h264parser/parser.go b/h264parser/parser.go index cfceb21..ff43cc7 100644 --- a/h264parser/parser.go +++ b/h264parser/parser.go @@ -16,6 +16,11 @@ const ( NALU_AUD = 9 ) +func IsDataNALU(b []byte) bool { + typ := b[0] & 0x1f + return typ >= 1 && typ <= 5 +} + /* From: http://stackoverflow.com/questions/24884827/possible-locations-for-sequence-picture-parameter-sets-for-h-264-stream @@ -215,23 +220,23 @@ func WalkNALUsAVCC(nalus [][]byte, write func([]byte)) { } } -func CheckNALUsType(b []byte) int { - if len(b) < 4 { - return NALU_RAW +func CheckNALUsType(b []byte) (typ int) { + _, typ = SplitNALUs(b) + return +} + +func FindDataNALUInAVCCNALUs(b []byte) (data []byte, 
ok bool) { + var typ int + var nalus [][]byte + if nalus, typ = SplitNALUs(b); typ != NALU_AVCC { + return } - - val3 := bits.GetUIntBE(b, 24) - val4 := bits.GetUIntBE(b, 32) - - if val4+4 == uint(len(b)) { - return NALU_AVCC + for _, nalu := range nalus { + if IsDataNALU(nalu) { + return nalu, true + } } - - if val4 == 1 || val3 == 1 { - return NALU_ANNEXB - } - - return NALU_RAW + return } const (