Add 'codec/' from commit 'cef54937c5de3d17c5b4d14a489d4c445d613ef8'

git-subtree-dir: codec git-subtree-mainline: 8b0ee7d5c264e0de454a9d23a5644b04ce8f2864 git-subtree-split: cef54937c5de3d17c5b4d14a489d4c445d613ef8
2016-07-01 21:37:19 +08:00 · 2016-07-01 21:37:19 +08:00 · 021f28e76c
commit 021f28e76c
parent 8b0ee7d5c2 cef54937c5
5 changed files with 1280 additions and 0 deletions
--- a/codec/aacparser/parser.go
+++ b/codec/aacparser/parser.go
@ -0,0 +1,374 @@
+package aacparser
+
+import (
+	"github.com/nareix/bits"
+	"github.com/nareix/av"
+	"time"
+	"fmt"
+	"bytes"
+	"io"
+)
+
+// MPEG-4 Audio Object Types, copied from libavcodec/mpeg4audio.h.
+//
+// The numbering has two gaps (10-11 and 18 are unassigned). iota keeps
+// counting every line from the top of this block, so after a gap the offset
+// added to iota is "wanted value - position in block", not the wanted value
+// itself: AOT_TTSI sits at position 9 and must be 12, AOT_ER_AAC_LTP sits at
+// position 15 and must be 19.
+const (
+	AOT_AAC_MAIN        = 1 + iota // 1:  Y                       Main
+	AOT_AAC_LC                     // 2:  Y                       Low Complexity
+	AOT_AAC_SSR                    // 3:  N (code in SoC repo)    Scalable Sample Rate
+	AOT_AAC_LTP                    // 4:  Y                       Long Term Prediction
+	AOT_SBR                        // 5:  Y                       Spectral Band Replication
+	AOT_AAC_SCALABLE               // 6:  N                       Scalable
+	AOT_TWINVQ                     // 7:  N                       Twin Vector Quantizer
+	AOT_CELP                       // 8:  N                       Code Excited Linear Prediction
+	AOT_HVXC                       // 9:  N                       Harmonic Vector eXcitation Coding
+	AOT_TTSI            = 3 + iota // 12: N                       Text-To-Speech Interface
+	AOT_MAINSYNTH                  // 13: N                       Main Synthesis
+	AOT_WAVESYNTH                  // 14: N                       Wavetable Synthesis
+	AOT_MIDI                       // 15: N                       General MIDI
+	AOT_SAFX                       // 16: N                       Algorithmic Synthesis and Audio Effects
+	AOT_ER_AAC_LC                  // 17: N                       Error Resilient Low Complexity
+	AOT_ER_AAC_LTP      = 4 + iota // 19: N                       Error Resilient Long Term Prediction
+	AOT_ER_AAC_SCALABLE            // 20: N                       Error Resilient Scalable
+	AOT_ER_TWINVQ                  // 21: N                       Error Resilient Twin Vector Quantizer
+	AOT_ER_BSAC                    // 22: N                       Error Resilient Bit-Sliced Arithmetic Coding
+	AOT_ER_AAC_LD                  // 23: N                       Error Resilient Low Delay
+	AOT_ER_CELP                    // 24: N                       Error Resilient Code Excited Linear Prediction
+	AOT_ER_HVXC                    // 25: N                       Error Resilient Harmonic Vector eXcitation Coding
+	AOT_ER_HILN                    // 26: N                       Error Resilient Harmonic and Individual Lines plus Noise
+	AOT_ER_PARAM                   // 27: N                       Error Resilient Parametric
+	AOT_SSC                        // 28: N                       SinuSoidal Coding
+	AOT_PS                         // 29: N                       Parametric Stereo
+	AOT_SURROUND                   // 30: N                       MPEG Surround
+	AOT_ESCAPE                     // 31: Y                       Escape Value
+	AOT_L1                         // 32: Y                       Layer 1
+	AOT_L2                         // 33: Y                       Layer 2
+	AOT_L3                         // 34: Y                       Layer 3
+	AOT_DST                        // 35: N                       Direct Stream Transfer
+	AOT_ALS                        // 36: Y                       Audio LosslesS
+	AOT_SLS                        // 37: N                       Scalable LosslesS
+	AOT_SLS_NON_CORE               // 38: N                       Scalable LosslesS (non core)
+	AOT_ER_AAC_ELD                 // 39: N                       Error Resilient Enhanced Low Delay
+	AOT_SMR_SIMPLE                 // 40: N                       Symbolic Music Representation Simple
+	AOT_SMR_MAIN                   // 41: N                       Symbolic Music Representation Main
+	AOT_USAC_NOSBR                 // 42: N                       Unified Speech and Audio Coding (no SBR)
+	AOT_SAOC                       // 43: N                       Spatial Audio Object Coding
+	AOT_LD_SURROUND                // 44: N                       Low Delay MPEG Surround
+	AOT_USAC                       // 45: N                       Unified Speech and Audio Coding
+)
+
+// MPEG4AudioConfig holds the audio parameters carried by an ADTS header or
+// an MPEG-4 AudioSpecificConfig. The parsers in this file fill ObjectType,
+// SampleRateIndex and ChannelConfig; Complete derives SampleRate and
+// ChannelLayout from the two index fields.
+type MPEG4AudioConfig struct {
+	SampleRate      int              // samples per second, looked up from sampleRateTable
+	ChannelLayout   av.ChannelLayout // looked up from chanConfigTable
+	ObjectType      uint             // audio object type (one of the AOT_* values)
+	SampleRateIndex uint             // index into sampleRateTable
+	ChannelConfig   uint             // index into chanConfigTable
+}
+
+// sampleRateTable maps a sampling frequency index to its sample rate.
+var sampleRateTable = []int{
+	0:  96000,
+	1:  88200,
+	2:  64000,
+	3:  48000,
+	4:  44100,
+	5:  32000,
+	6:  24000,
+	7:  22050,
+	8:  16000,
+	9:  12000,
+	10: 11025,
+	11: 8000,
+	12: 7350,
+}
+
+/*
+chanConfigTable is indexed by the channel configuration value.
+These are the channel configurations:
+0: Defined in AOT Specific Config
+1: 1 channel: front-center
+2: 2 channels: front-left, front-right
+3: 3 channels: front-center, front-left, front-right
+4: 4 channels: front-center, front-left, front-right, back-center
+5: 5 channels: front-center, front-left, front-right, back-left, back-right
+6: 6 channels: front-center, front-left, front-right, back-left, back-right, LFE-channel
+7: 8 channels: front-center, front-left, front-right, side-left, side-right, back-left, back-right, LFE-channel
+8-15: Reserved (no table entry)
+*/
+var chanConfigTable = []av.ChannelLayout{
+	0,
+	av.CH_FRONT_CENTER,
+	av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT,
+	av.CH_FRONT_CENTER|av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT,
+	av.CH_FRONT_CENTER|av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT|av.CH_BACK_CENTER,
+	av.CH_FRONT_CENTER|av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT|av.CH_BACK_LEFT|av.CH_BACK_RIGHT,
+	av.CH_FRONT_CENTER|av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT|av.CH_BACK_LEFT|av.CH_BACK_RIGHT|av.CH_LOW_FREQ,
+	av.CH_FRONT_CENTER|av.CH_FRONT_LEFT|av.CH_FRONT_RIGHT|av.CH_SIDE_LEFT|av.CH_SIDE_RIGHT|av.CH_BACK_LEFT|av.CH_BACK_RIGHT|av.CH_LOW_FREQ,
+}
+
+// IsADTSFrame reports whether frames is more than 7 bytes long and begins
+// with the 12-bit ADTS syncword (all ones).
+func IsADTSFrame(frames []byte) bool {
+	if len(frames) <= 7 {
+		return false
+	}
+	syncHigh, syncLow := frames[0], frames[1]>>4
+	return syncHigh == 0xff && syncLow == 0xf
+}
+
+// ReadADTSFrame decodes the ADTS header at the start of frame.
+//
+// It returns the object type, sample-rate index and channel configuration
+// in config (SampleRate and ChannelLayout are not filled in here), the raw
+// payload following the 7- or 9-byte header, the sample count (1024 per raw
+// data block) and the total frame length taken from the header.
+// An error is returned when frame does not pass IsADTSFrame or when the
+// declared frame length is shorter than the header or longer than frame.
+func ReadADTSFrame(frame []byte) (config MPEG4AudioConfig, payload []byte, samples int, framelen int, err error) {
+	if !IsADTSFrame(frame) {
+		err = fmt.Errorf("not adts frame")
+		return
+	}
+
+	// Byte 2: profile(2) | sampling index(4) | private(1) | channel cfg MSB(1).
+	profile := frame[2] >> 6
+	rateIndex := (frame[2] >> 2) & 0xf
+	// The channel configuration straddles bytes 2 and 3.
+	chanConfig := (frame[2]&0x1)<<2 | frame[3]>>6
+
+	config.ObjectType = uint(profile) + 1
+	config.SampleRateIndex = uint(rateIndex)
+	config.ChannelConfig = uint(chanConfig)
+
+	// 13-bit frame length spread over bytes 3..5.
+	framelen = int(frame[3]&0x3)<<11 | int(frame[4])<<3 | int(frame[5]>>5)
+	blocks := int(frame[6]&0x3) + 1
+	samples = blocks * 1024
+
+	// protection_absent == 0 means a 2-byte CRC follows the fixed header.
+	hdrlen := 9
+	if frame[1]&0x1 != 0 {
+		hdrlen = 7
+	}
+
+	if framelen < hdrlen || len(frame) < framelen {
+		err = fmt.Errorf("invalid adts header length")
+		return
+	}
+	payload = frame[hdrlen:framelen]
+	return
+}
+
+// MakeADTSHeader builds a 7-byte ADTS header (no CRC) for a payload of
+// payloadLength bytes.
+//
+// config supplies the object type, sample-rate index and channel
+// configuration; samples is the number of samples in the frame and is
+// encoded as samples/1024-1 in the 2-bit "number of raw data blocks" field.
+// The encoded frame length is payloadLength plus the 7 header bytes.
+func MakeADTSHeader(config MPEG4AudioConfig, samples int, payloadLength int) (header []byte) {
+	payloadLength += 7
+	//AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP (QQQQQQQQ QQQQQQQQ)
+	// Template: syncword, protection absent, buffer fullness bits set.
+	header = []byte{0xff, 0xf1, 0x50, 0x80, 0x43, 0xff, 0xcd}
+	// E, F and the top bit of H (mirror of the decoding in ReadADTSFrame).
+	header[2] = (byte(config.ObjectType-1)&0x3)<<6 | (byte(config.SampleRateIndex)&0xf)<<2 | byte(config.ChannelConfig>>2)&0x1
+	// Low two bits of H.
+	header[3] = header[3]&0x3f | byte(config.ChannelConfig&0x3)<<6
+	// M: 13-bit frame length across bytes 3..5.
+	header[3] = header[3]&0xfc | byte(payloadLength>>11)&0x3
+	header[4] = byte(payloadLength >> 3)
+	header[5] = header[5]&0x1f | (byte(payloadLength)&0x7)<<5
+	// P: only two bits wide. Mask it so an out-of-range samples value cannot
+	// spill into the buffer-fullness bits that share this byte.
+	header[6] = header[6]&0xfc | byte(samples/1024-1)&0x3
+	return
+}
+
+// SplitADTSFrames walks a buffer of back-to-back ADTS frames, collecting
+// each frame's payload and summing the sample counts. config is the one
+// decoded from the last frame read. Parsing stops at the first frame that
+// ReadADTSFrame rejects, and that error is returned.
+func SplitADTSFrames(frames []byte) (config MPEG4AudioConfig, payload [][]byte, samples int, err error) {
+	for rest := frames; len(rest) > 0; {
+		var body []byte
+		var count, size int
+		config, body, count, size, err = ReadADTSFrame(rest)
+		if err != nil {
+			return
+		}
+		payload = append(payload, body)
+		samples += count
+		rest = rest[size:]
+	}
+	return
+}
+
+// ReadADTSHeader decodes the fixed 7-byte part of an ADTS header from data
+// with a bit reader and returns the object type, sample-rate index and
+// channel configuration in config plus the 13-bit frame length.
+//
+// NOTE(review): every ReadBits error is discarded and the syncword is not
+// checked, so truncated or non-ADTS input yields whatever values the reader
+// hands back instead of an error.
+func ReadADTSHeader(data []byte) (config MPEG4AudioConfig, frameLength int) {
+	br := &bits.Reader{R: bytes.NewReader(data)}
+	var i uint
+
+	//Structure
+	//AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP (QQQQQQQQ QQQQQQQQ)
+	//Header consists of 7 or 9 bytes (without or with CRC).
+
+	// 2 bytes
+	//A	12	syncword 0xFFF, all bits must be 1
+	br.ReadBits(12)
+	//B	1	MPEG Version: 0 for MPEG-4, 1 for MPEG-2
+	br.ReadBits(1)
+	//C	2	Layer: always 0
+	br.ReadBits(2)
+	//D	1	protection absent, Warning, set to 1 if there is no CRC and 0 if there is CRC
+	br.ReadBits(1)
+
+	//E	2	profile, the MPEG-4 Audio Object Type minus 1
+	config.ObjectType, _ = br.ReadBits(2)
+	config.ObjectType++
+	//F	4	MPEG-4 Sampling Frequency Index (15 is forbidden)
+	config.SampleRateIndex, _ = br.ReadBits(4)
+	//G	1	private bit, guaranteed never to be used by MPEG, set to 0 when encoding, ignore when decoding
+	br.ReadBits(1)
+	//H	3	MPEG-4 Channel Configuration (in the case of 0, the channel configuration is sent via an inband PCE)
+	config.ChannelConfig, _ = br.ReadBits(3)
+	//I	1	originality, set to 0 when encoding, ignore when decoding
+	br.ReadBits(1)
+	//J	1	home, set to 0 when encoding, ignore when decoding
+	br.ReadBits(1)
+	//K	1	copyrighted id bit, the next bit of a centrally registered copyright identifier, set to 0 when encoding, ignore when decoding
+	br.ReadBits(1)
+	//L	1	copyright id start, signals that this frame's copyright id bit is the first bit of the copyright id, set to 0 when encoding, ignore when decoding
+	br.ReadBits(1)
+
+	//M	13	frame length, this value must include 7 or 9 bytes of header length: FrameLength = (ProtectionAbsent == 1 ? 7 : 9) + size(AACFrame)
+	i, _ = br.ReadBits(13)
+	frameLength = int(i)
+	//O	11	Buffer fullness
+	br.ReadBits(11)
+	//P	2	Number of AAC frames (RDBs) in ADTS frame minus 1, for maximum compatibility always use 1 AAC frame per ADTS frame
+	br.ReadBits(2)
+
+	//Q	16	CRC if protection absent is 0 (not read here)
+	return
+}
+
+// readObjectType reads a 5-bit audio object type from r. When that value
+// equals AOT_ESCAPE, a further 6 bits are read and the result is 32 plus
+// those bits.
+func readObjectType(r *bits.Reader) (objectType uint, err error) {
+	objectType, err = r.ReadBits(5)
+	if err != nil || objectType != AOT_ESCAPE {
+		return
+	}
+
+	var ext uint
+	if ext, err = r.ReadBits(6); err != nil {
+		return
+	}
+	objectType = 32 + ext
+	return
+}
+
+// writeObjectType is the inverse of readObjectType: values below 32 are
+// written directly in 5 bits; larger values are written as AOT_ESCAPE in
+// 5 bits followed by objectType-32 in 6 bits.
+func writeObjectType(w *bits.Writer, objectType uint) (err error) {
+	if objectType < 32 {
+		return w.WriteBits(objectType, 5)
+	}
+	if err = w.WriteBits(AOT_ESCAPE, 5); err != nil {
+		return
+	}
+	return w.WriteBits(objectType-32, 6)
+}
+
+// readSampleRateIndex reads a 4-bit sampling frequency index from r. The
+// value 0xf is an escape: in that case the next 24 bits are read and
+// returned in its place.
+func readSampleRateIndex(r *bits.Reader) (index uint, err error) {
+	index, err = r.ReadBits(4)
+	if err == nil && index == 0xf {
+		index, err = r.ReadBits(24)
+	}
+	return
+}
+
+// writeSampleRateIndex writes index in 4 bits when it is below 0xf.
+// Otherwise it writes the 0xf escape in 4 bits followed by index in 24 bits.
+func writeSampleRateIndex(w *bits.Writer, index uint) (err error) {
+	if index < 0xf {
+		return w.WriteBits(index, 4)
+	}
+	if err = w.WriteBits(0xf, 4); err != nil {
+		return
+	}
+	return w.WriteBits(index, 24)
+}
+
+// IsValid reports whether the config has a non-zero object type.
+func (self MPEG4AudioConfig) IsValid() bool {
+	// ObjectType is unsigned, so "greater than zero" is "not zero".
+	return self.ObjectType != 0
+}
+
+// Complete returns a copy of the config with SampleRate and ChannelLayout
+// filled in from sampleRateTable and chanConfigTable. A field is left
+// untouched when its index falls outside the corresponding table.
+func (self MPEG4AudioConfig) Complete() (config MPEG4AudioConfig) {
+	// self is already a copy (value receiver), so it can be updated in place.
+	if int(self.SampleRateIndex) < len(sampleRateTable) {
+		self.SampleRate = sampleRateTable[self.SampleRateIndex]
+	}
+	if int(self.ChannelConfig) < len(chanConfigTable) {
+		self.ChannelLayout = chanConfigTable[self.ChannelConfig]
+	}
+	return self
+}
+
+// ParseMPEG4AudioConfig decodes an MPEG4AudioConfig from data via
+// ReadMPEG4AudioConfig and then resolves the sample rate and channel layout
+// with Complete. A read failure is wrapped in a "CodecData invalid" error.
+func ParseMPEG4AudioConfig(data []byte) (config MPEG4AudioConfig, err error) {
+	config, err = ReadMPEG4AudioConfig(bytes.NewReader(data))
+	if err != nil {
+		err = fmt.Errorf("CodecData invalid: parse MPEG4AudioConfig failed(%s)", err)
+		return
+	}
+	return config.Complete(), nil
+}
+
+// ReadMPEG4AudioConfig reads, in order, the object type, the sampling
+// frequency index and a 4-bit channel configuration from r. It stops at the
+// first read error and returns it; fields read before the error stay set.
+// SampleRate and ChannelLayout are not filled in here.
+func ReadMPEG4AudioConfig(r io.Reader) (config MPEG4AudioConfig, err error) {
+	// copied from libavcodec/mpeg4audio.c avpriv_mpeg4audio_get_config()
+	br := &bits.Reader{R: r}
+
+	config.ObjectType, err = readObjectType(br)
+	if err != nil {
+		return
+	}
+	config.SampleRateIndex, err = readSampleRateIndex(br)
+	if err != nil {
+		return
+	}
+	config.ChannelConfig, err = br.ReadBits(4)
+	return
+}
+
+// WriteMPEG4AudioConfig serialises config to w: object type, sampling
+// frequency index, then a 4-bit channel configuration, followed by a flush
+// of the bit writer.
+//
+// When SampleRateIndex is 0 the index is looked up from SampleRate in
+// sampleRateTable; when ChannelConfig is 0 it is looked up from
+// ChannelLayout in chanConfigTable. If no table entry matches, 0 is written.
+func WriteMPEG4AudioConfig(w io.Writer, config MPEG4AudioConfig) (err error) {
+	bw := &bits.Writer{W: w}
+
+	err = writeObjectType(bw, config.ObjectType)
+	if err != nil {
+		return
+	}
+
+	rateIndex := config.SampleRateIndex
+	if rateIndex == 0 {
+		for i := range sampleRateTable {
+			if sampleRateTable[i] == config.SampleRate {
+				rateIndex = uint(i)
+			}
+		}
+	}
+	err = writeSampleRateIndex(bw, rateIndex)
+	if err != nil {
+		return
+	}
+
+	chanConfig := config.ChannelConfig
+	if chanConfig == 0 {
+		for i := range chanConfigTable {
+			if chanConfigTable[i] == config.ChannelLayout {
+				chanConfig = uint(i)
+			}
+		}
+	}
+	err = bw.WriteBits(chanConfig, 4)
+	if err != nil {
+		return
+	}
+
+	return bw.FlushBits()
+}
+
+// CodecData pairs the raw MPEG-4 audio config bytes with their parsed form.
+type CodecData struct {
+	ConfigBytes []byte           // config bytes exactly as supplied by the caller
+	Config MPEG4AudioConfig // parsed form of ConfigBytes
+}
+
+// Type always returns av.AAC.
+func (self CodecData) Type() av.CodecType {
+	return av.AAC
+}
+
+// MPEG4AudioConfigBytes returns the stored ConfigBytes slice (not a copy).
+func (self CodecData) MPEG4AudioConfigBytes() []byte {
+	return self.ConfigBytes
+}
+
+// ChannelLayout returns the channel layout of the parsed config.
+func (self CodecData) ChannelLayout() av.ChannelLayout {
+	return self.Config.ChannelLayout
+}
+
+// SampleRate returns the sample rate of the parsed config.
+func (self CodecData) SampleRate() int {
+	return self.Config.SampleRate
+}
+
+// SampleFormat always returns av.FLTP.
+func (self CodecData) SampleFormat() av.SampleFormat {
+	return av.FLTP
+}
+
+// PacketDuration returns the duration of one packet, computed as 1024
+// samples at the configured sample rate. data is not inspected.
+//
+// An error is returned when the sample rate is not positive (for example
+// when the sample-rate index was outside the lookup table and SampleRate
+// stayed 0); without this guard the division below would panic.
+func (self CodecData) PacketDuration(data []byte) (dur time.Duration, err error) {
+	if self.Config.SampleRate <= 0 {
+		err = fmt.Errorf("aacparser: invalid sample rate %d", self.Config.SampleRate)
+		return
+	}
+	dur = time.Duration(1024) * time.Second / time.Duration(self.Config.SampleRate)
+	return
+}
+
+// MakeADTSHeader builds an ADTS header for this codec's config by
+// delegating to the package-level MakeADTSHeader.
+func (self CodecData) MakeADTSHeader(samples int, payloadLength int) []byte {
+	return MakeADTSHeader(self.Config, samples, payloadLength)
+}
+
+// NewCodecDataFromMPEG4AudioConfigBytes stores config as ConfigBytes and
+// parses it with ParseMPEG4AudioConfig. On a parse failure the error is
+// wrapped and returned together with the partially filled CodecData.
+func NewCodecDataFromMPEG4AudioConfigBytes(config []byte) (self CodecData, err error) {
+	self.ConfigBytes = config
+	self.Config, err = ParseMPEG4AudioConfig(config)
+	if err != nil {
+		err = fmt.Errorf("parse MPEG4AudioConfig failed(%s)", err)
+	}
+	return
+}
+
--- a/codec/codec.go
+++ b/codec/codec.go
@ -0,0 +1,51 @@
+package codec
+
+import (
+	"github.com/nareix/av"
+	"time"
+)
+
+// PCMUCodecData is a minimal audio codec description with fixed parameters
+// (8000 sample rate, av.CH_MONO, av.S16). Only the codec type varies; the
+// constructors in this file reuse it for several codecs.
+type PCMUCodecData struct {
+	typ av.CodecType // codec type reported by Type
+}
+
+// Type returns the codec type this value was constructed with.
+func (self PCMUCodecData) Type() av.CodecType {
+	return self.typ
+}
+
+// SampleRate always returns 8000.
+func (self PCMUCodecData) SampleRate() int {
+	return 8000
+}
+
+// ChannelLayout always returns av.CH_MONO.
+func (self PCMUCodecData) ChannelLayout() av.ChannelLayout {
+	return av.CH_MONO
+}
+
+// SampleFormat always returns av.S16.
+func (self PCMUCodecData) SampleFormat() av.SampleFormat {
+	return av.S16
+}
+
+// PacketDuration returns len(data) seconds divided by 8000, i.e. it counts
+// every byte of data as one sample at the fixed 8000 rate. The error is
+// always nil.
+func (self PCMUCodecData) PacketDuration(data []byte) (time.Duration, error) {
+	sampleCount := time.Duration(len(data))
+	return sampleCount * time.Second / 8000, nil
+}
+
+func NewPCMMulawCodecData() av.AudioCodecData {
+	return PCMUCodecData{
+		typ: av.PCM_MULAW,
+	}
+}
+
+func NewPCMAlawCodecData() av.AudioCodecData {
+	return PCMUCodecData{
+		typ: av.PCM_ALAW,
+	}
+}
+
+func NewNellyMoserCodecData() av.AudioCodecData {
+	return PCMUCodecData{typ: av.NELLYMOSER}
+}
+
+func NewSpeexCodecData() av.AudioCodecData {
+	return PCMUCodecData{typ: av.SPEEX}
+}
+
--- a/codec/fake/fake.go
+++ b/codec/fake/fake.go
@ -0,0 +1,14 @@
+package fake
+
+import (
+	"github.com/nareix/av"
+)
+
+// CodecData is a placeholder codec description that carries nothing but a
+// codec type.
+type CodecData struct {
+	Typ av.CodecType // codec type reported by Type
+}
+
+// Type returns the stored codec type.
+func (self CodecData) Type() av.CodecType {
+	return self.Typ
+}
+
--- a/codec/h264parser/parser.go
+++ b/codec/h264parser/parser.go
@ -0,0 +1,818 @@
+
+package h264parser
+
+import (
+	"github.com/nareix/av"
+	"github.com/nareix/bits"
+	"io"
+	"fmt"
+	"bytes"
+)
+
+// NAL unit type codes (the low 5 bits of the first NALU byte).
+// Per H.264 Table 7-1 — and the table reproduced in the comment below —
+// type 7 is the sequence parameter set and type 8 the picture parameter set.
+const (
+	NALU_SEI = 6 // supplemental enhancement information
+	NALU_SPS = 7 // sequence parameter set
+	NALU_PPS = 8 // picture parameter set
+	NALU_AUD = 9 // access unit delimiter
+)
+
+// IsDataNALU reports whether b is a slice-data NALU, i.e. its type (the low
+// 5 bits of the first byte) lies in 1..5. An empty b is not a data NALU;
+// without that guard the header-byte read would panic.
+func IsDataNALU(b []byte) bool {
+	if len(b) == 0 {
+		return false
+	}
+	typ := b[0] & 0x1f
+	return typ >= 1 && typ <= 5
+}
+
+/*
+From: http://stackoverflow.com/questions/24884827/possible-locations-for-sequence-picture-parameter-sets-for-h-264-stream
+
+First off, it's important to understand that there is no single standard H.264 elementary bitstream format. The specification document does contain an Annex, specifically Annex B, that describes one possible format, but it is not an actual requirement. The standard specifies how video is encoded into individual packets. How these packets are stored and transmitted is left open to the integrator.
+
+1. Annex B
+Network Abstraction Layer Units
+The packets are called Network Abstraction Layer Units. Often abbreviated NALU (or sometimes just NAL) each packet can be individually parsed and processed. The first byte of each NALU contains the NALU type, specifically bits 3 through 7. (bit 0 is always off, and bits 1-2 indicate whether a NALU is referenced by another NALU).
+
+There are 19 different NALU types defined separated into two categories, VCL and non-VCL:
+
+VCL, or Video Coding Layer packets contain the actual visual information.
+Non-VCLs contain metadata that may or may not be required to decode the video.
+A single NALU, or even a VCL NALU is NOT the same thing as a frame. A frame can be ‘sliced’ into several NALUs. Just like you can slice a pizza. One or more slices are then virtually grouped into a Access Units (AU) that contain one frame. Slicing does come at a slight quality cost, so it is not often used.
+
+Below is a table of all defined NALUs.
+
+0      Unspecified                                                    non-VCL
+1      Coded slice of a non-IDR picture                               VCL
+2      Coded slice data partition A                                   VCL
+3      Coded slice data partition B                                   VCL
+4      Coded slice data partition C                                   VCL
+5      Coded slice of an IDR picture                                  VCL
+6      Supplemental enhancement information (SEI)                     non-VCL
+7      Sequence parameter set                                         non-VCL
+8      Picture parameter set                                          non-VCL
+9      Access unit delimiter                                          non-VCL
+10     End of sequence                                                non-VCL
+11     End of stream                                                  non-VCL
+12     Filler data                                                    non-VCL
+13     Sequence parameter set extension                               non-VCL
+14     Prefix NAL unit                                                non-VCL
+15     Subset sequence parameter set                                  non-VCL
+16     Depth parameter set                                            non-VCL
+17..18 Reserved                                                       non-VCL
+19     Coded slice of an auxiliary coded picture without partitioning non-VCL
+20     Coded slice extension                                          non-VCL
+21     Coded slice extension for depth view components                non-VCL
+22..23 Reserved                                                       non-VCL
+24..31 Unspecified                                                    non-VCL
+There are a couple of NALU types where having knowledge of may be helpful later.
+
+Sequence Parameter Set (SPS). This non-VCL NALU contains information required to configure the decoder such as profile, level, resolution, frame rate.
+Picture Parameter Set (PPS). Similar to the SPS, this non-VCL contains information on entropy coding mode, slice groups, motion prediction and deblocking filters.
+Instantaneous Decoder Refresh (IDR). This VCL NALU is a self contained image slice. That is, an IDR can be decoded and displayed without referencing any other NALU save SPS and PPS.
+Access Unit Delimiter (AUD). An AUD is an optional NALU that can be used to delimit frames in an elementary stream. It is not required (unless otherwise stated by the container/protocol, like TS), and is often not included in order to save space, but it can be useful to find the start of a frame without having to fully parse each NALU.
+NALU Start Codes
+A NALU does not contain its size. Therefore simply concatenating the NALUs to create a stream will not work because you will not know where one stops and the next begins.
+
+The Annex B specification solves this by requiring ‘Start Codes’ to precede each NALU. A start code is 2 or 3 0x00 bytes followed with a 0x01 byte. e.g. 0x000001 or 0x00000001.
+
+The 4 byte variation is useful for transmission over a serial connection as it is trivial to byte align the stream by looking for 31 zero bits followed by a one. If the next bit is 0 (because every NALU starts with a 0 bit), it is the start of a NALU. The 4 byte variation is usually only used for signaling random access points in the stream such as a SPS PPS AUD and IDR Where as the 3 byte variation is used everywhere else to save space.
+
+Emulation Prevention Bytes
+Start codes work because the three-byte sequences 0x000000, 0x000001, 0x000002 and 0x000003 are illegal within a non-RBSP NALU. So when creating a NALU, care is taken to escape these values that could otherwise be confused with a start code. This is accomplished by inserting an ‘Emulation Prevention’ byte 0x03, so that 0x000001 becomes 0x00000301.
+
+When decoding, it is important to look for and ignore emulation prevention bytes. Because emulation prevention bytes can occur almost anywhere within a NALU, it is often more convenient in documentation to assume they have already been removed. A representation without emulation prevention bytes is called Raw Byte Sequence Payload (RBSP).
+
+Example
+Let's look at a complete example.
+
+0x0000 | 00 00 00 01 67 64 00 0A AC 72 84 44 26 84 00 00
+0x0010 | 03 00 04 00 00 03 00 CA 3C 48 96 11 80 00 00 00
+0x0020 | 01 68 E8 43 8F 13 21 30 00 00 01 65 88 81 00 05
+0x0030 | 4E 7F 87 DF 61 A5 8B 95 EE A4 E9 38 B7 6A 30 6A
+0x0040 | 71 B9 55 60 0B 76 2E B5 0E E4 80 59 27 B8 67 A9
+0x0050 | 63 37 5E 82 20 55 FB E4 6A E9 37 35 72 E2 22 91
+0x0060 | 9E 4D FF 60 86 CE 7E 42 B7 95 CE 2A E1 26 BE 87
+0x0070 | 73 84 26 BA 16 36 F4 E6 9F 17 DA D8 64 75 54 B1
+0x0080 | F3 45 0C 0B 3C 74 B3 9D BC EB 53 73 87 C3 0E 62
+0x0090 | 47 48 62 CA 59 EB 86 3F 3A FA 86 B5 BF A8 6D 06
+0x00A0 | 16 50 82 C4 CE 62 9E 4E E6 4C C7 30 3E DE A1 0B
+0x00B0 | D8 83 0B B6 B8 28 BC A9 EB 77 43 FC 7A 17 94 85
+0x00C0 | 21 CA 37 6B 30 95 B5 46 77 30 60 B7 12 D6 8C C5
+0x00D0 | 54 85 29 D8 69 A9 6F 12 4E 71 DF E3 E2 B1 6B 6B
+0x00E0 | BF 9F FB 2E 57 30 A9 69 76 C4 46 A2 DF FA 91 D9
+0x00F0 | 50 74 55 1D 49 04 5A 1C D6 86 68 7C B6 61 48 6C
+0x0100 | 96 E6 12 4C 27 AD BA C7 51 99 8E D0 F0 ED 8E F6
+0x0110 | 65 79 79 A6 12 A1 95 DB C8 AE E3 B6 35 E6 8D BC
+0x0120 | 48 A3 7F AF 4A 28 8A 53 E2 7E 68 08 9F 67 77 98
+0x0130 | 52 DB 50 84 D6 5E 25 E1 4A 99 58 34 C7 11 D6 43
+0x0140 | FF C4 FD 9A 44 16 D1 B2 FB 02 DB A1 89 69 34 C2
+0x0150 | 32 55 98 F9 9B B2 31 3F 49 59 0C 06 8C DB A5 B2
+0x0160 | 9D 7E 12 2F D0 87 94 44 E4 0A 76 EF 99 2D 91 18
+0x0170 | 39 50 3B 29 3B F5 2C 97 73 48 91 83 B0 A6 F3 4B
+0x0180 | 70 2F 1C 8F 3B 78 23 C6 AA 86 46 43 1D D7 2A 23
+0x0190 | 5E 2C D9 48 0A F5 F5 2C D1 FB 3F F0 4B 78 37 E9
+0x01A0 | 45 DD 72 CF 80 35 C3 95 07 F3 D9 06 E5 4A 58 76
+0x01B0 | 03 6C 81 20 62 45 65 44 73 BC FE C1 9F 31 E5 DB
+0x01C0 | 89 5C 6B 79 D8 68 90 D7 26 A8 A1 88 86 81 DC 9A
+0x01D0 | 4F 40 A5 23 C7 DE BE 6F 76 AB 79 16 51 21 67 83
+0x01E0 | 2E F3 D6 27 1A 42 C2 94 D1 5D 6C DB 4A 7A E2 CB
+0x01F0 | 0B B0 68 0B BE 19 59 00 50 FC C0 BD 9D F5 F5 F8
+0x0200 | A8 17 19 D6 B3 E9 74 BA 50 E5 2C 45 7B F9 93 EA
+0x0210 | 5A F9 A9 30 B1 6F 5B 36 24 1E 8D 55 57 F4 CC 67
+0x0220 | B2 65 6A A9 36 26 D0 06 B8 E2 E3 73 8B D1 C0 1C
+0x0230 | 52 15 CA B5 AC 60 3E 36 42 F1 2C BD 99 77 AB A8
+0x0240 | A9 A4 8E 9C 8B 84 DE 73 F0 91 29 97 AE DB AF D6
+0x0250 | F8 5E 9B 86 B3 B3 03 B3 AC 75 6F A6 11 69 2F 3D
+0x0260 | 3A CE FA 53 86 60 95 6C BB C5 4E F3
+
+This is a complete AU containing 3 NALUs. As you can see, we begin with a Start code followed by an SPS (SPS starts with 67). Within the SPS, you will see two Emulation Prevention bytes. Without these bytes the illegal sequence 0x000000 would occur at these positions. Next you will see a start code followed by a PPS (PPS starts with 68) and one final start code followed by an IDR slice. This is a complete H.264 stream. If you type these values into a hex editor and save the file with a .264 extension, you will be able to convert it to this image:
+
+Lena
+
+Annex B is commonly used in live and streaming formats such as transport streams, over the air broadcasts, and DVDs. In these formats it is common to repeat the SPS and PPS periodically, usually preceding every IDR thus creating a random access point for the decoder. This enables the ability to join a stream already in progress.
+
+2. AVCC
+The other common method of storing an H.264 stream is the AVCC format. In this format, each NALU is preceded with its length (in big endian format). This method is easier to parse, but you lose the byte alignment features of Annex B. Just to complicate things, the length may be encoded using 1, 2 or 4 bytes. This value is stored in a header object. This header is often called ‘extradata’ or ‘sequence header’. Its basic format is as follows:
+
+bits    
+8   version ( always 0x01 )
+8   avc profile ( sps[0][1] )
+8   avc compatibility ( sps[0][2] )
+8   avc level ( sps[0][3] )
+6   reserved ( all bits on )
+2   NALULengthSizeMinusOne
+3   reserved ( all bits on )
+5   number of SPS NALUs (usually 1)
+repeated once per SPS:
+  16         SPS size
+	variable   SPS NALU data
+8   number of PPS NALUs (usually 1)
+repeated once per PPS
+  16         PPS size
+  variable   PPS NALU data
+
+Using the same example above, the AVCC extradata will look like this:
+
+0x0000 | 01 64 00 0A FF E1 00 19 67 64 00 0A AC 72 84 44
+0x0010 | 26 84 00 00 03 00 04 00 00 03 00 CA 3C 48 96 11
+0x0020 | 80 01 00 07 68 E8 43 8F 13 21 30
+
+You will notice SPS and PPS is now stored out of band. That is, separate from the elementary stream data. Storage and transmission of this data is the job of the file container, and beyond the scope of this document. Notice that even though we are not using start codes, emulation prevention bytes are still inserted.
+
+Additionally, there is a new variable called NALULengthSizeMinusOne. This confusingly named variable tells us how many bytes to use to store the length of each NALU. So, if NALULengthSizeMinusOne is set to 0, then each NALU is preceded with a single byte indicating its length. Using a single byte to store the size, the max size of a NALU is 255 bytes. That is obviously pretty small. Way too small for an entire key frame. Using 2 bytes gives us 64k per NALU. It would work in our example, but is still a pretty low limit. 3 bytes would be perfect, but for some reason is not universally supported. Therefore, 4 bytes is by far the most common, and it is what we used here:
+
+0x0000 | 00 00 02 41 65 88 81 00 05 4E 7F 87 DF 61 A5 8B
+0x0010 | 95 EE A4 E9 38 B7 6A 30 6A 71 B9 55 60 0B 76 2E
+0x0020 | B5 0E E4 80 59 27 B8 67 A9 63 37 5E 82 20 55 FB
+0x0030 | E4 6A E9 37 35 72 E2 22 91 9E 4D FF 60 86 CE 7E
+0x0040 | 42 B7 95 CE 2A E1 26 BE 87 73 84 26 BA 16 36 F4
+0x0050 | E6 9F 17 DA D8 64 75 54 B1 F3 45 0C 0B 3C 74 B3
+0x0060 | 9D BC EB 53 73 87 C3 0E 62 47 48 62 CA 59 EB 86
+0x0070 | 3F 3A FA 86 B5 BF A8 6D 06 16 50 82 C4 CE 62 9E
+0x0080 | 4E E6 4C C7 30 3E DE A1 0B D8 83 0B B6 B8 28 BC
+0x0090 | A9 EB 77 43 FC 7A 17 94 85 21 CA 37 6B 30 95 B5
+0x00A0 | 46 77 30 60 B7 12 D6 8C C5 54 85 29 D8 69 A9 6F
+0x00B0 | 12 4E 71 DF E3 E2 B1 6B 6B BF 9F FB 2E 57 30 A9
+0x00C0 | 69 76 C4 46 A2 DF FA 91 D9 50 74 55 1D 49 04 5A
+0x00D0 | 1C D6 86 68 7C B6 61 48 6C 96 E6 12 4C 27 AD BA
+0x00E0 | C7 51 99 8E D0 F0 ED 8E F6 65 79 79 A6 12 A1 95
+0x00F0 | DB C8 AE E3 B6 35 E6 8D BC 48 A3 7F AF 4A 28 8A
+0x0100 | 53 E2 7E 68 08 9F 67 77 98 52 DB 50 84 D6 5E 25
+0x0110 | E1 4A 99 58 34 C7 11 D6 43 FF C4 FD 9A 44 16 D1
+0x0120 | B2 FB 02 DB A1 89 69 34 C2 32 55 98 F9 9B B2 31
+0x0130 | 3F 49 59 0C 06 8C DB A5 B2 9D 7E 12 2F D0 87 94
+0x0140 | 44 E4 0A 76 EF 99 2D 91 18 39 50 3B 29 3B F5 2C
+0x0150 | 97 73 48 91 83 B0 A6 F3 4B 70 2F 1C 8F 3B 78 23
+0x0160 | C6 AA 86 46 43 1D D7 2A 23 5E 2C D9 48 0A F5 F5
+0x0170 | 2C D1 FB 3F F0 4B 78 37 E9 45 DD 72 CF 80 35 C3
+0x0180 | 95 07 F3 D9 06 E5 4A 58 76 03 6C 81 20 62 45 65
+0x0190 | 44 73 BC FE C1 9F 31 E5 DB 89 5C 6B 79 D8 68 90
+0x01A0 | D7 26 A8 A1 88 86 81 DC 9A 4F 40 A5 23 C7 DE BE
+0x01B0 | 6F 76 AB 79 16 51 21 67 83 2E F3 D6 27 1A 42 C2
+0x01C0 | 94 D1 5D 6C DB 4A 7A E2 CB 0B B0 68 0B BE 19 59
+0x01D0 | 00 50 FC C0 BD 9D F5 F5 F8 A8 17 19 D6 B3 E9 74
+0x01E0 | BA 50 E5 2C 45 7B F9 93 EA 5A F9 A9 30 B1 6F 5B
+0x01F0 | 36 24 1E 8D 55 57 F4 CC 67 B2 65 6A A9 36 26 D0
+0x0200 | 06 B8 E2 E3 73 8B D1 C0 1C 52 15 CA B5 AC 60 3E
+0x0210 | 36 42 F1 2C BD 99 77 AB A8 A9 A4 8E 9C 8B 84 DE
+0x0220 | 73 F0 91 29 97 AE DB AF D6 F8 5E 9B 86 B3 B3 03
+0x0230 | B3 AC 75 6F A6 11 69 2F 3D 3A CE FA 53 86 60 95
+0x0240 | 6C BB C5 4E F3
+
+An advantage to this format is the ability to configure the decoder at the start and jump into the middle of a stream. This is a common use case where the media is available on a random access medium such as a hard drive, and is therefore used in common container formats such as MP4 and MKV.
+*/
+
+// WalkNALUsAnnexb emits nalus in Annex B form through write. The first NALU
+// is preceded by an access unit delimiter and a 4-byte start code; every
+// later NALU is preceded by a 3-byte start code. write is called twice per
+// NALU: once with the prefix and once with the NALU itself.
+func WalkNALUsAnnexb(nalus [][]byte, write func([]byte)) {
+	for i := range nalus {
+		prefix := []byte{0, 0, 1}
+		if i == 0 {
+			prefix = []byte{0, 0, 0, 1, 0x9, 0xf0, 0, 0, 0, 1} // AUD
+		}
+		write(prefix)
+		write(nalus[i])
+	}
+}
+
+// WalkNALUsAVCC emits nalus in AVCC form through write: each NALU is
+// preceded by its length written with bits.PutUIntBE as a 32-bit value.
+// write is called twice per NALU (length prefix, then data).
+func WalkNALUsAVCC(nalus [][]byte, write func([]byte)) {
+	for i := range nalus {
+		// A fresh array per NALU, so write may keep the slice it is handed.
+		var size [4]byte
+		bits.PutUIntBE(size[:], uint(len(nalus[i])), 32)
+		write(size[:])
+		write(nalus[i])
+	}
+}
+
+// CheckNALUsType returns the framing type (NALU_RAW, NALU_AVCC or
+// NALU_ANNEXB) that SplitNALUs detects for b; the split result is dropped.
+func CheckNALUsType(b []byte) (typ int) {
+	_, format := SplitNALUs(b)
+	return format
+}
+
+func FindDataNALUInAVCCNALUs(b []byte) (data []byte, ok bool) {
+	var typ int
+	var nalus [][]byte
+	if nalus, typ = SplitNALUs(b); typ != NALU_AVCC {
+		return
+	}
+	for _, nalu := range nalus {
+		if IsDataNALU(nalu) {
+			return nalu, true
+		}
+	}
+	return
+}
+
+// Framing types reported by SplitNALUs.
+const (
+	NALU_RAW    = 0 // a single NALU with no framing
+	NALU_AVCC   = 1 // length-prefixed NALUs
+	NALU_ANNEXB = 2 // start-code-delimited NALUs
+)
+
+// SplitNALUs guesses how the NALUs in b are framed and splits them.
+//
+//   - Fewer than 4 bytes: b is returned as a single NALU_RAW unit.
+//   - AVCC: tried first. b is walked as a chain of 32-bit length prefixes;
+//     it is accepted only if the chain consumes b exactly.
+//   - Annex B: tried next, when b starts with a 3- or 4-byte start code.
+//     Empty units between adjacent start codes are dropped.
+//   - Otherwise b is returned as a single NALU_RAW unit.
+//
+// The returned slices alias b; nothing is copied.
+func SplitNALUs(b []byte) (nalus [][]byte, typ int) {
+	if len(b) < 4 {
+		return [][]byte{b}, NALU_RAW
+	}
+
+	val3 := bits.GetUIntBE(b, 24)
+	val4 := bits.GetUIntBE(b, 32)
+
+	// maybe AVCC
+	// The first length must fit in what is left AFTER its own 4-byte prefix;
+	// comparing against len(b) alone let lengths in (len(b)-4, len(b)] through
+	// and the slice expressions below panicked.
+	if val4 <= uint(len(b)-4) {
+		_val4 := val4
+		_b := b[4:]
+		avcc := [][]byte{}
+		for {
+			avcc = append(avcc, _b[:_val4])
+			_b = _b[_val4:]
+			if len(_b) < 4 {
+				break
+			}
+			_val4 = bits.GetUIntBE(_b, 32)
+			_b = _b[4:]
+			// Next length overruns the buffer: stop before slicing with it.
+			if _val4 > uint(len(_b)) {
+				break
+			}
+		}
+		// Accept only when nothing is left over.
+		if len(_b) == 0 {
+			return avcc, NALU_AVCC
+		}
+	}
+
+	// is Annex B
+	if val3 == 1 || val4 == 1 {
+		_val3 := val3
+		_val4 := val4
+		start := 0
+		pos := 0
+		for {
+			// Emit the unit that ended at the start code just found.
+			if start != pos {
+				nalus = append(nalus, b[start:pos])
+			}
+			// Skip the start code (3 or 4 bytes).
+			if _val3 == 1 {
+				pos += 3
+			} else if _val4 == 1 {
+				pos += 4
+			}
+			start = pos
+			if start == len(b) {
+				break
+			}
+			_val3 = 0
+			_val4 = 0
+			// Scan forward to the next start code or to the end of b.
+			for pos < len(b) {
+				if pos+2 < len(b) && b[pos] == 0 {
+					_val3 = bits.GetUIntBE(b[pos:], 24)
+					if _val3 == 0 {
+						// 00 00 00: a 4-byte start code if the next byte is 01.
+						if pos+3 < len(b) {
+							_val4 = uint(b[pos+3])
+							if _val4 == 1 {
+								break
+							}
+						}
+					} else if _val3 == 1 {
+						break
+					}
+					pos++
+				} else {
+					pos++
+				}
+			}
+		}
+		typ = NALU_ANNEXB
+		return
+	}
+
+	return [][]byte{b}, NALU_RAW
+}
+
+// SPSInfo holds the values ParseSPS extracts from a sequence parameter set.
+type SPSInfo struct {
+	// profile_idc and level_idc as read from the SPS header.
+	ProfileIdc, LevelIdc uint
+
+	// Picture size in macroblocks. NOTE(review): inferred from the field
+	// names; confirm against ParseSPS.
+	MbWidth, MbHeight uint
+
+	// Cropping offsets. NOTE(review): units not visible here; confirm
+	// against ParseSPS.
+	CropLeft, CropRight, CropTop, CropBottom uint
+
+	// Resulting picture dimensions. NOTE(review): presumably in pixels after
+	// cropping; confirm against ParseSPS.
+	Width, Height uint
+}
+
+// ParseSPS decodes the leading part of an H.264 sequence parameter set and
+// returns the profile, level, macroblock dimensions, cropping offsets and the
+// resulting picture size in pixels.
+//
+// data must start with the one-byte NAL unit header, which is skipped.
+// Parsing stops after the frame cropping fields; anything that follows (such
+// as VUI parameters) is not read. On a read error the error is returned
+// together with whatever fields had been filled in up to that point.
+//
+// NOTE(review): data is read as-is; emulation prevention bytes (00 00 03)
+// are not stripped here - confirm whether callers are expected to do that.
+func ParseSPS(data []byte) (self SPSInfo, err error) {
+	r := &bits.GolombBitReader{R: bytes.NewReader(data)}
+
+	// NAL unit header
+	if _, err = r.ReadBits(8); err != nil {
+		return
+	}
+
+	// profile_idc
+	if self.ProfileIdc, err = r.ReadBits(8); err != nil {
+		return
+	}
+
+	// constraint_set0_flag-constraint_set6_flag,reserved_zero_2bits
+	if _, err = r.ReadBits(8); err != nil {
+		return
+	}
+
+	// level_idc
+	if self.LevelIdc, err = r.ReadBits(8); err != nil {
+		return
+	}
+
+	// seq_parameter_set_id
+	if _, err = r.ReadExponentialGolombCode(); err != nil {
+		return
+	}
+
+	// chroma_format_idc is inferred to be 1 (4:2:0) when the profile does
+	// not carry it explicitly.
+	chroma_format_idc := uint(1)
+
+	switch self.ProfileIdc {
+	case 100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135:
+		if chroma_format_idc, err = r.ReadExponentialGolombCode(); err != nil {
+			return
+		}
+
+		if chroma_format_idc == 3 {
+			// separate_colour_plane_flag
+			// (residual_colour_transform_flag in older spec editions)
+			if _, err = r.ReadBit(); err != nil {
+				return
+			}
+		}
+
+		// bit_depth_luma_minus8
+		if _, err = r.ReadExponentialGolombCode(); err != nil {
+			return
+		}
+		// bit_depth_chroma_minus8
+		if _, err = r.ReadExponentialGolombCode(); err != nil {
+			return
+		}
+		// qpprime_y_zero_transform_bypass_flag
+		if _, err = r.ReadBit(); err != nil {
+			return
+		}
+
+		var seq_scaling_matrix_present_flag uint
+		if seq_scaling_matrix_present_flag, err = r.ReadBit(); err != nil {
+			return
+		}
+
+		if seq_scaling_matrix_present_flag != 0 {
+			// 4:4:4 streams carry 12 scaling lists, everything else 8.
+			numScalingLists := 8
+			if chroma_format_idc == 3 {
+				numScalingLists = 12
+			}
+			for i := 0; i < numScalingLists; i++ {
+				var seq_scaling_list_present_flag uint
+				if seq_scaling_list_present_flag, err = r.ReadBit(); err != nil {
+					return
+				}
+				if seq_scaling_list_present_flag == 0 {
+					continue
+				}
+				// The first six lists are 4x4, the remaining ones 8x8.
+				sizeOfScalingList := uint(64)
+				if i < 6 {
+					sizeOfScalingList = 16
+				}
+				// The list values are not needed; they are only walked so
+				// the reader ends up positioned after the list.
+				lastScale := uint(8)
+				nextScale := uint(8)
+				for j := uint(0); j < sizeOfScalingList; j++ {
+					if nextScale != 0 {
+						var delta_scale uint
+						if delta_scale, err = r.ReadSE(); err != nil {
+							return
+						}
+						// Unsigned wrap-around keeps this correct modulo 256
+						// even when delta_scale encodes a negative value.
+						nextScale = (lastScale + delta_scale + 256) % 256
+					}
+					if nextScale != 0 {
+						lastScale = nextScale
+					}
+				}
+			}
+		}
+	}
+
+	// log2_max_frame_num_minus4
+	if _, err = r.ReadExponentialGolombCode(); err != nil {
+		return
+	}
+
+	var pic_order_cnt_type uint
+	if pic_order_cnt_type, err = r.ReadExponentialGolombCode(); err != nil {
+		return
+	}
+	switch pic_order_cnt_type {
+	case 0:
+		// log2_max_pic_order_cnt_lsb_minus4
+		if _, err = r.ReadExponentialGolombCode(); err != nil {
+			return
+		}
+	case 1:
+		// delta_pic_order_always_zero_flag
+		if _, err = r.ReadBit(); err != nil {
+			return
+		}
+		// offset_for_non_ref_pic
+		if _, err = r.ReadSE(); err != nil {
+			return
+		}
+		// offset_for_top_to_bottom_field
+		if _, err = r.ReadSE(); err != nil {
+			return
+		}
+		var num_ref_frames_in_pic_order_cnt_cycle uint
+		if num_ref_frames_in_pic_order_cnt_cycle, err = r.ReadExponentialGolombCode(); err != nil {
+			return
+		}
+		for i := uint(0); i < num_ref_frames_in_pic_order_cnt_cycle; i++ {
+			// offset_for_ref_frame[i]
+			if _, err = r.ReadSE(); err != nil {
+				return
+			}
+		}
+	}
+
+	// max_num_ref_frames
+	if _, err = r.ReadExponentialGolombCode(); err != nil {
+		return
+	}
+
+	// gaps_in_frame_num_value_allowed_flag
+	if _, err = r.ReadBit(); err != nil {
+		return
+	}
+
+	// pic_width_in_mbs_minus1
+	if self.MbWidth, err = r.ReadExponentialGolombCode(); err != nil {
+		return
+	}
+	self.MbWidth++
+
+	// pic_height_in_map_units_minus1
+	if self.MbHeight, err = r.ReadExponentialGolombCode(); err != nil {
+		return
+	}
+	self.MbHeight++
+
+	var frame_mbs_only_flag uint
+	if frame_mbs_only_flag, err = r.ReadBit(); err != nil {
+		return
+	}
+	if frame_mbs_only_flag == 0 {
+		// mb_adaptive_frame_field_flag
+		if _, err = r.ReadBit(); err != nil {
+			return
+		}
+	}
+
+	// direct_8x8_inference_flag
+	if _, err = r.ReadBit(); err != nil {
+		return
+	}
+
+	var frame_cropping_flag uint
+	if frame_cropping_flag, err = r.ReadBit(); err != nil {
+		return
+	}
+	if frame_cropping_flag != 0 {
+		if self.CropLeft, err = r.ReadExponentialGolombCode(); err != nil {
+			return
+		}
+		if self.CropRight, err = r.ReadExponentialGolombCode(); err != nil {
+			return
+		}
+		if self.CropTop, err = r.ReadExponentialGolombCode(); err != nil {
+			return
+		}
+		if self.CropBottom, err = r.ReadExponentialGolombCode(); err != nil {
+			return
+		}
+	}
+
+	// Cropping offsets are expressed in chroma sample units, so their size
+	// in luma pixels depends on the chroma subsampling: 2x2 for 4:2:0, 2x1
+	// for 4:2:2 and 1x1 for monochrome and 4:4:4.
+	cropUnitX, cropUnitY := uint(1), uint(1)
+	switch chroma_format_idc {
+	case 1:
+		cropUnitX, cropUnitY = 2, 2
+	case 2:
+		cropUnitX, cropUnitY = 2, 1
+	}
+	// When fields are allowed a map unit covers two macroblock rows, which
+	// doubles both the coded height and the vertical crop unit.
+	heightScale := 2 - frame_mbs_only_flag
+	cropUnitY *= heightScale
+
+	self.Width = self.MbWidth*16 - (self.CropLeft+self.CropRight)*cropUnitX
+	self.Height = heightScale*self.MbHeight*16 - (self.CropTop+self.CropBottom)*cropUnitY
+
+	return
+}
+
+// WriteAVCDecoderConfRecord serializes self to w in the
+// AVCDecoderConfigurationRecord layout: a version byte (always 1), the
+// profile, compatibility and level bytes, the NALU length size, then the SPS
+// list followed by the PPS list, each entry prefixed with its 16-bit length.
+//
+// An error is returned, before anything is written, when a value cannot be
+// represented in the record: LengthSizeMinusOne above 3, more than 31 SPS,
+// more than 255 PPS, or a parameter set longer than 65535 bytes. Errors from
+// w are returned as-is.
+func WriteAVCDecoderConfRecord(w io.Writer, self AVCDecoderConfRecord) (err error) {
+	// Validate up front so an unrepresentable record never reaches w in a
+	// silently truncated form.
+	if self.LengthSizeMinusOne > 3 {
+		err = fmt.Errorf("h264parser: LengthSizeMinusOne=%d does not fit in 2 bits", self.LengthSizeMinusOne)
+		return
+	}
+	if len(self.SPS) > 0x1f {
+		err = fmt.Errorf("h264parser: too many SPS(%d) for AVCDecoderConfRecord", len(self.SPS))
+		return
+	}
+	if len(self.PPS) > 0xff {
+		err = fmt.Errorf("h264parser: too many PPS(%d) for AVCDecoderConfRecord", len(self.PPS))
+		return
+	}
+	for _, sets := range [][][]byte{self.SPS, self.PPS} {
+		for _, data := range sets {
+			if len(data) > 0xffff {
+				err = fmt.Errorf("h264parser: parameter set too long(%d bytes) for AVCDecoderConfRecord", len(data))
+				return
+			}
+		}
+	}
+
+	// writeSets emits every entry as a 16-bit length followed by its bytes.
+	writeSets := func(sets [][]byte) error {
+		for _, data := range sets {
+			if e := bits.WriteUIntBE(w, uint(len(data)), 16); e != nil {
+				return e
+			}
+			if e := bits.WriteBytes(w, data, len(data)); e != nil {
+				return e
+			}
+		}
+		return nil
+	}
+
+	// Fixed one-byte fields; the unused high bits of the length-size byte
+	// and of the SPS count byte are set to one.
+	header := []uint{
+		1, // configurationVersion
+		self.AVCProfileIndication,
+		self.ProfileCompatibility,
+		self.AVCLevelIndication,
+		self.LengthSizeMinusOne | 0xfc,
+		uint(len(self.SPS)) | 0xe0,
+	}
+	for _, v := range header {
+		if err = bits.WriteUIntBE(w, v, 8); err != nil {
+			return
+		}
+	}
+	if err = writeSets(self.SPS); err != nil {
+		return
+	}
+
+	if err = bits.WriteUIntBE(w, uint(len(self.PPS)), 8); err != nil {
+		return
+	}
+	if err = writeSets(self.PPS); err != nil {
+		return
+	}
+
+	return
+}
+
+// CodecData bundles an H.264 decoder configuration in three forms: the
+// serialized record, its parsed fields, and the information decoded from the
+// first SPS.
+type CodecData struct {
+	Record     []byte               // serialized AVCDecoderConfRecord
+	RecordInfo AVCDecoderConfRecord // parsed form of Record
+	SPSInfo    SPSInfo              // result of ParseSPS on the first SPS
+}
+
+// Type reports the codec type of this codec data, which is always av.H264.
+func (self CodecData) Type() av.CodecType {
+	return av.H264
+}
+
+// AVCDecoderConfRecordBytes returns the serialized decoder configuration
+// record stored in Record. The slice is returned without copying.
+func (self CodecData) AVCDecoderConfRecordBytes() []byte {
+	return self.Record
+}
+
+// SPS returns the first sequence parameter set of the record, or nil when
+// the record holds none (for example on a zero-value CodecData).
+func (self CodecData) SPS() []byte {
+	if len(self.RecordInfo.SPS) == 0 {
+		return nil
+	}
+	return self.RecordInfo.SPS[0]
+}
+
+// PPS returns the first picture parameter set of the record, or nil when
+// the record holds none (for example on a zero-value CodecData).
+func (self CodecData) PPS() []byte {
+	if len(self.RecordInfo.PPS) == 0 {
+		return nil
+	}
+	return self.RecordInfo.PPS[0]
+}
+
+// Width returns SPSInfo.Width converted to int.
+func (self CodecData) Width() int {
+	return int(self.SPSInfo.Width)
+}
+
+// Height returns SPSInfo.Height converted to int.
+func (self CodecData) Height() int {
+	return int(self.SPSInfo.Height)
+}
+
+// NewCodecDataFromAVCDecoderConfRecord builds a CodecData from a serialized
+// decoder configuration record. The record is kept as given (not copied),
+// parsed with ParseAVCDecoderConfRecord, and its first SPS is decoded with
+// ParseSPS.
+//
+// It fails when the record cannot be parsed, when it contains no SPS or no
+// PPS, or when the first SPS cannot be decoded.
+func NewCodecDataFromAVCDecoderConfRecord(record []byte) (self CodecData, err error) {
+	self.Record = record
+
+	self.RecordInfo, err = ParseAVCDecoderConfRecord(record)
+	if err != nil {
+		return self, err
+	}
+
+	// Both kinds of parameter set are mandatory.
+	switch {
+	case len(self.RecordInfo.SPS) == 0:
+		return self, fmt.Errorf("h264parser: no SPS found in AVCDecoderConfRecord")
+	case len(self.RecordInfo.PPS) == 0:
+		return self, fmt.Errorf("h264parser: no PPS found in AVCDecoderConfRecord")
+	}
+
+	self.SPSInfo, err = ParseSPS(self.RecordInfo.SPS[0])
+	if err != nil {
+		return self, fmt.Errorf("h264parser: parse SPS failed(%s)", err)
+	}
+	return self, nil
+}
+
+// NewCodecDataFromSPSAndPPS builds a CodecData from a single SPS and a single
+// PPS NAL unit. The profile, compatibility and level bytes of the record are
+// taken from bytes 1 to 3 of sps, the NALU length size is fixed at four
+// bytes (LengthSizeMinusOne = 3), and the record is serialized with
+// WriteAVCDecoderConfRecord.
+//
+// It fails when sps is shorter than four bytes, when the record cannot be
+// written, or when ParseSPS rejects sps.
+func NewCodecDataFromSPSAndPPS(sps, pps []byte) (self CodecData, err error) {
+	// sps[1], sps[2] and sps[3] are read below; reject shorter input
+	// instead of panicking on the index.
+	if len(sps) < 4 {
+		err = fmt.Errorf("h264parser: SPS too short(%d bytes)", len(sps))
+		return
+	}
+
+	recordinfo := AVCDecoderConfRecord{}
+	recordinfo.AVCProfileIndication = uint(sps[1])
+	recordinfo.ProfileCompatibility = uint(sps[2])
+	recordinfo.AVCLevelIndication = uint(sps[3])
+	recordinfo.SPS = [][]byte{sps}
+	recordinfo.PPS = [][]byte{pps}
+	recordinfo.LengthSizeMinusOne = 3
+
+	buf := &bytes.Buffer{}
+	if err = WriteAVCDecoderConfRecord(buf, recordinfo); err != nil {
+		return
+	}
+	self.RecordInfo = recordinfo
+	self.Record = buf.Bytes()
+
+	if self.SPSInfo, err = ParseSPS(sps); err != nil {
+		return
+	}
+	return
+}
+
+// AVCDecoderConfRecord is the parsed form of an H.264 decoder configuration
+// record as read by ParseAVCDecoderConfRecord and written by
+// WriteAVCDecoderConfRecord.
+type AVCDecoderConfRecord struct {
+	// One-byte header fields, in the order they appear in the record.
+	AVCProfileIndication uint
+	ProfileCompatibility uint
+	AVCLevelIndication   uint
+
+	// Two-bit field; the parser keeps only the low two bits.
+	LengthSizeMinusOne uint
+
+	// Parameter sets, each stored as one byte slice per entry.
+	SPS, PPS [][]byte
+}
+
+// ParseAVCDecoderConfRecord decodes a serialized decoder configuration
+// record. The first byte is read and ignored, the next four bytes fill the
+// profile, compatibility, level and length-size fields (only the low two
+// bits of the latter are kept), and the SPS and PPS lists follow, each as a
+// count byte and then 16-bit length-prefixed entries. Only the low five bits
+// of the SPS count byte are used.
+//
+// On a read error the error is returned together with the fields and
+// parameter sets decoded so far.
+func ParseAVCDecoderConfRecord(config []byte) (self AVCDecoderConfRecord, err error) {
+	r := bytes.NewReader(config)
+
+	// Leading byte: read and discarded.
+	if _, err = bits.ReadUIntBE(r, 8); err != nil {
+		return self, err
+	}
+
+	// The four one-byte header fields, in wire order.
+	header := []*uint{
+		&self.AVCProfileIndication,
+		&self.ProfileCompatibility,
+		&self.AVCLevelIndication,
+		&self.LengthSizeMinusOne,
+	}
+	for _, field := range header {
+		if *field, err = bits.ReadUIntBE(r, 8); err != nil {
+			return self, err
+		}
+	}
+	self.LengthSizeMinusOne &= 0x03
+
+	// readSets appends count length-prefixed entries to *dst, leaving the
+	// entries read so far in place when it fails.
+	readSets := func(count int, dst *[][]byte) error {
+		for ; count > 0; count-- {
+			size, e := bits.ReadUIntBE(r, 16)
+			if e != nil {
+				return e
+			}
+			entry, e := bits.ReadBytes(r, int(size))
+			if e != nil {
+				return e
+			}
+			*dst = append(*dst, entry)
+		}
+		return nil
+	}
+
+	var count uint
+
+	if count, err = bits.ReadUIntBE(r, 8); err != nil {
+		return self, err
+	}
+	if err = readSets(int(count&0x1f), &self.SPS); err != nil {
+		return self, err
+	}
+
+	if count, err = bits.ReadUIntBE(r, 8); err != nil {
+		return self, err
+	}
+	if err = readSets(int(count), &self.PPS); err != nil {
+		return self, err
+	}
+
+	return self, nil
+}
+
+// SliceType classifies an H.264 slice as P, B or I.
+type SliceType uint
+
+// String returns "P", "B" or "I" for the matching SLICE_* constant and the
+// empty string for any other value.
+func (self SliceType) String() string {
+	// Index 0 is left empty so the zero value maps to "".
+	names := [...]string{SLICE_P: "P", SLICE_B: "B", SLICE_I: "I"}
+	if self < SliceType(len(names)) {
+		return names[self]
+	}
+	return ""
+}
+
+// Slice type values; numbering starts at 1 so that the zero SliceType is
+// none of them.
+const (
+	SLICE_P = iota + 1
+	SLICE_B
+	SLICE_I
+)
+
+// ParseSliceHeaderFromNALU reads the start of the slice header of a single
+// NAL unit (header byte included, no start code or length prefix) and
+// reports whether the slice is of type P, B or I.
+//
+// Only NAL unit types 1, 2, 5 and 19 are accepted. slice_type codes 3 and 8
+// are reported as SLICE_P, codes 4 and 9 as SLICE_I. An error is returned
+// for packets of one byte or less, for other NAL unit types, for a
+// slice_type above 9, and when the bit reader fails.
+func ParseSliceHeaderFromNALU(packet []byte) (sliceType SliceType, err error) {
+	if len(packet) < 2 {
+		return sliceType, fmt.Errorf("h264parser: packet too short to parse slice header")
+	}
+
+	// The NAL unit type is the low five bits of the header byte.
+	nalType := packet[0] & 0x1f
+	carriesSliceHeader := nalType == 1 || nalType == 2 || nalType == 5 || nalType == 19
+	if !carriesSliceHeader {
+		return sliceType, fmt.Errorf("h264parser: nal_unit_type=%d has no slice header", nalType)
+	}
+
+	br := &bits.GolombBitReader{R: bytes.NewReader(packet[1:])}
+
+	// first_mb_in_slice: value not needed, only skipped.
+	if _, err = br.ReadExponentialGolombCode(); err != nil {
+		return sliceType, err
+	}
+
+	// slice_type
+	var code uint
+	if code, err = br.ReadExponentialGolombCode(); err != nil {
+		return sliceType, err
+	}
+	if code > 9 {
+		return sliceType, fmt.Errorf("h264parser: slice_type=%d invalid", code)
+	}
+
+	// Codes 5-9 are classified the same way as codes 0-4.
+	switch code % 5 {
+	case 0, 3:
+		sliceType = SLICE_P
+	case 1:
+		sliceType = SLICE_B
+	case 2, 4:
+		sliceType = SLICE_I
+	}
+	return sliceType, nil
+}
+
+/*
+type CodecInfo struct {
+	Record AVCDecoderConfRecord
+	SPSInfo SPSInfo
+}
+
+func ParseCodecData(config []byte) (info CodecInfo, err error) {
+	if info.Record, err = ParseAVCDecoderConfRecord(config); err != nil {
+		return
+	}
+	if len(info.Record.SPS) < 1 {
+		err = fmt.Errorf("CodecData invalid: no SPS found in AVCDecoderConfRecord")
+		return
+	}
+	if info.SPSInfo, err = ParseSPS(info.Record.SPS[0]); err != nil {
+		err = fmt.Errorf("CodecData invalid: parse SPS failed(%s)", err)
+		return
+	}
+	return
+}
+
+func CreateCodecDataBySPSAndPPS(SPS, PPS []byte) (codecData []byte, err error) {
+	self := AVCDecoderConfRecord{}
+	self.AVCProfileIndication = uint(SPS[1])
+	self.ProfileCompatibility = uint(SPS[2])
+	self.AVCLevelIndication = uint(SPS[3])
+	self.SPS = [][]byte{SPS}
+	self.PPS = [][]byte{PPS}
+	self.LengthSizeMinusOne = 3
+
+	buf := &bytes.Buffer{}
+	if err = WriteAVCDecoderConfRecord(buf, self); err != nil {
+		return
+	}
+	codecData = buf.Bytes()
+
+	return
+}
+*/
+
--- a/codec/h264parser/parser_test.go
+++ b/codec/h264parser/parser_test.go
@ -0,0 +1,23 @@
+
+package h264parser
+
+import (
+	"testing"
+	"encoding/hex"
+)
+
+func TestParser(t *testing.T) {
+	var ok bool
+	var nalus [][]byte
+
+	annexbFrame, _ := hex.DecodeString("00000001223322330000000122332233223300000133000001000001")
+	nalus, ok = SplitNALUs(annexbFrame)
+	t.Log(ok, len(nalus))
+
+	avccFrame, _ := hex.DecodeString(
+		"00000008aabbccaabbccaabb00000001aa",
+	)
+	nalus, ok = SplitNALUs(avccFrame)
+	t.Log(ok, len(nalus))
+}
+