add comments

This commit is contained in:
nareix 2016-07-30 13:08:26 +08:00
parent a90312e55b
commit 6b25c3b4db
2 changed files with 106 additions and 52 deletions

139
av/av.go
View File

@ -5,20 +5,21 @@ import (
"time" "time"
) )
// Audio sample format
type SampleFormat uint8 type SampleFormat uint8
const ( const (
U8 = SampleFormat(iota + 1) U8 = SampleFormat(iota + 1) // 8-bit unsigned integer
S16 S16 // signed 16-bit integer
S32 S32 // signed 32-bit integer
FLT FLT // 32-bit float
DBL DBL // 64-bit float
U8P U8P // 8-bit unsigned integer in planar
S16P S16P // signed 16-bit integer in planar
S32P S32P // signed 32-bit integer in planar
FLTP FLTP // 32-bit float in planar
DBLP DBLP // 64-bit float in planar
U32 U32 // unsigned 32-bit integer
) )
func (self SampleFormat) BytesPerSample() int { func (self SampleFormat) BytesPerSample() int {
@ -63,6 +64,7 @@ func (self SampleFormat) String() string {
} }
} }
// check if this sample format is planar
func (self SampleFormat) IsPlanar() bool { func (self SampleFormat) IsPlanar() bool {
switch self { switch self {
case S16P, S32P, FLTP, DBLP: case S16P, S32P, FLTP, DBLP:
@ -72,6 +74,7 @@ func (self SampleFormat) IsPlanar() bool {
} }
} }
// audio channel layout
type ChannelLayout uint16 type ChannelLayout uint16
func (self ChannelLayout) String() string { func (self ChannelLayout) String() string {
@ -107,8 +110,18 @@ func (self ChannelLayout) Count() (n int) {
return return
} }
// Video/Audio codec type. can be H264/AAC/SPEEX/...
type CodecType uint32 type CodecType uint32
var (
H264 = MakeVideoCodecType(avCodecTypeMagic + 1)
AAC = MakeAudioCodecType(avCodecTypeMagic + 1)
PCM_MULAW = MakeAudioCodecType(avCodecTypeMagic + 2)
PCM_ALAW = MakeAudioCodecType(avCodecTypeMagic + 3)
SPEEX = MakeAudioCodecType(avCodecTypeMagic + 4)
NELLYMOSER = MakeAudioCodecType(avCodecTypeMagic + 5)
)
const codecTypeAudioBit = 0x1 const codecTypeAudioBit = 0x1
const codecTypeOtherBits = 1 const codecTypeOtherBits = 1
@ -130,19 +143,23 @@ func (self CodecType) String() string {
return "" return ""
} }
// CodecType is audio
func (self CodecType) IsAudio() bool { func (self CodecType) IsAudio() bool {
return self&codecTypeAudioBit != 0 return self&codecTypeAudioBit != 0
} }
// CodecType is video
func (self CodecType) IsVideo() bool { func (self CodecType) IsVideo() bool {
return self&codecTypeAudioBit == 0 return self&codecTypeAudioBit == 0
} }
// make a new audio codec type
func MakeAudioCodecType(base uint32) (c CodecType) { func MakeAudioCodecType(base uint32) (c CodecType) {
c = CodecType(base)<<codecTypeOtherBits | CodecType(codecTypeAudioBit) c = CodecType(base)<<codecTypeOtherBits | CodecType(codecTypeAudioBit)
return return
} }
// make a new video codec type
func MakeVideoCodecType(base uint32) (c CodecType) { func MakeVideoCodecType(base uint32) (c CodecType) {
c = CodecType(base) << codecTypeOtherBits c = CodecType(base) << codecTypeOtherBits
return return
@ -150,31 +167,28 @@ func MakeVideoCodecType(base uint32) (c CodecType) {
const avCodecTypeMagic = 233333 const avCodecTypeMagic = 233333
var ( // CodecData holds the bytes used to initialize an audio/video decoder.
H264 = MakeVideoCodecType(avCodecTypeMagic + 1) // Video width/height and audio sample rate/channel layout can be obtained from CodecData.
// CodecData can be converted to VideoCodecData or AudioCodecData using a type assertion:
AAC = MakeAudioCodecType(avCodecTypeMagic + 1) // codecdata.(AudioCodecData) or codecdata.(VideoCodecData)
PCM_MULAW = MakeAudioCodecType(avCodecTypeMagic + 2) // e.g. for H264, CodecData is the AVCDecoderConfigurationRecord bytes, which include SPS/PPS
PCM_ALAW = MakeAudioCodecType(avCodecTypeMagic + 3)
SPEEX = MakeAudioCodecType(avCodecTypeMagic + 4)
NELLYMOSER = MakeAudioCodecType(avCodecTypeMagic + 5)
)
type CodecData interface { type CodecData interface {
Type() CodecType Type() CodecType // Video/Audio codec type
} }
type VideoCodecData interface { type VideoCodecData interface {
CodecData CodecData
Width() int Width() int // Video width
Height() int Height() int // Video height
} }
type AudioCodecData interface { type AudioCodecData interface {
CodecData CodecData
SampleFormat() SampleFormat SampleFormat() SampleFormat // Audio sample format
SampleRate() int SampleRate() int // Audio sample rate
ChannelLayout() ChannelLayout ChannelLayout() ChannelLayout // Audio channel layout
// get audio packet duration
PacketDuration([]byte) (time.Duration, error) PacketDuration([]byte) (time.Duration, error)
} }
@ -186,47 +200,66 @@ type PacketReader interface {
ReadPacket() (Packet,error) ReadPacket() (Packet,error)
} }
// Muxer describes the steps of writing compressed audio/video packets into container formats like MP4/FLV/MPEG-TS.
// 1. WriteHeader([]CodecData) writes the file header, given one CodecData per stream
// 2. WritePacket(Packet) writes the audio/video packets
// 3. WriteTrailer() ends writing; now it's a complete file.
//
// WriteHeader/WriteTrailer can be called only once.
//
// Every format (format/flv, format/mp4, ...) implements the Muxer interface.
// rtmp.Conn implements the Muxer interface.
// Wrappers such as transcode.Muxer also implement the Muxer interface.
type Muxer interface { type Muxer interface {
PacketWriter PacketWriter
WriteHeader([]CodecData) error WriteHeader([]CodecData) error
WriteTrailer() error WriteTrailer() error
} }
// Muxer with Close() method
type MuxCloser interface { type MuxCloser interface {
Muxer Muxer
Close() error Close() error
} }
// Demuxer can demux compressed audio/video packets from container formats like MP4/FLV/MPEG-TS.
// Streams() ([]CodecData, error) reads the file header, which contains video/audio meta information
// ReadPacket() (Packet, error) reads compressed audio/video packets
type Demuxer interface { type Demuxer interface {
PacketReader PacketReader
Streams() ([]CodecData, error) Streams() ([]CodecData, error)
} }
// Demuxer with Close() method
type DemuxCloser interface { type DemuxCloser interface {
Demuxer Demuxer
Close() error Close() error
} }
// Packet stores compressed audio/video data
type Packet struct { type Packet struct {
IsKeyFrame bool IsKeyFrame bool // video packet is key frame
Idx int8 Idx int8 // stream index in container format
CompositionTime time.Duration CompositionTime time.Duration // packet presentation time minus decode time for H264 B-Frame
Time time.Duration Time time.Duration // packet decode time
Data []byte Data []byte // packet data
} }
// Raw audio frame
type AudioFrame struct { type AudioFrame struct {
SampleFormat SampleFormat SampleFormat SampleFormat // audio sample format, e.g: S16,FLTP,...
ChannelLayout ChannelLayout ChannelLayout ChannelLayout // audio channel layout, e.g: CH_MONO,CH_STEREO,...
SampleCount int SampleCount int // sample count in this frame
SampleRate int SampleRate int // sample rate
Data [][]byte Data [][]byte // data array; for planar formats len(Data) > 1
} }
// audio frame duration
func (self AudioFrame) Duration() time.Duration { func (self AudioFrame) Duration() time.Duration {
return time.Second * time.Duration(self.SampleCount) / time.Duration(self.SampleRate) return time.Second * time.Duration(self.SampleCount) / time.Duration(self.SampleRate)
} }
// check this audio frame has same format as other audio frame
func (self AudioFrame) HasSameFormat(other AudioFrame) bool { func (self AudioFrame) HasSameFormat(other AudioFrame) bool {
if self.SampleRate != other.SampleRate { if self.SampleRate != other.SampleRate {
return false return false
@ -240,6 +273,7 @@ func (self AudioFrame) HasSameFormat(other AudioFrame) bool {
return true return true
} }
// extract a range of audio samples (start to end) from this frame
func (self AudioFrame) Slice(start int, end int) (out AudioFrame) { func (self AudioFrame) Slice(start int, end int) (out AudioFrame) {
out = self out = self
out.Data = append([][]byte(nil), out.Data...) out.Data = append([][]byte(nil), out.Data...)
@ -251,6 +285,7 @@ func (self AudioFrame) Slice(start int, end int) (out AudioFrame) {
return return
} }
// concat two audio frames
func (self AudioFrame) Concat(in AudioFrame) (out AudioFrame) { func (self AudioFrame) Concat(in AudioFrame) (out AudioFrame) {
out = self out = self
out.Data = append([][]byte(nil), out.Data...) out.Data = append([][]byte(nil), out.Data...)
@ -261,25 +296,31 @@ func (self AudioFrame) Concat(in AudioFrame) (out AudioFrame) {
return return
} }
// AudioEncoder can encode raw audio frames into compressed audio packets.
// Currently cgo/ffmpeg implements AudioEncoder; use ffmpeg.NewAudioEncoder to create it.
type AudioEncoder interface { type AudioEncoder interface {
CodecData() (AudioCodecData, error) CodecData() (AudioCodecData, error) // encoder's codec data can put into container
Encode(AudioFrame) ([][]byte, error) Encode(AudioFrame) ([][]byte, error) // encode raw audio frame into compressed packet(s)
//Flush() ([]Packet, error) //Flush() ([]Packet, error)
Close() Close() // close encoder, free cgo contexts
SetSampleRate(int) (error) SetSampleRate(int) (error) // set encoder sample rate
SetChannelLayout(ChannelLayout) (error) SetChannelLayout(ChannelLayout) (error) // set encoder channel layout
SetSampleFormat(SampleFormat) (error) SetSampleFormat(SampleFormat) (error) // set encoder sample format
SetBitrate(int) (error) SetBitrate(int) (error) // set encoder bitrate
SetOption(string,interface{}) (error) SetOption(string,interface{}) (error) // encoder setopt, in ffmpeg is av_opt_set_dict()
GetOption(string,interface{}) (error) GetOption(string,interface{}) (error) // encoder getopt
} }
// AudioDecoder can decode compressed audio packets into raw audio frame
// use ffmpeg.NewAudioDecoder to create it
type AudioDecoder interface { type AudioDecoder interface {
Decode([]byte) (bool, AudioFrame, error) Decode([]byte) (bool, AudioFrame, error) // decode one compressed audio packet
//Flush() (AudioFrame, error) //Flush() (AudioFrame, error)
Close() Close() // close decoder, free cgo contexts
} }
// AudioResampler can convert raw audio frames between different sample rates/formats/channel layouts
type AudioResampler interface { type AudioResampler interface {
Resample(AudioFrame) (AudioFrame, error) Resample(AudioFrame) (AudioFrame, error) // convert raw audio frames
} }

View File

@ -17,14 +17,17 @@ type tStream struct {
adec av.AudioDecoder adec av.AudioDecoder
} }
// Transcode options
type Options struct { type Options struct {
FindAudioDecoderEncoder func(codec av.AudioCodecData, i int) (ok bool, dec av.AudioDecoder, enc av.AudioEncoder, err error) // if transcode is needed, create your AudioDecoder and AudioEncoder for Transcoder use.
FindAudioDecoderEncoder func(codec av.AudioCodecData, i int) (need bool, dec av.AudioDecoder, enc av.AudioEncoder, err error)
} }
type Transcoder struct { type Transcoder struct {
streams []*tStream streams []*tStream
} }
// Create new Transcoder
func NewTranscoder(streams []av.CodecData, options Options) (_self *Transcoder, err error) { func NewTranscoder(streams []av.CodecData, options Options) (_self *Transcoder, err error) {
self := &Transcoder{} self := &Transcoder{}
self.streams = []*tStream{} self.streams = []*tStream{}
@ -102,6 +105,10 @@ func (self *tStream) audioDecodeAndEncode(inpkt av.Packet) (outpkts []av.Packet,
return return
} }
// Do the transcode.
//
// in audio transcoding one Packet may transcode into many Packets.
// packet time will be adjusted in transcoder.
func (self *Transcoder) Do(pkt av.Packet) (out []av.Packet, err error) { func (self *Transcoder) Do(pkt av.Packet) (out []av.Packet, err error) {
stream := self.streams[pkt.Idx] stream := self.streams[pkt.Idx]
if stream.aenc != nil && stream.adec != nil { if stream.aenc != nil && stream.adec != nil {
@ -114,6 +121,7 @@ func (self *Transcoder) Do(pkt av.Packet) (out []av.Packet, err error) {
return return
} }
// CodecData after transcode
func (self *Transcoder) Streams() (streams []av.CodecData, err error) { func (self *Transcoder) Streams() (streams []av.CodecData, err error) {
for _, stream := range self.streams { for _, stream := range self.streams {
streams = append(streams, stream.codec) streams = append(streams, stream.codec)
@ -121,6 +129,7 @@ func (self *Transcoder) Streams() (streams []av.CodecData, err error) {
return return
} }
// Close transcoder, will close related encoder and decoders
func (self *Transcoder) Close() (err error) { func (self *Transcoder) Close() (err error) {
for _, stream := range self.streams { for _, stream := range self.streams {
if stream.aenc != nil { if stream.aenc != nil {
@ -136,10 +145,12 @@ func (self *Transcoder) Close() (err error) {
return return
} }
// Wraps a transcoder and the original Muxer into a new Muxer.
// Writing to the new Muxer will do the transcoding automatically.
type Muxer struct { type Muxer struct {
av.Muxer av.Muxer // origin Muxer
transcoder *Transcoder transcoder *Transcoder
Options Options // transcode options
} }
func (self *Muxer) WriteHeader(streams []av.CodecData) (err error) { func (self *Muxer) WriteHeader(streams []av.CodecData) (err error) {
@ -176,6 +187,8 @@ func (self *Muxer) Close() (err error) {
return return
} }
// Wraps a transcoder and the original Demuxer into a new Demuxer.
// Reading from this Demuxer will do the transcoding automatically.
type Demuxer struct { type Demuxer struct {
av.Demuxer av.Demuxer
transcoder *Transcoder transcoder *Transcoder