diff --git a/audio.go b/audio.go index 60bea48..b00c296 100644 --- a/audio.go +++ b/audio.go @@ -23,24 +23,87 @@ type ffctx struct { ff C.FFCtx } +type Resampler struct { + InSampleFormat, OutSampleFormat av.SampleFormat + InChannelLayout, OutChannelLayout av.ChannelLayout + InSampleRate, OutSampleRate int + avr *C.AVAudioResampleContext + inframe, outframe *C.AVFrame +} + +func freeResampler(self *Resampler) { + C.avresample_free(&self.avr) +} + +func (self *Resampler) Setup() (err error) { + avr := C.avresample_alloc_context() + C.av_opt_set_int(avr, C.CString("in_channel_layout"), C.int64_t(channelLayoutAV2FF(self.InChannelLayout)), 0) + C.av_opt_set_int(avr, C.CString("out_channel_layout"), C.int64_t(channelLayoutAV2FF(self.OutChannelLayout)), 0) + C.av_opt_set_int(avr, C.CString("in_sample_rate"), C.int64_t(self.InSampleRate), 0) + C.av_opt_set_int(avr, C.CString("out_sample_rate"), C.int64_t(self.OutSampleRate), 0) + C.av_opt_set_int(avr, C.CString("in_sample_fmt"), C.int64_t(sampleFormatAV2FF(self.InSampleFormat)), 0) + C.av_opt_set_int(avr, C.CString("out_sample_fmt"), C.int64_t(sampleFormatAV2FF(self.OutSampleFormat)), 0) + self.avr = avr + runtime.SetFinalizer(self, freeResampler) + if C.avresample_open(avr) != 0 { + err = fmt.Errorf("avresample_open failed") + return + } + self.inframe = C.av_frame_alloc() + self.outframe = C.av_frame_alloc() + return +} + +func (self *Resampler) Resample(in av.AudioFrame) (out av.AudioFrame) { + return +} + type AudioEncoder struct { ff *ffctx SampleRate int BitRate int - ChannelCount int + ChannelLayout av.ChannelLayout SampleFormat av.SampleFormat FrameSampleCount int + framebuf av.AudioFrame codecData av.AudioCodecData + resampler *Resampler } -func convffSampleFormat(ffsamplefmt int32) (sampleFormat av.SampleFormat, err error) { +func sampleFormatAV2FF(sampleFormat av.SampleFormat) (ffsamplefmt C.int) { + switch sampleFormat { + case av.U8: + ffsamplefmt = C.AV_SAMPLE_FMT_U8 + case av.S16: + ffsamplefmt = C.AV_SAMPLE_FMT_S16 + case av.S32: + ffsamplefmt = C.AV_SAMPLE_FMT_S32 + case av.FLT: + ffsamplefmt = C.AV_SAMPLE_FMT_FLT + case av.DBL: + ffsamplefmt = C.AV_SAMPLE_FMT_DBL + case av.U8P: + ffsamplefmt = C.AV_SAMPLE_FMT_U8P + case av.S16P: + ffsamplefmt = C.AV_SAMPLE_FMT_S16P + case av.S32P: + ffsamplefmt = C.AV_SAMPLE_FMT_S32P + case av.FLTP: + ffsamplefmt = C.AV_SAMPLE_FMT_FLTP + case av.DBLP: + ffsamplefmt = C.AV_SAMPLE_FMT_DBLP + } + return +} + +func sampleFormatFF2AV(ffsamplefmt int32) (sampleFormat av.SampleFormat) { switch ffsamplefmt { case C.AV_SAMPLE_FMT_U8: ///< unsigned 8 bits sampleFormat = av.U8 case C.AV_SAMPLE_FMT_S16: ///< signed 16 bits sampleFormat = av.S16 case C.AV_SAMPLE_FMT_S32: ///< signed 32 bits - sampleFormat = av.U32 + sampleFormat = av.S32 case C.AV_SAMPLE_FMT_FLT: ///< float sampleFormat = av.FLT case C.AV_SAMPLE_FMT_DBL: ///< double @@ -55,9 +118,6 @@ func convffSampleFormat(ffsamplefmt int32) (sampleFormat av.SampleFormat, err er sampleFormat = av.FLTP case C.AV_SAMPLE_FMT_DBLP: ///< double, planar sampleFormat = av.DBLP - default: - err = fmt.Errorf("ffsamplefmt=%d invalid", ffsamplefmt) - return } return } @@ -75,22 +135,20 @@ func (self *AudioEncoder) Setup() (err error) { if self.SampleRate == 0 { self.SampleRate = 44100 } - if self.ChannelCount == 0 { - self.ChannelCount = 2 + if self.ChannelLayout == av.ChannelLayout(0) { + self.ChannelLayout = av.CH_STEREO } + C.set_sample_fmt(ff.codecCtx, C.int(self.SampleFormat)) ff.codecCtx.sample_rate = C.int(self.SampleRate) ff.codecCtx.bit_rate = C.int(self.BitRate) - ff.codecCtx.channels = C.int(self.ChannelCount) + ff.codecCtx.channel_layout = channelLayoutAV2FF(self.ChannelLayout) ff.codecCtx.strict_std_compliance = C.FF_COMPLIANCE_EXPERIMENTAL if C.avcodec_open2(ff.codecCtx, ff.codec, nil) != 0 { err = fmt.Errorf("avcodec_open2 failed") return } - if self.SampleFormat, err = convffSampleFormat(ff.codecCtx.sample_fmt); err != nil { - return - } - self.ChannelCount = int(ff.codecCtx.channels) + self.SampleFormat = sampleFormatFF2AV(ff.codecCtx.sample_fmt) self.FrameSampleCount = int(ff.codecCtx.frame_size) extradata := C.GoBytes(unsafe.Pointer(ff.codecCtx.extradata), ff.codecCtx.extradata_size) @@ -102,7 +160,7 @@ func (self *AudioEncoder) Setup() (err error) { default: self.codecData = AudioCodecData{ - channelCount: self.ChannelCount, + channelLayout: self.ChannelLayout, sampleFormat: self.SampleFormat, sampleRate: self.SampleRate, codecId: ff.codecCtx.codec_id, @@ -117,40 +175,22 @@ func (self *AudioEncoder) CodecData() (codec av.AudioCodecData) { return self.codecData } -func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt []byte, err error) { +func (self *AudioEncoder) Encode(frame av.AudioFrame) (gotPkt bool, pkt av.Packet, err error) { ff := &self.ff.ff - nbSamples := self.FrameSampleCount - channelCount := int(ff.codecCtx.channels) - sampleSize := int(C.av_get_bytes_per_sample(ff.codecCtx.sample_fmt)) - expectedSize := nbSamples*sampleSize*channelCount - frame := ff.frame - if flush { - frame = nil - } else { - if len(sample) != expectedSize { - err = fmt.Errorf("len(sample) should be %d", expectedSize) + if self.FrameSampleCount != 0 { + self.framebuf = self.framebuf.Concat(frame) + if self.framebuf.SampleCount < self.FrameSampleCount { return } - - frame.nb_samples = C.int(nbSamples) - frame.format = C.int(ff.codecCtx.sample_fmt) - frame.channel_layout = ff.codecCtx.channel_layout - if C.av_sample_fmt_is_planar(ff.codecCtx.sample_fmt) != 0 { - for i := 0; i < self.ChannelCount; i++ { - frame.data[i] = (*C.uint8_t)(unsafe.Pointer(&sample[i*nbSamples*sampleSize])) - frame.linesize[i] = C.int(nbSamples*sampleSize) - } - } else { - frame.data[0] = (*C.uint8_t)(unsafe.Pointer(&sample[0])) - frame.linesize[0] = C.int(channelCount*nbSamples*sampleSize) - } - //frame.extended_data = &frame.data[0] + frame = self.framebuf.Slice(0, self.FrameSampleCount) + self.framebuf = self.framebuf.Slice(self.FrameSampleCount, self.framebuf.SampleCount) } cpkt := C.AVPacket{} cgotpkt := C.int(0) - cerr := C.avcodec_encode_audio2(ff.codecCtx, &cpkt, frame, &cgotpkt) + audioFrameAssignToFF(frame, ff.frame) + cerr := C.avcodec_encode_audio2(ff.codecCtx, &cpkt, ff.frame, &cgotpkt) if cerr < C.int(0) { err = fmt.Errorf("avcodec_encode_audio2 failed: %d", cerr) return @@ -158,7 +198,8 @@ func (self *AudioEncoder) Encode(sample []byte, flush bool) (gotPkt bool, pkt [] if cgotpkt != 0 { gotPkt = true - pkt = C.GoBytes(unsafe.Pointer(cpkt.data), cpkt.size) + pkt.Data = C.GoBytes(unsafe.Pointer(cpkt.data), cpkt.size) + pkt.Duration = float64(frame.SampleCount)/float64(self.SampleRate) C.av_free_packet(&cpkt) } @@ -169,9 +210,92 @@ func (self *AudioEncoder) Close() { freeFFCtx(self.ff) } +func audioFrameAssignToAV(f *C.AVFrame, frame *av.AudioFrame) { + frame.SampleCount = int(f.nb_samples) + frame.SampleFormat = sampleFormatFF2AV(int32(f.format)) + frame.ChannelLayout = channelLayoutFF2AV(f.channel_layout) + channels := int(f.channels) + frame.Data = make([][]byte, channels) + for i := 0; i < channels; i++ { + frame.Data[i] = C.GoBytes(unsafe.Pointer(f.data[i]), f.linesize[i]) + } +} + +func audioFrameAssignToFF(frame av.AudioFrame, f *C.AVFrame) { + f.nb_samples = C.int(frame.SampleCount) + f.format = C.int(sampleFormatAV2FF(frame.SampleFormat)) + f.channel_layout = channelLayoutAV2FF(frame.ChannelLayout) + for i := range frame.Data { + f.data[i] = (*C.uint8_t)(unsafe.Pointer(&frame.Data[i])) + f.linesize[i] = C.int(len(frame.Data[i])) + } +} + +func channelLayoutFF2AV(layout C.uint64_t) (channelLayout av.ChannelLayout) { + if layout & C.AV_CH_FRONT_CENTER != 0 { + channelLayout |= av.CH_FRONT_CENTER + } + if layout & C.AV_CH_FRONT_LEFT != 0 { + channelLayout |= av.CH_FRONT_LEFT + } + if layout & C.AV_CH_FRONT_RIGHT != 0 { + channelLayout |= av.CH_FRONT_RIGHT + } + if layout & C.AV_CH_BACK_CENTER != 0 { + channelLayout |= av.CH_BACK_CENTER + } + if layout & C.AV_CH_BACK_LEFT != 0 { + channelLayout |= av.CH_BACK_LEFT + } + if layout & C.AV_CH_BACK_RIGHT != 0 { + channelLayout |= av.CH_BACK_RIGHT + } + if layout & C.AV_CH_SIDE_LEFT != 0 { + channelLayout |= av.CH_SIDE_LEFT + } + if layout & C.AV_CH_SIDE_RIGHT != 0 { + channelLayout |= av.CH_SIDE_RIGHT + } + if layout & C.AV_CH_LOW_FREQUENCY != 0 { + channelLayout |= av.CH_LOW_FREQ + } + return +} + +func channelLayoutAV2FF(channelLayout av.ChannelLayout) (layout C.uint64_t) { + if channelLayout & av.CH_FRONT_CENTER != 0 { + layout |= C.AV_CH_FRONT_CENTER + } + if channelLayout & av.CH_FRONT_LEFT != 0 { + layout |= C.AV_CH_FRONT_LEFT + } + if channelLayout & av.CH_FRONT_RIGHT != 0 { + layout |= C.AV_CH_FRONT_RIGHT + } + if channelLayout & av.CH_BACK_CENTER != 0 { + layout |= C.AV_CH_BACK_CENTER + } + if channelLayout & av.CH_BACK_LEFT != 0 { + layout |= C.AV_CH_BACK_LEFT + } + if channelLayout & av.CH_BACK_RIGHT != 0 { + layout |= C.AV_CH_BACK_RIGHT + } + if channelLayout & av.CH_SIDE_LEFT != 0 { + layout |= C.AV_CH_SIDE_LEFT + } + if channelLayout & av.CH_SIDE_RIGHT != 0 { + layout |= C.AV_CH_SIDE_RIGHT + } + if channelLayout & av.CH_LOW_FREQ != 0 { + layout |= C.AV_CH_LOW_FREQUENCY + } + return +} + type AudioDecoder struct { ff *ffctx - ChannelCount int + ChannelLayout av.ChannelLayout SampleFormat av.SampleFormat Extradata []byte } @@ -186,34 +310,30 @@ func (self *AudioDecoder) Setup() (err error) { ff.codecCtx.extradata_size = C.int(len(self.Extradata)) } - ff.codecCtx.channels = C.int(self.ChannelCount) + ff.codecCtx.channel_layout = channelLayoutAV2FF(self.ChannelLayout) if C.avcodec_open2(ff.codecCtx, ff.codec, nil) != 0 { err = fmt.Errorf("avcodec_open2 failed") return } - if self.SampleFormat, err = convffSampleFormat(ff.codecCtx.sample_fmt); err != nil { - return - } - self.ChannelCount = int(ff.codecCtx.channels) + self.SampleFormat = sampleFormatFF2AV(ff.codecCtx.sample_fmt) + self.ChannelLayout = channelLayoutFF2AV(ff.codecCtx.channel_layout) return } -func (self *AudioDecoder) Decode(frame []byte) (gotPkt bool, pkt []byte, err error) { +func (self *AudioDecoder) Decode(data []byte) (gotFrame bool, frame av.AudioFrame, err error) { ff := &self.ff.ff cgotpkt := C.int(0) - cerr := C.wrap_avcodec_decode_audio4(ff.codecCtx, ff.frame, unsafe.Pointer(&frame[0]), C.int(len(frame)), &cgotpkt) + cerr := C.wrap_avcodec_decode_audio4(ff.codecCtx, ff.frame, unsafe.Pointer(&data[0]), C.int(len(data)), &cgotpkt) if cerr < C.int(0) { err = fmt.Errorf("avcodec_decode_audio4 failed: %d", cerr) return } if cgotpkt != C.int(0) { - gotPkt = true - //pkt = C.GoBytes(unsafe.Pointer(cpkt.data), cpkt.size) - size := C.av_samples_get_buffer_size(nil, ff.codecCtx.channels, ff.frame.nb_samples, ff.codecCtx.sample_fmt, C.int(1)) - pkt = C.GoBytes(unsafe.Pointer(ff.frame.data[0]), size) + gotFrame = true + audioFrameAssignToAV(ff.frame, &frame) } return @@ -223,16 +343,18 @@ func (self *AudioDecoder) Close() { freeFFCtx(self.ff) } -//func HasEncoder(name string) bool -//func HasDecoder(name string) bool +func HasEncoder(name string) bool { + return C.avcodec_find_encoder_by_name(C.CString(name)) != nil +} + +func HasDecoder(name string) bool { + return C.avcodec_find_decoder_by_name(C.CString(name)) != nil +} + //func EncodersList() []string //func DecodersList() []string func newFFCtxByCodec(codec *C.AVCodec) (ff *ffctx, err error) { - if codec == nil { - err = fmt.Errorf("AVCodec not found") - return - } ff = &ffctx{} ff.ff.codec = codec ff.ff.codecCtx = C.avcodec_alloc_context3(codec) @@ -255,15 +377,26 @@ func freeFFCtx(self *ffctx) { func NewAudioEncoder( name string, - sampleFormat av.SampleFormat, sampleRate int, channelCount int, bitRate int, + sampleFormat av.SampleFormat, sampleRate int, channelLayout av.ChannelLayout, bitRate int, ) (enc *AudioEncoder, err error) { _enc := &AudioEncoder{} - if _enc.ff, err = newFFCtxByCodec(C.avcodec_find_encoder_by_name(C.CString(name))); err != nil { + + codec := C.avcodec_find_encoder_by_name(C.CString(name)) + if codec == nil { + err = fmt.Errorf("cannot find encoder=%s", name) + return + } + if C.avcodec_get_type(codec.id) != C.AVMEDIA_TYPE_AUDIO { + err = fmt.Errorf("encoder=%s type is not audio", name) + return + } + + if _enc.ff, err = newFFCtxByCodec(codec); err != nil { return } _enc.SampleFormat = sampleFormat _enc.SampleRate = sampleRate - _enc.ChannelCount = channelCount + _enc.ChannelLayout = channelLayout _enc.BitRate = bitRate if err = _enc.Setup(); err != nil { return @@ -289,7 +422,7 @@ func NewAudioDecoder(codec av.AudioCodecData) (dec *AudioDecoder, err error) { default: if ffcodec, ok := codec.(AudioCodecData); ok { _dec.SampleFormat = ffcodec.sampleFormat - _dec.ChannelCount = ffcodec.channelCount + _dec.ChannelLayout = ffcodec.channelLayout _dec.Extradata = ffcodec.extradata id = ffcodec.codecId } else { @@ -298,7 +431,18 @@ func NewAudioDecoder(codec av.AudioCodecData) (dec *AudioDecoder, err error) { } } - if _dec.ff, err = newFFCtxByCodec(C.avcodec_find_decoder(id)); err != nil { + c := C.avcodec_find_decoder(id) + if c == nil { + err = fmt.Errorf("cannot find decoder id=%d", id) + return + } + + if C.avcodec_get_type(c.id) != C.AVMEDIA_TYPE_AUDIO { + err = fmt.Errorf("decoder id=%d type is not audio", c.id) + return + } + + if _dec.ff, err = newFFCtxByCodec(c); err != nil { return } if err = _dec.Setup(); err != nil { @@ -312,7 +456,7 @@ func NewAudioDecoder(codec av.AudioCodecData) (dec *AudioDecoder, err error) { type AudioCodecData struct { codecId uint32 sampleFormat av.SampleFormat - channelCount int + channelLayout av.ChannelLayout sampleRate int extradata []byte } @@ -337,7 +481,7 @@ func (self AudioCodecData) SampleFormat() av.SampleFormat { return self.sampleFormat } -func (self AudioCodecData) ChannelCount() int { - return self.channelCount +func (self AudioCodecData) ChannelLayout() av.ChannelLayout { + return self.channelLayout } diff --git a/ffmpeg.go b/ffmpeg.go index 70f7cfb..b9b5468 100644 --- a/ffmpeg.go +++ b/ffmpeg.go @@ -1,7 +1,7 @@ package ffmpeg /* -#cgo LDFLAGS: -lavformat -lavutil -lavcodec +#cgo LDFLAGS: -lavformat -lavutil -lavcodec -lavresample #include "ffmpeg.h" void ffinit() { av_register_all(); diff --git a/ffmpeg.h b/ffmpeg.h index d039caa..308fc52 100644 --- a/ffmpeg.h +++ b/ffmpeg.h @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include typedef struct {