From 55d359da2114ca0abd09ccdf08517a5cb08346c2 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sun, 14 Mar 2021 10:26:01 +0100 Subject: [PATCH] Updates to media and added audio --- Makefile | 3 + audio.go | 97 +++++++ cmd/audioid/app.go | 67 +++++ cmd/audioid/main.go | 11 + cmd/audioid/units.go | 5 + cmd/ffextract/app.go | 2 +- media.go | 83 ++++-- pkg/media/ffmpeg/audioprofile.go | 101 +++++++ pkg/media/ffmpeg/audioprofile_test.go | 19 ++ pkg/media/ffmpeg/codec.go | 2 +- pkg/media/ffmpeg/decodectx.go | 2 +- pkg/media/ffmpeg/frame.go | 30 +- pkg/media/ffmpeg/inputctx.go | 2 +- pkg/media/ffmpeg/manager.go | 7 + pkg/media/ffmpeg/metadata.go | 2 +- pkg/media/ffmpeg/stream.go | 2 +- pkg/sys/ffmpeg/avcodeccontext.go | 10 - pkg/sys/ffmpeg/avconsts_fmt.go | 384 +++++++++++++------------- pkg/sys/ffmpeg/avutil.go | 2 +- 19 files changed, 592 insertions(+), 239 deletions(-) create mode 100644 audio.go create mode 100644 cmd/audioid/app.go create mode 100644 cmd/audioid/main.go create mode 100644 cmd/audioid/units.go create mode 100644 pkg/media/ffmpeg/audioprofile.go create mode 100644 pkg/media/ffmpeg/audioprofile_test.go diff --git a/Makefile b/Makefile index 43f77091..75ecf60d 100644 --- a/Makefile +++ b/Makefile @@ -252,6 +252,9 @@ dxtest: dispmanx egl rpi ffextract: builddir ffmpeg PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(GO) build -o ${BUILDDIR}/ffextract -tags "$(TAGS)" ${GOFLAGS} ./cmd/ffextract +audioid: builddir ffmpeg chromaprint + PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(GO) build -o ${BUILDDIR}/audioid -tags "$(TAGS)" ${GOFLAGS} ./cmd/audioid + # Build rules - dependencies nfpm: $(GO) get github.com/goreleaser/nfpm/cmd/nfpm@v1.10.1 diff --git a/audio.go b/audio.go new file mode 100644 index 00000000..6fe58c66 --- /dev/null +++ b/audio.go @@ -0,0 +1,97 @@ +package gopi + +/* + This file contains definitions for audio data: + + * Audio representation + * Input and output audio devices + + Resampling of audio is represented in the "media" interfaces +*/ + 
+//////////////////////////////////////////////////////////////////////////////// +// TYPES + +// AudioFormat defines the audio format +type AudioFormat uint + +// AudioChannelLayout represents number of channels and layout of those channels +type AudioChannelLayout struct { + Channels uint +} + +//////////////////////////////////////////////////////////////////////////////// +// AUDIO INTERFACES + +type AudioManager interface { + // OpenDefaultSink opens default output device + OpenDefaultSink() (AudioContext, error) + + // Close audio stream + Close(AudioContext) error +} + +type AudioContext interface { + // Write data to audio output device + Write(MediaFrame) error +} + +//////////////////////////////////////////////////////////////////////////////// +// CONSTANTS + +const ( + AUDIO_FMT_NONE AudioFormat = iota + AUDIO_FMT_U8 // unsigned 8 bits + AUDIO_FMT_U8P // unsigned 8 bits, planar + AUDIO_FMT_S16 // signed 16 bits + AUDIO_FMT_S16P // signed 16 bits, planar + AUDIO_FMT_S32 // signed 32 bits + AUDIO_FMT_S32P // signed 32 bits, planar + AUDIO_FMT_F32 // float32 + AUDIO_FMT_F32P // float32, planar + AUDIO_FMT_F64 // float64 + AUDIO_FMT_F64P // float64, planar + AUDIO_FMT_S64 // signed 64 bits + AUDIO_FMT_S64P // signed 64 bits, planar +) + +var ( + AudioLayoutMono = AudioChannelLayout{1} + AudioLayoutStereo = AudioChannelLayout{2} +) + +//////////////////////////////////////////////////////////////////////////////// +// STRINGIFY + +func (f AudioFormat) String() string { + switch f { + case AUDIO_FMT_NONE: + return "AUDIO_FMT_NONE" + case AUDIO_FMT_U8: + return "AUDIO_FMT_U8" + case AUDIO_FMT_U8P: + return "AUDIO_FMT_U8P" + case AUDIO_FMT_S16: + return "AUDIO_FMT_S16" + case AUDIO_FMT_S16P: + return "AUDIO_FMT_S16P" + case AUDIO_FMT_S32: + return "AUDIO_FMT_S32" + case AUDIO_FMT_S32P: + return "AUDIO_FMT_S32P" + case AUDIO_FMT_F32: + return "AUDIO_FMT_F32" + case AUDIO_FMT_F32P: + return "AUDIO_FMT_F32P" + case AUDIO_FMT_F64: + return "AUDIO_FMT_F64" + case 
AUDIO_FMT_F64P: + return "AUDIO_FMT_F64P" + case AUDIO_FMT_S64: + return "AUDIO_FMT_S64" + case AUDIO_FMT_S64P: + return "AUDIO_FMT_S64P" + default: + return "[?? Invalid AudioFormat value]" + } +} diff --git a/cmd/audioid/app.go b/cmd/audioid/app.go new file mode 100644 index 00000000..ec4c0de3 --- /dev/null +++ b/cmd/audioid/app.go @@ -0,0 +1,67 @@ +package main + +import ( + "context" + "os" + + "github.com/djthorpe/gopi/v3" +) + +type app struct { + gopi.Unit + gopi.MediaManager + gopi.Logger + + filename string +} + +func (this *app) Define(cfg gopi.Config) error { + return nil +} + +func (this *app) New(cfg gopi.Config) error { + this.Require(this.Logger, this.MediaManager) + + if args := cfg.Args(); len(args) != 1 { + return gopi.ErrBadParameter.WithPrefix("Missing filename") + } else if stat, err := os.Stat(args[0]); err != nil { + return gopi.ErrBadParameter.WithPrefix(args[0]) + } else if stat.Mode().IsRegular() == false { + return gopi.ErrBadParameter.WithPrefix(args[0]) + } else { + this.filename = args[0] + } + + // Return success + return nil +} + +func (this *app) Run(ctx context.Context) error { + // Open the file, decode the audio + if file, err := this.OpenFile(this.filename); err != nil { + return err + } else if err := this.Decode(ctx, file); err != nil { + return err + } + + // Return success + return nil +} + +func (this *app) Decode(ctx context.Context, file gopi.MediaInput) error { + // Use the first audio stream found + streams := file.StreamsForFlag(gopi.MEDIA_FLAG_AUDIO) + if len(streams) == 0 { + return gopi.ErrNotFound.WithPrefix("Audio stream") + } else { + this.Print(file.StreamForIndex(streams[0])) + } + + // Decode frames + return file.Read(ctx, streams[0:1], func(ctx gopi.MediaDecodeContext, packet gopi.MediaPacket) error { + return file.DecodeFrameIterator(ctx, packet, func(frame gopi.MediaFrame) error { + this.Print("Decoded", ctx.Frame(), " => ", frame) + return nil + }) + }) +} diff --git a/cmd/audioid/main.go 
b/cmd/audioid/main.go new file mode 100644 index 00000000..916e45ba --- /dev/null +++ b/cmd/audioid/main.go @@ -0,0 +1,11 @@ +package main + +import ( + "os" + + "github.com/djthorpe/gopi/v3/pkg/tool" +) + +func main() { + os.Exit(tool.CommandLine("audioid", os.Args[1:], new(app))) +} diff --git a/cmd/audioid/units.go b/cmd/audioid/units.go new file mode 100644 index 00000000..d0025dec --- /dev/null +++ b/cmd/audioid/units.go @@ -0,0 +1,5 @@ +package main + +import ( + _ "github.com/djthorpe/gopi/v3/pkg/media/ffmpeg" +) diff --git a/cmd/ffextract/app.go b/cmd/ffextract/app.go index fee8e1d9..221e3307 100644 --- a/cmd/ffextract/app.go +++ b/cmd/ffextract/app.go @@ -104,6 +104,6 @@ func (this *app) DecodeFrame(path string, frame gopi.MediaFrame) error { return err } - this.Printf("Saved frame:", frame, "=>", path) + this.Print("Saved frame:", frame, " => ", path) return nil } diff --git a/media.go b/media.go index 847aa7ed..b0610759 100644 --- a/media.go +++ b/media.go @@ -11,9 +11,11 @@ import ( /* This file contains definitions for media devices: - * Video and Audio decoding + * Video and Audio encoding and decoding * Input and output media devices - * DVB tuning and decoding + * DVB tuning and decoding (experimental) + + There are additional interfaces for audio and graphics elsewhere */ //////////////////////////////////////////////////////////////////////////////// @@ -27,7 +29,7 @@ type ( ) //////////////////////////////////////////////////////////////////////////////// -// MEDIA FILE INTERFACES +// MEDIA MANAGER // MediaManager for media file management type MediaManager interface { @@ -47,8 +49,14 @@ type MediaManager interface { // audio, video, encode and decode. 
By default (empty name and // MediaFlag) lists all codecs ListCodecs(string, MediaFlag) []MediaCodec + + // Create an audio profile with format, sample rate, channels and layout + AudioProfile(AudioFormat, uint, AudioChannelLayout) MediaProfile } +//////////////////////////////////////////////////////////////////////////////// +// MEDIA OBJECTS + // Media is an input or output type Media interface { URL() *url.URL // Return URL for the media location @@ -58,6 +66,7 @@ type Media interface { StreamForIndex(int) MediaStream // Return stream by index } +// MediaInput represents a source of media type MediaInput interface { Media @@ -73,6 +82,7 @@ type MediaInput interface { DecodeFrameIterator(MediaDecodeContext, MediaPacket, DecodeFrameIteratorFunc) error } +// MediaOutput represents a sink for media type MediaOutput interface { Media @@ -80,19 +90,34 @@ type MediaOutput interface { Write(MediaDecodeContext, MediaPacket) error } +//////////////////////////////////////////////////////////////////////////////// +// MEDIA PROFILES + +type MediaProfile interface { + Flags() MediaFlag // Return audio or video profile +} + +type MediaAudioProfile interface { + MediaProfile + + Format() AudioFormat + SampleRate() uint + Layout() AudioChannelLayout +} + +type MediaVideoProfile interface { + MediaProfile +} + +//////////////////////////////////////////////////////////////////////////////// +// MEDIA METADATA AND CODECS + // MediaMetadata are key value pairs for a media object type MediaMetadata interface { Keys() []MediaKey // Return all existing keys Value(MediaKey) interface{} // Return value for key, or nil } -// MediaStream is a stream of packets from a media object -type MediaStream interface { - Index() int // Stream index - Flags() MediaFlag // Flags for the stream (Audio, Video, etc) - Codec() MediaCodec // Return codec and parameters -} - // MediaCodec is the codec and parameters type MediaCodec interface { // Name returns the unique name for the codec @@ -105,6 
+130,16 @@ type MediaCodec interface { Flags() MediaFlag } +//////////////////////////////////////////////////////////////////////////////// +// MEDIA STREAMS, PACKETS AND FRAMES + +// MediaStream is a stream of packets from a media object +type MediaStream interface { + Index() int // Stream index + Flags() MediaFlag // Flags for the stream (Audio, Video, etc) + Codec() MediaCodec // Return codec and parameters +} + // MediaPacket is a packet of data from a stream type MediaPacket interface { Size() int @@ -114,9 +149,19 @@ type MediaPacket interface { // MediaFrame is a decoded audio or video frame type MediaFrame interface { + // Implements image interface which can be saved to save frame as bitmap image.Image + + // Resample a frame to a specific profile + Resample(MediaProfile) (MediaFrame, error) + + // Flags for the frame (Audio, Video) + Flags() MediaFlag } +//////////////////////////////////////////////////////////////////////////////// +// MEDIA ENCODING AND DECODING + // MediaDecodeContext provides packet data and streams for decoding // frames of data type MediaDecodeContext interface { @@ -125,23 +170,7 @@ type MediaDecodeContext interface { } //////////////////////////////////////////////////////////////////////////////// -// AUDIO INTERFACES - -type AudioManager interface { - // OpenDefaultSink opens default output device - OpenDefaultSink() (AudioContext, error) - - // Close audio stream - Close(AudioContext) error -} - -type AudioContext interface { - // Write data to audio output device - Write(MediaFrame) error -} - -//////////////////////////////////////////////////////////////////////////////// -// DVB INTERFACES +// DVB INTERFACES - EXPERIMENTAL // DVBManager encapsulates methods for DVB reception type DVBManager interface { diff --git a/pkg/media/ffmpeg/audioprofile.go b/pkg/media/ffmpeg/audioprofile.go new file mode 100644 index 00000000..9e55c58b --- /dev/null +++ b/pkg/media/ffmpeg/audioprofile.go @@ -0,0 +1,101 @@ +// +build ffmpeg + 
+package ffmpeg + +import ( + "fmt" + + gopi "github.com/djthorpe/gopi/v3" + ffmpeg "github.com/djthorpe/gopi/v3/pkg/sys/ffmpeg" +) + +//////////////////////////////////////////////////////////////////////////////// +// TYPES + +type AudioProfile struct { + fmt ffmpeg.AVSampleFormat + rate uint + channels uint +} + +//////////////////////////////////////////////////////////////////////////////// +// LIFECYCLE + +// NewAudioProfile returns a profile for the format, sample rate and channel layout, or nil if the format has no ffmpeg equivalent or the rate or channel count is zero +func NewAudioProfile(fmt gopi.AudioFormat, rate uint, layout gopi.AudioChannelLayout) *AudioProfile { + this := new(AudioProfile) + if fmt := toSampleFormat(fmt); fmt == ffmpeg.AV_SAMPLE_FMT_NONE { + return nil + } else if rate == 0 { + return nil + } else if layout.Channels == 0 { + return nil + } else { + this.fmt = fmt + this.rate = rate + this.channels = layout.Channels + } + + // Return success + return this +} + +//////////////////////////////////////////////////////////////////////////////// +// PUBLIC METHODS + +func (this *AudioProfile) Flags() gopi.MediaFlag { + return gopi.MEDIA_FLAG_AUDIO +} + +//////////////////////////////////////////////////////////////////////////////// +// STRINGIFY + +func (this *AudioProfile) String() string { + str := "<ffmpeg.audioprofile" + str += " fmt=" + fmt.Sprint(this.fmt) + str += " rate=" + fmt.Sprint(this.rate) + str += " channels=" + fmt.Sprint(this.channels) + return str + ">" +} + +//////////////////////////////////////////////////////////////////////////////// +// PRIVATE METHODS + +func toSampleFormat(fmt gopi.AudioFormat) ffmpeg.AVSampleFormat { + switch fmt { + case gopi.AUDIO_FMT_U8: + return ffmpeg.AV_SAMPLE_FMT_U8 + case gopi.AUDIO_FMT_U8P: + return ffmpeg.AV_SAMPLE_FMT_U8P + case gopi.AUDIO_FMT_S16: + return ffmpeg.AV_SAMPLE_FMT_S16 + case gopi.AUDIO_FMT_S16P: + return ffmpeg.AV_SAMPLE_FMT_S16P + case gopi.AUDIO_FMT_S32: + return ffmpeg.AV_SAMPLE_FMT_S32 + case gopi.AUDIO_FMT_S32P: + return ffmpeg.AV_SAMPLE_FMT_S32P + case gopi.AUDIO_FMT_F32: + return ffmpeg.AV_SAMPLE_FMT_FLT + case gopi.AUDIO_FMT_F32P: + return ffmpeg.AV_SAMPLE_FMT_FLTP + case gopi.AUDIO_FMT_F64: + return ffmpeg.AV_SAMPLE_FMT_DBL + case gopi.AUDIO_FMT_F64P: + return ffmpeg.AV_SAMPLE_FMT_DBLP + case 
gopi.AUDIO_FMT_S64: + return ffmpeg.AV_SAMPLE_FMT_S64 + case gopi.AUDIO_FMT_S64P: + return ffmpeg.AV_SAMPLE_FMT_S64P + default: + return ffmpeg.AV_SAMPLE_FMT_NONE + } +} diff --git a/pkg/media/ffmpeg/audioprofile_test.go b/pkg/media/ffmpeg/audioprofile_test.go new file mode 100644 index 00000000..718126b2 --- /dev/null +++ b/pkg/media/ffmpeg/audioprofile_test.go @@ -0,0 +1,19 @@ +// +build ffmpeg + +package ffmpeg_test + +import ( + "testing" + + gopi "github.com/djthorpe/gopi/v3" + ffmpeg "github.com/djthorpe/gopi/v3/pkg/media/ffmpeg" +) + +func Test_AudioProfile_001(t *testing.T) { + profile := ffmpeg.NewAudioProfile(gopi.AUDIO_FMT_S16, 41000, gopi.AudioLayoutMono) + if profile == nil { + t.Error("Unexpected nil returned") + } else { + t.Log(profile) + } +} diff --git a/pkg/media/ffmpeg/codec.go b/pkg/media/ffmpeg/codec.go index 5118d016..39b2f76c 100644 --- a/pkg/media/ffmpeg/codec.go +++ b/pkg/media/ffmpeg/codec.go @@ -104,7 +104,7 @@ func (this *codec) Flags() gopi.MediaFlag { // STRINGIFY func (this *codec) String() string { - str := "= 0 { str += " frame_number=" + fmt.Sprint(frame_number) } diff --git a/pkg/media/ffmpeg/frame.go b/pkg/media/ffmpeg/frame.go index d353a21c..346da295 100644 --- a/pkg/media/ffmpeg/frame.go +++ b/pkg/media/ffmpeg/frame.go @@ -7,6 +7,7 @@ import ( "image" "image/color" + gopi "github.com/djthorpe/gopi/v3" ffmpeg "github.com/djthorpe/gopi/v3/pkg/sys/ffmpeg" ) @@ -19,7 +20,7 @@ type frame struct { } //////////////////////////////////////////////////////////////////////////////// -// INIT +// LIFECYCLE func NewFrame() *frame { if ctx := ffmpeg.NewAVFrame(); ctx == nil { @@ -60,7 +61,22 @@ func (this *frame) Free() { } //////////////////////////////////////////////////////////////////////////////// -// IMAGE IMPLEMENTATION +// PUBLIC METHODS + +func (this *frame) Flags() gopi.MediaFlag { + if this.ctx == nil { + return gopi.MEDIA_FLAG_NONE + } else if fmt := this.ctx.SampleFormat(); fmt != ffmpeg.AV_SAMPLE_FMT_NONE { + return 
gopi.MEDIA_FLAG_AUDIO + } else if fmt := this.ctx.PixelFormat(); fmt != ffmpeg.AV_PIX_FMT_NONE { + return gopi.MEDIA_FLAG_VIDEO + } else { + return gopi.MEDIA_FLAG_NONE + } +} + +//////////////////////////////////////////////////////////////////////////////// +// PUBLIC METHODS: IMAGE func (this *frame) ColorModel() color.Model { return color.YCbCrModel @@ -98,11 +114,19 @@ func (this *frame) At(x, y int) color.Color { return color.YCbCr{Y, Cb, Cr} } +func (this *frame) Resample(gopi.MediaProfile) (gopi.MediaFrame, error) { + // TODO + return nil, gopi.ErrNotImplemented +} + //////////////////////////////////////////////////////////////////////////////// // STRINGIFY func (this *frame) String() string { - str := " 0 { str += " keys=" for i, key := range keys { diff --git a/pkg/media/ffmpeg/stream.go b/pkg/media/ffmpeg/stream.go index f72c37d1..28f721d2 100644 --- a/pkg/media/ffmpeg/stream.go +++ b/pkg/media/ffmpeg/stream.go @@ -111,7 +111,7 @@ func (this *stream) NewContextWithOptions(options *ffmpeg.AVDictionary) *ffmpeg. // STRINGIFY func (this *stream) String() string { - str := "