diff --git a/README.md b/README.md index 7c20a9d..cfa8b49 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ end Read from a file: ```elixir -r = Xav.new_reader!("./some_mp4_file.mp4") -{:ok, %Xav.Frame{} = frame} = Xav.next_frame(r) +r = Xav.Reader.new!("./some_mp4_file.mp4") +{:ok, %Xav.Frame{} = frame} = Xav.Reader.next_frame(r) tensor = Xav.Frame.to_nx(frame) Kino.Image.new(tensor) ``` @@ -33,8 +33,8 @@ Kino.Image.new(tensor) Read from a camera: ```elixir -r = Xav.new_reader!("/dev/video0", device?: true) -{:ok, %Xav.Frame{} = frame} = Xav.next_frame(r) +r = Xav.Reader.new!("/dev/video0", device?: true) +{:ok, %Xav.Frame{} = frame} = Xav.Reader.next_frame(r) tensor = Xav.Frame.to_nx(frame) Kino.Image.new(tensor) ``` @@ -42,7 +42,7 @@ Kino.Image.new(tensor) Speech to text: ```elixir -r = Xav.new_reader!("../sample.mp3", read: :audio) +r = Xav.Reader.new!("../sample.mp3", read: :audio) {:ok, whisper} = Bumblebee.load_model({:hf, "openai/whisper-tiny"}) {:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"}) @@ -57,8 +57,8 @@ serving = # read a couple of frames frames = for _i <- 0..200 do - {:ok, frame} = Xav.next_frame(r) - Xav.to_nx(frame) + {:ok, frame} = Xav.Reader.next_frame(r) + Xav.Frame.to_nx(frame) end batch = Nx.Batch.concatenate(frames) diff --git a/lib/decoder.ex b/lib/decoder.ex index fed3ba8..1bd4358 100644 --- a/lib/decoder.ex +++ b/lib/decoder.ex @@ -3,15 +3,27 @@ defmodule Xav.Decoder do Audio/video decoder. """ + @typedoc """ + Supported codecs. + """ @type codec() :: :opus | :vp8 @type t() :: reference() + @doc """ + Creates a new decoder. + """ @spec new(codec()) :: t() def new(codec) do Xav.NIF.new_decoder(codec) end + @doc """ + Decodes an audio or video frame. + + Video frames are always in the RGB format. + Audio samples are always interleaved. 
+ """ @spec decode(t(), binary(), integer(), integer()) :: {:ok, Xav.Frame.t()} | {:error, atom()} def decode(decoder, data, pts, dts) do case Xav.NIF.decode(decoder, data, pts, dts) do diff --git a/lib/reader.ex b/lib/reader.ex index 92d0aa2..a41270b 100644 --- a/lib/reader.ex +++ b/lib/reader.ex @@ -1,14 +1,14 @@ defmodule Xav.Reader do @moduledoc """ - Media reader. + Audio/video file reader. """ @typedoc """ Reader options. - * `read` - determines which stream to read from a file with both audio and video. + * `read` - determines which stream to read from a file. Defaults to `:video`. - * `device?` - determines whether path points to video camera. Defaults to `false`. + * `device?` - determines whether path points to the camera. Defaults to `false`. """ @type opts :: [read: :audio | :video, device?: boolean] @@ -28,8 +28,8 @@ defmodule Xav.Reader do @doc """ The same as new/1 but raises on error. """ - @spec new!(String.t(), opts) :: t() - def new!(path, opts) do + @spec new!(String.t(), opts()) :: t() + def new!(path, opts \\ []) do case new(path, opts) do {:ok, reader} -> reader {:error, reason} -> raise "Couldn't create a new reader. Reason: #{inspect(reason)}" @@ -37,16 +37,16 @@ defmodule Xav.Reader do end @doc """ - Creates a new media reader. + Creates a new audio/video reader. - Both reading from a file and video camera is supported. - In case of video camera, v4l2 driver is required and FPS are + Both reading from a file and from a video camera are supported. + In case of using a video camera, the v4l2 driver is required, and FPS are locked to 10. Microphone input is not supported. """ @spec new(String.t(), opts()) :: {:ok, t()} | {:error, term()} - def new(path, opts) do + def new(path, opts \\ []) do read = opts[:read] || :video device? = opts[:device?] || false @@ -80,9 +80,11 @@ defmodule Xav.Reader do end @doc """ - Reads next frame. + Reads the next frame. - Frame is always decoded. Video frames are always in RGB format. 
+ A frame is always decoded. + Video frames are always in the RGB format. + Audio samples are always interleaved. """ @spec next_frame(t()) :: {:ok, Xav.Frame.t()} | {:error, :eof} def next_frame(%__MODULE__{reader: reader}) do diff --git a/lib/xav.ex b/lib/xav.ex deleted file mode 100644 index 80ab1e7..0000000 --- a/lib/xav.ex +++ /dev/null @@ -1,20 +0,0 @@ -defmodule Xav do - @moduledoc """ - Main module for interacting with `Xav`. - - `Xav` is an Elixir wrapper over FFmpeg intended for - reading file and network based media streams. - - It doesn't map FFmpeg functions one to one but rather - wraps them in bigger building blocks creating a litte - higher abstraction level. - """ - - defdelegate new_reader!(path, opts \\ []), to: Xav.Reader, as: :new! - - defdelegate new_reader(path, opts \\ []), to: Xav.Reader, as: :new - - defdelegate next_frame(reader), to: Xav.Reader - - defdelegate to_nx(frame), to: Xav.Frame -end diff --git a/test/reader_test.exs b/test/reader_test.exs new file mode 100644 index 0000000..047441b --- /dev/null +++ b/test/reader_test.exs @@ -0,0 +1,53 @@ +defmodule Xav.ReaderTest do + use ExUnit.Case, async: true + + test "new/1" do + assert {:ok, %Xav.Reader{}} = Xav.Reader.new("./test/fixtures/sample_h264.mp4") + assert {:error, _reason} = Xav.Reader.new("non_existing_input") + end + + test "new!/1" do + %Xav.Reader{} = Xav.Reader.new!("./test/fixtures/sample_h264.mp4") + assert_raise RuntimeError, fn -> Xav.Reader.new!("non_existing_input") end + end + + test "next_frame/1" do + {:ok, r} = Xav.Reader.new("./test/fixtures/sample_h264.mp4") + # test reading 5 seconds + for _i <- 0..(30 * 5), do: assert({:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r)) + end + + # @tag :debug + # test "next_frame/1 audio" do + # {:ok, r} = Xav.Reader.new("./test/fixtures/sample.mp3", read: :audio) + + # for _i <- 0..5 do + # assert({:ok, %Xav.Frame{} = frame} = Xav.Reader.next_frame(r)) + # IO.inspect({byte_size(frame.data), frame}) + # end + # end + + test 
"to_nx/1" do + {:ok, r} = Xav.Reader.new("./test/fixtures/sample_h264.mp4") + {:ok, frame} = Xav.Reader.next_frame(r) + %Nx.Tensor{} = Xav.Frame.to_nx(frame) + end + + test "eof" do + {:ok, r} = Xav.Reader.new("./test/fixtures/one_frame.mp4") + {:ok, _frame} = Xav.Reader.next_frame(r) + {:error, :eof} = Xav.Reader.next_frame(r) + end + + @formats [{"h264", "h264"}, {"h264", "mkv"}, {"vp8", "webm"}, {"vp9", "webm"}, {"av1", "mkv"}] + Enum.map(@formats, fn {codec, container} -> + name = "#{codec} #{container}" + file = "./test/fixtures/sample_#{codec}.#{container}" + + test name do + {:ok, r} = Xav.Reader.new(unquote(file)) + # test reading 5 seconds + for _i <- 0..(30 * 5), do: assert({:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r)) + end + end) +end diff --git a/test/xav_test.exs b/test/xav_test.exs deleted file mode 100644 index cac020b..0000000 --- a/test/xav_test.exs +++ /dev/null @@ -1,53 +0,0 @@ -defmodule XavTest do - use ExUnit.Case - doctest Xav - - test "new_reader/1" do - assert {:ok, %Xav.Reader{}} = Xav.new_reader("./test/fixtures/sample_h264.mp4") - assert {:error, _reason} = Xav.new_reader("non_existing_input") - end - - test "new_reader!/1" do - %Xav.Reader{} = Xav.new_reader!("./test/fixtures/sample_h264.mp4") - assert_raise RuntimeError, fn -> Xav.new_reader!("non_existing_input") end - end - - test "next_frame/1" do - {:ok, r} = Xav.new_reader("./test/fixtures/sample_h264.mp4") - # test reading 5 seconds - for _i <- 0..(30 * 5), do: assert({:ok, %Xav.Frame{}} = Xav.next_frame(r)) - end - - # @tag :debug - # test "next_frame/1 audio" do - # {:ok, r} = Xav.new_reader("./test/fixtures/sample.mp3", false) - # for _i <- 0.. 
5 do - # assert({:ok, %Xav.Frame{}=frame} = Xav.next_frame(r)) - # IO.inspect({byte_size(frame.data), frame}) - # end - # end - - test "to_nx/1" do - {:ok, r} = Xav.new_reader("./test/fixtures/sample_h264.mp4") - {:ok, frame} = Xav.next_frame(r) - %Nx.Tensor{} = Xav.Frame.to_nx(frame) - end - - test "eof" do - {:ok, r} = Xav.new_reader("./test/fixtures/one_frame.mp4") - {:ok, _frame} = Xav.next_frame(r) - {:error, :eof} = Xav.next_frame(r) - end - - @formats [{"h264", "h264"}, {"h264", "mkv"}, {"vp8", "webm"}, {"vp9", "webm"}, {"av1", "mkv"}] - Enum.map(@formats, fn {codec, container} -> - name = "#{codec} #{container}" - file = "./test/fixtures/sample_#{codec}.#{container}" - - test name do - {:ok, r} = Xav.new_reader(unquote(file)) - # test reading 5 seconds - for _i <- 0..(30 * 5), do: assert({:ok, %Xav.Frame{}} = Xav.next_frame(r)) - end - end) -end