Skip to content

Commit

Permalink
Separate batch decoding logic into the batch submodule (#42)
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesmishra authored Oct 21, 2021
1 parent 0f2f1ad commit 8ed40dc
Show file tree
Hide file tree
Showing 23 changed files with 494 additions and 462 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ wasm-bindgen = { version = "0.2", features = ["serde-serialize"], optional = tru
js-sys = { version = "0.3", optional = true }

# for python frontend
numpy = { version = "0.13", optional = true }
pyo3 = { version = "0.13", optional = true }
numpy = { version = "0.14", optional = true }
pyo3 = { version = "0.14", optional = true }

# for binary targets
env_logger = { version = "0.8", optional = true }
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ build-binary: target/frontend-binary/release/$(BABYCAT_BINARY_NAME)
# ===================================================================

## docs-sphinx
.b/docs-sphinx: .b/init-javascript-tools .b/install-python-wheel target/frontend-wasm/release/bundler/babycat_bg.wasm babycat.h
.b/docs-sphinx: .b/init-javascript-tools .b/install-python-wheel target/frontend-wasm/release/bundler/babycat_bg.wasm babycat.h $(DOCS_FILES)
rm -rf docs/build
mkdir docs/build
$(DOXYGEN)
Expand All @@ -435,7 +435,7 @@ docs-sphinx: .b/docs-sphinx
# This is the command we use to build docs on Netlify.
# The Netlify build image has Python 3.8 installed,
# but does not come with the virtualenv extension.
.b/docs-sphinx-netlify: .b/init-javascript-tools target/frontend-wasm/release/bundler/babycat_bg.wasm babycat.h .b/build-python
.b/docs-sphinx-netlify: .b/init-javascript-tools target/frontend-wasm/release/bundler/babycat_bg.wasm babycat.h .b/build-python $(DOCS_FILES)
# Clean any previous builds.
rm -rf docs/build
mkdir docs/build
Expand Down
4 changes: 0 additions & 4 deletions docs/source/api/python/Waveform/from_many_files.rst

This file was deleted.

1 change: 0 additions & 1 deletion docs/source/api/python/Waveform/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ Decoding audio

.from_encoded_bytes() <from_encoded_bytes>
.from_file() <from_file>
.from_many_files() <from_many_files>


Resampling audio
Expand Down
15 changes: 15 additions & 0 deletions docs/source/api/python/batch/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
babycat.batch
=============

.. py:module:: babycat.batch
This submodule contains functions for decoding/demuxing multiple audio files in parallel.
Parallelism is achieved using multithreading in Rust, which means that decoding will
not be slowed down by the `Python Global Interpreter Lock (GIL) <https://realpython.com/python-gil/>`_.

Decoding audio
--------------
.. toctree::
:maxdepth: 2

.waveforms_from_files() <waveforms_from_files>
4 changes: 4 additions & 0 deletions docs/source/api/python/batch/waveforms_from_files.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
babycat.batch.waveforms_from_files()
====================================

.. automethod:: babycat.batch.waveforms_from_files
6 changes: 4 additions & 2 deletions docs/source/api/python/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Babycat Python API Documentation
:maxdepth: 5
:hidden:

batch <batch/index>
Waveform <Waveform/index>
WaveformNamedResult <WaveformNamedResult/index>
exceptions <exceptions>
Expand All @@ -14,8 +15,9 @@ This page shows the public API of the Python ``babycat`` package.

Submodules
----------
- :doc:`exceptions`: A submodule for all Babycat Python exception classes.
- :doc:`resample_mode`: A submodule containing named constants for each Babycat resampling model.
- :doc:`batch/index`: Functions for batched multithreaded decoding of multiple audio files.
- :doc:`exceptions`: All Babycat Python exception classes.
- :doc:`resample_mode`: Named constants for each Babycat resampling model.

Classes
-------
Expand Down
122 changes: 122 additions & 0 deletions src/backend/batch.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
//! Functions that use multithreading to manipulate multiple audio files in parallel.
//!
//! This submodule is only available if the Cargo feature
//! `enable-multithreading` is enabled. Functions that read audio from
//! the filesystem also need the Cargo feature `enable-filesystem`
//! to be enabled. Both of these features are disabled in Babycat's
//! WebAssembly frontend.
use rayon::prelude::*;
use serde::{Deserialize, Serialize};

use crate::backend::Waveform;
use crate::backend::WaveformArgs;
use crate::backend::WaveformNamedResult;

/// The default number of threads to use for multithreaded operations.
/// By default, we will initialize as many threads as *logical*
/// CPU cores on your machine.
///
/// The value `0` is not a literal thread count. It is forwarded to Rayon's
/// `ThreadPoolBuilder::num_threads`, where zero asks Rayon to choose the
/// number of threads itself.
pub const DEFAULT_NUM_WORKERS: u32 = 0;

/// Configures multithreading in Babycat.
///
/// The `Default` implementation sets `num_workers` to `DEFAULT_NUM_WORKERS`.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct BatchArgs {
/// The maximum number of threads to initialize when doing multithreaded work.
///
/// Babycat uses Rayon for multithreading, which
/// [by default](https://github.com/rayon-rs/rayon/blob/master/FAQ.md)
/// will initialize as many threads as *logical* CPU cores on your machine.
///
/// This value is handed to Rayon's `ThreadPoolBuilder::num_threads`.
/// Setting it to `0` leaves the choice of thread count to Rayon.
pub num_workers: usize,
}

impl Default for BatchArgs {
    /// Returns a `BatchArgs` whose `num_workers` is `DEFAULT_NUM_WORKERS`.
    fn default() -> Self {
        let num_workers = DEFAULT_NUM_WORKERS as usize;
        Self { num_workers }
    }
}

/// Decodes a list of audio files in parallel.
///
/// One [`WaveformNamedResult`] is returned per entry in `filenames`, in the
/// same order as the input. A file that fails to decode does not abort the
/// batch: its error is stored in that entry's `result` field.
///
/// # Arguments
/// - `filenames`: A list of filenames of encoded audio files on the local filesystem.
/// - `waveform_args`: Instructions on how to demux/decode each audio file.
/// - `batch_args`: Instructions on how to divide the work across multiple threads.
///
/// # Panics
/// Panics if the Rayon thread pool cannot be built
/// (for example, if the operating system refuses to spawn the worker threads).
///
/// # Feature flags
/// This function is only available if both of the `enable-filesystem`
/// and `enable-multithreading` features are enabled. These features
/// are enabled by default in Babycat's Rust, Python, and C frontends.
/// These features are disabled in Babycat's WebAssembly frontend.
///
/// # Examples
/// **(Attempt to) decode three files:**
///
/// In this example, we process three filenames and demonstrate how to handle errors.
/// The first two files are successfully processed, and we catch a
/// [`Error::FileNotFound`][crate::Error::FileNotFound] error when processing the third file.
/// ```
/// use babycat::{Error, WaveformArgs, WaveformNamedResult};
/// use babycat::batch::{BatchArgs, waveforms_from_files};
///
/// let filenames = &[
///     "audio-for-tests/andreas-theme/track.mp3",
///     "audio-for-tests/blippy-trance/track.mp3",
///     "does-not-exist",
/// ];
/// let decode_args = Default::default();
/// let batch_args = Default::default();
/// let batch = waveforms_from_files(
///     filenames,
///     decode_args,
///     batch_args
/// );
///
/// fn display_result(wnr: &WaveformNamedResult) -> String {
///     match &wnr.result {
///         Ok(waveform) => format!("\nSuccess: {}:\n{:?}", wnr.name, waveform),
///         Err(err) => format!("\nFailure: {}:\n{}", wnr.name, err),
///     }
/// }
/// assert_eq!(
///     display_result(&batch[0]),
///     "
/// Success: audio-for-tests/andreas-theme/track.mp3:
/// Waveform { frame_rate_hz: 44100, num_channels: 2, num_frames: 9586944}",
/// );
/// assert_eq!(
///     display_result(&batch[1]),
///     "
/// Success: audio-for-tests/blippy-trance/track.mp3:
/// Waveform { frame_rate_hz: 44100, num_channels: 2, num_frames: 5293440}",
/// );
/// assert_eq!(
///     display_result(&batch[2]),
///     "
/// Failure: does-not-exist:
/// Cannot find the given filename does-not-exist.",
/// );
/// ```
#[allow(dead_code)] // Silence dead code warning because we do not use this function in the C frontend.
pub fn waveforms_from_files(
    filenames: &[&str],
    waveform_args: WaveformArgs,
    batch_args: BatchArgs,
) -> Vec<WaveformNamedResult> {
    // Nothing to decode: skip spinning up a thread pool just to tear it down again.
    if filenames.is_empty() {
        return Vec::new();
    }

    // Use a dedicated pool (instead of Rayon's global one) so that
    // `batch_args.num_workers` bounds the thread count for this call only.
    let thread_pool = rayon::ThreadPoolBuilder::new()
        .num_threads(batch_args.num_workers)
        .build()
        .expect("failed to build the Rayon thread pool for batch decoding");

    // Collecting an indexed parallel iterator into a `Vec` keeps the
    // results in the same order as `filenames`.
    thread_pool.install(|| {
        filenames
            .par_iter()
            .map(|filename| WaveformNamedResult {
                name: (*filename).to_string(),
                result: Waveform::from_file(filename, waveform_args),
            })
            .collect()
    })
}
26 changes: 0 additions & 26 deletions src/backend/batch_args.rs

This file was deleted.

10 changes: 6 additions & 4 deletions src/backend/mod.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
mod batch_args;
#[cfg(all(feature = "enable-multithreading", feature = "enable-filesystem"))]
pub mod batch;
mod common;
pub mod decode;
mod errors;
mod named_result;
pub mod resample;
mod waveform;
mod waveform_args;
mod waveform_named_result;
mod waveform_result;

pub use batch_args::*;
pub use errors::*;
pub use named_result::*;
pub use waveform::*;
pub use waveform_args::*;
pub use waveform_named_result::WaveformNamedResult;
pub use waveform_result::WaveformResult;
25 changes: 0 additions & 25 deletions src/backend/named_result.rs

This file was deleted.

89 changes: 0 additions & 89 deletions src/backend/waveform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@ use std::marker::Send;

use serde::{Deserialize, Serialize};

#[cfg(feature = "enable-multithreading")]
use crate::backend::batch_args::BatchArgs;
#[cfg(feature = "enable-multithreading")]
use crate::backend::named_result::NamedResult;
#[cfg(feature = "enable-multithreading")]
use rayon::prelude::*;

use crate::backend::common::milliseconds_to_frames;
use crate::backend::decode::Decoder;
use crate::backend::decode::SymphoniaDecoder;
Expand Down Expand Up @@ -193,88 +186,6 @@ impl Waveform {
Self::from_encoded_stream_with_hint(file, waveform_args, file_extension, DEFAULT_MIME_TYPE)
}

/// Decodes a list of audio files in parallel.
///
/// # Arguments
/// - `filenames`: A list of filenames of encoded audio files on the local filesystem.
/// - `waveform_args`: Instructions on how to decode the audio.
/// - `batch_args`: Instructions on how to divide the work across multiple threads.
///
/// # Panics
/// Panics if the Rayon thread pool cannot be built, because the
/// result of `build()` is unwrapped.
///
/// # Feature flags
/// This function is only available if both of the `enable-filesystem`
/// and `enable-multithreading` features are enabled. These features
/// are enabled by default in Babycat's Rust, Python, and C frontends.
/// These features are disabled in Babycat's WebAssembly frontend.
///
/// # Examples
/// **(Attempt to) decode three files:**
///
/// In this example, we process three filenames and demonstrate how to handle errors.
/// The first two files are successfully processed, and we catch a
/// [`Error::FileNotFound`][crate::Error::FileNotFound] error when processing the third file.
/// ```
/// use babycat::{Error, Waveform, NamedResult};
///
/// let filenames = &[
/// "audio-for-tests/andreas-theme/track.mp3",
/// "audio-for-tests/blippy-trance/track.mp3",
/// "does-not-exist",
/// ];
/// let waveform_args = Default::default();
/// let batch_args = Default::default();
/// let batch = Waveform::from_many_files(
/// filenames,
/// waveform_args,
/// batch_args
/// );
///
/// fn display_result(nr: &NamedResult<Waveform, Error>) -> String {
/// match &nr.result {
/// Ok(waveform) => format!("\nSuccess: {}:\n{:?}", nr.name, waveform),
/// Err(err) => format!("\nFailure: {}:\n{}", nr.name, err),
/// }
/// }
/// assert_eq!(
/// display_result(&batch[0]),
/// "
/// Success: audio-for-tests/andreas-theme/track.mp3:
/// Waveform { frame_rate_hz: 44100, num_channels: 2, num_frames: 9586944}",
/// );
/// assert_eq!(
/// display_result(&batch[1]),
/// "
/// Success: audio-for-tests/blippy-trance/track.mp3:
/// Waveform { frame_rate_hz: 44100, num_channels: 2, num_frames: 5293440}",
/// );
/// assert_eq!(
/// display_result(&batch[2]),
/// "
/// Failure: does-not-exist:
/// Cannot find the given filename does-not-exist.",
/// );
/// ```
#[cfg(all(feature = "enable-multithreading", feature = "enable-filesystem"))]
pub fn from_many_files(
filenames: &[&str],
waveform_args: WaveformArgs,
batch_args: BatchArgs,
) -> Vec<NamedResult<Self, Error>> {
// Build a dedicated pool so `batch_args.num_workers` controls the
// thread count used for this call.
let thread_pool: rayon::ThreadPool = rayon::ThreadPoolBuilder::new()
.num_threads(batch_args.num_workers)
.build()
.unwrap();

// Each filename is decoded independently; a failure is stored in that
// entry's `result` instead of stopping the batch.
thread_pool.install(|| {
filenames
.par_iter()
.map(|filename| NamedResult {
name: (*filename).to_string(),
result: Self::from_file(filename, waveform_args),
})
.collect::<Vec<NamedResult<Self, Error>>>()
})
}

/// Decodes audio from an input stream.
///
/// [`Waveform`][crate::Waveform] will take ownership of the stream
Expand Down
8 changes: 8 additions & 0 deletions src/backend/waveform_named_result.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
use crate::backend::WaveformResult;

/// A decoding outcome paired with a name identifying the input it came from.
///
/// `batch::waveforms_from_files` returns one of these per input filename,
/// so a caller can tell which file each success or failure belongs to.
#[repr(C)]
#[derive(Clone, Debug)]
pub struct WaveformNamedResult {
/// The label for this result. `batch::waveforms_from_files` sets it to
/// the filename that was decoded.
pub name: String,
/// The decoded `Waveform` on success, or the `Error` that prevented decoding.
pub result: WaveformResult,
}
4 changes: 4 additions & 0 deletions src/backend/waveform_result.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
use crate::backend::Error;
use crate::backend::Waveform;

/// The outcome of a decoding attempt: a `Waveform` on success
/// or a Babycat `Error` on failure.
pub type WaveformResult = Result<Waveform, Error>;
Loading

0 comments on commit 8ed40dc

Please sign in to comment.