-
Notifications
You must be signed in to change notification settings - Fork 1
/
demo.py
38 lines (28 loc) · 1.05 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""Demo: estimate pitch (F0) from a mono audio file with onnxcrepe.

Loads a WAV file, runs CREPE pitch inference through an ONNX Runtime
session, and prints basic statistics of the resulting pitch track.
"""
import numpy as np
import onnxcrepe
from onnxcrepe.session import CrepeInferenceSession

# Load audio (returns the waveform and its sample rate).
audio, sr = onnxcrepe.load.audio(r'assets/xtgg_mono_16k_denoise.wav')

# Here we'll use a 5 millisecond hop length
precision = 5.0

# Provide a sensible frequency range for your domain (upper limit is 2006 Hz)
# This would be a reasonable range for speech
fmin = 50
fmax = 1100

# Select a model capacity--one of "full", "large", "medium", "small" and "tiny"
model = 'full'

# Choose execution providers to use for inference; ONNX Runtime falls back
# through this list in order (GPU via CUDA, then DirectML, then CPU).
providers = ['CUDAExecutionProvider', 'DmlExecutionProvider', 'CPUExecutionProvider']

# Pick a batch size that doesn't cause memory errors on your device
batch_size = 1024

# Create inference session.
# BUGFIX: pass the `model` variable instead of a hard-coded 'full' string,
# so changing `model` above actually selects a different capacity.
session = CrepeInferenceSession(
    model=model,
    providers=providers)

# Compute pitch using the first available execution provider (GPU or CPU).
pitch = onnxcrepe.predict(session, audio, sr, precision=precision, fmin=fmin, fmax=fmax, batch_size=batch_size)

# Report the pitch track's shape and summary statistics.
print(pitch.shape)
print(np.mean(pitch))
print(np.var(pitch))

# Dispose inference session to free device memory.
del session