Add scale range

yvann-ba committed Oct 7, 2024
1 parent 3a8c27d commit dd4b57b
Showing 3 changed files with 33 additions and 89 deletions.

nodes/audio/Audio_Analysis_Yvann.py (32 additions, 61 deletions)

@@ -13,7 +13,7 @@ class AudioNodeBase(Yvann):
 
 
 class Audio_Analysis_Yvann(AudioNodeBase):
-    analysis_modes = ["audio", "drums only", "vocals only"]
+    analysis_modes = ["Drums Only", "Full Audio", "Vocals Only", "Bass Only", "Other Audio"]
 
     @classmethod
     def INPUT_TYPES(cls):
@@ -23,10 +23,11 @@ def INPUT_TYPES(cls):
                 "fps": ("FLOAT", {"forceInput": True}),
                 "audio": ("AUDIO", {"forceInput": True}),
                 "analysis_mode": (cls.analysis_modes,),
-                "threshold": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 0.6, "step": 0.01}),
-                "add": ("FLOAT", {"default": 0.0, "min": -0.5, "max": 0.5, "step": 0.01}),
-                "smooth": ("FLOAT", {"default": 0.1, "min": 0.0, "max": 1.0, "step": 0.01}),
+                "threshold": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1, "step": 0.01}),
+                "add": ("FLOAT", {"default": 0.0, "min": -1, "max": 1, "step": 0.01}),
+                "smooth": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.01}),
                 "multiply_by": ("FLOAT", {"default": 1.0, "min": 0, "max": 10.0, "step": 0.1}),
+                "scale_range": ("FLOAT", {"default": 0, "min": 0, "max": 3, "step": 0.1}),
                 "invert_weights": ("BOOLEAN", {"default": False}),
             }
         }
@@ -36,7 +37,6 @@ def INPUT_TYPES(cls):
     FUNCTION = "process_audio"
 
     def download_and_load_model(self):
-        # Download and load the OpenUnmix model for audio separation
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         download_path = os.path.join(folder_paths.models_dir, "openunmix")
         os.makedirs(download_path, exist_ok=True)
@@ -76,18 +76,15 @@ def _rms_energy(self, waveform, num_frames, samples_per_frame):
             print(f"Error in RMS energy calculation: {e}")
             return np.zeros(num_frames)
 
-    def _apply_audio_processing(self, weights, threshold, add, smooth, multiply_by):
+    def _apply_audio_processing(self, weights, threshold, add, smooth, scale_range, multiply_by):
         # Normalize weights to 0-1 range
         weights = (weights - np.min(weights)) / (np.max(weights) - np.min(weights)) if np.max(weights) - np.min(weights) > 0 else weights
-        weights = np.round(weights, 4)
 
         # Apply threshold
         weights = np.where(weights > threshold, weights, 0)
-        weights = np.round(weights, 4)
 
-        # Apply addition and clip to 0-1.25 range
-        weights = np.clip(weights + add, 0, 1.25)
-        weights = np.round(weights, 4)
+        # Apply add
+        weights = np.clip(weights + add, 0, 1)
 
         # Apply smoothing
         smoothed = np.zeros_like(weights)
@@ -96,54 +93,24 @@ def _apply_audio_processing(self, weights, threshold, add, smooth, multiply_by):
                 smoothed[i] = weights[i]
             else:
                 smoothed[i] = smoothed[i-1] * smooth + weights[i] * (1 - smooth)
-        smoothed = np.round(smoothed, 4)
 
         # Apply final multiplication
         smoothed = smoothed * multiply_by
-        smoothed = np.round(smoothed, 4)
 
         return smoothed
 
     def _apply_threshold(self, weights, threshold):
         # Apply threshold to weights with normalization
         weights = (weights - np.min(weights)) / (np.max(weights) - np.min(weights)) if np.max(weights) - np.min(weights) > 0 else weights
         weights = np.round(weights, 4)
 
         thresholded = np.where(weights > threshold,
                                (weights - threshold) / (1 - threshold),
                                0)
-        return np.round(thresholded, 4)
-
-    def generate_masks(self, input_values, width, height):
-        # Generate compressed masks from input values
-        if isinstance(input_values, (float, int)):
-            input_values = [round(input_values, 4)]
-        elif isinstance(input_values, list) and all(isinstance(item, list) for item in input_values):
-            input_values = [round(item, 4) for sublist in input_values for item in sublist]
-        else:
-            input_values = [round(item, 4) for item in input_values]
-
-        # Compress the mask resolution
-        compressed_width = max(32, width // 8)  # Minimum width of 32 pixels
-        compressed_height = max(32, height // 8)  # Minimum height of 32 pixels
-
-        masks = []
-        for value in input_values:
-            # Create a small mask
-            small_mask = torch.ones((compressed_height, compressed_width), dtype=torch.float32) * value
-            # Resize the mask to original dimensions
-            mask = torch.nn.functional.interpolate(small_mask.unsqueeze(0).unsqueeze(0),
-                                                   size=(height, width),
-                                                   mode='nearest').squeeze(0).squeeze(0)
-            masks.append(mask)
-        masks_out = torch.stack(masks, dim=0)
-
-        return masks_out
-
-    def process_audio(self, audio, num_frames, fps, analysis_mode, threshold, add, smooth, multiply_by, invert_weights):
-        # Main function to process audio and generate weights and masks
-
-        # Input validation
+        return thresholded
+
+    def process_audio(self, audio, num_frames, fps, analysis_mode, threshold, add, smooth, multiply_by, scale_range, invert_weights):
+        # Main function to process audio and generate weights
         if audio is None or 'waveform' not in audio or 'sample_rate' not in audio:
            print("Invalid audio input")
            return None, None, None, None
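
Note on the change above: after this commit the weight pipeline is normalize to [0, 1], zero out values at or below threshold, shift by add clipped back into [0, 1] (previously 1.25), one-pole smoothing, then a final gain, with the intermediate np.round(..., 4) calls dropped. A minimal standalone sketch of that chain, assuming only NumPy; scale_range is threaded through here but only applied later in process_audio:

    import numpy as np

    def apply_audio_processing(weights, threshold, add, smooth, scale_range, multiply_by):
        # Normalize to the 0-1 range, guarding against a flat signal
        span = np.max(weights) - np.min(weights)
        if span > 0:
            weights = (weights - np.min(weights)) / span

        # Zero out everything at or below the threshold
        weights = np.where(weights > threshold, weights, 0)

        # Shift by `add`, clipped back into [0, 1] (was [0, 1.25] before this commit)
        weights = np.clip(weights + add, 0, 1)

        # One-pole smoothing: higher `smooth` means more inertia
        smoothed = np.zeros_like(weights)
        for i in range(len(weights)):
            smoothed[i] = weights[i] if i == 0 else smoothed[i - 1] * smooth + weights[i] * (1 - smooth)

        # Final gain; `scale_range` is applied later as a constant offset
        return smoothed * multiply_by

    # Example: an isolated energy spike survives the 0.5 threshold and decays
    print(apply_audio_processing(np.array([0.1, 0.9, 0.3, 0.05]),
                                 threshold=0.5, add=0.0, smooth=0.2,
                                 scale_range=0.0, multiply_by=1.0))
    # -> [0.    0.8   0.16  0.032]
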
@@ -166,20 +133,23 @@ def process_audio(self, audio, num_frames, fps, analysis_mode, threshold, add, smooth, multiply_by, invert_weights):
             pad_length = total_samples_needed - waveform.shape[-1]
             waveform = torch.nn.functional.pad(waveform, (0, pad_length))
 
-        # Calculate samples per frame
         samples_per_frame = total_samples_needed // num_frames
 
         # Apply audio separation if needed
-        if analysis_mode in ["drums only", "vocals only"]:
+        if analysis_mode in ["Drums Only", "Vocals Only", "Bass Only", "Other Audio"]:
             try:
                 model = self.download_and_load_model()
                 device = next(model.parameters()).device
                 waveform = waveform.to(device)
                 estimates = model(waveform)
-                if analysis_mode == "drums only":
+                if analysis_mode == "Drums Only":
                     processed_waveform = estimates[:, 1, :, :]
-                else:  # vocals only
+                elif analysis_mode == "Vocals Only":
                     processed_waveform = estimates[:, 0, :, :]
+                elif analysis_mode == "Bass Only":
+                    processed_waveform = estimates[:, 2, :, :]
+                elif analysis_mode == "Other Audio":
+                    processed_waveform = estimates[:, 3, :, :]
             except Exception as e:
                 print(f"Error in model processing: {e}")
                 return None, None, None, None
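
Note on the mode dispatch above: the stem indices imply the OpenUnmix estimates tensor orders its stems vocals (0), drums (1), bass (2), other (3). The same dispatch written as a lookup table — a hypothetical helper for illustration, not code from the commit:

    # Stem slot per mode, as used in the diff above; `estimates` is the
    # OpenUnmix output indexed as (batch, stem, channel, sample).
    STEM_INDEX = {"Vocals Only": 0, "Drums Only": 1, "Bass Only": 2, "Other Audio": 3}

    def select_stem(estimates, analysis_mode):
        idx = STEM_INDEX.get(analysis_mode)
        # "Full Audio" has no entry: separation is skipped entirely for it
        return estimates[:, idx, :, :] if idx is not None else None
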
@@ -202,28 +172,28 @@ def process_audio(self, audio, num_frames, fps, analysis_mode, threshold, add, smooth, multiply_by, invert_weights):
             print("Invalid audio weights calculated")
             return None, None, None, None
 
-        # Apply audio processing
         audio_weights = self._apply_audio_processing(
-            audio_weights, threshold, add, smooth, multiply_by)
-
-        # Ensure audio_weights are within [0, 1] range
+            audio_weights, threshold, add, smooth, scale_range, multiply_by)
+
         audio_weights = np.clip(audio_weights, 0, 1)
+
+        scale_audio_weights = audio_weights + scale_range
 
         if (invert_weights == True):
-            # Calculate inverted weights
             audio_weights_inverted = 1.0 - np.array(audio_weights)
-            # Ensure inverted weights are also within [0, 1] range
             audio_weights_inverted = np.clip(audio_weights_inverted, 0, 1)
+            scale_audio_weights_inverted = audio_weights_inverted + scale_range
 
         # Generate visualization
         try:
             plt.figure(figsize=(10, 6), facecolor='white')
             if (invert_weights == False):
-                plt.plot(list(range(1, len(audio_weights) + 1)), audio_weights,
+                plt.plot(list(range(1, len(audio_weights) + 1)), scale_audio_weights,
                          label=f'{analysis_mode.capitalize()} Weights', color='blue')
 
             if (invert_weights == True):
-                plt.plot(list(range(1, len(audio_weights_inverted) + 1)), audio_weights_inverted,
+                plt.plot(list(range(1, len(scale_audio_weights_inverted) + 1)), scale_audio_weights_inverted,
                          label='Inverted Weights', color='red', linestyle='--')
 
             plt.xlabel('Frame Number')
@@ -247,10 +217,11 @@ def process_audio(self, audio, num_frames, fps, analysis_mode, threshold, add, smooth, multiply_by, invert_weights):
             weights_graph = None
 
         if (invert_weights == True):
-            audio_weights = audio_weights_inverted
+            scale_audio_weights = scale_audio_weights_inverted
 
         if processed_audio is None or audio_weights is None or weights_graph is None:
             print("One or more outputs are invalid")
             return None, None, None, None
 
-        return (audio_weights.tolist(), processed_audio, original_audio, weights_graph)
+        scale_audio_weights = scale_audio_weights.tolist()
+        rounded_scale_audio_weights = [round(elem, 3) for elem in scale_audio_weights]
+        return (rounded_scale_audio_weights, processed_audio, original_audio, weights_graph)
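
Net effect of the new scale_range input: a constant offset added after the final [0, 1] clip, so the weight curve keeps its shape but is lifted off zero, and the node now returns values rounded to three decimals. A small illustration with made-up weights:

    import numpy as np

    audio_weights = np.array([0.0, 0.8, 0.16, 0.032])  # illustrative clipped weights

    scale_range = 0.5  # constant offset; 0 reproduces the pre-commit behaviour
    scale_audio_weights = audio_weights + scale_range

    # Matches the new return path: tolist(), then round to 3 decimals
    rounded = [round(w, 3) for w in scale_audio_weights.tolist()]
    print(rounded)  # [0.5, 1.3, 0.66, 0.532]
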
nodes/utils/Math_Float_List.py (0 additions, 27 deletions)

This file was deleted.

pyproject.toml (1 addition, 1 deletion)

@@ -1,6 +1,6 @@
 [project]
 name = "comfyui_yvann-nodes"
-description = "Audio reactivity nodes for AI animations 🔊 Analyze audio, extract drums and vocals. Generate reactive masks and weights. Create audio-driven visuals. Produce weight graphs and audio masks. Compatible with IPAdapter, ControlNets and more. Ideal for music videos and reactive animations. Features audio scheduling and waveform analysis"
+description = "Audio reactivity nodes for AI animations 🔊 Analyze audio, extract drums and vocals. Generate reactive masks and weights. Create audio-driven visuals. Produce weight graphs and audio masks. Compatible with IPAdapter, ControlNets, AnimateDiff and more. Ideal for music videos and reactive animations. Features audio scheduling and waveform analysis"
 version = "1.0.9"
 license = {file = "LICENSE"}
 dependencies = ["openunmix", "numpy", "torch", "matplotlib", "pillow"]
