Add scale range

yvann-ba committed Oct 7, 2024
1 parent 3a8c27d commit dd4b57b
Showing 3 changed files with 33 additions and 89 deletions.

nodes/audio/Audio_Analysis_Yvann.py (32 additions, 61 deletions)

@@ -13,7 +13,7 @@ class AudioNodeBase(Yvann):
 
 
 class Audio_Analysis_Yvann(AudioNodeBase):
-    analysis_modes = ["audio", "drums only", "vocals only"]
+    analysis_modes = ["Drums Only", "Full Audio", "Vocals Only", "Bass Only", "Other Audio"]
 
     @classmethod
     def INPUT_TYPES(cls):
@@ -23,10 +23,11 @@ def INPUT_TYPES(cls):
                 "fps": ("FLOAT", {"forceInput": True}),
                 "audio": ("AUDIO", {"forceInput": True}),
                 "analysis_mode": (cls.analysis_modes,),
-                "threshold": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 0.6, "step": 0.01}),
-                "add": ("FLOAT", {"default": 0.0, "min": -0.5, "max": 0.5, "step": 0.01}),
-                "smooth": ("FLOAT", {"default": 0.1, "min": 0.0, "max": 1.0, "step": 0.01}),
+                "threshold": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1, "step": 0.01}),
+                "add": ("FLOAT", {"default": 0.0, "min": -1, "max": 1, "step": 0.01}),
+                "smooth": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.01}),
                 "multiply_by": ("FLOAT", {"default": 1.0, "min": 0, "max": 10.0, "step": 0.1}),
+                "scale_range": ("FLOAT", {"default": 0, "min": 0, "max": 3, "step": 0.1}),
                 "invert_weights": ("BOOLEAN", {"default": False}),
             }
         }
@@ -36,7 +37,6 @@ def INPUT_TYPES(cls):
     FUNCTION = "process_audio"
 
     def download_and_load_model(self):
-        # Download and load the OpenUnmix model for audio separation
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         download_path = os.path.join(folder_paths.models_dir, "openunmix")
         os.makedirs(download_path, exist_ok=True)
@@ -76,18 +76,15 @@ def _rms_energy(self, waveform, num_frames, samples_per_frame):
             print(f"Error in RMS energy calculation: {e}")
             return np.zeros(num_frames)
 
-    def _apply_audio_processing(self, weights, threshold, add, smooth, multiply_by):
+    def _apply_audio_processing(self, weights, threshold, add, smooth, scale_range, multiply_by):
         # Normalize weights to 0-1 range
         weights = (weights - np.min(weights)) / (np.max(weights) - np.min(weights)) if np.max(weights) - np.min(weights) > 0 else weights
-        weights = np.round(weights, 4)
 
         # Apply threshold
         weights = np.where(weights > threshold, weights, 0)
-        weights = np.round(weights, 4)
 
-        # Apply addition and clip to 0-1.25 range
-        weights = np.clip(weights + add, 0, 1.25)
-        weights = np.round(weights, 4)
+        # Apply add
+        weights = np.clip(weights + add, 0, 1)
 
         # Apply smoothing
         smoothed = np.zeros_like(weights)
@@ -96,54 +93,24 @@ def _apply_audio_processing(self, weights, threshold, add, smooth, multiply_by):
                 smoothed[i] = weights[i]
             else:
                 smoothed[i] = smoothed[i-1] * smooth + weights[i] * (1 - smooth)
-        smoothed = np.round(smoothed, 4)
 
         # Apply final multiplication
         smoothed = smoothed * multiply_by
-        smoothed = np.round(smoothed, 4)
 
         return smoothed
 
     def _apply_threshold(self, weights, threshold):
         # Apply threshold to weights with normalization
         weights = (weights - np.min(weights)) / (np.max(weights) - np.min(weights)) if np.max(weights) - np.min(weights) > 0 else weights
         weights = np.round(weights, 4)
 
         thresholded = np.where(weights > threshold,
                                (weights - threshold) / (1 - threshold),
                                0)
-        return np.round(thresholded, 4)
-
-    def generate_masks(self, input_values, width, height):
-        # Generate compressed masks from input values
-        if isinstance(input_values, (float, int)):
-            input_values = [round(input_values, 4)]
-        elif isinstance(input_values, list) and all(isinstance(item, list) for item in input_values):
-            input_values = [round(item, 4) for sublist in input_values for item in sublist]
-        else:
-            input_values = [round(item, 4) for item in input_values]
-
-        # Compress the mask resolution
-        compressed_width = max(32, width // 8)  # Minimum width of 32 pixels
-        compressed_height = max(32, height // 8)  # Minimum height of 32 pixels
-
-        masks = []
-        for value in input_values:
-            # Create a small mask
-            small_mask = torch.ones((compressed_height, compressed_width), dtype=torch.float32) * value
-            # Resize the mask to original dimensions
-            mask = torch.nn.functional.interpolate(small_mask.unsqueeze(0).unsqueeze(0),
-                                                   size=(height, width),
-                                                   mode='nearest').squeeze(0).squeeze(0)
-            masks.append(mask)
-        masks_out = torch.stack(masks, dim=0)
-
-        return masks_out
-
-    def process_audio(self, audio, num_frames, fps, analysis_mode, threshold, add, smooth, multiply_by, invert_weights):
-        # Main function to process audio and generate weights and masks
-
-        # Input validation
+        return thresholded
+
+    def process_audio(self, audio, num_frames, fps, analysis_mode, threshold, add, smooth, multiply_by, scale_range, invert_weights):
+        # Main function to process audio and generate weights
         if audio is None or 'waveform' not in audio or 'sample_rate' not in audio:
            print("Invalid audio input")
            return None, None, None, None
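
Note on the change above: after this commit the weight pipeline is normalize to [0, 1], zero out values at or below threshold, shift by add clipped back into [0, 1] (previously 1.25), one-pole smoothing, then a final gain, with the intermediate np.round(..., 4) calls dropped. A minimal standalone sketch of that chain, assuming only NumPy; scale_range is threaded through here but only applied later in process_audio:

    import numpy as np

    def apply_audio_processing(weights, threshold, add, smooth, scale_range, multiply_by):
        # Normalize to the 0-1 range, guarding against a flat signal
        span = np.max(weights) - np.min(weights)
        if span > 0:
            weights = (weights - np.min(weights)) / span

        # Zero out everything at or below the threshold
        weights = np.where(weights > threshold, weights, 0)

        # Shift by `add`, clipped back into [0, 1] (was [0, 1.25] before this commit)
        weights = np.clip(weights + add, 0, 1)

        # One-pole smoothing: higher `smooth` means more inertia
        smoothed = np.zeros_like(weights)
        for i in range(len(weights)):
            smoothed[i] = weights[i] if i == 0 else smoothed[i - 1] * smooth + weights[i] * (1 - smooth)

        # Final gain; `scale_range` is applied later as a constant offset
        return smoothed * multiply_by

    # Example: an isolated energy spike survives the 0.5 threshold and decays
    print(apply_audio_processing(np.array([0.1, 0.9, 0.3, 0.05]),
                                 threshold=0.5, add=0.0, smooth=0.2,
                                 scale_range=0.0, multiply_by=1.0))
    # -> [0.    0.8   0.16  0.032]
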
@@ -166,20 +133,23 @@ def process_audio(self, audio, num_frames, fps, analysis_mode, threshold, add, smooth, multiply_by, invert_weights):
             pad_length = total_samples_needed - waveform.shape[-1]
             waveform = torch.nn.functional.pad(waveform, (0, pad_length))
 
-        # Calculate samples per frame
         samples_per_frame = total_samples_needed // num_frames
 
         # Apply audio separation if needed
-        if analysis_mode in ["drums only", "vocals only"]:
+        if analysis_mode in ["Drums Only", "Vocals Only", "Bass Only", "Other Audio"]:
             try:
                 model = self.download_and_load_model()
                 device = next(model.parameters()).device
                 waveform = waveform.to(device)
                 estimates = model(waveform)
-                if analysis_mode == "drums only":
+                if analysis_mode == "Drums Only":
                     processed_waveform = estimates[:, 1, :, :]
-                else:  # vocals only
+                elif analysis_mode == "Vocals Only":
                     processed_waveform = estimates[:, 0, :, :]
+                elif analysis_mode == "Bass Only":
+                    processed_waveform = estimates[:, 2, :, :]
+                elif analysis_mode == "Other Audio":
+                    processed_waveform = estimates[:, 3, :, :]
             except Exception as e:
                 print(f"Error in model processing: {e}")
                 return None, None, None, None
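
Note on the mode dispatch above: the stem indices imply the OpenUnmix estimates tensor orders its stems vocals (0), drums (1), bass (2), other (3). The same dispatch written as a lookup table — a hypothetical helper for illustration, not code from the commit:

    # Stem slot per mode, as used in the diff above; `estimates` is the
    # OpenUnmix output indexed as (batch, stem, channel, sample).
    STEM_INDEX = {"Vocals Only": 0, "Drums Only": 1, "Bass Only": 2, "Other Audio": 3}

    def select_stem(estimates, analysis_mode):
        idx = STEM_INDEX.get(analysis_mode)
        # "Full Audio" has no entry: separation is skipped entirely for it
        return estimates[:, idx, :, :] if idx is not None else None
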
@@ -202,28 +172,28 @@ def process_audio(self, audio, num_frames, fps, analysis_mode, threshold, add, smooth, multiply_by, invert_weights):
             print("Invalid audio weights calculated")
             return None, None, None, None
 
-        # Apply audio processing
         audio_weights = self._apply_audio_processing(
-            audio_weights, threshold, add, smooth, multiply_by)
-
-        # Ensure audio_weights are within [0, 1] range
+            audio_weights, threshold, add, smooth, scale_range, multiply_by)
+
         audio_weights = np.clip(audio_weights, 0, 1)
+
+        scale_audio_weights = audio_weights + scale_range
 
         if (invert_weights == True):
-            # Calculate inverted weights
             audio_weights_inverted = 1.0 - np.array(audio_weights)
-            # Ensure inverted weights are also within [0, 1] range
             audio_weights_inverted = np.clip(audio_weights_inverted, 0, 1)
+            scale_audio_weights_inverted = audio_weights_inverted + scale_range
 
         # Generate visualization
         try:
             plt.figure(figsize=(10, 6), facecolor='white')
             if (invert_weights == False):
-                plt.plot(list(range(1, len(audio_weights) + 1)), audio_weights,
+                plt.plot(list(range(1, len(audio_weights) + 1)), scale_audio_weights,
                          label=f'{analysis_mode.capitalize()} Weights', color='blue')
 
             if (invert_weights == True):
-                plt.plot(list(range(1, len(audio_weights_inverted) + 1)), audio_weights_inverted,
+                plt.plot(list(range(1, len(scale_audio_weights_inverted) + 1)), scale_audio_weights_inverted,
                          label='Inverted Weights', color='red', linestyle='--')
 
             plt.xlabel('Frame Number')
@@ -247,10 +217,11 @@ def process_audio(self, audio, num_frames, fps, analysis_mode, threshold, add, smooth, multiply_by, invert_weights):
             weights_graph = None
 
         if (invert_weights == True):
-            audio_weights = audio_weights_inverted
+            scale_audio_weights = scale_audio_weights_inverted
 
         if processed_audio is None or audio_weights is None or weights_graph is None:
             print("One or more outputs are invalid")
             return None, None, None, None
 
-        return (audio_weights.tolist(), processed_audio, original_audio, weights_graph)
+        scale_audio_weights = scale_audio_weights.tolist()
+        rounded_scale_audio_weights = [round(elem, 3) for elem in scale_audio_weights]
+        return (rounded_scale_audio_weights, processed_audio, original_audio, weights_graph)
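
Net effect of the new scale_range input: a constant offset added after the final [0, 1] clip, so the weight curve keeps its shape but is lifted off zero, and the node now returns values rounded to three decimals. A small illustration with made-up weights:

    import numpy as np

    audio_weights = np.array([0.0, 0.8, 0.16, 0.032])  # illustrative clipped weights

    scale_range = 0.5  # constant offset; 0 reproduces the pre-commit behaviour
    scale_audio_weights = audio_weights + scale_range

    # Matches the new return path: tolist(), then round to 3 decimals
    rounded = [round(w, 3) for w in scale_audio_weights.tolist()]
    print(rounded)  # [0.5, 1.3, 0.66, 0.532]
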
nodes/utils/Math_Float_List.py (0 additions, 27 deletions)

This file was deleted.

pyproject.toml (1 addition, 1 deletion)

@@ -1,6 +1,6 @@
 [project]
 name = "comfyui_yvann-nodes"
-description = "Audio reactivity nodes for AI animations 🔊 Analyze audio, extract drums and vocals. Generate reactive masks and weights. Create audio-driven visuals. Produce weight graphs and audio masks. Compatible with IPAdapter, ControlNets and more. Ideal for music videos and reactive animations. Features audio scheduling and waveform analysis"
+description = "Audio reactivity nodes for AI animations 🔊 Analyze audio, extract drums and vocals. Generate reactive masks and weights. Create audio-driven visuals. Produce weight graphs and audio masks. Compatible with IPAdapter, ControlNets, AnimateDiff and more. Ideal for music videos and reactive animations. Features audio scheduling and waveform analysis"
 version = "1.0.9"
 license = {file = "LICENSE"}
 dependencies = ["openunmix", "numpy", "torch", "matplotlib", "pillow"]
