Merge pull request #188 from Vishisht16/main

Created a text-to-music generator app on streamlit
UppuluriKalyani · Oct 10, 2024 · f9c1bd4 · f9c1bd4
2 parents c684d31 + c67ccef
commit f9c1bd4
Show file tree

Hide file tree

Showing 3 changed files with 184 additions and 0 deletions.
diff --git a/Generative Models/Text-to-Music Generator/README.md b/Generative Models/Text-to-Music Generator/README.md
@@ -0,0 +1,62 @@
+# Text-to-Music Generator
+
+This folder contains a Streamlit-based web application that generates music from text descriptions using Meta's Audiocraft library and the MusicGen model.
+
+## Features
+
+- **Text Input**: Enter a textual description of the type of music you want to generate.
+- **Duration Control**: Select the duration of the generated music (up to 20 seconds).
+- **Music Generation**: Generates music based on the provided description and duration.
+- **Audio Playback**: Listen to the generated music directly in the browser.
+
+## Installation
+
+1. **Clone the repository**:
+    ```bash
+    git clone https://github.com/UppuluriKalyani/ML-Nexus.git
+    cd "ML-Nexus/Generative Models/Text-to-Music Generator"
+    ```
+
+2. **Create a virtual environment**:
+    ```bash
+    python3 -m venv music-env
+    source music-env/bin/activate
+    ```
+
+3. **Install the required packages**:
+    ```bash
+    pip install -r requirements.txt
+    ```
+
+## Usage
+
+1. **Run the Streamlit app**:
+    ```bash
+    streamlit run app.py
+    ```
+
+2. **Open your web browser** and go to `http://localhost:8501` to access the app.
+
+3. **Enter a description** of the type of music you want to generate in the text area.
+
+4. **Select the duration** of the music using the slider.
+
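+5. **Wait for the result**: generation starts automatically once a description is entered and a non-zero duration is selected; the app then shows an audio player and a download link for your music.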
+
+<img width="656" alt="Screenshot 2024-05-21 at 6 12 32 PM" src="https://github.com/langchain-tech/Musicgen-Text-to-Music/assets/100914015/da41fbea-6565-4ac7-a78f-559666ff4b6f">
+
+## Requirements
+
+- `streamlit`
+- `audiocraft`
+- `torchaudio`
+- `scipy`
+
+## Project Structure
+
+- `app.py`: The main Streamlit application script.
+- `requirements.txt`: The dependencies required to run the app.
+
+## License
+
+This project is licensed under the MIT License.
diff --git a/Generative Models/Text-to-Music Generator/app.py b/Generative Models/Text-to-Music Generator/app.py
@@ -0,0 +1,101 @@
+from audiocraft.models import MusicGen
+import streamlit as st
+import torch
+import torchaudio
+import os
+import numpy as np
+import base64
+
+
+@st.cache_resource
+def load_model():
+    model = MusicGen.get_pretrained('facebook/musicgen-small')
+    return model
+
+
+def generate_music_tensors(description, duration: int):
+    print("Description: ", description)
+    print("Duration: ", duration)
+    model = load_model()
+
+    model.set_generation_params(
+        use_sampling=True,
+        top_k=250,
+        duration=duration
+    )
+
+    output = model.generate(
+        descriptions=[description],
+        progress=True,
+        return_tokens=True
+    )
+
+    return output[0]
+
+
+def save_audio(samples: torch.Tensor):
+    """Renders an audio player for the given audio samples and saves them to a local directory.
+
+    Args:
+        samples (torch.Tensor): a Tensor of decoded audio samples
+            with shapes [B, C, T] or [C, T]
+        sample_rate (int): sample rate audio should be displayed with.
+        save_path (str): path to the directory where audio should be saved.
+    """
+
+    print("Samples (inside function): ", samples)
+    sample_rate = 32000
+    save_path = "audio_output/"
+    assert samples.dim() == 2 or samples.dim() == 3
+
+    samples = samples.detach().cpu()
+    if samples.dim() == 2:
+        samples = samples[None, ...]
+
+    for idx, audio in enumerate(samples):
+        audio_path = os.path.join(save_path, f"audio_{idx}.wav")
+        torchaudio.save(audio_path, audio, sample_rate)
+
+
+def get_binary_file_downloader_html(bin_file, file_label='File'):
+    with open(bin_file, 'rb') as f:
+        data = f.read()
+    bin_str = base64.b64encode(data).decode()
+    href = f'<a href="data:application/octet-stream;base64,{bin_str}" download="{os.path.basename(bin_file)}">Download {file_label}</a>'
+    return href
+
+
+st.set_page_config(
+    page_icon="musical_note",
+    page_title="Music Gen"
+)
+
+
+def main():
+    st.title("Text to Music Generator🎵")
+
+    with st.expander("See explanation"):
+        st.write("Music Generator app built using Meta's Audiocraft library. We are using Music Gen Small model.")
+
+    text_area = st.text_area("Enter your description.......")
+    time_slider = st.slider("Select time duration (In Seconds)", 0, 20, 10)
+
+    if text_area and time_slider:
+        st.json({
+            'Your Description': text_area,
+            'Selected Time Duration (in Seconds)': time_slider
+        })
+
+        st.subheader("Generated Music")
+        music_tensors = generate_music_tensors(text_area, time_slider)
+        print("Musci Tensors: ", music_tensors)
+        save_music_file = save_audio(music_tensors)
+        audio_filepath = 'audio_output/audio_0.wav'
+        audio_file = open(audio_filepath, 'rb')
+        audio_bytes = audio_file.read()
+        st.audio(audio_bytes)
+        st.markdown(get_binary_file_downloader_html(audio_filepath, 'Audio'), unsafe_allow_html=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Generative Models/Text-to-Music Generator/requirements.txt b/Generative Models/Text-to-Music Generator/requirements.txt
@@ -0,0 +1,21 @@
+av
+einops
+flashy>=0.0.1
+hydra-core>=1.1
+hydra_colorlog
+julius
+num2words
+numpy
+sentencepiece
+spacy==3.5.2
+torch>=2.0.0
+torchaudio>=2.0.0
+huggingface_hub
+tqdm
+transformers>=4.31.0  # need Encodec there.
+xformers
+demucs
+librosa
+gradio
+torchmetrics
+encodec