UPDATES
Anantha Raju C committed Feb 25, 2024
1 parent 816ccaa commit a63d25a
Showing 4 changed files with 129 additions and 29 deletions.
61 changes: 42 additions & 19 deletions codePython/nlp1.py
@@ -1,31 +1,53 @@
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from datasets import load_dataset
import speech_recognition as sr

import numpy as np
import sys
import json

# Access command-line arguments (key=value pairs passed from the Java caller)
arguments = sys.argv[1:]

# Create an empty dictionary to store key-value pairs
args_dict = {}

# Parse arguments with keys
for arg in arguments:
    print("Argument:", arg)
    key, value = arg.split('=')  # Split the argument at '=' to separate key and value
    args_dict[key] = value

# Define the key of interest
audio_file_path = 'audio_file_path'

# Check if the desired key exists in the dictionary
if audio_file_path in args_dict:
    audio_file_path_value = args_dict[audio_file_path]
    print(f"Value for key '{audio_file_path}': {audio_file_path_value}")
else:
    print(f"Key '{audio_file_path}' not found in the arguments.")
    sys.exit(1)  # the rest of the script needs the audio file path, so stop here

# Check device availability
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Determine the torch data type based on device availability
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Define the model ID - https://huggingface.co/openai/whisper-large-v3
model_id = "openai/whisper-large-v3"

# Load the speech-to-text model
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True
)
model.to(device)

# Load the processor
processor = AutoProcessor.from_pretrained(model_id)

# Create a pipeline for automatic speech recognition
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
@@ -39,21 +61,22 @@
    device=device,
)

# Perform automatic speech recognition (transcription)
resultTranscription = pipe(audio_file_path_value, generate_kwargs={"language": None})
print("\n", resultTranscription["text"])

# Perform translation to English
resultTranslation = pipe(audio_file_path_value, generate_kwargs={"task": "translate"})
print("\n", resultTranslation["text"])

# Load the sentiment analysis classifier
classifier = pipeline("sentiment-analysis",
                      model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
                      revision="af0f99b")

# Perform sentiment analysis on the translated text
resultClassifier = classifier(resultTranslation["text"])
print("\n", resultClassifier)

# Create a Python dictionary
data = {
@@ -66,4 +89,4 @@
json_string = json.dumps(data)

# Print the JSON string
print("\n", json_string)
60 changes: 60 additions & 0 deletions codePython/program_output.json
@@ -0,0 +1,60 @@
{
"resultTranscription": {
"text": " इसके बाद हम एक ऐसे खिलाड़ी के बारे में बात करेंगे जिससे इस वर्ल्ड कप में भारतिय फैंस को काफी उमीदे रहेंगे इस खिलाड़ी के बारे में मैं आपको बस इतना बता दूं कि ये वो खिलाड़ी हैं जिसने इंटरनेशनल क्रिकेट में अपनी एंट्री का एलान औस्ट्रेलिया के गेंज जोरदार प्रदर्शन से किया आज से वर्ल्ड कप के दारान हर रोज हम आपको मिलते रहेंगे EAM Cricket World Cup 2007 अपडेट में",
"chunks": [
{
"timestamp": [
0.0,
5.42
],
"text": " इसके बाद हम एक ऐसे खिलाड़ी के बारे में बात करेंगे जिससे इस वर्ल्ड कप में भारतिय फैंस को काफी उमीदे रहेंगे"
},
{
"timestamp": [
5.42,
13.46
],
"text": " इस खिलाड़ी के बारे में मैं आपको बस इतना बता दूं कि ये वो खिलाड़ी हैं जिसने इंटरनेशनल क्रिकेट में अपनी एंट्री का एलान औस्ट्रेलिया के गेंज जोरदार प्रदर्शन से किया"
},
{
"timestamp": [
13.62,
19.52
],
"text": " आज से वर्ल्ड कप के दारान हर रोज हम आपको मिलते रहेंगे EAM Cricket World Cup 2007 अपडेट में"
}
]
},
"resultTranslation": {
"text": " Now we will talk about a player who will be a big fan of Indian fans in this World Cup. I will tell you that he is a player who has made his entry in international cricket with a strong Australian against. From today during the World Cup, we will meet you every day in EAM Cricket World Cup 2007 Update.",
"chunks": [
{
"timestamp": [
0.0,
5.6
],
"text": " Now we will talk about a player who will be a big fan of Indian fans in this World Cup."
},
{
"timestamp": [
5.6,
13.6
],
"text": " I will tell you that he is a player who has made his entry in international cricket with a strong Australian against."
},
{
"timestamp": [
13.6,
19.64
],
"text": " From today during the World Cup, we will meet you every day in EAM Cricket World Cup 2007 Update."
}
]
},
"resultClassifier": [
{
"label": "POSITIVE",
"score": 0.999645471572876
}
]
}
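
A small reader sketch (not part of the commit), assuming the JSON above is saved at codePython/program_output.json as in this change set; it lists the transcription chunks with their timestamps and the sentiment verdict.

~~~python
# inspect_output.py -- illustrative helper for the saved pipeline output above.
import json

with open("codePython/program_output.json", encoding="utf-8") as f:
    output = json.load(f)

# Print each transcription chunk with its start/end timestamps.
for chunk in output["resultTranscription"]["chunks"]:
    start, end = chunk["timestamp"]
    print(f"[{start:6.2f}s - {end:6.2f}s] {chunk['text'].strip()}")

# Print the sentiment label and score produced for the English translation.
sentiment = output["resultClassifier"][0]
print("Sentiment:", sentiment["label"], round(sentiment["score"], 4))
~~~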
19 changes: 18 additions & 1 deletion documentation/GETTING_STARTED.MD
@@ -4,21 +4,38 @@ These instructions will get you a copy of the project up and running on your local machine

~~~shell
python --version
python3 --version
~~~

~~~shell
pip --version
~~~

~~~shell
python -c "import torch; print(torch.__version__)"
pip3 show torch
~~~

~~~shell
pip3 list | grep tensorflow
pip list | grep tensorflow
~~~

~~~shell
pip list
~~~

---

~~~shell
python -m venv .env

source .env/bin/activate
~~~
~~~shell
pip install transformers
pip install torch
pip install datasets
pip install accelerate
pip install librosa
sudo apt install ffmpeg
~~~
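
Once the packages above are installed, a quick sanity-check sketch (the file name is hypothetical) can confirm that the stack imports cleanly and whether a CUDA device is visible before running codePython/nlp1.py:

~~~python
# sanity_check.py -- optional environment check before running the pipeline.
import torch
import transformers
import datasets
import accelerate
import librosa

print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("transformers:", transformers.__version__)
print("datasets:", datasets.__version__)
print("accelerate:", accelerate.__version__)
print("librosa:", librosa.__version__)
~~~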
18 changes: 9 additions & 9 deletions documentation/TECHNOLOGY_STACK.MD
@@ -2,13 +2,13 @@

### Overview

|Technology |Description |
|-----------|------------|
|[PyTorch](https://pytorch.org/) | A machine learning framework based on the Torch library, used for applications such as computer vision and natural language processing; originally developed by Meta AI and now part of the Linux Foundation umbrella. |
|[Python](https://www.python.org/) | A high-level, general-purpose programming language. |
|[Transformers](https://huggingface.co/docs/transformers/en/index) | Provides APIs and tools to easily download and train state-of-the-art pretrained models. |
|[openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) | A pre-trained model for automatic speech recognition (ASR) and speech translation. |
|[Accelerate](https://github.com/huggingface/accelerate) | A library that enables the same PyTorch code to run across any distributed configuration. |
|[librosa](https://librosa.org/doc/latest/index.html) | A Python package for music and audio analysis. |
|[FFmpeg](https://ffmpeg.org/) | A suite of libraries and programs for handling video, audio, and other multimedia files and streams. |
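
librosa and FFmpeg are listed above, but nlp1.py hands a file path straight to the pipeline (which relies on ffmpeg for decoding). As an aside, a minimal sketch of the librosa route, decoding the audio yourself and passing the raw waveform; the checkpoint is downsized to openai/whisper-small only to keep the example light, and the audio path is a placeholder.

~~~python
# librosa_route.py -- illustrative alternative to passing a file path to the pipeline.
import librosa
from transformers import pipeline

# Decode and resample the audio to 16 kHz mono, the rate Whisper expects.
audio, sampling_rate = librosa.load("/path/to/audio.mp3", sr=16000, mono=True)

# A smaller checkpoint than the project's openai/whisper-large-v3, for a lighter example.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")

# Feed the raw waveform and its sampling rate instead of a file path.
result = asr({"raw": audio, "sampling_rate": sampling_rate})
print(result["text"])
~~~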
