-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
564 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<classpath> | ||
<classpathentry kind="src" output="target/classes" path="src/main/java"> | ||
<attributes> | ||
<attribute name="optional" value="true"/> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry kind="src" output="target/test-classes" path="src/test/java"> | ||
<attributes> | ||
<attribute name="optional" value="true"/> | ||
<attribute name="maven.pomderived" value="true"/> | ||
<attribute name="test" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"> | ||
<attributes> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"> | ||
<attributes> | ||
<attribute name="maven.pomderived" value="true"/> | ||
</attributes> | ||
</classpathentry> | ||
<classpathentry kind="output" path="target/classes"/> | ||
</classpath> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<projectDescription> | ||
<name>java-ogg-to-ms-speech</name> | ||
<comment></comment> | ||
<projects> | ||
</projects> | ||
<buildSpec> | ||
<buildCommand> | ||
<name>org.eclipse.jdt.core.javabuilder</name> | ||
<arguments> | ||
</arguments> | ||
</buildCommand> | ||
<buildCommand> | ||
<name>org.eclipse.m2e.core.maven2Builder</name> | ||
<arguments> | ||
</arguments> | ||
</buildCommand> | ||
</buildSpec> | ||
<natures> | ||
<nature>org.eclipse.jdt.core.javanature</nature> | ||
<nature>org.eclipse.m2e.core.maven2Nature</nature> | ||
</natures> | ||
</projectDescription> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,15 @@ | ||
# java-ogg-to-ms-speech | ||
This project demonstrates how to use ffmpeg to convert .ogg files (Vorbis and Opus) to the right format for Speech-to-Text transcription using the Microsoft Cognitive Services Speech Service. | ||
# Transcribe .ogg speech files with the Microsoft Speech Java SDK | ||
This project demonstrates how to use ffmpeg to convert .ogg files (Vorbis and Opus) to the right format for Speech-to-Text transcription using the Microsoft Cognitive Services Speech Service. This could be used to transcribe voice messages encoded using the Opus (https://en.wikipedia.org/wiki/Opus_(audio_format)) codec or other codecs using the .ogg container format. | ||
|
||
To make this sample work, you need the [Cognitive Services Speech Service Java SDK](https://docs.microsoft.com/en-us/java/api/com.microsoft.cognitiveservices.speech?view=azure-java-stable), which has already been added to the pom file. You also need to set your own Speech key, region, and recognition language in the source: | ||
```java | ||
public final static String MS_SPEECH_KEY = "your-microsoft-speech-key"; | ||
public final static String MS_SPEECH_REGION = "westeurope"; | ||
public final static String MS_SPEECH_RECOGNITION_LANG = "de-de"; | ||
``` | ||
|
||
You also need to download [ffmpeg](https://ffmpeg.org/) which is used for transcoding and set the right path to it in the source. An audio file can be read from disk or passed as a byte array. It will then, in memory, be transcoded to wav / pcm format for transcription using the Cognitive Services Speech Service. | ||
|
||
Also check out the [Microsoft Speech SDK Sample Repository](https://github.com/Azure-Samples/cognitive-services-speech-sdk) to learn more and use more of its functionality. | ||
|
||
Thank you [@chgeuer](https://github.com/chgeuer) for your contributions. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<groupId>malantin</groupId> | ||
<artifactId>ogg</artifactId> | ||
<version>0.0.1-SNAPSHOT</version> | ||
<packaging>jar</packaging> | ||
|
||
<name>ogg</name> | ||
<url>http://maven.apache.org</url> | ||
|
||
<properties> | ||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||
</properties> | ||
<repositories> | ||
<repository> | ||
<id>maven-cognitiveservices-speech</id> | ||
<name>Microsoft Cognitive Services Speech Maven Repository</name> | ||
<url>https://csspeechstorage.blob.core.windows.net/maven/</url> | ||
</repository> | ||
</repositories> | ||
<dependencies> | ||
<dependency> | ||
<groupId>com.microsoft.cognitiveservices.speech</groupId> | ||
<artifactId>client-sdk</artifactId> | ||
<version>1.6.0</version> | ||
</dependency> | ||
</dependencies> | ||
</project> |
37 changes: 37 additions & 0 deletions
37
src/main/java/com/microsoft/cognitiveservices/speech/samples/ogg/App.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
//MIT License | ||
// | ||
//Copyright (c) Microsoft Corporation. All rights reserved. | ||
// | ||
//Permission is hereby granted, free of charge, to any person obtaining a copy | ||
//of this software and associated documentation files (the "Software"), to deal | ||
//in the Software without restriction, including without limitation the rights | ||
//to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
//copies of the Software, and to permit persons to whom the Software is | ||
//furnished to do so, subject to the following conditions: | ||
// | ||
//The above copyright notice and this permission notice shall be included in all | ||
//copies or substantial portions of the Software. | ||
// | ||
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
//SOFTWARE | ||
|
||
package com.microsoft.cognitiveservices.speech.samples.ogg; | ||
|
||
import java.io.IOException; | ||
import java.util.concurrent.ExecutionException; | ||
|
||
public class App { | ||
public static void main(String[] args) throws IOException, InterruptedException, ExecutionException { | ||
// Test with local file | ||
String filePath = "test.ogg"; | ||
|
||
OggToTextService service = new OggToTextService(); | ||
|
||
System.out.println(service.transcribeOGGFile(filePath)); | ||
} | ||
} |
56 changes: 56 additions & 0 deletions
56
src/main/java/com/microsoft/cognitiveservices/speech/samples/ogg/DataPipe.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
//MIT License | ||
// | ||
//Copyright (c) Microsoft Corporation. All rights reserved. | ||
// | ||
//Permission is hereby granted, free of charge, to any person obtaining a copy | ||
//of this software and associated documentation files (the "Software"), to deal | ||
//in the Software without restriction, including without limitation the rights | ||
//to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
//copies of the Software, and to permit persons to whom the Software is | ||
//furnished to do so, subject to the following conditions: | ||
// | ||
//The above copyright notice and this permission notice shall be included in all | ||
//copies or substantial portions of the Software. | ||
// | ||
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
//SOFTWARE | ||
|
||
package com.microsoft.cognitiveservices.speech.samples.ogg; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.OutputStream; | ||
|
||
/**
 * Copies all bytes from an {@link InputStream} to an {@link OutputStream} and
 * closes the output stream when copying ends, whether it ended normally or
 * because of an error.
 *
 * <p>Use {@link #start(InputStream, OutputStream)} to run the copy on its own
 * thread. The input stream is never closed by this class; that remains the
 * caller's responsibility.
 */
public class DataPipe implements Runnable {
    /** Size of the intermediate copy buffer in bytes (1 MiB). */
    private static final int BUFFER_SIZE = 1024 * 1024;

    private final InputStream is;
    private final OutputStream os;

    /**
     * Creates a pipe between the two streams without starting the copy.
     *
     * @param is source the bytes are read from
     * @param os destination the bytes are written to; closed when {@link #run()} finishes
     */
    DataPipe(InputStream is, OutputStream os) {
        this.is = is;
        this.os = os;
    }

    /**
     * Copies from the input to the output until end of stream, then closes the output.
     *
     * <p>An {@link IOException} raised while copying or closing is reported on
     * {@code System.err} and not rethrown.
     */
    public void run() {
        byte[] buffer = new byte[BUFFER_SIZE];
        try {
            int numRead;
            while ((numRead = is.read(buffer)) != -1) {
                os.write(buffer, 0, numRead);
            }
        } catch (IOException ioe) {
            System.err.println(ioe.getMessage());
        } finally {
            // Always close the destination: if a failed copy left it open, whoever
            // reads the other end would wait forever for an end-of-stream signal.
            try {
                os.close();
            } catch (IOException closeFailure) {
                System.err.println(closeFailure.getMessage());
            }
        }
    }

    /**
     * Starts copying {@code is} to {@code os} on a newly created thread.
     *
     * @param is source the bytes are read from
     * @param os destination the bytes are written to; closed when the copy finishes
     * @return the already-started thread, so the caller can {@code join()} it
     */
    public static Thread start(InputStream is, OutputStream os) {
        Thread t = new Thread(new DataPipe(is, os));
        t.start();
        return t;
    }
}
Oops, something went wrong.