Skip to content

Commit

Permalink
Add project source code
Browse files Browse the repository at this point in the history
  • Loading branch information
malantin committed Sep 17, 2019
1 parent 2e21d6b commit 9bbafe8
Show file tree
Hide file tree
Showing 9 changed files with 564 additions and 2 deletions.
27 changes: 27 additions & 0 deletions .classpath
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
61 changes: 61 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,64 @@

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
/target/



.metadata
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.settings/
.loadpath
.recommenders

# From https://github.com/github/gitignore/blob/master/Global/Eclipse.gitignore

# External tool builders
.externalToolBuilders/

# Locally stored "Eclipse launch configurations"
*.launch

# PyDev specific (Python IDE for Eclipse)
*.pydevproject

# CDT-specific (C/C++ Development Tooling)
.cproject

# CDT- autotools
.autotools

# Java annotation processor (APT)
.factorypath

# PDT-specific (PHP Development Tools)
.buildpath

# sbteclipse plugin
.target

# Tern plugin
.tern-project

# TeXlipse plugin
.texlipse

# STS (Spring Tool Suite)
.springBeans

# Code Recommenders
.recommenders/

# Annotation Processing
.apt_generated/

# Scala IDE specific (Scala & Java development for Eclipse)
.cache-main
.scala_dependencies
.worksheet
23 changes: 23 additions & 0 deletions .project
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>java-ogg-to-ms-speech</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>
17 changes: 15 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,15 @@
# java-ogg-to-ms-speech
This project demonstrates how use ffmpeg to convert .ogg files (Vorbis and Opus) to the right format for Speech-to-Text transcription using the Microsoft Cognitive Services Speech Service.
# Transcribe .ogg speech files with the Microsoft Speech Java SDK
This project demonstrates how use ffmpeg to convert .ogg files (Vorbis and Opus) to the right format for Speech-to-Text transcription using the Microsoft Cognitive Services Speech Service. This could be used to transcribe voice messages encoded using the Opus (https://en.wikipedia.org/wiki/Opus_(audio_format)) codec or other codecs using the .ogg container format.

To make this sample work, you need the [Cognitive Services Speech Service Java SDK](https://docs.microsoft.com/en-us/java/api/com.microsoft.cognitiveservices.speech?view=azure-java-stable) which has been already added to the pom file.
```java
public final static String MS_SPEECH_KEY = "your-microsoft-speech-key";
public final static String MS_SPEECH_REGION = "westeurope";
public final static String MS_SPEECH_RECOGNITION_LANG = "de-de";
```

You also need to download [ffmpeg](https://ffmpeg.org/) which is used for transcoding and set the right path to it in the source. An audio file can be read from disk or passed as a byte array. It will then, in memory, be transcoded to wav / pcm format for transcription using the Cognitive Services Speech Service.

Also check out the [Microsoft Speech SDK Sample Repository](https://github.com/Azure-Samples/cognitive-services-speech-sdk) to learn more and use more of it's functionality.

Thank you [@chgeuer](https://github.com/chgeuer) for your contributions.
30 changes: 30 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>malantin</groupId>
<artifactId>ogg</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>

<name>ogg</name>
<url>http://maven.apache.org</url>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<repositories>
<repository>
<id>maven-cognitiveservices-speech</id>
<name>Microsoft Cognitive Services Speech Maven Repository</name>
<url>https://csspeechstorage.blob.core.windows.net/maven/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>com.microsoft.cognitiveservices.speech</groupId>
<artifactId>client-sdk</artifactId>
<version>1.6.0</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//MIT License
//
//Copyright (c) Microsoft Corporation. All rights reserved.
//
//Permission is hereby granted, free of charge, to any person obtaining a copy
//of this software and associated documentation files (the "Software"), to deal
//in the Software without restriction, including without limitation the rights
//to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
//copies of the Software, and to permit persons to whom the Software is
//furnished to do so, subject to the following conditions:
//
//The above copyright notice and this permission notice shall be included in all
//copies or substantial portions of the Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
//SOFTWARE

package com.microsoft.cognitiveservices.speech.samples.ogg;

import java.io.IOException;
import java.util.concurrent.ExecutionException;

public class App {
public static void main(String[] args) throws IOException, InterruptedException, ExecutionException {
// Test with local file
String filePath = "test.ogg";

OggToTextService service = new OggToTextService();

System.out.println(service.transcribeOGGFile(filePath));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//MIT License
//
//Copyright (c) Microsoft Corporation. All rights reserved.
//
//Permission is hereby granted, free of charge, to any person obtaining a copy
//of this software and associated documentation files (the "Software"), to deal
//in the Software without restriction, including without limitation the rights
//to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
//copies of the Software, and to permit persons to whom the Software is
//furnished to do so, subject to the following conditions:
//
//The above copyright notice and this permission notice shall be included in all
//copies or substantial portions of the Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
//FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
//AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
//LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
//OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
//SOFTWARE

package com.microsoft.cognitiveservices.speech.samples.ogg;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public class DataPipe implements Runnable {
private final InputStream is;
private final OutputStream os;

DataPipe(InputStream is, OutputStream os) {
this.is = is;
this.os = os;
}

public void run() {
byte buffer[] = new byte[1024 * 1024];
int numRead;
try {
while ((numRead = this.is.read(buffer)) != -1) {
os.write(buffer, 0, numRead);
}
this.os.close();
} catch (IOException ioe) {
System.err.println(ioe.getMessage());
}
}

public static Thread start(InputStream is, OutputStream os) {
Thread t = new Thread(new DataPipe(is, os));
t.start();
return t;
}
}
Loading

0 comments on commit 9bbafe8

Please sign in to comment.