Skip to content

Instantly share code, notes, and snippets.

@rhulha
Created October 2, 2016 22:32
Show Gist options
  • Save rhulha/ab2216e6fd58976025b3c05e49afbe31 to your computer and use it in GitHub Desktop.
A small example Google Cloud Speech API (speech:syncrecognize) demo client with microphone recording.
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

import javax.sound.sampled.*;

import net.raysforge.commons.Codecs;
import net.raysforge.commons.Json;
import net.raysforge.rest.client.GenericRestClient;
/**
 * Minimal demo client for the Google Cloud Speech API (v1beta1): records raw
 * LINEAR16 audio from the default microphone and submits it to the
 * {@code speech:syncrecognize} endpoint.
 *
 * <p>NOTE(review): despite the gist description, this talks to the Speech API,
 * not the Natural Language API.
 */
public class SpeechClient extends GenericRestClient {

    /** Sample rate in Hz; must match the {@code sampleRate} sent in the request config. */
    protected static final int SAMPLE_RATE = 16000;

    /** 16-bit mono LINEAR16 audio: two bytes per sample. */
    private static final int BYTES_PER_SAMPLE = 2;

    /**
     * Hint phrases that bias the recognizer toward the demo's expected command
     * vocabulary ("ctree please create database called test").
     */
    private static final List<String> HINT_PHRASES =
            Arrays.asList("ctree", "please", "create", "database", "called", "test");

    /**
     * @param authToken OAuth bearer token passed through to {@link GenericRestClient}.
     */
    public SpeechClient(String authToken) {
        super("https://speech.googleapis.com/v1beta1/", null, authToken, Auth.Token);
    }

    /**
     * Records {@code seconds} of microphone audio (printing a console countdown
     * while recording) and sends it for synchronous recognition.
     *
     * @param seconds recording duration in seconds
     * @return the parsed API response as produced by {@code postData}
     * @throws IOException if the HTTP request fails
     */
    public Object recordAndSyncrecognize(int seconds) throws IOException {
        countDown(seconds);
        return syncrecognize(Codecs.toBase64(record(seconds)));
    }

    /**
     * Sends raw LINEAR16 audio bytes for synchronous recognition.
     *
     * @param rawAudioData raw 16 kHz, 16-bit, mono, little-endian PCM samples
     * @return the parsed API response
     * @throws IOException if the HTTP request fails
     */
    public Object syncrecognize(byte[] rawAudioData) throws IOException {
        return syncrecognize(Codecs.toBase64(rawAudioData));
    }

    /**
     * Builds the syncrecognize JSON request and POSTs it.
     *
     * @param base64StringOfRawAudioData base64-encoded raw PCM audio
     * @return the parsed API response
     * @throws IOException if the HTTP request fails
     */
    public Object syncrecognize(String base64StringOfRawAudioData) throws IOException {
        HashMap<String, Object> speechContext = new HashMap<>();
        // BUG FIX: SpeechContext.phrases is a JSON *array* of strings in the
        // v1beta1 API; the original code sent one space-joined string and left
        // the phrase list it had built unused.
        speechContext.put("phrases", HINT_PHRASES);

        HashMap<String, Object> config = new HashMap<>();
        config.put("encoding", "LINEAR16");
        config.put("sampleRate", SAMPLE_RATE); // was a duplicated magic 16000
        config.put("languageCode", "en-US");
        config.put("speechContext", speechContext);

        HashMap<String, Object> audio = new HashMap<>();
        audio.put("content", base64StringOfRawAudioData);

        HashMap<String, Object> request = new HashMap<>();
        request.put("config", config);
        request.put("audio", audio);

        System.out.println(Json.toJsonString(request));
        return postData("POST", "speech:syncrecognize", Json.toJsonString(request));
    }

    /**
     * Records {@code seconds} of audio from the default microphone in the
     * format the API expects (16 kHz, 16-bit, mono, signed, little-endian).
     *
     * <p>Unlike the original, failures now throw instead of calling
     * {@code System.exit(0)} or returning {@code null} (which would have caused
     * an NPE in {@code Codecs.toBase64} downstream anyway).
     *
     * @param seconds recording duration in seconds
     * @return the recorded PCM bytes (possibly zero-padded at the tail if the
     *         line delivered fewer bytes than requested)
     * @throws IllegalStateException if no suitable line exists or it is unavailable
     * @throws UncheckedIOException  if reading from the audio stream fails
     */
    public static byte[] record(int seconds) {
        // Format per the Speech API docs:
        // https://cloud.google.com/speech/reference/rest/Shared.Types/AudioEncoding
        AudioFormat format = new AudioFormat(SAMPLE_RATE, 16, 1, true, false);
        DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
        if (!AudioSystem.isLineSupported(info)) {
            // Original called System.exit(0) here; throwing lets callers decide.
            throw new IllegalStateException("Audio line not supported: " + info);
        }
        TargetDataLine line = null;
        try {
            line = (TargetDataLine) AudioSystem.getLine(info);
            line.open(format);
            line.start();
            byte[] buffer = new byte[SAMPLE_RATE * BYTES_PER_SAMPLE * seconds];
            try (AudioInputStream ais = new AudioInputStream(line)) {
                // BUG FIX: a single read() may legally return fewer bytes than
                // requested; loop until the buffer is full or the stream ends.
                int filled = 0;
                while (filled < buffer.length) {
                    int n = ais.read(buffer, filled, buffer.length - filled);
                    if (n < 0)
                        break; // end of stream
                    filled += n;
                }
                if (filled != buffer.length)
                    System.out.println("short read: expected " + buffer.length + " got " + filled);
            }
            return buffer;
        } catch (LineUnavailableException e) {
            throw new IllegalStateException("Microphone line unavailable", e);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        } finally {
            if (line != null)
                line.close(); // release the mic even when an exception escapes
        }
    }

    /**
     * Prints a once-per-second countdown from {@code seconds} to 1 on a
     * background thread, so the user sees how long the recording will last.
     *
     * @param seconds starting value of the countdown
     */
    public static void countDown(int seconds) {
        Thread ticker = new Thread(() -> {
            for (int i = seconds; i > 0; i--) {
                System.out.println(i);
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt(); // restore interrupt status
                    return; // stop counting when interrupted
                }
            }
        });
        ticker.setDaemon(true); // never keep the JVM alive just for the countdown
        ticker.start();
    }

    /**
     * Demo entry point: records 5 seconds of audio and prints the recognition
     * result. Replace {@code "yourAuthToken"} with a real OAuth token.
     */
    public static void main(String[] args) throws IOException {
        SpeechClient client = new SpeechClient("yourAuthToken");
        Object result = client.recordAndSyncrecognize(5);
        System.out.println(result);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment