srli · June 25, 2018 14:47 · joydeb28 · Apr 19, 2019 · slowrunner · Jun 22, 2019
diff --git a/stt_py3.py b/stt_py3.py
 from pocketsphinx.pocketsphinx import *
 from sphinxbase.sphinxbase import *

 import os
 import pyaudio
 import wave
 import audioop
 from collections import deque
 import time
 import math

 """
 Written by Sophie Li, 2016
 http://blog.justsophie.com/python-speech-to-text-with-pocketsphinx/
 """

 class SpeechDetector:
 def __init__(self, model_dir="pocketsphinx/model"):
     """Configure microphone capture settings and build the decoder.

     Args:
         model_dir: Directory containing the PocketSphinx ``en-us`` model
             files (acoustic model, language model, dictionary). The
             default is the relative path this script has always used;
             pass another path to match where the pocketsphinx folder is.
     """
     # Microphone stream config.
     self.CHUNK = 1024  # Passed to stream.read() and frames_per_buffer
     self.FORMAT = pyaudio.paInt16
     self.CHANNELS = 1
     self.RATE = 16000

     # Silence limit in seconds. The max amount of seconds where only
     # silence is recorded. When this time passes the recording finishes
     # and the file is decoded.
     self.SILENCE_LIMIT = 1

     # Previous audio (in seconds) to prepend. When noise is detected, how
     # much of the previously recorded audio is prepended. This helps to
     # prevent chopping the beginning of the phrase.
     self.PREV_AUDIO = 0.5

     # Intensity level that mic readings are compared against.
     self.THRESHOLD = 4500
     # NOTE(review): not read anywhere in the visible code -- confirm use.
     self.num_phrases = -1

     # Create a decoder with the en-us model found under model_dir.
     config = Decoder.default_config()
     config.set_string('-hmm', os.path.join(model_dir, 'en-us/en-us'))
     config.set_string('-lm', os.path.join(model_dir, 'en-us/en-us.lm.bin'))
     config.set_string(
         '-dict', os.path.join(model_dir, 'en-us/cmudict-en-us.dict'))

     # Creates the decoder object for streaming data.
     self.decoder = Decoder(config)

 def setup_mic(self, num_samples=50):
     """Sample the microphone and report its average peak intensity.

     Reads ``num_samples`` chunks from the mic, turns each chunk into an
     intensity value, and prints the mean of the loudest 20% of them. You
     can use it to get average intensities while you're talking and/or
     silent.

     Args:
         num_samples: Number of chunks to read from the microphone.

     NOTE(review): in this copy of the file the statement that follows
     this block (``if r ...``) is truncated, so anything done with the
     measured value ``r`` beyond printing it is not visible here.
     """
     print("Getting intensity values from mic.")
     p = pyaudio.PyAudio()
     try:
         stream = p.open(format=self.FORMAT,
                         channels=self.CHANNELS,
                         rate=self.RATE,
                         input=True,
                         frames_per_buffer=self.CHUNK)
         try:
             # NOTE(review): width 4 is passed to audioop.avg although
             # FORMAT is paInt16; left unchanged because THRESHOLD is
             # presumably tuned against these values -- confirm.
             values = [
                 math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
                 for _ in range(num_samples)
             ]
         finally:
             # Release the audio device even if a read fails.
             stream.close()
     finally:
         p.terminate()

     values.sort(reverse=True)
     # Always average at least one reading so that a small num_samples
     # (fewer than 5) cannot cause a division by zero.
     top_n = max(1, int(num_samples * 0.2))
     r = sum(values[:top_n]) / top_n
     print(" Finished ")
     print(" Average audio intensity is %s " % r)

 # NOTE(review): the next line is truncated in this copy of the file. Text is
 # missing between "if r" and "self.THRESHOLD", and with it the definitions
 # of save_speech, decode_phrase and run that the code below and the
 # __main__ guard call. Restore the missing section before using this file.
 if r self.THRESHOLD for x in slid_win]) > 0:
 # A phrase is not yet in progress: announce it and mark it as started.
 if started == False:
 print (“Starting recording of phrase”)
 started = True
 # Collect the current chunk as part of the phrase being recorded.
 audio2send.append(cur_data)

 # The condition above failed while a phrase was in progress: save the
 # buffered audio (preceded by prev_audio), decode it and print the result.
 elif started:
 print (“Finished recording, decoding phrase”)
 filename = self.save_speech(list(prev_audio) + audio2send, p)
 r = self.decode_phrase(filename)
 print (“DETECTED: %s” % r)

 # Removes temp audio file
 os.remove(filename)
 # Reset all
 started = False
 slid_win = deque(maxlen=int(self.SILENCE_LIMIT * rel))
 # NOTE(review): 0.5 repeats the value of self.PREV_AUDIO set in __init__.
 prev_audio = deque(maxlen=int(0.5 * rel))
 audio2send = []
 print (“Listening …”)

 # No phrase in progress: keep the chunk in the bounded prev_audio buffer.
 else:
 prev_audio.append(cur_data)

 # Close the microphone stream and release PyAudio.
 print (“* Done listening”)
 stream.close()
 p.terminate()

 # Script entry point: build a detector and start listening.
 if __name__ == "__main__":
     sd = SpeechDetector()
     sd.run()
	from pocketsphinx.pocketsphinx import *
	from sphinxbase.sphinxbase import *

	import os
	import pyaudio
	import wave
	import audioop
	from collections import deque
	import time
	import math

	"""
	Written by Sophie Li, 2016
	http://blog.justsophie.com/python-speech-to-text-with-pocketsphinx/
	"""

	class SpeechDetector:
	def __init__(self, model_dir="pocketsphinx/model"):
	    """Configure microphone capture settings and build the decoder.

	    Args:
	        model_dir: Directory containing the PocketSphinx ``en-us`` model
	            files (acoustic model, language model, dictionary). The
	            default is the relative path this script has always used;
	            pass another path to match where the pocketsphinx folder is.
	    """
	    # Microphone stream config.
	    self.CHUNK = 1024  # Passed to stream.read() and frames_per_buffer
	    self.FORMAT = pyaudio.paInt16
	    self.CHANNELS = 1
	    self.RATE = 16000

	    # Silence limit in seconds. The max amount of seconds where only
	    # silence is recorded. When this time passes the recording finishes
	    # and the file is decoded.
	    self.SILENCE_LIMIT = 1

	    # Previous audio (in seconds) to prepend. When noise is detected, how
	    # much of the previously recorded audio is prepended. This helps to
	    # prevent chopping the beginning of the phrase.
	    self.PREV_AUDIO = 0.5

	    # Intensity level that mic readings are compared against.
	    self.THRESHOLD = 4500
	    # NOTE(review): not read anywhere in the visible code -- confirm use.
	    self.num_phrases = -1

	    # Create a decoder with the en-us model found under model_dir.
	    config = Decoder.default_config()
	    config.set_string('-hmm', os.path.join(model_dir, 'en-us/en-us'))
	    config.set_string('-lm', os.path.join(model_dir, 'en-us/en-us.lm.bin'))
	    config.set_string(
	        '-dict', os.path.join(model_dir, 'en-us/cmudict-en-us.dict'))

	    # Creates the decoder object for streaming data.
	    self.decoder = Decoder(config)

	def setup_mic(self, num_samples=50):
	    """Sample the microphone and report its average peak intensity.

	    Reads ``num_samples`` chunks from the mic, turns each chunk into an
	    intensity value, and prints the mean of the loudest 20% of them. You
	    can use it to get average intensities while you're talking and/or
	    silent.

	    Args:
	        num_samples: Number of chunks to read from the microphone.

	    NOTE(review): in this copy of the file the statement that follows
	    this block (``if r ...``) is truncated, so anything done with the
	    measured value ``r`` beyond printing it is not visible here.
	    """
	    print("Getting intensity values from mic.")
	    p = pyaudio.PyAudio()
	    try:
	        stream = p.open(format=self.FORMAT,
	                        channels=self.CHANNELS,
	                        rate=self.RATE,
	                        input=True,
	                        frames_per_buffer=self.CHUNK)
	        try:
	            # NOTE(review): width 4 is passed to audioop.avg although
	            # FORMAT is paInt16; left unchanged because THRESHOLD is
	            # presumably tuned against these values -- confirm.
	            values = [
	                math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
	                for _ in range(num_samples)
	            ]
	        finally:
	            # Release the audio device even if a read fails.
	            stream.close()
	    finally:
	        p.terminate()

	    values.sort(reverse=True)
	    # Always average at least one reading so that a small num_samples
	    # (fewer than 5) cannot cause a division by zero.
	    top_n = max(1, int(num_samples * 0.2))
	    r = sum(values[:top_n]) / top_n
	    print(" Finished ")
	    print(" Average audio intensity is %s " % r)

	# NOTE(review): the next line is truncated in this copy of the file. Text is
	# missing between "if r" and "self.THRESHOLD", and with it the definitions
	# of save_speech, decode_phrase and run that the code below and the
	# __main__ guard call. Restore the missing section before using this file.
	if r self.THRESHOLD for x in slid_win]) > 0:
	# A phrase is not yet in progress: announce it and mark it as started.
	if started == False:
	print (“Starting recording of phrase”)
	started = True
	# Collect the current chunk as part of the phrase being recorded.
	audio2send.append(cur_data)

	# The condition above failed while a phrase was in progress: save the
	# buffered audio (preceded by prev_audio), decode it and print the result.
	elif started:
	print (“Finished recording, decoding phrase”)
	filename = self.save_speech(list(prev_audio) + audio2send, p)
	r = self.decode_phrase(filename)
	print (“DETECTED: %s” % r)

	# Removes temp audio file
	os.remove(filename)
	# Reset all
	started = False
	slid_win = deque(maxlen=int(self.SILENCE_LIMIT * rel))
	# NOTE(review): 0.5 repeats the value of self.PREV_AUDIO set in __init__.
	prev_audio = deque(maxlen=int(0.5 * rel))
	audio2send = []
	print (“Listening …”)

	# No phrase in progress: keep the chunk in the bounded prev_audio buffer.
	else:
	prev_audio.append(cur_data)

	# Close the microphone stream and release PyAudio.
	print (“* Done listening”)
	stream.close()
	p.terminate()

	# Script entry point: build a detector and start listening.
	if __name__ == "__main__":
	    sd = SpeechDetector()
	    sd.run()