Last active
July 19, 2020 08:23
-
-
Save aniline/b477bb2a6485401d66f6d49f09ed4c9a to your computer and use it in GitHub Desktop.
WIP scriptlet that uses the Google Cloud TTS API. For voice-overs and the like.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# usage: tts.py [-h] [-f SPEC_FILE] [ssml [ssml ...]] | |
# | |
# positional arguments: | |
# ssml ssml message file | |
# | |
# optional arguments: | |
# -h, --help show this help message and exit | |
# -f SPEC_FILE, --spec-file SPEC_FILE | |
# Synthesis specification file(s) | |
# | |
# The 'SPEC_FILE' is a json file of the form. | |
# { | |
# "synth" : [ | |
# { "file" : "message1.ssml" }, | |
# { "file" : "message2.ssml" } | |
# ] | |
# } | |
# | |
# Tries to create message1.mp3, message2.mp3 relative to the folder where the json file is. | |
# | |
# The ssml files passed on the command line are used to synthesize audio with the same basename and .mp3 extension. | |
# | |
import argparse | |
import json | |
import traceback | |
import sys | |
from pathlib import Path | |
from google.cloud import texttospeech | |
def render(client, voice, audio_config, input_text, output_file):
    """Synthesize SSML markup and write the resulting audio to ``output_file``.

    Args:
        client: Text-to-speech client; its ``synthesize_speech`` method is
            called with the request.
        voice: Voice selection parameters forwarded to the request.
        audio_config: Audio configuration forwarded to the request.
        input_text: Markup handed to the service as SSML.
        output_file: ``pathlib.Path``-like destination, opened in binary
            write mode (an existing file is overwritten).
    """
    synthesis_input = texttospeech.SynthesisInput(ssml=input_text)
    response = client.synthesize_speech(
        input=synthesis_input,
        voice=voice,
        audio_config=audio_config,
    )
    with output_file.open("wb") as out:
        out.write(response.audio_content)
    # Callers pass a path that already ends in ".mp3"; report it verbatim
    # instead of appending a second extension to the message.
    print(f'Audio content written to file "{output_file}"')
def _load_spec(specfile):
    """Return the synthesis items described by the JSON file ``specfile``.

    Each item is a dict holding either ``"file"`` (path of an SSML file) or
    ``"text"`` (inline markup) plus, when the entry supplies one,
    ``"output_file"``. Relative paths are resolved against the folder that
    contains the spec file. File items come first, then text items.

    Returns an empty list when no spec file was given, or (after printing
    the error) when the file cannot be read or does not have the expected
    structure.
    """
    if not specfile:
        # No -f option given: nothing to load, and nothing worth reporting.
        return []
    specdir = Path(specfile).parent
    try:
        # JSON is UTF-8 by specification; the context manager closes the file.
        with open(specfile, encoding="utf-8") as spec_fp:
            synth = json.load(spec_fp)["synth"]
        items = [
            {"file": specdir.joinpath(entry["file"])}
            for entry in synth
            if "file" in entry
        ]
        for entry in synth:
            if "text" not in entry:
                continue
            item = {"text": entry["text"]}
            # A missing "output_file" is reported per item by process()
            # rather than invalidating the whole spec file.
            if "output_file" in entry:
                item["output_file"] = specdir.joinpath(entry["output_file"])
            items.append(item)
    except (OSError, ValueError, KeyError, TypeError) as e:
        # Best effort: an unusable spec file must not prevent the SSML files
        # named on the command line from being processed.
        print(e)
        return []
    print(f"Processing file {specfile}")
    return items


def process(specfile, ssmls):
    """Synthesize audio for every item in the spec file and every SSML file.

    Args:
        specfile: Path of the JSON specification file, or ``None``.
        ssmls: Iterable of SSML file paths given on the command line.

    For a ``"file"`` item the audio is written next to the input with an
    ``.mp3`` suffix; for a ``"text"`` item it is written to the item's
    ``"output_file"`` with the suffix replaced by ``.mp3``.
    """
    spec = _load_spec(specfile)
    spec.extend({"file": Path(ssml)} for ssml in ssmls)

    client = texttospeech.TextToSpeechClient()
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-IN",
        name="en-IN-Wavenet-D",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=0.9,
    )
    print(spec)
    for k in spec:
        if "file" in k:
            input_file = k["file"]
            with input_file.open() as inpf:
                input_text = inpf.read()
            output_file = input_file.with_suffix(".mp3")
            print(f"Input {input_file}, output {output_file}")
            render(client, voice, audio_config, input_text, output_file)
        if "text" in k:
            if "output_file" in k:
                output_file = Path(k["output_file"]).with_suffix(".mp3")
                input_text = k["text"]
                print(f"Input <text>, output {output_file}")
                render(client, voice, audio_config, input_text, output_file)
            else:
                print("Not output filename for text item:")
                print(k["text"])
def setup_arguments(parser):
    """Register the script's command-line arguments on ``parser``.

    Adds the optional ``-f/--spec-file`` option and the positional ``ssml``
    argument, which accepts zero or more values.
    """
    argument_table = (
        (("-f", "--spec-file"), {"help": "Synthesis specification file(s)"}),
        (("ssml",), {"nargs": "*", "help": "ssml message file"}),
    )
    for names, settings in argument_table:
        parser.add_argument(*names, **settings)
def main():
    """Parse the command line, run the synthesis and return an exit status.

    Returns:
        0 on success, 1 when processing raised an exception (the exception
        summary is printed, followed by its traceback).
    """
    # The parsed arguments are published as a module-level name, as the
    # script has always done.
    global args
    parser = argparse.ArgumentParser()
    setup_arguments(parser)
    try:
        args = parser.parse_args()
        process(args.spec_file, list(args.ssml))
    except Exception as e:
        print(f"{e.__class__.__name__}:", ", ".join(str(a) for a in e.args))
        # print_tb() expects a traceback object, not the exception itself;
        # passing the exception raised AttributeError inside this handler.
        traceback.print_tb(e.__traceback__)
        return 1
    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment