Created
July 3, 2021 09:23
-
-
Save manifestinteractive/6fd9be62d0ede934d4e1171e5e751aba to your computer and use it in GitHub Desktop.
MRS to LJ Speech Processor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script generates the folder structure for ljspeech-1.1 processing from mimic-recording-studio database | |
# This is a modified version of what was written by Thorsten Müller | |
import glob | |
import sqlite3 | |
import ffmpeg | |
import os | |
from shutil import copyfile | |
from shutil import rmtree | |
# Directory layout, resolved relative to the folder that contains this script
cwd = os.path.split(os.path.abspath(__file__))[0]
# The Mimic Recording Studio checkout is looked up one level above this script's folder
mrs_dir = os.path.join(cwd, os.pardir, "mimic-recording-studio")
# Root folder that receives the generated dataset
output_dir = os.path.join(cwd, "dataset")
# Placeholders for the dataset sub-folders; create_folders() assigns the real paths
output_dir_audio = output_dir_audio_temp = output_dir_speech = ""
def create_folders():
    """Create a clean LJSpeech-1.1 folder tree under ``output_dir``.

    Sets the module-level ``output_dir_speech``, ``output_dir_audio`` and
    ``output_dir_audio_temp`` paths. An existing ``LJSpeech-1.1`` folder is
    removed first so every run starts from an empty tree.
    """
    global output_dir_audio
    global output_dir_audio_temp
    global output_dir_speech

    print('→ Creating Dataset Folders')

    # Work out the whole layout before touching the filesystem
    output_dir_speech = os.path.join(output_dir, "LJSpeech-1.1")
    output_dir_audio = os.path.join(output_dir_speech, "wavs")
    output_dir_audio_temp = os.path.join(output_dir_speech, "temp")

    # Wipe the result of any previous run
    if os.path.exists(output_dir_speech):
        rmtree(output_dir_speech)

    # Parent first, then the two sub-folders
    for folder in (output_dir_speech, output_dir_audio, output_dir_audio_temp):
        os.makedirs(folder)
def convert_audio():
    """Convert every staged WAV to 22050 Hz, 16-bit, mono and remove the temp folder.

    Reads the ``*.wav`` files in the module-level ``output_dir_audio_temp``,
    writes each converted file under the same name into ``output_dir_audio``,
    deletes the temp file once it has been converted, and finally removes the
    temp folder itself. Progress is printed per file.
    """
    # Build the work list once so the progress total always matches the files
    # that are actually processed. The previous count passed the bare names
    # returned by os.listdir() to os.path.isfile(), which resolves them against
    # the current working directory rather than the temp folder; the total was
    # therefore normally 0 and the percentage below divided by zero.
    wav_files = sorted(glob.glob(os.path.join(output_dir_audio_temp, "*.wav")))
    recordings = len(wav_files)

    print('→ Converting %s Audio Files to 22050 Hz, 16 Bit, Mono\n' % "{:,}".format(recordings))

    # With no staged files the loop body never runs, so `recordings` is never
    # used as a divisor when it is 0.
    for position, wav in enumerate(wav_files, start=1):
        file_name = os.path.basename(wav)
        percent = position / recordings
        print('› \033[96m%s\033[0m \033[2m%s / %s (%s)\033[0m ' % (
            file_name,
            "{:,}".format(position),
            "{:,}".format(recordings),
            "{:.0%}".format(percent),
        ))

        # Convert WAV file to required format
        (ffmpeg
            .input(wav)
            .output(os.path.join(output_dir_audio, file_name), acodec='pcm_s16le', ac=1, ar=22050, loglevel='error')
            .overwrite_output()
            .run(capture_stdout=True)
        )

        # Delete Temp File
        os.remove(wav)

    # Remove Temp Folder
    rmtree(output_dir_audio_temp)
def create_meta_data():
    """Write ``metadata.csv`` and stage the recorded WAV files for conversion.

    Queries ``audio_id`` and ``prompt`` from the ``audiomodel`` table of the
    Mimic Recording Studio database (shortest prompts first). For every row it
    copies ``<audio_id>.wav`` from the ``default_user`` audio folder into the
    module-level ``output_dir_audio_temp`` and appends one
    ``audio_id|prompt|lowercased prompt`` line to ``metadata.csv`` inside
    ``output_dir_speech``.

    Any error (for example a missing recording) still propagates to the
    caller, but the metadata file and the database connection are now closed
    on the way out instead of being leaked.
    """
    print('→ Creating META Data')

    # Loop-invariant: all recordings are read from the same source folder
    audio_source_dir = os.path.join(mrs_dir, "backend", "audio_files", "default_user")
    query = 'SELECT audio_id, prompt, lower(prompt) FROM audiomodel ORDER BY length(prompt)'

    conn = sqlite3.connect(os.path.join(mrs_dir, "backend", "db", "mimicstudio.db"))
    try:
        c = conn.cursor()

        # Create metadata.csv for ljspeech
        with open(os.path.join(output_dir_speech, "metadata.csv"), mode="w", encoding="utf8") as metadata:
            for audio_id, prompt, prompt_lower in c.execute(query):
                wav_name = audio_id + ".wav"
                # Stage the audio first, so a failed copy does not leave a
                # metadata line that points at a file which was never staged.
                copyfile(os.path.join(audio_source_dir, wav_name), os.path.join(output_dir_audio_temp, wav_name))
                metadata.write("|".join((audio_id, prompt, prompt_lower)) + "\n")
    finally:
        # A sqlite3 connection used as a context manager only manages the
        # transaction, so it has to be closed explicitly.
        conn.close()
def main():
    """Run the export pipeline: folders, then metadata and staging, then conversion."""
    print('\n\033[48;5;22m MRS to LJ Speech Processor \033[0m\n')

    # Order matters: the folders must exist before files are staged into them,
    # and the staged files must exist before they can be converted.
    for step in (create_folders, create_meta_data, convert_audio):
        step()

    print('\n\033[38;5;86;1m✔\033[0m COMPLETE【ツ】\n')
# Run the export only when this file is executed as a script, not when imported
if "__main__" == __name__:
    main()
Hi @manifestinteractive.
That's really a great improvement of my original version, so thank you Peter for this 👏 👏 👏.
I've added some logic to choose which recording session you want to export when you use MRS for multiple recording sessions. I hope it's okay with you that I took your optimized version, added the multi-recording-session support, and released it here - giving credit to you, of course 👍.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This was based on the work of Thorsten Müller as seen in this video: https://www.youtube.com/watch?app=desktop&v=4YT8WZT_x48
What does this do?
Generates some pretty command line output like this:
Setup
- `mimic-recording-studio` - This is a clone of Mimic Recording Studio
- `TTS` - This is a clone of Coqui AI TTS
- `MRS2LJSpeech.py` is then placed in the root of the `TTS` folder

How to use
- Requires the `ffmpeg-python` package
- (`mimic-recording-studio` & `TTS`)