Skip to content

Instantly share code, notes, and snippets.

@tam17aki
Forked from r9y9/a.py
Last active July 3, 2019 11:52
Show Gist options
  • Save tam17aki/0f9fd16cb6022d2015b9358ee7ce01b0 to your computer and use it in GitHub Desktop.
Save tam17aki/0f9fd16cb6022d2015b9358ee7ce01b0 to your computer and use it in GitHub Desktop.
juliusのセグメンテーション結果に基づいて音声をトリミングする
import numpy as np
import os
from nnmnkwii.datasets import jsut
import librosa
import librosa.display
from matplotlib import pyplot as plt
from nnmnkwii.io import hts
from os.path import exists
# Script entry point: cut the leading/trailing silence off every JSUT
# utterance, using the silB/silE boundaries of its segmentation label file,
# and write the result next to the original audio under "wav_trim/".
if __name__ == "__main__":
    in_dir = "/home/ryuichi/data/jsut_ver1"
    transcriptions = jsut.TranscriptionDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()
    wav_paths = jsut.WavFileDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()

    # Create one output directory per subset up front so the writes below
    # never hit a missing directory.
    for subset in jsut.available_subsets:
        save_dir = os.path.join(in_dir, subset, "wav_trim")
        os.makedirs(save_dir, exist_ok=True)

    # The transcription itself is not needed for trimming; it is zipped in
    # only so that iteration stops at the shorter of the two lists.
    for idx, (_text, wav_path) in enumerate(zip(transcriptions, wav_paths)):
        print(idx, wav_path)
        # The label file mirrors the wav path: .../wav/X.wav -> .../lab/X.lab
        lab_path = wav_path.replace("wav/", "lab/").replace(".wav", ".lab")
        x, sr = librosa.load(wav_path, sr=20000)

        if exists(lab_path):
            labels = hts.load(lab_path)
            # Each label entry is indexed as (start, end, name); the file is
            # expected to open with silB and close with silE.
            assert labels[0][-1] == "silB"
            assert labels[-1][-1] == "silE"
            # Label times are scaled by 1e-7 to seconds (HTS labels use
            # 100 ns units), then by the sample rate to get sample indices.
            b = int(labels[0][1] * 1e-7 * sr)    # end of the leading silB
            e = int(labels[-1][0] * 1e-7 * sr)   # start of the trailing silE
            yy = x[b:e]
        else:
            # No segmentation available: keep the utterance untrimmed.
            yy = x

        out_wav_path = wav_path.replace("wav/", "wav_trim/")
        # NOTE(review): librosa.output.write_wav exists only in librosa < 0.8;
        # newer versions need a different writer (e.g. soundfile) — confirm
        # the pinned librosa version before running.
        librosa.output.write_wav(out_wav_path, yy, sr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment