Created
May 19, 2021 17:20
-
-
Save rvipandey/20fdba0098280814e96be4029db57514 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def speakerdiarisationdf(hyp, frameRate, wavFile):
    """Turn a per-frame speaker hypothesis into a table of speaker turns.

    Args:
        hyp: 1-D sequence with one speaker id per frame; the value -1 marks
            a frame that belongs to no speaker and never produces a row.
        frameRate: Number of frames per time unit. Frame indices are divided
            by it to obtain times (presumably frames per second -- confirm
            against the feature extraction that produced ``hyp``).
        wavFile: Path of the analysed audio. Only the last '/'-separated
            component up to its first '.' is used, with ".wav" appended.

    Returns:
        pandas.DataFrame with columns 'Audio', 'SpeakerLabel', 'StartTime'
        and 'EndTime', one row per speaker turn in chronological order.
        Segments of the same speaker that are separated only by -1 frames
        are merged into a single turn. The frame is empty (columns only)
        when ``hyp`` is empty or contains no speaker frames.
    """
    columns = ['Audio', 'SpeakerLabel', 'StartTime', 'EndTime']
    labels = np.asarray(hyp)
    if labels.size == 0:
        return pd.DataFrame(columns=columns)

    audio = wavFile.split('/')[-1].split('.')[0] + ".wav"
    rate = float(frameRate)

    # A change point i means labels[i] != labels[i + 1], so a new homogeneous
    # run begins at frame i + 1.  Frame 0 always opens the first run, which
    # also covers the "no change point at all" case without special handling.
    change_points = np.where(labels[:-1] != labels[1:])[0]
    run_starts = np.concatenate(([0], change_points + 1))
    run_ends = np.concatenate((run_starts[1:], [labels.size]))

    turns = []  # each entry: [speaker label, start time, end time]
    for first_frame, end_frame in zip(run_starts, run_ends):
        speaker = labels[first_frame]
        if speaker == -1:
            continue
        name = "Speaker " + str(int(speaker))
        start_time = first_frame / rate
        end_time = end_frame / rate
        if turns and turns[-1][0] == name:
            # Same speaker resumes after a gap: extend the current turn.
            turns[-1][2] = end_time
            continue
        if turns:
            # A turn is closed where the next (different) speaker starts, so
            # any -1 gap between two speakers is attributed to the earlier one.
            turns[-1][2] = start_time
        turns.append([name, start_time, end_time])

    # The final turn keeps the end of its own last segment; there is no
    # following speaker whose start time could close it.
    return pd.DataFrame([[audio] + turn for turn in turns], columns=columns)
# Build one label per frame: every frame starts as -1 ("no speaker"), then
# the frames selected by `vad` receive the cluster ids from `frameClust`.
# NOTE(review): assumes `vad` is a boolean mask / index array over frames
# and `frameClust` holds one id per selected frame -- confirm upstream.
pass1hyp = np.full(len(vad), -1.0)
pass1hyp[vad] = frameClust

# Convert the per-frame hypothesis into a table of speaker turns.
spkdf = speakerdiarisationdf(pass1hyp, frameRate, wavFile)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment