from IPython.core.display import HTML,Image
HTML('''<script> code_show=true; function code_toggle() { if (code_show){ $('div.input').hide(); } else { $('div.input').show(); } code_show = !code_show } $( document ).ready(code_toggle); </script> <form action='javascript:code_toggle()'><input type='submit' value='Toggle Code'></form>''')
cd /Users/james/Desktop/videolab/ECoG/
import gc, argparse, sys, os, errno
%pylab inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#sns.set()
#sns.set_style('whitegrid')
import h5py
from PIL import Image
import os
from tqdm.notebook import tqdm
import scipy
import sklearn
from scipy.stats import pearsonr
import warnings
warnings.filterwarnings('ignore')
from pystoi import stoi
# Load the per-subject latent dictionaries. `.item()` unwraps the 0-d object
# array that np.load returns for a pickled dict; later code indexes the result
# with the key 512 (ex_train[512] / ex_test[512]).
# NOTE: allow_pickle=True unpickles arbitrary objects -- only load trusted files.
# NOTE(review): the two HD06 loads below are immediately overwritten by the
# NY742 loads that follow, so only the NY742 data is in effect when the file
# is run top to bottom.
ex_train = np.load("report/2020/audio_recon_result/HD06/0906/ddsp_inv_fton_mdb_HD06_train_onhpc_20200820_longer_train_all_mask_ecogsr1000_125_4.npy",allow_pickle=True).item()
ex_test = np.load("report/2020/audio_recon_result/HD06/0906/ddsp_inv_fton_mdb_HD06_train_onhpc_20200820_longer_test_all_mask_ecogsr1000_125_4.npy",allow_pickle=True).item()
# NY742 latents: these rebind ex_train / ex_test and are what the rest of the file uses.
ex_train = np.load("report/2020/audio_recon_result/NY742/NY742_latent_train.npy",allow_pickle=True).item()
ex_test = np.load("report/2020/audio_recon_result/NY742/NY742_latent_test.npy",allow_pickle=True).item()
import librosa
from scipy.io import wavfile
import wave
import numpy as np
import IPython.display as ipd
import json
from pandas.io.json import json_normalize
from IPython.display import Markdown, display
import librosa
from scipy.io import loadmat
import wave
import h5py
import numpy as np
%pylab inline
# Make the project-local helpers under codes/bin importable (path is relative
# to the current working directory).
sys.path.append('codes/bin')
# Project-local denoiser applied to every clip before it is scored or played.
from speech_denoise import spectral_sub
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import matplotlib as mpl
# Render every figure at 300 dpi.
mpl.rcParams['figure.dpi'] = 300
from librosa.display import specshow
def plot_stft(audio,ax=None,n_fft=2048,hop_length=250,show=False,n_mels=128,y_axis='mel'):
    """Compute (and optionally draw) a dB-scaled spectrogram of `audio`.

    Parameters
    ----------
    audio : 1-D waveform; a 16 kHz sample rate is hard-coded below.
    ax : matplotlib axes to draw on when `show` is True.
    n_fft, hop_length : analysis window size and hop, in samples.
    show : if True the spectrogram is drawn and nothing is returned;
        if False the dB array is returned and nothing is drawn.
    n_mels : number of mel bands (only used when ``y_axis == 'mel'``).
    y_axis : ``'mel'`` for a mel power spectrogram (fmax 8000 Hz, dB relative
        to the maximum); any other value selects the linear-frequency STFT
        magnitude in dB.

    Returns
    -------
    The dB-scaled spectrogram array when `show` is False, otherwise None.
    """
    if y_axis == 'mel':
        # `y` must be passed by keyword: librosa >= 0.10 made the arguments of
        # melspectrogram keyword-only, and the keyword form also works on
        # older releases.
        S = librosa.feature.melspectrogram(y=audio, sr=16000, n_mels=n_mels, fmax=8000,
                                           n_fft=n_fft, hop_length=hop_length)
        spec_db = librosa.power_to_db(S, ref=np.max)
        if show:
            librosa.display.specshow(spec_db, y_axis='mel', cmap='gray_r', ax=ax, fmax=8000)
            return None
        return spec_db

    # Linear-frequency path. The STFT is only computed here, where it is
    # actually needed (it used to be computed and discarded on the mel path).
    X = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length)
    spec_db = librosa.amplitude_to_db(abs(X))
    if show:
        specshow(spec_db, cmap=cm.Blues,  # cm.gray_r is the alternative colormap
                 sr=16000, ax=ax)
        return None
    return spec_db
def MSE_pcc(A,B,ax=None):
    """Return ``(mse, pcc)`` comparing array `A` against reference `B`.

    mse is the mean of the element-wise squared error divided by the variance
    of `B`; pcc is the Pearson correlation coefficient of the two flattened
    arrays. `ax` is accepted but not used.
    """
    squared_error = (A - B) ** 2
    normalised_error = squared_error / B.var()
    mse = np.mean(normalised_error)

    correlation = pearsonr(A.ravel(), B.ravel())
    pcc = correlation[0]
    return mse, pcc
def _scoring_frames(latent_row, n_frames, mode, pad=5):
    """Return the time-frame indices over which one sample is scored."""
    if mode != 'train':
        # Test samples are scored on the first quarter of the frames.
        return np.arange(0, n_frames // 4)
    # Train samples: frames where the latent equals 1, widened by `pad` frames
    # before the first and up to index last + pad (exclusive) after the last,
    # clipped to the valid range.
    active = np.where(latent_row == 1)[0]
    first, last = active[0], active[-1]
    start_ind = max(first - pad, 0)
    end_ind = min(last + pad, n_frames)
    # The trailing range starts at last + 1: `active` already contains `last`,
    # and starting at `last` would count that frame twice.
    return np.concatenate((np.arange(start_ind, first), active, np.arange(last + 1, end_ind)))


def analyze(predict,GT_STFT_test_spkr,audio_pred,audio_gt,exp_path,mode):
    """Score reconstructions against ground truth and save metric histograms.

    Parameters
    ----------
    predict, GT_STFT_test_spkr : stacked predicted / ground-truth spectrograms.
        Axis 0 indexes samples and the last axis is treated as time frames
        (each sample is sliced as ``spec[i][:, frames]``).
    audio_pred, audio_gt : waveforms indexable by sample number; STOI is
        computed at a 16 kHz sample rate.
    exp_path : path prefix for the saved figure.
    mode : ``'test'`` or ``'train'``. Selects the latent dictionary
        (``ex_test[512]`` / ``ex_train[512]``) and the frame window.

    Returns
    -------
    (mse, pcc, stois) : three 1-D arrays with one value per sample.

    Raises
    ------
    ValueError : if `mode` is neither ``'test'`` nor ``'train'``.

    Side effect: saves ``exp_path + mode + '_PCC_MSE.pdf'``.
    """
    if mode == 'test':
        latent = ex_test[512]
    elif mode == 'train':
        latent = ex_train[512]
    else:
        # Without this check `latent` stayed unbound and the failure surfaced
        # later as a confusing NameError.
        raise ValueError("mode must be 'test' or 'train', got %r" % (mode,))

    samples = predict.shape[0]
    n_frames = predict.shape[-1]
    pcc = np.zeros([samples])
    mse = np.zeros([samples])
    for i in range(samples):
        frames = _scoring_frames(latent[i], n_frames, mode)
        mse[i], pcc[i] = MSE_pcc(predict[i][:, frames], GT_STFT_test_spkr[i][:, frames])

    # pystoi's signature is stoi(clean, processed, fs, extended) and the
    # measure is not symmetric, so the ground truth goes first.
    stois = np.array([stoi(audio_gt[i], audio_pred[i], 16000, extended=False)
                      for i in range(samples)])

    fig, ax = plt.subplots(1, 3, figsize=(18, 4))
    ax[0].hist(mse, bins=25, color='b')
    ax[0].set_title(mode+' MSE: %g(%g)' %(np.round(mse.mean(),3),np.round(mse.std(),3)))
    ax[1].hist(pcc, bins=50, color='g')
    ax[1].set_title(mode+' PCC: %g(%g)' %(np.round(pcc.mean(),3),np.round(pcc.std(),3)))
    ax[2].hist(stois, bins=50, color='r')
    ax[2].set_title(mode+' STOI: %g(%g)' %(np.round(stois.mean(),3),np.round(stois.std(),3)))
    fig.savefig(exp_path+mode+'_PCC_MSE.pdf')
    return mse, pcc, stois
def play(audio,sr=16000):
    """Show an inline audio player for `audio` at sample rate `sr`.

    audio: array-like with a `.shape`, e.g. ex['audio']; anything with two or
    more dimensions is flattened with `.ravel()` first.
    """
    waveform = audio.ravel() if len(audio.shape) >= 2 else audio
    player = ipd.Audio(waveform, rate=sr)
    display(player)
# Experiment directory to evaluate; the commented lines are alternative runs.
#exp_path ='../report/2020/audio_recon_result/HD06/20200906/exp1/'
#exp_path ='report/2020/audio_recon_result/HD06/0906/exp30/'
exp_path = 'report/2020/audio_recon_result/NY742/0917/exp2/'  #exp2 0.748 exp4 0.719 exp3 0.739
#exp_path ='report/2020/audio_recon_result/NY749/0917/exp25/' #exp25 0.737
test_path = exp_path + 'test/'
train_path = exp_path + 'train/'

# Per-clip containers, keyed by clip number.
recon_audio_test, gt_audio_test, recon_spec_test, gt_spec_test = {}, {}, {}, {}
recon_audio_train, gt_audio_train, recon_spec_train, gt_spec_train = {}, {}, {}, {}


def _load_denoised(wav_path):
    """Load a wav file at 16 kHz and pass it through spectral_sub (beta=0.01)."""
    signal = librosa.core.load(wav_path, sr=16000)[0]
    return np.array(spectral_sub(signal=signal, fs=16000, beta=0.01))


# Load clips 0..49 of both splits and compute 64-band mel spectrograms
# (plot_stft defaults otherwise; show=False, so the dB array is returned).
for i in tqdm(range(50)):
    for split_dir, recon_audio, gt_audio, recon_spec, gt_spec in (
            (test_path, recon_audio_test, gt_audio_test, recon_spec_test, gt_spec_test),
            (train_path, recon_audio_train, gt_audio_train, recon_spec_train, gt_spec_train)):
        recon_audio[i] = _load_denoised(split_dir + 'recon_audio_' + str(i) + '.wav')
        gt_audio[i] = _load_denoised(split_dir + 'gt_audio_' + str(i) + '.wav')
        recon_spec[i] = plot_stft(recon_audio[i], ax=None, n_mels=64)
        gt_spec[i] = plot_stft(gt_audio[i], ax=None, n_mels=64)

# Stack the per-clip spectrograms along a new leading axis and score each split.
mse_test, pcc_test, stois_test = analyze(
    np.stack(list(recon_spec_test.values()), axis=0),
    np.stack(list(gt_spec_test.values()), axis=0),
    recon_audio_test, gt_audio_test, exp_path, mode='test')
mse_train, pcc_train, stois_train = analyze(
    np.stack(list(recon_spec_train.values()), axis=0),
    np.stack(list(gt_spec_train.values()), axis=0),
    recon_audio_train, gt_audio_train, exp_path, mode='train')
# Experiment directory to evaluate; the commented lines are alternative runs.
#exp_path ='../report/2020/audio_recon_result/HD06/20200906/exp1/'
#exp_path ='report/2020/audio_recon_result/HD06/0906/exp30/'
exp_path = 'report/2020/audio_recon_result/NY742/0917/exp2/'  #exp2 0.748 exp4 0.719 exp3 0.739
#exp_path ='report/2020/audio_recon_result/NY749/0917/exp25/' #exp25 0.737
test_path = exp_path + 'test/'
train_path = exp_path + 'train/'

# Per-clip containers, keyed by clip number.
recon_audio_test, gt_audio_test, recon_spec_test, gt_spec_test = {}, {}, {}, {}
recon_audio_train, gt_audio_train, recon_spec_train, gt_spec_train = {}, {}, {}, {}


def _load_denoised(wav_path):
    """Load a wav file at 16 kHz and pass it through spectral_sub (beta=0.01)."""
    signal = librosa.core.load(wav_path, sr=16000)[0]
    return np.array(spectral_sub(signal=signal, fs=16000, beta=0.01))


# Linear-frequency STFT settings used for every clip in this cell.
linear_kwargs = dict(n_fft=511, hop_length=129, ax=None, y_axis='linear', n_mels=64)

# Load clips 0..49 of both splits and compute their linear dB spectrograms.
for i in tqdm(range(50)):
    for split_dir, recon_audio, gt_audio, recon_spec, gt_spec in (
            (test_path, recon_audio_test, gt_audio_test, recon_spec_test, gt_spec_test),
            (train_path, recon_audio_train, gt_audio_train, recon_spec_train, gt_spec_train)):
        recon_audio[i] = _load_denoised(split_dir + 'recon_audio_' + str(i) + '.wav')
        gt_audio[i] = _load_denoised(split_dir + 'gt_audio_' + str(i) + '.wav')
        recon_spec[i] = plot_stft(recon_audio[i], **linear_kwargs)
        gt_spec[i] = plot_stft(gt_audio[i], **linear_kwargs)

# Stack the per-clip spectrograms along a new leading axis and score each split.
mse_test, pcc_test, stois_test = analyze(
    np.stack(list(recon_spec_test.values()), axis=0),
    np.stack(list(gt_spec_test.values()), axis=0),
    recon_audio_test, gt_audio_test, exp_path, mode='test')
mse_train, pcc_train, stois_train = analyze(
    np.stack(list(recon_spec_train.values()), axis=0),
    np.stack(list(gt_spec_train.values()), axis=0),
    recon_audio_train, gt_audio_train, exp_path, mode='train')
def _load_denoised(wav_path):
    """Load a wav file at 16 kHz and pass it through spectral_sub (beta=0.01)."""
    signal = librosa.core.load(wav_path, sr=16000)[0]
    return np.array(spectral_sub(signal=signal, fs=16000, beta=0.01))


# Reload clips 0..49 of both splits and overwrite the stored spectrograms with
# 128-band mel versions. The metric arrays are not recomputed in this cell.
for i in range(50):
    for split_dir, recon_audio, gt_audio, recon_spec, gt_spec in (
            (test_path, recon_audio_test, gt_audio_test, recon_spec_test, gt_spec_test),
            (train_path, recon_audio_train, gt_audio_train, recon_spec_train, gt_spec_train)):
        recon_audio[i] = _load_denoised(split_dir + 'recon_audio_' + str(i) + '.wav')
        gt_audio[i] = _load_denoised(split_dir + 'gt_audio_' + str(i) + '.wav')
        recon_spec[i] = plot_stft(recon_audio[i], ax=None, n_mels=128)
        gt_spec[i] = plot_stft(gt_audio[i], ax=None, n_mels=128)
# Grid of test spectrograms (ground truth stacked on top of the reconstruction
# along axis 1), ordered by descending test PCC.
mode = 'test'
spec_concat = np.concatenate(
    (np.stack(list(gt_spec_test.values()), axis=0),
     np.stack(list(recon_spec_test.values()), axis=0)),
    axis=1)
row_nums = 5
col_nums = 10
fig, ax = plt.subplots(row_nums, col_nums, figsize=(col_nums*5, row_nums*6))
cmap = cm.gray_r
latent = ex_test[512]
# Rank once instead of re-sorting three times per tile.
rank_test = np.argsort(-pcc_test)
# Test tiles show the first quarter of the frames. The latent-based window
# only applied to mode == 'train', which this cell never uses.
tmp_latend_ind = np.arange(0, spec_concat.shape[-1]//4)
for i in range(row_nums):
    for j in range(col_nums):
        sample = rank_test[i*col_nums + j]
        ax[i, j].imshow(spec_concat[sample][:, tmp_latend_ind], cmap=cmap)
        # No try/except here: a bare `except: pass` would only hide real
        # errors (and swallow KeyboardInterrupt).
        ax[i, j].set_title(str(sample), fontsize=30)
fig.suptitle('Test Spectrogram', fontsize=50)
fig.subplots_adjust(top=1.5)
fig.tight_layout()
fig.savefig(exp_path+'testspec.pdf')
# Grid of train spectrograms (ground truth stacked on top of the
# reconstruction along axis 1), ordered by descending train PCC.
# Relies on row_nums, col_nums and cmap set by the preceding test-grid cell.
mode = 'train'
spec_concat = np.concatenate(
    (np.stack(list(gt_spec_train.values()), axis=0),
     np.stack(list(recon_spec_train.values()), axis=0)),
    axis=1)
fig, ax = plt.subplots(row_nums, col_nums, figsize=(col_nums*5, row_nums*6))
latent = ex_train[512]
rank_train = np.argsort(-pcc_train)
n_frames = spec_concat.shape[-1]
for i in range(row_nums):
    for j in range(col_nums):
        sample = rank_train[i*col_nums + j]
        # Frames where the latent equals 1, widened by 5 frames before the
        # first and up to index last + 5 (exclusive) after the last.
        active = np.where(latent[sample] == 1)[0]
        start_ind = max(active[0] - 5, 0)
        end_ind = min(active[-1] + 5, n_frames)
        # The trailing range starts at last + 1 so the last active frame is
        # not drawn twice.
        tmp_latend_ind = np.concatenate((np.arange(start_ind, active[0]),
                                         active,
                                         np.arange(active[-1] + 1, end_ind)))
        ax[i, j].imshow(spec_concat[sample][:, tmp_latend_ind], cmap=cmap)
        # Title with the *train* sample actually shown (the test ranking was
        # used here by mistake).
        ax[i, j].set_title(str(sample), fontsize=30)
fig.suptitle('Train Spectrogram', fontsize=50)
fig.subplots_adjust(top=1.5)
fig.tight_layout()
fig.savefig(exp_path+'trainspec.pdf')
# Same test grid as before with a narrower, taller figure; overwrites
# testspec.pdf.
mode = 'test'
spec_concat = np.concatenate(
    (np.stack(list(gt_spec_test.values()), axis=0),
     np.stack(list(recon_spec_test.values()), axis=0)),
    axis=1)
row_nums = 5
col_nums = 10
fig, ax = plt.subplots(row_nums, col_nums, figsize=(col_nums*2, row_nums*7))
cmap = cm.gray_r
latent = ex_test[512]
# Rank once instead of re-sorting three times per tile.
rank_test = np.argsort(-pcc_test)
# Test tiles show the first quarter of the frames. The latent-based window
# only applied to mode == 'train', which this cell never uses.
tmp_latend_ind = np.arange(0, spec_concat.shape[-1]//4)
for i in range(row_nums):
    for j in range(col_nums):
        sample = rank_test[i*col_nums + j]
        ax[i, j].imshow(spec_concat[sample][:, tmp_latend_ind], cmap=cmap)
        # No try/except here: a bare `except: pass` would only hide real
        # errors (and swallow KeyboardInterrupt).
        ax[i, j].set_title(str(sample), fontsize=30)
fig.suptitle('Test Spectrogram', fontsize=50)
fig.subplots_adjust(top=1.5)
fig.tight_layout()
fig.savefig(exp_path+'testspec.pdf')
# Same train grid as before with a narrower, taller figure; overwrites
# trainspec.pdf. Relies on row_nums, col_nums and cmap set by the preceding
# test-grid cell.
mode = 'train'
spec_concat = np.concatenate(
    (np.stack(list(gt_spec_train.values()), axis=0),
     np.stack(list(recon_spec_train.values()), axis=0)),
    axis=1)
fig, ax = plt.subplots(row_nums, col_nums, figsize=(col_nums*2, row_nums*7))
latent = ex_train[512]
rank_train = np.argsort(-pcc_train)
n_frames = spec_concat.shape[-1]
for i in range(row_nums):
    for j in range(col_nums):
        sample = rank_train[i*col_nums + j]
        # Frames where the latent equals 1, widened by 5 frames before the
        # first and up to index last + 5 (exclusive) after the last.
        active = np.where(latent[sample] == 1)[0]
        start_ind = max(active[0] - 5, 0)
        end_ind = min(active[-1] + 5, n_frames)
        # The trailing range starts at last + 1 so the last active frame is
        # not drawn twice.
        tmp_latend_ind = np.concatenate((np.arange(start_ind, active[0]),
                                         active,
                                         np.arange(active[-1] + 1, end_ind)))
        ax[i, j].imshow(spec_concat[sample][:, tmp_latend_ind], cmap=cmap)
        # Title with the *train* sample actually shown (the test ranking was
        # used here by mistake).
        ax[i, j].set_title(str(sample), fontsize=30)
fig.suptitle('Train Spectrogram', fontsize=50)
fig.subplots_adjust(top=1.5)
fig.tight_layout()
fig.savefig(exp_path+'trainspec.pdf')
# Grid of full-length test spectrograms (ground truth stacked on top of the
# reconstruction along axis 1), ordered by descending test PCC.
spec_concat = np.concatenate(
    (np.stack(list(gt_spec_test.values()), axis=0),
     np.stack(list(recon_spec_test.values()), axis=0)),
    axis=1)
row_nums = 5
col_nums = 10
fig, ax = plt.subplots(row_nums, col_nums, figsize=(col_nums*6, row_nums*6))
cmap = cm.gray_r
# Rank once instead of re-sorting twice per tile.
rank_test = np.argsort(-pcc_test)
for i in range(row_nums):
    for j in range(col_nums):
        sample = rank_test[i*col_nums + j]
        ax[i, j].imshow(spec_concat[sample], cmap=cmap)
        # No try/except here: a bare `except: pass` would only hide real
        # errors (and swallow KeyboardInterrupt).
        ax[i, j].set_title(str(sample), fontsize=30)
fig.suptitle('Test Spectrogram', fontsize=50)
fig.subplots_adjust(top=1)
fig.tight_layout()
fig.savefig(exp_path+'testspec.pdf')
# Grid of full-length train spectrograms (ground truth stacked on top of the
# reconstruction along axis 1), ordered by descending train PCC.
# Relies on row_nums, col_nums and cmap set by the preceding test-grid cell.
spec_concat = np.concatenate(
    (np.stack(list(gt_spec_train.values()), axis=0),
     np.stack(list(recon_spec_train.values()), axis=0)),
    axis=1)
fig, ax = plt.subplots(row_nums, col_nums, figsize=(col_nums*6, row_nums*6))
rank_train = np.argsort(-pcc_train)
for i in range(row_nums):
    for j in range(col_nums):
        sample = rank_train[i*col_nums + j]
        ax[i, j].imshow(spec_concat[sample], cmap=cmap)
        # Title with the *train* sample actually shown (the test ranking was
        # used here by mistake); no bare `except: pass` to hide errors.
        ax[i, j].set_title(str(sample), fontsize=30)
fig.suptitle('Train Spectrogram', fontsize=50)
fig.subplots_adjust(top=1)
fig.tight_layout()
fig.savefig(exp_path+'trainspec.pdf')
#3 25 29 1 33 28 13 0 40 HD06
# Play the 30 best test clips (by descending PCC), keeping only the audio
# samples where the upsampled latent equals 1.
# The test latent is looked up explicitly: when the file runs top to bottom,
# the shared `latent` variable holds the *train* latent at this point, which
# would mask test audio with the wrong rows. `latent` itself is left untouched
# because later train cells read it.
latent_test = ex_test[512]
rank_test = np.argsort(-pcc_test)
for i in range(30):
    sample = rank_test[i]
    # Each latent entry is repeated 250 times (plot_stft's default hop_length)
    # and the result is truncated to 64000 audio samples.
    keep = np.where(np.repeat(latent_test[sample], 250)[:64000] == 1)[0]
    print (i, sample)
    print ('gt')
    play(gt_audio_test[sample][keep])
    print ('recon')
    play(recon_audio_test[sample][keep])
# Play the 30 best test clips in full, ground truth first and then the
# reconstruction, ordered by descending PCC.
rank_test = np.argsort(-pcc_test)
for i in range(30):
    sample = rank_test[i]
    print (i, sample)
    for label, clips in (('gt', gt_audio_test), ('recon', recon_audio_test)):
        print (label)
        play(clips[sample])
# One continuous track: for each of the 30 best test clips, the first 16000
# samples of the ground truth followed by the first 16000 of the reconstruction.
rank_test = np.argsort(-pcc_test)
segments = []
for i in range(30):
    sample = rank_test[i]
    segments.append(gt_audio_test[sample][:16000])
    segments.append(recon_audio_test[sample][:16000])
all_audio = np.concatenate(segments)
play(all_audio)
# One continuous track of the first 16000 samples of each of the 30 best test
# reconstructions, ordered by descending PCC.
rank_test = np.argsort(-pcc_test)
segments = []
for i in range(30):
    segments.append(recon_audio_test[rank_test[i]][:16000])
all_audio = np.concatenate(segments)
play(all_audio)
# One continuous track of the 30 best train clips (by descending PCC): masked
# ground truth, a silent gap, masked reconstruction, a silent gap, and so on.
stop_time = 8000
latent_train = ex_train[512]
rank_train = np.argsort(-pcc_train)
gap = np.zeros([stop_time])  # silence between consecutive clips
segments = []
for i in range(30):
    sample = rank_train[i]
    # Every clip is masked with its own latent row. The first clip used to be
    # masked with whatever row a stale loop variable `i` pointed at.
    # Each latent entry is repeated 250 times and truncated to 64000 samples.
    keep = np.where(np.repeat(latent_train[sample], 250)[:64000] == 1)[0]
    segments.extend((gt_audio_train[sample][keep], gap,
                     recon_audio_train[sample][keep], gap))
all_audio = np.concatenate(segments)
play(all_audio)
# One continuous track of the 30 best train reconstructions (by descending
# PCC): a leading silent gap, then each masked clip followed by a silent gap.
stop_time = 8000
latent_train = ex_train[512]
rank_train = np.argsort(-pcc_train)
gap = np.zeros([stop_time])  # silence between consecutive clips
segments = [gap]
for i in range(30):
    sample = rank_train[i]
    # Every clip is masked with its own latent row. The first clip used to be
    # masked with whatever row a stale loop variable `i` pointed at.
    # Each latent entry is repeated 250 times and truncated to 64000 samples.
    keep = np.where(np.repeat(latent_train[sample], 250)[:64000] == 1)[0]
    segments.extend((recon_audio_train[sample][keep], gap))
all_audio = np.concatenate(segments)
play(all_audio)
# Play the first 16000 samples of the 30 best test clips, ground truth first
# and then the reconstruction, ordered by descending PCC.
rank_test = np.argsort(-pcc_test)
for i in range(30):
    sample = rank_test[i]
    print (i, sample)
    for label, clips in (('gt', gt_audio_test), ('recon', recon_audio_test)):
        print (label)
        play(clips[sample][:16000])
# Play the 30 best train clips (by descending PCC), keeping only the audio
# samples where the upsampled `latent` equals 1.
# NOTE(review): this reads the shared `latent` variable set by an earlier
# cell -- confirm it holds the train latent when this cell is run.
rank_train = np.argsort(-pcc_train)
for i in range(30):
    sample = rank_train[i]
    print (i, sample)
    for label, clips in (('gt', gt_audio_train), ('recon', recon_audio_train)):
        print (label)
        # Each latent entry is repeated 250 times and truncated to 64000
        # samples; the mask is recomputed for each clip, as before.
        keep = np.where(np.repeat(latent[sample], 250)[:64000] == 1)[0]
        play(clips[sample][keep])