#!/usr/bin/env python
# coding: utf-8
# Notebook export: speech analysis/resynthesis with the World vocoder,
# mel-cepstrum analysis, and MLSA-filter synthesis via pysas.
# NOTE: the get_ipython() magics only work when run under IPython/Jupyter.

# In[1]:
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('autoreload', '2')

import matplotlib.pyplot as plt
from IPython.display import Audio, display
import numpy as np
from pysas import World, waveread

# # Speech analysis and synthesis with World

# In[2]:
# Test utterance from http://festvox.org/cmu_arctic/dbs_bdl.html (16 kHz).
signal, samplingrate, _ = waveread("test/cmu_arctic/arctic_a0001.wav")

# In[3]:
world = World(samplingrate)

# In[4]:
# f0 contour, spectral-envelope matrix, and aperiodicity matrix (one row per frame).
f0, spec_mat, aperiod_mat = world.analyze(signal)

# In[5]:
plt.plot(f0)

# In[6]:
# Inspect a single analysis frame.
spec = spec_mat[300]

# In[7]:
plt.plot(np.log(spec))

# In[8]:
# Resynthesize speech from the World parameters.
out = world.synthesis(f0, spec_mat, aperiod_mat)

# In[9]:
plt.plot(signal)
plt.plot(out)

# In[10]:
# Scale float [-1, 1] audio to int16 for playback; use the file's actual rate.
display(Audio(data=np.int16(out * 32767.0), rate=samplingrate))

# # Mel-Cepstrum Analysis

# In[11]:
from pysas.mcep import spec2mcep, mcep2spec, mcep2coef, coef2mcep, estimate_alpha

# In[12]:
# All-pass constant approximating the mel scale at this sampling rate.
alpha = round(estimate_alpha(samplingrate), 3)
alpha

# In[13]:
# 24th-order mel-cepstrum of the World spectral envelope.
mcep = spec2mcep(spec, 24, alpha)

# In[14]:
spec2 = mcep2spec(mcep, alpha, world.fftsize())

# In[15]:
plt.plot(np.log(spec))
plt.plot(np.log(spec2[:world.envelopesize()]))

# ## from fft

# In[16]:
# Power spectrum of one Hanning-windowed frame (frame shift 80 samples, frame 300).
frameshift = 80
i = frameshift * 300
windowsize = 1024
sig = signal[i:i + windowsize] * np.hanning(windowsize)
power_spectrum = (np.absolute(np.fft.fft(sig)) ** 2)[:(windowsize >> 1) + 1]
plt.plot(np.log(power_spectrum))

# In[17]:
fft_mcep = spec2mcep(power_spectrum, 20, alpha)
reconst_pspec = mcep2spec(fft_mcep, alpha, windowsize)

# In[18]:
plt.plot(np.log(spec))
plt.plot(np.log(spec2))
plt.plot(np.log(power_spectrum))
plt.plot(np.log(reconst_pspec))

# ## Convert Mel-Cepstrum to Coefficients for MLSA Digital Filter

# In[19]:
coef = mcep2coef(fft_mcep, alpha)
reconst_mcep = coef2mcep(coef, alpha)

# In[20]:
plt.plot(fft_mcep)
plt.plot(reconst_mcep)
plt.plot(coef)

# ## Generate excited pulse from F0 sequence

# In[21]:
from pysas.excite import ExcitePulse

# In[22]:
# Pulse generator at the input's sampling rate, frame shift 80, no gaussian noise.
ep = ExcitePulse(samplingrate, 80, False)

# In[23]:
plt.plot(ep.gen(f0))

# In[24]:
display(Audio(data=np.int16(ep.gen(f0) * 700), rate=samplingrate))

# # Speech Synthesis With Mel-Cepstrum Analysis and MLSA Filter

# In[25]:
from pysas.synthesis.mlsa import MLSAFilter
from pysas.synthesis import Synthesis
from pysas.mcep import spec2mcep_from_matrix

# In[26]:
cepstrum_dim = 128
mcep_mat = spec2mcep_from_matrix(spec_mat, cepstrum_dim, alpha)

# In[27]:
# Convert every frame's mel-cepstrum to MLSA filter coefficients.
coef_mat = np.array([mcep2coef(frame, alpha) for frame in mcep_mat])

# In[28]:
# MLSA filter (Pade order 5) driven at an 80-sample frame shift.
mlsa = MLSAFilter(cepstrum_dim, alpha, 5)
syn = Synthesis(80, mlsa)
pulse = ep.gen(f0)

# In[29]:
synth = syn.synthesis(pulse, coef_mat)

# In[30]:
plt.plot(synth)
plt.plot(signal)

# ### Original

# In[31]:
display(Audio(data=np.int16(signal * 32767.0), rate=samplingrate))

# ### Synthesized

# In[32]:
display(Audio(data=np.int16(synth * 32767.0), rate=samplingrate))

# ### voice changer

# In[33]:
# Warp the spectrum (alpha * 1.3) and lower the pitch (f0 * 0.7) for a voice-change effect.
mlsa = MLSAFilter(cepstrum_dim, alpha * 1.3, 5)
syn = Synthesis(80, mlsa)
pulse = ep.gen(f0 * 0.7)
a = syn.synthesis(pulse, coef_mat)
display(Audio(data=np.int16(a * 32767.0), rate=samplingrate))

# In[ ]: