import os import random path_timit = '/Users/musy/code/TIMIT' ending = 'WAV' speakers = {} for root, dirs, files in os.walk(path_timit, topdown=False): allowed_files = (filename for filename in files if ending in filename) for f in allowed_files: speaker_name = root.split('/')[-1] if speaker_name in speakers: speakers.get(speaker_name).get('sound_files').append(os.path.join(root, f)) else: speakers[speaker_name] = {'sound_files': [os.path.join(root, f)]} def select_random_speaker(sex=None): if sex is None: name = speakers.keys()[random.randint(1, 630)] return name, speakers[name] speaker = select_random_speaker() speaker %pylab inline import wave import sys wav = speaker[1].get('sound_files')[10] #wav = '/Users/musy/Desktop/SA1.WAV' spf = wave.open(wav,'r') print spf.getframerate(), spf.getnchannels(), spf.getnframes(), spf.getsampwidth(), spf.getcomptype(), spf.getcompname() spf = wave.open(wav,'r') #Extract Raw Audio from Wav File signal = spf.readframes(-1) signal = np.fromstring(signal, 'Int16') #data in signal #If Stereo if spf.getnchannels() == 2: print 'Just mono files' sys.exit(0) plt.figure(1) plt.title('Signal Wave...') plt.plot(signal) from IPython.display import Audio Audio(wav) from scipy.io import wavfile fpaths = '/Users/musy/Desktop/SA1.WAV' fs, d = wavfile.read(fpaths) fs, d.shape Audio(d, rate=fs) signal signal.shape import librosa d.shape mfccs = librosa.feature.mfcc(d, sr=fs, n_mfcc=13) mfccs from librosa.feature import melspectrogram from librosa.display import specshow S = melspectrogram(d, sr=fs, n_fft=1024) logS = librosa.logamplitude(S) specshow(logS, sr=fs, x_axis='time', y_axis='mel')