"""Monophonic pitch transcription demo.

Loads a recording, splits it at the onsets detected by Essentia, estimates
one pitch per segment and resynthesizes every segment as a sine tone.  The
tail of the script plots the input waveform with onset markers and draws
constant-Q spectrograms of the input and of the resynthesized output.
"""
import warnings

import essentia
import essentia.standard as ess
import librosa
# Imported explicitly so that librosa.display.specshow resolves without
# relying on librosa pulling the submodule in implicitly.
import librosa.display
import numpy as np
import pandas
from IPython.display import Audio

warnings.filterwarnings("ignore")

# NOTE(review): rcParams, xlabel and axvline are never imported in this file;
# presumably they come from a pylab-style session (e.g. `%pylab inline`).
# Confirm before running this as a plain script.
rcParams['figure.figsize'] = (15, 6)

fs = 44100  # sample rate used for loading, analysis and synthesis

loader = ess.MonoLoader(filename='../test/bizet_in.wav', sampleRate=fs)
signal_in = loader()

#Audio(data=signal_in, rate=44100) # may take a while to load


def get_onsets(signal_in):
    """Return the onset times that Essentia's ``OnsetRate`` finds in the signal.

    ``OnsetRate`` also returns an onset rate; it is discarded here.  The
    rest of this file treats the returned values as times in seconds
    (they are multiplied by ``fs`` to obtain sample indices).
    """
    onset_times, _onset_rate = ess.OnsetRate()(signal_in)
    return onset_times


yin = ess.PitchYinFFT()
spectrum = ess.Spectrum()


def get_pitch(segment):
    """Estimate a single pitch for ``segment``.

    Only the first 4096 samples (or fewer, for short segments) are analysed:
    their spectrum is handed to ``PitchYinFFT`` and the reported pitch is
    returned; the confidence value is ignored.

    Returns 0.0 when the segment has fewer than two samples, because there
    is nothing left to analyse once the length has been made even.
    """
    # Workaround kept from the original code ("hack to get around Essentia
    # error"): the analysed length is capped at 4096 and forced to be even.
    n = min(len(segment), 4096)
    n -= n % 2
    if n == 0:
        # An empty frame cannot be analysed; pitch 0 synthesizes silence.
        return 0.0
    pitch, _pitch_conf = yin(spectrum(segment[:n]))
    return pitch


def generate_sine(pitch, n_duration):
    """Return ``n_duration`` samples of a sine at ``pitch`` with amplitude 0.2.

    The tone is sampled at the module-level rate ``fs`` and starts at phase
    zero.
    """
    n = np.arange(n_duration)
    return 0.2 * np.sin(2 * np.pi * pitch * n / fs)


def transcribe_pitch(signal_in):
    """Resynthesize ``signal_in`` as one sine tone per detected note.

    The signal is cut at every detected onset; each segment (from one onset
    to the next, the last one running to the end of the signal) is replaced
    by a sine at that segment's estimated pitch.  Samples before the first
    onset stay at zero.

    Returns an Essentia array with the same length as ``signal_in``.
    """
    # initialize output signal
    signal_out = essentia.array(np.zeros(len(signal_in)))

    # Onset times -> sample indices.  The conversion is done in float64 and
    # truncated, matching int(onset * fs).
    onset_times = np.asarray(get_onsets(signal_in), dtype=float)
    onset_samples = (onset_times * fs).astype(int)

    # Close the last segment exactly at the end of the signal.  Working in
    # samples avoids the seconds round trip (len / fs * fs), which truncates
    # under integer division and can fall one sample short otherwise.
    boundaries = np.append(onset_samples, len(signal_in))

    for n0, n1 in zip(boundaries[:-1], boundaries[1:]):
        # pitch detect each segment
        pitch = get_pitch(signal_in[n0:n1])
        # generate sine wave; write it into the output signal
        signal_out[n0:n1] = generate_sine(pitch, n1 - n0)

    return signal_out


signal_out = transcribe_pitch(signal_in)

#Audio(data=signal_out, rate=fs)
#Audio(data=sum([signal_out, signal_in], axis=0), rate=fs)
#ess.MonoWriter(filename='../test/out.wav')(signal_out)

# Waveform of the input over time, with a red vertical line at every onset.
t = np.arange(len(signal_in)) / float(fs)
x = pandas.Series(signal_in, index=t)
x.plot()
xlabel('Time (seconds)')
for onset in get_onsets(signal_in):
    axvline(onset, color='r')

fmin = librosa.midi_to_hz(60)
# NOTE(review): fmax is computed but never passed to any call below;
# confirm whether the CQT was meant to be limited to it.
fmax = librosa.midi_to_hz(120)

# Constant-Q spectrogram of the input, then of the resynthesized output.
# The first `fs` samples (one second) of each signal are skipped.
# NOTE(review): librosa.logamplitude appears to exist only in older librosa
# releases; confirm against the librosa version this project pins.
for sig in (signal_in, signal_out):
    CQT = librosa.cqt(sig[fs:], sr=fs, hop_length=2048, fmin=fmin)
    librosa.display.specshow(librosa.logamplitude(CQT), sr=fs,
                             y_axis='cqt_note', fmin=fmin)