#!/usr/bin/env python
# coding: utf-8

# # Fit Spectre XPS

# In[73]:


import numpy as np
import matplotlib.pyplot as plt
from xpsplot import XPSData
get_ipython().run_line_magic('matplotlib', 'inline')
from scipy.optimize import curve_fit
import pandas as pd

# TODO
# * chercher la relation entre la largeur à mi-hauteur d'une gaussienne 
#   et d'une lorentzienne
#   GLS/GLP => http://www.casaxps.com/help_manual/line_shapes.htm

import seaborn as sns
sns.set(style="whitegrid", font_scale=1.5)


# ## Fit Functions
# 
# ### General functions
# 
# Gaussian functions
# 
# \begin{equation}
#     gauss(x, \mu, \sigma) = \frac{1}{\sigma \sqrt{2\pi}} \exp\left(-\frac{(x - \mu)^2}{2 \sigma^2} \right)
# \end{equation}
# 
# Lorentz function
# 
# \begin{equation}
#     lorentz(x, x_o, \gamma) = \frac{\gamma \big/ 2\pi}{\left(\gamma \big/ 2\right)^2 + (x - x_o)^2}
# \end{equation}
# 
# Primitive functions are a gaussian/lorentzian mixing.

# In[74]:


def gauss(x, mu=0., sigma=1.):
    """ gaussian funcion of mean mu and standard deviation sigma """
    return 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / (2 * sigma**2))
    
def lorentz(x, xo=0., gamma=1.):
    """ lorentz distribution of with gamma and position xo """
    return gamma / (2 * np.pi) * 1 / ((gamma / 2)**2 + (x - xo)**2)

def mix_g_l(x, xo, fwhm, mixing):
    """ 
    primitive function for the fit. The function is define as :
    
    sum_g_l(x) = mixing * gauss(x) + (1 - mixing) lorentz(x)
    
    WARNING: fwhm for gaussian and lorentz function are note equal
    """
    return mixing * gauss(x, xo, fwhm) + (1 - mixing) * lorentz(x, xo, 1.3*fwhm)


# ### Functions define from casaXPS
# 
# GLS and GLP => http://www.casaxps.com/help_manual/line_shapes.htm
# 
# #### GLP, Gaussian Lorentzian product as define in casaXPS
# 
# $\alpha$ is the mixing factor.
# 
# \begin{equation}
#     GLP(x, x_o, \text{fwhm}, \alpha) =
#     \exp\left(-\frac{4\ln 2 \; (1 - \alpha) (x - x_o)^2}{\text{fwhm}^2}\right)
#     \frac{1}{\displaystyle
#         1 + 4 \alpha (x - x_o)^2 \big/ \text{fwhm}^2
#     }
# \end{equation}
# 
# #### GLS, Gaussian Lorentzian sum as define in casaXPS
# 
# $\alpha$ is the mixing factor.
# 
# \begin{equation}
#     GLS(x, x_o, \text{fwhm}, \alpha) =
#     (1-\alpha)\,\exp\left(-\frac{4\ln 2 \; (x - x_o)^2}{\text{fwhm}^2}\right)
#     +
#     \alpha
#     \frac{1}{\displaystyle
#         1 + 4 (x - x_o)^2 \big/ \text{fwhm}^2 
#     }
# \end{equation}

# In[75]:


def GLP(x, xo, fwhm=1, mixing=.5):
    """ 
    gaussian/lorentzian Product form 
    
    Args:
        x: scalar or array 
        xo (float): position
        fwhm (float): full width at half maximum
        mixing (float): proportion of lorentzian function
    """
    return np.exp(- 4 * np.log(2) * (1 - mixing) * (x - xo)**2 / fwhm**2) / (1 + 4 * mixing * (x - xo)**2 / fwhm**2)
    
def GLS(x, xo, fwhm=1, mixing=.5):
    """
    gaussian/lorentzian sum form 
    
    Args:
        x: scalar or array 
        xo (float): position
        fwhm (float): full width at half maximum
        mixing (float): proportion of lorentzian function
    """
    return (1 - mixing) * np.exp(-4 * np.log(2) * (x - xo)**2 / fwhm**2) + mixing / (1 + 4 * (x - xo)**2 / fwhm**2)


# GLS is slightly larger  at the bottom

# In[76]:


x = np.linspace(163, 173, 200)
plt.plot(x, GLS(x, 168), label="GLS")
plt.plot(x, GLP(x, 168), label="GLP")
plt.title("Comparison GLS vs GLP")
plt.legend()


# ## Curve fit
# 
# * use `curve_fit` form `scipy.optimize`
# 
# ### Check on toy datas
# 
# Make toy data with GLP function plus a random distribution.

# In[77]:


x = np.linspace(-2, 2, 100)
xo =0
fwhm = 1
mixing = .5
y = GLP(x, xo, fwhm, mixing)
ye = np.array([yi + 0.1 * np.random.uniform(-yi, yi) for yi in y])
print("xo   = %f\nfwhm = %f\nmix  = %f" % (xo, fwhm, mixing))


# Fit GLP on randomized GLP data.

# In[78]:


params, pcov = curve_fit(GLP, x, ye)
plt.plot(x, GLP(x, *params), label="fit")
plt.plot(x, y, label="model")
plt.plot(x, ye, "o", label="exp")
plt.legend()
print("xo   = %f\nfwhm = %f\nmix  = %f" % tuple(params))


# ### Load real datas

# In[79]:


C1s_data = XPSData.from_file("C_1s.TXT")
C1s_data.substract_bg()
columns = C1s_data.list_columns(to_print=False)
columns.remove("KE")
S2p_data2.get_plot(
    fill=True,
    columns=columns,
    legend=False
)


# In[80]:


df = C1s_data.data["Exp"]
df.index.name = "BE"
df.head()


# In[81]:


ax = df.plot(kind="line", marker="o", linewidth=0)
ax.set_title("XPS plot")


# ### Fit a sum of GLS or GLP functions
# 
# First define the function for the fit.

# In[82]:


def function(x, *param, mixing=.5, basis=GLS):
    """ 
    sum of several primitive functions such as
    
    contaction = sum_i coefs_i * basis(params_i)
    
    args:
        x: float or numpy array of float
        coefs: list of floats
        params: list of parameters (xo, fwhm, mixing) of a primitive function
        basis: a basis function
    """
    nparam = len(param) - 1
    if nparam % 2 != 0:
        raise ValueError("param must be a list of (coef, position) ...")
    
    fwhm = param[-1]
    
    x = np.array(x)
    values = np.zeros(x.shape)
    for i in range(0, nparam, 2):
        values += param[i] * basis(x, param[i+1], fwhm=fwhm, mixing=mixing)
    
    return values


# In[83]:


params, popt = curve_fit(
    function,
    xdata=df.index.values, 
    ydata=df.values,
    p0=(800, 285, 200, 287, 100, 289, 1),
    bounds=(3*[0, 280] + [.8], 3*[4000, 294] + [1.3])
)
plt.plot(df.index, df, "o")
plt.plot(df.index, function(df.index, *params))
line = ""
for i in range(0, len(params) - 2, 2):
    plt.fill(df.index, params[i] * GLS(df.index, params[i+1], fwhm=params[-1], mixing=.5), alpha=.5)
    line += "%4.f GLS(%4.1f) + " % (params[i], params[i+1])
    
print("fit = " + line.strip("+"))
print("fwhm = ", params[-1])
#print(pcov)


# ## Monte Carlo Fit
# 
# Fit with a monte carlo procedure.

# In[86]:


# initial parameter guess
guess = [800, 285, 200, 287, 100, 289, 1]
amplitude = 3 * [30, .2] + [0.01]
nparams = len(guess)
# parameters boundaries
bounds=(2*[10, 280] + [10, 288] + [0.8], 2*[2000, 294] + [100, 290] + [1.1])
# freeze some parameters
freeze = len(guess) * [False]

temperature = 0.5

# data to fit
x = df.index.values
ye = df.values
npts = len(x)

# fit function
f = function

# initialize with first guess parameters
ym = f(x, *guess)
Q_old = np.linalg.norm(ye - ym) / npts
pmin = guess.copy()
pold = guess.copy()
pnew = guess.copy()
Q_min = Q_old

# save paramters
save = True
freq_save = 100
params = []

# Main loop
nstep = 100000
accept = 0
i = 0
while accept < nstep:
    i += 1
    
    # randomly change one parameter
    choice = False
    while not choice:
        # select a parameter
        k = np.random.randint(nparams)
        if not freeze[k]:
            step = amplitude[k] * np.random.uniform(-1, 1)
            attemp = pold[k] + step
            # check bounds of parameter k
            while not (bounds[0][k] < attemp < bounds[1][k]):
                step /= 2
                attemp = pold[k] + step
            choice = True
            pnew[k] = attemp
            #print(pold[k], pnew[k], step, amplitude[k], bounds[0][k], bounds[1][k])
    
    # compute probability
    ym = f(x, *pnew)
    Q_new = np.linalg.norm(ye - ym) / npts
    
    # accept or not step
    newstep = False
    if Q_new < Q_old: # accept without condition
        pold = pnew.copy()
        Q_old = Q_new
        
        if Q_new < Q_min:
            Q_min = Q_new
            pmin = pnew.copy()
            #print("%3d %3d %6.2f%% %10.4f"% (i, accept, accept / i * 100, Q_new))

        accept += 1

    else: # accept with a give probability
        proba = np.exp(-(Q_new - Q_old) / temperature)

        if proba > np.random.rand(): 
            # accept
            pold = pnew.copy()
            Q_old = Q_new
            accept += 1
    
    if save and i % freq_save == 0:
        params.append(pold + [Q_old])
            
    if i % (nstep / 10) == 0 and accept != 0:
        print("%6d %6d %6.2f%% %10.4f"% (i, accept, accept / i * 100, Q_min))


# In[87]:


plt.plot(df.index, df, "o")
plt.plot(df.index, function(df.index, *pmin))
line = ""
for i in range(0, nparams - 2, 2):
    plt.fill(df.index, pmin[i] * GLS(df.index, pmin[i+1], fwhm=pmin[-1], mixing=.5), alpha=.5)
    line += "%4.f GLS(%4.1f) + " % (pmin[i], pmin[i+1])
    
print("fit = " + line.strip("+"))
print("fwhm = ", pmin[-1])


# In[88]:


params = np.array(params)
params.shape


# In[89]:


df_params = pd.DataFrame({"p%d" % i: params[:,i] for i in range(8)}, index=range(len(params)))
df_params.head()


# In[90]:


sns.pairplot(df_params)