Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gaussian Fitting Tool #116

Merged
merged 17 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions lcls_tools/common/data_analysis/fitting/fitting_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import numpy as np
from scipy.optimize import curve_fit
import statistics


# from scipy.ndimage import gaussian_filter
# from scipy.special import erf
# from lmfit import Model
# want functions for gaussian
# truncated gaussian
# super gaussian
# rms
# truncated rms

phys-cgarnier marked this conversation as resolved.
Show resolved Hide resolved

class FittingTool:
def __init__(self, data: np.array, **kwargs) -> dict:
"""tool takes in the data points for some distribution, for now just one distrbution at a time"""
self.y = data
nneveu marked this conversation as resolved.
Show resolved Hide resolved
phys-cgarnier marked this conversation as resolved.
Show resolved Hide resolved
self.x = np.arange(len(data))
# self.initial_guess = kwargs['initial_guess']

self.initial_params = self.guess_params(self.y)

def guess_params(self, y: np.array, initial_guess: dict = {}) -> dict:
initial_params = {}

offset = initial_guess.pop("offset", np.mean(y[-10:]))
phys-cgarnier marked this conversation as resolved.
Show resolved Hide resolved
amplitude = initial_guess.pop("amplitude", y.max() - offset)
mu = initial_guess.pop("mu", np.argmax(y))
sigma = initial_guess.pop("sigma", y.shape[0] / 5)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe reconsider dividing by 5 there

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the moment I don't have a more mathematical approach to determine sigma but it is something I have been thinking about.

initial_params["gaussian"] = [amplitude, mu, sigma, offset]

# super gaussian extension
power = 2
initial_params["super_gaussian"] = [amplitude, mu, sigma, power, offset]

# for double gaussian
# will make new helper functions for peaks and widths
amplitude2 = amplitude / 3
nu = mu / 2
rho = sigma / 2
initial_params["double_gaussian"] = [
amplitude,
mu,
sigma,
amplitude2,
nu,
rho,
offset,
]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These will be changed to dicts

return initial_params

def get_fit(self, best_fit: bool = False) -> dict:
"""Return fit parameters to data y such that y = method(x,parameters)"""
fits = {}

for key, val in self.initial_params.items():
method = getattr(self, key)
fit_params = curve_fit(method, self.x, self.y, val)[0]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe return everything from curve_fit

y_fitted = method(self.x, *fit_params)
rmse = self.calculate_rms_deviation(self.y, y_fitted)
print(method)
print(rmse)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These will go away when dict added?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can use underscore (_) to hide unused variables

fits[key] = fit_params
print("returning only best fit: ", best_fit)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update this comment.

return fits

def check_skewness(self, outcomes, mu, sigma):
"""Checks for skewness in dataset, neg if mean<median<mode, pos if opposite"""
mode = statistics.mode(outcomes)
pearsons_coeff = (mu - mode) / sigma
print(pearsons_coeff)
return pearsons_coeff

def check_kurtosis(self):
"""greater kurtosis higher the peak"""
"""how fast tails approaching zero, more outliers with higher kurtosis"""
"""positive excess - tails approach zero slower"""
"""negative excess - tails approach zero faster"""
# do later
return 0

def find_peaks(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raise NotImplementedError in functions that are not being used/written yet.

pass

def find_widths(self):
pass

def find_runs(self):
pass

def find_moments(self):
"""mean, sigma, skewness, kurtosis"""
pass

def truncate_distribution(x, lower_bound: float = None, upper_bound: float = None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change to truncate x axis? also include info on units/ comment?

if lower_bound is None:
lower_bound = x.min()
if upper_bound is None:
upper_bound = x.max()
truncated_x = np.clip(x, lower_bound, upper_bound)
return truncated_x

def calculate_rms_deviation(self, x: np.array, fit_x: np.array):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe a function in numpy for this?

rms_deviation = np.sqrt(np.power(sum(x - fit_x), 2) / len(x))
return rms_deviation

def calculate_unbiased_rms_deviated(x: np.array = None):
mean = np.mean(x)
rms_deviation = np.sqrt(np.power(sum(x - mean), 2) / len(x))
return rms_deviation

@staticmethod
def gaussian(x, amp, mu, sig, offset):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

have some of these inputs be optional, add check that x is a 1D numpy array.

"""Gaussian Function"""
"""need a way to guess params if amp =/"""
return amp * np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0))) + offset

@staticmethod
def gaussian_with_linear_background(x, amp, mu, sig, offset, slope):
return (
amp * np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

repeated code?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the pydantic model version, I was going to make a model the gaussian functions. It would simply this. I can also just edit this code for the initial PR

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok so this will get updated in the next PR?

+ offset
+ slope * x
)

@staticmethod
def super_gaussian(x, amp, mu, sig, P, offset):
"""Super Gaussian Function"""
"""Degree of P related to flatness of curve at peak"""
return amp * np.exp((-abs(x - mu) ** P) / (2 * sig**P)) + offset
nneveu marked this conversation as resolved.
Show resolved Hide resolved

@staticmethod
def double_gaussian(x, amp, mu, sig, amp2, nu, rho, offset):
return (
amp * np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0)))
+ amp2 * np.exp(-np.power(x - nu, 2.0) / (2 * np.power(rho, 2.0)))
) + offset

@staticmethod
def two_dim_gaussian(x, y, A, x0, y0, sigma_x, sigma_y):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add comment here about possible use with images

"""2-D Gaussian Function"""
return A * np.exp(
-((x - x0) ** 2) / (2 * sigma_x**2) - (y - y0) ** 2 / (2 * sigma_y**2)
)

# fit batch images
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from lcls_tools.common.data_analysis.fitting.fitting_tool import FittingTool
import numpy as np
import matplotlib.pyplot as plt


phys-cgarnier marked this conversation as resolved.
Show resolved Hide resolved
def gaussian(x, amp, mu, sig, offset):
return amp * np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0))) + offset


def super_gaussian(x, amp, mu, sig, P, offset):
"""Super Gaussian Function"""
"""Degree of P related to flatness of curve at peak"""
return amp * np.exp((-abs(x - mu) ** P) / (2 * sig**P)) + offset


def double_gaussian(x, amp, mu, sig, amp2, nu, rho, offset):
return (
amp * np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0)))
+ amp2 * np.exp(-np.power(x - nu, 2.0) / (2 * np.power(rho, 2.0)))
) + offset


nneveu marked this conversation as resolved.
Show resolved Hide resolved
x_data = np.arange(500)

# generated data for pure gaussian
test_params = [3, 125, 45, 1.5]
y_data = gaussian(x_data, *test_params)
y_noise = np.random.normal(size=len(x_data), scale=0.04)
y_test = y_data + y_noise
fitting_tool = FittingTool(y_test)
fits = fitting_tool.get_fit()
# print(test_params)
# print(fits)
y_gaussian_fit = gaussian(x_data, *fits["gaussian"])
y_super_gaussian_fit = super_gaussian(x_data, *fits["super_gaussian"])
y_double_gaussian_fit = double_gaussian(x_data, *fits["double_gaussian"])


# generated data for super gaussian
test_params_super_gauss = [4, 215, 75, 4, 1]
y_data_super_gauss = super_gaussian(x_data, *test_params_super_gauss)
y_test_super_gauss = y_data_super_gauss + y_noise
super_gauss_fitting_tool = FittingTool(y_test_super_gauss)

s_fits = super_gauss_fitting_tool.get_fit()
s_gaussian_fit = gaussian(x_data, *s_fits["gaussian"])
s_super_gaussian_fit = super_gaussian(x_data, *s_fits["super_gaussian"])
s_double_gaussian_fit = double_gaussian(x_data, *s_fits["double_gaussian"])

# generated data for double gaussian
test_params_double_gauss = [2, 100, 25, 10, 240, 25, 2]
y_data_double_gauss = double_gaussian(x_data, *test_params_double_gauss)
y_test_double_gauss = y_data_double_gauss + 3 * y_noise
double_gauss_fitting_tool = FittingTool(y_test_double_gauss)
d_fits = double_gauss_fitting_tool.get_fit()
d_gaussian_fit = gaussian(x_data, *d_fits["gaussian"])
d_super_gaussian_fit = super_gaussian(x_data, *d_fits["super_gaussian"])
d_double_gaussian_fit = double_gaussian(x_data, *d_fits["double_gaussian"])


fig, (ax1, ax2, ax3) = plt.subplots(3, 1)

# plots need legends
ax1.plot(x_data, y_test)
ax1.plot(x_data, y_gaussian_fit, "-.")
ax1.plot(x_data, y_super_gaussian_fit, "-.")
ax1.plot(x_data, y_double_gaussian_fit, "-.")


ax2.plot(x_data, y_test_super_gauss)
ax2.plot(x_data, s_gaussian_fit, "-.")
ax2.plot(x_data, s_super_gaussian_fit, "-.")
ax2.plot(x_data, s_double_gaussian_fit, "-.")

ax3.plot(x_data, y_test_double_gauss)
ax3.plot(x_data, d_gaussian_fit, "-.")
ax3.plot(x_data, d_super_gaussian_fit, "-.")
ax3.plot(x_data, d_double_gaussian_fit, "-.")

plt.show()


# right now performs all fits,
# needs option perform single fit only if passed get_fit(best_fit = True)
# needs nested dictionary structure
# {'gaussian':
# 'params':{
# 'amp' : 3.00969146,
# 'mu' : 125.03092854,
# 'sig' : 44.9545378,
# 'offset' : 1.49578195
# }
# ?'fitted_data': [...] yes/no?
# 'rmse': 7.970151073658837e-11
# }
# needs batch fitting option kwarg
# needs initial guess option kwarg