slaclab · nneveu · Dec 11, 2023 · Nov 6, 2023 · Nov 16, 2023 · Nov 16, 2023
diff --git a/lcls_tools/common/data_analysis/fitting/fitting_tool.py b/lcls_tools/common/data_analysis/fitting/fitting_tool.py
@@ -0,0 +1,150 @@
+import numpy as np
+from scipy.optimize import curve_fit
+import statistics
+
+
+# from scipy.ndimage import gaussian_filter
+# from scipy.special import erf
+# from lmfit import Model
+# want functions for gaussian
+# truncated gaussian
+# super gaussian
+# rms
+# truncated rms
+
+
+class FittingTool:
+    def __init__(self, data: np.array, **kwargs) -> dict:
+        """tool takes in the data points for some distribution, for now just one distrbution at a time"""
+        self.y = data
+        self.x = np.arange(len(data))
+        # self.initial_guess = kwargs['initial_guess']
+
+        self.initial_params = self.guess_params(self.y)
+
+    def guess_params(self, y: np.array, initial_guess: dict = {}) -> dict:
+        initial_params = {}
+
+        offset = initial_guess.pop("offset", np.mean(y[-10:]))
+        amplitude = initial_guess.pop("amplitude", y.max() - offset)
+        mu = initial_guess.pop("mu", np.argmax(y))
+        sigma = initial_guess.pop("sigma", y.shape[0] / 5)
+        initial_params["gaussian"] = [amplitude, mu, sigma, offset]
+
+        # super gaussian extension
+        power = 2
+        initial_params["super_gaussian"] = [amplitude, mu, sigma, power, offset]
+
+        #  for double gaussian
+        #  will make new helper functions for peaks and widths
+        amplitude2 = amplitude / 3
+        nu = mu / 2
+        rho = sigma / 2
+        initial_params["double_gaussian"] = [
+            amplitude,
+            mu,
+            sigma,
+            amplitude2,
+            nu,
+            rho,
+            offset,
+        ]
+
+        return initial_params
+
+    def get_fit(self, best_fit: bool = False) -> dict:
+        """Return fit parameters to data y such that y = method(x,parameters)"""
+        fits = {}
+
+        for key, val in self.initial_params.items():
+            method = getattr(self, key)
+            fit_params = curve_fit(method, self.x, self.y, val)[0]
+
+            y_fitted = method(self.x, *fit_params)
+            rmse = self.calculate_rms_deviation(self.y, y_fitted)
+            print(method)
+            print(rmse)
+            fits[key] = fit_params
+        print("returning only best fit: ", best_fit)
+        return fits
+
+    def check_skewness(self, outcomes, mu, sigma):
+        """Checks for skewness in dataset, neg if mean<median<mode, pos if opposite"""
+        mode = statistics.mode(outcomes)
+        pearsons_coeff = (mu - mode) / sigma
+        print(pearsons_coeff)
+        return pearsons_coeff
+
+    def check_kurtosis(self):
+        """greater kurtosis higher the peak"""
+        """how fast tails approaching zero, more outliers with higher kurtosis"""
+        """positive excess - tails approach zero slower"""
+        """negative excess - tails approach zero faster"""
+        # do later
+        return 0
+
+    def find_peaks(self):
+        pass
+
+    def find_widths(self):
+        pass
+
+    def find_runs(self):
+        pass
+
+    def find_moments(self):
+        """mean, sigma, skewness, kurtosis"""
+        pass
+
+    def truncate_distribution(x, lower_bound: float = None, upper_bound: float = None):
+        if lower_bound is None:
+            lower_bound = x.min()
+        if upper_bound is None:
+            upper_bound = x.max()
+        truncated_x = np.clip(x, lower_bound, upper_bound)
+        return truncated_x
+
+    def calculate_rms_deviation(self, x: np.array, fit_x: np.array):
+        rms_deviation = np.sqrt(np.power(sum(x - fit_x), 2) / len(x))
+        return rms_deviation
+
+    def calculate_unbiased_rms_deviated(x: np.array = None):
+        mean = np.mean(x)
+        rms_deviation = np.sqrt(np.power(sum(x - mean), 2) / len(x))
+        return rms_deviation
+
+    @staticmethod
+    def gaussian(x, amp, mu, sig, offset):
+        """Gaussian Function"""
+        """need a way to guess params if amp =/"""
+        return amp * np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0))) + offset
+
+    @staticmethod
+    def gaussian_with_linear_background(x, amp, mu, sig, offset, slope):
+        return (
+            amp * np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0)))
+            + offset
+            + slope * x
+        )
+
+    @staticmethod
+    def super_gaussian(x, amp, mu, sig, P, offset):
+        """Super Gaussian Function"""
+        """Degree of P related to flatness of curve at peak"""
+        return amp * np.exp((-abs(x - mu) ** P) / (2 * sig**P)) + offset
+
+    @staticmethod
+    def double_gaussian(x, amp, mu, sig, amp2, nu, rho, offset):
+        return (
+            amp * np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0)))
+            + amp2 * np.exp(-np.power(x - nu, 2.0) / (2 * np.power(rho, 2.0)))
+        ) + offset
+
+    @staticmethod
+    def two_dim_gaussian(x, y, A, x0, y0, sigma_x, sigma_y):
+        """2-D Gaussian Function"""
+        return A * np.exp(
+            -((x - x0) ** 2) / (2 * sigma_x**2) - (y - y0) ** 2 / (2 * sigma_y**2)
+        )
+
+    # fit batch images
diff --git a/tests/unit_tests/lcls_tools/common/data_analysis/fitting/test_fit_gauss.py b/tests/unit_tests/lcls_tools/common/data_analysis/fitting/test_fit_gauss.py
@@ -0,0 +1,97 @@
+from lcls_tools.common.data_analysis.fitting.fitting_tool import FittingTool
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def gaussian(x, amp, mu, sig, offset):
+    return amp * np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0))) + offset
+
+
+def super_gaussian(x, amp, mu, sig, P, offset):
+    """Super Gaussian Function"""
+    """Degree of P related to flatness of curve at peak"""
+    return amp * np.exp((-abs(x - mu) ** P) / (2 * sig**P)) + offset
+
+
+def double_gaussian(x, amp, mu, sig, amp2, nu, rho, offset):
+    return (
+        amp * np.exp(-np.power(x - mu, 2.0) / (2 * np.power(sig, 2.0)))
+        + amp2 * np.exp(-np.power(x - nu, 2.0) / (2 * np.power(rho, 2.0)))
+    ) + offset
+
+
+x_data = np.arange(500)
+
+# generated data for pure gaussian
+test_params = [3, 125, 45, 1.5]
+y_data = gaussian(x_data, *test_params)
+y_noise = np.random.normal(size=len(x_data), scale=0.04)
+y_test = y_data + y_noise
+fitting_tool = FittingTool(y_test)
+fits = fitting_tool.get_fit()
+# print(test_params)
+# print(fits)
+y_gaussian_fit = gaussian(x_data, *fits["gaussian"])
+y_super_gaussian_fit = super_gaussian(x_data, *fits["super_gaussian"])
+y_double_gaussian_fit = double_gaussian(x_data, *fits["double_gaussian"])
+
+
+# generated data for super gaussian
+test_params_super_gauss = [4, 215, 75, 4, 1]
+y_data_super_gauss = super_gaussian(x_data, *test_params_super_gauss)
+y_test_super_gauss = y_data_super_gauss + y_noise
+super_gauss_fitting_tool = FittingTool(y_test_super_gauss)
+
+s_fits = super_gauss_fitting_tool.get_fit()
+s_gaussian_fit = gaussian(x_data, *s_fits["gaussian"])
+s_super_gaussian_fit = super_gaussian(x_data, *s_fits["super_gaussian"])
+s_double_gaussian_fit = double_gaussian(x_data, *s_fits["double_gaussian"])
+
+# generated data for double gaussian
+test_params_double_gauss = [2, 100, 25, 10, 240, 25, 2]
+y_data_double_gauss = double_gaussian(x_data, *test_params_double_gauss)
+y_test_double_gauss = y_data_double_gauss + 3 * y_noise
+double_gauss_fitting_tool = FittingTool(y_test_double_gauss)
+d_fits = double_gauss_fitting_tool.get_fit()
+d_gaussian_fit = gaussian(x_data, *d_fits["gaussian"])
+d_super_gaussian_fit = super_gaussian(x_data, *d_fits["super_gaussian"])
+d_double_gaussian_fit = double_gaussian(x_data, *d_fits["double_gaussian"])
+
+
+fig, (ax1, ax2, ax3) = plt.subplots(3, 1)
+
+# plots need legends
+ax1.plot(x_data, y_test)
+ax1.plot(x_data, y_gaussian_fit, "-.")
+ax1.plot(x_data, y_super_gaussian_fit, "-.")
+ax1.plot(x_data, y_double_gaussian_fit, "-.")
+
+
+ax2.plot(x_data, y_test_super_gauss)
+ax2.plot(x_data, s_gaussian_fit, "-.")
+ax2.plot(x_data, s_super_gaussian_fit, "-.")
+ax2.plot(x_data, s_double_gaussian_fit, "-.")
+
+ax3.plot(x_data, y_test_double_gauss)
+ax3.plot(x_data, d_gaussian_fit, "-.")
+ax3.plot(x_data, d_super_gaussian_fit, "-.")
+ax3.plot(x_data, d_double_gaussian_fit, "-.")
+
+plt.show()
+
+
+# right now performs all fits,
+# needs option perform single fit only if passed get_fit(best_fit = True)
+# needs nested dictionary structure
+# {'gaussian':
+#          'params':{
+#                 'amp' :  3.00969146,
+#                 'mu'  : 125.03092854,
+#                 'sig' : 44.9545378,
+#                 'offset' : 1.49578195
+#                 }
+#           ?'fitted_data': [...] yes/no?
+#           'rmse': 7.970151073658837e-11
+# }
+# needs batch fitting option kwarg
+# needs initial guess option kwarg