Creating a new fitting toolkit and examples of how to use it with Screen Profiles. #143

Draft
wants to merge 41 commits into base: main
Commits (41)
79ac28f
init commit of screen_beam_profile.py and supporting python files (p…
phys-cgarnier Mar 12, 2024
49088e7
same
phys-cgarnier Mar 12, 2024
eacda2a
updated measurement.py, renamed test_screen_beam_profile, added test…
phys-cgarnier Mar 12, 2024
204e14d
initial commit of screen_beam_profile measurement and test notebook, …
phys-cgarnier Mar 20, 2024
4545cc3
gave docstring to measurement.py simplified test_measurement.ipynb
phys-cgarnier Mar 26, 2024
8a6adcd
updating doc strings, and very minor details of python files
phys-cgarnier Apr 10, 2024
732a972
updated return structure of screen_beam_profile_measurement.py, and a…
phys-cgarnier Apr 10, 2024
a402fd8
Merge branch 'dev' of https://github.com/phys-cgarnier/lcls-tools int…
phys-cgarnier Apr 11, 2024
a9ed3d6
removed unused dependencies in an attempt to resolve some flake8 errors
phys-cgarnier Apr 11, 2024
26fc338
more issues with flake8
phys-cgarnier Apr 11, 2024
4b0523b
more flake8
phys-cgarnier Apr 11, 2024
b14877e
black was unresolving some flake8 errors.... fixed that
phys-cgarnier Apr 11, 2024
f2f0cb3
changed return type type hinting for projection_fit.fit_projection si…
phys-cgarnier Apr 16, 2024
50d6263
changed return type type hinting for projection_fit.fit_projection si…
phys-cgarnier Apr 16, 2024
c4b347a
cleaning up code for PR
phys-cgarnier Apr 25, 2024
f955c01
added docstrings in screenbeamprofile measurement started tests for r…
phys-cgarnier Apr 25, 2024
44223a3
updating unittests for test_image_processing and test_roi, still not …
phys-cgarnier Apr 25, 2024
363ed99
updated files to have TODO comments, also completed test_image_proces…
phys-cgarnier Apr 29, 2024
713c324
updating code to no longer use distribution_data but instead use prof…
phys-cgarnier Apr 30, 2024
a576c0a
implemented init_values dictionary correctly.
phys-cgarnier Apr 30, 2024
89dba3d
code is in broken state made a branch to save changes but revert back…
phys-cgarnier May 7, 2024
f343813
cleaning example notebook
phys-cgarnier May 7, 2024
9ee6c08
committing working version (plots included) resolved bug, next commit…
phys-cgarnier May 8, 2024
36a46bb
setup fitting_tool.plot_fit()
phys-cgarnier May 8, 2024
76af0d6
merged feature requests discussed in PR notes
phys-cgarnier May 8, 2024
9ad190e
moved example_measurement to examples folder
phys-cgarnier May 8, 2024
92f4011
changed model._forward to use scipy.stats.norm, fit looks okay, shoul…
phys-cgarnier May 8, 2024
631541d
updated test_image_processing to include clipping, removed visualizat…
phys-cgarnier May 10, 2024
e5cb4bf
ran black
phys-cgarnier May 10, 2024
69349b8
removed whitespace and unused dependencies
phys-cgarnier May 10, 2024
5be4f7c
reorg fit methods, changed base model init to require data when class…
nneveu May 12, 2024
84abf86
fixing formatting
nneveu May 12, 2024
8b8b645
one more rename of fitting files before PR
nneveu May 14, 2024
611eda1
gaussian fit updates + test
nneveu May 15, 2024
29eef37
made changes to GaussianModel find init values.
phys-cgarnier May 15, 2024
0523fe2
skipped tests in test_methods.py will fix
phys-cgarnier May 15, 2024
382e502
fixed flake8 syntax errors
phys-cgarnier May 15, 2024
4ad8f45
fixed flake8 syntax errors
phys-cgarnier May 15, 2024
964bcd8
fixed flake8 syntax errors
phys-cgarnier May 15, 2024
f686f49
committing changes made during previous meeting
phys-cgarnier May 28, 2024
8c85a56
added kwargs to projection.plot_fit so that user can compare projecti…
phys-cgarnier Jun 5, 2024
Files changed
291 changes: 291 additions & 0 deletions examples/example_measurement.ipynb

Large diffs are not rendered by default.

134 changes: 134 additions & 0 deletions lcls_tools/common/data_analysis/fit/method_base.py
@@ -0,0 +1,134 @@
from abc import ABC, abstractmethod
import numpy as np
from matplotlib import pyplot as plt


class MethodBase(ABC):
    """
    Base abstract class for all fit methods; it provides the minimum skeleton
    that every fit method must implement.
    Should be used only as a parent class for method models.
    ---------------------------
    Arguments:
    param_names: list (names of all parameters that the model will contain)
    param_guesses: np.ndarray (array containing a guess for each param value,
        organized in the same order as param_names)
    param_bounds: np.ndarray (array containing the lower and upper bounds
        on acceptable values for each parameter)
    """

    param_names: list = None
    param_bounds: np.ndarray = None

    def __init__(self):
        self.init_values: dict = None
        self.fitted_params_dict: dict = None

    @abstractmethod
    def find_init_values(self) -> dict:
        ...

    @abstractmethod
    def find_priors(self) -> dict:
        ...

    # TODO: move to plotting file
    def plot_init_values(self):
        """Plots the forward function evaluated at the initial values and
        visually compares it to the measured distribution."""
        init_values = np.array(list(self.init_values.values()))
        fig, axs = plt.subplots(1, 1)
        x = np.linspace(0, 1, len(self.profile_data))
        y_fit = self._forward(x, init_values)
        axs.plot(x, self.profile_data, label="Projection Data")
        axs.plot(x, y_fit, label="Initial Guess Fit Data")
        axs.set_xlabel("x")
        axs.set_ylabel("Forward(x)")
        axs.set_title("Initial Fit Guess")
        return fig, axs

    # TODO: move to plotting file
    def plot_priors(self):
        """Plots the prior distribution for each param in param_names."""
        num_plots = len(self.priors)
        fig, axs = plt.subplots(num_plots, 1)
        for i, (param, prior) in enumerate(self.priors.items()):
            x = np.linspace(0, self.param_bounds[i][-1], len(self.profile_data))
            axs[i].plot(x, prior.pdf(x))
            axs[i].axvline(
                self.param_bounds[i, 0],
                ls="--",
                c="k",
            )
            axs[i].axvline(self.param_bounds[i, 1], ls="--", c="k", label="bounds")
            axs[i].set_title(param + " prior")
            axs[i].set_ylabel("Density")
            axs[i].set_xlabel(param)
        fig.tight_layout()
        return fig, axs

    def forward(self, x: np.ndarray, params: dict) -> np.ndarray:
        # TODO: test new usage
        params_list = np.array([params[name] for name in self.param_names])
        return self._forward(x, params_list)

    @staticmethod
    @abstractmethod
    def _forward(x: np.ndarray, params: np.ndarray) -> np.ndarray:
        ...

    def log_prior(self, params: dict):
        # TODO: test new usage
        print(params)
        params_list = np.array([params[name] for name in self.param_names])
        return self._log_prior(params_list)

    @abstractmethod
    def _log_prior(self, params: np.ndarray):
        ...

    def log_likelihood(self, x: np.ndarray, y: np.ndarray, params: dict):
        # TODO: test new usage
        params_list = np.array([params[name] for name in self.param_names])
        return self._log_likelihood(x, y, params_list)

    def _log_likelihood(self, x: np.ndarray, y: np.ndarray, params: np.ndarray):
        # negative squared error between data and fit
        return -np.sum((y - self._forward(x, params)) ** 2)

    def loss(self, params, x, y, use_priors=False):
        # TODO: implement using private functions _log_likelihood and _log_prior
        # ML group way of iterating over priors / reducing difference between fit and data
        loss_temp = -self._log_likelihood(x, y, params)
        if use_priors:
            loss_temp = loss_temp - self._log_prior(params)
        return loss_temp

    @property
    def priors(self):
        """
        Initial priors stored in a dictionary where the keys are the
        complete set of parameters of the model.
        """
        return self._priors

    @priors.setter
    def priors(self, priors):
        if not isinstance(priors, dict):
            raise TypeError("Input must be a dictionary")
        self._priors = priors

    @property
    def profile_data(self):
        """1D array, typically projection data."""
        return self._profile_data

    @profile_data.setter
    def profile_data(self, profile_data):
        if not isinstance(profile_data, np.ndarray):
            raise TypeError("Input must be ndarray")
        self._profile_data = profile_data
        # TODO: change these to private methods only called when profile data is set
        self.find_init_values()
        self.find_priors()
        self.fitted_params_dict = {}
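Since MethodBase leaves find_init_values, find_priors, _forward, and _log_prior abstract, a concrete method class has to supply all four before profile_data can be assigned. A minimal sketch of a hypothetical subclass follows (illustrative only, not part of this PR; ConstantModel and its single "offset" parameter are invented names):

# Hypothetical example: the smallest concrete method class that satisfies
# MethodBase -- a flat line with a single free "offset" parameter.
import numpy as np
from scipy.stats import norm
from lcls_tools.common.data_analysis.fit.method_base import MethodBase


class ConstantModel(MethodBase):
    param_names: list = ["offset"]
    param_bounds: np.ndarray = np.array([[0.01, 1.0]])

    def find_init_values(self) -> dict:
        # guess the offset as the mean of the (normalized) profile data
        self.init_values = {"offset": float(self._profile_data.mean())}
        return self.init_values

    def find_priors(self) -> dict:
        # loose normal prior centered on the initial guess
        self.priors = {"offset": norm(self.init_values["offset"], 0.5)}
        return self.priors

    @staticmethod
    def _forward(x: np.ndarray, params: np.ndarray) -> np.ndarray:
        return np.full_like(x, params[0])

    def _log_prior(self, params: np.ndarray) -> float:
        return self.priors["offset"].logpdf(params[0])


# Assigning profile data triggers find_init_values() and find_priors():
# model = ConstantModel()
# model.profile_data = np.random.rand(100)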
101 changes: 101 additions & 0 deletions lcls_tools/common/data_analysis/fit/methods.py
@@ -0,0 +1,101 @@
import numpy as np
from scipy.stats import norm, gamma
from lcls_tools.common.data_analysis.fit.method_base import MethodBase


class GaussianModel(MethodBase):
    """
    GaussianModel class that finds initial parameter values for a gaussian
    distribution and builds probability density functions for the likelihood
    of each parameter taking a given value, based on those initial values.
    Assigning profile_data on this class automatically updates the initial
    values and probability density functions to match that data.
    """

    param_names: list = ["mean", "sigma", "amplitude", "offset"]
    param_bounds: np.ndarray = np.array(
        [
            [0.01, 1.0],
            [0.01, 5.0],
            [0.01, 1.0],
            [0.01, 1.0],
        ]
    )

    def find_init_values(self) -> dict:
        """Fit the data without optimization and return the values."""
        data = self._profile_data
        x = np.linspace(0, 1, len(data))
        # init_fit = norm.pdf(data)
        offset = data.min()
        amplitude = data.max() - offset

        weighted_mean = np.average(x, weights=data)
        weighted_sigma = np.sqrt(np.cov(x, aweights=data))

        self.init_values = {
            self.param_names[0]: weighted_mean,  # data.mean()
            self.param_names[1]: weighted_sigma,  # data.std()
            self.param_names[2]: amplitude,
            self.param_names[3]: offset,
        }
        return self.init_values

    def find_priors(self) -> dict:
        """Make initial guesses based on the data and build prior distributions from those guesses."""
        # Creating a gamma distribution around the initial amplitude.
        # TODO: add to comments on why gamma vs. normal dist used for priors.
        amplitude_mean = self.init_values["amplitude"]
        amplitude_var = 0.05
        amplitude_alpha = (amplitude_mean**2) / amplitude_var
        amplitude_beta = amplitude_mean / amplitude_var
        amplitude_prior = gamma(amplitude_alpha, loc=0, scale=1 / amplitude_beta)

        # Creating a normal distribution of points around the initial mean.
        mean_prior = norm(self.init_values["mean"], 0.1)

        # TODO: remove hard coded numbers?
        sigma_alpha = 2.5
        sigma_beta = 5.0
        sigma_prior = gamma(sigma_alpha, loc=0, scale=1 / sigma_beta)

        # Creating a normal distribution of points around the initial offset.
        offset_prior = norm(self.init_values["offset"], 0.5)
        self.priors = {
            self.param_names[0]: mean_prior,
            self.param_names[1]: sigma_prior,
            self.param_names[2]: amplitude_prior,
            self.param_names[3]: offset_prior,
        }
        return self.priors

    def _forward(self, x: np.ndarray, params: np.ndarray):
        # Load distribution parameters.
        # params needs to be an array for scipy.optimize.minimize.
        mean = params[0]
        sigma = params[1]
        amplitude = params[2]
        offset = params[3]
        return (
            np.sqrt(2 * np.pi) * amplitude * norm.pdf((x - mean) / sigma) + offset
        )  # norm.pdf(x, loc=mean, scale=sigma)

    # TODO: remove when above is confirmed the same / below not needed.
    # @staticmethod
    # def _forward(x: np.ndarray, params_list: np.ndarray):
    #     amplitude = params_list[0]
    #     mean = params_list[1]
    #     sigma = params_list[2]
    #     offset = params_list[3]
    #     normal = norm()
    #     return ((np.sqrt(2 * np.pi)) * amplitude) * normal.pdf(
    #         (x - mean) / sigma
    #     ) + offset

    def _log_prior(self, params: np.ndarray) -> float:
        return np.sum(
            [
                prior.logpdf(params[i])
                for i, (key, prior) in enumerate(self.priors.items())
            ]
        )
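As a quick sanity check of the class above, a hypothetical usage sketch (the synthetic profile and the numbers are illustrative, not from the PR): assigning profile_data triggers find_init_values and find_priors, and forward evaluates the model at a parameter dictionary.

# Hypothetical usage sketch for GaussianModel (synthetic data, illustrative values).
import numpy as np
from lcls_tools.common.data_analysis.fit.methods import GaussianModel

x = np.linspace(0, 1, 200)
# noisy gaussian bump roughly in the [0, 1] range
profile = 0.8 * np.exp(-0.5 * ((x - 0.4) / 0.05) ** 2) + 0.1
profile += np.random.normal(0, 0.01, size=x.size)

model = GaussianModel()
model.profile_data = profile  # triggers find_init_values() and find_priors()
print(model.init_values)      # keys: 'mean', 'sigma', 'amplitude', 'offset'

# Evaluate the forward model at the initial guesses for comparison with the data
y_guess = model.forward(x, model.init_values)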
146 changes: 146 additions & 0 deletions lcls_tools/common/data_analysis/fit/projection.py
@@ -0,0 +1,146 @@
import numpy as np
import scipy.optimize
import scipy.signal
from matplotlib import pyplot as plt
from pydantic import BaseModel, ConfigDict
from lcls_tools.common.data_analysis.fit.method_base import MethodBase


class ProjectionFit(BaseModel):
    """
    1d fitting class that allows users to choose the model with which the fit
    is performed, and whether prior assumptions about the data (bayesian
    regression) should be used when performing the fit.
    Additionally, there is an option to visualize the fitted data and priors.
    - To perform a 1d fit, call fit_projection(projection_data=<data_to_fit>)
    ------------------------
    Arguments:
    model: MethodBase (a child class object of MethodBase,
        e.g. GaussianModel or DoubleGaussianModel)
    visualize_priors: bool (show plots of the priors and initial guess
        distribution before the fit)
    use_priors: bool (incorporate prior distribution information into the fit)
    visualize_fit: bool (plot the model's forward function evaluated at the
        fitted parameters against the distribution data)
    """

    # TODO: come up with better name
    model_config = ConfigDict(arbitrary_types_allowed=True)
    model: MethodBase
    use_priors: bool = False

    def normalize(self, data: np.ndarray) -> np.ndarray:
        """
        Normalize a 1d array by scaling and shifting the data
        s.t. the data lies between 0 and 1.
        """
        data_copy = data.copy()
        normalized_data = (data_copy - np.min(data)) / (np.max(data) - np.min(data))
        return normalized_data

    def unnormalize_model_params(
        self, params_dict: dict, projection_data: np.ndarray
    ) -> dict:
        """
        Takes fitted, normalized params and returns them as unnormalized
        values, i.e. the true fitted values of the distribution.
        """
        projection_data_range = np.max(projection_data) - np.min(projection_data)
        length = len(projection_data)
        for key, val in params_dict.items():
            if "sigma" in key or "mean" in key:
                true_fitted_val = val * length
            elif "offset" in key:
                true_fitted_val = val * projection_data_range + np.min(projection_data)
            else:
                true_fitted_val = val * projection_data_range
            temp = {key: true_fitted_val}
            params_dict.update(temp)
        return params_dict

    def model_setup(self, projection_data: np.ndarray) -> None:
        """Sets up the model and its init_values/priors."""
        self.model.profile_data = projection_data

    def fit_model(self) -> scipy.optimize._optimize.OptimizeResult:
        """
        Fits the model params to the distribution data.
        Returns an OptimizeResult object.
        """
        x = np.linspace(0, 1, len(self.model.profile_data))
        y = self.model.profile_data

        init_values = np.array(
            [self.model.init_values[name] for name in self.model.param_names]
        )

        res = scipy.optimize.minimize(
            self.model.loss,
            init_values,
            args=(x, y, self.use_priors),
            bounds=self.model.param_bounds,
            method="Powell",
        )
        return res  # TODO: optional argument to return fig, ax

    def fit_projection(self, projection_data: np.ndarray) -> dict:
        """
        Wrapper function that does all necessary steps to fit a 1d array.
        Returns a dictionary (dict[str, float]) where the keys are the model
        params and the values are the params fitted to the data.
        """
        assert len(projection_data.shape) == 1
        fitted_params_dict = {}
        normalized_data = self.normalize(projection_data)
        self.model_setup(projection_data=normalized_data)
        res = self.fit_model()
        for i, param in enumerate(self.model.param_names):
            fitted_params_dict[param] = (res.x)[i]
        self.model.fitted_params_dict = fitted_params_dict.copy()
        params_dict = self.unnormalize_model_params(fitted_params_dict, projection_data)
        return params_dict

    def plot_fit(self, **kwargs):
        """
        Plots the normalized fitted forward function against the normalized data.
        To plot unnormalized projection data, pass the unnormalized forward
        function params and data as kwargs.
        """
        fitted_params_dict = {}
        projection_data = []
        if kwargs:
            for key, val in kwargs.items():
                if key in self.model.fitted_params_dict:
                    fitted_params_dict[key] = val
                else:
                    projection_data = val
                    x = np.linspace(0, len(projection_data), len(projection_data))
        else:
            fitted_params_dict.update(self.model.fitted_params_dict)
            projection_data = self.model.profile_data
            x = np.linspace(0, 1, len(self.model.profile_data))

        y = projection_data
        fig, ax = plt.subplots()
        y_fit = self.model.forward(x, fitted_params_dict)
        ax.plot(x, y, label="data")
        ax.plot(x, y_fit, label="fit")

        textstr = "\n".join(
            [
                r"$\mathrm{%s}=%.2f$" % (key, val)
                for key, val in fitted_params_dict.items()
            ]
        )
        ax.text(
            0.05,
            0.95,
            textstr,
            transform=ax.transAxes,
            verticalalignment="top",
        )
        return fig, ax
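Putting the pieces together, a hypothetical end-to-end sketch with synthetic data (the numbers are illustrative; the real worked example lives in examples/example_measurement.ipynb, whose diff is not rendered above). Note that fit_projection returns parameters in the original units of the data, since unnormalize_model_params rescales the normalized fit results.

# Hypothetical end-to-end sketch with a synthetic screen projection.
import numpy as np
from lcls_tools.common.data_analysis.fit.methods import GaussianModel
from lcls_tools.common.data_analysis.fit.projection import ProjectionFit

# synthetic projection: gaussian peak on a pedestal, in "pixel" units
pixels = np.arange(500)
projection = 1200.0 * np.exp(-0.5 * ((pixels - 210) / 18.0) ** 2) + 50.0
projection += np.random.normal(0, 10.0, size=pixels.size)

fit = ProjectionFit(model=GaussianModel(), use_priors=False)
params = fit.fit_projection(projection_data=projection)
print(params)  # roughly recovers mean ~210 and sigma ~18 in pixel units

# plot the normalized fit against the normalized data
fig, ax = fit.plot_fit()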
8 changes: 5 additions & 3 deletions lcls_tools/common/devices/screen.py
@@ -242,9 +242,11 @@ def _write_image_to_hdf5(
             # we update with original key if it isn't in our normal screen metadata
             # otherwise, prepend user_ to the key to retain all information.
             [
-                dset.attrs.update({key: value})
-                if key not in self.metadata
-                else dset.attrs.update({"user_" + key: value})
+                (
+                    dset.attrs.update({key: value})
+                    if key not in self.metadata
+                    else dset.attrs.update({"user_" + key: value})
+                )
                 for key, value in extra_metadata.items()
             ]

Inline review comment on the added "(" line:
Member: Why the extra () here? Was it added by black/flake8?
Collaborator Author: I think this is a typo on my end, I don't remember editing this file, thank you.
