Commit

reorganize analysis dir
Joseph Wick committed Aug 21, 2024
1 parent 0255bd0 commit f5750c0
Showing 6 changed files with 695 additions and 0 deletions.
101 changes: 101 additions & 0 deletions diffsmhm/analysis/adam.py
@@ -0,0 +1,101 @@
import numpy as np
import time


def adam(
    *,
    static_params,
    opt_params,
    err_func,
    maxiter,
    tmax=-1,
    err_threshold=1e-6,
    a=0.001,
    b1=0.9,
    b2=0.999,
    eps=10**-8
):
    """Adam optimizer for a given error function.

    Parameters
    ----------
    static_params : array-like
        Parameters required for an error measurement but not to be optimized.
    opt_params : array-like, shape (n_params,)
        Parameters to optimize.
    err_func : callable
        Function that takes in (static_params, opt_params) and returns
        (error, error_jacobian).
    maxiter : int
        Maximum number of optimization iterations to perform.
    tmax : float, optional
        Maximum time for which to run the optimizer, in seconds. Default is -1,
        which disables the time limit.
    err_threshold : float, optional
        Error threshold at which the algorithm will stop. Default is 1e-6.
    a : float, optional
        Adam parameter controlling step size scaling. Default is 0.001, taken
        from Kingma & Ba (2015).
    b1, b2 : float, optional
        Adam parameters controlling the decay rates of the moment estimates.
        Defaults are b1=0.9, b2=0.999, taken from Kingma & Ba (2015).
    eps : float, optional
        Small constant added for numerical stability. Default is 1e-8, taken
        from Kingma & Ba (2015).

    Returns
    -------
    theta : array-like, shape (n_params,)
        Optimized values for the input opt_params.
    err_history : list, length n_iter
        Error per iteration.
    """

    n_params = len(opt_params)

    # initialize moment vectors and iteration counter
    m = np.zeros(n_params, dtype=np.float64)
    v = np.zeros(n_params, dtype=np.float64)
    t = 0

    err_history = []

    theta = np.copy(opt_params)

    # get start time
    tstart = time.time()

    # optimize
    while True:
        t += 1

        # get error and gradient
        err, err_grad = err_func(static_params, theta)

        err_history.append(err)

        # check loop exit conditions
        cont = True
        if t > maxiter:
            cont = False
        telapsed = time.time() - tstart
        if tmax > 0 and telapsed > tmax:
            cont = False
        if err < err_threshold:
            cont = False
        if not cont:
            break

        # update params

        # biased first moment
        m = b1*m + (1-b1)*err_grad
        # biased second moment
        v = b2*v + (1-b2)*(err_grad**2)
        # bias-corrected first moment
        mhat = m/(1-b1**t)
        # bias-corrected second moment
        vhat = v/(1-b2**t)
        # update parameters
        theta -= a*mhat/(np.sqrt(vhat)+eps)

    # return updated parameters and error history
    return theta, err_history
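
A minimal usage sketch of adam on a toy quadratic objective; the quadratic_err function and its target values are illustrative, not part of this commit, and the import path assumes the package layout shown above:

import numpy as np

from diffsmhm.analysis.adam import adam


def quadratic_err(static_params, opt_params):
    # error is ||opt_params - target||^2, with gradient 2*(opt_params - target)
    diff = opt_params - static_params
    return np.sum(diff**2), 2.0 * diff


target = np.array([1.0, -2.0, 3.0])
theta, err_history = adam(
    static_params=target,
    opt_params=np.zeros(3, dtype=np.float64),
    err_func=quadratic_err,
    maxiter=10_000,
    err_threshold=1e-8,
)
print(theta, err_history[-1])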
33 changes: 33 additions & 0 deletions diffsmhm/analysis/compute_rhat.py
@@ -0,0 +1,33 @@
import numpy as np
import pandas as pd

from blackjax.diagnostics import potential_scale_reduction


# define files to work over
prefix = "/home/jwick/branches_diffsmhm/opt_wprp/diffsmhm/diffsmhm/analysis/scripts/output/"
position_files = [
prefix+"positions_14.6_8.0_9.0_01.csv",
prefix+"positions_14.6_8.0_9.0_02.csv",
prefix+"positions_14.6_8.0_9.0_03.csv",
prefix+"positions_14.6_8.0_9.0_04.csv"
]

# use only the last 1000 steps of each chain
start_step = -1000

# load files iteratively to build the (n_chains, n_samples, n_params) input array
input_arr = []
for f in position_files:
    df = pd.read_csv(f)
    stack_list = []
    for k in df.keys():
        stack_list.append(df[k][start_step:].to_numpy())

    pos_np = np.vstack(stack_list).T

    input_arr.append(pos_np)

rhat = potential_scale_reduction(np.array(input_arr), chain_axis=0, sample_axis=1)

print(rhat)
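
For reference on interpreting the printed value, a minimal sketch of potential_scale_reduction on synthetic, well-mixed chains; the random data is illustrative, and the array shape mirrors the (n_chains, n_samples, n_params) stacking in the script above:

import numpy as np

from blackjax.diagnostics import potential_scale_reduction

rng = np.random.default_rng(0)

# 4 chains x 1000 samples x 3 parameters, all drawn from the same
# distribution, so each parameter's rhat should be close to 1
chains = rng.normal(size=(4, 1000, 3))

rhat = potential_scale_reduction(chains, chain_axis=0, sample_axis=1)
print(rhat)  # values near 1 indicate the chains have mixed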
80 changes: 80 additions & 0 deletions diffsmhm/analysis/delta_sigma.py
@@ -0,0 +1,80 @@
from diffsmhm.diff_stats.mpi.sigma import sigma_mpi_comp_and_reduce
from diffsmhm.diff_stats.cuda.sigma import sigma_mpi_kernel_cuda
from diffsmhm.diff_stats.cpu.sigma import delta_sigma_from_sigma

try:
    from mpi4py import MPI

    COMM = MPI.COMM_WORLD
    RANK = COMM.Get_rank()
    N_RANKS = COMM.Get_size()
except ImportError:
    COMM = None
    RANK = 0
    N_RANKS = 1


def compute_delta_sigma(
    *,
    xh, yh, zh,
    wh, wh_jac,
    xp, yp, zp,
    inside_subvol,
    rpbins,
    zmax,
    boxsize
):
    """Delta Sigma for a given set of halos. Essentially a wrapper around
    diff_stats.mpi.sigma.sigma_mpi_comp_and_reduce and
    diff_stats.cpu.sigma.delta_sigma_from_sigma.

    Parameters
    ----------
    xh, yh, zh, wh : array-like, shape (n_halos,)
        Halo positions and weights.
    wh_jac : array-like, shape (n_halos, n_params)
        Halo weight jacobian.
    xp, yp, zp : array-like, shape (n_particles,)
        Particle positions.
    inside_subvol : array-like, shape (n_halos,)
        Boolean array with `True` when the point is inside the subvolume
        and `False` otherwise.
    rpbins : array-like, shape (n_rpbins+1,)
        Array of the bin edges in the `rp` direction. Note that this array is
        one longer than the number of bins in the `rp` direction.
    zmax : float
        The maximum separation in the `pi` (or typically `z`) direction. Particles
        with z distance less than zmax for a given halo are included in delta
        sigma calculations. Note this should be a whole number due to the unit
        binning of Corrfunc.
    boxsize : float
        The size of the total periodic volume of the simulation.

    Returns
    -------
    delta_sigma : array-like, shape (n_rpbins,)
        The 2D surface overdensity function. Returned only on rank 0; all other
        ranks return None.
    delta_sigma_grad : array-like, shape (n_rpbins, n_params)
        The gradients of the 2D surface overdensity function. Returned only on
        rank 0; all other ranks return None.
    """

    sigma, sigma_grad = sigma_mpi_comp_and_reduce(
        xh=xh, yh=yh, zh=zh,
        wh=wh,
        wh_jac=wh_jac,
        xp=xp, yp=yp, zp=zp,
        inside_subvol=inside_subvol,
        rpbins=rpbins,
        zmax=zmax,
        boxsize=boxsize,
        kernel_func=sigma_mpi_kernel_cuda
    )

    delta_sigma, delta_sigma_grad = None, None
    if RANK == 0:
        delta_sigma, delta_sigma_grad = delta_sigma_from_sigma(
            sigma,
            sigma_grad,
            rpbins
        )

    return delta_sigma, delta_sigma_grad
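
A hedged usage sketch of compute_delta_sigma with placeholder inputs; the random positions, bin edges, and box size are illustrative, and actually running it requires the CUDA sigma kernel and, for multiple ranks, an MPI launch (e.g. via mpirun):

import numpy as np

from diffsmhm.analysis.delta_sigma import compute_delta_sigma

rng = np.random.default_rng(42)
boxsize = 250.0
n_halos, n_particles, n_params = 1000, 10000, 3

delta_sigma, delta_sigma_grad = compute_delta_sigma(
    xh=rng.uniform(0.0, boxsize, n_halos),
    yh=rng.uniform(0.0, boxsize, n_halos),
    zh=rng.uniform(0.0, boxsize, n_halos),
    wh=rng.uniform(0.0, 1.0, n_halos),
    wh_jac=rng.uniform(0.0, 1.0, (n_halos, n_params)),
    xp=rng.uniform(0.0, boxsize, n_particles),
    yp=rng.uniform(0.0, boxsize, n_particles),
    zp=rng.uniform(0.0, boxsize, n_particles),
    inside_subvol=np.ones(n_halos, dtype=bool),
    rpbins=np.logspace(-1.0, 1.0, 14),  # 14 edges -> 13 rp bins
    zmax=20.0,
    boxsize=boxsize,
)

# only rank 0 receives the reduced result; other ranks get (None, None)
if delta_sigma is not None:
    print(delta_sigma.shape, delta_sigma_grad.shape)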