Initial commit of Python translation
nbelakovski committed Jul 29, 2023
1 parent b3f85ea commit 291b271
Showing 16 changed files with 2,111 additions and 0 deletions.
5 changes: 5 additions & 0 deletions python/README.md
@@ -0,0 +1,5 @@
To develop, `cd` into the `src` directory and run

```
pip install --editable .
```

This will install prima locally in editable mode. From there you can run examples/cobyla/cobyla_example.py (from any directory) and use it as a starting point.
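
As a quick sanity check (a minimal sketch; the only assumption is that the `prima` package is importable after the editable install, nothing else about its API is assumed), you can confirm that Python resolves `prima` to your working copy:

```python
import prima

# With an editable install, this path should point into python/src/prima
# of your checkout rather than into site-packages.
print(prima.__file__)
```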
10 changes: 10 additions & 0 deletions python/pyproject.toml
@@ -0,0 +1,10 @@
[project]
name = "prima"
version = "0.0.1"
dependencies = [
"numpy"
]

[tool.setuptools.packages.find]
where = ["src"] # ["."] by default
exclude = ["prima.examples*"] # empty by default
Empty file added python/src/prima/__init__.py
Empty file.
Empty file.
560 changes: 560 additions & 0 deletions python/src/prima/cobyla/cobylb.py

Large diffs are not rendered by default.

188 changes: 188 additions & 0 deletions python/src/prima/cobyla/geometry.py
@@ -0,0 +1,188 @@
import numpy as np

def setdrop_geo(delta, factor_alpha, factor_beta, sim, simi):
'''
This function finds (the index of) a current interpolation point to be replaced with a
geometry-improving point. See (15)-(16) of the COBYLA paper.
N.B.: COBYLA never sets jdrop to num_vars
'''

num_vars = sim.shape[0]

# Calculate the values of sigma and eta
# vsig[j] for j = 0...num_vars-1 is the Euclidean distance from vertex J to the opposite face of the simplex.
vsig = 1 / np.sqrt(np.sum(simi**2, axis=1))
veta = np.sqrt(np.sum(sim[:, :num_vars]**2, axis=0))

# Decide which vertex to drop from the simplex. It will be replaced with a new point to improve the
# acceptability of the simplex. See equations (15) and (16) of the COBYLA paper.
if any(veta > factor_beta * delta):
jdrop = np.where(veta == max(veta[~np.isnan(veta)]))[0][0]
elif any(vsig < factor_alpha * delta):
jdrop = np.where(vsig == min(vsig[~np.isnan(vsig)]))[0][0]
else:
# We arrive here if vsig and veta are all nan, which can happen due to nan in sim and simi
# which should not happen unless there is a bug
jdrop = None

# Zaikun 230202: What if we consider veta and vsig together? The following attempts do not work well.
# jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)*np.sum(simi**2, axis=1)) # Condition number
# jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)**2 * np.sum(simi**2, axis=1)) # Condition number times distance
return jdrop
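
# Illustrative sketch (hypothetical numbers, not part of the module): for num_vars = 2,
# take sim[:, :2] = diag(0.5, 3.0) (the vertex offsets from the best vertex), so that
# simi = diag(2, 1/3). Then vsig = [0.5, 3.0] and veta = [0.5, 3.0]. With delta = 1,
# factor_alpha = 0.25, and factor_beta = 2.1, we have veta[1] = 3.0 > factor_beta * delta,
# so setdrop_geo returns jdrop = 1: the vertex at the end of the overly long edge is the
# one to replace.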

def geostep(jdrop, cpen, conmat, cval, delta, fval, factor_gamma, simi):
'''
This function calculates a geometry step so that the geometry of the interpolation set is improved
when sim[:, jdrop] is replaced with sim[:, num_vars] + d
'''
num_constraints = conmat.shape[0]
num_vars = simi.shape[0]

# simi[jdrop, :] is a vector perpendicular to the face of the simplex opposite to vertex
# jdrop. Thus vsigj * simi[jdrop, :] is the unit vector in this direction.
vsigj = 1 / np.sqrt(np.sum(simi[jdrop, :]**2))

# Set d to the vector in the above-mentioned direction and with length factor_gamma * delta. Since
# factor_alpha < factor_gamma < factor_beta, d improves the geometry of the simplex as per (14) of
# the COBYLA paper. This also explains why this subroutine does not replace delta with
# delbar = max(min(np.sqrt(max(distsq))/10, delta/2), rho) as in NEWUOA.
d = factor_gamma * delta * (vsigj * simi[jdrop, :])

# Calculate the coefficients of the linear approximations to the objective and constraint functions,
# placing minus the objective function gradient after the constraint gradients in the array A
A = np.zeros((num_vars, num_constraints + 1))
A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T
A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi
cvmaxp = max(0, max(-d@A[:, :num_constraints] - conmat[:, num_vars]))
cvmaxn = max(0, max(d@A[:, :num_constraints] - conmat[:, num_vars]))
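# Note (added explanation, not in the original source): cvmaxp and cvmaxn are the maximum
# constraint violations predicted by the linear models at sim[:, num_vars] + d and
# sim[:, num_vars] - d respectively, while 2*d@A[:, num_constraints] approximates
# f(x - d) - f(x + d), since A[:, num_constraints] holds minus the objective gradient.
# The test below therefore flips d whenever the linear models predict that -d gives the
# smaller value of the merit function f + cpen * (constraint violation).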
if 2 * np.dot(d, A[:, num_constraints]) < cpen * (cvmaxp - cvmaxn):
d *= -1
return d

def setdrop_tr(ximproved, d, sim, simi):
'''
This function finds (the index) of a current interpolation point to be replaced with the
trust-region trial point. See (19)-(22) of the COBYLA paper.
N.B.:
1. If ximproved == True, then jdrop >= 0 so that d is included into xpt. Otherwise, it is a bug.
2. COBYLA never sets jdrop = num_vars
TODO: Check whether it improves the performance if jdrop = num_vars is allowed when ximproved is True.
Note that updatexfc should be revised accordingly.
'''

num_vars = sim.shape[0]

# -------------------------------------------------------------------------------------------------- #
# The following code is Powell's scheme for defining JDROP.
# -------------------------------------------------------------------------------------------------- #
# ! JDROP = 0 by default. It cannot be removed, as JDROP may not be set below in some cases (e.g.,
# ! when XIMPROVED == FALSE, MAXVAL(ABS(SIMID)) <= 1, and MAXVAL(VETA) <= EDGMAX).
# jdrop = 0
#
# ! SIMID(J) is the value of the J-th Lagrange function at D. It is the counterpart of VLAG in UOBYQA
# ! and DEN in NEWUOA/BOBYQA/LINCOA, but it excludes the value of the (N+1)-th Lagrange function.
# simid = matprod(simi, d)
# if (any(abs(simid) > 1) .or. (ximproved .and. any(.not. is_nan(simid)))) then
# jdrop = int(maxloc(abs(simid), mask=(.not. is_nan(simid)), dim=1), kind(jdrop))
# !!MATLAB: [~, jdrop] = max(simid, [], 'omitnan');
# end if
#
# ! VETA(J) is the distance from the J-th vertex of the simplex to the best vertex, taking the trial
# ! point SIM(:, N+1) + D into account.
# if (ximproved) then
# veta = sqrt(sum((sim(:, 1:n) - spread(d, dim=2, ncopies=n))**2, dim=1))
# !!MATLAB: veta = sqrt(sum((sim(:, 1:n) - d).^2)); % d should be a column! Implicit expansion
# else
# veta = sqrt(sum(sim(:, 1:n)**2, dim=1))
# end if
#
# ! VSIG(J) (J=1, .., N) is the Euclidean distance from vertex J to the opposite face of the simplex.
# vsig = ONE / sqrt(sum(simi**2, dim=2))
# sigbar = abs(simid) * vsig
#
# ! The following JDROP will overwrite the previous one if its premise holds.
# mask = (veta > factor_delta * delta .and. (sigbar >= factor_alpha * delta .or. sigbar >= vsig))
# if (any(mask)) then
# jdrop = int(maxloc(veta, mask=mask, dim=1), kind(jdrop))
# !!MATLAB: etamax = max(veta(mask)); jdrop = find(mask & ~(veta < etamax), 1, 'first');
# end if
#
# ! Powell's code does not include the following instructions. With Powell's code, if SIMID consists
# ! of only NaN, then JDROP can be 0 even when XIMPROVED == TRUE (i.e., D reduces the merit function).
# ! With the following code, JDROP cannot be 0 when XIMPROVED == TRUE, unless VETA is all NaN, which
# ! should not happen if X0 does not contain NaN, the trust-region/geometry steps never contain NaN,
# ! and we exit once encountering an iterate containing Inf (due to overflow).
# if (ximproved .and. jdrop <= 0) then ! Write JDROP <= 0 instead of JDROP == 0 for robustness.
# jdrop = int(maxloc(veta, mask=(.not. is_nan(veta)), dim=1), kind(jdrop))
# !!MATLAB: [~, jdrop] = max(veta, [], 'omitnan');
# end if
# -------------------------------------------------------------------------------------------------- #
# Powell's scheme ends here.
# -------------------------------------------------------------------------------------------------- #

# The following definition of jdrop is inspired by setdrop_tr in UOBYQA/NEWUOA/BOBYQA/LINCOA.
# It is simpler and works better than Powell's scheme. Note that we allow jdrop to be num_vars if
# ximproved is True, whereas Powell's code does not.
# See also (4.1) of Scheinberg-Toint-2010: Self-Correcting Geometry in Model-Based Algorithms for
# Derivative-Free Unconstrained Optimization, which refers to the strategy here as the "combined
# distance/poisedness criteria".

# distsq[j] is the square of the distance from the jth vertex of the simplex to the "best" point so
# far, taking the trial point sim[:, num_vars] + d into account.
distsq = np.zeros(num_vars + 1)
if ximproved:
    distsq[:num_vars] = np.sum((sim[:, :num_vars] - np.tile(d, (num_vars, 1)).T)**2, axis=0)
    distsq[num_vars] = np.sum(d**2)
else:
    distsq[:num_vars] = np.sum(sim[:, :num_vars]**2, axis=0)
    distsq[num_vars] = 0

weight = distsq

# Other possible definitions of weight. They work almost the same as the one above.
# weight = max(1, max(25 * distsq / delta**2))
# weight = max(1, max(10 * distsq / delta**2))
# weight = max(1, max(1e2 * distsq / delta**2))
# weight = max(1, max(distsq / max(rho, delta/10)**2))
# weight = max(1, max(distsq / rho**2))

# If 0 <= j < num_vars, simid[j] is the value of the jth Lagrange function at d; the value of the
# (num_vars+1)th Lagrange function is 1 - sum(simid). [simid, 1 - sum(simid)] is the counterpart of
# vlag in UOBYQA and den in NEWUOA/BOBYQA/LINCOA.
simid = simi@d
score = weight * abs(np.array([*simid, 1 - sum(simid)]))

# If ximproved is False (the new d does not render a "better" x), we set score[num_vars] = -1 to avoid
# jdrop = num_vars. This is not really needed if weight is defined as distsq raised to some power, in which
# case score[num_vars] = 0. We keep the code for robustness (in case the definition of weight changes later).
if not ximproved:
score[num_vars] = -1

if any(score > 0):
jdrop = np.where(score == max(score[~np.isnan(score)]))[0][0]
elif ximproved:
jdrop = np.argmax(distsq)
else:
jdrop = None

return jdrop

def assess_geo(delta, factor_alpha, factor_beta, sim, simi):
# This function checks whether an interpolation set has acceptable geometry, as per
# (14) of the COBYLA paper.

# Calculate the values of sigma and eta
# veta[j] (0 <= j < n) is the distance between the vertices j and 0 (the best vertex)
# of the simplex.
# vsig[j] (0 <= j < n) is the distance from vertex j to its opposite face of the simplex.
# Thus vsig <= veta.
# N.B.: What about the distance from vertex N+1 to its opposite face? Consider the simplex
# {V_{N+1}, V_{N+1} + L*e_1, ..., V_{N+1} + L*e_N}, where V_{N+1} is vertex N+1,
# namely the current "best" point, [e_1, ..., e_n] is an orthogonal matrix, and L is a
# constant in the order of delta. This simplex is optimal in the sense that the interpolation
# system has the minimal condition number, i.e. 1. For this simplex, the distance from
# V_{N+1} to its opposite face is L/sqrt(n).
vsig = 1/np.sqrt(np.sum(simi**2, axis=1)) # TODO: Check axis
veta = np.sqrt(np.sum(sim[:, :-1]**2, axis=0))
adequate_geo = all(vsig >= factor_alpha * delta) and all(veta <= factor_beta * delta)
return adequate_geo
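
# Illustrative check (hypothetical numbers, not part of the module): with the simplex used
# in the setdrop_geo sketch above (sim[:, :2] = diag(0.5, 3.0), so vsig = veta = [0.5, 3.0]),
# delta = 1, factor_alpha = 0.25, and factor_beta = 2.1, assess_geo returns False, since
# veta[1] = 3.0 > factor_beta * delta: the simplex is too elongated to have acceptable geometry.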