From 2e4f21c56651550b939dd21685ee9d53e32b7a7e Mon Sep 17 00:00:00 2001 From: Marco Pagani Date: Tue, 14 Mar 2023 17:10:50 +0100 Subject: [PATCH 01/53] Adding support for the direct method --- openquake/engine/postproc/compute_mrd.py | 8 +++++--- openquake/hazardlib/calc/mrd.py | 19 +++++++++++++------ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/openquake/engine/postproc/compute_mrd.py b/openquake/engine/postproc/compute_mrd.py index 95f4b9e306c8..851b8dbf6346 100644 --- a/openquake/engine/postproc/compute_mrd.py +++ b/openquake/engine/postproc/compute_mrd.py @@ -26,7 +26,7 @@ def compute_mrd(ctx, N, cmaker, crosscorr, imt1, imt2, - meabins, sigbins, monitor): + meabins, sigbins, method, monitor): """ :param ctx: a context array :param N: the total number of sites @@ -36,10 +36,11 @@ def compute_mrd(ctx, N, cmaker, crosscorr, imt1, imt2, :param str imt1: the second Intensity Measure Type :param meabins: bins for the means :param sigbins: bins for the sigmas + :param method: a string 'direct' or 'indirect' :returns: 4D-matrix with shape (L1, L1, N, G) """ mrd = calc_mean_rate_dist(ctx, N, cmaker, crosscorr, - imt1, imt2, meabins, sigbins) + imt1, imt2, meabins, sigbins, method) return {g: mrd[:, :, :, i] for i, g in enumerate(cmaker.gidx)} @@ -71,6 +72,7 @@ def main(parent_id, config): dic = toml.load(f) imt1 = dic['imt1'] imt2 = dic['imt2'] + method = dic.get('method', 'indirect') crosscorr = getattr(cross_correlation, dic['cross_correlation'])() meabins = dic['meabins'] sigbins = dic['sigbins'] @@ -93,7 +95,7 @@ def main(parent_id, config): cmaker = cmakers[grp_id] for slc in general.gen_slices(0, len(ctx), blocksize): smap.submit((ctx[slc], N, cmaker, crosscorr, imt1, imt2, - meabins, sigbins)) + meabins, sigbins, method)) acc = smap.reduce() mrd = dstore.create_dset('_mrd', float, (L1, L1, N)) mrd[:] = combine_mrds(acc, dstore['rlzs_by_g'][:], dstore['weights'][:]) diff --git a/openquake/hazardlib/calc/mrd.py b/openquake/hazardlib/calc/mrd.py index ca1f94888741..4fadd901477b 100644 --- a/openquake/hazardlib/calc/mrd.py +++ b/openquake/hazardlib/calc/mrd.py @@ -42,7 +42,7 @@ def get_uneven_bins_edges(lefts, num_bins): return numpy.array(tmp) -def update_mrd(ctx: numpy.recarray, cm, crosscorr, mrd): +def update_mrd(ctx: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): """ This computes the mean rate density by means of the multivariate normal function available in scipy. @@ -85,7 +85,8 @@ def update_mrd(ctx: numpy.recarray, cm, crosscorr, mrd): # Compute the MRD for the current rupture mvn = sts.multivariate_normal(mea[slc0], comtx) - partial = get_mrd(mvn, grids) + with monitor: # this is ultra-slow! 
+ partial = get_mrd(mvn, grids) # Check msg = f'{numpy.max(partial):.8f}' @@ -211,7 +212,7 @@ def update_mrd_indirect(ctx, cm, corrm, be_mea, be_sig, mrd, monitor=Monitor()): def calc_mean_rate_dist(ctxt, nsites, cmaker, crosscorr, imt1, imt2, - bins_mea, bins_sig, mon=Monitor()): + bins_mea, bins_sig, method, mon=Monitor()): """ :param ctxt: a sequence of parametric sources :param num_sites: total number of sites (small) @@ -221,6 +222,7 @@ def calc_mean_rate_dist(ctxt, nsites, cmaker, crosscorr, imt1, imt2, :param str imt2: second IMT to consider (must be inside cmaker.imtls) :param bins_mea: bins for the mean :param bins_sig: bins for the standard deviation + :param method: a string 'direct' or 'indirect' """ cm = cmaker.restrict([imt1, imt2]) G = len(cm.gsims) @@ -228,7 +230,12 @@ def calc_mean_rate_dist(ctxt, nsites, cmaker, crosscorr, imt1, imt2, corrm = crosscorr.get_cross_correlation_mtx(cm.imts) mrd = numpy.zeros((len1, len1, nsites, G)) for sid in range(nsites): - update_mrd_indirect( - ctxt[ctxt.sids == sid], cmaker, corrm, - bins_mea, bins_sig, mrd[:, :, sid], mon) + if method == 'direct': + update_mrd( + ctxt[ctxt.sids == sid], cmaker, corrm, + bins_mea, bins_sig, mrd[:, :, sid], mon) + else: + update_mrd_indirect( + ctxt[ctxt.sids == sid], cmaker, corrm, + bins_mea, bins_sig, mrd[:, :, sid], mon) return mrd From 5c1f48a6f7dc7ae562da53b7b58d8a702d2d49b9 Mon Sep 17 00:00:00 2001 From: Marco Pagani Date: Tue, 14 Mar 2023 17:18:09 +0100 Subject: [PATCH 02/53] Adding default --- openquake/hazardlib/calc/mrd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openquake/hazardlib/calc/mrd.py b/openquake/hazardlib/calc/mrd.py index 4fadd901477b..baef3259593f 100644 --- a/openquake/hazardlib/calc/mrd.py +++ b/openquake/hazardlib/calc/mrd.py @@ -212,7 +212,7 @@ def update_mrd_indirect(ctx, cm, corrm, be_mea, be_sig, mrd, monitor=Monitor()): def calc_mean_rate_dist(ctxt, nsites, cmaker, crosscorr, imt1, imt2, - bins_mea, bins_sig, method, mon=Monitor()): + bins_mea, bins_sig, method='indirect', mon=Monitor()): """ :param ctxt: a sequence of parametric sources :param num_sites: total number of sites (small) From 15ce0f3afb8d4e5a7b415be5efcb8f377e332e12 Mon Sep 17 00:00:00 2001 From: Marco Pagani Date: Wed, 15 Mar 2023 08:57:15 +0100 Subject: [PATCH 03/53] Little changes --- openquake/hazardlib/calc/mrd.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openquake/hazardlib/calc/mrd.py b/openquake/hazardlib/calc/mrd.py index baef3259593f..fb10cdaee03e 100644 --- a/openquake/hazardlib/calc/mrd.py +++ b/openquake/hazardlib/calc/mrd.py @@ -232,8 +232,7 @@ def calc_mean_rate_dist(ctxt, nsites, cmaker, crosscorr, imt1, imt2, for sid in range(nsites): if method == 'direct': update_mrd( - ctxt[ctxt.sids == sid], cmaker, corrm, - bins_mea, bins_sig, mrd[:, :, sid], mon) + ctxt[ctxt.sids == sid], cmaker, crosscorr, mrd[:, :, sid], mon) else: update_mrd_indirect( ctxt[ctxt.sids == sid], cmaker, corrm, From 4cd8d9e2df4d721dd9826314607b6de7b8acdb15 Mon Sep 17 00:00:00 2001 From: Marco Pagani Date: Mon, 19 Jun 2023 23:25:47 +0200 Subject: [PATCH 04/53] Fixing variable name --- openquake/hazardlib/calc/mrd.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/openquake/hazardlib/calc/mrd.py b/openquake/hazardlib/calc/mrd.py index ee01402711d6..56976244b69d 100644 --- a/openquake/hazardlib/calc/mrd.py +++ b/openquake/hazardlib/calc/mrd.py @@ -47,7 +47,7 @@ def update_mrd(ctx: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): This 
computes the mean rate density by means of the multivariate normal function available in scipy. - :param ctxt: + :param ctx: A context array for a single site :param cm: A ContextMaker @@ -62,7 +62,7 @@ def update_mrd(ctx: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): corrm = crosscorr.get_cross_correlation_mtx(imts) # Compute mean and standard deviation - [mea, sig, _, _] = cm.get_mean_stds([ctxt]) + [mea, sig, _, _] = cm.get_mean_stds([ctx]) # Get the logarithmic IMLs ll1 = numpy.log(cm.imtls[im1]) @@ -74,7 +74,7 @@ def update_mrd(ctx: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): # is the number of sites trate = 0 for g, _ in enumerate(cm.gsims): - for i, ctx in enumerate(ctxt): + for i, ctx in enumerate(ctx): # Covariance and correlation mtxs slc0 = numpy.index_exp[g, :, i] @@ -211,10 +211,10 @@ def update_mrd_indirect(ctx, cm, corrm, be_mea, be_sig, mrd, monitor=Monitor()): mrd[:, :, gid] += arr[R] * partial -def calc_mean_rate_dist(ctxt, nsites, cmaker, crosscorr, imt1, imt2, +def calc_mean_rate_dist(ctx, nsites, cmaker, crosscorr, imt1, imt2, bins_mea, bins_sig, method='indirect', mon=Monitor()): """ - :param ctxt: a sequence of parametric sources + :param ctx: a sequence of parametric sources :param num_sites: total number of sites (small) :param cmaker: a ContextMaker instance :param crosscorr: a CrossCorrelation instance @@ -232,9 +232,9 @@ def calc_mean_rate_dist(ctxt, nsites, cmaker, crosscorr, imt1, imt2, for sid in range(nsites): if method == 'direct': update_mrd( - ctxt[ctxt.sids == sid], cmaker, crosscorr, mrd[:, :, sid], mon) + ctx[ctx.sids == sid], cmaker, crosscorr, mrd[:, :, sid], mon) else: update_mrd_indirect( - ctxt[ctxt.sids == sid], cmaker, corrm, + ctx[ctx.sids == sid], cmaker, corrm, bins_mea, bins_sig, mrd[:, :, sid], mon) return mrd From bd9725f25a16adfb04bee6421346618e45e11e43 Mon Sep 17 00:00:00 2001 From: Marco Pagani Date: Tue, 20 Jun 2023 09:47:52 +0200 Subject: [PATCH 05/53] Fixing use of context --- openquake/hazardlib/calc/mrd.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/openquake/hazardlib/calc/mrd.py b/openquake/hazardlib/calc/mrd.py index 56976244b69d..9d3514ff72f2 100644 --- a/openquake/hazardlib/calc/mrd.py +++ b/openquake/hazardlib/calc/mrd.py @@ -42,12 +42,12 @@ def get_uneven_bins_edges(lefts, num_bins): return numpy.array(tmp) -def update_mrd(ctx: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): +def update_mrd(ctxs: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): """ This computes the mean rate density by means of the multivariate normal function available in scipy. 
- :param ctx: + :param ctxs: A context array for a single site :param cm: A ContextMaker @@ -62,7 +62,7 @@ def update_mrd(ctx: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): corrm = crosscorr.get_cross_correlation_mtx(imts) # Compute mean and standard deviation - [mea, sig, _, _] = cm.get_mean_stds([ctx]) + [mea, sig, _, _] = cm.get_mean_stds([ctxs]) # Get the logarithmic IMLs ll1 = numpy.log(cm.imtls[im1]) @@ -74,7 +74,7 @@ def update_mrd(ctx: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): # is the number of sites trate = 0 for g, _ in enumerate(cm.gsims): - for i, ctx in enumerate(ctx): + for i, ctx in enumerate(ctxs): # Covariance and correlation mtxs slc0 = numpy.index_exp[g, :, i] @@ -232,9 +232,9 @@ def calc_mean_rate_dist(ctx, nsites, cmaker, crosscorr, imt1, imt2, for sid in range(nsites): if method == 'direct': update_mrd( - ctx[ctx.sids == sid], cmaker, crosscorr, mrd[:, :, sid], mon) + ctx[ctx.sids == sid], cm, crosscorr, mrd[:, :, sid], mon) else: update_mrd_indirect( - ctx[ctx.sids == sid], cmaker, corrm, + ctx[ctx.sids == sid], cm, corrm, bins_mea, bins_sig, mrd[:, :, sid], mon) return mrd From ecf8ab05b54f1d32a2f87ce956ce16f243ec2d05 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 10:02:00 +0200 Subject: [PATCH 06/53] Fix description of magnitude-dependent maximum distance in the adv_manual --- doc/adv-manual/general.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 703ed81f3913..6058fd333685 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -129,7 +129,10 @@ magnitudes between 5 and 6 (the maximum distance in this magnitude range will vary linearly between 0 and 100), keep ruptures up to 200 km for magnitudes between 6 and 7 (with `maximum_distance` increasing linearly from 100 to 200 km from magnitude 6 to magnitude 7), keep -ruptures up to 300 km for magnitudes over 7. +ruptures up to 300 km for magnitudes between 7 and 8 (with +`maximum_distance` increasing linearly from 200 to 300 km from +magnitude 7 to magnitude 8) and discard ruptures for magnitudes +over 8. You can have both trt-dependent and mag-dependent maximum distance:: From 327bf4ec7e14e005d87998417c445c03d25070a2 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 10:22:44 +0200 Subject: [PATCH 07/53] Fix a typo --- doc/adv-manual/general.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 6058fd333685..4c36db48f890 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -318,7 +318,7 @@ to know git and the tools of Open Source development in general, in particular about testing. If this is not the case, you should do some study on your own and come back later. There is a huge amount of resources on the net about these topics. This -manual will focus solely on the OpenQuake engine and it assume that +manual will focus solely on the OpenQuake engine and it assumes that you already know how to use it, i.e. you have read the User Manual first. 
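
The magnitude-dependent ``maximum_distance`` described in PATCH 06 above
amounts to a linear interpolation between (magnitude, distance) pairs,
discarding ruptures outside the magnitude range. Here is a minimal sketch
of that rule, using the pairs from the example in the text (illustrative
only, not the engine's actual filtering code)::

    import numpy

    # (magnitude, distance) pairs as in the example: the cutoff grows from
    # 0 to 100 km between magnitudes 5 and 6, from 100 to 200 between 6 and
    # 7, from 200 to 300 between 7 and 8; outside [5, 8] ruptures are dropped
    def max_distance(mag, pairs=[(5, 0), (6, 100), (7, 200), (8, 300)]):
        mags = [m for m, d in pairs]
        dists = [d for m, d in pairs]
        if mag < mags[0] or mag > mags[-1]:
            return 0.0  # rupture discarded
        return numpy.interp(mag, mags, dists)  # linear interpolation

    print(max_distance(6.5))  # 150.0 km, halfway between 100 and 200
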
From 254a37b3cb163af9fd16c014b40f8201792d047f Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 10:28:04 +0200 Subject: [PATCH 08/53] Fix another typo --- doc/adv-manual/general.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 4c36db48f890..6af196797c55 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -362,7 +362,7 @@ Understanding the engine ------------------------- Once you have the engine installed you can run calculations. We recommend -starting from the demos directory which contains example of hazard and +starting from the demos directory which contains examples of hazard and risk calculations. For instance you could run the area source demo with the following command:: From 57d6ca7c80b5a1efa761cb6a60221bba6c93997c Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 10:39:50 +0200 Subject: [PATCH 09/53] Fix another couple of typos --- doc/adv-manual/general.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 6af196797c55..a1e70d8eb740 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -369,7 +369,7 @@ following command:: $ oq run demos/hazard/AreaSourceClassicalPSHA/job.ini You should notice that we used here the command ``oq run`` while the engine -manual recommend the usage of ``oq engine --run``. There is no contradiction. +manual recommends the usage of ``oq engine --run``. There is no contradiction. The command ``oq engine --run`` is meant for production usage, but here we are doing development, so the recommended command is ``oq run`` which will will be easier to debug thanks to the @@ -390,7 +390,7 @@ For that one can understand the bottlenecks of a calculation and, with experience, he can understand which part of the engine he needs to optimize. Also, it is very useful to play with the parameters of the calculation (like the maximum distance, the area discretization, the magnitude binning, -etc etc) and see how the performance change. There is also a command to +etc etc) and see how the performance changes. There is also a command to plot hazard curves and a command to compare hazard curves between different calculations: it is common to be able to get big speedups simply by changing the input parameters in the `job.ini` of the model, without changing much the From 372324e32e85dc189682acadb7cd38f34398a514 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 10:48:22 +0200 Subject: [PATCH 10/53] Fix another couple of typos --- doc/adv-manual/general.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index a1e70d8eb740..5f22fe20691f 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -547,7 +547,7 @@ The ``ContextMaker`` takes care of the maximum_distance filtering, so in general the number of contexts is lower than the total number of ruptures, since some ruptures are normally discarded, being distant from the sites. -The contexts contains all the rupture, site and distance parameters. +The contexts contain all the rupture, site and distance parameters. Then you have @@ -560,7 +560,7 @@ Then you have In this example, the GMPE ``ToroEtAl2002SHARE`` does not require site parameters, so calling ``ctx.vs30`` will raise an ``AttributeError`` -but in general the contexts contains also arrays of site parameters. 
+but in general the contexts contain also arrays of site parameters. There is also an array of indices telling which are the sites affected by the rupture associated to the context: From dd50c968048775439c346124dcb47814e942af24 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 10:52:32 +0200 Subject: [PATCH 11/53] Fix another couple of typos --- doc/adv-manual/general.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 5f22fe20691f..1a6b347fa680 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -615,9 +615,9 @@ table has a structure like this:: 2.0,5.0,MEAN,6.43850933e-03,3.61047741e-04,4.57949482e-04,7.24558049e-04,9.44495571e-04,5.11252304e-04,2.21076069e-04,4.73435138e-05 ... -The columns starting with ``rup_`` contains rupture parameters (the +The columns starting with ``rup_`` contain rupture parameters (the magnitude in this example) while the columns starting with ``dist_`` -contains distance parameters. The column ``result_type`` is a string +contain distance parameters. The column ``result_type`` is a string in the set {"MEAN", "INTER_EVENT_STDDEV", "INTRA_EVENT_STDDEV", "TOTAL_STDDEV"}. The remaining columns are the expected results for each intensity measure type; in the the example the IMTs are PGV, PGA, From 513720a134f74c6abd2547cfda601626b3473f2d Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 10:55:17 +0200 Subject: [PATCH 12/53] Fix another couple of typos --- doc/adv-manual/general.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 1a6b347fa680..47b1308784c8 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -662,7 +662,7 @@ code in https://github.com/gem/oq-engine/blob/master/openquake/hazardlib/tests/g Running the engine tests ---------------------------------- -If you are a hazard scientists contributing a bug fix to a GMPE (or any +If you are a hazard scientist contributing a bug fix to a GMPE (or any other kind of bug fix) you may need to run the engine tests and possibly change the expected files if there is a change in the numbers. The way to do it is to start the dbserver and then run the tests from the repository root:: @@ -687,7 +687,7 @@ Architecture of the OpenQuake engine The engine is structured as a regular scientific application: we try to perform calculations as much as possible in memory and when it is not possible intermediate results are stored in HDF5 format. -We try work as much as possible in terms of arrays which are +We try to work as much as possible in terms of arrays which are efficiently manipulated at C/Fortran speed with a stack of well established scientific libraries (numpy/scipy). 
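
The context arrays discussed in the documentation patches above (and
manipulated as ``ctx[ctx.sids == sid]`` in the ``mrd.py`` patches at the
top of this series) are structured numpy arrays with one row per
(rupture, site) pair. A minimal sketch of the idea, with made-up values
(this is not the actual ``ContextMaker`` output)::

    import numpy

    # field names follow the engine's conventions (sids = site IDs,
    # rrup = rupture distance), but the values here are invented
    ctx = numpy.zeros(5, dtype=[('sids', numpy.uint32), ('mag', float),
                                ('rrup', float)])
    ctx['sids'] = [0, 0, 1, 1, 2]
    ctx['mag'] = [5.5, 6.0, 5.5, 6.0, 6.5]
    ctx['rrup'] = [10., 40., 25., 70., 95.]

    # rows affecting site 1, in the same spirit as ctx[ctx.sids == sid]
    print(ctx[ctx['sids'] == 1])
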
From 8b0836b5c71154ec2f2a80530b7536e98d5c31d1 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Mon, 26 Jun 2023 11:59:47 +0200 Subject: [PATCH 13/53] Fixed tau<->phi in LanzanoLuzi2019 --- openquake/hazardlib/gsim/lanzano_luzi_2019.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openquake/hazardlib/gsim/lanzano_luzi_2019.py b/openquake/hazardlib/gsim/lanzano_luzi_2019.py index 41b1881331ca..861110c9514e 100644 --- a/openquake/hazardlib/gsim/lanzano_luzi_2019.py +++ b/openquake/hazardlib/gsim/lanzano_luzi_2019.py @@ -141,7 +141,7 @@ def compute(self, ctx: np.recarray, imts, mean, sig, tau, phi): _compute_distance(ctx.rhypo, C, self.kind) + _get_site_amplification(ctx, C)) - istddevs = [C['sigma'], C['phi'], C['tau']] + istddevs = [C['sigma'], C['tau'], C['phi']] # Convert units to g, but only for PGA and SA (not PGV) if imt.string.startswith(("SA", "PGA")): From 284b82dbfd9964de75e7a1caa705b5d8e8967eb4 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Mon, 26 Jun 2023 12:27:06 +0200 Subject: [PATCH 14/53] Updated changelog [ci skip] --- debian/changelog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/debian/changelog b/debian/changelog index eaa678c4d637..2a7ad31cd03f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,6 @@ + [Michele Simionato] + * Fixed tau-phi inversion in LanzanoLuzi2019 + [Claudio Schill] * Fixed sorting bug in the sigma_mu adjustment factor in the Kuehn et al. (2020) GMM From 91ff6f5cd28093084b795191fd61f749165ec59c Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 15:27:59 +0200 Subject: [PATCH 15/53] Fix reference to current CI environments and another typo --- doc/adv-manual/general.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 47b1308784c8..1037c14281be 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -818,8 +818,8 @@ same as testability: it is essential to have a suite of tests checking that the calculators are providing the expected outputs against a set of predefined inputs. Currently we have thousands of tests which are run multiple times per day in our Continuous Integration environments -r(avis, GitLab, Jenkins), split into unit tests, end-to-end tests and -long running tests. +(GitHub Actions, GitLab Pipelines), split into unit +tests, end-to-end tests and long running tests. How the parallelization works in the engine =========================================== @@ -850,7 +850,7 @@ is such that it is very simple to replace celery/rabbitmq with other parallelization mechanisms: people interested in doing so should just contact us. -Another tricky aspects of parallelizing large scientific calculations is +Another tricky aspect of parallelizing large scientific calculations is that the amount of data returned can exceed the 4 GB limit of Python pickles: in this case one gets ugly runtime errors. 
The solution we found is to make it possible to yield partial results from a task: in this way instead of From 756389067b4b9293dffa3dc23b297c5dbe1beb7c Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 15:57:55 +0200 Subject: [PATCH 16/53] Fix another couple of typos --- doc/adv-manual/general.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 1037c14281be..109802b726d7 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -858,7 +858,7 @@ returning say 40 GB from one task, one can yield 40 times partial results of 1 GB each, thus bypassing the 4 GB limit. It is up to the implementor to code the task carefully. In order to do so, it is essential to have in place some monitoring mechanism measuring how much data is returned -back from a task, as well as other essential informations like how much +back from a task, as well as other essential information like how much memory is allocated and how long it takes to run a task. To this aim the OpenQuake engine offers a ``Monitor`` class @@ -958,7 +958,7 @@ Hence, since recent versions of the engine we are no longer returning data from the tasks via celery/rabbitmq: instead, we use zeromq. This is hidden from the user, but internally the engine keeps track of all tasks that were submitted and waits until they send the message that -they finished. If one tasks runs out of memory badly and never sends +they finished. If one task runs out of memory badly and never sends the message that it finished, the engine may hang, waiting for the results of a task that does not exist anymore. You have to be careful. What we did in our cluster is to set some memory limit on the From 33ce8d563c6ffdb888ea60a480b4a7e5b9927d7e Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 16:09:32 +0200 Subject: [PATCH 17/53] Fix a few other typos --- doc/adv-manual/general.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 109802b726d7..41db60549c25 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -981,7 +981,7 @@ Suppose you want to code a character-counting algorithm, which is a textbook exercise in parallel computing and suppose that you want to store information about the performance of the algorithm. Then you should use the OpenQuake ``Monitor`` class, as well as the utility -``openquake.baselib.commonlib.hdf5new`` that build an empty datastore +``openquake.baselib.commonlib.hdf5new`` that builds an empty datastore for you. Having done that, the ``openquake.baselib.parallel.Starmap`` class can take care of the parallelization for you as in the following example: @@ -989,7 +989,7 @@ example: .. include:: char_count_par.py :code: python -The name ``Starmap`` was chosen it looks very similar to +The name ``Starmap`` was chosen because it looks very similar to how ``multiprocessing.Pool.starmap`` works, the only apparent difference being in the additional monitor argument:: @@ -997,8 +997,8 @@ being in the additional monitor argument:: In reality the ``Starmap`` has a few other differences: -1. it does not use the multiprocessing mechanism to returns back the results, - it uses zmq instead; +1. it does not use the multiprocessing mechanism to return back the results; + it uses zmq instead 2. 
thanks to that, it can be extended to generator functions and can yield partial results, thus overcoming the limitations of multiprocessing 3. the ``Starmap`` has a ``.submit`` method and it is actually more similar to @@ -1031,7 +1031,7 @@ parallelizing complex situations where it is expensive to use a single starmap. However, there is limit on the number of starmaps that can be alive at the same moment. -Moreover the ``Starmap`` has a ``.shutdown`` methods that allows to +Moreover the ``Starmap`` has a ``.shutdown`` method that allows to shutdown the underlying pool. The idea is to submit the text of each file - here I am considering .rst files, From f6f7a104cb8015571c47d221e99172eb06586108 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 16:42:25 +0200 Subject: [PATCH 18/53] Fix another couple of typos --- doc/adv-manual/general.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 41db60549c25..bb3b6d7cbe42 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -1050,7 +1050,7 @@ However, sometimes you *must* be tech-savvy: for instance if you want to post-process hundreds of GB of ground motion fields produced by an event based calculation, you should *not* use the CSV output, at least if you care about efficiency. To manage this case (huge amounts of data) -there a specific solution, which is also able to manage the case of data +there is a specific solution, which is also able to manage the case of data lacking a predefined exporter: the ``Extractor`` API. There are actually two different kind of extractors: the simple @@ -1097,7 +1097,7 @@ or even all realizations: >> dic = vars(extractor.get('hcurves?kind=rlzs')) Here is an example of using the `WebExtractor` to retrieve hazard maps. -Here we assumes that there is available in a remote machine where there is +Here we assume that in a remote machine there is a WebAPI server running, a.k.a. the Engine Server. The first thing to is to set up the credentials to access the WebAPI. There are two cases: From ec3259ca8cddeac305c452b2586df640878cbe4f Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 16:55:04 +0200 Subject: [PATCH 19/53] Fix a few other typos --- doc/adv-manual/general.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index bb3b6d7cbe42..60f37984d646 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -1148,7 +1148,7 @@ Here are three examples of use:: $ oq plot 'hmaps?kind=mean&imt=PGA' $ oq plot 'uhs?kind=mean&site_id=0' -The ``site_id`` is optional; if missing only the first site (``site_id=0``) +The ``site_id`` is optional; if missing, only the first site (``site_id=0``) will be plotted. If you want to plot all the realizations you can do:: $ oq plot 'hcurves?kind=rlzs&imt=PGA' @@ -1162,7 +1162,7 @@ realizations and also the mean the command to give is:: $ oq plot 'hcurves?kind=rlzs&kind=mean&imt=PGA' -If you want to plot the mean and the median the command is:: +If you want to plot the median and the mean the command is:: $ oq plot 'hcurves?kind=quantile-0.5&kind=mean&imt=PGA' @@ -1178,7 +1178,7 @@ specifying a kind that is not available you will get an error. 
Extracting disaggregation outputs --------------------------------- -Disaggregation outputs are particularly complex and they are stored in +Disaggregation outputs are particularly complex and they are stored in the datastore in different ways depending on the engine version. Here we will give a few examples for the Disaggregation Demo, which has the flag ``individual_rlzs`` set. If you run the demos with a recent enough version From 303c8ee93cad47c62e4148729fa41857c51cbb91 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Mon, 26 Jun 2023 17:23:01 +0200 Subject: [PATCH 20/53] Remove references to celery/rabbitmq from adv_manual --- doc/adv-manual/general.rst | 61 ++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 35 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 60f37984d646..380a95971387 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -827,8 +827,7 @@ How the parallelization works in the engine The engine builds on top of existing parallelization libraries. Specifically, on a single machine it is based on multiprocessing, which is part of the Python standard library, while on a cluster -it is based on the combination celery/rabbitmq, which are well-known -and maintained tools. +it is based on zeromq, which is a well-known and maintained library. While the parallelization used by the engine may look trivial in theory (it only addresses embarrassingly parallel problems, not true concurrency) @@ -839,14 +838,13 @@ without affecting other calculations that may be running concurrently. Because of this requirement, we abandoned `concurrent.futures`, which is also in the standard library, but is lacking the ability to kill the pool of processes, which is instead available in multiprocessing with the -`Pool.shutdown` method. For the same reason, we discarded `dask`, which -is a lot more powerful than `celery` but lacks the revoke functionality. +`Pool.shutdown` method. For the same reason, we discarded `dask`. Using a real cluster scheduling mechanism (like SLURM) would be of course better, but we do not want to impose on our users a specific cluster -architecture. celery/rabbitmq have the advantage of being simple to install +architecture. Zeromq has the advantage of being simple to install and manage. Still, the architecture of the engine parallelization library -is such that it is very simple to replace celery/rabbitmq with other +is such that it is very simple to replace zeromq with other parallelization mechanisms: people interested in doing so should just contact us. @@ -898,20 +896,14 @@ hazard curves, because the algorithm is considering one rupture at the time and it is not accumulating ruptures in memory, differently from what happens when sampling the ruptures in event based. -If you have ever coded in celery, you will see that the OpenQuake engine -concept of task is different: there is no ``@task`` decorator and while at the -end engine tasks will become celery tasks this is hidden to the developer. 
-The reason is that we needed a celery-independent abstraction layer to make -it possible to use different kinds of parallelization frameworks/ - From the point of view of the coder, in the engine there is no difference -between a task running on a cluster using celery and a task running locally +between a task running on a cluster using zeromq and a task running locally using ``multiprocessing.Pool``: they are coded the same, but depending -on a configuration parameter in openquake.cfg (``distribute=celery`` +on a configuration parameter in openquake.cfg (``distribute=zmq`` or ``distribute=processpool``) the engine will treat them differently. You can also set an environment variable ``OQ_DISTRIBUTE``, which takes the precedence over openquake.cfg, to specify which kind of distribution -you want to use (``celery`` or ``processpool``): this is mostly used +you want to use (``zmq`` or ``processpool``): this is mostly used when debugging, when you typically insert a breakpoint in the task and then run the calculation with @@ -949,20 +941,19 @@ a single writer). This approach bypasses all the limitations of the SWMR mode in HDF5 and did not require a large refactoring of our existing code. -Another tricky point in cluster situations is that rabbitmq is not good -at transferring gigabytes of data: it was meant to manage lots of small -messages, but here we are perverting it to manage huge messages, i.e. -the large arrays coming from a scientific calculations. - -Hence, since recent versions of the engine we are no longer returning -data from the tasks via celery/rabbitmq: instead, we use zeromq. This +Another tricky point in cluster situations is that we need +to transfer gigabytes of data, i.e. the large arrays coming from +scientific calculations, rather than lots of small +messages. Hence, since recent versions of the engine, we are returning +data from the tasks via zeromq instead of using celery/rabbitmq as we +did in the past. This is hidden from the user, but internally the engine keeps track of all tasks that were submitted and waits until they send the message that they finished. If one task runs out of memory badly and never sends the message that it finished, the engine may hang, waiting for the -results of a task that does not exist anymore. You have to be +results of a task that does not exist anymore. You have to be careful. What we did in our cluster is to set some memory limit on the -celery user with the cgroups technology, so that an out of memory task +zmq user with the cgroups technology, so that an out of memory task normally fails in a clean way with a Python MemoryError, sends the message that it finished and nothing particularly bad happens. Still, in situations of very heavy load the OOM killer may enter in action @@ -1487,20 +1478,20 @@ to use them. You can see the full list of commands by running `oq --help`:: - $ oq --help - usage: oq [--version] - {workerpool,webui,dbserver,info,ltcsv,dump,export,celery,plot_losses,restore,plot_assets,reduce_sm,check_input,plot_ac,upgrade_nrml,shell,plot_pyro,nrml_to,postzip,show,workers,abort,engine,reaggregate,db,compare,renumber_sm,download_shakemap,importcalc,purge,tidy,zip,checksum,to_hdf5,extract,reset,run,show_attrs,prepare_site_model,sample,plot} - ... 
+ $ oq --help + usage: oq [-h] [-v] + {shell,upgrade_nrml,reduce_smlt,show_attrs,prepare_site_model,nrml_from,shakemap2gmfs,importcalc,run,show,purge,renumber_sm,workers,postzip,plot_assets,db,dbserver,tidy,extract,sample,to_hdf5,ltcsv,reaggregate,restore,mosaic,check_input,dump,info,zip,abort,nrml_to,engine,reset,checksum,export,webui,compare,plot,reduce_sm} + ... - positional arguments: - {workerpool,webui,dbserver,info,ltcsv,dump,export,celery,plot_losses,restore,plot_assets,reduce_sm,check_input,plot_ac,upgrade_nrml,shell,plot_pyro,nrml_to,postzip,show,workers,abort,engine,reaggregate,db,compare,renumber_sm,download_shakemap,importcalc,purge,tidy,zip,checksum,to_hdf5,extract,reset,run,show_attrs,prepare_site_model,sample,plot} - available subcommands; use oq --help + positional arguments: + {shell,upgrade_nrml,reduce_smlt,show_attrs,prepare_site_model,nrml_from,shakemap2gmfs,importcalc,run,show,purge,renumber_sm,workers,postzip,plot_assets,db,dbserver,tidy,extract,sample,to_hdf5,ltcsv,reaggregate,restore,mosaic,check_input,dump,info,zip,abort,nrml_to,engine,reset,checksum,export,webui,compare,plot,reduce_sm} + available subcommands; use oq --help - optional arguments: - -h, --help show this help message and exit - -v, --version show program's version number and exit + options: + -h, --help show this help message and exit + -v, --version show program's version number and exit -This is the output that you get at the present time (engine 3.11); depending +This is the output that you get at the present time (engine 3.17); depending on your version of the engine you may get a different output. As you see, there are several commands, like `purge`, `show_attrs`, `export`, `restore`, ... You can get information about each command with `oq --help`; From 8cbe396f3ae6a27a648371122c8b5d22a6769bf2 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Tue, 27 Jun 2023 09:04:50 +0200 Subject: [PATCH 21/53] Fix name of the user that previously was celery --- doc/adv-manual/general.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 380a95971387..fce2cec2c421 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -953,7 +953,7 @@ they finished. If one task runs out of memory badly and never sends the message that it finished, the engine may hang, waiting for the results of a task that does not exist anymore. You have to be careful. What we did in our cluster is to set some memory limit on the -zmq user with the cgroups technology, so that an out of memory task +openquake user with the cgroups technology, so that an out of memory task normally fails in a clean way with a Python MemoryError, sends the message that it finished and nothing particularly bad happens. Still, in situations of very heavy load the OOM killer may enter in action From 4c8c04f427f3f3999235bb7666af365c78451029 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Tue, 27 Jun 2023 09:10:21 +0200 Subject: [PATCH 22/53] Fix another couple of typos --- doc/adv-manual/general.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index fce2cec2c421..e17e78af13c9 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -1285,7 +1285,7 @@ Example: how many events per magnitude? When analyzing an event based calculation, users are often interested in checking the magnitude-frequency distribution, i.e. 
to count how many -events of a given magnitude present in the stochastic event set for +events of a given magnitude are present in the stochastic event set for a fixed investigation time and a fixed ``ses_per_logic_tree_path``. You can do that with code like the following: @@ -1302,7 +1302,7 @@ You can do that with code like the following: for mag, grp in events.groupby(['mag']): print(mag, len(grp)) # number of events per group -If you want to now the number of events per realization and per stochastic +If you want to know the number of events per realization and per stochastic event set you can just refine the `groupby` clause, using the list ``['mag', 'rlz_id', 'ses_id']`` instead of simply ``['mag']``. From fda568eea9fbbe21236f4414cdb87b95938a6cd2 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Tue, 27 Jun 2023 09:53:07 +0200 Subject: [PATCH 23/53] Fix a few other typos --- doc/adv-manual/general.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index e17e78af13c9..3a4ddff4ffae 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -1522,7 +1522,7 @@ features. logic tree of the calculation. 2. When invoked with the `--report` option, it produces a `.rst` report with -several important informations about the computation. It is ESSENTIAL in the +important information about the computation. It is ESSENTIAL in the case of large calculations, since it will give you an idea of the feasibility of the computation without running it. Here is an example of usage:: @@ -1530,7 +1530,7 @@ of the computation without running it. Here is an example of usage:: Generated /tmp/report_1644.rst -You can open `/tmp/report_1644.rst` and read the informations listed there +You can open `/tmp/report_1644.rst` and read the information listed there (`1644` is the calculation ID, the number will be different each time). 3. It can be invoked without a `job.ini` file, and it that case it provides @@ -1569,7 +1569,7 @@ The list of available exports (i.e. the datastore keys and the available export formats) can be extracted with the `oq info exports` command; the number of exporters defined changes at each version:: - $ oq info exports$ oq info exports + $ oq info exports ? "aggregate_by" ['csv'] ? "disagg_traditional" ['csv'] ? "loss_curves" ['csv'] @@ -1618,7 +1618,7 @@ a specific realization (say the first one), you can use:: but currently this only works for `csv` and `xml`. The exporters are one of the most time-consuming parts on the engine, mostly because of the sheer number -of them; the are more than fifty exporters and they are always increasing. +of them; there are more than fifty exporters and they are always increasing. If you need new exports, please [add an issue on GitHub](https://github.com/gem/oq-engine/issues). oq zip From 7f6116e1f1b76fb6c92946ed20519eb6447aff21 Mon Sep 17 00:00:00 2001 From: Paolo Tormene Date: Tue, 27 Jun 2023 09:57:42 +0200 Subject: [PATCH 24/53] Fix the syntax of a link to a webpage --- doc/adv-manual/general.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/adv-manual/general.rst b/doc/adv-manual/general.rst index 3a4ddff4ffae..fbac12cc1313 100644 --- a/doc/adv-manual/general.rst +++ b/doc/adv-manual/general.rst @@ -1619,7 +1619,8 @@ a specific realization (say the first one), you can use:: but currently this only works for `csv` and `xml`. 
The exporters are one of the most time-consuming parts on the engine, mostly because of the sheer number
 of them; there are more than fifty exporters and they are always increasing.
-If you need new exports, please [add an issue on GitHub](https://github.com/gem/oq-engine/issues).
+If you need new exports,
+please `add an issue on GitHub <https://github.com/gem/oq-engine/issues>`_.
 
 oq zip
 ------

From f13c5b39ac0fc54938c09e9ce9dcc1c11a4c56a8 Mon Sep 17 00:00:00 2001
From: Paolo Tormene
Date: Tue, 27 Jun 2023 10:58:34 +0200
Subject: [PATCH 25/53] Fix another couple of typos

---
 doc/adv-manual/logic_trees.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/adv-manual/logic_trees.rst b/doc/adv-manual/logic_trees.rst
index 1d52f15fc8b6..627b87a45272 100644
--- a/doc/adv-manual/logic_trees.rst
+++ b/doc/adv-manual/logic_trees.rst
@@ -80,10 +80,10 @@ and the 6 possible paths can be extracted as follows:
 
 The empty square brackets means that the branchset should be applied to all
 branches in the previous branchset and correspond to the ``applyToBranches``
-tag in the XML version of the logic tree. If ``applyToBranches`` is missing
+tag in the XML version of the logic tree. If ``applyToBranches`` is missing,
 the logic tree is multiplicative and the total number of paths can be
 obtained simply by multiplying the number of paths in each branchset.
-When ``applyToBranches`` is used the logic tree becomes additive and the
+When ``applyToBranches`` is used, the logic tree becomes additive and the
 total number of paths can be obtained by summing the number of paths in
 the different subtrees. For instance, let us extend the previous example
 by adding another ``extendModel`` branchset and by using ``applyToBranches``:
@@ -262,7 +262,7 @@ export it, you will get a CSV file with the following structure::
     323,ACCCC~BB,3.1111853e-03
 
 For each realization there is a ``branch_path`` string which is split in
-two parts separated by a tilde. The left part describe the branches of
+two parts separated by a tilde. The left part describes the branches of
 the source model logic tree and the right part the branches of the gmpe
 logic tree. In past versions of the engine the branch path was using
 directly the branch IDs, so it was easy to assess the correspondence

From e5347e7fa9f7d972cdf4532f3d59a3a1d621ab6d Mon Sep 17 00:00:00 2001
From: Paolo Tormene
Date: Tue, 27 Jun 2023 11:11:28 +0200
Subject: [PATCH 26/53] Fix another couple of typos

---
 doc/adv-manual/logic_trees.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/doc/adv-manual/logic_trees.rst b/doc/adv-manual/logic_trees.rst
index 627b87a45272..57c2e613bbed 100644
--- a/doc/adv-manual/logic_trees.rst
+++ b/doc/adv-manual/logic_trees.rst
@@ -322,8 +322,7 @@ and then, using the correspondence table ``abbrev->uvalue``, to::
     "[ChiouYoungs2008]" "[ToroEtAl2002]"
 
 For convenience, the engine provides a simple command to display the content
-of a realization, given the realization number, thus answering the
-FAQ::
+of a realization, given the realization number::
 
     $ oq show rlz:322
     | uncertainty_type | uvalue |
@@ -402,7 +401,7 @@ Reduction of the logic tree
 ----------------------------------------------
 
 The simplest case of logic tree reduction is when the actual sources
 do not span the full range of tectonic region types in the GMPE logic
 tree file. This happens very often.
-FOr instance, in the SHARE calculation for Europe
+For instance, in the SHARE calculation for Europe
 the GMPE logic tree potentially contains 1280 realizations
 coming from 7 different tectonic region types:

From b872724be4bc58b067f582cf7c8234df91413b80 Mon Sep 17 00:00:00 2001
From: Michele Simionato
Date: Tue, 27 Jun 2023 16:03:19 +0200
Subject: [PATCH 27/53] Doc fix

---
 debian/changelog               |  2 +-
 doc/adv-manual/event_based.rst | 30 ++++++++++++++----------------
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index 56a9f6d4e6b4..39100d48de86 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,7 +3,7 @@
    * Fixed another bug with conditioned GMFs appearing as the error
      `array[m, n, bad] = exp(mean_covs[0, g, m, n], im)
      TypeError: list indices must be integers or slices, not tuple`
-
+
  [Claudio Schill]
   * Fixed sorting bug in the sigma_mu adjustment factor in the
     Kuehn et al. (2020) GMM

diff --git a/doc/adv-manual/event_based.rst b/doc/adv-manual/event_based.rst
index cef853dcf339..0a8b0fba0a8b 100644
--- a/doc/adv-manual/event_based.rst
+++ b/doc/adv-manual/event_based.rst
@@ -710,8 +710,8 @@ Rupture sampling: how to get it wrong
 
 Rupture samplings is *much more complex than one could expect* and in
 many respects *surprising*. In the many years of existence of the
-engine, multiple approached were tried and you can expect some
-detail of the rupture sampling mechanism to be different nearly at every
+engine, multiple approaches were tried and you can expect the
+details of the rupture sampling mechanism to be different nearly at every
 version of the engine.
 
 Here we will discuss some tricky points that may help you understand
@@ -721,8 +721,10 @@ rupture sampling is nontrivial.
 
 We will start with the first subtlety, the *interaction between
 sampling and filtering*. The short version is that you should *first
-sample and then filter*. Here is the long version. Consider the
-following code emulating rupture sampling for poissonian ruptures:
+sample and then filter*.
+
+Here is the long version. Consider the following code emulating
+rupture sampling for poissonian ruptures:
 
 .. code-block::
 
@@ -756,7 +758,7 @@ feature and it is able to discard ruptures below the minimum magnitude.
 But how should it work? The natural approach to follow, for
 performance-oriented applications, would be to first discard the low
 magnitudes and then perform the sampling. However, that would have effects
-that would be surprising for many users. Consider the following two alternative:
+that would be surprising for most users. Consider the following two alternatives:
 
 .. code-block::
 
@@ -791,17 +793,12 @@ are consistent with the no-filtering case:
 
 >> calc_n_occ_before_filtering(fake_ruptures, eff_time, seed, min_mag)
 [ 9  6 13  7  6  6 10]
 
-Historically the engine followed the filter-early approach and it is in
-the process of transiting to the filter-late approach. The transition is
-extremely tricky and error prone, since the minimum magnitude feature is
-spread across many modules, and it could easily go on for years.
-
 The problem with the filtering is absolutely general and not restricted
 only to the magnitude filtering: it is exactly the same also for distance
 filtering. Suppose you have a ``maximum_distance`` of 300 km and than
 you decide that you want to increase it to 301 km. One would expect this
-change to have a minor impact; instead, the occupation numbers will be
-completely different and you sample a very different set of ruptures.
+change to have a minor impact; instead, you may end up sampling a very
+different set of ruptures.
 
 It is true that average quantities like the hazard curves obtained
 from the ground motion fields will converge for long enough effective time,
 however in practice you are always in situations were
 
 1. you cannot perform the calculation for a long enough effective time
    since it would be computationally prohibitive
-2. you are interested on quantities which are strongly sensitive to a
-   change in the sampling, like the Maximum Probable Loss at some return period
+2. you are interested in quantities which are strongly sensitive to any
+   change, like the Maximum Probable Loss at some return period
 
 In such situations changing the site collection (or changing
-the maximum distance which is akin to changing the site collection) can change
-the sampling of the ruptures significantly.
+the maximum distance which is akin to changing the site collection)
+can change the sampling of the ruptures significantly, at least for
+engine versions lower than 3.17.
 
 Users wanting to compare the GMFs or the risk on different site
 collections should be aware of this effect; the solution is to first sample the

From a589f5194204cd9cdaa9ad9a3826483bca7d7209 Mon Sep 17 00:00:00 2001
From: Michele Simionato
Date: Wed, 28 Jun 2023 08:36:50 +0200
Subject: [PATCH 28/53] Raise an error when the same parameter is set in
 different sections

---
 debian/changelog                            |  4 ++++
 openquake/commonlib/readinput.py            | 10 +++++-----
 openquake/commonlib/tests/readinput_test.py | 15 +++------------
 3 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index 39100d48de86..c059615ba60d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,7 @@
+ [Paolo Tormene, Michele Simionato]
+ * Raise an error when the same parameter is set in different sections
+   of the job.ini file
+
  [Michele Simionato]
   * Fixed tau-phi inversion in LanzanoLuzi2019
   * Fixed another bug with conditioned GMFs appearing as the error

diff --git a/openquake/commonlib/readinput.py b/openquake/commonlib/readinput.py
index 0cc8c99f1182..5686c1d1a364 100644
--- a/openquake/commonlib/readinput.py
+++ b/openquake/commonlib/readinput.py
@@ -236,15 +236,15 @@ def update(params, items, base_path):
             params[key] = value
 
 
-def _warn_about_duplicates(cp):
+def check_params(cp, fname):
     params_sets = [
         set(cp.options(section)) for section in cp.sections()]
     for pair in itertools.combinations(params_sets, 2):
         params_intersection = sorted(set.intersection(*pair))
         if params_intersection:
-            logging.warning(
-                f'Parameter(s) {params_intersection} is(are) defined in'
-                f' multiple sections')
+            raise InvalidFile(
+                f'{fname}: parameter(s) {params_intersection} is(are) defined'
+                ' in multiple sections')
 
 
 # NB: this function must NOT log, since it is called when the logging
@@ -286,7 +286,7 @@ def get_params(job_ini, kw={}):
     params = dict(base_path=base_path, inputs={'job_ini': job_ini})
     cp = configparser.ConfigParser(interpolation=None)
     cp.read([job_ini], encoding='utf-8-sig')  # skip BOM on Windows
-    _warn_about_duplicates(cp)
+    check_params(cp, job_ini)
     dic = {}
     for sect in cp.sections():
         dic.update(cp.items(sect))

diff --git a/openquake/commonlib/tests/readinput_test.py b/openquake/commonlib/tests/readinput_test.py
index 0059caf693ec..19060902c7a2 100644
--- a/openquake/commonlib/tests/readinput_test.py
+++ b/openquake/commonlib/tests/readinput_test.py
@@ -160,19 +160,10 @@ def test_duplicated_parameter(self):
             bar = foo
aggregate_by = taxonomy, policy """) - with self.assertLogs() as captured: + with self.assertRaises(InvalidFile) as ctx: readinput.get_params(job_config, {}) - warning_general_bar = ( - "Parameter(s) ['aggregate_by', 'foo'] is(are) defined in" - " multiple sections") - warning_foo_bar = ("Parameter(s) ['bar'] is(are) defined in" - " multiple sections") - self.assertEqual(captured.records[0].levelname, 'WARNING') - self.assertEqual( - captured.records[0].getMessage(), warning_general_bar) - self.assertEqual(captured.records[1].levelname, 'WARNING') - self.assertEqual( - captured.records[1].getMessage(), warning_foo_bar) + msg = "['aggregate_by', 'foo'] is(are) defined in multiple sections" + self.assertIn(msg, str(ctx.exception)) def sitemodel(): From 6bd2f8f591ee7a25be1b782a5817d62f2017bac7 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Wed, 28 Jun 2023 08:55:58 +0200 Subject: [PATCH 29/53] Fixed test --- openquake/qa_tests_data/classical_risk/case_master/job.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/openquake/qa_tests_data/classical_risk/case_master/job.ini b/openquake/qa_tests_data/classical_risk/case_master/job.ini index 7fa316cf889e..685dfa5a6c57 100644 --- a/openquake/qa_tests_data/classical_risk/case_master/job.ini +++ b/openquake/qa_tests_data/classical_risk/case_master/job.ini @@ -56,7 +56,6 @@ risk_investigation_time = 50 lrem_steps_per_interval = 1 [risk_outputs] -quantiles = 0.15, 0.50, 0.85 conditional_loss_poes = 0.02, 0.10 [export] From 32f32ce8e6c230d73e8d5724fb1f84999f047a67 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Wed, 28 Jun 2023 09:20:19 +0200 Subject: [PATCH 30/53] Fixed another test --- openquake/qa_tests_data/event_based_risk/case_1f/job.ini | 3 --- 1 file changed, 3 deletions(-) diff --git a/openquake/qa_tests_data/event_based_risk/case_1f/job.ini b/openquake/qa_tests_data/event_based_risk/case_1f/job.ini index b2a0cf629f51..2a3cd55f380f 100644 --- a/openquake/qa_tests_data/event_based_risk/case_1f/job.ini +++ b/openquake/qa_tests_data/event_based_risk/case_1f/job.ini @@ -15,9 +15,6 @@ structural_vulnerability_file = vulnerability_model.xml [hazard] asset_hazard_distance = 20.0 -[sites] -exposure_file = exposure_model.xml - [site_params] reference_vs30_type = measured reference_vs30_value = 760.0 From c8c98a0d2bd17db556c983d5f2bbb5a7f5c6f00d Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Wed, 28 Jun 2023 09:40:25 +0200 Subject: [PATCH 31/53] Fixed another test --- openquake/qa_tests_data/event_based/case_29/job.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/openquake/qa_tests_data/event_based/case_29/job.ini b/openquake/qa_tests_data/event_based/case_29/job.ini index 7abe46aa76e5..1fc469b83865 100644 --- a/openquake/qa_tests_data/event_based/case_29/job.ini +++ b/openquake/qa_tests_data/event_based/case_29/job.ini @@ -34,7 +34,6 @@ intensity_measure_types_and_levels = {"PGA": logscale(0.005, 3.00, 20)} truncation_level = 5 maximum_distance = {'default': 100.} minimum_magnitude = 4.0 -ses_per_logic_tree_path = 100 [output] From 7796503b7df7c30ebdeb6c32600bca22c6b584aa Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Wed, 28 Jun 2023 10:39:10 +0200 Subject: [PATCH 32/53] Fixed another test --- openquake/qa_tests_data/logictree/case_28/job.ini | 3 --- 1 file changed, 3 deletions(-) diff --git a/openquake/qa_tests_data/logictree/case_28/job.ini b/openquake/qa_tests_data/logictree/case_28/job.ini index 708ff224f6be..d1b288b74198 100644 --- a/openquake/qa_tests_data/logictree/case_28/job.ini +++ 
b/openquake/qa_tests_data/logictree/case_28/job.ini @@ -30,10 +30,7 @@ reference_depth_to_1pt0km_per_sec = 100.0 source_model_logic_tree_file = source_model_logic_tree.xml gsim_logic_tree_file = gmpe_logic_tree.xml - investigation_time = 50.0 -poes = 0.1 0.02 - intensity_measure_types_and_levels = { "PGA": [0.005, 0.007, 0.0098, 0.0137, 0.0192, 0.0269, 0.0376, 0.0527, 0.0738, 0.103, 0.145, 0.203, 0.284, 0.397, 0.556, 0.778, 1.09, 1.52, 2.13], "SA(0.05)": [0.005, 0.007, 0.0098, 0.0137, 0.0192, 0.0269, 0.0376, 0.0527, 0.0738, 0.103, 0.145, 0.203, 0.284, 0.397, 0.556, 0.778, 1.09, 1.52, 2.13], From 763adfef1e8dccf41b383f638d17824d65330131 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Wed, 28 Jun 2023 11:30:41 +0200 Subject: [PATCH 33/53] Fixed another test --- openquake/qa_tests_data/scenario_risk/case_3/job.ini | 2 -- 1 file changed, 2 deletions(-) diff --git a/openquake/qa_tests_data/scenario_risk/case_3/job.ini b/openquake/qa_tests_data/scenario_risk/case_3/job.ini index d38e7a108815..662b3b3d9d20 100644 --- a/openquake/qa_tests_data/scenario_risk/case_3/job.ini +++ b/openquake/qa_tests_data/scenario_risk/case_3/job.ini @@ -8,8 +8,6 @@ structural_vulnerability_file = vulnerability_model.xml region = 78.0 31.5,89.5 31.5,89.5 25.5,78 25.5 -export_dir = /tmp - [geometry] exposure_file = exposure_model.xml From f2b36cd5b306907842edc2063e4b56601214cee3 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 07:39:22 +0200 Subject: [PATCH 34/53] Replaced the conditional_spectrum calculator with a postprocessor --- debian/changelog | 3 + doc/adv-manual/classical_PSHA.rst | 26 +-- doc/sphinx/openquake.calculators.rst | 8 - openquake/calculators/conditional_spectrum.py | 195 ------------------ openquake/commonlib/oqvalidation.py | 28 +-- openquake/hazardlib/tests/calc/data/job.ini | 3 +- .../conditional_spectrum/case_1/job.ini | 3 +- .../conditional_spectrum/case_2/job.ini | 3 +- 8 files changed, 23 insertions(+), 246 deletions(-) delete mode 100644 openquake/calculators/conditional_spectrum.py diff --git a/debian/changelog b/debian/changelog index c059615ba60d..a720227e25c3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,6 @@ + [Michele Simionato] + * Reimplemented the conditional_spectrum calculator as a post-processor + [Paolo Tormene, Michele Simionato] * Raise an error when the same parameter is set in different sections of the job.ini file diff --git a/doc/adv-manual/classical_PSHA.rst b/doc/adv-manual/classical_PSHA.rst index 56bfe93911f8..1bd1c2d2bc68 100644 --- a/doc/adv-manual/classical_PSHA.rst +++ b/doc/adv-manual/classical_PSHA.rst @@ -975,15 +975,19 @@ NB: ``disagg_by_src`` can be set to true only if the not in the original source model, thus breaking the connection between the values of the matrix and the original sources. -The conditional spectrum calculator +The conditional spectrum post-processor ======================================== -The ``conditional_spectrum`` calculator is an experimental calculator -introduced in version 3.13, which is able to compute the conditional -spectrum in the sense of Baker. +Since version 3.17 the engine includes an experimental post-processor +which is able to compute the conditional spectrum. + +The implementation was adapted from the paper *Conditional Spectrum +Computation Incorporating Multiple Causal Earthquakes and +Ground-Motion Prediction Models* by Ting Lin, Stephen C. Harmsen, +Jack W. 
Baker, and Nicolas Luco (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.845.163&rep=rep1&type=pdf) and it is rather sophisticated. In order to perform a conditional spectrum calculation you need to -specify (on top of the usual parameter of a classical calculation): +specify, in addition to the usual parameter of a classical calculation: 1. a reference intensity measure type (i.e. ``imt_ref = SA(0.2)``) 2. a cross correlation model (i.e. ``cross_correlation = BakerJayaram2008``) @@ -1033,15 +1037,3 @@ standard deviations. Conditional spectra for individual realizations are also computed and stored for debugging purposes, but they are not exportable. - -The implementation was adapted from the paper *Conditional Spectrum -Computation Incorporating Multiple Causal Earthquakes and -Ground-Motion Prediction Models* by Ting Lin, Stephen C. Harmsen, -Jack W. Baker, and Nicolas Luco (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.845.163&rep=rep1&type=pdf) and it is rather sophisticated. -The core formula is implemented in the method -`openquake.hazardlib.contexts.get_cs_contrib`. - -The ``conditional_spectrum`` calculator, like the disaggregation calculator, -is a kind of post-calculator, i.e. you can run a regular classical calculation -and then compute the ``conditional_spectrum`` in post-processing by using -the ``--hc`` option. diff --git a/doc/sphinx/openquake.calculators.rst b/doc/sphinx/openquake.calculators.rst index cb184265a2ae..100b19091f29 100644 --- a/doc/sphinx/openquake.calculators.rst +++ b/doc/sphinx/openquake.calculators.rst @@ -104,14 +104,6 @@ reportwriter module :undoc-members: :show-inheritance: -conditional_spectrum module --------------------------------------------- - -.. automodule:: openquake.calculators.conditional_spectrum - :members: - :undoc-members: - :show-inheritance: - views module ---------------------------------- diff --git a/openquake/calculators/conditional_spectrum.py b/openquake/calculators/conditional_spectrum.py deleted file mode 100644 index a29d547421ee..000000000000 --- a/openquake/calculators/conditional_spectrum.py +++ /dev/null @@ -1,195 +0,0 @@ -# -*- coding: utf-8 -*- -# vim: tabstop=4 shiftwidth=4 softtabstop=4 -# -# Copyright (C) 2021 GEM Foundation -# -# OpenQuake is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# OpenQuake is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with OpenQuake. If not, see . 
- -""" -Conditional spectrum calculator, inspired by the disaggregation calculator -""" -import logging -import numpy - -from openquake.baselib import parallel, hdf5 -from openquake.baselib.python3compat import decode -from openquake.hazardlib.probability_map import compute_hazard_maps -from openquake.hazardlib.imt import from_string -from openquake.hazardlib import valid -from openquake.hazardlib.contexts import read_cmakers, read_ctx_by_grp -from openquake.hazardlib.calc.cond_spectra import get_cs_out, outdict -from openquake.calculators import base - -U16 = numpy.uint16 -U32 = numpy.uint32 -TWO24 = 2 ** 24 - -# helper function to be used when saving the spectra as an array -def to_spectra(outdic, n, p): - """ - :param outdic: dictionary rlz_id -> array MNOP - :param n: site index in the range 0..N-1 - :param p: IMLs index in the range 0..P-1 - :returns: conditional spectra as an array of shape (R, M, 2) - """ - R = len(outdic) - M = len(outdic[0]) - out = numpy.zeros((R, M, 2)) - for r in range(R): - c = outdic[r] - out[r, :, 0] = numpy.exp(c[:, n, 1, p]) # NB: seems wrong! - out[r, :, 1] = numpy.sqrt(c[:, n, 2, p]) - return out - - -def store_spectra(dstore, name, R, oq, spectra): - """ - Store the conditional spectra array - """ - N = len(spectra) - stats = list(oq.hazard_stats()) - kind = 'rlz' if 'rlzs' in name else 'stat' - dic = dict(shape_descr=['site_id', 'poe', kind, 'period']) - dic['site_id'] = numpy.arange(N) - dic[kind] = numpy.arange(R) if kind == 'rlz' else stats - dic['poe'] = list(oq.poes) - dic['period'] = [from_string(imt).period for imt in oq.imtls] - hdf5.ArrayWrapper(spectra, dic, ['mea', 'std']).save(name, dstore.hdf5) - # NB: the following would segfault - # dstore.hdf5[name] = hdf5.ArrayWrapper(spectra, dic, ['mea', 'std']) - - -@base.calculators.add('conditional_spectrum') -class ConditionalSpectrumCalculator(base.HazardCalculator): - """ - Conditional spectrum calculator, to be used for few sites only - """ - precalc = 'classical' - accept_precalc = ['classical', 'disaggregation'] - - def execute(self): - """ - Compute the conditional spectrum in a sequential way. - NB: since conditional spectra are meant to be computed only for few - sites, there is no point in parallelizing: the computation is dominated - by the time spent reading the contexts, not by the CPU. 
- """ - assert self.N <= self.oqparam.max_sites_disagg, ( - self.N, self.oqparam.max_sites_disagg) - logging.warning('Conditional spectrum calculations are still ' - 'experimental') - - oq = self.oqparam - self.full_lt = self.datastore['full_lt'].init() - trt_smrs = self.datastore['trt_smrs'][:] - self.trt_rlzs = self.full_lt.get_trt_rlzs(trt_smrs) - self.trts = list(self.full_lt.gsim_lt.values) - self.imts = list(oq.imtls) - imti = self.imts.index(oq.imt_ref) - self.M = len(self.imts) - dstore = (self.datastore.parent if self.datastore.parent - else self.datastore) - totrups = len(dstore['rup/mag']) - logging.info('Reading {:_d} ruptures'.format(totrups)) - rdt = [('grp_id', U32), ('idx', U32)] - rdata = numpy.zeros(totrups, rdt) - rdata['idx'] = numpy.arange(totrups) - rdata['grp_id'] = dstore['rup/grp_id'][:] - self.periods = [from_string(imt).period for imt in self.imts] - - # extract imls from the "mean" hazard map - if 'hcurves-stats' in dstore: # shape (N, S, M, L1) - curves = dstore.sel('hcurves-stats', stat='mean', imt=oq.imt_ref) - else: # there is only 1 realization - curves = dstore.sel('hcurves-rlzs', rlz_id=0, imt=oq.imt_ref) - self.imls = compute_hazard_maps( # shape (N, L) => (N, P) - curves[:, 0, 0, :], oq.imtls[oq.imt_ref], oq.poes) - N, self.P = self.imls.shape - self.cmakers = read_cmakers(self.datastore) - - # IMPORTANT!! we rely on the fact that the classical part - # of the calculation stores the ruptures in chunks of constant - # grp_id, therefore it is possible to build (start, stop) slices - - # Computing CS - toms = decode(dstore['toms'][:]) - ctx_by_grp = read_ctx_by_grp(dstore) - self.datastore.swmr_on() - smap = parallel.Starmap(get_cs_out, h5=self.datastore) - for grp_id, ctx in ctx_by_grp.items(): - tom = valid.occurrence_model(toms[grp_id]) - cmaker = self.cmakers[grp_id] - smap.submit((cmaker, ctx, imti, self.imls, tom)) - out = smap.reduce() - - # Apply weights and get two dictionaries with integer keys - # (corresponding to the rlz ID) and array values - # of shape (M, N, 3, P) where: - # M is the number of IMTs - # N is the number of sites - # 3 is the number of statistical momenta - # P is the the number of IMLs - outdic, outmean = self._apply_weights(out) - - # sanity check: the mean conditional spectrum must be close to the imls - ref_idx = list(oq.imtls).index(oq.imt_ref) - mean_cs = numpy.exp(outmean[0][ref_idx, :, 1, :]) # shape (N, P) - for n in range(self.N): - for p in range(self.P): - diff = abs(1. 
- self.imls[n, p]/ mean_cs[n, p]) - if diff > .03: - logging.warning('Conditional Spectrum vs mean IML\nfor ' - 'site_id=%d, poe=%s, imt=%s: %.5f vs %.5f', - n, oq.poes[p], oq.imt_ref, - mean_cs[n, p], self.imls[n, p]) - - # Computing standard deviation - smap = parallel.Starmap(get_cs_out, h5=self.datastore.hdf5) - for grp_id, ctx in ctx_by_grp.items(): - cmaker = self.cmakers[grp_id] - smap.submit((cmaker, ctx, imti, self.imls, tom, outmean[0])) - for res in smap: - for g in res: - out[g][:, :, 2] += res[g][:, :, 2] # STDDEV - return out - - def convert_and_save(self, dsetname, outdic): - """ - Save the conditional spectra - """ - spe = numpy.array([[to_spectra(outdic, n, p) for p in range(self.P)] - for n in range(self.N)]) - store_spectra(self.datastore, dsetname, self.R, self.oqparam, spe) - - def post_execute(self, acc): - # apply weights - outdic, outmean = self._apply_weights(acc) - # convert dictionaries into spectra and save them - self.convert_and_save('cs-rlzs', outdic) - self.convert_and_save('cs-stats', outmean) - - def _apply_weights(self, acc): - # build conditional spectra for each realization - outdic = outdict(self.M, self.N, self.P, 0, self.R) - for g, trs in enumerate(self.trt_rlzs): - for r in trs % TWO24: - outdic[r] += acc[g] - - # build final conditional mean and std - weights = self.datastore['weights'][:] - outmean = outdict(self.M, self.N, self.P, 0, 1) # dict with key 0 - for r, weight in enumerate(weights): - outmean[0] += outdic[r] * weight - - return outdic, outmean diff --git a/openquake/commonlib/oqvalidation.py b/openquake/commonlib/oqvalidation.py index 23f43f52a2d1..78bfe8ea19fb 100644 --- a/openquake/commonlib/oqvalidation.py +++ b/openquake/commonlib/oqvalidation.py @@ -18,12 +18,10 @@ import os import re -import ast import json import inspect import logging import functools -import collections import multiprocessing import numpy import itertools @@ -837,7 +835,6 @@ 'multi_risk', 'classical_bcr', 'preclassical', - 'conditional_spectrum', 'event_based_damage', 'scenario_damage'] @@ -1292,13 +1289,6 @@ def __init__(self, **names_vals): raise InvalidFile('%s: you cannot set rlzs_index and ' 'num_rlzs_disagg at the same time' % job_ini) - # checks for conditional_spectrum - if self.calculation_mode == 'conditional_spectrum': - if not self.poes: - raise InvalidFile("%s: you must specify the poes" % job_ini) - elif list(self.hazard_stats()) != ['mean']: - raise InvalidFile('%s: only the mean is supported' % job_ini) - # checks for classical_damage if self.calculation_mode == 'classical_damage': if self.conditional_loss_poes: @@ -2062,16 +2052,8 @@ def __toh5__(self): return hdf5.dumps(vars(self)), {} def __fromh5__(self, array, attrs): - if isinstance(array, numpy.ndarray): # old format <= 3.11 - dd = collections.defaultdict(dict) - for (name_, literal_) in array: - name = python3compat.decode(name_) - literal = python3compat.decode(literal_) - if '.' 
in name: - k1, k2 = name.split('.', 1) - dd[k1][k2] = ast.literal_eval(literal) - else: - dd[name] = ast.literal_eval(literal) - vars(self).update(dd) - else: # new format >= 3.12 - vars(self).update(json.loads(python3compat.decode(array))) + # works for version >= 3.12 + vars(self).update(json.loads(python3compat.decode(array))) + Idist = calc.filters.IntegrationDistance + if not isinstance(self.maximum_distance, Idist): + self.maximum_distance = Idist(**self.maximum_distance) diff --git a/openquake/hazardlib/tests/calc/data/job.ini b/openquake/hazardlib/tests/calc/data/job.ini index 4ce21d62a6ed..7b093203d6c2 100644 --- a/openquake/hazardlib/tests/calc/data/job.ini +++ b/openquake/hazardlib/tests/calc/data/job.ini @@ -1,7 +1,8 @@ [general] description = Conditional spectrum -calculation_mode = conditional_spectrum +calculation_mode = classical +postproc_func = conditional_spectrum uniform_hazard_spectra = true [geometry] diff --git a/openquake/qa_tests_data/conditional_spectrum/case_1/job.ini b/openquake/qa_tests_data/conditional_spectrum/case_1/job.ini index b96b4988565f..9df5ceaf649b 100644 --- a/openquake/qa_tests_data/conditional_spectrum/case_1/job.ini +++ b/openquake/qa_tests_data/conditional_spectrum/case_1/job.ini @@ -1,7 +1,8 @@ [general] description = Conditional spectrum -calculation_mode = conditional_spectrum +calculation_mode = classical +postproc_func = conditional_spectrum uniform_hazard_spectra = true [geometry] diff --git a/openquake/qa_tests_data/conditional_spectrum/case_2/job.ini b/openquake/qa_tests_data/conditional_spectrum/case_2/job.ini index 8fc0bc97d6d4..92bb1a46c25a 100644 --- a/openquake/qa_tests_data/conditional_spectrum/case_2/job.ini +++ b/openquake/qa_tests_data/conditional_spectrum/case_2/job.ini @@ -1,7 +1,8 @@ [general] description = Conditional spectrum -calculation_mode = conditional_spectrum +calculation_mode = classical +postproc_func = conditional_spectrum uniform_hazard_spectra = true [geometry] From 7be6bbc9650895a85aefd1bddd1b0eccf7d767ca Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 07:52:11 +0200 Subject: [PATCH 35/53] Added checks --- openquake/commonlib/oqvalidation.py | 12 ++++++------ openquake/hazardlib/tests/calc/data/job.ini | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/openquake/commonlib/oqvalidation.py b/openquake/commonlib/oqvalidation.py index 78bfe8ea19fb..6c03fe343916 100644 --- a/openquake/commonlib/oqvalidation.py +++ b/openquake/commonlib/oqvalidation.py @@ -362,10 +362,10 @@ Default: no default imt_ref: - Reference intensity measure type usedto compute the conditional spectrum. + Reference intensity measure type used to compute the conditional spectrum. The imt_ref must belong to the list of IMTs of the calculation. Example: *imt_ref = SA(0.15)*. 
- Default: no default + Default: empty string individual_rlzs: When set, store the individual hazard curves and/or individual risk curves @@ -963,7 +963,7 @@ class OqParam(valid.ParamSet): ignore_missing_costs = valid.Param(valid.namelist, []) ignore_covs = valid.Param(valid.boolean, False) iml_disagg = valid.Param(valid.floatdict, {}) # IMT -> IML - imt_ref = valid.Param(valid.intensity_measure_type) + imt_ref = valid.Param(valid.intensity_measure_type, '') individual_rlzs = valid.Param(valid.boolean, None) inputs = valid.Param(dict, {}) ash_wet_amplification_factor = valid.Param(valid.positivefloat, 1.0) @@ -2054,6 +2054,6 @@ def __toh5__(self): def __fromh5__(self, array, attrs): # works for version >= 3.12 vars(self).update(json.loads(python3compat.decode(array))) - Idist = calc.filters.IntegrationDistance - if not isinstance(self.maximum_distance, Idist): - self.maximum_distance = Idist(**self.maximum_distance) + #Idist = calc.filters.IntegrationDistance + #if not isinstance(self.maximum_distance, Idist): + # self.maximum_distance = Idist(**self.maximum_distance) diff --git a/openquake/hazardlib/tests/calc/data/job.ini b/openquake/hazardlib/tests/calc/data/job.ini index 7b093203d6c2..2084ac6cd52a 100644 --- a/openquake/hazardlib/tests/calc/data/job.ini +++ b/openquake/hazardlib/tests/calc/data/job.ini @@ -47,6 +47,7 @@ intensity_measure_types_and_levels = { truncation_level = 3 maximum_distance = 200.0 +# conditional_spectrum parameters cross_correlation = BakerJayaram2008 imt_ref = SA(0.2) poes = 0.002105 From 808ab967e348649937eef43e4ce2cfb5c61325e6 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 07:53:43 +0200 Subject: [PATCH 36/53] Cleanup --- openquake/commonlib/oqvalidation.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/openquake/commonlib/oqvalidation.py b/openquake/commonlib/oqvalidation.py index 6c03fe343916..52769ee0ffbb 100644 --- a/openquake/commonlib/oqvalidation.py +++ b/openquake/commonlib/oqvalidation.py @@ -18,10 +18,12 @@ import os import re +import ast import json import inspect import logging import functools +import collections import multiprocessing import numpy import itertools @@ -2052,8 +2054,16 @@ def __toh5__(self): return hdf5.dumps(vars(self)), {} def __fromh5__(self, array, attrs): - # works for version >= 3.12 - vars(self).update(json.loads(python3compat.decode(array))) - #Idist = calc.filters.IntegrationDistance - #if not isinstance(self.maximum_distance, Idist): - # self.maximum_distance = Idist(**self.maximum_distance) + if isinstance(array, numpy.ndarray): # old format <= 3.11 + dd = collections.defaultdict(dict) + for (name_, literal_) in array: + name = python3compat.decode(name_) + literal = python3compat.decode(literal_) + if '.' 
in name: + k1, k2 = name.split('.', 1) + dd[k1][k2] = ast.literal_eval(literal) + else: + dd[name] = ast.literal_eval(literal) + vars(self).update(dd) + else: # new format >= 3.12 + vars(self).update(json.loads(python3compat.decode(array))) From 48c411c051a4286ddecb463eba3bdf8b51e87fd6 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 08:01:14 +0200 Subject: [PATCH 37/53] Fixed dstore[oqparam].maximum_distance --- openquake/commonlib/oqvalidation.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/openquake/commonlib/oqvalidation.py b/openquake/commonlib/oqvalidation.py index 23f43f52a2d1..65317831a47c 100644 --- a/openquake/commonlib/oqvalidation.py +++ b/openquake/commonlib/oqvalidation.py @@ -2062,16 +2062,9 @@ def __toh5__(self): return hdf5.dumps(vars(self)), {} def __fromh5__(self, array, attrs): - if isinstance(array, numpy.ndarray): # old format <= 3.11 - dd = collections.defaultdict(dict) - for (name_, literal_) in array: - name = python3compat.decode(name_) - literal = python3compat.decode(literal_) - if '.' in name: - k1, k2 = name.split('.', 1) - dd[k1][k2] = ast.literal_eval(literal) - else: - dd[name] = ast.literal_eval(literal) - vars(self).update(dd) - else: # new format >= 3.12 - vars(self).update(json.loads(python3compat.decode(array))) + # works for version >= 3.12 + vars(self).update(json.loads(python3compat.decode(array))) + Idist = calc.filters.IntegrationDistance + if hasattr(self, 'maximum_distance') and not isinstance( + self.maximum_distance, Idist): + self.maximum_distance = Idist(**self.maximum_distance) From 73e0bb8037786b031894bc6f3bc5a653f0a24d32 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 08:11:41 +0200 Subject: [PATCH 38/53] Added forgotten module --- .../postproc/conditional_spectrum.py | 200 ++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 openquake/calculators/postproc/conditional_spectrum.py diff --git a/openquake/calculators/postproc/conditional_spectrum.py b/openquake/calculators/postproc/conditional_spectrum.py new file mode 100644 index 000000000000..7b394e5bbff3 --- /dev/null +++ b/openquake/calculators/postproc/conditional_spectrum.py @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- +# vim: tabstop=4 shiftwidth=4 softtabstop=4 +# +# Copyright (C) 2023, GEM Foundation +# +# OpenQuake is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# OpenQuake is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with OpenQuake. If not, see . 
+""" +Conditional spectrum post-processor +""" +import logging +import numpy +from openquake.baselib import parallel, hdf5, sap +from openquake.baselib.python3compat import decode +from openquake.hazardlib.probability_map import compute_hazard_maps +from openquake.hazardlib.imt import from_string +from openquake.hazardlib import valid, InvalidFile +from openquake.hazardlib.contexts import read_cmakers, read_ctx_by_grp +from openquake.hazardlib.calc.cond_spectra import get_cs_out, outdict + +U16 = numpy.uint16 +U32 = numpy.uint32 +TWO24 = 2 ** 24 + +# helper function to be used when saving the spectra as an array +def to_spectra(outdic, n, p): + """ + :param outdic: dictionary rlz_id -> array MNOP + :param n: site index in the range 0..N-1 + :param p: IMLs index in the range 0..P-1 + :returns: conditional spectra as an array of shape (R, M, 2) + """ + R = len(outdic) + M = len(outdic[0]) + out = numpy.zeros((R, M, 2)) + for r in range(R): + c = outdic[r] + out[r, :, 0] = numpy.exp(c[:, n, 1, p]) # NB: seems wrong! + out[r, :, 1] = numpy.sqrt(c[:, n, 2, p]) + return out + + +def store_spectra(dstore, name, R, oq, spectra): + """ + Store the conditional spectra array + """ + N = len(spectra) + stats = list(oq.hazard_stats()) + kind = 'rlz' if 'rlzs' in name else 'stat' + dic = dict(shape_descr=['site_id', 'poe', kind, 'period']) + dic['site_id'] = numpy.arange(N) + dic[kind] = numpy.arange(R) if kind == 'rlz' else stats + dic['poe'] = list(oq.poes) + dic['period'] = [from_string(imt).period for imt in oq.imtls] + hdf5.ArrayWrapper(spectra, dic, ['mea', 'std']).save(name, dstore.hdf5) + # NB: the following would segfault + # dstore.hdf5[name] = hdf5.ArrayWrapper(spectra, dic, ['mea', 'std']) + + +def compute_cs(dstore, oq, N, M, P): + """ + Compute the conditional spectrum in a sequential way. + NB: since conditional spectra are meant to be computed only for few + sites, there is no point in parallelizing: the computation is dominated + by the time spent reading the contexts, not by the CPU. + """ + full_lt = dstore['full_lt'].init() + trt_rlzs = full_lt.get_trt_rlzs(dstore['trt_smrs'][:]) + R = full_lt.get_num_paths() + imts = list(oq.imtls) + imti = imts.index(oq.imt_ref) + totrups = len(dstore['rup/mag']) + logging.info('Reading {:_d} contexts'.format(totrups)) + rdt = [('grp_id', U32), ('idx', U32)] + rdata = numpy.zeros(totrups, rdt) + rdata['idx'] = numpy.arange(totrups) + rdata['grp_id'] = dstore['rup/grp_id'][:] + + # extract imls from the "mean" hazard map + if 'hcurves-stats' in dstore: # shape (N, S, M, L1) + curves = dstore.sel('hcurves-stats', stat='mean', imt=oq.imt_ref) + else: # there is only 1 realization + curves = dstore.sel('hcurves-rlzs', rlz_id=0, imt=oq.imt_ref) + imls = compute_hazard_maps( # shape (N, L) => (N, P) + curves[:, 0, 0, :], oq.imtls[oq.imt_ref], oq.poes) + N, P = imls.shape + cmakers = read_cmakers(dstore) + + # IMPORTANT!! 
we rely on the fact that the classical part + # of the calculation stores the ruptures in chunks of constant + # grp_id, therefore it is possible to build (start, stop) slices + + # Computing CS + toms = decode(dstore['toms'][:]) + ctx_by_grp = read_ctx_by_grp(dstore) + dstore.swmr_on() + smap = parallel.Starmap(get_cs_out, h5=dstore) + for grp_id, ctx in ctx_by_grp.items(): + tom = valid.occurrence_model(toms[grp_id]) + cmaker = cmakers[grp_id] + smap.submit((cmaker, ctx, imti, imls, tom)) + out = smap.reduce() + + # Apply weights and get two dictionaries with integer keys + # (corresponding to the rlz ID) and array values + # of shape (M, N, 3, P) where: + # M is the number of IMTs + # N is the number of sites + # 3 is the number of statistical momenta + # P is the the number of IMLs + outdic, outmean = _apply_weights(dstore, out, N, R, M, P, trt_rlzs) + + # sanity check: the mean conditional spectrum must be close to the imls + ref_idx = list(oq.imtls).index(oq.imt_ref) + mean_cs = numpy.exp(outmean[0][ref_idx, :, 1, :]) # shape (N, P) + for n in range(N): + for p in range(P): + diff = abs(1. - imls[n, p]/ mean_cs[n, p]) + if diff > .03: + logging.warning('Conditional Spectrum vs mean IML\nfor ' + 'site_id=%d, poe=%s, imt=%s: %.5f vs %.5f', + n, oq.poes[p], oq.imt_ref, + mean_cs[n, p], imls[n, p]) + + # Computing standard deviation + dstore.swmr_on() + smap = parallel.Starmap(get_cs_out, h5=dstore.hdf5) + for grp_id, ctx in ctx_by_grp.items(): + cmaker = cmakers[grp_id] + smap.submit((cmaker, ctx, imti, imls, tom, outmean[0])) + for res in smap: + for g in res: + out[g][:, :, 2] += res[g][:, :, 2] # STDDEV + + # apply weights + outdic, outmean = _apply_weights(dstore, out, N, R, M, P, trt_rlzs) + # convert dictionaries into spectra and save them + convert_and_save(dstore, 'cs-rlzs', outdic, oq, N, R, P) + convert_and_save(dstore, 'cs-stats', outmean, oq, N, R, P) + + +def convert_and_save(dstore, dsetname, outdic, oq, N, R, P): + """ + Save the conditional spectra + """ + spe = numpy.array([[to_spectra(outdic, n, p) for p in range(P)] + for n in range(N)]) + store_spectra(dstore, dsetname, R, oq, spe) + + +def _apply_weights(dstore, acc, N, R, M, P, trt_rlzs): + # build conditional spectra for each realization + outdic = outdict(M, N, P, 0, R) + for g, trs in enumerate(trt_rlzs): + for r in trs % TWO24: + outdic[r] += acc[g] + + # build final conditional mean and std + weights = dstore['weights'][:] + outmean = outdict(M, N, P, 0, 1) # dict with key 0 + for r, weight in enumerate(weights): + outmean[0] += outdic[r] * weight + + return outdic, outmean + + +def main(dstore): + logging.warning('Conditional spectrum calculations are still ' + 'experimental') + oq = dstore['oqparam'] + if not oq.cross_correl: + raise InvalidFile("%(job_ini)s: you must specify cross_correlation" + % oq.inputs) + if not oq.imt_ref: + raise InvalidFile("%(job_ini)s: you must specify imt_ref" % oq.inputs) + if not oq.poes: + raise InvalidFile("%(job_ini)s: you must specify the poes" % oq.inputs) + if list(oq.hazard_stats()) != ['mean']: + raise InvalidFile('%(job_ini)s: only the mean is supported' % oq.inputs) + + sitecol = dstore['sitecol'] + N = len(sitecol) + M = len(oq.imtls) + P = len(oq.poes) + assert N <= oq.max_sites_disagg, (N, oq.max_sites_disagg) + compute_cs(dstore, oq, N, M, P) + + +if __name__ == '__main__': + sap.run(main) From b8462ad733dcff6fb9bde88f80e001be4744ceaf Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 08:16:22 +0200 Subject: [PATCH 39/53] Removed a comment [ci 
skip] --- openquake/calculators/postproc/conditional_spectrum.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/openquake/calculators/postproc/conditional_spectrum.py b/openquake/calculators/postproc/conditional_spectrum.py index 7b394e5bbff3..91fc5f461238 100644 --- a/openquake/calculators/postproc/conditional_spectrum.py +++ b/openquake/calculators/postproc/conditional_spectrum.py @@ -96,10 +96,6 @@ def compute_cs(dstore, oq, N, M, P): N, P = imls.shape cmakers = read_cmakers(dstore) - # IMPORTANT!! we rely on the fact that the classical part - # of the calculation stores the ruptures in chunks of constant - # grp_id, therefore it is possible to build (start, stop) slices - # Computing CS toms = decode(dstore['toms'][:]) ctx_by_grp = read_ctx_by_grp(dstore) From 6afd806f6a620fe7357140a93d69f49576b46c1b Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 08:29:54 +0200 Subject: [PATCH 40/53] Restored old format --- openquake/commonlib/oqvalidation.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/openquake/commonlib/oqvalidation.py b/openquake/commonlib/oqvalidation.py index 65317831a47c..bbec71f0a9b1 100644 --- a/openquake/commonlib/oqvalidation.py +++ b/openquake/commonlib/oqvalidation.py @@ -2062,7 +2062,21 @@ def __toh5__(self): return hdf5.dumps(vars(self)), {} def __fromh5__(self, array, attrs): - # works for version >= 3.12 + if isinstance(array, numpy.ndarray): + # old format <= 3.11, tested in read_old_data, + # used to read old GMFs + dd = collections.defaultdict(dict) + for (name_, literal_) in array: + name = python3compat.decode(name_) + literal = python3compat.decode(literal_) + if '.' in name: + k1, k2 = name.split('.', 1) + dd[k1][k2] = ast.literal_eval(literal) + else: + dd[name] = ast.literal_eval(literal) + vars(self).update(dd) + + # for version >= 3.12 vars(self).update(json.loads(python3compat.decode(array))) Idist = calc.filters.IntegrationDistance if hasattr(self, 'maximum_distance') and not isinstance( From ff00e43566f672a41c95d70c77908e918922597d Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 08:34:54 +0200 Subject: [PATCH 41/53] Fixed indentation --- openquake/commonlib/oqvalidation.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/openquake/commonlib/oqvalidation.py b/openquake/commonlib/oqvalidation.py index bbec71f0a9b1..a4db1f7259bd 100644 --- a/openquake/commonlib/oqvalidation.py +++ b/openquake/commonlib/oqvalidation.py @@ -2075,10 +2075,10 @@ def __fromh5__(self, array, attrs): else: dd[name] = ast.literal_eval(literal) vars(self).update(dd) - - # for version >= 3.12 - vars(self).update(json.loads(python3compat.decode(array))) - Idist = calc.filters.IntegrationDistance - if hasattr(self, 'maximum_distance') and not isinstance( - self.maximum_distance, Idist): - self.maximum_distance = Idist(**self.maximum_distance) + else: + # for version >= 3.12 + vars(self).update(json.loads(python3compat.decode(array))) + Idist = calc.filters.IntegrationDistance + if hasattr(self, 'maximum_distance') and not isinstance( + self.maximum_distance, Idist): + self.maximum_distance = Idist(**self.maximum_distance) From 0c28e1da75e679b557835009ccf9ee644789bddc Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 08:35:57 +0200 Subject: [PATCH 42/53] Fixed indentation --- openquake/commonlib/oqvalidation.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/openquake/commonlib/oqvalidation.py 
b/openquake/commonlib/oqvalidation.py index a4db1f7259bd..69173b2b3e5e 100644 --- a/openquake/commonlib/oqvalidation.py +++ b/openquake/commonlib/oqvalidation.py @@ -2078,7 +2078,8 @@ def __fromh5__(self, array, attrs): else: # for version >= 3.12 vars(self).update(json.loads(python3compat.decode(array))) - Idist = calc.filters.IntegrationDistance - if hasattr(self, 'maximum_distance') and not isinstance( - self.maximum_distance, Idist): - self.maximum_distance = Idist(**self.maximum_distance) + + Idist = calc.filters.IntegrationDistance + if hasattr(self, 'maximum_distance') and not isinstance( + self.maximum_distance, Idist): + self.maximum_distance = Idist(**self.maximum_distance) From 9c68716e2b0234a82c903cfb077c18e06525009f Mon Sep 17 00:00:00 2001 From: Matteo Nastasi Date: Thu, 29 Jun 2023 09:53:27 +0200 Subject: [PATCH 43/53] update documentation for 3.16.4 version --- CONTRIBUTORS.txt | 1 + debian/control | 4 ++-- debian/control.in | 4 ++-- doc/installing/README.md | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index f036fcd27649..29bcf0967c7f 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -76,6 +76,7 @@ Julián Montejo Espitia May 2022 Pablo Iturrieta (@pabloitu) June 2022 Anne Hulsey (@annehulsey) July 2022 Nicole Paul (@nicolepaul) May 2023 +Claudio Schill (@claudio525) June 2023 Project management ------------------ diff --git a/debian/control b/debian/control index 6b36c7154153..a05d5f724268 100644 --- a/debian/control +++ b/debian/control @@ -10,13 +10,13 @@ Homepage: http://github.com/gem/oq-engine Package: python3-oq-engine Architecture: amd64 X-Python-Version: >= 3.6 -Depends: ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends}, ${dist:Depends}, python3-oq-libs (>=3.8.0~) +Depends: ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends}, ${dist:Depends}, python3-oq-libs (>=3.8.1~) Conflicts: python-noq, python-oq, python-nhlib Replaces: python-oq-risklib, python-oq-hazardlib, python-oq-engine (<< ${binary:Version}) # See https://www.debian.org/doc/debian-policy/ch-relationships.html#s-breaks Breaks: python-oq-risklib, python-oq-hazardlib, python-oq-engine (<< ${binary:Version}) Recommends: ${dist:Recommends} -Suggests: python3-oq-libs-extra (>=3.8.0~) +Suggests: python3-oq-libs-extra (>=3.8.1~) Description: computes seismic hazard and physical risk based on the hazard and risk libraries developed by the GEM foundation (http://www.globalquakemodel.org/) diff --git a/debian/control.in b/debian/control.in index 9fd9bd24347e..7425637756c7 100644 --- a/debian/control.in +++ b/debian/control.in @@ -10,13 +10,13 @@ Homepage: http://github.com/gem/oq-engine Package: python3-oq-engine Architecture: amd64 X-Python-Version: >= 3.6 -Depends: ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends}, ${dist:Depends}, python3-oq-libs (>=3.8.0~) +Depends: ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends}, ${dist:Depends}, python3-oq-libs (>=3.8.1~) Conflicts: python-noq, python-oq, python-nhlib Replaces: python-oq-risklib, python-oq-hazardlib, python-oq-engine (<< ${binary:Version}) # See https://www.debian.org/doc/debian-policy/ch-relationships.html#s-breaks Breaks: python-oq-risklib, python-oq-hazardlib, python-oq-engine (<< ${binary:Version}) Recommends: ${dist:Recommends} -Suggests: python3-oq-libs-extra (>=3.8.0~) +Suggests: python3-oq-libs-extra (>=3.8.1~) Description: computes seismic hazard and physical risk based on the hazard and risk libraries developed by the GEM foundation 
(http://www.globalquakemodel.org/) diff --git a/doc/installing/README.md b/doc/installing/README.md index cfe46dcd5cc7..f245a4e4b0ce 100644 --- a/doc/installing/README.md +++ b/doc/installing/README.md @@ -37,7 +37,7 @@ Check more advanced [hardware suggestions here](./hardware-suggestions.md). **On Windows** - Download OpenQuake Engine for Windows: https://downloads.openquake.org/pkgs/windows/oq-engine/OpenQuake_Engine_3.16.3-1.exe . + Download OpenQuake Engine for Windows: https://downloads.openquake.org/pkgs/windows/oq-engine/OpenQuake_Engine_3.16.4-1.exe . Then follow the wizard on screen. > **Warning**: > Administrator level access may be required. From 72c651d22143373f38b5a7dcbbbcc50c90270a9c Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 10:02:37 +0200 Subject: [PATCH 44/53] Removed accidentally committed file --- openquake/engine/postproc/compute_mrd.py | 106 ----------------------- 1 file changed, 106 deletions(-) delete mode 100644 openquake/engine/postproc/compute_mrd.py diff --git a/openquake/engine/postproc/compute_mrd.py b/openquake/engine/postproc/compute_mrd.py deleted file mode 100644 index 851b8dbf6346..000000000000 --- a/openquake/engine/postproc/compute_mrd.py +++ /dev/null @@ -1,106 +0,0 @@ -# -*- coding: utf-8 -*- -# vim: tabstop=4 shiftwidth=4 softtabstop=4 -# -# Copyright (C) 2023, GEM Foundation -# -# OpenQuake is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# OpenQuake is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with OpenQuake. If not, see . - -import toml -import logging -import numpy -from openquake.baselib import sap, parallel, general -from openquake.hazardlib.calc.mrd import calc_mean_rate_dist -from openquake.hazardlib import contexts, cross_correlation -from openquake.commonlib import datastore - - -def compute_mrd(ctx, N, cmaker, crosscorr, imt1, imt2, - meabins, sigbins, method, monitor): - """ - :param ctx: a context array - :param N: the total number of sites - :param cmaker: a ContextMaker instance - :param crosscorr: cross correlation model - :param str imt1: the first Intensity Measure Type - :param str imt1: the second Intensity Measure Type - :param meabins: bins for the means - :param sigbins: bins for the sigmas - :param method: a string 'direct' or 'indirect' - :returns: 4D-matrix with shape (L1, L1, N, G) - """ - mrd = calc_mean_rate_dist(ctx, N, cmaker, crosscorr, - imt1, imt2, meabins, sigbins, method) - return {g: mrd[:, :, :, i] for i, g in enumerate(cmaker.gidx)} - - -def combine_mrds(acc, rlzs_by_g, rlz_weight): - g = next(iter(acc)) # first key - out = numpy.zeros(acc[g].shape) # shape (L1, L1, N) - for g in acc: - value = acc[g] - for rlz in rlzs_by_g[g]: - out += rlz_weight[rlz] * value - return out - - -def main(parent_id, config): - """ - :param parent_id: filename or ID of the parent calculation - :param config: name of a .toml file with parameters imt1, imt2, crosscorr - - NB: this is meant to work only for parametric ruptures! 
- """ - try: - parent_id = int(parent_id) - except ValueError: - # passing a filename is okay - pass - parent = datastore.read(parent_id) - dstore, log = datastore.build_dstore_log(parent=parent) - with open(config) as f: - dic = toml.load(f) - imt1 = dic['imt1'] - imt2 = dic['imt2'] - method = dic.get('method', 'indirect') - crosscorr = getattr(cross_correlation, dic['cross_correlation'])() - meabins = dic['meabins'] - sigbins = dic['sigbins'] - with dstore, log: - oq = parent['oqparam'] - N = len(parent['sitecol']) - L1 = oq.imtls.size // len(oq.imtls) - 1 - if L1 > 24: - logging.warning('There are many intensity levels (%d), the ' - 'calculation can be pretty slow', L1 + 1) - assert N <= 10, 'Too many sites: %d' % N - cmakers = contexts.read_cmakers(parent) - ctx_by_grp = contexts.read_ctx_by_grp(dstore) - n = sum(len(ctx) for ctx in ctx_by_grp.values()) - logging.info('Read {:_d} contexts'.format(n)) - blocksize = numpy.ceil(n / oq.concurrent_tasks) - dstore.swmr_on() - smap = parallel.Starmap(compute_mrd, h5=dstore) - for grp_id, ctx in ctx_by_grp.items(): - cmaker = cmakers[grp_id] - for slc in general.gen_slices(0, len(ctx), blocksize): - smap.submit((ctx[slc], N, cmaker, crosscorr, imt1, imt2, - meabins, sigbins, method)) - acc = smap.reduce() - mrd = dstore.create_dset('_mrd', float, (L1, L1, N)) - mrd[:] = combine_mrds(acc, dstore['rlzs_by_g'][:], dstore['weights'][:]) - return mrd[:] - - -if __name__ == '__main__': - sap.run(main) From 58315ec8c7d9783b563d0769b65843e1e64ca5b7 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 10:06:08 +0200 Subject: [PATCH 45/53] Added method parameter --- openquake/calculators/postproc/compute_mrd.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/openquake/calculators/postproc/compute_mrd.py b/openquake/calculators/postproc/compute_mrd.py index c0b4229d0be8..6d18351dfea1 100644 --- a/openquake/calculators/postproc/compute_mrd.py +++ b/openquake/calculators/postproc/compute_mrd.py @@ -33,7 +33,7 @@ def __init__(self, ctx, cmaker, N): def compute_mrd(inp, crosscorr, imt1, imt2, - meabins, sigbins, monitor): + meabins, sigbins, method, monitor): """ :param inp: an Input object (contexts, contextm maker, num_sites) :param N: the total number of sites @@ -43,11 +43,12 @@ def compute_mrd(inp, crosscorr, imt1, imt2, :param str imt1: the second Intensity Measure Type :param meabins: bins for the means :param sigbins: bins for the sigmas + :param method: a string 'direct' or 'indirect' :returns: 4D-matrix with shape (L1, L1, N, G) """ G = len(inp.cmaker.gsims) mrd = calc_mean_rate_dist(inp.ctx, inp.N, inp.cmaker, crosscorr, - imt1, imt2, meabins, sigbins) + imt1, imt2, meabins, sigbins, method) return {g: mrd[:, :, :, i % G] for i, g in enumerate(inp.cmaker.gid)} @@ -62,7 +63,8 @@ def combine_mrds(acc, g_weights): return out -def main(dstore, imt1, imt2, cross_correlation, seed, meabins, sigbins): +def main(dstore, imt1, imt2, cross_correlation, seed, meabins, sigbins, + method='indirect'): """ :param dstore: datastore with the classical calculation @@ -86,7 +88,7 @@ def main(dstore, imt1, imt2, cross_correlation, seed, meabins, sigbins): # NB: a trivial splitting of the contexts would case task-dependency! 
cmaker = cmakers[grp_id] smap.submit((Input(ctx, cmaker, N), crosscorr, imt1, imt2, - meabins, sigbins)) + meabins, sigbins, method)) acc = smap.reduce() mrd = dstore.create_dset('mrd', float, (L1, L1, N)) trt_rlzs = full_lt.get_trt_rlzs(dstore['trt_smrs'][:]) From 2aecd5d01d2f3433ecf702ba3b2ba7970674e4a3 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 10:12:03 +0200 Subject: [PATCH 46/53] Small renaming [ci skip] --- openquake/calculators/postproc/compute_mrd.py | 2 +- openquake/hazardlib/calc/mrd.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/openquake/calculators/postproc/compute_mrd.py b/openquake/calculators/postproc/compute_mrd.py index 6d18351dfea1..ed8d3e55e44c 100644 --- a/openquake/calculators/postproc/compute_mrd.py +++ b/openquake/calculators/postproc/compute_mrd.py @@ -43,7 +43,7 @@ def compute_mrd(inp, crosscorr, imt1, imt2, :param str imt1: the second Intensity Measure Type :param meabins: bins for the means :param sigbins: bins for the sigmas - :param method: a string 'direct' or 'indirect' + :param method: string 'direct' or 'indirect' :returns: 4D-matrix with shape (L1, L1, N, G) """ G = len(inp.cmaker.gsims) diff --git a/openquake/hazardlib/calc/mrd.py b/openquake/hazardlib/calc/mrd.py index 9d3514ff72f2..f5fa2a175c82 100644 --- a/openquake/hazardlib/calc/mrd.py +++ b/openquake/hazardlib/calc/mrd.py @@ -42,12 +42,12 @@ def get_uneven_bins_edges(lefts, num_bins): return numpy.array(tmp) -def update_mrd(ctxs: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): +def update_mrd(ctxt: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): """ This computes the mean rate density by means of the multivariate normal function available in scipy. - :param ctxs: + :param ctxt: A context array for a single site :param cm: A ContextMaker @@ -62,7 +62,7 @@ def update_mrd(ctxs: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): corrm = crosscorr.get_cross_correlation_mtx(imts) # Compute mean and standard deviation - [mea, sig, _, _] = cm.get_mean_stds([ctxs]) + [mea, sig, _, _] = cm.get_mean_stds([ctxt]) # Get the logarithmic IMLs ll1 = numpy.log(cm.imtls[im1]) @@ -74,7 +74,7 @@ def update_mrd(ctxs: numpy.recarray, cm, crosscorr, mrd, monitor=Monitor()): # is the number of sites trate = 0 for g, _ in enumerate(cm.gsims): - for i, ctx in enumerate(ctxs): + for i, ctx in enumerate(ctxt): # Covariance and correlation mtxs slc0 = numpy.index_exp[g, :, i] From c7ddc9fa310aaa7a4a03c0ab804396af3a4b78e6 Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Thu, 29 Jun 2023 10:21:31 +0200 Subject: [PATCH 47/53] Updated changelog [ci skip] --- debian/changelog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/debian/changelog b/debian/changelog index a720227e25c3..1c5cd4b255c7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,6 @@ + [Marco Pagani, Michele Simionato] + * Added support for the "direct" method in MRD calculations + [Michele Simionato] * Reimplemented the conditional_spectrum calculator as a post-processor From ef9638a11428d4d185451acb9a0b4ad979fc9932 Mon Sep 17 00:00:00 2001 From: Matteo Nastasi Date: Thu, 29 Jun 2023 15:05:27 +0200 Subject: [PATCH 48/53] typo fixed in docs.yml action file --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 507b0804700d..ea9eb835e773 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -326,7 +326,7 @@ jobs: echo "Upload 
OpenQuakeforAdvancedUsers (PDF)" rsync -e 'ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null' -ax --delete build/latex/OpenQuakeforAdvancedUsers.pdf "docs@docs.openquake.org:${DOCS_BASE}${TARGET}PDF/" - ssh -v -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null docs@docs.openquake.org "bash -cx 'cd \"${DOCS_BASE}${PDFDOCS}\"; ln -sf \"../${TARGET}PDF/OpenQuakeforAdvancedUsers.pdf\" \"OpenQuake for Advanced Users ${PDF_VER}.pdf\" .'" + ssh -v -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null docs@docs.openquake.org "bash -cx 'cd \"${DOCS_BASE}${PDFDOCS}\"; ln -sf \"../${TARGET}PDF/OpenQuakeforAdvancedUsers.pdf\" \"OpenQuake for Advanced Users ${PDF_VER}.pdf\"'" echo "Make Advanced Manual (HTML)" cd ../adv-manual && make clean && make html From 2b95beb853aa17e2a1beb3e9d9922cddefdfab0b Mon Sep 17 00:00:00 2001 From: Michele Simionato Date: Fri, 30 Jun 2023 08:18:53 +0200 Subject: [PATCH 49/53] Documented the postprocessing framework --- doc/adv-manual/classical_PSHA.rst | 195 +++++++++++------- openquake/calculators/base.py | 4 +- openquake/calculators/postproc/compute_mrd.py | 2 +- openquake/calculators/tests/postproc_test.py | 2 +- openquake/commonlib/oqvalidation.py | 4 +- openquake/hazardlib/tests/calc/data/job.ini | 2 +- openquake/hazardlib/valid.py | 1 + .../conditional_spectrum/case_1/job.ini | 2 +- .../conditional_spectrum/case_2/job.ini | 2 +- .../qa_tests_data/logictree/case_01/job.ini | 2 +- .../qa_tests_data/logictree/case_05/job.ini | 2 +- .../logictree/case_05/job_bis.ini | 2 +- .../qa_tests_data/logictree/case_06/job.ini | 2 +- .../qa_tests_data/logictree/case_07/job.ini | 2 +- .../logictree/case_07/sampling.ini | 2 +- .../qa_tests_data/logictree/case_12/job.ini | 2 +- .../logictree/case_13/job_gmv.ini | 2 +- .../qa_tests_data/logictree/case_19/job.ini | 2 +- .../qa_tests_data/postproc/case_mrd/job.ini | 5 +- 19 files changed, 144 insertions(+), 93 deletions(-) diff --git a/doc/adv-manual/classical_PSHA.rst b/doc/adv-manual/classical_PSHA.rst index 1bd1c2d2bc68..ebbc4e8e4622 100644 --- a/doc/adv-manual/classical_PSHA.rst +++ b/doc/adv-manual/classical_PSHA.rst @@ -851,84 +851,74 @@ are the same. disagg_by_src ======================================= -Given a system of various sources affecting a specific site, -one very common question to ask is: what are the more relevant sources, -i.e. which sources contribute the most to the mean hazard curve? -The engine is able to answer such question by setting the ``disagg_by_src`` -flag in the job.ini file. When doing that, the engine saves in -the datastore a 5-dimensional array called ``disagg_by_src`` with -dimensions (site ID, realization ID, intensity measure type, -intensity measure level, source ID). For that it is possible to extract -the contribution of each source to the mean hazard curve (interested -people should look at the code in the function ``check_disagg_by_src``). -The array ``disagg_by_src`` can also be read as a pandas DataFrame, -then getting something like the following:: - - >> dstore.read_df('disagg_by_src', index='src_id') - site_id rlz_id imt lvl value - ASCTRAS407 0 0 PGA 0 9.703749e-02 - IF-CFS-GRID03 0 0 PGA 0 3.720510e-02 - ASCTRAS407 0 0 PGA 1 6.735009e-02 - IF-CFS-GRID03 0 0 PGA 1 2.851081e-02 - ASCTRAS407 0 0 PGA 2 4.546237e-02 - ... ... ... ... ... ... 
- IF-CFS-GRID03 0 31 PGA 17 6.830692e-05 - ASCTRAS407 0 31 PGA 18 1.072884e-06 - IF-CFS-GRID03 0 31 PGA 18 1.275539e-05 - ASCTRAS407 0 31 PGA 19 1.192093e-07 - IF-CFS-GRID03 0 31 PGA 19 5.960464e-07 +Given a system of various sources affecting a specific site, one very +common question to ask is: what are the more relevant sources, +i.e. which sources contribute the most to the mean hazard curve? The +engine is able to answer such question by setting the +``disagg_by_src`` flag in the job.ini file. When doing that, the +engine saves in the datastore a 4-dimensional ArrayWrapper called +``mean_rates_by_src`` with dimensions (site ID, intensity measure +type, intensity measure level, source ID). From that it is possible to +extract the contribution of each source to the mean hazard curve +(interested people should look at the code in the function +``check_disagg_by_src``). The ArrayWrapper ``mean_rates_by_src`` can also be +converted into a pandas DataFrame, then getting something like the +following:: + + >> dstore['mean_rates_by_src'].to_dframe().set_index('src_id') + site_id imt lvl value + ASCTRAS407 0 PGA 0 9.703749e-02 + IF-CFS-GRID03 0 PGA 0 3.720510e-02 + ASCTRAS407 0 PGA 1 6.735009e-02 + IF-CFS-GRID03 0 PGA 1 2.851081e-02 + ASCTRAS407 0 PGA 2 4.546237e-02 + ... ... ... ... ... + IF-CFS-GRID03 0 PGA 17 6.830692e-05 + ASCTRAS407 0 PGA 18 1.072884e-06 + IF-CFS-GRID03 0 PGA 18 1.275539e-05 + ASCTRAS407 0 PGA 19 1.192093e-07 + IF-CFS-GRID03 0 PGA 19 5.960464e-07 The ``value`` field here is the probability of exceedence in the hazard curve. The ``lvl`` field is an integer corresponding to the intensity measure level in the hazard curve. -There is a consistency check comparing the mean hazard curves -with the value obtained by composing the probabilities in the -disagg_by_src` array, for the heighest level of each intensity measure type. - -It should be noticed that many hazard models contain thousands of -sources and as a consequence the ``disagg_by_src`` matrix can be -impossible to compute without running out of memory. Even if you have -enough memory, having a very large ``disagg_by_src`` matrix is a bad -idea, so there is a limit on the size of the matrix, hard-coded to 4 -GB. The way to circumvent the limit is to reduce the number of -sources: for instance you could convert point sources in multipoint -sources. - -In engine 3.15 we also introduced the so-called "colon convention" on source -IDs: if you have many sources that for some reason should be collected -together - for instance because they all account for seismicity in the same -tectonic region, or because they are components of a same source but are split -into separate sources by magnitude - you -can tell the engine to collect them into one source in the ``disagg_by_src`` -matrix. The trick is to use IDs with the same prefix, a colon, and then a -numeric index. For instance, if you had 3 sources with IDs ``src_mag_6.65``, -``src_mag_6.75``, ``src_mag_6.85``, fragments of the same source with -different magnitudes, you could change their IDs to something like -``src:0``, ``src:1``, ``src:2`` and that would reduce the size of the -matrix ``disagg_by_src`` by 3 times by collecting together the contributions -of each source. There is no restriction on the numeric indices to start -from 0, so using the names ``src:665``, ``src:675``, ``src:685`` would -work too and would be clearer: the IDs should be unique, however. 
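+
+To make the grouping concrete, here is a minimal sketch in plain
+Python (not engine code; the rates below are made up for the sake of
+illustration) of how IDs sharing the prefix before the colon are
+composed into a single entry::
+
+  from collections import defaultdict
+
+  def basename(src_id):
+      # 'src:665' -> 'src'; IDs without a colon are kept as they are
+      return src_id.split(':')[0]
+
+  rates = {'src:665': .01, 'src:675': .02, 'src:685': .03, 'other': .04}
+  acc = defaultdict(float)
+  for src_id, rate in rates.items():
+      acc[basename(src_id)] += rate
+  # now acc maps 'src' to .06 and 'other' to .04 (up to rounding):
+  # the three fragments count as a single source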
- -If the IDs are not unique and the engine determines that the underlying -sources are different, then an extension "semicolon + incremental index" -is automatically added. This is useful when the hazard modeler wants -to define a model where the more than one version of the same source appears -in one source model, having changed some of the parameters, or when varied -versions of a source appear in each branch of a logic tree. In that case, -the modeler should use always the exact same ID (i.e. without the colon and -numeric index): the engine will automatically distinguish the -sources during the calculation of the hazard curves and consider them the same -when saving the array ``disagg_by_src``: you can see an example in the -test ``qa_tests_data/classical/case_79`` in the engine code base. In that -case the ``source_info`` dataset will list 6 sources -``ASCTRAS407;0``, ``ASCTRAS407;1``, ``ASCTRAS407;2``, ``ASCTRAS407;3``, -``IF-CFS-GRID03;0``, ``IF-CFS-GRID03;1`` but the matrix ``disagg_by_src`` -will see only two sources ``ASCTRAS407`` and ``IF-CFS-GRID03`` obtained -by composing together the versions of the underlying sources. - -In version 3.15 ``disagg_by_src`` was extended to work with mutually +In engine 3.15 we introduced the so-called "colon convention" on +source IDs: if you have many sources that for some reason should be +collected together - for instance because they all account for +seismicity in the same tectonic region, or because they are components +of a same source but are split into separate sources by magnitude - +you can tell the engine to collect them into one source in the +``mean_rates_by_src`` matrix. The trick is to use IDs with the same +prefix, a colon, and then a numeric index. For instance, if you had 3 +sources with IDs ``src_mag_6.65``, ``src_mag_6.75``, ``src_mag_6.85``, +fragments of the same source with different magnitudes, you could +change their IDs to something like ``src:0``, ``src:1``, ``src:2`` and +that would reduce the size of the matrix ``mean_rates_by_src`` by 3 +times by collecting together the contributions of each source. There +is no restriction on the numeric indices to start from 0, so using the +names ``src:665``, ``src:675``, ``src:685`` would work too and would +be clearer: the IDs should be unique, however. + +If the IDs are not unique and the engine determines that the +underlying sources are different, then an extension "semicolon + +incremental index" is automatically added. This is useful when the +hazard modeler wants to define a model where the more than one version +of the same source appears in one source model, having changed some of +the parameters, or when varied versions of a source appear in each +branch of a logic tree. In that case, the modeler should use always +the exact same ID (i.e. without the colon and numeric index): the +engine will automatically distinguish the sources during the +calculation of the hazard curves and consider them the same when +saving the array ``mean_rates_by_src``: you can see an example in the +test ``qa_tests_data/classical/case_20/job_bis.ini`` in the engine +code base. In that case the ``source_info`` dataset will list 7 +sources ``CHAR1;0 CHAR1;1 CHAR1;2 COMFLT1;0 COMFLT1;1 SFLT1;0 +SFLT1;1`` but the matrix ``mean_rates_by_src`` will see only three +sources ``CHAR1 COMFLT1 SFLT1`` obtained by composing together the +versions of the underlying sources. + +In version 3.15 ``mean_rates_by_src`` was extended to work with mutually exclusive sources, i.e. for the Japan model. 
You can see an example in the test ``qa_tests_data/classical/case_27``. However, the case of mutually exclusive ruptures - an example is the New Madrid cluster @@ -975,8 +965,65 @@ NB: ``disagg_by_src`` can be set to true only if the not in the original source model, thus breaking the connection between the values of the matrix and the original sources. +The post-processing framework and Vector-valued PSHA +==================================================== + +Since version 3.17 the OpenQuake engine has special support for +custom post-processors. A postprocessor is a Python module located +in the directory ``openquake/calculators/postproc`` and containing +a ``main`` function with signature:: + +.. code-block:: + + def main(dstore, [csm], ...): + ... + +In version 3.17 post-processors work only after classical or +preclassical calculations; the ``dstore`` parameter is a DataStore +instance corresponding to the current calculation, while the ``csm`` +parameter is a CompositeSourceModel instance (it can be omitted if not +needed). + +The ``main`` function is automatically called when the user sets in +the job.ini file the parameters ``postproc_mod`` and +``postproc_args``. ``postproc_mod`` is the name of the postprocessing +module and ``postproc_args`` is a dictionary of literal arguments that +get passed to the ``main`` function; if not specified the empty +dictionary is passed. This happens for istance for the conditional +spectrum post-processor since it does not require additional arguments +with respect to the ones in ``dstore['oqparam']``. + +The post-processing framework was put in place in order to run +VPSHA calculations. The user can find an example in +https://github.com/gem/oq-engine/blob/engine-3.17/openquake/qa_tests_data/postproc/case_mrd. In the job.ini file there are the lines:: + + postproc_mod = compute_mrd + postproc_args = { + 'imt1': 'PGA', + 'imt2': 'SA(0.05)', + 'cross_correlation': 'BakerJayaram2008', + 'seed': 42, + 'meabins': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6], + 'sigbins': [0.2, 0.3, 0.4, 0.5, 0.6, 0.7], + 'method': 'indirect'} + +while the postprocessor module ``openquake.calculators.postproc.compute_mrd`` +contains the function + +.. code-block: + + # in openquake.calculators.postproc.compute_mrd + def main(dstore, imt1, imt2, cross_correlation, seed, meabins, sigbins, + method='indirect'): + ... + +Inside ``main`` there is code to create the dataset ``mrd`` which +contains the Mean Rate Distribution as an array of shape L1 x L1 x N +where L1 is the number of levels per IMT minus 1 and N the number of +sites (normally 1). + The conditional spectrum post-processor -======================================== +--------------------------------------- Since version 3.17 the engine includes an experimental post-processor which is able to compute the conditional spectrum. 
diff --git a/openquake/calculators/base.py b/openquake/calculators/base.py index 63a8691f3802..32e2d065b87a 100644 --- a/openquake/calculators/base.py +++ b/openquake/calculators/base.py @@ -997,7 +997,9 @@ def post_process(self): """ oq = self.oqparam if oq.postproc_func: - func = getattr(postproc, oq.postproc_func).main + modname, funcname = oq.postproc_func.rsplit('.', 1) + mod = getattr(postproc, modname) + func = getattr(mod, funcname) if 'csm' in inspect.getargspec(func).args: if hasattr(self, 'csm'): # already there csm = self.csm diff --git a/openquake/calculators/postproc/compute_mrd.py b/openquake/calculators/postproc/compute_mrd.py index ed8d3e55e44c..c1193903a5dd 100644 --- a/openquake/calculators/postproc/compute_mrd.py +++ b/openquake/calculators/postproc/compute_mrd.py @@ -78,7 +78,7 @@ def main(dstore, imt1, imt2, cross_correlation, seed, meabins, sigbins, if L1 > 24: logging.warning('There are many intensity levels (%d), the ' 'calculation can be pretty slow', L1 + 1) - assert N <= 10, 'Too many sites: %d' % N + assert N <= oq.max_sites_disagg, 'Too many sites: %d' % N cmakers = contexts.read_cmakers(dstore) ctx_by_grp = contexts.read_ctx_by_grp(dstore) n = sum(len(ctx) for ctx in ctx_by_grp.values()) diff --git a/openquake/calculators/tests/postproc_test.py b/openquake/calculators/tests/postproc_test.py index 34d0f438cba0..15e0709b099a 100644 --- a/openquake/calculators/tests/postproc_test.py +++ b/openquake/calculators/tests/postproc_test.py @@ -34,7 +34,7 @@ class PostProcTestCase(CalculatorTestCase): def test_mrd(self): # Computes the mean rate density using a simple PSHA input model - self.run_calc(case_mrd.__file__, 'job.ini', postproc_func='dummy') + self.run_calc(case_mrd.__file__, 'job.ini', postproc_func='dummy.main') fnames = export(('hcurves', 'csv'), self.calc.datastore) for fname in fnames: self.assertEqualFiles('expected/%s' % strip_calc_id(fname), fname) diff --git a/openquake/commonlib/oqvalidation.py b/openquake/commonlib/oqvalidation.py index 884d1ddbff91..cd2f97f760f0 100644 --- a/openquake/commonlib/oqvalidation.py +++ b/openquake/commonlib/oqvalidation.py @@ -566,7 +566,7 @@ postproc_func: Specify a postprocessing function in calculators/postproc. 
-  Example: *postproc_func = compute_mrd*
+  Example: *postproc_func = compute_mrd.main*
   Default: '' (no postprocessing)
 
 postproc_args:
@@ -1004,7 +1004,7 @@ class OqParam(valid.ParamSet):
     poes = valid.Param(valid.probabilities, [])
     poes_disagg = valid.Param(valid.probabilities, [])
     pointsource_distance = valid.Param(valid.floatdict, {'default': PSDIST})
-    postproc_func = valid.Param(valid.simple_id, '')
+    postproc_func = valid.Param(valid.mod_func, 'dummy.main')
     postproc_args = valid.Param(valid.dictionary, {})
     prefer_global_site_params = valid.Param(valid.boolean, None)
     ps_grid_spacing = valid.Param(valid.positivefloat, 0)
diff --git a/openquake/hazardlib/tests/calc/data/job.ini b/openquake/hazardlib/tests/calc/data/job.ini
index 2084ac6cd52a..75733faafadb 100644
--- a/openquake/hazardlib/tests/calc/data/job.ini
+++ b/openquake/hazardlib/tests/calc/data/job.ini
@@ -2,7 +2,7 @@
 description = Conditional spectrum
 calculation_mode = classical
 
-postproc_func = conditional_spectrum
+postproc_func = conditional_spectrum.main
 uniform_hazard_spectra = true
 
 [geometry]
diff --git a/openquake/hazardlib/valid.py b/openquake/hazardlib/valid.py
index 301c96350aae..9ca987e36708 100644
--- a/openquake/hazardlib/valid.py
+++ b/openquake/hazardlib/valid.py
@@ -353,6 +353,7 @@ def __call__(self, value):
 source_id = SimpleId(MAX_ID_LENGTH, r'^[\w\-_:]+$')
 nice_string = SimpleId(  # nice for Windows, Linux, HDF5 and XML
     ASSET_ID_LENGTH, r'[a-zA-Z0-9\.`!#$%\(\)\+/,;@\[\]\^_{|}~-]+')
+mod_func = SimpleId(MAX_ID_LENGTH, r'\w+\.\w+')
 
 
 def risk_id(value):
diff --git a/openquake/qa_tests_data/conditional_spectrum/case_1/job.ini b/openquake/qa_tests_data/conditional_spectrum/case_1/job.ini
index 9df5ceaf649b..d7b35c434f7a 100644
--- a/openquake/qa_tests_data/conditional_spectrum/case_1/job.ini
+++ b/openquake/qa_tests_data/conditional_spectrum/case_1/job.ini
@@ -2,7 +2,7 @@
 description = Conditional spectrum
 calculation_mode = classical
 
-postproc_func = conditional_spectrum
+postproc_func = conditional_spectrum.main
 uniform_hazard_spectra = true
 
 [geometry]
diff --git a/openquake/qa_tests_data/conditional_spectrum/case_2/job.ini b/openquake/qa_tests_data/conditional_spectrum/case_2/job.ini
index 92bb1a46c25a..58a26c5c78b5 100644
--- a/openquake/qa_tests_data/conditional_spectrum/case_2/job.ini
+++ b/openquake/qa_tests_data/conditional_spectrum/case_2/job.ini
@@ -2,7 +2,7 @@
 description = Conditional spectrum
 calculation_mode = classical
 
-postproc_func = conditional_spectrum
+postproc_func = conditional_spectrum.main
 uniform_hazard_spectra = true
 
 [geometry]
diff --git a/openquake/qa_tests_data/logictree/case_01/job.ini b/openquake/qa_tests_data/logictree/case_01/job.ini
index ab06f67aea1e..a012a2770117 100644
--- a/openquake/qa_tests_data/logictree/case_01/job.ini
+++ b/openquake/qa_tests_data/logictree/case_01/job.ini
@@ -5,7 +5,7 @@
 calculation_mode = classical
 random_seed = 1066
 disagg_by_src = true
 use_rates = true
-postproc_func = disagg_by_rel_sources
+postproc_func = disagg_by_rel_sources.main
 
 [geometry]
diff --git a/openquake/qa_tests_data/logictree/case_05/job.ini b/openquake/qa_tests_data/logictree/case_05/job.ini
index f47fb5cf4b75..9381970ebaef 100644
--- a/openquake/qa_tests_data/logictree/case_05/job.ini
+++ b/openquake/qa_tests_data/logictree/case_05/job.ini
@@ -5,7 +5,7 @@
 calculation_mode = classical
 random_seed = 23
 disagg_by_src = true
 use_rates = true
-postproc_func = disagg_by_rel_sources
+postproc_func = disagg_by_rel_sources.main
 
 [geometry]
diff --git a/openquake/qa_tests_data/logictree/case_05/job_bis.ini b/openquake/qa_tests_data/logictree/case_05/job_bis.ini
index 9c6c859dac1b..701edffb1afd 100644
--- a/openquake/qa_tests_data/logictree/case_05/job_bis.ini
+++ b/openquake/qa_tests_data/logictree/case_05/job_bis.ini
@@ -3,7 +3,7 @@
 description = Reducing source specific logic tree, sampling
 calculation_mode = classical
 random_seed = 23
 
-postproc_func = disagg_by_rel_sources
+postproc_func = disagg_by_rel_sources.main
 
 [geometry]
diff --git a/openquake/qa_tests_data/logictree/case_06/job.ini b/openquake/qa_tests_data/logictree/case_06/job.ini
index 8afddd4dafa8..83e04e962203 100644
--- a/openquake/qa_tests_data/logictree/case_06/job.ini
+++ b/openquake/qa_tests_data/logictree/case_06/job.ini
@@ -5,7 +5,7 @@
 calculation_mode = classical
 random_seed = 1066
 disagg_by_src = true
 use_rates = true
-postproc_func = disagg_by_rel_sources
+postproc_func = disagg_by_rel_sources.main
 
 [geometry]
diff --git a/openquake/qa_tests_data/logictree/case_07/job.ini b/openquake/qa_tests_data/logictree/case_07/job.ini
index 2b57de9801fe..e876c36efd2a 100644
--- a/openquake/qa_tests_data/logictree/case_07/job.ini
+++ b/openquake/qa_tests_data/logictree/case_07/job.ini
@@ -5,7 +5,7 @@
 calculation_mode = classical
 random_seed = 1066
 disagg_by_src = true
 use_rates = true
-postproc_func = disagg_by_rel_sources
+postproc_func = disagg_by_rel_sources.main
 
 [geometry]
diff --git a/openquake/qa_tests_data/logictree/case_07/sampling.ini b/openquake/qa_tests_data/logictree/case_07/sampling.ini
index 0f905a0568af..ff2bf1aedb29 100644
--- a/openquake/qa_tests_data/logictree/case_07/sampling.ini
+++ b/openquake/qa_tests_data/logictree/case_07/sampling.ini
@@ -5,7 +5,7 @@
 calculation_mode = classical
 random_seed = 1066
 disagg_by_src = true
 use_rates = true
-postproc_func = disagg_by_rel_sources
+postproc_func = disagg_by_rel_sources.main
 
 [geometry]
diff --git a/openquake/qa_tests_data/logictree/case_12/job.ini b/openquake/qa_tests_data/logictree/case_12/job.ini
index 3e20cb51733c..ba5ece00e55e 100755
--- a/openquake/qa_tests_data/logictree/case_12/job.ini
+++ b/openquake/qa_tests_data/logictree/case_12/job.ini
@@ -3,7 +3,7 @@
 description = 2018 PSHA model - North Africa
 calculation_mode = classical
 random_seed = 19
 
-postproc_func = disagg_by_rel_sources
+postproc_func = disagg_by_rel_sources.main
 
 [geometry]
diff --git a/openquake/qa_tests_data/logictree/case_13/job_gmv.ini b/openquake/qa_tests_data/logictree/case_13/job_gmv.ini
index 0f55c2cfb159..61a9b03b51e7 100644
--- a/openquake/qa_tests_data/logictree/case_13/job_gmv.ini
+++ b/openquake/qa_tests_data/logictree/case_13/job_gmv.ini
@@ -34,5 +34,5 @@ intensity_measure_types_and_levels = {"PGA": [0.005, 0.007, 0.0098, 0.0137, 0.01
 truncation_level = 3
 maximum_distance = 100
 
-postproc_func = med_gmv
+postproc_func = med_gmv.main
 postproc_args = {}
diff --git a/openquake/qa_tests_data/logictree/case_19/job.ini b/openquake/qa_tests_data/logictree/case_19/job.ini
index 0c8f763e088d..076792155280 100644
--- a/openquake/qa_tests_data/logictree/case_19/job.ini
+++ b/openquake/qa_tests_data/logictree/case_19/job.ini
@@ -6,7 +6,7 @@
 random_seed = 23
 disagg_by_src = true
 use_rates = true
 concurrent_tasks = 1
-postproc_func = disagg_by_rel_sources
+postproc_func = disagg_by_rel_sources.main
 
 [geometry]
 sites = 16.2657 39.3002
diff --git a/openquake/qa_tests_data/postproc/case_mrd/job.ini b/openquake/qa_tests_data/postproc/case_mrd/job.ini
index ce0f3c75da81..55d1965fdda6 100644
--- a/openquake/qa_tests_data/postproc/case_mrd/job.ini
+++ b/openquake/qa_tests_data/postproc/case_mrd/job.ini
@@ -32,14 +32,15 @@
 maximum_distance = 300
 
 [postproc]
 
-postproc_func = compute_mrd
+postproc_func = compute_mrd.main
 postproc_args = {
   'imt1': 'PGA',
   'imt2': 'SA(0.05)',
   'cross_correlation': 'BakerJayaram2008',
   'seed': 42,
   'meabins': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
-  'sigbins': [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]}
+  'sigbins': [0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
+  'method': 'indirect'}
 
 [output]

From 637406a64befa4db3390e310466ec2a07223e33e Mon Sep 17 00:00:00 2001
From: Michele Simionato
Date: Fri, 30 Jun 2023 08:31:02 +0200
Subject: [PATCH 50/53] Fixed AELO tests

---
 openquake/engine/aelo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openquake/engine/aelo.py b/openquake/engine/aelo.py
index 6b42a9696689..aea9bfc0fad8 100644
--- a/openquake/engine/aelo.py
+++ b/openquake/engine/aelo.py
@@ -53,7 +53,7 @@ def get_params_from(inputs):
         params['sites'] = '%(lon)s %(lat)s' % inputs
     if 'vs30' in inputs:
         params['override_vs30'] = '%(vs30)s' % inputs
-    params['postproc_func'] = 'disagg_by_rel_sources'
+    params['postproc_func'] = 'disagg_by_rel_sources.main'
     # params['cachedir'] = datastore.get_datadir()
     return params

From abce2e5dcc4a7848965e217598fd549cf05d3b63 Mon Sep 17 00:00:00 2001
From: Michele Simionato
Date: Fri, 30 Jun 2023 08:51:02 +0200
Subject: [PATCH 51/53] Fixed test

---
 doc/adv-manual/classical_PSHA.rst             | 24 +++++++++++++++++++
 openquake/calculators/tests/logictree_test.py |  2 +-
 openquake/hazardlib/valid.py                  |  2 +-
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/doc/adv-manual/classical_PSHA.rst b/doc/adv-manual/classical_PSHA.rst
index ebbc4e8e4622..3be52c92bf64 100644
--- a/doc/adv-manual/classical_PSHA.rst
+++ b/doc/adv-manual/classical_PSHA.rst
@@ -1022,6 +1022,30 @@ contains the Mean Rate Distribution as an array of shape L1 x L1 x N
 where L1 is the number of levels per IMT minus 1 and N the number of
 sites (normally 1).
 
+While the postprocessing part for VPSHA calculations is computationally
+very heavy, it is much more common to have a light postprocessing, a
+lot faster than the classical calculation it depends on. In such
+situations the postprocessing framework really shines, since it is
+possible to reuse the original calculation via the standard ``--hc``
+switch, i.e. you can avoid repeating multiple times the same classical
+calculation if you are interested in running the postprocessor with
+different parameters. In that situation the ``main`` function will
+get passed a DataStore instance with an attribute ``parent`` corresponding
+to the DataStore of the original calculation.
+
+The postprocessing framework also integrates very well with interactive
+development (think of Jupyter notebooks). The following lines are all
+you need to create a child datastore where the postprocessing function
+can store its results after reading the data from the calculation datastore:
+
+.. code-block::
+
+ >> from openquake.commonlib.datastore import read, build_dstore_log
+ >> from openquake.calculators.postproc import mypostproc
+ >> dstore, log = build_dstore_log(parent=read(calc_id))
+ >> with log:
+ ..     mypostproc.main(dstore)
+
 The conditional spectrum post-processor
 ---------------------------------------
 
diff --git a/openquake/calculators/tests/logictree_test.py b/openquake/calculators/tests/logictree_test.py
index 91c26af22fc2..597f60002362 100644
--- a/openquake/calculators/tests/logictree_test.py
+++ b/openquake/calculators/tests/logictree_test.py
@@ -112,7 +112,7 @@ def test_case_04(self):
                       calculation_mode='preclassical')
         hc_id = str(self.calc.datastore.calc_id)
         self.run_calc(case_04.__file__, 'job.ini', hazard_calculation_id=hc_id,
-                      postproc_func='disagg_by_rel_sources')
+                      postproc_func='disagg_by_rel_sources.main')
         [fname] = export(('hcurves', 'csv'), self.calc.datastore)
         self.assertEqualFiles('expected/curve-mean.csv', fname)
diff --git a/openquake/hazardlib/valid.py b/openquake/hazardlib/valid.py
index 9ca987e36708..861507eff2d2 100644
--- a/openquake/hazardlib/valid.py
+++ b/openquake/hazardlib/valid.py
@@ -353,7 +353,7 @@ def __call__(self, value):
 source_id = SimpleId(MAX_ID_LENGTH, r'^[\w\-_:]+$')
 nice_string = SimpleId(  # nice for Windows, Linux, HDF5 and XML
     ASSET_ID_LENGTH, r'[a-zA-Z0-9\.`!#$%\(\)\+/,;@\[\]\^_{|}~-]+')
-mod_func = SimpleId(MAX_ID_LENGTH, r'\w+\.\w+')
+mod_func = SimpleId(MAX_ID_LENGTH, r'[\w_]+\.[\w_]+')
 
 
 def risk_id(value):

From e21363fe92b6de21b3041504f4310ceb3de22860 Mon Sep 17 00:00:00 2001
From: Michele Simionato
Date: Fri, 30 Jun 2023 09:52:29 +0200
Subject: [PATCH 52/53] Doc fix

---
 doc/adv-manual/classical_PSHA.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/adv-manual/classical_PSHA.rst b/doc/adv-manual/classical_PSHA.rst
index 3be52c92bf64..873b158dbdd1 100644
--- a/doc/adv-manual/classical_PSHA.rst
+++ b/doc/adv-manual/classical_PSHA.rst
@@ -1010,7 +1010,7 @@ https://github.com/gem/oq-engine/blob/engine-3.17/openquake/qa_tests_data/postpr
 while the postprocessor module ``openquake.calculators.postproc.compute_mrd``
 contains the function
 
-.. code-block:
+.. code-block::
 
     # in openquake.calculators.postproc.compute_mrd
     def main(dstore, imt1, imt2, cross_correlation, seed, meabins, sigbins,

From 5d09a5c33cce61348ea82664e5940cca367fbc6c Mon Sep 17 00:00:00 2001
From: Michele Simionato
Date: Fri, 30 Jun 2023 10:08:44 +0200
Subject: [PATCH 53/53] Doc fixes

---
 doc/adv-manual/classical_PSHA.rst | 44 ++++++++++++++++---------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/doc/adv-manual/classical_PSHA.rst b/doc/adv-manual/classical_PSHA.rst
index 873b158dbdd1..f0928708dfac 100644
--- a/doc/adv-manual/classical_PSHA.rst
+++ b/doc/adv-manual/classical_PSHA.rst
@@ -965,39 +965,41 @@ NB: ``disagg_by_src`` can be set to true only if the
 not in the original source model, thus breaking the connection between
 the values of the matrix and the original sources.
 
-The post-processing framework and Vector-valued PSHA
-====================================================
+The post-processing framework and Vector-valued PSHA calculations
+=================================================================
 
 Since version 3.17 the OpenQuake engine has special support for
-custom post-processors. A postprocessor is a Python module located
+custom postprocessors. A postprocessor is a Python module located
 in the directory ``openquake/calculators/postproc`` and containing
-a ``main`` function with signature::
+a ``main`` function with signature:
 
 .. code-block::
 
    def main(dstore, [csm], ...):
       ...
 
-In version 3.17 post-processors work only after classical or
-preclassical calculations; the ``dstore`` parameter is a DataStore
-instance corresponding to the current calculation, while the ``csm``
+Post-processors are called after a classical or
+preclassical calculation: the ``dstore`` parameter is a DataStore
+instance corresponding to the calculation, while the ``csm``
 parameter is a CompositeSourceModel instance (it can be omitted if not
 needed).
 
-The ``main`` function is automatically called when the user sets in
-the job.ini file the parameters ``postproc_mod`` and
-``postproc_args``. ``postproc_mod`` is the name of the postprocessing
-module and ``postproc_args`` is a dictionary of literal arguments that
-get passed to the ``main`` function; if not specified the empty
-dictionary is passed. This happens for instance for the conditional
-spectrum post-processor since it does not require additional arguments
-with respect to the ones in ``dstore['oqparam']``.
+The ``main`` function is called when the user sets in
+the job.ini file the parameters ``postproc_func`` and
+``postproc_args``. ``postproc_func`` is the dotted name of the
+postprocessing function (in the form ``modulename.funcname``
+where ``funcname`` is normally ``main``) and
+``postproc_args`` is a dictionary of literal arguments that get passed
+to the function; if not specified the empty dictionary is
+passed. This happens for instance for the conditional spectrum
+post-processor since it does not require additional arguments with
+respect to the ones in ``dstore['oqparam']``.
 
 The post-processing framework was put in place in order to run
 VPSHA calculations. The user can find an example in
-https://github.com/gem/oq-engine/blob/engine-3.17/openquake/qa_tests_data/postproc/case_mrd. In the job.ini file there are the lines::
+``qa_tests_data/postproc/case_mrd``. In the job.ini file there are the lines::
 
-    postproc_mod = compute_mrd
+    postproc_func = compute_mrd.main
     postproc_args = {
       'imt1': 'PGA',
      'imt2': 'SA(0.05)',
       'cross_correlation': 'BakerJayaram2008',
       'seed': 42,
       'meabins': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
@@ -1012,7 +1014,7 @@ contains the function
 
 .. code-block::
 
-    # in openquake.calculators.postproc.compute_mrd
+    # inside openquake.calculators.postproc.compute_mrd
     def main(dstore, imt1, imt2, cross_correlation, seed, meabins, sigbins,
              method='indirect'):
         ...
@@ -1023,14 +1025,14 @@ where L1 is the number of levels per IMT minus 1 and N the number of
 sites (normally 1).
 
 While the postprocessing part for VPSHA calculations is computationally
-very heavy, it is much more common to have a light postprocessing, a
-lot faster than the classical calculation it depends on. In such
+intensive, it is much more common to have a light postprocessing, i.e.
+faster than the classical calculation it depends on. In such
 situations the postprocessing framework really shines, since it is
 possible to reuse the original calculation via the standard ``--hc``
 switch, i.e. you can avoid repeating multiple times the same classical
 calculation if you are interested in running the postprocessor with
 different parameters. In that situation the ``main`` function will
-get passed a DataStore instance with an attribute ``parent`` corresponding
+get a DataStore instance with an attribute ``parent`` corresponding
 to the DataStore of the original calculation.
 
 The postprocessing framework also integrates very well with interactive