Merge branch 'master' into todorovic-liquefaction
raoanirudh committed Sep 18, 2023
2 parents 34bab21 + 950c16f commit 8ec6da8
Showing 52 changed files with 3,751 additions and 3,605 deletions.
4 changes: 4 additions & 0 deletions debian/changelog
@@ -1,4 +1,8 @@
[Michele Simionato, Paolo Tormene]
* Implemented AEP, OEP curves

[Michele Simionato]
* Internal: changed events.year to be in the range 1..eff_time
* Enhanced `oq engine --delete-calculation` to remove calculation files
even if the DbServer is on a remote machine
* Fixed another site collection filtering bug in conditioned GMFs
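
The "Implemented AEP, OEP curves" entry refers to the two standard exceedance-probability views of aggregate event losses, both keyed on the event year. Below is a minimal sketch of the distinction, with made-up inputs; it is not the engine's actual implementation, which lives in the loss-curves builder used by post_risk.py further down.

import numpy

def yearly_losses(losses, years, eff_time):
    """
    Minimal sketch: AEP is built from the SUM of event losses in each year,
    OEP from the MAX single-event loss in each year.
    """
    aep = numpy.zeros(eff_time)
    oep = numpy.zeros(eff_time)
    for loss, year in zip(losses, years):
        aep[year - 1] += loss                     # aggregate loss of the year
        oep[year - 1] = max(oep[year - 1], loss)  # worst occurrence of the year
    return aep, oep

losses = numpy.array([5., 3., 7., 2.])  # one loss per event (made-up numbers)
years = numpy.array([1, 1, 2, 3])       # event years in the range 1..eff_time
aep, oep = yearly_losses(losses, years, eff_time=3)
# aep -> [8., 7., 2.]; oep -> [5., 7., 2.]; the exceedance curves then follow
# by ranking these yearly values against the chosen return periods
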
20 changes: 12 additions & 8 deletions openquake/calculators/extract.py
@@ -752,35 +752,39 @@ def extract_agg_curves(dstore, what):
agg_id = lst.index(','.join(tagvalues))
else:
agg_id = 0 # total aggregation
ep_fields = dstore.get_attr('aggcurves', 'ep_fields')
if qdic['rlzs']:
[li] = qdic['loss_type'] # loss type index
units = dstore.get_attr('aggcurves', 'units').split()
df = dstore.read_df('aggcurves', sel=dict(agg_id=agg_id, loss_id=li))
rps = list(df.return_period.unique())
P = len(rps)
R = len(qdic['kind'])
arr = numpy.zeros((P, R))
EP = len(ep_fields)
arr = numpy.zeros((R, P, EP))
for rlz in df.rlz_id.unique():
# NB: df may contains zeros but there are no missing periods
# by construction (see build_aggcurves)
arr[:, rlz] = df[df.rlz_id == rlz].loss
for ep_field_idx, ep_field in enumerate(ep_fields):
# NB: df may contains zeros but there are no missing periods
# by construction (see build_aggcurves)
arr[rlz, :, ep_field_idx] = df[df.rlz_id == rlz][ep_field]
else:
name = 'agg_curves-stats/' + lts[0]
shape_descr = hdf5.get_shape_descr(dstore.get_attr(name, 'json'))
rps = list(shape_descr['return_period'])
units = dstore.get_attr(name, 'units').split()
arr = dstore[name][agg_id, k].T # shape P, R
arr = dstore[name][agg_id, k] # shape (P, S, EP)
if qdic['absolute'] == [1]:
pass
elif qdic['absolute'] == [0]:
evalue, = dstore['agg_values'][agg_id][lts]
arr /= evalue
else:
raise ValueError('"absolute" must be 0 or 1 in %s' % what)
attrs = dict(shape_descr=['return_period', 'kind'] + tagnames)
attrs['return_period'] = rps
attrs = dict(shape_descr=['kind', 'return_period', 'ep_field'] + tagnames)
attrs['kind'] = qdic['kind']
attrs['return_period'] = rps
attrs['units'] = units # used by the QGIS plugin
attrs['ep_field'] = ep_fields
for tagname, tagvalue in zip(tagnames, tagvalues):
attrs[tagname] = [tagvalue]
if tagnames:
@@ -1186,7 +1190,7 @@ def extract_mean_rates_by_src(dstore, what):
[imt] = qdict['imt']
[iml] = qdict['iml']
[site_id] = qdict.get('site_id', ['0'])
site_id = int(site_id)
site_id = int(site_id)
imt_id = list(oq.imtls).index(imt)
rates = dset[site_id, imt_id]
L1, Ns = rates.shape
72 changes: 55 additions & 17 deletions openquake/calculators/post_risk.py
@@ -52,21 +52,29 @@ def save_curve_stats(dstore):
aggcurves_df = dstore.read_df('aggcurves')
periods = aggcurves_df.return_period.unique()
P = len(periods)
ep_fields = ['loss']
if 'loss_aep' in aggcurves_df:
ep_fields.append('loss_aep')
if 'loss_oep' in aggcurves_df:
ep_fields.append('loss_oep')
EP = len(ep_fields)
for lt in oq.ext_loss_types:
loss_id = scientific.LOSSID[lt]
out = numpy.zeros((K + 1, S, P))
out = numpy.zeros((K + 1, S, P, EP))
aggdf = aggcurves_df[aggcurves_df.loss_id == loss_id]
for agg_id, df in aggdf.groupby("agg_id"):
for s, stat in enumerate(stats.values()):
for p in range(P):
dfp = df[df.return_period == periods[p]]
ws = weights[dfp.rlz_id.to_numpy()]
ws /= ws.sum()
out[agg_id, s, p] = stat(dfp.loss.to_numpy(), ws)
for e, ep_field in enumerate(ep_fields):
dfp = df[df.return_period == periods[p]]
ws = weights[dfp.rlz_id.to_numpy()]
ws /= ws.sum()
out[agg_id, s, p, e] = stat(dfp[ep_field].to_numpy(),
ws)
stat = 'agg_curves-stats/' + lt
dstore.create_dset(stat, F64, (K + 1, S, P))
dstore.create_dset(stat, F64, (K + 1, S, P, EP))
dstore.set_shape_descr(stat, agg_id=K+1, stat=list(stats),
return_period=periods)
return_period=periods, ep_fields=ep_fields)
dstore.set_attrs(stat, units=units)
dstore[stat][:] = out
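
Each cell of the enlarged agg_curves-stats array, now shaped (K + 1, S, P, EP), is a weighted statistic over the realizations contributing to a single (aggregation, stat, return period, ep_field) combination. A tiny illustration with made-up numbers, assuming the statistic is a weighted mean:

import numpy

losses = numpy.array([10., 12., 9.])  # one loss per realization, single cell
ws = numpy.array([0.5, 0.3, 0.2])     # realization weights
ws /= ws.sum()                        # normalized, as in the loop above
cell = losses @ ws                    # weighted mean -> 10.4
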

@@ -198,22 +206,26 @@ def fix_dtypes(dic):
fix_dtype(dic, F32, floatcolumns)


def build_aggcurves(items, builder):
def build_aggcurves(items, builder, aggregate_loss_curves_types):
"""
:param items: a list of pairs ((agg_id, rlz_id, loss_id), losses)
:param builder: a :class:`LossCurvesMapsBuilder` instance
"""
dic = general.AccumDict(accum=[])
for (agg_id, rlz_id, loss_id), data in items:
curve = {kind: builder.build_curve(data[kind], rlz_id)
year = data.pop('year', ())
curve = {kind: builder.build_curve(
year, kind, data[kind], aggregate_loss_curves_types, rlz_id)
for kind in data}
for p, period in enumerate(builder.return_periods):
dic['agg_id'].append(agg_id)
dic['rlz_id'].append(rlz_id)
dic['loss_id'].append(loss_id)
dic['return_period'].append(period)
for kind in data:
dic[kind].append(curve[kind][p])
# NB: kind be ['fatalities', 'losses'] in a scenario_damage test
for k, c in curve[kind].items():
dic[k].append(c[p])
return dic


@@ -230,12 +242,12 @@ def build_store_agg(dstore, rbe_df, num_events):
tr = oq.time_ratio # (risk_invtime / haz_invtime) * num_ses
if oq.collect_rlzs: # reduce the time ratio by the number of rlzs
tr /= len(dstore['weights'])
rlz_id = dstore['events']['rlz_id']
events = dstore['events'][:]
rlz_id = events['rlz_id']
if len(num_events) > 1:
rbe_df['rlz_id'] = rlz_id[rbe_df.event_id.to_numpy()]
else:
rbe_df['rlz_id'] = 0
del rbe_df['event_id']
aggrisk = general.AccumDict(accum=[])
columns = [col for col in rbe_df.columns if col not in {
'event_id', 'agg_id', 'rlz_id', 'loss_id', 'variance'}]
@@ -271,20 +283,30 @@ def build_store_agg(dstore, rbe_df, num_events):
logging.info('Building aggcurves')
units = dstore['cost_calculator'].get_units(oq.loss_types)
builder = get_loss_builder(dstore, num_events=num_events)
try:
year = events['year']
if len(numpy.unique(year)) == 1: # there is a single year
year = ()
except ValueError: # missing in case of GMFs from CSV
year = ()
items = []
for agg_id in agg_ids:
gb = rbe_df[rbe_df.agg_id == agg_id].groupby(['rlz_id', 'loss_id'])
for (rlz_id, loss_id), df in gb:
data = {kind: df[kind].to_numpy() for kind in loss_kinds}
if len(year):
data['year'] = year[df.event_id.to_numpy()]
items.append([(agg_id, rlz_id, loss_id), data])
dic = parallel.Starmap.apply(
build_aggcurves, (items, builder),
build_aggcurves, (items, builder, oq.aggregate_loss_curves_types),
concurrent_tasks=oq.concurrent_tasks,
h5=dstore.hdf5).reduce()
fix_dtypes(dic)
ep_fields = ['loss' + suffix
for suffix in oq.aggregate_loss_curves_types.split(',')]
dstore.create_df('aggcurves', pandas.DataFrame(dic),
limit_states=' '.join(oq.limit_states),
units=units)
units=units, ep_fields=ep_fields)
return aggrisk
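
The ep_fields attribute written to the aggcurves dataset is derived directly from oq.aggregate_loss_curves_types; with the default value, the list comprehension used above yields one column name per curve type:

aggregate_loss_curves_types = ',_oep,_aep'   # the default
ep_fields = ['loss' + suffix
             for suffix in aggregate_loss_curves_types.split(',')]
# -> ['loss', 'loss_oep', 'loss_aep']
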


@@ -302,7 +324,14 @@ def build_reinsurance(dstore, num_events):
tr = oq.time_ratio # risk_invtime / (haz_invtime * num_ses)
if oq.collect_rlzs: # reduce the time ratio by the number of rlzs
tr /= len(dstore['weights'])
rlz_id = dstore['events']['rlz_id']
events = dstore['events'][:]
rlz_id = events['rlz_id']
try:
year = events['year']
if len(numpy.unique(year)) == 1: # there is a single year
year = ()
except ValueError: # missing in case of GMFs from CSV
year = ()
rbe_df = dstore.read_df('reinsurance-risk_by_event', 'event_id')
columns = rbe_df.columns
if len(num_events) > 1:
@@ -320,13 +349,22 @@ def build_reinsurance(dstore, num_events):
agg = df[col].sum()
avg[col].append(agg * tr if oq.investigation_time else agg / ne)
if oq.investigation_time:
curve = {col: builder.build_curve(df[col].to_numpy(), rlzid)
if len(year):
years = year[df.index.to_numpy()]
else:
years = ()
curve = {col: builder.build_curve(
years, col, df[col].to_numpy(),
oq.aggregate_loss_curves_types,
rlzid)
for col in columns}
for p, period in enumerate(builder.return_periods):
dic['rlz_id'].append(rlzid)
dic['return_period'].append(period)
for col in curve:
dic[col].append(curve[col][p])
for k, c in curve[col].items():
dic[k].append(c[p])

dstore.create_df('reinsurance-avg_portfolio', pandas.DataFrame(avg),
units=dstore['cost_calculator'].get_units(
oq.loss_types))
12 changes: 7 additions & 5 deletions openquake/commonlib/calc.py
@@ -220,6 +220,7 @@ def import_rups_events(self, rup_array, get_rupture_getters):
ne, nr, int(eff_time), mag))

def _save_events(self, rup_array, rgetters):
oq = self.oqparam
# this is very fast compared to saving the ruptures
E = rup_array['n_occ'].sum()
events = numpy.zeros(E, rupture.events_dt)
@@ -253,13 +254,14 @@ def _save_events(self, rup_array, rgetters):
numpy.testing.assert_equal(events['id'], numpy.arange(E))

# set event year and event ses starting from 1
nses = self.oqparam.ses_per_logic_tree_path
nses = oq.ses_per_logic_tree_path
extra = numpy.zeros(len(events), [('year', U32), ('ses_id', U32)])

rng = numpy.random.default_rng(self.oqparam.ses_seed)
if self.oqparam.investigation_time:
itime = int(self.oqparam.investigation_time)
extra['year'] = rng.choice(itime, len(events)) + 1
rng = numpy.random.default_rng(oq.ses_seed)
if oq.investigation_time:
eff_time = int(oq.investigation_time * oq.ses_per_logic_tree_path *
len(self.datastore['weights']))
extra['year'] = rng.choice(eff_time, len(events)) + 1
extra['ses_id'] = rng.choice(nses, len(events)) + 1
self.datastore['events'] = util.compose_arrays(events, extra)
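
This is the change announced in the changelog as "events.year to be in the range 1..eff_time": the year is now drawn from the effective investigation time rather than the plain investigation time. A worked example with assumed job parameters:

investigation_time = 50        # assumed value, from the job file
ses_per_logic_tree_path = 2
num_rlzs = 10                  # len(self.datastore['weights'])
eff_time = int(investigation_time * ses_per_logic_tree_path * num_rlzs)  # 1000
# rng.choice(eff_time, len(events)) yields integers in 0..eff_time-1, so the
# +1 above places every event year uniformly in 1..1000
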

15 changes: 15 additions & 0 deletions openquake/commonlib/oqvalidation.py
@@ -61,6 +61,14 @@
Example: *aggregate_by = region, taxonomy*.
Default: empty list
aggregate_loss_curves_types:
Used for event-based risk and damage calculations, to estimate the aggregated
loss Exceedance Probability (EP) only or to also calculate (if possible) the
Occurrence Exceedance Probability (OEP) and/or the Aggregate Exceedance
Probability (AEP).
Example: *aggregate_loss_curves_types = ,_oep,_aep*.
Default: ,_oep,_aep
reaggregate_by:
Used to perform additional aggregations in risk calculations. Takes in
input a proper subset of the tags in the aggregate_by option.
@@ -903,6 +911,13 @@ class OqParam(valid.ParamSet):
hazard_imtls = {}
override_vs30 = valid.Param(valid.positivefloat, None)
aggregate_by = valid.Param(valid.namelists, [])
aggregate_loss_curves_types = valid.Param(
valid.Choice('',
',_oep',
',_aep',
',_oep,_aep',
',_aep,_oep'),
',_oep,_aep')
reaggregate_by = valid.Param(valid.namelist, [])
amplification_method = valid.Param(
valid.Choice('convolution', 'kernel'), 'convolution')
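
The new aggregate_loss_curves_types parameter only accepts a fixed set of comma-separated suffix lists. A small sketch of how the validation is expected to behave, assuming the usual valid.Choice semantics (return the value if allowed, raise ValueError otherwise) and the import path below:

from openquake.hazardlib import valid  # assumed import path of the valid module

choice = valid.Choice('', ',_oep', ',_aep', ',_oep,_aep', ',_aep,_oep')
print(choice(',_oep,_aep'))  # the default: EP, OEP and AEP curves
print(choice(',_aep'))       # EP and AEP only
# choice('_oep') would raise ValueError: the leading empty field (plain EP
# curves) is always part of the allowed combinations
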
