From 158c2444c7a6614b3b0682427641e69211174c0b Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 15 May 2023 13:58:05 +0200 Subject: [PATCH 1/8] Re-add ML processes for 2.1.0 #416 --- meta/subtype-schemas.json | 6 ++ proposals/fit_class_random_forest.json | 110 +++++++++++++++++++++++++ proposals/fit_regr_random_forest.json | 110 +++++++++++++++++++++++++ proposals/load_ml_model.json | 53 ++++++++++++ proposals/predict_random_forest.json | 42 ++++++++++ proposals/save_ml_model.json | 44 ++++++++++ 6 files changed, 365 insertions(+) create mode 100644 proposals/fit_class_random_forest.json create mode 100644 proposals/fit_regr_random_forest.json create mode 100644 proposals/load_ml_model.json create mode 100644 proposals/predict_random_forest.json create mode 100644 proposals/save_ml_model.json diff --git a/meta/subtype-schemas.json b/meta/subtype-schemas.json index b44cb8dc..6d0030fc 100644 --- a/meta/subtype-schemas.json +++ b/meta/subtype-schemas.json @@ -232,6 +232,12 @@ } } }, + "ml-model": { + "type": "object", + "subtype": "ml-model", + "title": "Machine Learning Model", + "description": "A machine learning model, accompanied with STAC metadata that implements the STAC ml-model extension." + }, "output-format": { "type": "string", "subtype": "output-format", diff --git a/proposals/fit_class_random_forest.json b/proposals/fit_class_random_forest.json new file mode 100644 index 00000000..6eb874bf --- /dev/null +++ b/proposals/fit_class_random_forest.json @@ -0,0 +1,110 @@ +{ + "id": "fit_class_random_forest", + "summary": "Train a random forest classification model", + "description": "Executes the fit of a random forest classification based on training data. The process does not include a separate split of the data in test, validation and training data. 
The Random Forest classification model is based on the approach by Breiman (2001).", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "predictors", + "description": "The predictors for the classification model as a vector data cube. Aggregated to the features (vectors) of the target input variable.", + "schema": [ + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "bands" + } + ] + }, + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "other" + } + ] + } + ] + }, + { + "name": "target", + "description": "The training sites for the classification model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to be associated with a value to predict (e.g. fractional forest canopy cover).", + "schema": { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + } + ] + } + }, + { + "name": "max_variables", + "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split.\n- `sqrt`: The square root of the number of variables are considered for each split. 
This is often the default for classification.", + "schema": [ + { + "type": "integer", + "minimum": 1 + }, + { + "type": "string", + "enum": [ + "all", + "log2", + "onethird", + "sqrt" + ] + } + ] + }, + { + "name": "num_trees", + "description": "The number of trees built within the Random Forest classification.", + "optional": true, + "default": 100, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "seed", + "description": "A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + } + ], + "returns": { + "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://doi.org/10.1023/A:1010933404324", + "title": "Breiman (2001): Random Forests", + "type": "text/html", + "rel": "about" + } + ] +} diff --git a/proposals/fit_regr_random_forest.json b/proposals/fit_regr_random_forest.json new file mode 100644 index 00000000..51191fa5 --- /dev/null +++ b/proposals/fit_regr_random_forest.json @@ -0,0 +1,110 @@ +{ + "id": "fit_regr_random_forest", + "summary": "Train a random forest regression model", + "description": "Executes the fit of a random forest regression based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest regression model is based on the approach by Breiman (2001).", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "predictors", + "description": "The predictors for the regression model as a vector data cube. 
Aggregated to the features (vectors) of the target input variable.", + "schema": [ + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "bands" + } + ] + }, + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "other" + } + ] + } + ] + }, + { + "name": "target", + "description": "The training sites for the regression model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to be associated with a value to predict (e.g. fractional forest canopy cover).", + "schema": { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + } + ] + } + }, + { + "name": "max_variables", + "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split. This is often the default for regression.\n- `sqrt`: The square root of the number of variables are considered for each split.", + "schema": [ + { + "type": "integer", + "minimum": 1 + }, + { + "type": "string", + "enum": [ + "all", + "log2", + "onethird", + "sqrt" + ] + } + ] + }, + { + "name": "num_trees", + "description": "The number of trees built within the Random Forest regression.", + "optional": true, + "default": 100, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "seed", + "description": "A randomization seed to use for the random sampling in training. 
If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + } + ], + "returns": { + "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://doi.org/10.1023/A:1010933404324", + "title": "Breiman (2001): Random Forests", + "type": "text/html", + "rel": "about" + } + ] +} diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json new file mode 100644 index 00000000..151513c8 --- /dev/null +++ b/proposals/load_ml_model.json @@ -0,0 +1,53 @@ +{ + "id": "load_ml_model", + "summary": "Load a ML model", + "description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as ``fit_regr_random_forest()`` and ``save_ml_model()``.", + "categories": [ + "machine learning", + "import" + ], + "experimental": true, + "parameters": [ + { + "name": "id", + "description": "The STAC Item to load the machine learning model from. The STAC Item must implement the `ml-model` extension.", + "schema": [ + { + "title": "URL", + "type": "string", + "format": "uri", + "subtype": "uri", + "pattern": "^https?://" + }, + { + "title": "Batch Job ID", + "description": "Loading a model by batch job ID is possible only if a single model has been saved by the job. 
Otherwise, you have to load a specific model from a batch job by URL.", + "type": "string", + "subtype": "job-id", + "pattern": "^[\\w\\-\\.~]+$" + }, + { + "title": "User-uploaded File", + "type": "string", + "subtype": "file-path", + "pattern": "^[^\r\n\\:'\"]+$" + } + ] + } + ], + "returns": { + "description": "A machine learning model to be used with machine learning processes such as ``predict_random_forest()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://github.com/stac-extensions/ml-model", + "title": "STAC ml-model extension", + "type": "text/html", + "rel": "about" + } + ] +} diff --git a/proposals/predict_random_forest.json b/proposals/predict_random_forest.json new file mode 100644 index 00000000..62c54e9f --- /dev/null +++ b/proposals/predict_random_forest.json @@ -0,0 +1,42 @@ +{ + "id": "predict_random_forest", + "summary": "Predict values based on a Random Forest model", + "description": "Applies a Random Forest machine learning model to an array and predict a value for it.", + "categories": [ + "machine learning", + "reducer" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "An array of numbers.", + "schema": { + "type": "array", + "items": { + "type": [ + "number", + "null" + ] + } + } + }, + { + "name": "model", + "description": "A model object that can be trained with the processes ``fit_regr_random_forest()`` (regression) and ``fit_class_random_forest()`` (classification).", + "schema": { + "type": "object", + "subtype": "ml-model" + } + } + ], + "returns": { + "description": "The predicted value. 
Returns `null` if any of the given values in the array is a no-data value.", + "schema": { + "type": [ + "number", + "null" + ] + } + } +} diff --git a/proposals/save_ml_model.json b/proposals/save_ml_model.json new file mode 100644 index 00000000..5e9ea8b0 --- /dev/null +++ b/proposals/save_ml_model.json @@ -0,0 +1,44 @@ +{ + "id": "save_ml_model", + "summary": "Save a ML model", + "description": "Saves a machine learning model as part of a batch job.\n\nThe model will be accompanied by a separate STAC Item that implements the [ml-model extension](https://github.com/stac-extensions/ml-model).", + "categories": [ + "machine learning", + "import" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "The data to store as a machine learning model.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + { + "name": "options", + "description": "Additional parameters to create the file(s).", + "schema": { + "type": "object", + "additionalParameters": false + }, + "default": {}, + "optional": true + } + ], + "returns": { + "description": "Returns `false` if the process failed to store the model, `true` otherwise.", + "schema": { + "type": "boolean" + } + }, + "links": [ + { + "href": "https://github.com/stac-extensions/ml-model", + "title": "STAC ml-model extension", + "type": "text/html", + "rel": "about" + } + ] +} \ No newline at end of file From 756602039841e528c558ee40de2cf25658341b6e Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 15 May 2023 14:00:01 +0200 Subject: [PATCH 2/8] Make predict processes for ML more general #368 --- CHANGELOG.md | 4 ++ proposals/load_ml_model.json | 2 +- proposals/predict_curve.json | 2 +- ...ndom_forest.json => predict_ml_model.json} | 8 ++-- proposals/predict_ml_model_probabilities.json | 45 +++++++++++++++++++ 5 files changed, 55 insertions(+), 6 deletions(-) rename proposals/{predict_random_forest.json => predict_ml_model.json} (69%) create mode 100644 
proposals/predict_ml_model_probabilities.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d268dd9..f46669a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `filter_vector` - `flatten_dimensions` - `load_geojson` + - `load_ml_model` - `load_url` + - `predict_ml_model` + - `predict_ml_model_probabilities` + - `save_ml_model` - `unflatten_dimension` - `vector_buffer` - `vector_reproject` diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json index 151513c8..076caa3d 100644 --- a/proposals/load_ml_model.json +++ b/proposals/load_ml_model.json @@ -36,7 +36,7 @@ } ], "returns": { - "description": "A machine learning model to be used with machine learning processes such as ``predict_random_forest()``.", + "description": "A machine learning model to be used with machine learning processes such as ``predict_ml_model()`` or ``predict_ml_model_probabilities()``.", "schema": { "type": "object", "subtype": "ml-model" diff --git a/proposals/predict_curve.json b/proposals/predict_curve.json index 9fb5d341..2d1974d8 100644 --- a/proposals/predict_curve.json +++ b/proposals/predict_curve.json @@ -1,6 +1,6 @@ { "id": "predict_curve", - "summary": "Predict values", + "summary": "Predict values using a model function", "description": "Predict values using a model function and pre-computed parameters. 
The process is primarily intended to compute values for new labels, but it can also fill gaps where existing labels contain no-data (`null`) values.", "categories": [ "cubes", diff --git a/proposals/predict_random_forest.json b/proposals/predict_ml_model.json similarity index 69% rename from proposals/predict_random_forest.json rename to proposals/predict_ml_model.json index 62c54e9f..fe61bf45 100644 --- a/proposals/predict_random_forest.json +++ b/proposals/predict_ml_model.json @@ -1,7 +1,7 @@ { - "id": "predict_random_forest", - "summary": "Predict values based on a Random Forest model", - "description": "Applies a Random Forest machine learning model to an array and predict a value for it.", + "id": "predict_ml_model", + "summary": "Predict values values using a ML model", + "description": "Applies a machine learning model to an array and predicts a value/class for it.", "categories": [ "machine learning", "reducer" @@ -23,7 +23,7 @@ }, { "name": "model", - "description": "A model object that can be trained with the processes ``fit_regr_random_forest()`` (regression) and ``fit_class_random_forest()`` (classification).", + "description": "A ML model that can be trained with one of the ML processes such as ``fit_class_random_forest()``.", "schema": { "type": "object", "subtype": "ml-model" diff --git a/proposals/predict_ml_model_probabilities.json b/proposals/predict_ml_model_probabilities.json new file mode 100644 index 00000000..afdf256d --- /dev/null +++ b/proposals/predict_ml_model_probabilities.json @@ -0,0 +1,45 @@ +{ + "id": "predict_ml_model_probabilities", + "summary": "Predict class probabilities using a ML model", + "description": "Applies a machine learning model to an array and predicts (class) probabilities for them.", + "categories": [ + "machine learning", + "reducer" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "An array of numbers.", + "schema": { + "type": "array", + "items": { + "type": [ + "number", + 
"null" + ] + } + } + }, + { + "name": "model", + "description": "A ML model that can be trained with one of the ML processes such as ``fit_regr_random_forest()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + } + ], + "returns": { + "description": "The predicted (class) probabilities. Returns `null` if any of the given values in the array is a no-data value.", + "schema": { + "type": "array", + "items": { + "type": [ + "number", + "null" + ] + } + } + } +} From 8e85e860c2c382db80b44d902cbc91a875911bc8 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 15 May 2023 17:04:00 +0200 Subject: [PATCH 3/8] Predict DL proposal --- proposals/predict_dl_model.json | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 proposals/predict_dl_model.json diff --git a/proposals/predict_dl_model.json b/proposals/predict_dl_model.json new file mode 100644 index 00000000..3ba15f52 --- /dev/null +++ b/proposals/predict_dl_model.json @@ -0,0 +1,45 @@ +{ + "id": "predict_dl_model", + "summary": "Predict values values using a DL model", + "description": "Applies a machine learning model to a data cube and predicts a values/classes for it. 
This process can be used as process in ``apply_neighborhood()`` to specify the patch size and overlap.", + "categories": [ + "deep learning", + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "The input data cube.", + "schema": { + "type": "object", + "subtype": "datacube" + } + }, + { + "name": "model", + "description": "A DL model that can be trained with one of the DL `fit_*` processes.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + { + "name": "dimensions", + "description": "Dimensions to remove.", + "schema": { + "type": "array", + "items": { + "type": "string" + } + } + } + ], + "returns": { + "description": "The data cube with predicted value.", + "schema": { + "type": "object", + "subtype": "datacube" + } + } +} From 5e27496be8dc379ee10acdf84037d020cb6f28cc Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 16 May 2023 11:07:46 +0200 Subject: [PATCH 4/8] Variant 2? --- proposals/load_ml_model.json | 7 ---- proposals/predict_ml_model.json | 36 +++++++++-------- proposals/predict_ml_model_probabilities.json | 39 ++++++++++--------- 3 files changed, 40 insertions(+), 42 deletions(-) diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json index 076caa3d..fa76c9da 100644 --- a/proposals/load_ml_model.json +++ b/proposals/load_ml_model.json @@ -19,13 +19,6 @@ "subtype": "uri", "pattern": "^https?://" }, - { - "title": "Batch Job ID", - "description": "Loading a model by batch job ID is possible only if a single model has been saved by the job. 
Otherwise, you have to load a specific model from a batch job by URL.", - "type": "string", - "subtype": "job-id", - "pattern": "^[\\w\\-\\.~]+$" - }, { "title": "User-uploaded File", "type": "string", diff --git a/proposals/predict_ml_model.json b/proposals/predict_ml_model.json index fe61bf45..225cf63a 100644 --- a/proposals/predict_ml_model.json +++ b/proposals/predict_ml_model.json @@ -1,24 +1,18 @@ { "id": "predict_ml_model", "summary": "Predict values values using a ML model", - "description": "Applies a machine learning model to an array and predicts a value/class for it.", + "description": "Applies a machine learning model to a datacube and predicts values/classes for it.", "categories": [ - "machine learning", - "reducer" + "machine learning" ], "experimental": true, "parameters": [ { "name": "data", - "description": "An array of numbers.", + "description": "The input data cube.", "schema": { - "type": "array", - "items": { - "type": [ - "number", - "null" - ] - } + "type": "object", + "subtype": "datacube" } }, { @@ -28,15 +22,25 @@ "type": "object", "subtype": "ml-model" } + }, + { + "name": "dimension", + "description": "The name of the dimension that the model applies to. Fails with a `DimensionNotAvailable` exception if the specified dimension does not exist.", + "schema": { + "type": "string" + } } ], "returns": { - "description": "The predicted value. Returns `null` if any of the given values in the array is a no-data value.", + "description": "The data cube with the predicted values. It removes the specified dimension.", "schema": { - "type": [ - "number", - "null" - ] + "type": "object", + "subtype": "datacube" + } + }, + "exceptions": { + "DimensionNotAvailable": { + "message": "A dimension with the specified name does not exist." 
} } } diff --git a/proposals/predict_ml_model_probabilities.json b/proposals/predict_ml_model_probabilities.json index afdf256d..7c852037 100644 --- a/proposals/predict_ml_model_probabilities.json +++ b/proposals/predict_ml_model_probabilities.json @@ -1,24 +1,18 @@ { "id": "predict_ml_model_probabilities", "summary": "Predict class probabilities using a ML model", - "description": "Applies a machine learning model to an array and predicts (class) probabilities for them.", + "description": "Applies a machine learning model to a data cube and predicts (class) probabilities.", "categories": [ - "machine learning", - "reducer" + "machine learning" ], "experimental": true, "parameters": [ { "name": "data", - "description": "An array of numbers.", + "description": "The input data cube.", "schema": { - "type": "array", - "items": { - "type": [ - "number", - "null" - ] - } + "type": "object", + "subtype": "datacube" } }, { @@ -28,18 +22,25 @@ "type": "object", "subtype": "ml-model" } + }, + { + "name": "dimension", + "description": "The name of the dimension that the model applies to. Fails with a `DimensionNotAvailable` exception if the specified dimension does not exist.", + "schema": { + "type": "string" + } } ], "returns": { - "description": "The predicted (class) probabilities. Returns `null` if any of the given values in the array is a no-data value.", + "description": "A data cube with the predicted (class) probabilities. It removes the specified dimension and adds a dimension for the class probabilities. 
It has the name `probabilities` and is of type `other`.", "schema": { - "type": "array", - "items": { - "type": [ - "number", - "null" - ] - } + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "other" + } + ] } } } From 6baf73717f98f8e3ed3e55a6ec761465f519c57c Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 16 May 2023 11:28:22 +0200 Subject: [PATCH 5/8] Wording improvements from #396 --- proposals/predict_dl_model.json | 8 ++++---- proposals/predict_ml_model.json | 8 ++++---- proposals/predict_ml_model_probabilities.json | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/proposals/predict_dl_model.json b/proposals/predict_dl_model.json index 3ba15f52..2b194756 100644 --- a/proposals/predict_dl_model.json +++ b/proposals/predict_dl_model.json @@ -1,7 +1,7 @@ { "id": "predict_dl_model", - "summary": "Predict values values using a DL model", - "description": "Applies a machine learning model to a data cube and predicts a values/classes for it. This process can be used as process in ``apply_neighborhood()`` to specify the patch size and overlap.", + "summary": "Predict values values using DL", + "description": "Applies a machine learning model to a data cube of input features and predicts output values or classes for it. 
This process can be used as process in ``apply_neighborhood()`` to specify the patch size and overlap.", "categories": [ "deep learning", "machine learning" @@ -10,7 +10,7 @@ "parameters": [ { "name": "data", - "description": "The input data cube.", + "description": "The data cube containing the input features.", "schema": { "type": "object", "subtype": "datacube" @@ -18,7 +18,7 @@ }, { "name": "model", - "description": "A DL model that can be trained with one of the DL `fit_*` processes.", + "description": "A DL model that was trained with one of the DL training processes.", "schema": { "type": "object", "subtype": "ml-model" diff --git a/proposals/predict_ml_model.json b/proposals/predict_ml_model.json index 225cf63a..a8220a55 100644 --- a/proposals/predict_ml_model.json +++ b/proposals/predict_ml_model.json @@ -1,7 +1,7 @@ { "id": "predict_ml_model", - "summary": "Predict values values using a ML model", - "description": "Applies a machine learning model to a datacube and predicts values/classes for it.", + "summary": "Predict classification or regression values using ML", + "description": "Applies a machine learning model to a data cube of input features and predicts output values or classes for it.", "categories": [ "machine learning" ], @@ -9,7 +9,7 @@ "parameters": [ { "name": "data", - "description": "The input data cube.", + "description": "The data cube containing the input features.", "schema": { "type": "object", "subtype": "datacube" @@ -17,7 +17,7 @@ }, { "name": "model", - "description": "A ML model that can be trained with one of the ML processes such as ``fit_class_random_forest()``.", + "description": "A ML model that was trained with one of the ML training processes such as ``fit_class_random_forest()``.", "schema": { "type": "object", "subtype": "ml-model" diff --git a/proposals/predict_ml_model_probabilities.json b/proposals/predict_ml_model_probabilities.json index 7c852037..2e4ee302 100644 --- a/proposals/predict_ml_model_probabilities.json 
+++ b/proposals/predict_ml_model_probabilities.json @@ -1,7 +1,7 @@ { "id": "predict_ml_model_probabilities", - "summary": "Predict class probabilities using a ML model", - "description": "Applies a machine learning model to a data cube and predicts (class) probabilities.", + "summary": "Predict class probabilities using ML", + "description": "Applies a machine learning model to a data cube of input features and predicts the probabilities of the output classes.", "categories": [ "machine learning" ], @@ -9,7 +9,7 @@ "parameters": [ { "name": "data", - "description": "The input data cube.", + "description": "The data cube containing the input features.", "schema": { "type": "object", "subtype": "datacube" @@ -17,7 +17,7 @@ }, { "name": "model", - "description": "A ML model that can be trained with one of the ML processes such as ``fit_regr_random_forest()``.", + "description": "A ML model that was trained with one of the ML training processes such as ``fit_regr_random_forest()``.", "schema": { "type": "object", "subtype": "ml-model" From 1fc4a8ecfd613265ca69915283b9f66ecf3c4e84 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 16 May 2023 14:30:09 +0200 Subject: [PATCH 6/8] Rename processes according to recent discussions from #396 --- CHANGELOG.md | 6 ++++-- proposals/{predict_dl_model.json => dl_predict.json} | 4 ++-- proposals/load_ml_model.json | 4 ++-- ...s_random_forest.json => ml_fit_class_random_forest.json} | 2 +- ...gr_random_forest.json => ml_fit_regr_random_forest.json} | 2 +- proposals/{predict_ml_model.json => ml_predict.json} | 4 ++-- ...del_probabilities.json => ml_predict_probabilities.json} | 4 ++-- 7 files changed, 14 insertions(+), 12 deletions(-) rename proposals/{predict_dl_model.json => dl_predict.json} (93%) rename proposals/{fit_class_random_forest.json => ml_fit_class_random_forest.json} (99%) rename proposals/{fit_regr_random_forest.json => ml_fit_regr_random_forest.json} (99%) rename proposals/{predict_ml_model.json => 
ml_predict.json} (93%) rename proposals/{predict_ml_model_probabilities.json => ml_predict_probabilities.json} (93%) diff --git a/CHANGELOG.md b/CHANGELOG.md index f46669a0..63c056e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,8 +15,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `load_geojson` - `load_ml_model` - `load_url` - - `predict_ml_model` - - `predict_ml_model_probabilities` + - `ml_fit_class_random_forest` + - `ml_fit_regr_random_forest` + - `ml_predict` + - `ml_predict_probabilities` - `save_ml_model` - `unflatten_dimension` - `vector_buffer` diff --git a/proposals/predict_dl_model.json b/proposals/dl_predict.json similarity index 93% rename from proposals/predict_dl_model.json rename to proposals/dl_predict.json index 2b194756..5fef5d9e 100644 --- a/proposals/predict_dl_model.json +++ b/proposals/dl_predict.json @@ -1,6 +1,6 @@ { - "id": "predict_dl_model", - "summary": "Predict values values using DL", + "id": "dl_predict", + "summary": "Predict classification or regression values using DL", "description": "Applies a machine learning model to a data cube of input features and predicts output values or classes for it. 
This process can be used as process in ``apply_neighborhood()`` to specify the patch size and overlap.", "categories": [ "deep learning", diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json index fa76c9da..7174e1e2 100644 --- a/proposals/load_ml_model.json +++ b/proposals/load_ml_model.json @@ -1,7 +1,7 @@ { "id": "load_ml_model", "summary": "Load a ML model", - "description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as ``fit_regr_random_forest()`` and ``save_ml_model()``.", + "description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as ``ml_fit_regr_random_forest()`` and ``save_ml_model()``.", "categories": [ "machine learning", "import" @@ -29,7 +29,7 @@ } ], "returns": { - "description": "A machine learning model to be used with machine learning processes such as ``predict_ml_model()`` or ``predict_ml_model_probabilities()``.", + "description": "A machine learning model to be used with machine learning processes such as ``ml_predict()`` or ``ml_predict_probabilities()``.", "schema": { "type": "object", "subtype": "ml-model" diff --git a/proposals/fit_class_random_forest.json b/proposals/ml_fit_class_random_forest.json similarity index 99% rename from proposals/fit_class_random_forest.json rename to proposals/ml_fit_class_random_forest.json index 6eb874bf..63da48a1 100644 --- a/proposals/fit_class_random_forest.json +++ b/proposals/ml_fit_class_random_forest.json @@ -1,5 +1,5 @@ { - "id": "fit_class_random_forest", + "id": "ml_fit_class_random_forest", "summary": "Train a random forest classification model", "description": "Executes the fit of a random forest classification based on training data. The process does not include a separate split of the data in test, validation and training data. 
The Random Forest classification model is based on the approach by Breiman (2001).", "categories": [ diff --git a/proposals/fit_regr_random_forest.json b/proposals/ml_fit_regr_random_forest.json similarity index 99% rename from proposals/fit_regr_random_forest.json rename to proposals/ml_fit_regr_random_forest.json index 51191fa5..39207324 100644 --- a/proposals/fit_regr_random_forest.json +++ b/proposals/ml_fit_regr_random_forest.json @@ -1,5 +1,5 @@ { - "id": "fit_regr_random_forest", + "id": "ml_fit_regr_random_forest", "summary": "Train a random forest regression model", "description": "Executes the fit of a random forest regression based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest regression model is based on the approach by Breiman (2001).", "categories": [ diff --git a/proposals/predict_ml_model.json b/proposals/ml_predict.json similarity index 93% rename from proposals/predict_ml_model.json rename to proposals/ml_predict.json index a8220a55..f29c72f1 100644 --- a/proposals/predict_ml_model.json +++ b/proposals/ml_predict.json @@ -1,5 +1,5 @@ { - "id": "predict_ml_model", + "id": "ml_predict", "summary": "Predict classification or regression values using ML", "description": "Applies a machine learning model to a data cube of input features and predicts output values or classes for it.", "categories": [ @@ -17,7 +17,7 @@ }, { "name": "model", - "description": "A ML model that was trained with one of the ML training processes such as ``fit_class_random_forest()``.", + "description": "A ML model that was trained with one of the ML training processes such as ``ml_fit_class_random_forest()``.", "schema": { "type": "object", "subtype": "ml-model" diff --git a/proposals/predict_ml_model_probabilities.json b/proposals/ml_predict_probabilities.json similarity index 93% rename from proposals/predict_ml_model_probabilities.json rename to proposals/ml_predict_probabilities.json 
index 2e4ee302..4ff1780a 100644 --- a/proposals/predict_ml_model_probabilities.json +++ b/proposals/ml_predict_probabilities.json @@ -1,5 +1,5 @@ { - "id": "predict_ml_model_probabilities", + "id": "ml_predict_probabilities", "summary": "Predict class probabilities using ML", "description": "Applies a machine learning model to a data cube of input features and predicts the probabilities of the output classes.", "categories": [ @@ -17,7 +17,7 @@ }, { "name": "model", - "description": "A ML model that was trained with one of the ML training processes such as ``fit_regr_random_forest()``.", + "description": "A ML model that was trained with one of the ML training processes such as ``ml_fit_regr_random_forest()``.", "schema": { "type": "object", "subtype": "ml-model" From c8a7e3843b62b8c30af15e74ee8a207edddfee66 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 16 May 2023 16:45:49 +0200 Subject: [PATCH 7/8] Remove single value predictions and merge ml and dl --- CHANGELOG.md | 1 - proposals/dl_predict.json | 45 ------------------------ proposals/ml_predict.json | 29 +++++++++------- proposals/ml_predict_probabilities.json | 46 ------------------------- 4 files changed, 16 insertions(+), 105 deletions(-) delete mode 100644 proposals/dl_predict.json delete mode 100644 proposals/ml_predict_probabilities.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 63c056e1..b3210885 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `ml_fit_class_random_forest` - `ml_fit_regr_random_forest` - `ml_predict` - - `ml_predict_probabilities` - `save_ml_model` - `unflatten_dimension` - `vector_buffer` diff --git a/proposals/dl_predict.json b/proposals/dl_predict.json deleted file mode 100644 index 5fef5d9e..00000000 --- a/proposals/dl_predict.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "id": "dl_predict", - "summary": "Predict classification or regression values using DL", - "description": 
"Applies a machine learning model to a data cube of input features and predicts output values or classes for it. This process can be used as process in ``apply_neighborhood()`` to specify the patch size and overlap.", - "categories": [ - "deep learning", - "machine learning" - ], - "experimental": true, - "parameters": [ - { - "name": "data", - "description": "The data cube containing the input features.", - "schema": { - "type": "object", - "subtype": "datacube" - } - }, - { - "name": "model", - "description": "A DL model that was trained with one of the DL training processes.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - { - "name": "dimensions", - "description": "Dimensions to remove.", - "schema": { - "type": "array", - "items": { - "type": "string" - } - } - } - ], - "returns": { - "description": "The data cube with predicted value.", - "schema": { - "type": "object", - "subtype": "datacube" - } - } -} diff --git a/proposals/ml_predict.json b/proposals/ml_predict.json index f29c72f1..87cd2500 100644 --- a/proposals/ml_predict.json +++ b/proposals/ml_predict.json @@ -1,7 +1,7 @@ { "id": "ml_predict", - "summary": "Predict classification or regression values using ML", - "description": "Applies a machine learning model to a data cube of input features and predicts output values or classes for it.", + "summary": "Predict using ML", + "description": "Applies a machine learning model to a data cube of input features and returns the predicted values.", "categories": [ "machine learning" ], @@ -17,30 +17,33 @@ }, { "name": "model", - "description": "A ML model that was trained with one of the ML training processes such as ``ml_fit_class_random_forest()``.", + "description": "A ML model that was trained with one of the ML training processes such as ``ml_fit_regr_random_forest()``.", "schema": { "type": "object", "subtype": "ml-model" } }, { - "name": "dimension", - "description": "The name of the dimension that the model applies to. 
Fails with a `DimensionNotAvailable` exception if the specified dimension does not exist.", + "name": "dimensions", + "description": "Zero or more dimensions that will be reduced by the model. Fails with a `DimensionNotAvailable` exception if one of the specified dimensions does not exist.", "schema": { - "type": "string" + "type": "array", + "items": { + "type": "string" + } } } ], "returns": { - "description": "The data cube with the predicted values. It removes the specified dimension.", + "description": "A data cube with the predicted values. It removes the specified dimensions and adds a new dimension for the predicted values. The new dimension has the name `predictions` and is of type `other`. If a single value is returned, the dimension has a single label with name `0`.", "schema": { "type": "object", - "subtype": "datacube" - } - }, - "exceptions": { - "DimensionNotAvailable": { - "message": "A dimension with the specified name does not exist." + "subtype": "datacube", + "dimensions": [ + { + "type": "other" + } + ] } } } diff --git a/proposals/ml_predict_probabilities.json b/proposals/ml_predict_probabilities.json deleted file mode 100644 index 4ff1780a..00000000 --- a/proposals/ml_predict_probabilities.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "id": "ml_predict_probabilities", - "summary": "Predict class probabilities using ML", - "description": "Applies a machine learning model to a data cube of input features and predicts the probabilities of the output classes.", - "categories": [ - "machine learning" - ], - "experimental": true, - "parameters": [ - { - "name": "data", - "description": "The data cube containing the input features.", - "schema": { - "type": "object", - "subtype": "datacube" - } - }, - { - "name": "model", - "description": "A ML model that was trained with one of the ML training processes such as ``ml_fit_regr_random_forest()``.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - { - "name": "dimension", - "description": "The name of 
the dimension that the model applies to. Fails with a `DimensionNotAvailable` exception if the specified dimension does not exist.", - "schema": { - "type": "string" - } - } - ], - "returns": { - "description": "A data cube with the predicted (class) probabilities. It removes the specified dimension and adds a dimension for the class probabilities. It has the name `probabilities` and is of type `other`.", - "schema": { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "other" - } - ] - } - } -} From d275d787e62013d8cfc0d18a8d2af8631d747f34 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 16 May 2023 17:03:10 +0200 Subject: [PATCH 8/8] load_ml_model: Change from id to uri & fix reference --- proposals/load_ml_model.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json index 7174e1e2..7fa86d89 100644 --- a/proposals/load_ml_model.json +++ b/proposals/load_ml_model.json @@ -9,7 +9,7 @@ "experimental": true, "parameters": [ { - "name": "id", + "name": "uri", "description": "The STAC Item to load the machine learning model from. The STAC Item must implement the `ml-model` extension.", "schema": [ { @@ -29,7 +29,7 @@ } ], "returns": { - "description": "A machine learning model to be used with machine learning processes such as ``ml_predict()`` or ``ml_predict_probabilities()``.", + "description": "A machine learning model to be used with machine learning processes such as ``ml_predict()``.", "schema": { "type": "object", "subtype": "ml-model"