From d49d30687741ee5a262c4aa695a8e6302a6f7f2e Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 28 Nov 2022 16:48:48 +0100 Subject: [PATCH] Make predict processes for ML more general #368 --- CHANGELOG.md | 3 +- proposals/load_ml_model.json | 2 +- proposals/predict_curve.json | 4 +- ...ndom_forest.json => predict_ml_model.json} | 8 ++-- proposals/predict_ml_model_probabilities.json | 45 +++++++++++++++++++ 5 files changed, 54 insertions(+), 8 deletions(-) rename proposals/{predict_random_forest.json => predict_ml_model.json} (69%) create mode 100644 proposals/predict_ml_model_probabilities.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a69e93e..8066f38e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `fit_regr_random_forest` - `flatten_dimensions` - `load_ml_model` - - `predict_random_forest` + - `predict_ml_model` + - `predict_ml_model_probabilities` - `save_ml_model` - `unflatten_dimension` - `vector_buffer` diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json index 151513c8..076caa3d 100644 --- a/proposals/load_ml_model.json +++ b/proposals/load_ml_model.json @@ -36,7 +36,7 @@ } ], "returns": { - "description": "A machine learning model to be used with machine learning processes such as ``predict_random_forest()``.", + "description": "A machine learning model to be used with machine learning processes such as ``predict_ml_model()`` or ``predict_ml_model_probabilities()``.", "schema": { "type": "object", "subtype": "ml-model" diff --git a/proposals/predict_curve.json b/proposals/predict_curve.json index 52adcc5e..3588c415 100644 --- a/proposals/predict_curve.json +++ b/proposals/predict_curve.json @@ -1,6 +1,6 @@ { "id": "predict_curve", - "summary": "Predict values", + "summary": "Predict values using a model function", "description": "Predict values using a model function and pre-computed parameters. 
The process is primarily intended to compute values for new labels, but it can also fill gaps where existing labels contain no-data (`null`) values.", "categories": [ "cubes", @@ -109,4 +109,4 @@ "message": "A dimension with the specified name does not exist." } } -} \ No newline at end of file +} diff --git a/proposals/predict_random_forest.json b/proposals/predict_ml_model.json similarity index 69% rename from proposals/predict_random_forest.json rename to proposals/predict_ml_model.json index 62c54e9f..fe61bf45 100644 --- a/proposals/predict_random_forest.json +++ b/proposals/predict_ml_model.json @@ -1,7 +1,7 @@ { - "id": "predict_random_forest", - "summary": "Predict values based on a Random Forest model", - "description": "Applies a Random Forest machine learning model to an array and predict a value for it.", + "id": "predict_ml_model", + "summary": "Predict values using a ML model", + "description": "Applies a machine learning model to an array and predicts a value/class for it.", "categories": [ "machine learning", "reducer" @@ -23,7 +23,7 @@ }, { "name": "model", - "description": "A model object that can be trained with the processes ``fit_regr_random_forest()`` (regression) and ``fit_class_random_forest()`` (classification).", + "description": "A ML model that can be trained with one of the ML processes such as ``fit_class_random_forest()``.", "schema": { "type": "object", "subtype": "ml-model" diff --git a/proposals/predict_ml_model_probabilities.json b/proposals/predict_ml_model_probabilities.json new file mode 100644 index 00000000..afdf256d --- /dev/null +++ b/proposals/predict_ml_model_probabilities.json @@ -0,0 +1,45 @@ +{ + "id": "predict_ml_model_probabilities", + "summary": "Predict class probabilities using a ML model", + "description": "Applies a machine learning model to an array and predicts (class) probabilities for it.", + "categories": [ + "machine learning", + "reducer" + ], + "experimental": true, + "parameters": [ + { + 
"name": "data", + "description": "An array of numbers.", + "schema": { + "type": "array", + "items": { + "type": [ + "number", + "null" + ] + } + } + }, + { + "name": "model", + "description": "A ML model that can be trained with one of the ML processes such as ``fit_class_random_forest()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + } + ], + "returns": { + "description": "The predicted (class) probabilities. Returns `null` if any of the given values in the array is a no-data value.", + "schema": { + "type": "array", + "items": { + "type": [ + "number", + "null" + ] + } + } + } +}