From da2b6d144f8cae043e20bc15ec7756278cb53ebe Mon Sep 17 00:00:00 2001
From: Brian Pondi <brian.pondi@uni-muenster.de>
Date: Wed, 17 Jul 2024 12:33:07 +0200
Subject: [PATCH] revise ml processes and add mlm extension

---
 meta/subtype-schemas.json                 |  8 ++++----
 proposals/load_ml_model.json              | 10 +++++-----
 proposals/ml_fit_class_random_forest.json | 16 +++++-----------
 proposals/ml_fit_regr_random_forest.json  | 18 ++++++------------
 proposals/ml_predict.json                 |  4 ++--
 proposals/save_ml_model.json              |  8 ++++----
 6 files changed, 26 insertions(+), 38 deletions(-)

diff --git a/meta/subtype-schemas.json b/meta/subtype-schemas.json
index 83ce72ba..a880847e 100644
--- a/meta/subtype-schemas.json
+++ b/meta/subtype-schemas.json
@@ -232,11 +232,11 @@
                 }
             }
         },
-        "ml-model": {
+        "mlm-model": {
             "type": "object",
-            "subtype": "ml-model",
+            "subtype": "mlm-model",
             "title": "Machine Learning Model",
-            "description": "A machine learning model, accompanied with STAC metadata that implements the the STAC ml-model extension."
+            "description": "A machine learning model, accompanied by STAC metadata that implements the STAC Machine Learning Model (MLM) extension."
         },
         "output-format": {
             "type": "string",
@@ -426,4 +426,4 @@
             "description": "Year as integer, can be any number of digits and can be negative."
         }
     }
-}
+}
\ No newline at end of file
diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json
index 7fa86d89..3e44a515 100644
--- a/proposals/load_ml_model.json
+++ b/proposals/load_ml_model.json
@@ -10,7 +10,7 @@
     "parameters": [
         {
             "name": "uri",
-            "description": "The STAC Item to load the machine learning model from. The STAC Item must implement the `ml-model` extension.",
+            "description": "The STAC Item to load the machine learning model from. The STAC Item must implement the Machine Learning Model (`mlm`) extension.",
             "schema": [
                 {
                     "title": "URL",
@@ -32,15 +32,15 @@
         "description": "A machine learning model to be used with machine learning processes such as ``ml_predict()``.",
         "schema": {
             "type": "object",
-            "subtype": "ml-model"
+            "subtype": "mlm-model"
         }
     },
     "links": [
         {
-            "href": "https://github.com/stac-extensions/ml-model",
-            "title": "STAC ml-model extension",
+            "href": "https://github.com/crim-ca/mlm-extension",
+            "title": "Machine Learning Model STAC extension",
             "type": "text/html",
             "rel": "about"
         }
     ]
-}
+}
\ No newline at end of file
diff --git a/proposals/ml_fit_class_random_forest.json b/proposals/ml_fit_class_random_forest.json
index 63da48a1..e4f718ba 100644
--- a/proposals/ml_fit_class_random_forest.json
+++ b/proposals/ml_fit_class_random_forest.json
@@ -8,8 +8,8 @@
     "experimental": true,
     "parameters": [
         {
-            "name": "predictors",
-            "description": "The predictors for the classification model as a vector data cube. Aggregated to the features (vectors) of the target input variable.",
+            "name": "training_set",
+            "description": "The training set for the Random Forest classification model, provided as a vector data cube. This set contains both the independent variables and the dependent variable that the Random Forest algorithm analyzes to learn patterns and relationships within the data.",
             "schema": [
                 {
                     "type": "object",
@@ -39,15 +39,9 @@
         },
         {
             "name": "target",
-            "description": "The training sites for the classification model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to associated with a value to predict (e.g. fractional forest canopy cover).",
+            "description": "The column name in the training set that represents the dependent variable for Random Forest classification.",
             "schema": {
-                "type": "object",
-                "subtype": "datacube",
-                "dimensions": [
-                    {
-                        "type": "geometry"
-                    }
-                ]
+                "type": "string"
             }
         },
         {
@@ -107,4 +101,4 @@
             "rel": "about"
         }
     ]
-}
+}
\ No newline at end of file
diff --git a/proposals/ml_fit_regr_random_forest.json b/proposals/ml_fit_regr_random_forest.json
index 39207324..dc774b01 100644
--- a/proposals/ml_fit_regr_random_forest.json
+++ b/proposals/ml_fit_regr_random_forest.json
@@ -8,8 +8,8 @@
     "experimental": true,
     "parameters": [
         {
-            "name": "predictors",
-            "description": "The predictors for the regression model as a vector data cube. Aggregated to the features (vectors) of the target input variable.",
+            "name": "training_set",
+            "description": "The training set for the Random Forest regression model, provided as a vector data cube. This set contains both the independent variables and the dependent variable that the Random Forest algorithm analyzes to learn patterns and relationships within the data.",
             "schema": [
                 {
                     "type": "object",
@@ -39,15 +39,9 @@
         },
         {
             "name": "target",
-            "description": "The training sites for the regression model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to associated with a value to predict (e.g. fractional forest canopy cover).",
+            "description": "The column name in the training set that represents the dependent variable for Random Forest regression.",
             "schema": {
-                "type": "object",
-                "subtype": "datacube",
-                "dimensions": [
-                    {
-                        "type": "geometry"
-                    }
-                ]
+                "type": "string"
             }
         },
         {
@@ -96,7 +90,7 @@
         "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.",
         "schema": {
             "type": "object",
-            "subtype": "ml-model"
+            "subtype": "mlm-model"
         }
     },
     "links": [
@@ -107,4 +101,4 @@
             "rel": "about"
         }
     ]
-}
+}
\ No newline at end of file
diff --git a/proposals/ml_predict.json b/proposals/ml_predict.json
index 87cd2500..8b49054a 100644
--- a/proposals/ml_predict.json
+++ b/proposals/ml_predict.json
@@ -20,7 +20,7 @@
             "description": "A ML model that was trained with one of the ML training processes such as ``ml_fit_regr_random_forest()``.",
             "schema": {
                 "type": "object",
-                "subtype": "ml-model"
+                "subtype": "mlm-model"
             }
         },
         {
@@ -46,4 +46,4 @@
             ]
         }
     }
-}
+}
\ No newline at end of file
diff --git a/proposals/save_ml_model.json b/proposals/save_ml_model.json
index 5e9ea8b0..c92b79cf 100644
--- a/proposals/save_ml_model.json
+++ b/proposals/save_ml_model.json
@@ -1,7 +1,7 @@
 {
     "id": "save_ml_model",
     "summary": "Save a ML model",
-    "description": "Saves a machine learning model as part of a batch job.\n\nThe model will be accompanied by a separate STAC Item that implements the [ml-model extension](https://github.com/stac-extensions/ml-model).",
+    "description": "Saves a machine learning model as part of a batch job.\n\nThe model will be accompanied by a separate STAC Item that implements the [Machine Learning Model (MLM) extension](https://github.com/crim-ca/mlm-extension).",
     "categories": [
         "machine learning",
         "import"
@@ -13,7 +13,7 @@
             "description": "The data to store as a machine learning model.",
             "schema": {
                 "type": "object",
-                "subtype": "ml-model"
+                "subtype": "mlm-model"
             }
         },
         {
@@ -35,8 +35,8 @@
     },
     "links": [
         {
-            "href": "https://github.com/stac-extensions/ml-model",
-            "title": "STAC ml-model extension",
+            "href": "https://github.com/crim-ca/mlm-extension",
+            "title": "Machine Learning Model STAC extension",
             "type": "text/html",
             "rel": "about"
         }