From 92e407745eb2aaa52fd2b15d1cbf91801aa10536 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 3 Jan 2024 14:56:03 +0100 Subject: [PATCH] Clarify NaN in comparisons, clarify character encoding issues --- CHANGELOG.md | 8 ++++++-- eq.json | 9 ++++++++- gt.json | 9 ++++++++- gte.json | 9 ++++++++- lt.json | 9 ++++++++- lte.json | 9 ++++++++- meta/implementation.md | 20 ++++++++++++++++---- neq.json | 9 ++++++++- proposals/is_infinite.json | 4 ++-- text_begins.json | 2 +- text_contains.json | 2 +- text_ends.json | 2 +- 12 files changed, 75 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4811eee..f4ec2834 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- Clarified for various mathematical functions the defined input and output ranges. Mention that `NaN` is returned outside of the defined input range where possible. -- Clarified for various processes the handling of no-data values and null, see also the [implementation guide](meta/implementation.md). [#480](https://github.com/Open-EO/openeo-processes/issues/480) +- Clarified for various mathematical functions the defined input and output ranges. + Mention that `NaN` is returned outside of the defined input range where possible. +- Clarified for several comparison processes how `NaN` values have to be handled. +- Clarified for various processes the handling of no-data values and `null`, see also the [implementation guide](meta/implementation.md#no-data-value). [#480](https://github.com/Open-EO/openeo-processes/issues/480) +- Added a [section about character encodings to the implementation guide](meta/implementation.md#character-encoding). + Removed any character encoding related wording from the process specifications themselves. - Added a uniqueness contraint to various array-typed parameters (e.g. 
lists of dimension names or labels) - `array_interpolate_linear`: Apply interpolation to NaN and no-data values. - `clip`: Throw an exception if min > max. [#472](https://github.com/Open-EO/openeo-processes/issues/472) diff --git a/eq.json b/eq.json index b1b23385..73ffaaa4 100644 --- a/eq.json +++ b/eq.json @@ -1,7 +1,7 @@ { "id": "eq", "summary": "Equal to comparison", - "description": "Compares whether `x` is strictly equal to `y`.\n\n**Remarks:**\n\n* Data types MUST be checked strictly. For example, a string with the content *1* is not equal to the number *1*. Nevertheless, an integer *1* is equal to a floating-point number *1.0* as `integer` is a sub-type of `number`.\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* Temporal strings are normal strings. To compare temporal strings as dates/times, use ``date_difference()``.", + "description": "Compares whether `x` is strictly equal to `y`.\n\n**Remarks:**\n\n* Data types MUST be checked strictly. For example, a string with the content *1* is not equal to the number *1*. Nevertheless, an integer *1* is equal to a floating-point number *1.0* as `integer` is a sub-type of `number`.\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* The comparison of `NaN` (not a number) follows [IEEE Standard 754](https://ieeexplore.ieee.org/document/4610935).\n* Temporal strings are normal strings. 
To compare temporal strings as dates/times, use ``date_difference()``.", "categories": [ "texts", "comparison" @@ -146,5 +146,12 @@ }, "returns": false } + ], + "links": [ + { + "rel": "about", + "href": "https://ieeexplore.ieee.org/document/4610935", + "title": "IEEE Standard 754-2008 for Floating-Point Arithmetic" + } ] } diff --git a/gt.json b/gt.json index 542f618c..dbb33296 100644 --- a/gt.json +++ b/gt.json @@ -1,7 +1,7 @@ { "id": "gt", "summary": "Greater than comparison", - "description": "Compares whether `x` is strictly greater than `y`.\n\n**Remarks:**\n\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* If any operand is not a `number`, the process returns `false`.\n* Temporal strings are normal strings. To compare temporal strings as dates/times, use ``date_difference()``.", + "description": "Compares whether `x` is strictly greater than `y`.\n\n**Remarks:**\n\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* The comparison of `NaN` (not a number) follows [IEEE Standard 754](https://ieeexplore.ieee.org/document/4610935).\n* If any operand is not the data type `number`, the process returns `false`.\n* Temporal strings are normal strings. 
To compare temporal strings as dates/times, use ``date_difference()``.", "categories": [ "comparison" ], @@ -90,5 +90,12 @@ }, "returns": false } + ], + "links": [ + { + "rel": "about", + "href": "https://ieeexplore.ieee.org/document/4610935", + "title": "IEEE Standard 754-2008 for Floating-Point Arithmetic" + } ] } diff --git a/gte.json b/gte.json index 712b6b9c..3c054ba8 100644 --- a/gte.json +++ b/gte.json @@ -1,7 +1,7 @@ { "id": "gte", "summary": "Greater than or equal to comparison", - "description": "Compares whether `x` is greater than or equal to `y`.\n\n**Remarks:**\n\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* If the operands are not equal (see process ``eq()``) and any of them is not a `number`, the process returns `false`.\n* Temporal strings are normal strings. To compare temporal strings as dates/times, use ``date_difference()``.", + "description": "Compares whether `x` is greater than or equal to `y`.\n\n**Remarks:**\n\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* The comparison of `NaN` (not a number) follows [IEEE Standard 754](https://ieeexplore.ieee.org/document/4610935).\n* If the operands are not equal (see process ``eq()``) and any of them is not the data type `number`, the process returns `false`.\n* Temporal strings are normal strings. 
To compare temporal strings as dates/times, use ``date_difference()``.", "categories": [ "comparison" ], @@ -84,6 +84,13 @@ "returns": false } ], + "links": [ + { + "rel": "about", + "href": "https://ieeexplore.ieee.org/document/4610935", + "title": "IEEE Standard 754-2008 for Floating-Point Arithmetic" + } + ], "process_graph": { "eq": { "process_id": "eq", diff --git a/lt.json b/lt.json index b7e35bf4..bcb167b1 100644 --- a/lt.json +++ b/lt.json @@ -1,7 +1,7 @@ { "id": "lt", "summary": "Less than comparison", - "description": "Compares whether `x` is strictly less than `y`.\n\n**Remarks:**\n\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* If any operand is not a `number`, the process returns `false`.\n* Temporal strings are normal strings. To compare temporal strings as dates/times, use ``date_difference()``.", + "description": "Compares whether `x` is strictly less than `y`.\n\n**Remarks:**\n\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* The comparison of `NaN` (not a number) follows [IEEE Standard 754](https://ieeexplore.ieee.org/document/4610935).\n* If any operand is not the data type `number`, the process returns `false`.\n* Temporal strings are normal strings. 
To compare temporal strings as dates/times, use ``date_difference()``.", "categories": [ "comparison" ], @@ -90,5 +90,12 @@ }, "returns": false } + ], + "links": [ + { + "rel": "about", + "href": "https://ieeexplore.ieee.org/document/4610935", + "title": "IEEE Standard 754-2008 for Floating-Point Arithmetic" + } ] } diff --git a/lte.json b/lte.json index 5ab05126..0968dfa1 100644 --- a/lte.json +++ b/lte.json @@ -1,7 +1,7 @@ { "id": "lte", "summary": "Less than or equal to comparison", - "description": "Compares whether `x` is less than or equal to `y`.\n\n**Remarks:**\n\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* If the operands are not equal (see process ``eq()``) and any of them is not a `number`, the process returns `false`.\n* Temporal strings are normal strings. To compare temporal strings as dates/times, use ``date_difference()``.", + "description": "Compares whether `x` is less than or equal to `y`.\n\n**Remarks:**\n\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* The comparison of `NaN` (not a number) follows [IEEE Standard 754](https://ieeexplore.ieee.org/document/4610935).\n* If the operands are not equal (see process ``eq()``) and any of them is not the data type `number`, the process returns `false`.\n* Temporal strings are normal strings. 
To compare temporal strings as dates/times, use ``date_difference()``.", "categories": [ "comparison" ], @@ -84,6 +84,13 @@ "returns": false } ], + "links": [ + { + "rel": "about", + "href": "https://ieeexplore.ieee.org/document/4610935", + "title": "IEEE Standard 754-2008 for Floating-Point Arithmetic" + } + ], "process_graph": { "eq": { "process_id": "eq", diff --git a/meta/implementation.md b/meta/implementation.md index fafe6ea0..a9b146fd 100644 --- a/meta/implementation.md +++ b/meta/implementation.md @@ -4,16 +4,16 @@ This file is meant to provide some additional implementation details for back-en ## No-data value -A data cube shall always keep reference of the applicable no-data values. -The no-data value can be chosen by the back-end implementation, e.g. depending on the data type of the data. +A data cube shall always keep reference of the applicable no-data value(s). +The no-data values can be chosen by the back-end implementation, e.g. depending on the data type of the data. No-data values should be exposed for each pre-defined Collection in its metadata. For all data generated through openEO (e.g. through synchronous or batch jobs), the metadata and/or data shall expose the no-data values. -The openEO process specifications generally use `null` as a generic value to express no-data values. +The openEO process specifications generally use `null` as a generic value to express no-data values. This is primarily meant for the JSON encoding, this means: 1. in the process specification (data type `null` in the schema), and -2. in the process graph (if the no-data value exposed through the metadata can't be used in JSON). +2. in the process graph (if the no-data value exposed through the metadata can't be used in JSON, e.g. `NaN`). Back-ends may or may not use `null` as a no-data value internally. 
@@ -29,6 +29,18 @@ by the array processes shall be replaced with the no-data value of the data cube None of the openEO processes per se is "special" and thus all are treated the same way by default. Nevertheless, there are some cases where a special treatment can make a huge difference. +## Character encoding + +String-related processes previously mentioned that strings have to be "encoded in UTF-8 by default". +This was removed and we clarify the behavior here: + +For data transfer through the API, the character encoding of strings is specified using HTTP headers. +This means all strings provided in the process graph have the same encoding as specified in the HTTP headers. +Back-ends can internally use any character encoding and as such may need to convert the character encoding +upon receipt of the process graph. +It is recommended to use a [Unicode](https://en.wikipedia.org/wiki/Unicode) character encoding such as UTF-8. +In case of doubt, clients and servers should assume UTF-8 as the character encoding. + ### Branching behavior The `if` process (and any process that is working on some kind of condition) are usually diff --git a/neq.json b/neq.json index 1b115b3b..383d6b8b 100644 --- a/neq.json +++ b/neq.json @@ -1,7 +1,7 @@ { "id": "neq", "summary": "Not equal to comparison", - "description": "Compares whether `x` is **not** strictly equal to `y`.\n\n**Remarks:**\n\n* Data types MUST be checked strictly. For example, a string with the content *1* is not equal to the number *1*. Nevertheless, an integer *1* is equal to a floating-point number *1.0* as `integer` is a sub-type of `number`.\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* Strings are expected to be encoded in UTF-8 by default.\n* Temporal strings are normal strings. 
To compare temporal strings as dates/times, use ``date_difference()``.", + "description": "Compares whether `x` is **not** strictly equal to `y`.\n\n**Remarks:**\n\n* Data types MUST be checked strictly. For example, a string with the content *1* is not equal to the number *1*. Nevertheless, an integer *1* is equal to a floating-point number *1.0* as `integer` is a sub-type of `number`.\n* If any operand is a no-data value, the result will be the no-data value (or `null`).\n* The comparison of `NaN` (not a number) follows [IEEE Standard 754](https://ieeexplore.ieee.org/document/4610935).\n* Temporal strings are normal strings. To compare temporal strings as dates/times, use ``date_difference()``.", "categories": [ "texts", "comparison" @@ -147,6 +147,13 @@ "returns": true } ], + "links": [ + { + "rel": "about", + "href": "https://ieeexplore.ieee.org/document/4610935", + "title": "IEEE Standard 754-2008 for Floating-Point Arithmetic" + } + ], "process_graph": { "eq": { "process_id": "eq", diff --git a/proposals/is_infinite.json b/proposals/is_infinite.json index b6a5acca..da0d165f 100644 --- a/proposals/is_infinite.json +++ b/proposals/is_infinite.json @@ -1,7 +1,7 @@ { "id": "is_infinite", "summary": "Value is an infinite number", - "description": "Checks whether the specified value `x` is an infinite number. The definition of infinite numbers follows the [IEEE Standard 754](https://ieeexplore.ieee.org/document/4610935). The special numerical value `NaN` (not a number) as defined by the [IEEE Standard 754](https://ieeexplore.ieee.org/document/4610935) is not an infinite number and must return `false`.", + "description": "Checks whether the specified value `x` is an infinite number. The definition of infinite numbers (and `NaN`) follows the [IEEE Standard 754](https://ieeexplore.ieee.org/document/4610935). 
`NaN` (not a number) is not an infinite number and must return `false`.", "categories": [ "comparison" ], @@ -28,4 +28,4 @@ "title": "IEEE Standard 754-2008 for Floating-Point Arithmetic" } ] -} \ No newline at end of file +} diff --git a/text_begins.json b/text_begins.json index f422999f..8fc39624 100644 --- a/text_begins.json +++ b/text_begins.json @@ -1,7 +1,7 @@ { "id": "text_begins", "summary": "Text begins with another text", - "description": "Checks whether the text (also known as *string*) specified for `data` contains the text specified for `pattern` at the beginning. Both are expected to be encoded in UTF-8 by default. No-data values are passed through and therefore get propagated.", + "description": "Checks whether the text (also known as *string*) specified for `data` contains the text specified for `pattern` at the beginning. No-data values are passed through and therefore get propagated.", "categories": [ "texts", "comparison" diff --git a/text_contains.json b/text_contains.json index d0046131..a41a3d95 100644 --- a/text_contains.json +++ b/text_contains.json @@ -1,7 +1,7 @@ { "id": "text_contains", "summary": "Text contains another text", - "description": "Checks whether the text (also known as *string*) specified for `data` contains the text specified for `pattern`. Both are expected to be encoded in UTF-8 by default. No-data values are passed through and therefore get propagated.", + "description": "Checks whether the text (also known as *string*) specified for `data` contains the text specified for `pattern`. No-data values are passed through and therefore get propagated.", "categories": [ "texts", "comparison" diff --git a/text_ends.json b/text_ends.json index 7bd2238c..70a04855 100644 --- a/text_ends.json +++ b/text_ends.json @@ -1,7 +1,7 @@ { "id": "text_ends", "summary": "Text ends with another text", - "description": "Checks whether the text (also known as *string*) specified for `data` contains the text specified for `pattern` at the end. 
Both are expected to be encoded in UTF-8 by default. No-data values are passed through and therefore get propagated.", + "description": "Checks whether the text (also known as *string*) specified for `data` contains the text specified for `pattern` at the end. No-data values are passed through and therefore get propagated.", "categories": [ "texts", "comparison"