From a0e25efc9e40d71e61b404b4f48ed14b379b5f6b Mon Sep 17 00:00:00 2001
From: IAlibay <ialibay@mdanalysis.org>
Date: Mon, 11 Sep 2023 09:28:13 +0100
Subject: [PATCH 1/9] Start adding npy dtype codec

---
 gufe/custom_codecs.py          | 23 +++++++++++++++++++++++
 gufe/tests/test_custom_json.py | 20 ++++++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/gufe/custom_codecs.py b/gufe/custom_codecs.py
index 87e3828e..33c99634 100644
--- a/gufe/custom_codecs.py
+++ b/gufe/custom_codecs.py
@@ -33,6 +33,10 @@ def inherited_is_my_dict(dct, cls):
     stored = gufe.tokenization.get_class(module, classname)
     return cls in stored.mro()
 
+def is_npy_dtype_dict(dct):
+    expected = ["dtype", "bytes"]
+    is_custom = all(exp in dct for exp in expected)
+    return is_custom and ("shape" not in dct)
 
 def is_openff_unit_dict(dct):
     expected = ["pint_unit_registry", "unit_name", ":is_custom:"]
@@ -52,18 +56,34 @@ def is_openff_quantity_dict(dct):
     from_dict=lambda dct: pathlib.PosixPath(dct["path"]),
 )
 
+
 BYTES_CODEC = JSONCodec(
     cls=bytes,
     to_dict=lambda obj: {'latin-1': obj.decode('latin-1')},
     from_dict=lambda dct: dct['latin-1'].encode('latin-1'),
 )
 
+
 DATETIME_CODEC = JSONCodec(
     cls=datetime.datetime,
     to_dict=lambda obj: {'isotime': obj.isoformat()},
     from_dict=lambda dct: datetime.datetime.fromisoformat(dct['isotime']),
 )
 
+
+NPY_DTYPE_CODEC = JSONCodec(
+    cls=np.generic,
+    to_dict=lambda obj: {
+        'dtype': str(obj.dtype),
+        'bytes': obj.tobytes(),
+    },
+    from_dict=lambda dct: np.frombuffer(
+        dct['bytes'], dtype=np.dtype(dct['dtype'])
+    )[0],
+    is_my_dict=is_npy_dtype_dict,
+)
+
+
 NUMPY_CODEC = JSONCodec(
     cls=np.ndarray,
     to_dict=lambda obj: {
@@ -76,6 +96,7 @@ def is_openff_quantity_dict(dct):
     ).reshape(dct['shape'])
 )
 
+
 SETTINGS_CODEC = JSONCodec(
     cls=SettingsBaseModel,
     to_dict=lambda obj: {field: getattr(obj, field) for field in obj.__fields__},
@@ -83,6 +104,7 @@ def is_openff_quantity_dict(dct):
     is_my_dict=functools.partial(inherited_is_my_dict, cls=SettingsBaseModel),
 )
 
+
 OPENFF_QUANTITY_CODEC = JSONCodec(
     cls=None,
     to_dict=lambda obj: {
@@ -98,6 +120,7 @@ def is_openff_quantity_dict(dct):
     is_my_dict=is_openff_quantity_dict,
 )
 
+
 OPENFF_UNIT_CODEC = JSONCodec(
     cls=None,
     to_dict=lambda unit: {
diff --git a/gufe/tests/test_custom_json.py b/gufe/tests/test_custom_json.py
index 1efcb2f5..2d4b92f4 100644
--- a/gufe/tests/test_custom_json.py
+++ b/gufe/tests/test_custom_json.py
@@ -14,6 +14,7 @@
 from gufe.custom_codecs import (
     BYTES_CODEC,
     NUMPY_CODEC,
+    NPY_DTYPE_CODEC,
     OPENFF_QUANTITY_CODEC,
     OPENFF_UNIT_CODEC,
     PATH_CODEC,
@@ -115,9 +116,28 @@ def test_round_trip(self):
             json_str = json.dumps(obj, cls=encoder)
             reconstructed = json.loads(json_str, cls=decoder)
             npt.assert_array_equal(reconstructed, obj)
+            assert reconstructed.dtype == obj.dtype
             json_str_2 = json.dumps(obj, cls=encoder)
             assert json_str == json_str_2
 
+class TestNumpyGenericCodec(TestNumpyCoding):
+    def setup_method(self):
+        self.codec = NPY_DTYPE_CODEC
+        self.objs = [np.float16(1.0), np.float32(1.0), np.float64(1.0),
+                     np.complex128(1.0), np.clongdouble(1.0), np.uint64(1),]
+        dtypes = [str(a.dtype) for a in self.objs]
+        byte_reps = [a.tobytes() for a in self.objs]
+        classes = [str(a.dtype) for a in self.objs]
+        self.dcts = [
+            {
+                ":is_custom:": True,
+                "__class__": classname,
+                "__module__": "numpy",
+                "dtype": dtype,
+                "bytes": byte_rep,
+            }
+            for dtype, byte_rep, classname in zip(dtypes, byte_reps, classes)
+        ]
 
 class TestPathCodec(CustomJSONCodingTest):
     def setup_method(self):

From 4ea0388a36272cc7943c00fd4469a87c130b901a Mon Sep 17 00:00:00 2001
From: IAlibay <ialibay@mdanalysis.org>
Date: Mon, 11 Sep 2023 10:49:59 +0100
Subject: [PATCH 2/9] fixing the bool problem

---
 gufe/custom_codecs.py          |  3 +++
 gufe/tests/test_custom_json.py | 29 ++++++++++++++++++++++++-----
 gufe/tokenization.py           |  2 ++
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/gufe/custom_codecs.py b/gufe/custom_codecs.py
index 33c99634..dd09a043 100644
--- a/gufe/custom_codecs.py
+++ b/gufe/custom_codecs.py
@@ -33,11 +33,13 @@ def inherited_is_my_dict(dct, cls):
     stored = gufe.tokenization.get_class(module, classname)
     return cls in stored.mro()
 
+
 def is_npy_dtype_dict(dct):
     expected = ["dtype", "bytes"]
     is_custom = all(exp in dct for exp in expected)
     return is_custom and ("shape" not in dct)
 
+
 def is_openff_unit_dict(dct):
     expected = ["pint_unit_registry", "unit_name", ":is_custom:"]
     is_custom = all(exp in dct for exp in expected)
@@ -80,6 +82,7 @@ def is_openff_quantity_dict(dct):
     from_dict=lambda dct: np.frombuffer(
         dct['bytes'], dtype=np.dtype(dct['dtype'])
     )[0],
+    is_my_obj=lambda obj: isinstance(obj, np.generic),
     is_my_dict=is_npy_dtype_dict,
 )
 
diff --git a/gufe/tests/test_custom_json.py b/gufe/tests/test_custom_json.py
index 2d4b92f4..9c52deb4 100644
--- a/gufe/tests/test_custom_json.py
+++ b/gufe/tests/test_custom_json.py
@@ -88,8 +88,9 @@ def test_not_mine(self):
 class TestNumpyCoding(CustomJSONCodingTest):
     def setup_method(self):
         self.codec = NUMPY_CODEC
-        self.objs = [np.array([[1.0, 0.0], [2.0, 3.2]]), np.array([1, 0])]
-        shapes = [[2, 2], [2,]]
+        self.objs = [np.array([[1.0, 0.0], [2.0, 3.2]]), np.array([1, 0]),
+                     np.array([1.0, 2.0, 3.0], dtype=np.float32),]
+        shapes = [[2, 2], [2,], [3,]]
         dtypes = [str(arr.dtype) for arr in self.objs]  # may change by system?
         byte_reps = [arr.tobytes() for arr in self.objs]
         self.dcts = [
@@ -123,11 +124,15 @@ def test_round_trip(self):
 class TestNumpyGenericCodec(TestNumpyCoding):
     def setup_method(self):
         self.codec = NPY_DTYPE_CODEC
-        self.objs = [np.float16(1.0), np.float32(1.0), np.float64(1.0),
-                     np.complex128(1.0), np.clongdouble(1.0), np.uint64(1),]
+        self.objs = [np.bool_(True), np.float16(1.0), np.float32(1.0),
+                     np.float64(1.0), np.complex128(1.0),
+                     np.clongdouble(1.0), np.uint64(1),]
         dtypes = [str(a.dtype) for a in self.objs]
         byte_reps = [a.tobytes() for a in self.objs]
-        classes = [str(a.dtype) for a in self.objs]
+        # Overly complicated extraction of the class name
+        # to deal with the bool_ -> bool dtype class name problem
+        classes = [str(a.__class__).split("'")[1].split('.')[1]
+                   for a in self.objs]
         self.dcts = [
             {
                 ":is_custom:": True,
@@ -139,6 +144,20 @@ def setup_method(self):
             for dtype, byte_rep, classname in zip(dtypes, byte_reps, classes)
         ]
 
+    def test_round_trip(self):
+        encoder, decoder = custom_json_factory([self.codec, BYTES_CODEC])
+        for (obj, dct) in zip(self.objs, self.dcts):
+            print(dct)
+            print(encoder)
+            json_str = json.dumps(obj, cls=encoder)
+            print(json_str)
+            reconstructed = json.loads(json_str, cls=decoder)
+            print(type(reconstructed))
+            npt.assert_array_equal(reconstructed, obj)
+            assert reconstructed.dtype == obj.dtype
+            json_str_2 = json.dumps(obj, cls=encoder)
+            assert json_str == json_str_2
+
 class TestPathCodec(CustomJSONCodingTest):
     def setup_method(self):
         self.codec = PATH_CODEC
diff --git a/gufe/tokenization.py b/gufe/tokenization.py
index 25364891..66e8accd 100644
--- a/gufe/tokenization.py
+++ b/gufe/tokenization.py
@@ -15,6 +15,7 @@
 from gufe.custom_codecs import (
     BYTES_CODEC,
     DATETIME_CODEC,
+    NPY_DTYPE_CODEC,
     NUMPY_CODEC,
     OPENFF_QUANTITY_CODEC,
     OPENFF_UNIT_CODEC,
@@ -26,6 +27,7 @@
 _default_json_codecs = [
     PATH_CODEC,
     NUMPY_CODEC,
+    NPY_DTYPE_CODEC,
     BYTES_CODEC,
     DATETIME_CODEC,
     SETTINGS_CODEC,

From 1796f2ccc661d50e7d627f84ca5a3e10432a484b Mon Sep 17 00:00:00 2001
From: IAlibay <ialibay@mdanalysis.org>
Date: Mon, 11 Sep 2023 10:51:30 +0100
Subject: [PATCH 3/9] pep8

---
 gufe/tests/test_custom_json.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gufe/tests/test_custom_json.py b/gufe/tests/test_custom_json.py
index 9c52deb4..18961c42 100644
--- a/gufe/tests/test_custom_json.py
+++ b/gufe/tests/test_custom_json.py
@@ -121,6 +121,7 @@ def test_round_trip(self):
             json_str_2 = json.dumps(obj, cls=encoder)
             assert json_str == json_str_2
 
+
 class TestNumpyGenericCodec(TestNumpyCoding):
     def setup_method(self):
         self.codec = NPY_DTYPE_CODEC
@@ -158,6 +159,7 @@ def test_round_trip(self):
             json_str_2 = json.dumps(obj, cls=encoder)
             assert json_str == json_str_2
 
+
 class TestPathCodec(CustomJSONCodingTest):
     def setup_method(self):
         self.codec = PATH_CODEC

From de84d46036a8e490ea483f0ea571c68d02e3eea3 Mon Sep 17 00:00:00 2001
From: IAlibay <ialibay@mdanalysis.org>
Date: Mon, 11 Sep 2023 11:20:08 +0100
Subject: [PATCH 4/9] cleanup ahead of review

---
 gufe/tests/test_custom_json.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/gufe/tests/test_custom_json.py b/gufe/tests/test_custom_json.py
index 18961c42..f63a2e53 100644
--- a/gufe/tests/test_custom_json.py
+++ b/gufe/tests/test_custom_json.py
@@ -145,20 +145,6 @@ def setup_method(self):
             for dtype, byte_rep, classname in zip(dtypes, byte_reps, classes)
         ]
 
-    def test_round_trip(self):
-        encoder, decoder = custom_json_factory([self.codec, BYTES_CODEC])
-        for (obj, dct) in zip(self.objs, self.dcts):
-            print(dct)
-            print(encoder)
-            json_str = json.dumps(obj, cls=encoder)
-            print(json_str)
-            reconstructed = json.loads(json_str, cls=decoder)
-            print(type(reconstructed))
-            npt.assert_array_equal(reconstructed, obj)
-            assert reconstructed.dtype == obj.dtype
-            json_str_2 = json.dumps(obj, cls=encoder)
-            assert json_str == json_str_2
-
 
 class TestPathCodec(CustomJSONCodingTest):
     def setup_method(self):

From 50040d780c2444406b7efa673f043bf851a13919 Mon Sep 17 00:00:00 2001
From: IAlibay <ialibay@mdanalysis.org>
Date: Tue, 19 Sep 2023 08:55:56 +0100
Subject: [PATCH 5/9] switch to cls=None

---
 gufe/custom_codecs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gufe/custom_codecs.py b/gufe/custom_codecs.py
index dd09a043..ff5f7901 100644
--- a/gufe/custom_codecs.py
+++ b/gufe/custom_codecs.py
@@ -74,7 +74,7 @@ def is_openff_quantity_dict(dct):
 
 
 NPY_DTYPE_CODEC = JSONCodec(
-    cls=np.generic,
+    cls=None,
     to_dict=lambda obj: {
         'dtype': str(obj.dtype),
         'bytes': obj.tobytes(),

From b509d86f22ac453991b8e55327b35c0abd8279cb Mon Sep 17 00:00:00 2001
From: IAlibay <ialibay@mdanalysis.org>
Date: Tue, 19 Sep 2023 09:13:13 +0100
Subject: [PATCH 6/9] Add codec roundtrip test

---
 gufe/tests/test_custom_json.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/gufe/tests/test_custom_json.py b/gufe/tests/test_custom_json.py
index f63a2e53..f82a54c3 100644
--- a/gufe/tests/test_custom_json.py
+++ b/gufe/tests/test_custom_json.py
@@ -45,6 +45,22 @@ def test_add_existing_codec(self):
         assert len(serialization.codecs) == 1
 
 
+@pytest.mark.parametrize('obj',
+    [np.array([[1.0, 0.0], [2.0, 3.2]]),
+    np.float32(1.1)],
+)
+@pytest.mark.parametrize('codecs', [
+    [BYTES_CODEC, NUMPY_CODEC, NPY_DTYPE_CODEC],
+    [NPY_DTYPE_CODEC, BYTES_CODEC, NUMPY_CODEC],
+])
+def test_numpy_codec_order_roundtrip(obj, codecs):
+    serialization = JSONSerializerDeserializer(codecs)
+    serialized = serialization.serializer(obj)
+    reconstructed = serialization.deserializer(serialized)
+    npt.assert_equal(obj, reconstructed)
+    assert obj.dtype == reconstructed.dtype
+
+
 class CustomJSONCodingTest:
     """Base class for testing codecs.
 
@@ -125,8 +141,11 @@ def test_round_trip(self):
 class TestNumpyGenericCodec(TestNumpyCoding):
     def setup_method(self):
         self.codec = NPY_DTYPE_CODEC
+        # Note that np.float64 is treated as a float by the
+        # default json encode (and so returns a float not a numpy
+        # object).
         self.objs = [np.bool_(True), np.float16(1.0), np.float32(1.0),
-                     np.float64(1.0), np.complex128(1.0),
+                     np.complex128(1.0),
                      np.clongdouble(1.0), np.uint64(1),]
         dtypes = [str(a.dtype) for a in self.objs]
         byte_reps = [a.tobytes() for a in self.objs]

From eb98c1dfbcb92b407b96cd2d8b33a53b612fecfa Mon Sep 17 00:00:00 2001
From: IAlibay <ialibay@mdanalysis.org>
Date: Tue, 19 Sep 2023 09:16:18 +0100
Subject: [PATCH 7/9] Go back to using np.generic

---
 gufe/custom_codecs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gufe/custom_codecs.py b/gufe/custom_codecs.py
index ff5f7901..dd09a043 100644
--- a/gufe/custom_codecs.py
+++ b/gufe/custom_codecs.py
@@ -74,7 +74,7 @@ def is_openff_quantity_dict(dct):
 
 
 NPY_DTYPE_CODEC = JSONCodec(
-    cls=None,
+    cls=np.generic,
     to_dict=lambda obj: {
         'dtype': str(obj.dtype),
         'bytes': obj.tobytes(),

From 1fb1e5c1c03f58accabc6e0f82d78d8e9df2f60a Mon Sep 17 00:00:00 2001
From: IAlibay <ialibay@mdanalysis.org>
Date: Wed, 20 Sep 2023 08:33:27 +0100
Subject: [PATCH 8/9] Add some documentation

---
 docs/guide/serialization.rst | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/docs/guide/serialization.rst b/docs/guide/serialization.rst
index 0ed596d5..6557a3e4 100644
--- a/docs/guide/serialization.rst
+++ b/docs/guide/serialization.rst
@@ -153,6 +153,27 @@ Details of how the object is recognized for encoding or how the dictionary
 is recognized for decoding can be changed by passing functions to the
 ``is_my_obj`` or ``is_my_dict`` parameters of :class:`.JSONCodec`.
 
+.. warning::
+    Custom encoders and decoders are only consulted when the default JSON
+    encoder/decoder cannot natively handle the object.
+    This leads to some odd / lossy behaviour for some objects, such
+    as ``np.float64``, which is natively converted to a ``float`` type
+    by the default encoder, whilst other NumPy generic types are
+    appropriately round-tripped.
+
+On the use of NumPy generic types
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Due to their inconsistent behaviour in how they are handled by the default
+JSON encoder/decoder routines (see warning above), it is our suggestion
+that Python types should be used preferentially instead of NumPy generic
+types. For example, if one is looking to store a single float value,
+a ``float`` would be preferred to a ``np.float32`` or ``np.float64``.
+
+Please note that this only applies to generic types being used **outside of
+NumPy arrays**. NumPy arrays are, as far as we know, always handled
+in a consistent manner.
+
 Dumping arbitrary objects to JSON
 ---------------------------------
 

From a4af42247cff6e1c6e90b62435c2c5ebd4e61a83 Mon Sep 17 00:00:00 2001
From: IAlibay <ialibay@mdanalysis.org>
Date: Wed, 20 Sep 2023 08:33:43 +0100
Subject: [PATCH 9/9] more docs

---
 gufe/custom_codecs.py          |  4 ++++
 gufe/tests/test_custom_json.py | 12 ++++++------
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/gufe/custom_codecs.py b/gufe/custom_codecs.py
index dd09a043..f334e266 100644
--- a/gufe/custom_codecs.py
+++ b/gufe/custom_codecs.py
@@ -73,6 +73,10 @@ def is_openff_quantity_dict(dct):
 )
 
 
+# Note that this has inconsistent behaviour for some generic types
+# which end up being handled by the default JSON encoder/decoder.
+# The main example of this is np.float64 which will be turned into
+# a float type on serialization.
 NPY_DTYPE_CODEC = JSONCodec(
     cls=np.generic,
     to_dict=lambda obj: {
diff --git a/gufe/tests/test_custom_json.py b/gufe/tests/test_custom_json.py
index f82a54c3..8222e51a 100644
--- a/gufe/tests/test_custom_json.py
+++ b/gufe/tests/test_custom_json.py
@@ -45,10 +45,10 @@ def test_add_existing_codec(self):
         assert len(serialization.codecs) == 1
 
 
-@pytest.mark.parametrize('obj',
-    [np.array([[1.0, 0.0], [2.0, 3.2]]),
-    np.float32(1.1)],
-)
+@pytest.mark.parametrize('obj', [
+    np.array([[1.0, 0.0], [2.0, 3.2]]),
+    np.float32(1.1)
+])
 @pytest.mark.parametrize('codecs', [
     [BYTES_CODEC, NUMPY_CODEC, NPY_DTYPE_CODEC],
     [NPY_DTYPE_CODEC, BYTES_CODEC, NUMPY_CODEC],
@@ -105,7 +105,7 @@ class TestNumpyCoding(CustomJSONCodingTest):
     def setup_method(self):
         self.codec = NUMPY_CODEC
         self.objs = [np.array([[1.0, 0.0], [2.0, 3.2]]), np.array([1, 0]),
-                     np.array([1.0, 2.0, 3.0], dtype=np.float32),]
+                     np.array([1.0, 2.0, 3.0], dtype=np.float32)]
         shapes = [[2, 2], [2,], [3,]]
         dtypes = [str(arr.dtype) for arr in self.objs]  # may change by system?
         byte_reps = [arr.tobytes() for arr in self.objs]
@@ -146,7 +146,7 @@ def setup_method(self):
         # object).
         self.objs = [np.bool_(True), np.float16(1.0), np.float32(1.0),
                      np.complex128(1.0),
-                     np.clongdouble(1.0), np.uint64(1),]
+                     np.clongdouble(1.0), np.uint64(1)]
         dtypes = [str(a.dtype) for a in self.objs]
         byte_reps = [a.tobytes() for a in self.objs]
         # Overly complicated extraction of the class name