From 276682b3b3cd4e20451c25e7c5613e236f0deecd Mon Sep 17 00:00:00 2001
From: Chelsea Lin <chelsealin@google.com>
Date: Thu, 7 Nov 2024 23:23:51 +0000
Subject: [PATCH] fix: dbjson serialization with most compact JSON
 representation

---
 db_dtypes/json.py                             |  4 +++-
 tests/compliance/json/test_json_compliance.py | 12 ++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/db_dtypes/json.py b/db_dtypes/json.py
index a00fe2b..d0bc6ca 100644
--- a/db_dtypes/json.py
+++ b/db_dtypes/json.py
@@ -143,7 +143,9 @@ def _serialize_json(value):
         else:
             # `sort_keys=True` sorts dictionary keys before serialization, making
             # JSON comparisons deterministic.
-            return json.dumps(value, sort_keys=True)
+            # `separators=(',', ':')` eliminates whitespace to get the most compact
+            # JSON representation.
+            return json.dumps(value, sort_keys=True, separators=(",", ":"))
 
     @staticmethod
     def _deserialize_json(value):
diff --git a/tests/compliance/json/test_json_compliance.py b/tests/compliance/json/test_json_compliance.py
index 18610a0..2a8e69a 100644
--- a/tests/compliance/json/test_json_compliance.py
+++ b/tests/compliance/json/test_json_compliance.py
@@ -31,7 +31,8 @@ def test_astype_str(self, data):
         # Use `json.dumps(str)` instead of passing `str(obj)` directly to the super method.
         result = pd.Series(data[:5]).astype(str)
         expected = pd.Series(
-            [json.dumps(x, sort_keys=True) for x in data[:5]], dtype=str
+            [json.dumps(x, sort_keys=True, separators=(",", ":")) for x in data[:5]],
+            dtype=str,
         )
         tm.assert_series_equal(result, expected)
 
@@ -46,7 +47,7 @@ def test_astype_string(self, data, nullable_string_dtype):
         # Use `json.dumps(str)` instead of passing `str(obj)` directly to the super method.
         result = pd.Series(data[:5]).astype(nullable_string_dtype)
         expected = pd.Series(
-            [json.dumps(x, sort_keys=True) for x in data[:5]],
+            [json.dumps(x, sort_keys=True, separators=(",", ":")) for x in data[:5]],
             dtype=nullable_string_dtype,
         )
         tm.assert_series_equal(result, expected)
@@ -119,11 +120,14 @@ class TestJSONArrayInterface(base.BaseInterfaceTests):
     def test_array_interface(self, data):
         result = np.array(data)
         # Use `json.dumps(data[0])` instead of passing `data[0]` directly to the super method.
-        assert result[0] == json.dumps(data[0])
+        assert result[0] == json.dumps(data[0], sort_keys=True, separators=(",", ":"))
 
         result = np.array(data, dtype=object)
         # Use `json.dumps(x)` instead of passing `x` directly to the super method.
-        expected = np.array([json.dumps(x) for x in data], dtype=object)
+        expected = np.array(
+            [json.dumps(x, sort_keys=True, separators=(",", ":")) for x in data],
+            dtype=object,
+        )
         # if expected.ndim > 1:
         #     # nested data, explicitly construct as 1D
         #     expected = construct_1d_object_array_from_listlike(list(data))