Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
aabb3d7
rename test_endian.py to test_bytes.py
keewis Aug 30, 2025
bde404e
tests for `BytesCodec.to_dict`
keewis Aug 30, 2025
cfad862
tests for `from_dict` and roundtripping
keewis Aug 30, 2025
1027f2c
don't use the system's default in `from_dict`
keewis Aug 30, 2025
9924358
changelog
keewis Aug 30, 2025
08e8ec1
Merge branch 'main' into endian-roundtrip
d-v-b Sep 1, 2025
c727e3f
Merge branch 'main' into endian-roundtrip
d-v-b Sep 2, 2025
8da1333
chore(deps): bump the actions group across 1 directory with 8 updates…
dependabot[bot] May 31, 2026
659c734
Merge branch 'main' of https://github.com/d-v-b/zarr-python
d-v-b Jun 6, 2026
51c994b
Merge branch 'main' of https://github.com/zarr-developers/zarr-python
d-v-b Jun 6, 2026
117b7ba
Merge branch 'main' of github.com:d-v-b/zarr-python
d-v-b Jun 12, 2026
d4de75d
Merge branch 'main' of github.com:zarr-developers/zarr-python
d-v-b Jun 12, 2026
86dabd5
Merge branch 'main' of github.com:zarr-developers/zarr-python
d-v-b Jun 12, 2026
21ba6ed
Merge branch 'main' of github.com:zarr-developers/zarr-python
d-v-b Jun 16, 2026
d28eff7
Merge branch 'main' of github.com:zarr-developers/zarr-python
d-v-b Jun 18, 2026
9e12957
Merge branch 'main' into endian-roundtrip
d-v-b Jun 18, 2026
60771be
Merge branch 'main' into endian-roundtrip
d-v-b Jun 18, 2026
929ffcb
test: consolidate endian tests into one config-parameterized test
d-v-b Jun 18, 2026
10ff170
test: use Expect/ExpectFail helpers for bytes codec dict tests
d-v-b Jun 18, 2026
2454c90
doc: note from_dict behavior change in changelog
d-v-b Jun 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changes/3417.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fixed `BytesCodec.from_dict` so that `BytesCodec` instances roundtrip to / from
their dict representation. `BytesCodec.from_dict` now interprets a missing
`endian` configuration as `endian=None` (matching what `BytesCodec.to_dict`
emits), instead of falling back to the system's native byte order.
1 change: 1 addition & 0 deletions src/zarr/codecs/bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
data, "bytes", require_configuration=False
)
configuration_parsed = configuration_parsed or {}
configuration_parsed.setdefault("endian", None)
return cls(**configuration_parsed) # type: ignore[arg-type]

def to_dict(self) -> dict[str, JSON]:
Expand Down
140 changes: 133 additions & 7 deletions tests/test_codecs/test_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,98 @@
import enum
import sys
import warnings
from typing import Any, cast
from typing import TYPE_CHECKING, Any, Literal, cast

import numpy as np
import pytest

import zarr
from tests.conftest import Expect, ExpectFail
from zarr.abc.codec import SupportsSyncCodec
from zarr.codecs.bytes import (
ENDIAN,
BytesCodec,
Endian,
EndianLiteral,
)
from zarr.core.array_spec import ArrayConfig, ArraySpec
from zarr.core.buffer import default_buffer_prototype
from zarr.core.buffer import NDBuffer, default_buffer_prototype
from zarr.core.dtype import get_data_type_from_native_dtype
from zarr.core.dtype.npy.int import Int8, Int32
from zarr.core.dtype.npy.structured import Struct
from zarr.storage import StorePath

from .test_codecs import _AsyncArrayProxy

if TYPE_CHECKING:
from zarr.abc.store import Store


@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
@pytest.mark.parametrize("input_dtype", [">u2", "<u2"])
@pytest.mark.parametrize("store_endian", ["big", "little"])
async def test_endian(
    store: Store,
    input_dtype: Literal[">u2", "<u2"],
    store_endian: Literal["big", "little"],
) -> None:
    """
    Write a 16x16 ``uint16`` array through a `BytesCodec` configured with
    ``store_endian`` and check two things:

    1. the single stored chunk equals the input re-encoded in the codec's
       byte order, whatever byte order the input array had; and
    2. reading the array back yields the original values.

    Every input byte order is paired with every store byte order, so both the
    byteswapping case (orders differ) and the no-op case (orders match) are
    covered. ``compressors=None`` keeps the stored chunk equal to the
    serializer's raw output so its bytes can be compared directly.
    """
    source = np.arange(0, 256, dtype=input_dtype).reshape((16, 16))
    array_path = "endian"
    array = await zarr.api.asynchronous.create_array(
        StorePath(store, array_path),
        shape=source.shape,
        chunks=(16, 16),
        dtype="uint16",
        fill_value=0,
        compressors=None,
        serializer=BytesCodec(endian=store_endian),
    )

    await _AsyncArrayProxy(array)[:, :].set(source)

    # One chunk covers the whole array, so the only chunk key is c/0/0.
    chunk = await store.get(f"{array_path}/c/0/0", prototype=default_buffer_prototype())
    assert chunk is not None

    # The raw chunk bytes must follow the codec's byte order, not the input's.
    if store_endian == "big":
        on_disk_dtype = ">u2"
    else:
        on_disk_dtype = "<u2"
    assert chunk.to_bytes() == source.astype(on_disk_dtype).tobytes()

    # Decoding must recover the original values.
    decoded = await _AsyncArrayProxy(array)[:, :].get()
    assert np.array_equal(source, decoded)


def test_bytes_codec_supports_sync() -> None:
    """A default-constructed `BytesCodec` is an instance of `SupportsSyncCodec`."""
    codec = BytesCodec()
    assert isinstance(codec, SupportsSyncCodec)


def test_bytes_codec_sync_roundtrip() -> None:
    """
    Encoding a float64 array with `BytesCodec._encode_sync` and decoding the
    result with `BytesCodec._decode_sync` returns the original values.
    """
    bytes_codec = BytesCodec()
    original = np.arange(100, dtype="float64")
    zarr_dtype = get_data_type_from_native_dtype(original.dtype)
    array_spec = ArraySpec(
        shape=original.shape,
        dtype=zarr_dtype,
        fill_value=zarr_dtype.cast_scalar(0),
        config=ArrayConfig(order="C", write_empty_chunks=True),
        prototype=default_buffer_prototype(),
    )
    chunk_array: NDBuffer = default_buffer_prototype().nd_buffer.from_numpy_array(original)

    # Let the codec adapt itself to the array spec before it is used.
    bytes_codec = bytes_codec.evolve_from_array_spec(array_spec)

    chunk_bytes = bytes_codec._encode_sync(chunk_array, array_spec)
    assert chunk_bytes is not None
    restored = bytes_codec._decode_sync(chunk_bytes, array_spec)
    np.testing.assert_array_equal(original, restored.as_numpy_array())


@pytest.mark.parametrize("endian", ENDIAN)
Expand Down Expand Up @@ -46,6 +124,43 @@ def test_bytes_codec_json_roundtrip(endian: EndianLiteral) -> None:
assert restored == codec


# Shared case table for the `to_dict` / `from_dict` tests. Each case pairs an
# endian setting (`input`) with its wire dict (`output`). `to_dict` is expected
# to map the setting to the dict, and `from_dict` to recover the setting from
# the dict, so the two are inverses over this table.
_ENDIAN_DICT_CASES: list[Expect[EndianLiteral | None, dict[str, Any]]] = [
    Expect(
        input="little",
        output={"name": "bytes", "configuration": {"endian": "little"}},
        id="little",
    ),
    Expect(
        input="big",
        output={"name": "bytes", "configuration": {"endian": "big"}},
        id="big",
    ),
    # No endian setting corresponds to a dict with no "configuration" key at all.
    Expect(input=None, output={"name": "bytes"}, id="missing"),
]


@pytest.mark.parametrize("case", _ENDIAN_DICT_CASES, ids=lambda case: case.id)
def test_to_dict(case: Expect[EndianLiteral | None, dict[str, Any]]) -> None:
    """`BytesCodec(endian=...).to_dict()` produces the wire dict paired with that setting."""
    codec = BytesCodec(endian=case.input)
    serialized = codec.to_dict()
    assert serialized == case.output


@pytest.mark.parametrize("case", _ENDIAN_DICT_CASES, ids=lambda case: case.id)
def test_from_dict(case: Expect[EndianLiteral | None, dict[str, Any]]) -> None:
    """`BytesCodec.from_dict` recovers the endian setting paired with each wire dict."""
    codec = BytesCodec.from_dict(case.output)
    assert codec.endian == case.input


@pytest.mark.parametrize("endian", ["little", "big", pytest.param(None, id="missing")])
def test_roundtrip(endian: EndianLiteral | None) -> None:
    """A `BytesCodec` compares equal to the codec rebuilt from its own `to_dict` output."""
    original = BytesCodec(endian=endian)
    rebuilt = BytesCodec.from_dict(original.to_dict())
    assert original == rebuilt


@pytest.mark.parametrize(
("member", "expected"),
[("little", "little"), ("big", "big")],
Expand Down Expand Up @@ -105,14 +220,25 @@ def test_bytes_codec_init_with_deprecated_class_member() -> None:
assert codec.endian == "little"


def test_bytes_codec_rejects_unknown_endian() -> None:
@pytest.mark.parametrize(
"case",
[
ExpectFail(
input="north",
exception=ValueError,
id="unknown-string",
msg="endian must be one of",
),
],
ids=lambda c: c.id,
)
def test_bytes_codec_rejects_unknown_endian(case: ExpectFail[Any]) -> None:
"""
`BytesCodec.__init__` raises `ValueError` when given a string outside
`BytesCodec.__init__` raises `ValueError` when given a value outside
`ENDIAN`, and the error message names the offending parameter.
"""
kwargs: dict[str, Any] = {"endian": "north"}
with pytest.raises(ValueError, match="endian must be one of"):
BytesCodec(**kwargs)
with case.raises():
BytesCodec(endian=case.input)


def test_endian_attribute_error_for_unknown_member() -> None:
Expand Down
89 changes: 0 additions & 89 deletions tests/test_codecs/test_endian.py

This file was deleted.

Loading