diff --git a/.github/workflows/hypothesis.yaml b/.github/workflows/hypothesis.yaml
index d15b5b1405..cfffbe26aa 100644
--- a/.github/workflows/hypothesis.yaml
+++ b/.github/workflows/hypothesis.yaml
@@ -41,6 +41,7 @@ jobs:
     steps:
     - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
       with:
+        fetch-depth: 0  # grab all tags so hatch-vcs derives real versions for zarr-python and the in-tree zarr-metadata
         persist-credentials: false
     - name: Set HYPOTHESIS_PROFILE based on trigger
       env:
diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml
index 43436de947..8996e0a026 100644
--- a/.github/workflows/releases.yml
+++ b/.github/workflows/releases.yml
@@ -63,9 +63,119 @@ jobs:
           ls
           ls dist
 
+  # ---------------------------------------------------------------------------
+  # Pre-publish gate: confirm zarr-metadata's required floor is on PyPI.
+  #
+  # zarr-python and zarr-metadata co-develop in this monorepo. During local
+  # development zarr-metadata is resolved from packages/zarr-metadata/ via the
+  # uv workspace (see [tool.uv.sources] in pyproject.toml). The wheel we are
+  # about to publish, however, only carries a version-range requirement
+  # (e.g. `zarr-metadata>=0.1.1,<0.2`); end users will resolve that against
+  # PyPI.
+  #
+  # The failure mode this job catches: a zarr-python PR added code that
+  # depends on a zarr-metadata feature that has been merged into
+  # packages/zarr-metadata/ but not yet released to PyPI. CI passed because
+  # the workspace override resolved to the in-tree copy, but a user installing
+  # the resulting zarr-python wheel would get a published zarr-metadata that
+  # lacks the feature, and zarr-python would fail at import or first use.
+  #
+  # The mitigation here is a presence check on PyPI: extract the floor of
+  # zarr-python's zarr-metadata requirement from the wheel's METADATA file,
+  # and refuse to upload if that exact version is not yet on PyPI. This is
+  # analogous to what `cargo publish` does automatically against crates.io,
+  # but expressed as a CI step because twine has no built-in equivalent.
+  #
+  # When you bump zarr-metadata to a new version that zarr-python depends on,
+  # the required release order is:
+  #   1. release zarr-metadata to PyPI;
+  #   2. bump the floor in zarr-python's [project.dependencies];
+  #   3. release zarr-python.
+  # This job will fail at step 3 if step 1 was skipped.
+  # ---------------------------------------------------------------------------
+  verify_pypi_dependency:
+    name: Verify zarr-metadata floor is on PyPI
+    needs: [build_artifacts]
+    runs-on: ubuntu-latest
+    # Run only on actual releases. Pull-request and push-to-main runs go
+    # through CI without this gate, since their wheels are never uploaded.
+    if: github.event_name == 'release'
+    steps:
+      - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        with:
+          name: releases
+          path: dist
+
+      - name: Check zarr-metadata floor is published on PyPI
+        run: |
+          # The wheel's METADATA file lives at zarr-*.dist-info/METADATA inside
+          # the wheel. `unzip -p` writes a file's contents to stdout without
+          # extracting; the glob matches whichever dist-info dir is inside.
+          metadata="$(unzip -p dist/zarr-*.whl '*.dist-info/METADATA')"
+
+          # Pick the Requires-Dist line for zarr-metadata. The wheel may have
+          # several Requires-Dist lines for different extras; we want the one
+          # that applies unconditionally (no `; extra == "..."` marker).
+          # Match `Requires-Dist: zarr-metadata` followed by anything that
+          # ends a project name in PEP 508: a version operator (<, >, =, !,
+          # ~), whitespace, `[` (extras), `;` (markers), `(` (legacy
+          # parenthesized version), or end-of-line. The character class
+          # excludes letters/digits/underscore/hyphen, so a hypothetical
+          # `zarr-metadata-ext` dep would not match.
+          req_line="$(printf '%s' "$metadata" \
+            | grep -E '^Requires-Dist: zarr-metadata([^A-Za-z0-9_-]|$)' \
+            | grep -v 'extra ==' \
+            || true)"
+
+          if [ -z "$req_line" ]; then
+            echo "::error::Could not find an unconditional Requires-Dist line for zarr-metadata in the built wheel."
+            echo "Wheel METADATA Requires-Dist lines:"
+            printf '%s' "$metadata" | grep '^Requires-Dist:' || true
+            exit 1
+          fi
+          echo "Requires-Dist line: $req_line"
+
+          # Extract the floor: the version after `>=`. Version specifiers in
+          # PEP 440 are comma-separated (e.g. `>=0.1.1, <0.2`); the floor is
+          # the bound after the first `>=`. The version token ends at a comma,
+          # whitespace, `;` (environment marker) or `)` (legacy parenthesized
+          # form), so trailing markers never leak into the PyPI URL below.
+          floor="$(printf '%s' "$req_line" \
+            | grep -oE '>=[[:space:]]*[^,;)[:space:]]+' \
+            | sed 's/^>=[[:space:]]*//; s/[[:space:]]*$//' \
+            | head -1)"
+
+          if [ -z "$floor" ]; then
+            echo '::error::Could not extract a >= floor from:' "$req_line"
+            echo "zarr-python's zarr-metadata requirement must include a >= bound so this gate has something to check."
+            exit 1
+          fi
+          echo "zarr-metadata floor: $floor"
+
+          # PyPI's JSON API returns 200 if the named version exists and 404
+          # if it doesn't. -s silences progress output; -o /dev/null discards
+          # the body; -w '%{http_code}' prints just the status. Any non-200
+          # response means the floor has not been published yet.
+          status="$(curl -s -o /dev/null -w '%{http_code}' \
+            "https://pypi.org/pypi/zarr-metadata/${floor}/json")"
+
+          if [ "$status" != "200" ]; then
+            echo "::error::zarr-metadata ${floor} is not available on PyPI (HTTP ${status})."
+            echo ""
+            echo "The wheel about to be uploaded declares it requires zarr-metadata ${floor} or later,"
+            echo "but no such release exists on PyPI. Publish zarr-metadata ${floor} first, then"
+            echo "re-run this release workflow."
+            exit 1
+          fi
+          echo "OK: zarr-metadata ${floor} is on PyPI; safe to upload zarr-python."
+
   upload_pypi:
     name: Upload to PyPI
-    needs: [build_artifacts, test_dist_pypi]
+    # Depend on the new gate so the upload step does not run if the floor
+    # is missing from PyPI. The gate and this job share the same `if:`
+    # condition, so on PR / push runs both are skipped together (a job
+    # whose `needs:` dependency is skipped is itself skipped by default).
+    needs: [build_artifacts, test_dist_pypi, verify_pypi_dependency]
     runs-on: ubuntu-latest
     if: github.event_name == 'release'
     environment:
diff --git a/.github/workflows/zarr-metadata.yml b/.github/workflows/zarr-metadata.yml
index 3081abf094..01f017939b 100644
--- a/.github/workflows/zarr-metadata.yml
+++ b/.github/workflows/zarr-metadata.yml
@@ -20,6 +20,13 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  # zarr-metadata CI installs zarr-metadata standalone, not as a uv
+  # workspace member. The workspace at the repo root forces uv to honor
+  # `requires-python = ">=3.12"` from zarr-python's pyproject.toml, which
+  # blocks Python 3.11 even though zarr-metadata itself supports 3.11+.
+  # Using `uv venv` + `uv pip install` with a venv in $RUNNER_TEMP bypasses
+  # workspace resolution and tests zarr-metadata the way downstream users
+  # actually install it: as a standalone package, outside any workspace.
   test:
     name: pytest py=${{ matrix.python-version }}
     runs-on: ubuntu-latest
@@ -39,12 +46,18 @@ jobs:
         uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0
         with:
           enable-cache: true
-      - name: Set up Python ${{ matrix.python-version }}
-        run: uv python install ${{ matrix.python-version }}
-      - name: Sync test dependency group
-        run: uv sync --group test --python ${{ matrix.python-version }}
+      - name: Create standalone Python ${{ matrix.python-version }} venv
+        # Place the venv outside the workspace tree so uv doesn't try
+        # to resolve workspace-wide requirements.
+        run: uv venv "$RUNNER_TEMP/zm-venv" --python ${{ matrix.python-version }} --seed
+      - name: Install zarr-metadata and test deps
+        run: |
+          uv pip install \
+            --python "$RUNNER_TEMP/zm-venv/bin/python" \
+            --group pyproject.toml:test \
+            .
       - name: Run pytest
-        run: uv run --group test pytest tests
+        run: '"$RUNNER_TEMP/zm-venv/bin/python" -m pytest tests'
 
   ruff:
     name: ruff
@@ -77,12 +90,16 @@ jobs:
         uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0
         with:
           enable-cache: true
-      - name: Set up Python
-        run: uv python install 3.11
-      - name: Sync test dependency group
-        run: uv sync --group test --python 3.11
+      - name: Create standalone Python 3.11 venv
+        run: uv venv "$RUNNER_TEMP/zm-venv" --python 3.11 --seed
+      - name: Install zarr-metadata and test deps
+        run: |
+          uv pip install \
+            --python "$RUNNER_TEMP/zm-venv/bin/python" \
+            --group pyproject.toml:test \
+            .
       - name: Run pyright
-        run: uv run --group test --with pyright pyright src
+        run: uvx --python "$RUNNER_TEMP/zm-venv/bin/python" pyright src
 
   zarr-metadata-complete:
     name: zarr-metadata complete
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fb2e8c3c6f..70e9af31db 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -57,6 +57,17 @@ repos:
         entry: "\\.(lstrip|rstrip)\\([\"'][^\"']{2,}[\"']\\)"
         types: [python]
         files: ^(src|tests)/
+      - id: check-min-deps-floor
+        name: check min_deps zarr-metadata pin matches the project floor
+        # language: python (not system) so pre-commit provisions an
+        # interpreter; the script is stdlib-only, so no extra deps are
+        # needed. Avoids assuming a bare `python` is on PATH.
+        language: python
+        entry: python ci/check_min_deps_floor.py
+        # Run whenever pyproject.toml changes; pass_filenames is False because
+        # the script reads the file directly rather than processing argv.
+        pass_filenames: false
+        files: ^pyproject\.toml$
   - repo: https://github.com/zizmorcore/zizmor-pre-commit
     rev: v1.25.2
     hooks:
diff --git a/changes/3961.feature.md b/changes/3961.feature.md
new file mode 100644
index 0000000000..00e42cfc7b
--- /dev/null
+++ b/changes/3961.feature.md
@@ -0,0 +1,3 @@
+``zarr-python`` now depends on the [``zarr-metadata``](https://pypi.org/project/zarr-metadata/) package, which provides spec-defined TypedDicts and literal types for Zarr v2 and v3 metadata documents. Several internal types previously defined in ``zarr-python`` are now aliases that re-export their canonical definitions from ``zarr-metadata``: ``zarr.codecs.blosc.BloscShuffleLiteral``, ``zarr.codecs.blosc.BloscCnameLiteral``, ``zarr.codecs.blosc.BloscConfigV3``, ``zarr.codecs.blosc.BloscJSON_V3``, ``zarr.codecs.cast_value.RoundingMode``, ``zarr.codecs.cast_value.OutOfRangeMode``, ``zarr.core.metadata.v2.ArrayV2MetadataDict``, ``zarr.core.metadata.v3.AllowedExtraField``, and ``zarr.core.metadata.v3.ArrayMetadataJSON_V3``.
+
+The version requirement (``zarr-metadata>=0.3.0,<0.4``) caps the minor version (``zarr-metadata`` is pre-1.0, so breaking changes arrive in minor releases) so a future breaking change in ``zarr-metadata`` cannot silently break installed ``zarr-python``. During local development, ``zarr-metadata`` is resolved from the in-tree copy under ``packages/zarr-metadata/`` via a uv workspace; see [the contributing guide](https://zarr.readthedocs.io/en/stable/contributing.html) for details.
diff --git a/changes/3961.misc.md b/changes/3961.misc.md
new file mode 100644
index 0000000000..0180c88d39
--- /dev/null
+++ b/changes/3961.misc.md
@@ -0,0 +1,13 @@
+`Struct.to_json` now emits the `configuration.fields` array as a tuple rather
+than a list. The serialized JSON is unchanged (a JSON array is produced either
+way), but the in-memory dict returned by `to_json(zarr_format=3)` now holds a
+tuple, matching the `tuple[StructField, ...]` shape that `zarr-metadata` models
+for this field.
+
+Internal: zarr-python now sources its codec, dtype, and chunk-grid name
+constants and the `Endianness`, `BloscShuffle`, `BloscCName`, sharding
+`IndexLocation`, and `DateTimeUnit` literal types from `zarr-metadata`'s
+top-level exports rather than re-defining them. The historical zarr-python
+names (e.g. `zarr.codecs.bytes.EndianLiteral`,
+`zarr.codecs.sharding.IndexLocation`) are retained as re-exports, so existing
+imports keep working. No user-facing behavior changes.
diff --git a/ci/check_min_deps_floor.py b/ci/check_min_deps_floor.py
new file mode 100644
index 0000000000..461e1d0e47
--- /dev/null
+++ b/ci/check_min_deps_floor.py
@@ -0,0 +1,111 @@
+"""
+Enforce the invariant: `min_deps` pins zarr-metadata to the floor of
+zarr-python's declared zarr-metadata range.
+
+zarr-python declares `zarr-metadata>=X.Y.Z,<...>` in `[project.dependencies]`.
+The `min_deps` hatch env tests against the *minimum* supported deps, so it
+must pin zarr-metadata to exactly that floor (e.g. `zarr-metadata==X.Y.Z`).
+Without this script the two declarations can drift silently — the project's
+floor could rise without `min_deps` noticing, and `min_deps` would no longer
+verify what its name claims.
+
+Run:
+    python ci/check_min_deps_floor.py
+
+Exits 0 if floors agree; non-zero with a clear message if not.
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+import tomllib
+from pathlib import Path
+
+ROOT = Path(__file__).parent.parent.resolve()
+PYPROJECT = ROOT / "pyproject.toml"
+
+# Match `>=X.Y.Z` inside a PEP 440 specifier set and capture the version. The
+# token stops at `,`, whitespace, `;` (marker) or `)` (parenthesized form).
+_FLOOR_RE = re.compile(r">=\s*([^,;)\s]+)")
+# Match `==X.Y.Z` likewise. Captures the version number.
+_PIN_RE = re.compile(r"==\s*([^,;)\s]+)")
+
+
+def find_zarr_metadata_floor(deps: list[str]) -> str:
+    """Return the >= floor of zarr-metadata declared in `deps`.
+
+    `deps` is a list of PEP 508 strings, e.g. as found in
+    `[project.dependencies]`. Raises `SystemExit` if zarr-metadata is not
+    present, or if its specifier set has no `>=` bound.
+    """
+    for dep in deps:
+        # Name ends at the first version operator, whitespace, `[`, `;`, or `(`.
+        name = re.split(r"[<>=!~\s\[;(]", dep, maxsplit=1)[0].strip()
+        if name == "zarr-metadata":
+            # Search only the text before any `;` so a `>=` inside an
+            # environment marker is never mistaken for the version floor.
+            match = _FLOOR_RE.search(dep.split(";", 1)[0])
+            if not match:
+                raise SystemExit(
+                    f"zarr-metadata dependency has no `>=` floor: {dep!r}\n"
+                    "Floor verification requires an explicit lower bound."
+                )
+            return match.group(1)
+    raise SystemExit(
+        "zarr-metadata not found in [project.dependencies]. "
+        "This script assumes zarr-python depends on zarr-metadata."
+    )
+
+
+def find_zarr_metadata_pin(deps: list[str]) -> str:
+    """Return the `==` pin of zarr-metadata declared in `deps`.
+
+    `deps` is a list of PEP 508 strings (e.g. hatch `extra-dependencies`).
+    Only the text before any `;` environment marker is searched. Raises
+    `SystemExit` if zarr-metadata is absent or is not pinned with `==`.
+    """
+    for dep in deps:
+        name = re.split(r"[<>=!~\s\[;(]", dep, maxsplit=1)[0].strip()
+        if name == "zarr-metadata":
+            match = _PIN_RE.search(dep.split(";", 1)[0])
+            if not match:
+                raise SystemExit(
+                    f"min_deps zarr-metadata entry is not an `==` pin: {dep!r}\n"
+                    "The min_deps env must pin zarr-metadata exactly to the floor."
+                )
+            return match.group(1)
+    raise SystemExit(
+        "zarr-metadata not found in [tool.hatch.envs.min_deps.extra-dependencies].\n"
+        "Add `'zarr-metadata==<floor>'` to keep min_deps testing the declared floor."
+    )
+
+
+def main() -> int:
+    """Compare the project floor with the min_deps pin; return an exit code."""
+    # TOML files are UTF-8 by spec; do not rely on the platform's locale encoding.
+    data = tomllib.loads(PYPROJECT.read_text(encoding="utf-8"))
+    project_deps = data["project"]["dependencies"]
+    floor = find_zarr_metadata_floor(project_deps)
+    min_deps_extra = data["tool"]["hatch"]["envs"]["min_deps"]["extra-dependencies"]
+    pin = find_zarr_metadata_pin(min_deps_extra)
+
+    if floor != pin:
+        print(
+            f"floor / min_deps pin mismatch for zarr-metadata:\n"
+            f"  [project.dependencies] floor:           >={floor}\n"
+            f"  [tool.hatch.envs.min_deps] pin:         =={pin}\n"
+            f"\n"
+            f"These must agree. Either update the floor in "
+            f"[project.dependencies] or the pin in min_deps so both name "
+            f"the same zarr-metadata version.",
+            file=sys.stderr,
+        )
+        return 1
+
+    print(f"OK: zarr-metadata floor {floor} matches min_deps pin {pin}.")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/docs/contributing.md b/docs/contributing.md
index 750f7c7a65..922e653f16 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -93,6 +93,44 @@ To verify that your development environment is working, you can run the unit tes
 hatch env run --env test.py3.12-optional run
 ```
 
+#### The zarr-metadata package and the workspace
+
+zarr-python depends on [`zarr-metadata`](https://pypi.org/project/zarr-metadata/), a small package of TypedDicts and literals describing the JSON shape of Zarr v2 and v3 metadata documents. Both packages live in this repository:
+
+- zarr-python: the project root.
+- zarr-metadata: [`packages/zarr-metadata/`](https://github.com/zarr-developers/zarr-python/tree/main/packages/zarr-metadata) — its own `pyproject.toml`, source tree, and tests.
+
+This is configured as a workspace in two places, because the project supports both [`uv`](https://docs.astral.sh/uv/) and [`hatch`](https://hatch.pypa.io/) as front-ends.
+
+**uv workspace declaration** (consumed by `uv sync`, `uv run`, and anything reading uv's project metadata):
+
+```toml
+[tool.uv.workspace]
+members = ["packages/zarr-metadata"]
+
+[tool.uv.sources]
+zarr-metadata = { workspace = true }
+```
+
+**Hatch workspace declaration** (consumed by `hatch env run`, including the CI test matrix in `test.yml`):
+
+```toml
+[tool.hatch.envs.test]
+workspace.members = ["packages/zarr-metadata"]
+```
+
+Both mechanisms point at the same in-tree path. They have to be declared separately because uv and hatch don't share configuration. The `dev` env, the `test` matrix, the inherited `gputest` and `upstream` envs all use the in-tree source. The `min_deps` env explicitly opts out (it installs with pip, which ignores `[tool.uv.sources]`, and pins `zarr-metadata==<floor>`) so it tests against the minimum supported zarr-metadata from PyPI — the floor of the version range in `[project.dependencies]`.
+
+What this means in practice:
+
+- **During local development** (whether you invoke `uv run pytest` or `hatch env run --env test.py3.12-optional run`), zarr-python resolves `zarr-metadata` from the in-tree source under `packages/zarr-metadata/`. Changes you make there are immediately visible to zarr-python without reinstalling.
+- **In the published wheel**, only the `[project.dependencies]` version requirement (`zarr-metadata>=0.3.0,<0.4`) is carried. The workspace declarations are development-only configuration. Users installing zarr-python from PyPI get the published zarr-metadata wheel.
+- **In CI**, the primary test matrix (`test.yml`) runs `hatch env run` against the in-tree zarr-metadata. A change in `packages/zarr-metadata/` that breaks zarr-python surfaces immediately, before zarr-metadata is released to PyPI. The `min_deps` job additionally exercises the published floor on every PR, so a change in zarr-python that *requires* an unreleased zarr-metadata feature also gets caught.
+
+If you change zarr-metadata, also run zarr-python's test suite. The workspace setup makes this transparent — your usual `uv run pytest` or `hatch env run` picks up the in-tree source automatically.
+
+When releasing a new zarr-metadata version that contains a breaking change, also bump zarr-python's version cap on zarr-metadata (currently `<0.4`) in the same release cycle. See [Releasing zarr-python when zarr-metadata has changed](#releasing-zarr-python-when-zarr-metadata-has-changed) below for the full procedure.
+
 ### Creating a branch
 
 Before you do any new work or submit a pull request, please open an issue on GitHub to report the bug or propose the feature you'd like to add.
@@ -421,6 +459,32 @@ We aim to either **promote** or **remove** experimental features within **6 mont
 
 Features in `zarr.experimental` carry no stability guarantees. They may be changed or removed in any release, including patch releases. If you depend on an experimental feature, pin your `zarr-python` version accordingly.
 
+## Release procedure
+
+Open an issue on GitHub announcing the release using the release checklist template:
+[https://github.com/zarr-developers/zarr-python/issues/new?template=release-checklist.md](https://github.com/zarr-developers/zarr-python/issues/new?template=release-checklist.md). The release checklist includes all steps necessary for the release.
+
+### Preparing a release
+
+Releases are prepared using the ["Prepare release notes"](https://github.com/zarr-developers/zarr-python/actions/workflows/prepare_release.yml) workflow. To run it:
+
+1. Go to the [workflow page](https://github.com/zarr-developers/zarr-python/actions/workflows/prepare_release.yml) and click "Run workflow".
+2. Enter the release version (e.g. `3.2.0`) and the target branch (defaults to `main`).
+3. The workflow will run `towncrier build` to render the changelog, remove consumed fragments from `changes/`, and open a pull request on the `release/v<version>` branch.
+4. The release PR is automatically labeled `run-downstream`, which triggers the [downstream test workflow](https://github.com/zarr-developers/zarr-python/actions/workflows/downstream.yml) to run Xarray and numcodecs integration tests against the release branch.
+5. Review the rendered changelog in `docs/release-notes.md` and verify downstream tests pass before merging.
+
+### Releasing zarr-python when zarr-metadata has changed
+
+zarr-python depends on the [`zarr-metadata`](https://pypi.org/project/zarr-metadata/) package, which is developed in the same monorepo (see [The zarr-metadata package and the workspace](#the-zarr-metadata-package-and-the-workspace) above). When a zarr-python release depends on a zarr-metadata change that has not yet been published to PyPI, the release must follow this order:
+
+1. **Bump zarr-metadata's version** in `packages/zarr-metadata/pyproject.toml` and `packages/zarr-metadata/src/zarr_metadata/__init__.py` (the version literal). Use semver: bump the minor for breaking type changes, the patch for additive changes.
+2. **Release zarr-metadata to PyPI.** Tag and publish from `packages/zarr-metadata/`.
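+3. **Bump zarr-python's floor** on zarr-metadata in `[project.dependencies]` (e.g. `zarr-metadata>=0.2.0,<0.3` → `zarr-metadata>=0.3.0,<0.4`), and update the matching `zarr-metadata==<floor>` pin in `[tool.hatch.envs.min_deps]` (the `check-min-deps-floor` pre-commit hook fails if the two disagree). Update `[tool.uv.workspace]` and `[tool.uv.sources]` only if necessary.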
+4. **Release zarr-python.**
+
+If steps 1 and 2 are skipped (or step 3's bumped floor names a version that does not yet exist on PyPI), the `verify_pypi_dependency` job in [`releases.yml`](https://github.com/zarr-developers/zarr-python/blob/main/.github/workflows/releases.yml) will fail before the upload step runs. This gate exists because the wheel ships only a version-range requirement; pip resolves that against PyPI on the user's machine, and there is no built-in equivalent of `cargo publish`'s automatic check that the declared dependency is actually available in the registry.
+
 ## Benchmarks
 
 Zarr uses [pytest-benchmark](https://pytest-benchmark.readthedocs.io/en/latest/) for running
diff --git a/pyproject.toml b/pyproject.toml
index 493b18822a..2b42aa3fca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,7 @@ dependencies = [
     'google-crc32c>=1.5',
     'typing_extensions>=4.14',
     'donfig>=0.8',
+    'zarr-metadata>=0.3.0,<0.4',
 ]
 
 dynamic = [
@@ -158,6 +159,15 @@ omit = [
     "src/zarr/testing/conftest.py",  # only for downstream projects
 ]
 
+# When developing zarr-python locally, resolve zarr-metadata from the in-tree
+# package under packages/zarr-metadata/. The `[project.dependencies]` version
+# requirement is what propagates to consumers installing from PyPI.
+[tool.uv.workspace]
+members = ["packages/zarr-metadata"]
+
+[tool.uv.sources]
+zarr-metadata = { workspace = true }
+
 [tool.hatch]
 version.source = "vcs"
 # Only consider zarr-python's own `v*` tags when deriving the version. Without
@@ -172,9 +182,18 @@ hooks.vcs.version-file = "src/zarr/_version.py"
 
 [tool.hatch.envs.dev]
 dependency-groups = ["dev"]
+# Resolve zarr-metadata from the in-tree workspace member, not PyPI. See
+# `[tool.uv.sources]` above for the equivalent for `uv run` invocations.
+workspace.members = ["packages/zarr-metadata"]
 
 [tool.hatch.envs.test]
 dependency-groups = ["test"]
+# Resolve zarr-metadata from the in-tree workspace member, not PyPI, so CI
+# in `test.yml` exercises the integration between the two packages on every
+# PR. Envs that inherit via `template = "test"` (gputest, upstream) pick
+# this up automatically; min_deps overrides it (see below) to test against
+# the published floor.
+workspace.members = ["packages/zarr-metadata"]
 
 [tool.hatch.envs.test.env-vars]
 
@@ -253,6 +272,15 @@ PIP_EXTRA_INDEX_URL = "https://pypi.org/simple/"
 PIP_PRE = "1"
 
 [tool.hatch.envs.min_deps]
+# Use pip rather than the inherited uv installer. This env must resolve
+# zarr-metadata from PyPI (the published floor pinned below), not the in-tree
+# workspace member. uv would honor the root `[tool.uv.sources] zarr-metadata =
+# { workspace = true }` and substitute the workspace copy (whose hatch-vcs dev
+# version can never equal the `==` floor), producing "No solution found" — see
+# https://github.com/pypa/hatch/issues/1639. pip ignores `[tool.uv.sources]`
+# entirely, so the `zarr-metadata==<floor>` pin below resolves against the
+# published wheel and keeps the "minimum supported deps" guarantee honest.
+installer = "pip"
 description = """Test environment for minimum supported dependencies
 
 See Spec 0000 for details and drop schedule: https://scientific-python.org/specs/spec-0000/
@@ -271,6 +299,10 @@ extra-dependencies = [
     'typing_extensions==4.14.*',
     'donfig==0.8.*',
     'obstore==0.5.*',
+    # Pin to the floor of zarr-python's declared zarr-metadata range. Must
+    # match the >= bound in [project.dependencies] above; the
+    # `check_min_deps_floor.py` pre-commit hook enforces this invariant.
+    'zarr-metadata==0.3.0',
 ]
 
 [tool.hatch.envs.default]
diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py
index 9a1b47b351..a9b0fadc4e 100644
--- a/src/zarr/codecs/__init__.py
+++ b/src/zarr/codecs/__init__.py
@@ -1,5 +1,17 @@
 from __future__ import annotations
 
+from zarr_metadata import (
+    BLOSC_CODEC_NAME,
+    BYTES_CODEC_NAME,
+    CAST_VALUE_CODEC_NAME,
+    CRC32C_CODEC_NAME,
+    GZIP_CODEC_NAME,
+    SCALE_OFFSET_CODEC_NAME,
+    SHARDING_INDEXED_CODEC_NAME,
+    TRANSPOSE_CODEC_NAME,
+    ZSTD_CODEC_NAME,
+)
+
 from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle
 from zarr.codecs.bytes import BytesCodec, Endian
 from zarr.codecs.cast_value import CastValue
@@ -54,20 +66,20 @@
     "ZstdCodec",
 ]
 
-register_codec("blosc", BloscCodec)
-register_codec("cast_value", CastValue)
-register_codec("bytes", BytesCodec)
+register_codec(BLOSC_CODEC_NAME, BloscCodec)
+register_codec(CAST_VALUE_CODEC_NAME, CastValue)
+register_codec(BYTES_CODEC_NAME, BytesCodec)
 
 # compatibility with earlier versions of ZEP1
 register_codec("endian", BytesCodec)
-register_codec("crc32c", Crc32cCodec)
-register_codec("gzip", GzipCodec)
-register_codec("scale_offset", ScaleOffset)
-register_codec("sharding_indexed", ShardingCodec)
-register_codec("zstd", ZstdCodec)
+register_codec(CRC32C_CODEC_NAME, Crc32cCodec)
+register_codec(GZIP_CODEC_NAME, GzipCodec)
+register_codec(SCALE_OFFSET_CODEC_NAME, ScaleOffset)
+register_codec(SHARDING_INDEXED_CODEC_NAME, ShardingCodec)
+register_codec(ZSTD_CODEC_NAME, ZstdCodec)
 register_codec("vlen-utf8", VLenUTF8Codec)
 register_codec("vlen-bytes", VLenBytesCodec)
-register_codec("transpose", TransposeCodec)
+register_codec(TRANSPOSE_CODEC_NAME, TransposeCodec)
 
 # Register all the codecs formerly contained in numcodecs.zarr3
 
diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py
index 087de716fc..f10114553c 100644
--- a/src/zarr/codecs/blosc.py
+++ b/src/zarr/codecs/blosc.py
@@ -3,16 +3,24 @@
 import asyncio
 from dataclasses import dataclass, field, replace
 from functools import cached_property
-from typing import TYPE_CHECKING, ClassVar, Final, Literal, NotRequired, TypedDict
+from typing import TYPE_CHECKING, ClassVar, Literal, NotRequired, TypedDict
 
 import numcodecs
+import zarr_metadata
 from numcodecs.blosc import Blosc
 from packaging.version import Version
+from zarr_metadata import BLOSC_CODEC_NAME
+from zarr_metadata.v3.codec.blosc import (
+    BloscCodecConfiguration as _BloscCodecConfiguration,
+)
+from zarr_metadata.v3.codec.blosc import (
+    BloscCodecObject as _BloscCodecObject,
+)
 
 from zarr.abc.codec import BytesBytesCodec
 from zarr.codecs._deprecated_enum import _coerce_enum_input, _DeprecatedStrEnumMeta
 from zarr.core.buffer.cpu import as_numpy_array_wrapper
-from zarr.core.common import JSON, NamedRequiredConfig, parse_named_configuration
+from zarr.core.common import JSON, parse_named_configuration
 from zarr.core.dtype.common import HasItemSize
 
 if TYPE_CHECKING:
@@ -21,19 +29,24 @@
     from zarr.core.array_spec import ArraySpec
     from zarr.core.buffer import Buffer
 
-BloscShuffleLiteral = Literal["noshuffle", "shuffle", "bitshuffle"]
+# Re-exported under zarr-python's historical names; canonical definitions live
+# in `zarr_metadata`. Plain assignments (not `import as`) so these remain
+# explicitly importable from this module.
+BloscShuffleLiteral = zarr_metadata.BloscShuffle
 """The shuffle values permitted for the blosc codec"""
 
-BLOSC_SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle")
+BLOSC_SHUFFLE = zarr_metadata.BLOSC_SHUFFLE
 
-BloscCnameLiteral = Literal["lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd"]
+BloscCnameLiteral = zarr_metadata.BloscCName
 """The codec identifiers used in the blosc codec"""
 
-BLOSC_CNAME: Final = ("lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd")
+BLOSC_CNAME = zarr_metadata.BLOSC_CNAME
 
 
 class BloscConfigV2(TypedDict):
-    """Configuration for the V2 Blosc codec"""
+    """Configuration for the V2 Blosc codec.
+
+    v2 codec shapes predate zarr-metadata, which models only v3 codecs."""
 
     cname: BloscCnameLiteral
     clevel: int
@@ -42,20 +55,8 @@ class BloscConfigV2(TypedDict):
     typesize: NotRequired[int]
 
 
-class BloscConfigV3(TypedDict):
-    """Configuration for the V3 Blosc codec"""
-
-    cname: BloscCnameLiteral
-    clevel: int
-    shuffle: BloscShuffleLiteral
-    blocksize: int
-    typesize: int
-
-
-class BloscJSON_V3(NamedRequiredConfig[Literal["blosc"], BloscConfigV3]):
-    """
-    The JSON form of the Blosc codec in Zarr V3.
-    """
+BloscConfigV3 = _BloscCodecConfiguration
+BloscJSON_V3 = _BloscCodecObject
 
 
 class BloscShuffle(metaclass=_DeprecatedStrEnumMeta):
@@ -264,12 +265,12 @@ def __init__(
 
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
-        _, configuration_parsed = parse_named_configuration(data, "blosc")
+        _, configuration_parsed = parse_named_configuration(data, BLOSC_CODEC_NAME)
         return cls(**configuration_parsed)  # type: ignore[arg-type]
 
     def to_dict(self) -> dict[str, JSON]:
         result: BloscJSON_V3 = {
-            "name": "blosc",
+            "name": BLOSC_CODEC_NAME,
             "configuration": {
                 "typesize": self.typesize,
                 "cname": self.cname,
diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py
index 240c077627..684820fe14 100644
--- a/src/zarr/codecs/bytes.py
+++ b/src/zarr/codecs/bytes.py
@@ -3,7 +3,10 @@
 import sys
 import warnings
 from dataclasses import dataclass, replace
-from typing import TYPE_CHECKING, ClassVar, Final, Literal
+from typing import TYPE_CHECKING, ClassVar, Final
+
+import zarr_metadata
+from zarr_metadata import BYTES_CODEC_NAME
 
 from zarr.abc.codec import ArrayBytesCodec
 from zarr.codecs._deprecated_enum import _coerce_enum_input, _DeprecatedStrEnumMeta
@@ -17,11 +20,13 @@
 
     from zarr.core.array_spec import ArraySpec
 
-
-EndianLiteral = Literal["little", "big"]
+# Re-exported under zarr-python's historical names; canonical definitions live
+# in `zarr_metadata`. Plain assignments (not `import as`) so these remain
+# explicitly importable from this module.
+EndianLiteral = zarr_metadata.Endianness
 """Byte order of multi-byte numeric data."""
 
-ENDIAN: Final = ("little", "big")
+ENDIAN: Final = zarr_metadata.ENDIANNESS
 
 
 class Endian(metaclass=_DeprecatedStrEnumMeta):
@@ -59,7 +64,7 @@ def __init__(self, *, endian: Endian | EndianLiteral | None = sys.byteorder) ->
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
         _, configuration_parsed = parse_named_configuration(
-            data, "bytes", require_configuration=False
+            data, BYTES_CODEC_NAME, require_configuration=False
         )
         configuration_parsed = configuration_parsed or {}
         configuration_parsed.setdefault("endian", None)
@@ -67,9 +72,9 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
 
     def to_dict(self) -> dict[str, JSON]:
         if self.endian is None:
-            return {"name": "bytes"}
+            return {"name": BYTES_CODEC_NAME}
         else:
-            return {"name": "bytes", "configuration": {"endian": self.endian}}
+            return {"name": BYTES_CODEC_NAME, "configuration": {"endian": self.endian}}
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
         if isinstance(array_spec.dtype, Struct):
diff --git a/src/zarr/codecs/cast_value.py b/src/zarr/codecs/cast_value.py
index eb8a4de248..dad7dd90a9 100644
--- a/src/zarr/codecs/cast_value.py
+++ b/src/zarr/codecs/cast_value.py
@@ -12,9 +12,11 @@
 
 from collections.abc import Mapping
 from dataclasses import dataclass, replace
-from typing import TYPE_CHECKING, Final, Literal, TypedDict, cast
+from typing import TYPE_CHECKING, Final, TypedDict, cast
 
 import numpy as np
+from zarr_metadata import CAST_VALUE_CODEC_NAME
+from zarr_metadata.v3.codec.cast_value import CastOutOfRangeMode, CastRoundingMode
 
 from zarr.abc.codec import ArrayArrayCodec
 from zarr.core.common import JSON, parse_named_configuration
@@ -33,17 +35,14 @@ class ScalarMapJSON(TypedDict):
         decode: NotRequired[list[tuple[object, object]]]
 
 
-RoundingMode = Literal[
-    "nearest-even",
-    "towards-zero",
-    "towards-positive",
-    "towards-negative",
-    "nearest-away",
-]
-
-OutOfRangeMode = Literal["clamp", "wrap"]
+# Re-exported under zarr-python's historical names; canonical definitions live
+# in `zarr_metadata`. Plain assignments (not `import as`) so these remain
+# explicitly importable from this module.
+RoundingMode = CastRoundingMode
+
+OutOfRangeMode = CastOutOfRangeMode
 
 
 class ScalarMap(TypedDict, total=False):
     """
     The normalized, in-memory form of a scalar map.
@@ -230,7 +229,7 @@ def __init__(
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
         _, configuration_parsed = parse_named_configuration(
-            data, "cast_value", require_configuration=True
+            data, CAST_VALUE_CODEC_NAME, require_configuration=True
         )
         return cls(**configuration_parsed)  # type: ignore[arg-type]
 
@@ -241,12 +240,18 @@ def to_dict(self) -> dict[str, JSON]:
         if self.out_of_range is not None:
             config["out_of_range"] = self.out_of_range
         if self.scalar_map is not None:
-            json_map: dict[str, list[tuple[object, object]]] = {}
+            # Emit ScalarMap entries as a tuple of 2-tuples. JSON Arrays are
+            # typed fixed-length containers at the spec level; the
+            # in-memory canonical shape is `tuple[tuple[object, object], ...]`
+            # to match `zarr_metadata.v3.codec.cast_value.ScalarMap`.
+            json_map: dict[str, tuple[tuple[object, object], ...]] = {}
             for direction in ("encode", "decode"):
                 if direction in self.scalar_map:
-                    json_map[direction] = [(k, v) for k, v in self.scalar_map[direction].items()]
+                    json_map[direction] = tuple(
+                        (k, v) for k, v in self.scalar_map[direction].items()
+                    )
             config["scalar_map"] = cast("JSON", json_map)
-        return {"name": "cast_value", "configuration": config}
+        return {"name": CAST_VALUE_CODEC_NAME, "configuration": config}
 
     def validate(
         self,
diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py
index ebe2ac8f7a..c2d6e8b37c 100644
--- a/src/zarr/codecs/crc32c_.py
+++ b/src/zarr/codecs/crc32c_.py
@@ -6,6 +6,7 @@
 import google_crc32c
 import numpy as np
 import typing_extensions
+from zarr_metadata import CRC32C_CODEC_NAME
 
 from zarr.abc.codec import BytesBytesCodec
 from zarr.core.common import JSON, parse_named_configuration
@@ -25,11 +26,11 @@ class Crc32cCodec(BytesBytesCodec):
 
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
-        parse_named_configuration(data, "crc32c", require_configuration=False)
+        parse_named_configuration(data, CRC32C_CODEC_NAME, require_configuration=False)
         return cls()
 
     def to_dict(self) -> dict[str, JSON]:
-        return {"name": "crc32c"}
+        return {"name": CRC32C_CODEC_NAME}
 
     def _decode_sync(
         self,
diff --git a/src/zarr/codecs/gzip.py b/src/zarr/codecs/gzip.py
index b8591748f7..66e1aa0d03 100644
--- a/src/zarr/codecs/gzip.py
+++ b/src/zarr/codecs/gzip.py
@@ -6,6 +6,7 @@
 from typing import TYPE_CHECKING
 
 from numcodecs.gzip import GZip
+from zarr_metadata import GZIP_CODEC_NAME
 
 from zarr.abc.codec import BytesBytesCodec
 from zarr.core.buffer.cpu import as_numpy_array_wrapper
@@ -43,11 +44,11 @@ def __init__(self, *, level: int = 5) -> None:
 
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
-        _, configuration_parsed = parse_named_configuration(data, "gzip")
+        _, configuration_parsed = parse_named_configuration(data, GZIP_CODEC_NAME)
         return cls(**configuration_parsed)  # type: ignore[arg-type]
 
     def to_dict(self) -> dict[str, JSON]:
-        return {"name": "gzip", "configuration": {"level": self.level}}
+        return {"name": GZIP_CODEC_NAME, "configuration": {"level": self.level}}
 
     @cached_property
     def _gzip_codec(self) -> GZip:
diff --git a/src/zarr/codecs/scale_offset.py b/src/zarr/codecs/scale_offset.py
index c96e177c6b..bfa887407a 100644
--- a/src/zarr/codecs/scale_offset.py
+++ b/src/zarr/codecs/scale_offset.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 import numpy.typing as npt
+from zarr_metadata import SCALE_OFFSET_CODEC_NAME
 
 from zarr.abc.codec import ArrayArrayCodec
 from zarr.core.common import JSON, parse_named_configuration
@@ -327,20 +328,20 @@ def __init__(self, *, offset: object = 0, scale: object = 1) -> None:
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
         _, configuration_parsed = parse_named_configuration(
-            data, "scale_offset", require_configuration=False
+            data, SCALE_OFFSET_CODEC_NAME, require_configuration=False
         )
         configuration_parsed = configuration_parsed or {}
         return cls(**configuration_parsed)
 
     def to_dict(self) -> dict[str, JSON]:
         if self.offset == 0 and self.scale == 1:
-            return {"name": "scale_offset"}
+            return {"name": SCALE_OFFSET_CODEC_NAME}
         config: dict[str, JSON] = {}
         if self.offset != 0:
             config["offset"] = self.offset
         if self.scale != 1:
             config["scale"] = self.scale
-        return {"name": "scale_offset", "configuration": config}
+        return {"name": SCALE_OFFSET_CODEC_NAME, "configuration": config}
 
     def validate(
         self,
diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index 332aab3351..7bd917e0e5 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -7,6 +7,8 @@
 
 import numpy as np
 import numpy.typing as npt
+import zarr_metadata
+from zarr_metadata import SHARDING_INDEXED_CODEC_NAME
 
 from zarr.abc.codec import (
     ArrayBytesCodec,
@@ -73,10 +75,13 @@
 ShardMutableMapping = MutableMapping[tuple[int, ...], Buffer | None]
 
 
-IndexLocation = Literal["start", "end"]
+# Re-exported under zarr-python's historical names; canonical definitions live
+# in `zarr_metadata`. Plain assignments (not `import as`) so these remain
+# explicitly importable from this module.
+IndexLocation = zarr_metadata.ShardingIndexLocation
 """Position of the shard index within the encoded shard."""
 
-INDEX_LOCATION: Final = ("start", "end")
+INDEX_LOCATION: Final = zarr_metadata.SHARDING_INDEX_LOCATION
 
 
 class ShardingCodecIndexLocation(metaclass=_DeprecatedStrEnumMeta):
@@ -384,7 +389,7 @@ def __setstate__(self, state: dict[str, Any]) -> None:
 
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
-        _, configuration_parsed = parse_named_configuration(data, "sharding_indexed")
+        _, configuration_parsed = parse_named_configuration(data, SHARDING_INDEXED_CODEC_NAME)
         return cls(**configuration_parsed)  # type: ignore[arg-type]
 
     @property
@@ -393,7 +398,7 @@ def codec_pipeline(self) -> CodecPipeline:
 
     def to_dict(self) -> dict[str, JSON]:
         return {
-            "name": "sharding_indexed",
+            "name": SHARDING_INDEXED_CODEC_NAME,
             "configuration": {
                 "chunk_shape": self.chunk_shape,
                 "codecs": tuple(s.to_dict() for s in self.codecs),
diff --git a/src/zarr/codecs/transpose.py b/src/zarr/codecs/transpose.py
index 5756fba2b4..098155710f 100644
--- a/src/zarr/codecs/transpose.py
+++ b/src/zarr/codecs/transpose.py
@@ -5,6 +5,7 @@
 from typing import TYPE_CHECKING, cast
 
 import numpy as np
+from zarr_metadata import TRANSPOSE_CODEC_NAME
 
 from zarr.abc.codec import ArrayArrayCodec
 from zarr.core.array_spec import ArraySpec
@@ -41,11 +42,11 @@ def __init__(self, *, order: Iterable[int]) -> None:
 
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
-        _, configuration_parsed = parse_named_configuration(data, "transpose")
+        _, configuration_parsed = parse_named_configuration(data, TRANSPOSE_CODEC_NAME)
         return cls(**configuration_parsed)  # type: ignore[arg-type]
 
     def to_dict(self) -> dict[str, JSON]:
-        return {"name": "transpose", "configuration": {"order": tuple(self.order)}}
+        return {"name": TRANSPOSE_CODEC_NAME, "configuration": {"order": tuple(self.order)}}
 
     def validate(
         self,
diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py
index f93c25a3c7..198bfa47bb 100644
--- a/src/zarr/codecs/zstd.py
+++ b/src/zarr/codecs/zstd.py
@@ -8,6 +8,7 @@
 import numcodecs
 from numcodecs.zstd import Zstd
 from packaging.version import Version
+from zarr_metadata import ZSTD_CODEC_NAME
 
 from zarr.abc.codec import BytesBytesCodec
 from zarr.core.buffer.cpu import as_numpy_array_wrapper
@@ -60,11 +61,14 @@ def __init__(self, *, level: int = 0, checksum: bool = False) -> None:
 
     @classmethod
     def from_dict(cls, data: dict[str, JSON]) -> Self:
-        _, configuration_parsed = parse_named_configuration(data, "zstd")
+        _, configuration_parsed = parse_named_configuration(data, ZSTD_CODEC_NAME)
         return cls(**configuration_parsed)  # type: ignore[arg-type]
 
     def to_dict(self) -> dict[str, JSON]:
-        return {"name": "zstd", "configuration": {"level": self.level, "checksum": self.checksum}}
+        return {
+            "name": ZSTD_CODEC_NAME,
+            "configuration": {"level": self.level, "checksum": self.checksum},
+        }
 
     @cached_property
     def _zstd_codec(self) -> Zstd:
diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py
index 76d763d267..179c2bee29 100644
--- a/src/zarr/core/dtype/common.py
+++ b/src/zarr/core/dtype/common.py
@@ -11,13 +11,17 @@
     TypeGuard,
 )
 
+import zarr_metadata
 from typing_extensions import ReadOnly
 
 from zarr.core.common import NamedConfig
 from zarr.errors import UnstableSpecificationWarning
 
-EndiannessStr = Literal["little", "big"]
-ENDIANNESS_STR: Final = "little", "big"
+# Re-exported under zarr-python's historical names; canonical definitions live
+# in `zarr_metadata`. Plain assignments (not `import as`) so these remain
+# explicitly importable from this module.
+EndiannessStr = zarr_metadata.Endianness
+ENDIANNESS_STR: Final = zarr_metadata.ENDIANNESS
 
 SpecialFloatStrings = Literal["NaN", "Infinity", "-Infinity"]
 SPECIAL_FLOAT_STRINGS: Final = ("NaN", "Infinity", "-Infinity")
diff --git a/src/zarr/core/dtype/npy/common.py b/src/zarr/core/dtype/npy/common.py
index f413f5f678..9382d40693 100644
--- a/src/zarr/core/dtype/npy/common.py
+++ b/src/zarr/core/dtype/npy/common.py
@@ -18,6 +18,7 @@
 )
 
 import numpy as np
+import zarr_metadata
 
 from zarr.core.dtype.common import (
     ENDIANNESS_STR,
@@ -33,26 +34,11 @@
 IntLike = SupportsInt | SupportsIndex | bytes | str
 FloatLike = SupportsIndex | SupportsFloat | bytes | str
 ComplexLike = SupportsFloat | SupportsIndex | SupportsComplex | bytes | str | None
-DateTimeUnit = Literal[
-    "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", "generic"
-]
-DATETIME_UNIT: Final = (
-    "Y",
-    "M",
-    "W",
-    "D",
-    "h",
-    "m",
-    "s",
-    "ms",
-    "us",
-    "μs",
-    "ns",
-    "ps",
-    "fs",
-    "as",
-    "generic",
-)
+# Re-exported under zarr-python's historical names; canonical definitions live
+# in `zarr_metadata`. Plain assignments (not `import as`) so these remain
+# explicitly importable from this module.
+DateTimeUnit = zarr_metadata.NumpyTimeUnit
+DATETIME_UNIT: Final = zarr_metadata.NUMPY_TIME_UNIT
 
 IntishFloat = NewType("IntishFloat", float)
 """A type for floats that represent integers, like 1.0 (but not 1.1)."""
diff --git a/src/zarr/core/dtype/npy/structured.py b/src/zarr/core/dtype/npy/structured.py
index b865998e52..1016f1dfcd 100644
--- a/src/zarr/core/dtype/npy/structured.py
+++ b/src/zarr/core/dtype/npy/structured.py
@@ -590,10 +590,14 @@ def to_json(self, zarr_format: ZarrFormat) -> StructuredJSON_V2 | StructJSON_V3:
             return {"name": fields_v2, "object_codec_id": None}
         elif zarr_format == 3:
             v3_unstable_dtype_warning(self)
-            fields_v3 = [
+            # `fields` is emitted as a tuple, not a list, so that the
+            # in-memory JSON shape is immutable and matches the type of
+            # zarr-metadata's `StructConfiguration.fields`.
+            # `json.dumps` serializes tuple and list identically.
+            fields_v3 = tuple(
                 {"name": f_name, "data_type": f_dtype.to_json(zarr_format=zarr_format)}
                 for f_name, f_dtype in self.fields
-            ]
+            )
             return cast(
                 "StructJSON_V3",
                 {"name": self._zarr_v3_name, "configuration": {"fields": fields_v3}},
diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py
index 4efa0be7bb..d0c5eaa9c6 100644
--- a/src/zarr/core/dtype/npy/time.py
+++ b/src/zarr/core/dtype/npy/time.py
@@ -16,6 +16,8 @@
 
 import numpy as np
 from typing_extensions import ReadOnly
+from zarr_metadata import NUMPY_TIME_UNIT as DATETIME_UNIT
+from zarr_metadata import NumpyTimeUnit as DateTimeUnit
 
 from zarr.core.common import NamedRequiredConfig
 from zarr.core.dtype.common import (
@@ -26,8 +28,6 @@
     check_dtype_spec_v2,
 )
 from zarr.core.dtype.npy.common import (
-    DATETIME_UNIT,
-    DateTimeUnit,
     check_json_int,
     endianness_to_numpy_str,
     get_endianness_from_numpy_dtype,
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
index 52eaa3e144..e265096c81 100644
--- a/src/zarr/core/group.py
+++ b/src/zarr/core/group.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import itertools
+import json
 import logging
 import unicodedata
 import warnings
@@ -73,6 +74,8 @@
     )
     from typing import Any
 
+    from zarr_metadata.v2 import ConsolidatedMetadataV2
+
     from zarr.core.array_spec import ArrayConfigLike
     from zarr.core.buffer import Buffer, BufferPrototype
     from zarr.core.chunk_key_encodings import ChunkKeyEncodingLike
@@ -434,6 +437,12 @@ def to_dict(self) -> dict[str, Any]:
         else:
             # Leave consolidated metadata unset if it's None
             result.pop("consolidated_metadata")
+        # `node_type` is a v3-only field. v2 group metadata (.zgroup) has
+        # only `zarr_format`; attributes live in a sibling .zattrs file.
+        # The dataclass carries `node_type` for in-memory use; strip it
+        # from the serialized v2 form.
+        if self.zarr_format == 2:
+            result.pop("node_type", None)
         return result
 
 
@@ -624,8 +633,12 @@ def _from_bytes_v2(
         group_metadata: dict[str, Any] = {**zgroup, "attributes": zattrs}
 
         if consolidated_metadata_bytes is not None:
-            v2_consolidated_doc = buffer_to_json_object(consolidated_metadata_bytes)
-            v2_consolidated_metadata = cast("dict[str, Any]", v2_consolidated_doc["metadata"])
+            # The parsed file has the shape of `ConsolidatedMetadataV2` from
+            # zarr-metadata (keys like `foo/.zarray`, `foo/.zgroup`,
+            # `foo/.zattrs`). We strip and reorganize entries below, so copy
+            # the `metadata` mapping into a fresh mutable `dict` first.
+            parsed: ConsolidatedMetadataV2 = json.loads(consolidated_metadata_bytes.to_bytes())
+            v2_consolidated_metadata = dict(parsed["metadata"])
             # We already read zattrs and zgroup. Should we ignore these?
             v2_consolidated_metadata.pop(".zattrs", None)
             v2_consolidated_metadata.pop(".zgroup", None)
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index ac32521239..f58e701d04 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -4,7 +4,7 @@
 import warnings
 from collections.abc import Iterable, Sequence
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, TypedDict, cast
+from typing import TYPE_CHECKING, Any, cast
 
 from zarr.abc.metadata import Metadata
 from zarr.abc.numcodec import Numcodec, _is_numcodec
@@ -29,6 +29,7 @@
 from dataclasses import dataclass, field, fields, replace
 
 import numpy as np
+from zarr_metadata.v2.array import ArrayMetadataV2 as _ArrayMetadataV2
 
 from zarr.core._json import json_to_buffer
 from zarr.core.array_spec import ArrayConfig, ArraySpec
@@ -43,18 +44,10 @@
 from zarr.core.config import config, parse_indexing_order
 from zarr.core.metadata.common import parse_attributes
 
-
-class ArrayV2MetadataDict(TypedDict):
-    """
-    A typed dictionary model for Zarr format 2 metadata.
-    """
-
-    zarr_format: Literal[2]
-    attributes: dict[str, JSON]
-
-
 # Union of acceptable types for v2 compressors
 type CompressorLikev2 = dict[str, JSON] | Numcodec | None
+# Re-export the v2 array metadata JSON shape under zarr-python's historical name.
+ArrayV2MetadataDict = _ArrayMetadataV2
 
 
 @dataclass(frozen=True, kw_only=True)
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index 9eaccc5076..95affafb1a 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -3,9 +3,16 @@
 import json
 from collections.abc import Iterable, Mapping, Sequence
 from dataclasses import dataclass, field, replace
-from typing import TYPE_CHECKING, Any, Final, Literal, NotRequired, TypeGuard, cast
+from typing import TYPE_CHECKING, Any, Final, Literal, TypeGuard, cast
 
 from typing_extensions import TypedDict
+from zarr_metadata import (
+    RECTILINEAR_CHUNK_GRID_NAME,
+    REGULAR_CHUNK_GRID_NAME,
+    RectilinearChunkGridName,
+    RegularChunkGridName,
+)
+from zarr_metadata.v3.array import ArrayMetadataV3, ExtensionFieldV3
 
 from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec
 from zarr.abc.metadata import Metadata
@@ -140,14 +147,12 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
     )
 
 
-class AllowedExtraField(TypedDict, extra_items=JSON):  # type: ignore[call-arg]
-    """
-    This class models allowed extra fields in array metadata.
-    They must have ``must_understand`` set to ``False``, and may contain
-    arbitrary additional JSON data.
-    """
+AllowedExtraField = ExtensionFieldV3
+"""Alias for `zarr_metadata.v3.array.ExtensionFieldV3`.
 
-    must_understand: Literal[False]
+`must_understand` is typed as `bool` to match the spec (extension authors that
+*understand* a field may produce `True`); the runtime guard
+`check_allowed_extra_field` enforces that zarr-python only accepts `False`."""
 
 
 def check_allowed_extra_field(data: object) -> TypeGuard[AllowedExtraField]:
@@ -192,10 +197,10 @@ class RectilinearChunkGridMetadataConfig(TypedDict):
 
 
 RegularChunkGridMetadataJSON = NamedRequiredConfig[
-    Literal["regular"], RegularChunkGridMetadataConfig
+    RegularChunkGridName, RegularChunkGridMetadataConfig
 ]
 RectilinearChunkGridMetadataJSON = NamedRequiredConfig[
-    Literal["rectilinear"], RectilinearChunkGridMetadataConfig
+    RectilinearChunkGridName, RectilinearChunkGridMetadataConfig
 ]
 
 
@@ -260,13 +265,13 @@ def ndim(self) -> int:
 
     def to_dict(self) -> RegularChunkGridMetadataJSON:  # type: ignore[override]
         return {
-            "name": "regular",
+            "name": REGULAR_CHUNK_GRID_NAME,
             "configuration": {"chunk_shape": self.chunk_shape},
         }
 
     @classmethod
     def from_dict(cls, data: RegularChunkGridMetadataJSON) -> Self:  # type: ignore[override]
-        parse_named_configuration(data, "regular")  # validate name
+        parse_named_configuration(data, REGULAR_CHUNK_GRID_NAME)  # validate name
         configuration = data["configuration"]
         return cls(chunk_shape=_parse_chunk_shape(configuration["chunk_shape"]))
 
@@ -316,7 +321,7 @@ def to_dict(self) -> RectilinearChunkGridMetadataJSON:  # type: ignore[override]
                 else:
                     serialized_dims.append(list(dim_spec))
         return {
-            "name": "rectilinear",
+            "name": RECTILINEAR_CHUNK_GRID_NAME,
             "configuration": {
                 "kind": "inline",
                 "chunk_shapes": tuple(serialized_dims),
@@ -349,7 +354,7 @@ def update_shape(
 
     @classmethod
     def from_dict(cls, data: RectilinearChunkGridMetadataJSON) -> Self:  # type: ignore[override]
-        parse_named_configuration(data, "rectilinear")  # validate name
+        parse_named_configuration(data, RECTILINEAR_CHUNK_GRID_NAME)  # validate name
         configuration = data["configuration"]
         validate_rectilinear_kind(configuration.get("kind"))
         raw_shapes = configuration["chunk_shapes"]
@@ -413,32 +418,20 @@ def parse_chunk_grid(
         return data
 
     name, _ = parse_named_configuration(data)
-    if name == "regular":
+    if name == REGULAR_CHUNK_GRID_NAME:
         return RegularChunkGridMetadata.from_dict(data)  # type: ignore[arg-type]
-    if name == "rectilinear":
+    if name == RECTILINEAR_CHUNK_GRID_NAME:
         return RectilinearChunkGridMetadata.from_dict(data)  # type: ignore[arg-type]
     raise ValueError(f"Unknown chunk grid name: {name!r}")
 
 
-class ArrayMetadataJSON_V3(TypedDict, extra_items=AllowedExtraField):  # type: ignore[call-arg]
-    """
-    A typed dictionary model for zarr v3 array metadata.
+ArrayMetadataJSON_V3 = ArrayMetadataV3
+"""Alias for `zarr_metadata.v3.array.ArrayMetadataV3`, the TypedDict modeling
+the v3 array metadata document.
 
-    Extra keys are permitted if they conform to ``AllowedExtraField``
-    (i.e. they are mappings with ``must_understand: false``).
-    """
-
-    zarr_format: Literal[3]
-    node_type: Literal["array"]
-    data_type: str | NamedConfig[str, Mapping[str, JSON]]
-    shape: tuple[int, ...]
-    chunk_grid: str | NamedConfig[str, Mapping[str, JSON]]
-    chunk_key_encoding: str | NamedConfig[str, Mapping[str, JSON]]
-    fill_value: JSON
-    codecs: tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]
-    attributes: NotRequired[Mapping[str, JSON]]
-    storage_transformers: NotRequired[tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]]
-    dimension_names: NotRequired[tuple[str | None, ...]]
+Used throughout zarr-python under this name to avoid visual collision with
+the `ArrayV3Metadata` dataclass — the two differ only in word order. Extra
+keys are permitted on this dict if they conform to `ExtensionFieldV3`."""
 
 
 """
@@ -665,6 +658,12 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
         )
 
     def to_dict(self) -> dict[str, JSON]:
+        """Serialize as a JSON-shaped dict matching `ArrayMetadataJSON_V3`.
+
+        Return type is `dict[str, JSON]` rather than `ArrayMetadataJSON_V3` so
+        the result composes with other zarr-python metadata serialisation
+        paths that traffic in `dict[str, JSON]` (notably consolidated metadata).
+        """
         out_dict = super().to_dict()
         extra_fields = out_dict.pop("extra_fields")
         out_dict = out_dict | extra_fields  # type: ignore[operator]
diff --git a/tests/test_array.py b/tests/test_array.py
index 0d6d2d5906..f3ca0ed70f 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -7,7 +7,7 @@
 import re
 import sys
 from itertools import accumulate, starmap
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING, Any, Literal, cast
 from unittest import mock
 
 import numcodecs
@@ -85,7 +85,11 @@
 from .test_dtype.conftest import zdtype_examples
 
 if TYPE_CHECKING:
+    from zarr_metadata import ArrayMetadataV2
+    from zarr_metadata.v3.codec.bytes import BytesCodecMetadata
+
     from zarr.abc.codec import CodecJSON_V3
+    from zarr.core.metadata import ArrayMetadataJSON_V3
 
 
 @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
@@ -325,47 +329,47 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) ->
 
 
 @pytest.mark.parametrize("store", ["memory"], indirect=True)
-@pytest.mark.parametrize("zarr_format", [2, 3, "invalid"])
-def test_storage_transformers(store: MemoryStore, zarr_format: ZarrFormat | str) -> None:
+@pytest.mark.parametrize("zarr_format", [2, 3])
+def test_storage_transformers(store: MemoryStore, zarr_format: ZarrFormat) -> None:
     """
-    Test that providing an actual storage transformer produces a warning and otherwise passes through
+    storage_transformers is a v3-only field; passing a populated one to v3
+    array construction raises, while v2 (where the field has no spec
+    meaning) is unaffected.
     """
-    metadata_dict: dict[str, JSON]
     if zarr_format == 3:
-        metadata_dict = {
+        v3_metadata: ArrayMetadataJSON_V3 = {
             "zarr_format": 3,
             "node_type": "array",
             "shape": (10,),
             "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
             "data_type": "uint8",
             "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
-            "codecs": (BytesCodec().to_dict(),),
+            "codecs": (BytesCodec().to_dict(),),  # type: ignore[typeddict-item]
             "fill_value": 0,
-            "storage_transformers": ({"test": "should_raise"}),
+            # Deliberately invalid: the test asserts that any non-empty
+            # storage_transformers value triggers the "not supported"
+            # error path, regardless of its inner shape.
+            "storage_transformers": ({"test": "should_raise"},),  # type: ignore[typeddict-item]
         }
+        match = "Arrays with storage transformers are not supported in zarr-python at this time."
+        with pytest.raises(ValueError, match=match):
+            # cast: from_dict accepts the wider `dict[str, JSON]`.
+            Array.from_dict(StorePath(store), data=cast("dict[str, JSON]", v3_metadata))
     else:
-        metadata_dict = {
-            "zarr_format": zarr_format,
+        # Plain v2 array metadata; no v3-only fields (no codecs,
+        # storage_transformers, chunk_grid, etc.).
+        v2_metadata: ArrayMetadataV2 = {
+            "zarr_format": 2,
             "shape": (10,),
             "chunks": (1,),
             "dtype": "|u1",
             "dimension_separator": ".",
-            "codecs": (BytesCodec().to_dict(),),
+            "compressor": None,
             "fill_value": 0,
             "order": "C",
-            "storage_transformers": ({"test": "should_raise"}),
+            "filters": None,
         }
-    if zarr_format == 3:
-        match = "Arrays with storage transformers are not supported in zarr-python at this time."
-        with pytest.raises(ValueError, match=match):
-            Array.from_dict(StorePath(store), data=metadata_dict)
-    elif zarr_format == 2:
-        # no warning
-        Array.from_dict(StorePath(store), data=metadata_dict)
-    else:
-        match = f"Invalid zarr_format: {zarr_format}. Expected 2 or 3"
-        with pytest.raises(ValueError, match=match):
-            Array.from_dict(StorePath(store), data=metadata_dict)
+        Array.from_dict(StorePath(store), data=cast("dict[str, JSON]", v2_metadata))
 
 
 @pytest.mark.parametrize("test_cls", [AnyArray, AnyAsyncArray])
@@ -1885,7 +1889,7 @@ def test_roundtrip_numcodecs() -> None:
         dimension_names=["lat", "lon"],
     )
 
-    BYTES_CODEC = {"name": "bytes", "configuration": {"endian": "little"}}
+    BYTES_CODEC: BytesCodecMetadata = {"name": "bytes", "configuration": {"endian": "little"}}
     # Read in the array again and check compressor config
     root = zarr.open_group(store)
     metadata = root["test"].metadata.to_dict()
diff --git a/tests/test_codecs/test_cast_value.py b/tests/test_codecs/test_cast_value.py
index c43edb76e8..d682234ace 100644
--- a/tests/test_codecs/test_cast_value.py
+++ b/tests/test_codecs/test_cast_value.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Any
+from typing import TYPE_CHECKING, Any, cast
 
 import numpy as np
 import pytest
@@ -9,6 +9,11 @@
 from tests.conftest import Expect, ExpectFail
 from zarr.codecs.cast_value import CastValue
 
+if TYPE_CHECKING:
+    from zarr_metadata.v3.codec.cast_value import CastValueCodecObject
+
+    from zarr.core.common import JSON
+
 try:
     import cast_value_rs  # noqa: F401
 
@@ -26,14 +31,25 @@
 # ---------------------------------------------------------------------------
 
 
+_CAST_VALUE_MINIMAL: CastValueCodecObject = {
+    "name": "cast_value",
+    "configuration": {"data_type": "uint8"},
+}
+_CAST_VALUE_FULL: CastValueCodecObject = {
+    "name": "cast_value",
+    "configuration": {
+        "data_type": "uint8",
+        "rounding": "towards-zero",
+        "out_of_range": "clamp",
+        "scalar_map": {"encode": (("NaN", 0),)},
+    },
+}
+
+
 @pytest.mark.parametrize(
     "case",
     [
-        Expect(
-            input=CastValue(data_type="uint8"),
-            output={"name": "cast_value", "configuration": {"data_type": "uint8"}},
-            id="minimal",
-        ),
+        Expect(input=CastValue(data_type="uint8"), output=_CAST_VALUE_MINIMAL, id="minimal"),
         Expect(
             input=CastValue(
                 data_type="uint8",
@@ -41,51 +57,53 @@
                 out_of_range="clamp",
                 scalar_map={"encode": [("NaN", 0)]},
             ),
-            output={
-                "name": "cast_value",
-                "configuration": {
-                    "data_type": "uint8",
-                    "rounding": "towards-zero",
-                    "out_of_range": "clamp",
-                    "scalar_map": {"encode": [("NaN", 0)]},
-                },
-            },
+            output=_CAST_VALUE_FULL,
             id="full",
         ),
     ],
     ids=lambda c: c.id,
 )
-def test_to_dict(case: Expect[CastValue, dict[str, Any]]) -> None:
+def test_to_dict(case: Expect[CastValue, CastValueCodecObject]) -> None:
     """to_dict produces the expected JSON structure."""
     assert case.input.to_dict() == case.output
 
 
+_CAST_VALUE_FROM_DICT_DEFAULTS: CastValueCodecObject = {
+    "name": "cast_value",
+    "configuration": {"data_type": "float32"},
+}
+_CAST_VALUE_FROM_DICT_EXPLICIT: CastValueCodecObject = {
+    "name": "cast_value",
+    "configuration": {
+        "data_type": "int16",
+        "rounding": "towards-zero",
+        "out_of_range": "clamp",
+    },
+}
+
+
 @pytest.mark.parametrize(
     "case",
     [
         Expect(
-            input={"name": "cast_value", "configuration": {"data_type": "float32"}},
+            input=_CAST_VALUE_FROM_DICT_DEFAULTS,
             output=("float32", "nearest-even", None),
             id="defaults",
         ),
         Expect(
-            input={
-                "name": "cast_value",
-                "configuration": {
-                    "data_type": "int16",
-                    "rounding": "towards-zero",
-                    "out_of_range": "clamp",
-                },
-            },
+            input=_CAST_VALUE_FROM_DICT_EXPLICIT,
             output=("int16", "towards-zero", "clamp"),
             id="explicit",
         ),
     ],
     ids=lambda c: c.id,
 )
-def test_from_dict(case: Expect[dict[str, Any], tuple[str, str, str | None]]) -> None:
+def test_from_dict(
+    case: Expect[CastValueCodecObject, tuple[str, str, str | None]],
+) -> None:
     """from_dict deserializes configuration with correct values and defaults."""
-    codec = CastValue.from_dict(case.input)
+    # cast: from_dict accepts the wider `dict[str, JSON]`.
+    codec = CastValue.from_dict(cast("dict[str, JSON]", case.input))
     dtype_name, rounding, out_of_range = case.output
     assert codec.dtype.to_native_dtype() == np.dtype(dtype_name)
     assert codec.rounding == rounding
diff --git a/tests/test_codecs/test_scale_offset.py b/tests/test_codecs/test_scale_offset.py
index 513caf463a..89df06a80c 100644
--- a/tests/test_codecs/test_scale_offset.py
+++ b/tests/test_codecs/test_scale_offset.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Any
+from typing import TYPE_CHECKING, Any, cast
 
 import numpy as np
 import pytest
@@ -16,53 +16,67 @@
 from zarr.core.buffer.core import default_buffer_prototype
 from zarr.storage._memory import MemoryStore
 
+if TYPE_CHECKING:
+    from zarr_metadata.v3.codec.scale_offset import ScaleOffsetCodecObject
+
+    from zarr.core.common import JSON
+
 # ---------------------------------------------------------------------------
 # Serialization
 # ---------------------------------------------------------------------------
 
 
+_SCALE_OFFSET_DEFAULT: ScaleOffsetCodecObject = {"name": "scale_offset"}
+_SCALE_OFFSET_OFFSET_ONLY: ScaleOffsetCodecObject = {
+    "name": "scale_offset",
+    "configuration": {"offset": 5},
+}
+_SCALE_OFFSET_SCALE_ONLY: ScaleOffsetCodecObject = {
+    "name": "scale_offset",
+    "configuration": {"scale": 0.1},
+}
+_SCALE_OFFSET_BOTH: ScaleOffsetCodecObject = {
+    "name": "scale_offset",
+    "configuration": {"offset": 5, "scale": 0.1},
+}
+
+
 @pytest.mark.parametrize(
     "case",
     [
-        Expect(input=ScaleOffset(), output={"name": "scale_offset"}, id="default"),
-        Expect(
-            input=ScaleOffset(offset=5),
-            output={"name": "scale_offset", "configuration": {"offset": 5}},
-            id="offset-only",
-        ),
-        Expect(
-            input=ScaleOffset(scale=0.1),
-            output={"name": "scale_offset", "configuration": {"scale": 0.1}},
-            id="scale-only",
-        ),
-        Expect(
-            input=ScaleOffset(offset=5, scale=0.1),
-            output={"name": "scale_offset", "configuration": {"offset": 5, "scale": 0.1}},
-            id="both",
-        ),
+        Expect(input=ScaleOffset(), output=_SCALE_OFFSET_DEFAULT, id="default"),
+        Expect(input=ScaleOffset(offset=5), output=_SCALE_OFFSET_OFFSET_ONLY, id="offset-only"),
+        Expect(input=ScaleOffset(scale=0.1), output=_SCALE_OFFSET_SCALE_ONLY, id="scale-only"),
+        Expect(input=ScaleOffset(offset=5, scale=0.1), output=_SCALE_OFFSET_BOTH, id="both"),
     ],
     ids=lambda c: c.id,
 )
-def test_to_dict(case: Expect[ScaleOffset, dict[str, Any]]) -> None:
+def test_to_dict(case: Expect[ScaleOffset, ScaleOffsetCodecObject]) -> None:
     """to_dict produces the expected JSON structure."""
     assert case.input.to_dict() == case.output
 
 
+_SCALE_OFFSET_FROM_DICT_NO_CONFIG: ScaleOffsetCodecObject = {"name": "scale_offset"}
+_SCALE_OFFSET_FROM_DICT_WITH_CONFIG: ScaleOffsetCodecObject = {
+    "name": "scale_offset",
+    "configuration": {"offset": 3, "scale": 2},
+}
+
+
 @pytest.mark.parametrize(
     "case",
     [
-        Expect(input={"name": "scale_offset"}, output=(0, 1), id="no-config"),
-        Expect(
-            input={"name": "scale_offset", "configuration": {"offset": 3, "scale": 2}},
-            output=(3, 2),
-            id="with-config",
-        ),
+        Expect(input=_SCALE_OFFSET_FROM_DICT_NO_CONFIG, output=(0, 1), id="no-config"),
+        Expect(input=_SCALE_OFFSET_FROM_DICT_WITH_CONFIG, output=(3, 2), id="with-config"),
     ],
     ids=lambda c: c.id,
 )
-def test_from_dict(case: Expect[dict[str, Any], tuple[int | float, int | float]]) -> None:
+def test_from_dict(
+    case: Expect[ScaleOffsetCodecObject, tuple[int | float, int | float]],
+) -> None:
     """from_dict deserializes configuration with correct values and defaults."""
-    codec = ScaleOffset.from_dict(case.input)
+    # cast: from_dict accepts the wider `dict[str, JSON]`.
+    codec = ScaleOffset.from_dict(cast("dict[str, JSON]", case.input))
     expected_offset, expected_scale = case.output
     assert codec.offset == expected_offset
     assert codec.scale == expected_scale
diff --git a/tests/test_dtype/test_npy/test_bool.py b/tests/test_dtype/test_npy/test_bool.py
index da30214b3b..ff48b26189 100644
--- a/tests/test_dtype/test_npy/test_bool.py
+++ b/tests/test_dtype/test_npy/test_bool.py
@@ -1,10 +1,15 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING, ClassVar
+
 import numpy as np
 
 from tests.test_dtype.test_wrapper import BaseTestZDType
 from zarr.core.dtype.npy.bool import Bool
 
+if TYPE_CHECKING:
+    from zarr_metadata.v3.data_type.bool import BoolDataTypeName
+
 
 class TestBool(BaseTestZDType):
     test_cls = Bool
@@ -16,7 +21,7 @@ class TestBool(BaseTestZDType):
         np.dtype(np.uint16),
     )
     valid_json_v2 = ({"name": "|b1", "object_codec_id": None},)
-    valid_json_v3 = ("bool",)
+    valid_json_v3: ClassVar[tuple[BoolDataTypeName, ...]] = ("bool",)
     invalid_json_v2 = (
         "|b1",
         "bool",
diff --git a/tests/test_dtype/test_npy/test_complex.py b/tests/test_dtype/test_npy/test_complex.py
index b4ce42be58..dda60585f8 100644
--- a/tests/test_dtype/test_npy/test_complex.py
+++ b/tests/test_dtype/test_npy/test_complex.py
@@ -1,12 +1,17 @@
 from __future__ import annotations
 
 import math
+from typing import TYPE_CHECKING, ClassVar
 
 import numpy as np
 
 from tests.test_dtype.test_wrapper import BaseTestZDType
 from zarr.core.dtype.npy.complex import Complex64, Complex128
 
+if TYPE_CHECKING:
+    from zarr_metadata.v3.data_type.complex64 import Complex64DataTypeName
+    from zarr_metadata.v3.data_type.complex128 import Complex128DataTypeName
+
 
 class _BaseTestFloat(BaseTestZDType):
     def scalar_equals(self, scalar1: object, scalar2: object) -> bool:
@@ -27,7 +32,7 @@ class TestComplex64(_BaseTestFloat):
         {"name": ">c8", "object_codec_id": None},
         {"name": "<c8", "object_codec_id": None},
     )
-    valid_json_v3 = ("complex64",)
+    valid_json_v3: ClassVar[tuple[Complex64DataTypeName, ...]] = ("complex64",)
     invalid_json_v2 = (
         "|c8",
         "complex64",
@@ -70,7 +75,7 @@ class TestComplex128(_BaseTestFloat):
         {"name": ">c16", "object_codec_id": None},
         {"name": "<c16", "object_codec_id": None},
     )
-    valid_json_v3 = ("complex128",)
+    valid_json_v3: ClassVar[tuple[Complex128DataTypeName, ...]] = ("complex128",)
     invalid_json_v2 = (
         "|c16",
         "complex128",
diff --git a/tests/test_dtype/test_npy/test_float.py b/tests/test_dtype/test_npy/test_float.py
index 8d8e768263..cc2796d625 100644
--- a/tests/test_dtype/test_npy/test_float.py
+++ b/tests/test_dtype/test_npy/test_float.py
@@ -1,10 +1,17 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING, ClassVar
+
 import numpy as np
 
 from tests.test_dtype.test_wrapper import BaseTestZDType
 from zarr.core.dtype.npy.float import Float16, Float32, Float64
 
+if TYPE_CHECKING:
+    from zarr_metadata.v3.data_type.float16 import Float16DataTypeName
+    from zarr_metadata.v3.data_type.float32 import Float32DataTypeName
+    from zarr_metadata.v3.data_type.float64 import Float64DataTypeName
+
 
 class _BaseTestFloat(BaseTestZDType):
     def scalar_equals(self, scalar1: object, scalar2: object) -> bool:
@@ -36,7 +43,7 @@ class TestFloat16(_BaseTestFloat):
         {"name": ">f2", "object_codec_id": None},
         {"name": "<f2", "object_codec_id": None},
     )
-    valid_json_v3 = ("float16",)
+    valid_json_v3: ClassVar[tuple[Float16DataTypeName, ...]] = ("float16",)
     invalid_json_v2 = (
         "|f2",
         "float16",
@@ -86,7 +93,7 @@ class TestFloat32(_BaseTestFloat):
         {"name": ">f4", "object_codec_id": None},
         {"name": "<f4", "object_codec_id": None},
     )
-    valid_json_v3 = ("float32",)
+    valid_json_v3: ClassVar[tuple[Float32DataTypeName, ...]] = ("float32",)
     invalid_json_v2 = (
         "|f4",
         "float32",
@@ -136,7 +143,7 @@ class TestFloat64(_BaseTestFloat):
         {"name": ">f8", "object_codec_id": None},
         {"name": "<f8", "object_codec_id": None},
     )
-    valid_json_v3 = ("float64",)
+    valid_json_v3: ClassVar[tuple[Float64DataTypeName, ...]] = ("float64",)
     invalid_json_v2 = (
         "|f8",
         "float64",
diff --git a/tests/test_dtype/test_npy/test_int.py b/tests/test_dtype/test_npy/test_int.py
index 9eab053080..164d70a725 100644
--- a/tests/test_dtype/test_npy/test_int.py
+++ b/tests/test_dtype/test_npy/test_int.py
@@ -1,10 +1,22 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING, ClassVar
+
 import numpy as np
 
 from tests.test_dtype.test_wrapper import BaseTestZDType
 from zarr.core.dtype.npy.int import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64
 
+if TYPE_CHECKING:
+    from zarr_metadata.v3.data_type.int8 import Int8DataTypeName
+    from zarr_metadata.v3.data_type.int16 import Int16DataTypeName
+    from zarr_metadata.v3.data_type.int32 import Int32DataTypeName
+    from zarr_metadata.v3.data_type.int64 import Int64DataTypeName
+    from zarr_metadata.v3.data_type.uint8 import Uint8DataTypeName
+    from zarr_metadata.v3.data_type.uint16 import Uint16DataTypeName
+    from zarr_metadata.v3.data_type.uint32 import Uint32DataTypeName
+    from zarr_metadata.v3.data_type.uint64 import Uint64DataTypeName
+
 
 class TestInt8(BaseTestZDType):
     test_cls = Int8
@@ -16,7 +28,7 @@ class TestInt8(BaseTestZDType):
         np.dtype(np.float64),
     )
     valid_json_v2 = ({"name": "|i1", "object_codec_id": None},)
-    valid_json_v3 = ("int8",)
+    valid_json_v3: ClassVar[tuple[Int8DataTypeName, ...]] = ("int8",)
     invalid_json_v2 = (
         ">i1",
         "int8",
@@ -51,7 +63,7 @@ class TestInt16(BaseTestZDType):
         {"name": ">i2", "object_codec_id": None},
         {"name": "<i2", "object_codec_id": None},
     )
-    valid_json_v3 = ("int16",)
+    valid_json_v3: ClassVar[tuple[Int16DataTypeName, ...]] = ("int16",)
     invalid_json_v2 = (
         "|i2",
         "int16",
@@ -89,7 +101,7 @@ class TestInt32(BaseTestZDType):
         {"name": ">i4", "object_codec_id": None},
         {"name": "<i4", "object_codec_id": None},
     )
-    valid_json_v3 = ("int32",)
+    valid_json_v3: ClassVar[tuple[Int32DataTypeName, ...]] = ("int32",)
     invalid_json_v2 = (
         "|i4",
         "int32",
@@ -124,7 +136,7 @@ class TestInt64(BaseTestZDType):
         {"name": ">i8", "object_codec_id": None},
         {"name": "<i8", "object_codec_id": None},
     )
-    valid_json_v3 = ("int64",)
+    valid_json_v3: ClassVar[tuple[Int64DataTypeName, ...]] = ("int64",)
     invalid_json_v2 = (
         "|i8",
         "int64",
@@ -156,7 +168,7 @@ class TestUInt8(BaseTestZDType):
         np.dtype(np.float64),
     )
     valid_json_v2 = ({"name": "|u1", "object_codec_id": None},)
-    valid_json_v3 = ("uint8",)
+    valid_json_v3: ClassVar[tuple[Uint8DataTypeName, ...]] = ("uint8",)
     invalid_json_v2 = (
         "|u1",
         "uint8",
@@ -191,7 +203,7 @@ class TestUInt16(BaseTestZDType):
         {"name": ">u2", "object_codec_id": None},
         {"name": "<u2", "object_codec_id": None},
     )
-    valid_json_v3 = ("uint16",)
+    valid_json_v3: ClassVar[tuple[Uint16DataTypeName, ...]] = ("uint16",)
     invalid_json_v2 = (
         "|u2",
         "uint16",
@@ -235,7 +247,7 @@ class TestUInt32(BaseTestZDType):
         {"name": ">u4", "object_codec_id": None},
         {"name": "<u4", "object_codec_id": None},
     )
-    valid_json_v3 = ("uint32",)
+    valid_json_v3: ClassVar[tuple[Uint32DataTypeName, ...]] = ("uint32",)
     invalid_json_v2 = (
         "|u4",
         "uint32",
@@ -270,7 +282,7 @@ class TestUInt64(BaseTestZDType):
         {"name": ">u8", "object_codec_id": None},
         {"name": "<u8", "object_codec_id": None},
     )
-    valid_json_v3 = ("uint64",)
+    valid_json_v3: ClassVar[tuple[Uint64DataTypeName, ...]] = ("uint64",)
     invalid_json_v2 = (
         "|u8",
         "uint64",
diff --git a/tests/test_dtype/test_npy/test_string.py b/tests/test_dtype/test_npy/test_string.py
index c3b292e5fc..913f6dfb00 100644
--- a/tests/test_dtype/test_npy/test_string.py
+++ b/tests/test_dtype/test_npy/test_string.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING, ClassVar
+
 import numpy as np
 import pytest
 
@@ -7,6 +9,9 @@
 from zarr.core.dtype import FixedLengthUTF32
 from zarr.core.dtype.npy.string import VariableLengthUTF8
 
+if TYPE_CHECKING:
+    from zarr_metadata.v3.data_type.string import StringDataTypeName
+
 
 class TestVariableLengthString(BaseTestZDType):
     test_cls = VariableLengthUTF8  # type: ignore[assignment]
@@ -17,7 +22,7 @@ class TestVariableLengthString(BaseTestZDType):
         np.dtype("|S10"),
     )
     valid_json_v2 = ({"name": "|O", "object_codec_id": "vlen-utf8"},)
-    valid_json_v3 = ("string",)
+    valid_json_v3: ClassVar[tuple[StringDataTypeName, ...]] = ("string",)
     invalid_json_v2 = (
         "|S10",
         "|f8",
diff --git a/tests/test_dtype/test_npy/test_structured.py b/tests/test_dtype/test_npy/test_structured.py
index 554c3b4e41..f7e38eac9b 100644
--- a/tests/test_dtype/test_npy/test_structured.py
+++ b/tests/test_dtype/test_npy/test_structured.py
@@ -1,11 +1,14 @@
 from __future__ import annotations
 
-from typing import Any
+from typing import TYPE_CHECKING, Any, ClassVar
 
 import numpy as np
 import pytest
 
 from tests.test_dtype.test_wrapper import BaseTestZDType
+
+if TYPE_CHECKING:
+    from zarr_metadata.v3.data_type.struct import Struct as StructMetadata
 from zarr.core.dtype import (
     Float16,
     Float64,
@@ -34,20 +37,23 @@ class TestStruct(BaseTestZDType):
         {"name": [["field1", ">i4"], ["field2", ">f8"]], "object_codec_id": None},
         {"name": [["field1", ">i8"], ["field2", ">i4"]], "object_codec_id": None},
     )
-    valid_json_v3 = (
+    # `StructConfiguration.fields` is typed as `tuple[StructField, ...]` (JSON
+    # arrays are modelled as tuples), and `Struct.to_json` emits a tuple to
+    # match, so each `fields` value is written as a tuple here.
+    valid_json_v3: ClassVar[tuple[StructMetadata, ...]] = (
         {
             "name": "struct",
             "configuration": {
-                "fields": [
+                "fields": (
                     {"name": "field1", "data_type": "int32"},
                     {"name": "field2", "data_type": "float64"},
-                ]
+                )
             },
         },
         {
             "name": "struct",
             "configuration": {
-                "fields": [
+                "fields": (
                     {
                         "name": "field1",
                         "data_type": {
@@ -62,7 +68,7 @@ class TestStruct(BaseTestZDType):
                             "configuration": {"length_bytes": 32},
                         },
                     },
-                ]
+                )
             },
         },
     )
diff --git a/tests/test_dtype/test_npy/test_time.py b/tests/test_dtype/test_npy/test_time.py
index 67ba3bd130..14b6000999 100644
--- a/tests/test_dtype/test_npy/test_time.py
+++ b/tests/test_dtype/test_npy/test_time.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import re
-from typing import get_args
+from typing import TYPE_CHECKING, ClassVar, get_args
 
 import numpy as np
 import pytest
@@ -10,6 +10,10 @@
 from zarr.core.dtype.npy.common import DateTimeUnit
 from zarr.core.dtype.npy.time import DateTime64, TimeDelta64, datetime_from_int
 
+if TYPE_CHECKING:
+    from zarr_metadata.v3.data_type.numpy_datetime64 import NumpyDatetime64
+    from zarr_metadata.v3.data_type.numpy_timedelta64 import NumpyTimedelta64
+
 
 class _TestTimeBase(BaseTestZDType):
     def json_scalar_equals(self, scalar1: object, scalar2: object) -> bool:
@@ -40,7 +44,7 @@ class TestDateTime64(_TestTimeBase):
         {"name": "<M8[10s]", "object_codec_id": None},
         {"name": "<M8[10us]", "object_codec_id": None},
     )
-    valid_json_v3 = (
+    valid_json_v3: ClassVar[tuple[NumpyDatetime64, ...]] = (
         {"name": "numpy.datetime64", "configuration": {"unit": "ns", "scale_factor": 10}},
         {"name": "numpy.datetime64", "configuration": {"unit": "us", "scale_factor": 1}},
     )
@@ -90,7 +94,7 @@ class TestTimeDelta64(_TestTimeBase):
         {"name": "<m8[10s]", "object_codec_id": None},
         {"name": "<m8[10us]", "object_codec_id": None},
     )
-    valid_json_v3 = (
+    valid_json_v3: ClassVar[tuple[NumpyTimedelta64, ...]] = (
         {"name": "numpy.timedelta64", "configuration": {"unit": "ns", "scale_factor": 10}},
         {"name": "numpy.timedelta64", "configuration": {"unit": "us", "scale_factor": 1}},
     )
diff --git a/tests/test_group.py b/tests/test_group.py
index e05df0dfcb..84bdaa70b4 100644
--- a/tests/test_group.py
+++ b/tests/test_group.py
@@ -57,6 +57,7 @@
     from collections.abc import Callable
 
     from _pytest.compat import LEGACY_PATH
+    from zarr_metadata import GroupMetadataV2, GroupMetadataV3
 
     from zarr.core.buffer.core import Buffer
     from zarr.core.common import JSON, ZarrFormat
@@ -587,6 +588,19 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat
             else:
                 group = zarr.consolidate_metadata(store)
         if zarr_format == 2:
+            # The v2 spec defines `.zgroup` as `{"zarr_format": 2}` only;
+            # `node_type` is v3 only. The subgroup metadata here uses the
+            # in-memory merged form (zarr-python folds `.zattrs` and the
+            # `consolidated_metadata` extension into a single dict).
+            subgroup: GroupMetadataV2 = {
+                "attributes": {},
+                "consolidated_metadata": {  # type: ignore[typeddict-unknown-key]
+                    "metadata": {},
+                    "kind": "inline",
+                    "must_understand": False,
+                },
+                "zarr_format": 2,
+            }
             metadata = {
                 "subarray": {
                     "attributes": {},
@@ -599,16 +613,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat
                     "compressor": Blosc(),
                     "zarr_format": zarr_format,
                 },
-                "subgroup": {
-                    "attributes": {},
-                    "consolidated_metadata": {
-                        "metadata": {},
-                        "kind": "inline",
-                        "must_understand": False,
-                    },
-                    "node_type": "group",
-                    "zarr_format": zarr_format,
-                },
+                "subgroup": subgroup,
             }
         else:
             metadata = {
@@ -1021,15 +1026,16 @@ async def test_asyncgroup_open_wrong_format(
         await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong)
 
 
-# todo: replace the dict[str, Any] type with something a bit more specific
+_FROM_DICT_V3: GroupMetadataV3 = {
+    "zarr_format": 3,
+    "node_type": "group",
+    "attributes": {"foo": 100},
+}
+_FROM_DICT_V2: GroupMetadataV2 = {"zarr_format": 2, "attributes": {"foo": 100}}
+
+
 # should this be async?
-@pytest.mark.parametrize(
-    "data",
-    [
-        {"zarr_format": 3, "node_type": "group", "attributes": {"foo": 100}},
-        {"zarr_format": 2, "attributes": {"foo": 100}},
-    ],
-)
+@pytest.mark.parametrize("data", [_FROM_DICT_V3, _FROM_DICT_V2])
 def test_asyncgroup_from_dict(store: Store, data: dict[str, Any]) -> None:
     """
     Test that we can create an AsyncGroup from a dict
diff --git a/tests/test_metadata/conftest.py b/tests/test_metadata/conftest.py
index 24f2417fce..2a0765b9a4 100644
--- a/tests/test_metadata/conftest.py
+++ b/tests/test_metadata/conftest.py
@@ -5,7 +5,10 @@
 from zarr.codecs.bytes import BytesCodec
 
 if TYPE_CHECKING:
-    from zarr.core.metadata.v3 import ArrayMetadataJSON_V3
+    from zarr_metadata.v3.chunk_grid.regular import RegularChunkGridMetadata
+    from zarr_metadata.v3.chunk_key_encoding.default import DefaultChunkKeyEncodingMetadata
+
+    from zarr.core.metadata import ArrayMetadataJSON_V3
 
 
 def minimal_metadata_dict_v3(
@@ -23,13 +26,29 @@ def minimal_metadata_dict_v3(
     **overrides
         Override any of the standard metadata fields.
     """
+    # Bind chunk-grid and chunk-key-encoding subdicts to their precise
+    # zarr-metadata types so structural shape errors surface here rather
+    # than downstream.
+    chunk_grid: RegularChunkGridMetadata = {
+        "name": "regular",
+        "configuration": {"chunk_shape": (4, 4)},
+    }
+    chunk_key_encoding: DefaultChunkKeyEncodingMetadata = {
+        "name": "default",
+        "configuration": {"separator": "/"},
+    }
     d: ArrayMetadataJSON_V3 = {
         "zarr_format": 3,
         "node_type": "array",
         "shape": (4, 4),
         "data_type": "uint8",
-        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (4, 4)}},
-        "chunk_key_encoding": {"name": "default", "configuration": {"separator": "/"}},
+        # mypy does not recognize structural subtyping between TypedDicts,
+        # so `RegularChunkGridMetadata` is not seen as assignable to the
+        # outer `str | NamedConfig` field type even though it is. The
+        # bound variables above are correct; suppress the spurious
+        # `typeddict-item` rejections here.
+        "chunk_grid": chunk_grid,  # type: ignore[typeddict-item]
+        "chunk_key_encoding": chunk_key_encoding,  # type: ignore[typeddict-item]
         "fill_value": 0,
         "codecs": (BytesCodec().to_dict(),),  # type: ignore[typeddict-item]
         "attributes": {},
diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py
index 3596d2bcaa..724c471134 100644
--- a/tests/test_metadata/test_consolidated.py
+++ b/tests/test_metadata/test_consolidated.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import json
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 import numpy as np
 import pytest
@@ -26,6 +26,10 @@
 from zarr.storage import StorePath
 
 if TYPE_CHECKING:
+    from zarr_metadata.v2 import ConsolidatedMetadataV2, ZAttrsMetadata, ZGroupMetadata
+    from zarr_metadata.v3.array import ArrayMetadataV3Partial
+    from zarr_metadata.v3.group import GroupMetadataV3
+
     from zarr.abc.store import Store
     from zarr.core.common import JSON, ZarrFormat
 
@@ -63,14 +67,22 @@ async def test_getitem_consolidated_empty_leaf_group(
         #
         # field on the leaf group nodes.
         if zarr_format == 2:
-            zmetadata: dict[str, JSON] = {
+            # Bind each value to a typed variable so the outer TypedDict's
+            # value-union (ZArrayMetadata | ZGroupMetadata | ZAttrsMetadata)
+            # resolves unambiguously to the correct arm — inline literals
+            # do not narrow because mypy can't structurally disambiguate
+            # `{}` between `ZAttrsMetadata` (Mapping[str, object]) and an
+            # empty TypedDict variant.
+            empty_attrs: ZAttrsMetadata = {}
+            empty_group: ZGroupMetadata = {"zarr_format": 2}
+            zmetadata: ConsolidatedMetadataV2 = {
                 "metadata": {
-                    ".zattrs": {},
-                    ".zgroup": {"zarr_format": 2},
-                    "raw/.zattrs": {},
-                    "raw/.zgroup": {"zarr_format": 2},
-                    "raw/varm/.zattrs": {},
-                    "raw/varm/.zgroup": {"zarr_format": 2},
+                    ".zattrs": empty_attrs,
+                    ".zgroup": empty_group,
+                    "raw/.zattrs": empty_attrs,
+                    "raw/.zgroup": empty_group,
+                    "raw/varm/.zattrs": empty_attrs,
+                    "raw/varm/.zgroup": empty_group,
                 },
                 "zarr_consolidated_format": 1,
             }
@@ -83,7 +95,15 @@ async def test_getitem_consolidated_empty_leaf_group(
             )
 
         else:
-            zmetadata = {
+            # The v3 shape is a complete group metadata document with an
+            # inline `consolidated_metadata` extension field. It is not a
+            # `ConsolidatedMetadataV2` shape, so it is bound to a separately
+            # named variable.
+            #
+            # mypy does not honor PEP 728 `extra_items=`, so the extension
+            # key needs a `typeddict-unknown-key` suppression even though
+            # `GroupMetadataV3` permits conforming extension fields.
+            zarr_json: GroupMetadataV3 = {  # type: ignore[typeddict-unknown-key]
                 "attributes": {},
                 "zarr_format": 3,
                 "consolidated_metadata": {
@@ -105,7 +125,7 @@ async def test_getitem_consolidated_empty_leaf_group(
                 "node_type": "group",
             }
             await memory_store.set(
-                "zarr.json", cpu.Buffer.from_bytes(json.dumps(zmetadata).encode())
+                "zarr.json", cpu.Buffer.from_bytes(json.dumps(zarr_json).encode())
             )
 
         group = await zarr.api.asynchronous.open_consolidated(
@@ -143,7 +163,10 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None:
             await consolidate_metadata(memory_store_with_hierarchy)
         group2 = await AsyncGroup.open(memory_store_with_hierarchy)
 
-        array_metadata: dict[str, JSON] = {
+        # Partial v3 array document: `shape` and `chunk_grid` are intentionally
+        # omitted and supplied per-array via spread below. `ArrayMetadataV3Partial`
+        # is the `total=False` form that types exactly this kind of fragment.
+        array_metadata: ArrayMetadataV3Partial = {
             "attributes": {},
             "chunk_key_encoding": {
                 "configuration": {"separator": "/"},
@@ -173,7 +196,7 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None:
                                 "configuration": {"chunk_shape": (1, 2, 3)},
                                 "name": "regular",
                             },
-                            **array_metadata,
+                            **array_metadata,  # type: ignore[dict-item]
                         }
                     ),
                     "lat": ArrayV3Metadata.from_dict(
@@ -183,7 +206,7 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None:
                                 "configuration": {"chunk_shape": (1,)},
                                 "name": "regular",
                             },
-                            **array_metadata,
+                            **array_metadata,  # type: ignore[dict-item]
                         }
                     ),
                     "lon": ArrayV3Metadata.from_dict(
@@ -193,7 +216,7 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None:
                                 "configuration": {"chunk_shape": (2,)},
                                 "name": "regular",
                             },
-                            **array_metadata,
+                            **array_metadata,  # type: ignore[dict-item]
                         }
                     ),
                     "time": ArrayV3Metadata.from_dict(
@@ -203,7 +226,7 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None:
                                 "configuration": {"chunk_shape": (3,)},
                                 "name": "regular",
                             },
-                            **array_metadata,
+                            **array_metadata,  # type: ignore[dict-item]
                         }
                     ),
                     "child": GroupMetadata(
@@ -212,7 +235,7 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None:
                             metadata={
                                 "array": ArrayV3Metadata.from_dict(
                                     {
-                                        **array_metadata,
+                                        **array_metadata,  # type: ignore[dict-item]
                                         "attributes": {"key": "child"},
                                         "shape": (4, 4),
                                         "chunk_grid": {
@@ -234,7 +257,7 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None:
                                             ),
                                             "array": ArrayV3Metadata.from_dict(
                                                 {
-                                                    **array_metadata,
+                                                    **array_metadata,  # type: ignore[dict-item]
                                                     "attributes": {"key": "grandchild"},
                                                     "shape": (4, 4),
                                                     "chunk_grid": {
@@ -294,7 +317,9 @@ def test_consolidated_sync(self, memory_store: Store) -> None:
             zarr.api.synchronous.consolidate_metadata(memory_store)
         group2 = zarr.Group.open(memory_store)
 
-        array_metadata: dict[str, JSON] = {
+        # Partial v3 array document (see `test_consolidated_metadata`): `shape`
+        # and `chunk_grid` are set per-array next to each `**array_metadata` spread.
+        array_metadata: ArrayMetadataV3Partial = {
             "attributes": {},
             "chunk_key_encoding": {
                 "configuration": {"separator": "/"},
@@ -324,7 +349,7 @@ def test_consolidated_sync(self, memory_store: Store) -> None:
                                 "configuration": {"chunk_shape": (1, 2, 3)},
                                 "name": "regular",
                             },
-                            **array_metadata,
+                            **array_metadata,  # type: ignore[dict-item]
                         }
                     ),
                     "lat": ArrayV3Metadata.from_dict(
@@ -334,7 +359,7 @@ def test_consolidated_sync(self, memory_store: Store) -> None:
                                 "configuration": {"chunk_shape": (1,)},
                                 "name": "regular",
                             },
-                            **array_metadata,
+                            **array_metadata,  # type: ignore[dict-item]
                         }
                     ),
                     "lon": ArrayV3Metadata.from_dict(
@@ -344,7 +369,7 @@ def test_consolidated_sync(self, memory_store: Store) -> None:
                                 "configuration": {"chunk_shape": (2,)},
                                 "name": "regular",
                             },
-                            **array_metadata,
+                            **array_metadata,  # type: ignore[dict-item]
                         }
                     ),
                     "time": ArrayV3Metadata.from_dict(
@@ -354,7 +379,7 @@ def test_consolidated_sync(self, memory_store: Store) -> None:
                                 "configuration": {"chunk_shape": (3,)},
                                 "name": "regular",
                             },
-                            **array_metadata,
+                            **array_metadata,  # type: ignore[dict-item]
                         }
                     ),
                 },
@@ -411,7 +436,9 @@ def test_consolidated_metadata_from_dict(self) -> None:
         ConsolidatedMetadata.from_dict(data)
 
     def test_flatten(self) -> None:
-        array_metadata: dict[str, Any] = {
+        # Partial v3 array document (see `test_consolidated_metadata`): `shape`
+        # and `chunk_grid` are set per-array next to each `**array_metadata` spread.
+        array_metadata: ArrayMetadataV3Partial = {
             "attributes": {},
             "chunk_key_encoding": {
                 "configuration": {"separator": "/"},
@@ -436,7 +463,7 @@ def test_flatten(self) -> None:
                             "configuration": {"chunk_shape": (1, 2, 3)},
                             "name": "regular",
                         },
-                        **array_metadata,
+                        **array_metadata,  # type: ignore[dict-item]
                     }
                 ),
                 "lat": ArrayV3Metadata.from_dict(
@@ -446,7 +473,7 @@ def test_flatten(self) -> None:
                             "configuration": {"chunk_shape": (1,)},
                             "name": "regular",
                         },
-                        **array_metadata,
+                        **array_metadata,  # type: ignore[dict-item]
                     }
                 ),
                 "child": GroupMetadata(
@@ -455,7 +482,7 @@ def test_flatten(self) -> None:
                         metadata={
                             "array": ArrayV3Metadata.from_dict(
                                 {
-                                    **array_metadata,
+                                    **array_metadata,  # type: ignore[dict-item]
                                     "attributes": {"key": "child"},
                                     "shape": (4, 4),
                                     "chunk_grid": {
@@ -470,7 +497,7 @@ def test_flatten(self) -> None:
                                     metadata={
                                         "array": ArrayV3Metadata.from_dict(
                                             {
-                                                **array_metadata,
+                                                **array_metadata,  # type: ignore[dict-item]
                                                 "attributes": {"key": "grandchild"},
                                                 "shape": (4, 4),
                                                 "chunk_grid": {
diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py
index d1a1ca00b4..3d4f9168fa 100644
--- a/tests/test_metadata/test_v2.py
+++ b/tests/test_metadata/test_v2.py
@@ -21,6 +21,8 @@
     from pathlib import Path
     from typing import Any
 
+    from zarr_metadata import ConsolidatedMetadataV2
+
     from zarr.abc.codec import Codec
     from zarr.core.common import JSON
 
@@ -107,7 +109,7 @@ class TestConsolidated:
     async def v2_consolidated_metadata(
         self, memory_store: zarr.storage.MemoryStore
     ) -> zarr.storage.MemoryStore:
-        zmetadata: dict[str, JSON] = {
+        zmetadata: ConsolidatedMetadataV2 = {
             "metadata": {
                 ".zattrs": {
                     "Conventions": "COARDS",
@@ -170,19 +172,19 @@ async def v2_consolidated_metadata(
         await store.set(".zmetadata", cpu.Buffer.from_bytes(json.dumps(zmetadata).encode()))
         await store.set(
             "air/.zarray",
-            cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["air/.zarray"]).encode()),  # type: ignore[index, call-overload]
+            cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["air/.zarray"]).encode()),
         )
         await store.set(
             "air/.zattrs",
-            cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["air/.zattrs"]).encode()),  # type: ignore[index, call-overload]
+            cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["air/.zattrs"]).encode()),
         )
         await store.set(
             "time/.zarray",
-            cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["time/.zarray"]).encode()),  # type: ignore[index, call-overload]
+            cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["time/.zarray"]).encode()),
         )
         await store.set(
             "time/.zattrs",
-            cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["time/.zattrs"]).encode()),  # type: ignore[index, call-overload]
+            cpu.Buffer.from_bytes(json.dumps(zmetadata["metadata"]["time/.zattrs"]).encode()),
         )
 
         # and a nested group for fun
@@ -195,13 +197,13 @@ async def v2_consolidated_metadata(
         await store.set(
             "nested/array/.zarray",
             cpu.Buffer.from_bytes(
-                json.dumps(zmetadata["metadata"]["nested/array/.zarray"]).encode()  # type: ignore[index, call-overload]
+                json.dumps(zmetadata["metadata"]["nested/array/.zarray"]).encode()
             ),
         )
         await store.set(
             "nested/array/.zattrs",
             cpu.Buffer.from_bytes(
-                json.dumps(zmetadata["metadata"]["nested/array/.zattrs"]).encode()  # type: ignore[index, call-overload]
+                json.dumps(zmetadata["metadata"]["nested/array/.zattrs"]).encode()
             ),
         )
 
diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py
index d1e156e500..7ba309995c 100644
--- a/tests/test_metadata/test_v3.py
+++ b/tests/test_metadata/test_v3.py
@@ -34,6 +34,9 @@
 if TYPE_CHECKING:
     from typing import Any
 
+    from zarr_metadata import GroupMetadataV3
+    from zarr_metadata.v3.codec.bytes import BytesCodecObject
+
 
 # ---------------------------------------------------------------------------
 # Parsing helpers
@@ -178,8 +181,13 @@ def test_array_metadata_keys_matches_typeddict() -> None:
 # ---------------------------------------------------------------------------
 
 # Codecs after evolution for single-byte (uint8) and multi-byte (float64) types.
+# The uint8 case omits `configuration`, which the floor-pinned zarr-metadata
+# 0.1.1 marks as required, so `_UINT8_CODECS` stays unannotated until a
+# release with the relaxed shape ships.
 _UINT8_CODECS = ({"name": "bytes"},)
-_FLOAT64_CODECS = ({"name": "bytes", "configuration": {"endian": "little"}},)
+_FLOAT64_CODECS: tuple[BytesCodecObject, ...] = (
+    {"name": "bytes", "configuration": {"endian": "little"}},
+)
 
 
 @pytest.mark.parametrize(
@@ -448,7 +456,11 @@ def test_group_metadata_to_dict_consolidated(attributes: dict[str, Any] | None)
     ):
         group = consolidate_metadata(store)
 
-    assert group.metadata.to_dict() == {
+    # `consolidated_metadata` is an `ExtensionFieldV3` (extra key allowed
+    # on `GroupMetadataV3` via PEP 728 extra_items=ExtensionFieldV3). mypy
+    # doesn't honor PEP 728 yet and reports `typeddict-unknown-key`; the
+    # annotation is correct, so the error code is ignored at the literal.
+    expected: GroupMetadataV3 = {  # type: ignore[typeddict-unknown-key]
         "zarr_format": 3,
         "node_type": "group",
         "attributes": attributes or {},
@@ -469,3 +481,4 @@ def test_group_metadata_to_dict_consolidated(attributes: dict[str, Any] | None)
             },
         },
     }
+    assert group.metadata.to_dict() == expected
diff --git a/uv.lock b/uv.lock
index adc71bae62..61669dd3cf 100644
--- a/uv.lock
+++ b/uv.lock
@@ -6,6 +6,12 @@ resolution-markers = [
     "python_full_version < '3.15'",
 ]
 
+[manifest]
+members = [
+    "zarr",
+    "zarr-metadata",
+]
+
 [[package]]
 name = "aiobotocore"
 version = "3.7.0"
@@ -3952,6 +3958,7 @@ dependencies = [
     { name = "numpy" },
     { name = "packaging" },
     { name = "typing-extensions" },
+    { name = "zarr-metadata" },
 ]
 
 [package.optional-dependencies]
@@ -4077,6 +4084,7 @@ requires-dist = [
     { name = "typer", marker = "extra == 'cli'" },
     { name = "typing-extensions", specifier = ">=4.14" },
     { name = "universal-pathlib", marker = "extra == 'optional'" },
+    { name = "zarr-metadata", editable = "packages/zarr-metadata" },
 ]
 provides-extras = ["cast-value-rs", "cli", "gpu", "optional", "remote"]
 
@@ -4168,3 +4176,25 @@ test = [
     { name = "tomlkit", specifier = "==0.15.0" },
     { name = "uv", specifier = "==0.11.20" },
 ]
+
+[[package]]
+name = "zarr-metadata"
+source = { editable = "packages/zarr-metadata" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+
+[package.dev-dependencies]
+test = [
+    { name = "pydantic" },
+    { name = "pytest" },
+]
+
+[package.metadata]
+requires-dist = [{ name = "typing-extensions", specifier = ">=4.13" }]
+
+[package.metadata.requires-dev]
+test = [
+    { name = "pydantic", specifier = ">=2" },
+    { name = "pytest" },
+]