From 01bf45932fd3a70b972694f7bc3634c55fb476f2 Mon Sep 17 00:00:00 2001
From: Saesha <85289569+psaesha@users.noreply.github.com>
Date: Thu, 18 Jun 2026 01:12:28 +0530
Subject: [PATCH] Revert "Dev"

---
 .gitignore                                   |   2 -
 api/models/Resource.py                       |   7 +-
 api/schema/access_model_schema.py            |  28 +-
 api/schema/aimodel_schema.py                 |  14 +-
 api/schema/dataset_schema.py                 |   1 -
 api/schema/resource_chart_schema.py          |  22 +-
 api/schema/resource_schema.py                |  74 ++--
 api/schema/usecase_schema.py                 |  39 +-
 api/types/type_aimodel.py                    |   2 -
 api/types/type_collaborative.py              |  22 +-
 api/types/type_collaborative_organization.py |   2 -
 api/types/type_metadata.py                   |   2 -
 api/types/type_resource_chart.py             |  14 +-
 api/types/type_resource_chart_image.py       |   2 -
 api/types/type_usecase.py                    |  18 +-
 api/types/type_usecase_organization.py       |   2 -
 api/urls.py                                  |  16 -
 api/utils/data_indexing.py                   | 224 +-----------
 api/utils/keycloak_utils.py                  |   6 +-
 api/views/dataset_data.py                    | 359 -------------------
 dataspace_sdk/__version__.py                 |   2 +-
 dataspace_sdk/resources/datasets.py          | 201 +----------
 docs/dataset_data_api.md                     | 266 --------------
 docs/sdk/README.md                           |  44 ---
 tests/test_data_indexing_filters.py          | 174 ---------
 tests/test_datasets.py                       | 112 ------
 tests/test_settings.py                       |  53 +--
 27 files changed, 178 insertions(+), 1530 deletions(-)
 delete mode 100644 api/views/dataset_data.py
 delete mode 100644 docs/dataset_data_api.md
 delete mode 100644 tests/test_data_indexing_filters.py

diff --git a/.gitignore b/.gitignore
index 18886c7e..8fd0cefe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -168,5 +168,3 @@ files/public/*
 #dvc files
 dvc
 dvc/*
-
-.DS_Store
diff --git a/api/models/Resource.py b/api/models/Resource.py
index 745878f3..35562f26 100644
--- a/api/models/Resource.py
+++ b/api/models/Resource.py
@@ -188,12 +188,7 @@ def version_resource_with_dvc(sender, instance: ResourceFileDetails, created, **
                     # Create a temporary directory for the previous version
                     with tempfile.TemporaryDirectory() as temp_dir:
                         # Get the previous version file path
-                        file_name = instance.file.name
-                        if not file_name:
-                            raise ValueError("File name is missing")
-                        prev_file_name = (
-                            f"prev_version_{instance.resource.id}.{file_name.split('.')[-1]}"
-                        )
+                        prev_file_name = f"prev_version_{instance.resource.id}.{instance.file.name.split('.')[-1]}"
                         prev_file_path = os.path.join(temp_dir, prev_file_name)
 
                         # Use DVC to get the previous version
diff --git a/api/schema/access_model_schema.py b/api/schema/access_model_schema.py
index d5dece23..84ed8b09 100644
--- a/api/schema/access_model_schema.py
+++ b/api/schema/access_model_schema.py
@@ -1,5 +1,3 @@
-# mypy: disable-error-code="valid-type"
-
 import uuid
 from enum import Enum
 from typing import Any, Dict, List, Optional, Union
@@ -51,7 +49,9 @@ class EditAccessModelInput:
 @strawberry.type(name="Query")
 class Query:
     @strawberry_django.field
-    def access_model_resources(self, info: Info, dataset_id: uuid.UUID) -> List[TypeAccessModel]:
+    def access_model_resources(
+        self, info: Info, dataset_id: uuid.UUID
+    ) -> List[TypeAccessModel]:
         models = AccessModel.objects.filter(dataset_id=dataset_id)
         return [TypeAccessModel.from_django(model) for model in models]
 
@@ -88,12 +88,16 @@ def _add_update_access_model_resources(
         try:
             dataset_resource = Resource.objects.get(id=resource_input.resource)
         except Resource.DoesNotExist as e:
-            raise ValueError(f"Resource with ID {resource_input.resource} does not exist.")
+            raise ValueError(
+                f"Resource with ID {resource_input.resource} does not exist."
+            )
 
         access_model_resource = AccessModelResource.objects.create(
             access_model=access_model, resource=dataset_resource
         )
-        _add_resource_fields(access_model_resource, dataset_resource, resource_input.fields)
+        _add_resource_fields(
+            access_model_resource, dataset_resource, resource_input.fields
+        )
 
 
 def _update_access_model_fields(
@@ -118,13 +122,15 @@ def create_access_model(
         try:
             dataset = Dataset.objects.get(id=access_model_input.dataset)
         except Dataset.DoesNotExist:
-            raise ValueError(f"Dataset with ID {access_model_input.dataset} does not exist.")
+            raise ValueError(
+                f"Dataset with ID {access_model_input.dataset} does not exist."
+            )
 
         access_model = AccessModel.objects.create(
             dataset=dataset,
             name=access_model_input.name,
             description=access_model_input.description,
-            type=access_model_input.type.value,  # type: ignore[attr-defined]
+            type=access_model_input.type.value,
         )
 
         _update_access_model_fields(access_model, access_model_input)
@@ -139,11 +145,15 @@ def edit_access_model(
             try:
                 dataset = Dataset.objects.get(id=access_model_input.dataset)
             except Dataset.DoesNotExist as e:
-                raise ValueError(f"Dataset with ID {access_model_input.dataset} does not exist.")
+                raise ValueError(
+                    f"Dataset with ID {access_model_input.dataset} does not exist."
+                )
             access_model = AccessModel.objects.create(dataset=dataset)
         else:
             try:
-                access_model = AccessModel.objects.get(id=access_model_input.access_model_id)
+                access_model = AccessModel.objects.get(
+                    id=access_model_input.access_model_id
+                )
             except AccessModel.DoesNotExist as e:
                 raise ValueError(
                     f"Access Model with ID {access_model_input.access_model_id} does not exist."
diff --git a/api/schema/aimodel_schema.py b/api/schema/aimodel_schema.py
index 0b059a45..f2075a22 100644
--- a/api/schema/aimodel_schema.py
+++ b/api/schema/aimodel_schema.py
@@ -1,9 +1,9 @@
 """GraphQL schema for AI Model."""
 
-# mypy: disable-error-code="union-attr,misc,valid-type"
+# mypy: disable-error-code="union-attr,misc"
 
 import datetime
-from typing import Any, Dict, List, Optional
+from typing import List, Optional
 
 import strawberry
 import strawberry_django
@@ -419,14 +419,14 @@ def create_ai_model(
         description = input.description or ""
 
         # Prepare supported_languages
-        supported_languages: List[str] = input.supported_languages or []
+        supported_languages = input.supported_languages or []
 
         # Prepare schemas
-        input_schema: Any = input.input_schema or {}
-        output_schema: Any = input.output_schema or {}
+        input_schema = input.input_schema or {}
+        output_schema = input.output_schema or {}
 
         # Prepare metadata
-        metadata: Any = input.metadata or {}
+        metadata = input.metadata or {}
 
         try:
             model = AIModel.objects.create(
@@ -802,7 +802,7 @@ def create_ai_model_version(
             ai_model=model,
             version=input.version,
             version_notes=input.version_notes or "",
-            lifecycle_stage=input.lifecycle_stage.value if input.lifecycle_stage else "DEVELOPMENT",  # type: ignore[attr-defined]
+            lifecycle_stage=input.lifecycle_stage.value if input.lifecycle_stage else "DEVELOPMENT",  # type: ignore[misc]
             supports_streaming=input.supports_streaming,
             max_tokens=input.max_tokens,
             supported_languages=input.supported_languages or [],
diff --git a/api/schema/dataset_schema.py b/api/schema/dataset_schema.py
index d70d1cb2..e0adc049 100644
--- a/api/schema/dataset_schema.py
+++ b/api/schema/dataset_schema.py
@@ -1,5 +1,4 @@
 # mypy: disable-error-code=union-attr
-# mypy: disable-error-code=valid-type
 import datetime
 import uuid
 from typing import Any, List, Optional, Union
diff --git a/api/schema/resource_chart_schema.py b/api/schema/resource_chart_schema.py
index bf8d04da..acb34b78 100644
--- a/api/schema/resource_chart_schema.py
+++ b/api/schema/resource_chart_schema.py
@@ -1,5 +1,3 @@
-# mypy: disable-error-code=valid-type
-
 import datetime
 import uuid
 from typing import Any, Dict, List, Optional
@@ -21,12 +19,16 @@
 @strawberry.type(name="Query")
 class Query:
     @strawberry_django.field
-    def charts_details(self, info: Info, dataset_id: uuid.UUID) -> List[TypeResourceChart]:
+    def charts_details(
+        self, info: Info, dataset_id: uuid.UUID
+    ) -> List[TypeResourceChart]:
         charts = ResourceChartDetails.objects.filter(resource__dataset_id=dataset_id)
         return [TypeResourceChart.from_django(chart) for chart in charts]
 
     @strawberry_django.field
-    def resource_chart(self, info: Info, chart_details_id: uuid.UUID) -> TypeResourceChart:
+    def resource_chart(
+        self, info: Info, chart_details_id: uuid.UUID
+    ) -> TypeResourceChart:
         chart = ResourceChartDetails.objects.get(id=chart_details_id)
         return TypeResourceChart.from_django(chart)
 
@@ -186,10 +188,14 @@ def _update_chart_fields(
                     if value:  # Only process if list is not empty
                         options[field_name] = [
                             {
-                                "field": ResourceSchema.objects.get(id=column.field_name),
+                                "field": ResourceSchema.objects.get(
+                                    id=column.field_name
+                                ),
                                 "label": column.label,
                                 "color": column.color,
-                                "value_mapping": _update_value_mapping(column.value_mapping),
+                                "value_mapping": _update_value_mapping(
+                                    column.value_mapping
+                                ),
                             }
                             for column in value
                         ]
@@ -289,7 +295,9 @@ def create_resource_chart(
             )
         ],
     )
-    def edit_resource_chart(self, info: Info, chart_input: ResourceChartInput) -> TypeResourceChart:
+    def edit_resource_chart(
+        self, info: Info, chart_input: ResourceChartInput
+    ) -> TypeResourceChart:
         if not chart_input.chart_id:
             chart = ResourceChartDetails()
         else:
diff --git a/api/schema/resource_schema.py b/api/schema/resource_schema.py
index 85fbc965..06414f71 100644
--- a/api/schema/resource_schema.py
+++ b/api/schema/resource_schema.py
@@ -3,7 +3,6 @@
 from enum import Enum
 
 # mypy: disable-error-code=operator
-# mypy: disable-error-code=valid-type
 from typing import List, Optional
 
 import strawberry
@@ -112,7 +111,9 @@ class Query:
 
     @strawberry_django.field
     @trace_resolver(name="get_dataset_resources", attributes={"component": "resource"})
-    def dataset_resources(self, info: Info, dataset_id: uuid.UUID) -> List[TypeResource]:
+    def dataset_resources(
+        self, info: Info, dataset_id: uuid.UUID
+    ) -> List[TypeResource]:
         """Get resources for a dataset."""
         resources = Resource.objects.filter(dataset_id=dataset_id)
         return [TypeResource.from_django(resource) for resource in resources]
@@ -161,17 +162,23 @@ def _reset_file_resource_schema(resource: Resource) -> None:
     data_table = index_resource_data(resource)
 
 
-def _update_file_resource_schema(resource: Resource, updated_schema: List[SchemaUpdate]) -> None:
+def _update_file_resource_schema(
+    resource: Resource, updated_schema: List[SchemaUpdate]
+) -> None:
     """Update file resource schema and re-index if necessary."""
     # Check if we need to re-index after schema update
     format_changes = False
 
     # Update schema fields
-    existing_schema: QuerySet[ResourceSchema] = ResourceSchema.objects.filter(resource=resource)
+    existing_schema: QuerySet[ResourceSchema] = ResourceSchema.objects.filter(
+        resource=resource
+    )
 
     for schema in existing_schema:  # type: ResourceSchema
         try:
-            schema_change = next(item for item in updated_schema if item.id == str(schema.id))
+            schema_change = next(
+                item for item in updated_schema if item.id == str(schema.id)
+            )
             # Check if format is changing, which might require re-indexing
             if schema.format != schema_change.format.value:
                 format_changes = True
@@ -181,7 +188,9 @@ def _update_file_resource_schema(resource: Resource, updated_schema: List[Schema
             schema.format = schema_change.format.value
             schema.save()
 
-            logger.info(f"Updated schema field {schema.field_name} for resource {resource.id}")
+            logger.info(
+                f"Updated schema field {schema.field_name} for resource {resource.id}"
+            )
         except StopIteration:
             continue
 
@@ -201,8 +210,12 @@ def _update_resource_preview_details(
     if file_resource_input.preview_details:
         # If preview_details already exists, update it
         if preview_details:
-            preview_details.is_all_entries = file_resource_input.preview_details.is_all_entries
-            preview_details.start_entry = file_resource_input.preview_details.start_entry
+            preview_details.is_all_entries = (
+                file_resource_input.preview_details.is_all_entries
+            )
+            preview_details.start_entry = (
+                file_resource_input.preview_details.start_entry
+            )
             preview_details.end_entry = file_resource_input.preview_details.end_entry
             preview_details.save()
         # Otherwise, create a new one
@@ -247,9 +260,9 @@ def create_file_resources(
             raise ValueError(f"Dataset with ID {dataset_id} does not exist.")
 
         for file in file_resource_input.files:
-            resource = Resource.objects.create(name=file.name, dataset=dataset)  # type: ignore[attr-defined]
+            resource = Resource.objects.create(name=file.name, dataset=dataset)
             ResourceFileDetails.objects.create(
-                file=file, size=file.size, resource=resource  # type: ignore[attr-defined]
+                file=file, size=file.size, resource=resource
             )
             _validate_file_details_and_update_format(resource)
             _create_file_resource_schema(resource)
@@ -292,7 +305,11 @@ def create_file_resource(
                     "resource_id": str(result.id),
                     "resource_name": result.name,
                     "updated_fields": {
-                        "name": (file_resource_input.name if file_resource_input.name else None),
+                        "name": (
+                            file_resource_input.name
+                            if file_resource_input.name
+                            else None
+                        ),
                         "description": (
                             file_resource_input.description
                             if file_resource_input.description is not None
@@ -300,7 +317,8 @@ def create_file_resource(
                         ),
                         "preview_enabled": file_resource_input.preview_enabled,
                         "file_updated": file_resource_input.file is not None,
-                        "preview_details_updated": file_resource_input.preview_details is not None,
+                        "preview_details_updated": file_resource_input.preview_details
+                        is not None,
                     },
                 },
             )
@@ -314,7 +332,9 @@ def update_file_resource(
         try:
             resource = Resource.objects.get(id=file_resource_input.id)
         except Resource.DoesNotExist as e:
-            raise ValueError(f"Resource with ID {file_resource_input.id} does not exist.")
+            raise ValueError(
+                f"Resource with ID {file_resource_input.id} does not exist."
+            )
 
         if file_resource_input.name:
             resource.name = file_resource_input.name
@@ -327,12 +347,12 @@ def update_file_resource(
             file_details = getattr(resource, "resourcefiledetails", None)
             if file_details:
                 file_details.file = file_resource_input.file
-                file_details.size = file_resource_input.file.size  # type: ignore[attr-defined]
+                file_details.size = file_resource_input.file.size
                 file_details.save()
             else:
                 ResourceFileDetails.objects.create(
                     file=file_resource_input.file,
-                    size=file_resource_input.file.size,  # type: ignore[attr-defined]
+                    size=file_resource_input.file.size,
                     resource=resource,
                 )
             _validate_file_details_and_update_format(resource)
@@ -344,7 +364,9 @@ def update_file_resource(
         return TypeResource.from_django(resource)
 
     @strawberry_django.mutation(handle_django_errors=True)
-    @trace_resolver(name="update_file_resource_schema", attributes={"component": "resource"})
+    @trace_resolver(
+        name="update_file_resource_schema", attributes={"component": "resource"}
+    )
     def update_file_resource_schema(
         self, info: Info, schema_update_input: SchemaUpdateInput
     ) -> TypeResource:
@@ -352,14 +374,20 @@ def update_file_resource_schema(
         try:
             resource = Resource.objects.get(id=schema_update_input.resource)
         except Resource.DoesNotExist as e:
-            raise ValueError(f"Resource with ID {schema_update_input.resource} does not exist.")
+            raise ValueError(
+                f"Resource with ID {schema_update_input.resource} does not exist."
+            )
 
         _update_file_resource_schema(resource, schema_update_input.updates)
         return TypeResource.from_django(resource)
 
     @strawberry_django.mutation(handle_django_errors=True)
-    @trace_resolver(name="reset_file_resource_schema", attributes={"component": "resource"})
-    def reset_file_resource_schema(self, info: Info, resource_id: uuid.UUID) -> TypeResource:
+    @trace_resolver(
+        name="reset_file_resource_schema", attributes={"component": "resource"}
+    )
+    def reset_file_resource_schema(
+        self, info: Info, resource_id: uuid.UUID
+    ) -> TypeResource:
         """Reset file resource schema."""
         try:
             resource = Resource.objects.get(id=resource_id)
@@ -406,7 +434,9 @@ def delete_file_resource(self, info: Info, resource_id: uuid.UUID) -> bool:
         ],
     )
     @trace_resolver(name="create_major_version", attributes={"component": "resource"})
-    def create_major_version(self, info: Info, input: CreateMajorVersionInput) -> TypeResource:
+    def create_major_version(
+        self, info: Info, input: CreateMajorVersionInput
+    ) -> TypeResource:
         """Create a major version for a resource.
 
         This should be used when significant changes are made to the resource data structure,
@@ -432,7 +462,9 @@ def create_major_version(self, info: Info, input: CreateMajorVersionInput) -> Ty
             new_version = "v1.0.0"
         else:
             # Increment major version
-            new_version = _increment_version(last_version.version_number, increment_type="major")
+            new_version = _increment_version(
+                last_version.version_number, increment_type="major"
+            )
 
         # Initialize DVC manager
         dvc = DVCManager(settings.DVC_REPO_PATH)
diff --git a/api/schema/usecase_schema.py b/api/schema/usecase_schema.py
index a4c905f9..e49b829b 100644
--- a/api/schema/usecase_schema.py
+++ b/api/schema/usecase_schema.py
@@ -1,7 +1,6 @@
 """Schema definitions for use cases."""
 
 # mypy: disable-error-code=operator
-# mypy: disable-error-code=valid-type
 
 import datetime
 import uuid
@@ -169,14 +168,18 @@ def published_use_cases(
         return TypeUseCase.from_django_list(results)
 
     @strawberry_django.field
-    @trace_resolver(name="get_datasets_by_use_case", attributes={"component": "usecase"})
+    @trace_resolver(
+        name="get_datasets_by_use_case", attributes={"component": "usecase"}
+    )
     def dataset_by_use_case(self, info: Info, use_case_id: str) -> list[TypeDataset]:
         """Get datasets by use case."""
         queryset = Dataset.objects.filter(usecase__id=use_case_id)
         return TypeDataset.from_django_list(queryset)
 
     @strawberry_django.field
-    @trace_resolver(name="get_contributors_by_use_case", attributes={"component": "usecase"})
+    @trace_resolver(
+        name="get_contributors_by_use_case", attributes={"component": "usecase"}
+    )
     def contributors_by_use_case(self, info: Info, use_case_id: str) -> list[TypeUser]:
         """Get contributors by use case."""
         try:
@@ -191,7 +194,9 @@ def contributors_by_use_case(self, info: Info, use_case_id: str) -> list[TypeUse
 def _update_usecase_tags(usecase: UseCase, tags: List[str]) -> None:
     usecase.tags.clear()
     for tag in tags:
-        usecase.tags.add(Tag.objects.get_or_create(defaults={"value": tag}, value__iexact=tag)[0])
+        usecase.tags.add(
+            Tag.objects.get_or_create(defaults={"value": tag}, value__iexact=tag)[0]
+        )
     usecase.save()
 
 
@@ -235,7 +240,9 @@ def _add_update_usecase_metadata(
             metadata_field = Metadata.objects.get(id=metadata_input_item.id)
             if not metadata_field.enabled:
                 _delete_existing_metadata(usecase)
-                raise ValueError(f"Metadata with ID {metadata_input_item.id} is not enabled.")
+                raise ValueError(
+                    f"Metadata with ID {metadata_input_item.id} is not enabled."
+                )
             uc_metadata = UseCaseMetadata(
                 usecase=usecase,
                 metadata_item=metadata_field,
@@ -244,7 +251,9 @@ def _add_update_usecase_metadata(
             uc_metadata.save()
         except Metadata.DoesNotExist:
             _delete_existing_metadata(usecase)
-            raise ValueError(f"Metadata with ID {metadata_input_item.id} does not exist.")
+            raise ValueError(
+                f"Metadata with ID {metadata_input_item.id} does not exist."
+            )
 
 
 @trace_resolver(name="delete_existing_metadata", attributes={"component": "usecase"})
@@ -320,7 +329,10 @@ def add_use_case(self, info: Info) -> TypeUseCase:
                             else None
                         ),
                         "sectors": (
-                            [str(sector_id) for sector_id in update_metadata_input.sectors]
+                            [
+                                str(sector_id)
+                                for sector_id in update_metadata_input.sectors
+                            ]
                             if update_metadata_input.sectors
                             else []
                         ),
@@ -383,7 +395,10 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase:
             usecase.started_on = data.started_on
         if data.completed_on is not None and data.completed_on is not strawberry.UNSET:
             usecase.completed_on = data.completed_on
-        if data.running_status is not None and data.running_status is not strawberry.UNSET:
+        if (
+            data.running_status is not None
+            and data.running_status is not strawberry.UNSET
+        ):
             usecase.running_status = data.running_status
         if data.logo is not None and data.logo is not strawberry.UNSET:
             usecase.logo = data.logo
@@ -395,7 +410,9 @@ def update_use_case(self, info: Info, data: UseCaseInputPartial) -> TypeUseCase:
         extensions=[
             TrackActivity(
                 verb="deleted",
-                get_data=lambda info, use_case_id, **kwargs: {"usecase_id": use_case_id},
+                get_data=lambda info, use_case_id, **kwargs: {
+                    "usecase_id": use_case_id
+                },
             )
         ],
     )
@@ -593,7 +610,9 @@ def remove_contributor_from_use_case(
                 get_data=lambda result, use_case_id, user_ids, **kwargs: {
                     "usecase_id": use_case_id,
                     "usecase_title": result.title,
-                    "updated_fields": {"contributors": [str(user_id) for user_id in user_ids]},
+                    "updated_fields": {
+                        "contributors": [str(user_id) for user_id in user_ids]
+                    },
                 },
             )
         ],
diff --git a/api/types/type_aimodel.py b/api/types/type_aimodel.py
index 60c7c7b7..7870d8cb 100644
--- a/api/types/type_aimodel.py
+++ b/api/types/type_aimodel.py
@@ -1,5 +1,3 @@
-# mypy: disable-error-code="valid-type"
-
 """GraphQL types for AI Model."""
 
 import uuid
diff --git a/api/types/type_collaborative.py b/api/types/type_collaborative.py
index 89a3cf9e..11805ad4 100644
--- a/api/types/type_collaborative.py
+++ b/api/types/type_collaborative.py
@@ -1,5 +1,3 @@
-# mypy: disable-error-code="valid-type"
-
 from typing import List, Optional
 
 import strawberry
@@ -67,7 +65,9 @@ class TypeCollaborative(BaseType):
         description="URL of the platform where this collaborative is published"
     )
 
-    @strawberry.field(description="Check if this collaborative is created by an individual user.")
+    @strawberry.field(
+        description="Check if this collaborative is created by an individual user."
+    )
     def is_individual_collaborative(self) -> bool:
         """Check if this collaborative is created by an individual user."""
         return self.organization is None
@@ -106,7 +106,9 @@ def use_cases(self) -> Optional[List["TypeUseCase"]]:
         except Exception:
             return []
 
-    @strawberry.field(description="Get the count of datasets associated with this collaborative.")
+    @strawberry.field(
+        description="Get the count of datasets associated with this collaborative."
+    )
     def dataset_count(self: "TypeCollaborative", info: Info) -> int:
         """Get the count of datasets associated with this collaborative."""
         try:
@@ -114,7 +116,9 @@ def dataset_count(self: "TypeCollaborative", info: Info) -> int:
         except Exception:
             return 0
 
-    @strawberry.field(description="Get the count of use cases associated with this collaborative.")
+    @strawberry.field(
+        description="Get the count of use cases associated with this collaborative."
+    )
     def use_case_count(self: "TypeCollaborative", info: Info) -> int:
         """Get the count of use cases associated with this collaborative."""
         try:
@@ -177,7 +181,9 @@ def metadata(self) -> Optional[List["TypeCollaborativeMetadata"]]:
         except Exception:
             return []
 
-    @strawberry.field(description="Get contributors associated with this collaborative.")
+    @strawberry.field(
+        description="Get contributors associated with this collaborative."
+    )
     def contributors(self) -> Optional[List["TypeUser"]]:
         """Get contributors associated with this collaborative."""
         try:
@@ -203,7 +209,9 @@ def organization_relationships(
         except Exception:
             return []
 
-    @strawberry.field(description="Get supporting organizations for this collaborative.")
+    @strawberry.field(
+        description="Get supporting organizations for this collaborative."
+    )
     def supporting_organizations(self) -> Optional[List["TypeOrganization"]]:
         """Get supporting organizations for this collaborative."""
         try:
diff --git a/api/types/type_collaborative_organization.py b/api/types/type_collaborative_organization.py
index e6cfecfe..662430f7 100644
--- a/api/types/type_collaborative_organization.py
+++ b/api/types/type_collaborative_organization.py
@@ -1,5 +1,3 @@
-# mypy: disable-error-code="valid-type"
-
 """GraphQL type for UseCase-Organization relationship."""
 
 from typing import Optional
diff --git a/api/types/type_metadata.py b/api/types/type_metadata.py
index 132cc598..304b110a 100644
--- a/api/types/type_metadata.py
+++ b/api/types/type_metadata.py
@@ -1,5 +1,3 @@
-# mypy: disable-error-code="valid-type"
-
 from enum import Enum
 from typing import List, Optional
 
diff --git a/api/types/type_resource_chart.py b/api/types/type_resource_chart.py
index 51e4161b..45e62485 100644
--- a/api/types/type_resource_chart.py
+++ b/api/types/type_resource_chart.py
@@ -1,5 +1,3 @@
-# mypy: disable-error-code="valid-type"
-
 import json
 import uuid
 from datetime import datetime
@@ -316,7 +314,9 @@ def chart_options(self) -> Optional[ChartOptionsType]:
                 point_size=options_dict.get("point_size"),
                 # Geospatial Map Chart options
                 geospatial_field=(
-                    ensure_type(options_dict.get("geospatial_field"), TypeResourceSchema)
+                    ensure_type(
+                        options_dict.get("geospatial_field"), TypeResourceSchema
+                    )
                     if options_dict.get("geospatial_field")
                     else None
                 ),
@@ -332,7 +332,9 @@ def chart_filters(self) -> List[FilterType]:
             return [
                 FilterType(
                     column=(
-                        ensure_type(f["column"], TypeResourceSchema) if f.get("column") else None
+                        ensure_type(f["column"], TypeResourceSchema)
+                        if f.get("column")
+                        else None
                     ),
                     operator=f["operator"],
                     value=f["value"],
@@ -350,7 +352,9 @@ def chart(self, info: Info) -> Optional[ChartConfig]:
             return None
 
         # Convert chart to JSON-serializable format
-        chart_options = chart_instance.dump_options_with_quotes() if chart_instance else None
+        chart_options = (
+            chart_instance.dump_options_with_quotes() if chart_instance else None
+        )
         if not chart_options:
             return None
 
diff --git a/api/types/type_resource_chart_image.py b/api/types/type_resource_chart_image.py
index b0d87623..d0cf6acb 100644
--- a/api/types/type_resource_chart_image.py
+++ b/api/types/type_resource_chart_image.py
@@ -1,5 +1,3 @@
-# mypy: disable-error-code="valid-type"
-
 from typing import Optional
 
 import strawberry
diff --git a/api/types/type_usecase.py b/api/types/type_usecase.py
index 96fd0d92..821379ea 100644
--- a/api/types/type_usecase.py
+++ b/api/types/type_usecase.py
@@ -1,5 +1,3 @@
-# mypy: disable-error-code="valid-type"
-
 from typing import List, Optional
 
 import strawberry
@@ -66,7 +64,9 @@ class TypeUseCase(BaseType):
         description="URL of the platform where this use case is published"
     )
 
-    @strawberry.field(description="Check if this use case is created by an individual user.")
+    @strawberry.field(
+        description="Check if this use case is created by an individual user."
+    )
     def is_individual_usecase(self) -> bool:
         """Check if this use case is created by an individual user."""
         return self.organization is None
@@ -83,7 +83,9 @@ def datasets(self) -> Optional[List["TypeDataset"]]:
         except Exception:
             return []
 
-    @strawberry.field(description="Get the count of datasets associated with this use case.")
+    @strawberry.field(
+        description="Get the count of datasets associated with this use case."
+    )
     def dataset_count(self: "TypeUseCase", info: Info) -> int:
         """Get the count of datasets associated with this use case."""
         try:
@@ -168,7 +170,9 @@ def contributors(self) -> Optional[List["TypeUser"]]:
         except Exception:
             return []
 
-    @strawberry.field(description="Get organization relationships associated with this use case.")
+    @strawberry.field(
+        description="Get organization relationships associated with this use case."
+    )
     def organization_relationships(
         self,
     ) -> Optional[List["TypeUseCaseOrganizationRelationship"]]:
@@ -215,7 +219,9 @@ def partner_organizations(self) -> Optional[List["TypeOrganization"]]:
         except Exception:
             return []
 
-    @strawberry.field(description="Get Usecase dashboard associated with this use case.")
+    @strawberry.field(
+        description="Get Usecase dashboard associated with this use case."
+    )
     def usecase_dashboard(self) -> Optional[List["TypeUseCaseDashboard"]]:
         """Get Usecase dashboard associated with this use case."""
         try:
diff --git a/api/types/type_usecase_organization.py b/api/types/type_usecase_organization.py
index 2f7fd2f2..2430dc9e 100644
--- a/api/types/type_usecase_organization.py
+++ b/api/types/type_usecase_organization.py
@@ -1,5 +1,3 @@
-# mypy: disable-error-code="valid-type"
-
 """GraphQL type for UseCase-Organization relationship."""
 
 from typing import Optional
diff --git a/api/urls.py b/api/urls.py
index c0bb1d65..6d67c407 100644
--- a/api/urls.py
+++ b/api/urls.py
@@ -11,7 +11,6 @@
     aimodel_execution,
     auditor,
     auth,
-    dataset_data,
     download,
     generate_dynamic_chart,
     search_aimodel,
@@ -78,21 +77,6 @@
         trending_datasets.TrendingDatasets.as_view(),
         name="trending_datasets",
     ),
-    path(
-        "resources/<uuid:resource_id>/data/",
-        dataset_data.ResourceDataView.as_view(),
-        name="resource_data",
-    ),
-    path(
-        "datasets/<uuid:dataset_id>/data/",
-        dataset_data.DatasetDataView.as_view(),
-        name="dataset_data",
-    ),
-    path(
-        "datasets/<uuid:dataset_id>/prompts/",
-        dataset_data.PromptDatasetDataView.as_view(),
-        name="prompt_dataset_data",
-    ),
     # Single, simple GraphQL endpoint with no redirects
     path(
         "graphql",
diff --git a/api/utils/data_indexing.py b/api/utils/data_indexing.py
index 8e817b88..ce30f0ca 100644
--- a/api/utils/data_indexing.py
+++ b/api/utils/data_indexing.py
@@ -1,10 +1,9 @@
-from typing import Any, Dict, Generator, List, Optional, Tuple
+from typing import Any, Dict, Generator, Optional
 
 import pandas as pd
 import structlog
 from django.db import connections, transaction
 from django.db.utils import ProgrammingError
-from psycopg2 import sql as pg_sql  # type: ignore[import-untyped]
 
 from api.models.Resource import Resource, ResourceDataTable
 from api.models.ResourceSchema import ResourceSchema
@@ -16,27 +15,6 @@
 # Use a separate database for data tables
 DATA_DB = "data_db"  # This should match the connection name in settings.py
 
-# Allowed comparison operators for column-based filtering on indexed data.
-# Maps operator suffix -> (sql_template_with_{ph}_placeholder, value_transformer)
-_FILTER_OPERATORS: Dict[str, Tuple[str, Any]] = {
-    "eq": ("= %s", lambda v: v),
-    "ne": ("<> %s", lambda v: v),
-    "gt": ("> %s", lambda v: v),
-    "gte": (">= %s", lambda v: v),
-    "lt": ("< %s", lambda v: v),
-    "lte": ("<= %s", lambda v: v),
-    "in": ("= ANY(%s)", lambda v: list(v) if not isinstance(v, list) else v),
-    "nin": ("<> ALL(%s)", lambda v: list(v) if not isinstance(v, list) else v),
-    "contains": ("LIKE %s", lambda v: f"%{v}%"),
-    "icontains": ("ILIKE %s", lambda v: f"%{v}%"),
-    "startswith": ("LIKE %s", lambda v: f"{v}%"),
-    "istartswith": ("ILIKE %s", lambda v: f"{v}%"),
-    "endswith": ("LIKE %s", lambda v: f"%{v}"),
-    "iendswith": ("ILIKE %s", lambda v: f"%{v}"),
-    "isnull": ("IS NULL", None),  # value ignored
-    "notnull": ("IS NOT NULL", None),
-}
-
 
 def get_sql_type(pandas_dtype: str) -> str:
     """Convert pandas dtype to SQL type."""
@@ -454,203 +432,3 @@ def get_preview_data(resource: Resource) -> Optional[PreviewData]:
             f"Error getting preview data for resource {resource.id}: {str(e)}, traceback: {traceback.format_exc()}"
         )
         return None
-
-
-# Maximum rows that can be returned in a single fetch_resource_data call
-MAX_FETCH_LIMIT = 10000
-DEFAULT_FETCH_LIMIT = 100
-
-
-class DataFetchError(Exception):
-    """Raised when fetch_resource_data receives invalid input."""
-
-
-def get_resource_columns(resource: Resource) -> List[str]:
-    """Return the list of indexed column names for a resource.
-
-    Falls back to inspecting the data_db table if no ResourceSchema rows exist.
-    """
-    cols = list(
-        ResourceSchema.objects.filter(resource=resource).values_list("field_name", flat=True)
-    )
-    if cols:
-        return cols
-    # Fallback: introspect the table directly
-    try:
-        data_table = ResourceDataTable.objects.get(resource=resource)
-        with connections[DATA_DB].cursor() as cursor:
-            cursor.execute(
-                "SELECT column_name FROM information_schema.columns "
-                "WHERE table_name = %s ORDER BY ordinal_position",
-                [data_table.table_name],
-            )
-            return [row[0] for row in cursor.fetchall()]
-    except ResourceDataTable.DoesNotExist:
-        return []
-
-
-def _parse_filter_key(key: str) -> Tuple[str, str]:
-    """Split 'col__op' style filter key into (column, op). Defaults op to 'eq'."""
-    if "__" in key:
-        col, op = key.rsplit("__", 1)
-        if op not in _FILTER_OPERATORS:
-            # No valid operator suffix — treat full key as column with eq
-            return key, "eq"
-        return col, op
-    return key, "eq"
-
-
-def _build_where_clause(
-    filters: Dict[str, Any], allowed_columns: List[str]
-) -> Tuple[pg_sql.Composable, List[Any]]:
-    """Build a parameterized WHERE clause from a filters dict.
-
-    Filters are of the form ``{"column": value}`` for equality, or
-    ``{"column__op": value}`` for other operators. Unknown columns are rejected.
-    """
-    if not filters:
-        return pg_sql.SQL(""), []
-
-    allowed_set = set(allowed_columns)
-    clauses: List[pg_sql.Composable] = []
-    params: List[Any] = []
-
-    for raw_key, value in filters.items():
-        col, op = _parse_filter_key(raw_key)
-        if col not in allowed_set:
-            raise DataFetchError(f"Unknown filter column: {col}")
-        op_template, transformer = _FILTER_OPERATORS[op]
-
-        col_ident = pg_sql.Identifier(col)
-        if op in ("isnull", "notnull"):
-            # Boolean toggle: isnull=true means IS NULL, isnull=false means IS NOT NULL
-            truthy = value not in (False, "false", "False", 0, "0", None)
-            sql_op = "IS NULL" if (op == "isnull") == truthy else "IS NOT NULL"
-            clauses.append(pg_sql.SQL("{col} {op}").format(col=col_ident, op=pg_sql.SQL(sql_op)))
-            continue
-
-        # Compose: <col> <op_template> (where op_template contains %s placeholders)
-        clauses.append(pg_sql.SQL("{col} ").format(col=col_ident) + pg_sql.SQL(op_template))
-        params.append(transformer(value) if transformer else value)
-
-    where_sql = pg_sql.SQL(" WHERE ") + pg_sql.SQL(" AND ").join(clauses)
-    return where_sql, params
-
-
-def _build_order_by(order_by: Optional[List[str]], allowed_columns: List[str]) -> pg_sql.Composable:
-    """Build a parameterised ORDER BY clause. Each entry may be 'col' or '-col'."""
-    if not order_by:
-        return pg_sql.SQL("")
-    allowed_set = set(allowed_columns)
-    parts: List[pg_sql.Composable] = []
-    for item in order_by:
-        direction = "ASC"
-        col = item
-        if item.startswith("-"):
-            direction = "DESC"
-            col = item[1:]
-        elif item.startswith("+"):
-            col = item[1:]
-        if col not in allowed_set:
-            raise DataFetchError(f"Unknown order_by column: {col}")
-        parts.append(
-            pg_sql.SQL("{col} ").format(col=pg_sql.Identifier(col)) + pg_sql.SQL(direction)
-        )
-    return pg_sql.SQL(" ORDER BY ") + pg_sql.SQL(", ").join(parts)
-
-
-def fetch_resource_data(
-    resource: Resource,
-    filters: Optional[Dict[str, Any]] = None,
-    columns: Optional[List[str]] = None,
-    limit: int = DEFAULT_FETCH_LIMIT,
-    offset: int = 0,
-    order_by: Optional[List[str]] = None,
-    count: bool = True,
-) -> Dict[str, Any]:
-    """Fetch indexed data for a Resource from data_db with column-level filtering.
-
-    Returns a dict::
-
-        {
-            "columns": [...],  # selected column names
-            "rows": [[...], ...],  # list of rows (one list per row)
-            "total": <int or None>,  # total matching rows (None if count=False)
-            "limit": <int>,
-            "offset": <int>,
-        }
-
-    Args:
-        resource: The Resource whose indexed data should be fetched.
-        filters: Optional dict of ``{"col": val}`` or ``{"col__op": val}`` filters.
-        columns: Optional list of columns to project. Defaults to all columns.
-        limit: Max rows to return (capped at MAX_FETCH_LIMIT).
-        offset: Number of rows to skip.
-        order_by: Optional list of columns; prefix with ``-`` for DESC.
-        count: When True (default) also returns the total matching row count.
-
-    Raises:
-        DataFetchError: If the resource has no indexed data, or filters/columns
-            reference unknown columns.
-    """
-    try:
-        data_table = ResourceDataTable.objects.get(resource=resource)
-    except ResourceDataTable.DoesNotExist:
-        raise DataFetchError(f"Resource {resource.id} has no indexed data table")
-
-    allowed_columns = get_resource_columns(resource)
-    if not allowed_columns:
-        raise DataFetchError(f"Resource {resource.id} has no schema/columns available")
-
-    # Validate and resolve projected columns
-    if columns:
-        unknown = [c for c in columns if c not in allowed_columns]
-        if unknown:
-            raise DataFetchError(f"Unknown columns: {unknown}")
-        select_columns = columns
-    else:
-        select_columns = allowed_columns
-
-    # Clamp pagination
-    if limit is None or limit <= 0:
-        limit = DEFAULT_FETCH_LIMIT
-    limit = min(int(limit), MAX_FETCH_LIMIT)
-    offset = max(int(offset or 0), 0)
-
-    table_ident = pg_sql.Identifier(data_table.table_name)
-    cols_sql = pg_sql.SQL(", ").join(pg_sql.Identifier(c) for c in select_columns)
-    where_sql, params = _build_where_clause(filters or {}, allowed_columns)
-    order_sql = _build_order_by(order_by, allowed_columns)
-
-    select_query = (
-        pg_sql.SQL("SELECT ")
-        + cols_sql
-        + pg_sql.SQL(" FROM ")
-        + table_ident
-        + where_sql
-        + order_sql
-        + pg_sql.SQL(" LIMIT %s OFFSET %s")
-    )
-
-    total: Optional[int] = None
-    with connections[DATA_DB].cursor() as cursor:
-        # Safety: cap query time
-        cursor.execute("SET statement_timeout = 10000")
-
-        if count:
-            count_query = pg_sql.SQL("SELECT COUNT(*) FROM ") + table_ident + where_sql
-            cursor.execute(count_query, params)
-            row = cursor.fetchone()
-            total = int(row[0]) if row else 0
-
-        cursor.execute(select_query, params + [limit, offset])
-        result_columns = [desc[0] for desc in cursor.description]
-        rows = [list(r) for r in cursor.fetchall()]
-
-    return {
-        "columns": result_columns,
-        "rows": rows,
-        "total": total,
-        "limit": limit,
-        "offset": offset,
-    }
diff --git a/api/utils/keycloak_utils.py b/api/utils/keycloak_utils.py
index f6937d71..15cd6fda 100644
--- a/api/utils/keycloak_utils.py
+++ b/api/utils/keycloak_utils.py
@@ -117,11 +117,7 @@ def validate_token(self, token: str) -> Dict[str, Any]:
             # If that fails (403), fall back to token introspection data
             try:
                 user_info = self.keycloak_openid.userinfo(token)
-                if isinstance(user_info, bytes):
-                    import json
-
-                    user_info = json.loads(user_info.decode("utf-8"))
-                return user_info  # type: ignore[return-value]
+                return user_info
             except KeycloakError as userinfo_error:
                 # If userinfo fails (e.g., 403), extract user info from token introspection
                 logger.warning(
diff --git a/api/views/dataset_data.py b/api/views/dataset_data.py
deleted file mode 100644
index f6f0b6a7..00000000
--- a/api/views/dataset_data.py
+++ /dev/null
@@ -1,359 +0,0 @@
-"""HTTP endpoints for fetching indexed dataset/resource data from data_db.
-
-Endpoints:
-
-- ``GET /api/resources/<resource_id>/data/`` — fetch indexed data for a single
-  resource with column-based filtering.
-- ``GET /api/datasets/<dataset_id>/data/`` — fetch indexed data for a dataset.
-  By default operates on the dataset's first indexed resource. Pass
-  ``?resource_id=<uuid>`` to target a specific resource.
-- ``GET /api/datasets/<dataset_id>/prompts/`` — fetch indexed data for a
-  PromptDataset, restricted to ``dataset_type=PROMPT`` and exposing extra
-  prompt-specific filter shorthands.
-
-All endpoints accept these query params:
-
-- ``columns`` — comma-separated list of columns to project.
-- ``limit`` (default 100, max 10000), ``offset`` (default 0).
-- ``order_by`` — comma-separated columns; prefix with ``-`` for DESC.
-- ``count`` — ``true``/``false`` (default ``true``) to include total row count.
-- Any other query param is interpreted as a data-column filter, optionally
-  with operator suffix, e.g. ``?price__gte=10&category=books``. Repeated keys
-  produce a list (used naturally for ``__in``/``__nin``).
-"""
-
-import uuid
-from typing import Any, Dict, List, Optional, Tuple
-
-import structlog
-from django.http import HttpRequest
-from rest_framework.permissions import AllowAny
-from rest_framework.request import Request
-from rest_framework.response import Response
-from rest_framework.views import APIView
-
-from api.models import Dataset, Resource, ResourceDataTable
-from api.models.PromptDataset import PromptDataset
-from api.utils.data_indexing import (
-    DEFAULT_FETCH_LIMIT,
-    MAX_FETCH_LIMIT,
-    DataFetchError,
-    fetch_resource_data,
-    get_resource_columns,
-)
-from api.utils.enums import DatasetStatus, DatasetType
-
-logger = structlog.get_logger(__name__)
-
-# Reserved query parameters that are NOT treated as column filters.
-_RESERVED_PARAMS = {
-    "columns",
-    "limit",
-    "offset",
-    "order_by",
-    "count",
-    "resource_id",
-    "format",
-}
-
-
-def _parse_bool(value: Any, default: bool = False) -> bool:
-    if value is None:
-        return default
-    if isinstance(value, bool):
-        return value
-    return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}
-
-
-def _parse_int(value: Any, default: int) -> int:
-    try:
-        return int(value)
-    except (TypeError, ValueError):
-        return default
-
-
-def _parse_csv(value: Optional[str]) -> Optional[List[str]]:
-    if not value:
-        return None
-    parts = [p.strip() for p in value.split(",") if p.strip()]
-    return parts or None
-
-
-def _extract_filters(query_params: Any, reserved: Optional[set] = None) -> Dict[str, Any]:
-    """Pull non-reserved query params as filter dict.
-
-    Repeated keys collapse into lists so callers can use
-    ``?col__in=a&col__in=b``. ``__in``/``__nin`` always produce a list, even
-    for a single value.
-    """
-    reserved_set = reserved if reserved is not None else _RESERVED_PARAMS
-    filters: Dict[str, Any] = {}
-    # query_params is a QueryDict; use .lists() if available
-    if hasattr(query_params, "lists"):
-        items = query_params.lists()
-    else:
-        items = [(k, [v]) for k, v in query_params.items()]
-
-    for key, values in items:
-        if key in reserved_set:
-            continue
-        if not values:
-            continue
-        op_suffix = key.rsplit("__", 1)[-1] if "__" in key else None
-        if op_suffix in ("in", "nin"):
-            # Allow comma-separated single value too
-            collected: List[Any] = []
-            for v in values:
-                if isinstance(v, str) and "," in v:
-                    collected.extend([p for p in (s.strip() for s in v.split(",")) if p])
-                else:
-                    collected.append(v)
-            filters[key] = collected
-        else:
-            # Last value wins for non-list operators
-            filters[key] = values[-1]
-    return filters
-
-
-def _user_can_access_dataset(request: HttpRequest, dataset: Dataset) -> bool:
-    """Allow access to PUBLISHED datasets, otherwise require owner/org-member."""
-    if dataset.status == DatasetStatus.PUBLISHED.value:
-        return True
-    user = getattr(request, "user", None)
-    if not user or not user.is_authenticated:
-        return False
-    if user.is_superuser:
-        return True
-    if dataset.user_id and dataset.user_id == user.id:
-        return True
-    if dataset.organization_id:
-        # Lazy import to avoid circular imports at module load
-        from authorization.models import OrganizationMembership
-
-        return OrganizationMembership.objects.filter(
-            user=user, organization_id=dataset.organization_id
-        ).exists()
-    return False
-
-
-def _resolve_dataset_resource(
-    dataset: Dataset, resource_id: Optional[str]
-) -> Tuple[Optional[Resource], Optional[Response]]:
-    """Pick a Resource for a dataset-level data fetch.
-
-    Returns ``(resource, error_response)`` — exactly one is non-None.
-    """
-    if resource_id:
-        try:
-            resource = dataset.resources.get(id=resource_id)
-        except Resource.DoesNotExist:
-            return None, Response(
-                {"error": f"Resource {resource_id} not found in dataset {dataset.id}"},
-                status=404,
-            )
-        return resource, None
-
-    # Default: first resource that has indexed data
-    indexed_table = (
-        ResourceDataTable.objects.filter(resource__dataset=dataset).order_by("created").first()
-    )
-    if indexed_table is None:
-        return None, Response(
-            {
-                "error": (
-                    "Dataset has no indexed (tabular) resources. "
-                    "Pass ?resource_id=<uuid> or upload a CSV/XLSX/Parquet/JSON file."
-                )
-            },
-            status=404,
-        )
-    return indexed_table.resource, None
-
-
-def _fetch_and_respond(
-    request: Request,
-    resource: Resource,
-    extra_filters: Optional[Dict[str, Any]] = None,
-    reserved: Optional[set] = None,
-    extra_response: Optional[Dict[str, Any]] = None,
-) -> Response:
-    """Common path: parse query params, run fetch_resource_data, return JSON."""
-    qp = request.query_params  # type: ignore[attr-defined]
-
-    columns = _parse_csv(qp.get("columns"))
-    order_by = _parse_csv(qp.get("order_by"))
-    limit = _parse_int(qp.get("limit"), DEFAULT_FETCH_LIMIT)
-    offset = _parse_int(qp.get("offset"), 0)
-    count = _parse_bool(qp.get("count"), default=True)
-
-    filters = _extract_filters(qp, reserved=reserved)
-    if extra_filters:
-        filters.update(extra_filters)
-
-    try:
-        result = fetch_resource_data(
-            resource=resource,
-            filters=filters,
-            columns=columns,
-            limit=limit,
-            offset=offset,
-            order_by=order_by,
-            count=count,
-        )
-    except DataFetchError as e:
-        return Response({"error": str(e)}, status=400)
-    except Exception as e:  # pragma: no cover — defensive
-        logger.exception(
-            "fetch_resource_data failed",
-            resource_id=str(resource.id),
-            error=str(e),
-        )
-        return Response({"error": "Failed to fetch data"}, status=500)
-
-    available = get_resource_columns(resource)
-
-    payload: Dict[str, Any] = {
-        "resource_id": str(resource.id),
-        "dataset_id": str(resource.dataset_id),
-        "available_columns": available,
-        "max_limit": MAX_FETCH_LIMIT,
-        **result,
-    }
-    if extra_response:
-        payload.update(extra_response)
-    return Response(payload)
-
-
-class ResourceDataView(APIView):
-    """Return indexed data for a specific resource."""
-
-    permission_classes = [AllowAny]
-
-    def get(self, request: Request, resource_id: uuid.UUID) -> Response:
-        try:
-            resource = Resource.objects.select_related("dataset").get(id=resource_id)
-        except Resource.DoesNotExist:
-            return Response({"error": "Resource not found"}, status=404)
-
-        if not _user_can_access_dataset(request, resource.dataset):  # type: ignore[attr-defined]
-            return Response({"error": "Not authorized"}, status=403)
-
-        return _fetch_and_respond(request, resource)
-
-
-class DatasetDataView(APIView):
-    """Return indexed data for a dataset (one resource at a time)."""
-
-    permission_classes = [AllowAny]
-
-    def get(self, request: Request, dataset_id: uuid.UUID) -> Response:
-        try:
-            dataset = Dataset.objects.get(id=dataset_id)
-        except Dataset.DoesNotExist:
-            return Response({"error": "Dataset not found"}, status=404)
-
-        if not _user_can_access_dataset(request, dataset):
-            return Response({"error": "Not authorized"}, status=403)
-
-        resource_id = request.query_params.get("resource_id")  # type: ignore[attr-defined]
-        resource, err = _resolve_dataset_resource(dataset, resource_id)
-        if err is not None:
-            return err
-        assert resource is not None
-        return _fetch_and_respond(request, resource)
-
-
-class PromptDatasetDataView(APIView):
-    """Return indexed data for a PromptDataset.
-
-    Same query semantics as :class:`DatasetDataView`, but the dataset must be
-    of type ``PROMPT``. Convenience query params (translated to column
-    filters when those columns exist on the data):
-
-    - ``prompt_contains`` -> ``prompt__icontains``
-    - ``response_contains`` -> ``response__icontains`` (or ``completion``)
-    - ``min_length``/``max_length`` -> ``length__gte``/``length__lte``
-    """
-
-    permission_classes = [AllowAny]
-
-    # Conventional column names we look for on prompt data tables.
-    _PROMPT_COL_CANDIDATES = ("prompt", "input", "instruction", "question")
-    _RESPONSE_COL_CANDIDATES = ("response", "completion", "answer", "output")
-    _LENGTH_COL_CANDIDATES = ("length", "prompt_length", "tokens", "token_count")
-
-    def _first_present(self, available: List[str], candidates: Tuple[str, ...]) -> Optional[str]:
-        lower_map = {c.lower(): c for c in available}
-        for cand in candidates:
-            if cand in lower_map:
-                return lower_map[cand]
-        return None
-
-    def get(self, request: Request, dataset_id: uuid.UUID) -> Response:
-        try:
-            prompt_dataset = PromptDataset.objects.get(dataset_ptr_id=dataset_id)
-        except PromptDataset.DoesNotExist:
-            return Response(
-                {"error": f"Dataset {dataset_id} is not a prompt dataset"},
-                status=404,
-            )
-
-        if prompt_dataset.dataset_type != DatasetType.PROMPT.value:
-            return Response(
-                {"error": f"Dataset {dataset_id} is not a prompt dataset"},
-                status=400,
-            )
-
-        if not _user_can_access_dataset(request, prompt_dataset):
-            return Response({"error": "Not authorized"}, status=403)
-
-        resource_id = request.query_params.get("resource_id")  # type: ignore[attr-defined]
-        resource, err = _resolve_dataset_resource(prompt_dataset, resource_id)
-        if err is not None:
-            return err
-        assert resource is not None
-
-        # Map prompt-specific shorthands to underlying column filters
-        available = get_resource_columns(resource)
-        qp = request.query_params  # type: ignore[attr-defined]
-        extra: Dict[str, Any] = {}
-
-        prompt_col = self._first_present(available, self._PROMPT_COL_CANDIDATES)
-        response_col = self._first_present(available, self._RESPONSE_COL_CANDIDATES)
-        length_col = self._first_present(available, self._LENGTH_COL_CANDIDATES)
-
-        prompt_q = qp.get("prompt_contains")
-        if prompt_q and prompt_col:
-            extra[f"{prompt_col}__icontains"] = prompt_q
-
-        response_q = qp.get("response_contains")
-        if response_q and response_col:
-            extra[f"{response_col}__icontains"] = response_q
-
-        min_len = qp.get("min_length")
-        if min_len and length_col:
-            extra[f"{length_col}__gte"] = min_len
-
-        max_len = qp.get("max_length")
-        if max_len and length_col:
-            extra[f"{length_col}__lte"] = max_len
-
-        local_reserved = _RESERVED_PARAMS | {
-            "prompt_contains",
-            "response_contains",
-            "min_length",
-            "max_length",
-        }
-
-        return _fetch_and_respond(
-            request,
-            resource,
-            extra_filters=extra,
-            reserved=local_reserved,
-            extra_response={
-                "dataset_type": prompt_dataset.dataset_type,
-                "prompt_column": prompt_col,
-                "response_column": response_col,
-                "length_column": length_col,
-            },
-        )
diff --git a/dataspace_sdk/__version__.py b/dataspace_sdk/__version__.py
index 8d063328..cff36390 100644
--- a/dataspace_sdk/__version__.py
+++ b/dataspace_sdk/__version__.py
@@ -1,3 +1,3 @@
 """Version information for DataSpace SDK."""
 
-__version__ = "0.5.02"
+__version__ = "0.4.19"
diff --git a/dataspace_sdk/resources/datasets.py b/dataspace_sdk/resources/datasets.py
index df6313e3..24dee57a 100644
--- a/dataspace_sdk/resources/datasets.py
+++ b/dataspace_sdk/resources/datasets.py
@@ -1,6 +1,6 @@
 """Dataset resource client for DataSpace SDK."""
 
-from typing import Any, Dict, Iterator, List, Optional
+from typing import Any, Dict, List, Optional
 
 from dataspace_sdk.base import BaseAPIClient
 
@@ -169,7 +169,6 @@ def list_all(
                 license
                 created
                 updated
-                datasetType
                 organization {
                     id
                     name
@@ -178,31 +177,6 @@ def list_all(
                     id
                     value
                 }
-                sectors {
-                    id
-                    name
-                }
-                promptMetadata
-                resources {
-                    id
-                    name
-                    noOfEntries
-                    fileDetails {
-                        format
-                        size
-                    }
-                    schema {
-                        format
-                        description
-                        fieldName
-                    }
-                    promptDetails {
-                        promptFormat
-                        hasSystemPrompt
-                        hasExampleResponses
-                        promptCount
-                    }
-                }
             }
         }
         """
@@ -631,176 +605,3 @@ def update_prompt_metadata(
 
         result: Dict[str, Any] = response.get("data", {}).get("updatePromptMetadata", {})
         return result
-
-    # ------------------------------------------------------------------
-    # Indexed data access (data_db)
-    # ------------------------------------------------------------------
-
-    @staticmethod
-    def _build_data_params(
-        filters: Optional[Dict[str, Any]],
-        columns: Optional[List[str]],
-        order_by: Optional[List[str]],
-        limit: int,
-        offset: int,
-        count: Optional[bool],
-    ) -> Dict[str, Any]:
-        """Translate Pythonic kwargs into the ``GET /data/`` query-string form.
-
-        ``filters`` is a flat dict using the same ``col`` / ``col__op`` keys as
-        the server. List values are passed through (requests will emit one
-        ``key=v`` pair per entry, used by ``__in`` / ``__nin``).
-        """
-        params: Dict[str, Any] = {"limit": int(limit), "offset": int(offset)}
-        if columns:
-            params["columns"] = ",".join(columns)
-        if order_by:
-            params["order_by"] = ",".join(order_by)
-        if count is not None:
-            params["count"] = "true" if count else "false"
-        if filters:
-            for k, v in filters.items():
-                if isinstance(v, (list, tuple)):
-                    params[k] = list(v)
-                elif isinstance(v, bool):
-                    params[k] = "true" if v else "false"
-                else:
-                    params[k] = v
-        return params
-
-    def get_resource_data(
-        self,
-        resource_id: str,
-        filters: Optional[Dict[str, Any]] = None,
-        columns: Optional[List[str]] = None,
-        order_by: Optional[List[str]] = None,
-        limit: int = 100,
-        offset: int = 0,
-        count: bool = True,
-    ) -> Dict[str, Any]:
-        """Fetch indexed (saved in ``data_db``) data for a single resource.
-
-        Args:
-            resource_id: UUID of the resource (must have an indexed table).
-            filters: Column-level filters. Keys are either ``"col"`` (equality)
-                or ``"col__op"`` where op is one of: ``eq, ne, gt, gte, lt,
-                lte, in, nin, contains, icontains, startswith, istartswith,
-                endswith, iendswith, isnull, notnull``.
-            columns: Subset of columns to project. ``None`` returns all.
-            order_by: Columns to sort by. Prefix with ``-`` for DESC.
-            limit: Max rows to return (server caps at 10000).
-            offset: Number of rows to skip.
-            count: If ``True``, the response includes total matching row count.
-
-        Returns:
-            A dict with ``columns``, ``rows``, ``total``, ``limit``,
-            ``offset``, ``available_columns``, ``resource_id``,
-            ``dataset_id``, and ``max_limit``.
-        """
-        params = self._build_data_params(filters, columns, order_by, limit, offset, count)
-        return self.get(f"/api/resources/{resource_id}/data/", params=params)
-
-    def get_dataset_data(
-        self,
-        dataset_id: str,
-        resource_id: Optional[str] = None,
-        filters: Optional[Dict[str, Any]] = None,
-        columns: Optional[List[str]] = None,
-        order_by: Optional[List[str]] = None,
-        limit: int = 100,
-        offset: int = 0,
-        count: bool = True,
-    ) -> Dict[str, Any]:
-        """Fetch indexed data for a dataset.
-
-        By default operates on the dataset's first indexed (tabular) resource.
-        Pass ``resource_id`` to target a specific resource within the dataset.
-        Filtering / column / ordering semantics are identical to
-        :meth:`get_resource_data`.
-        """
-        params = self._build_data_params(filters, columns, order_by, limit, offset, count)
-        if resource_id:
-            params["resource_id"] = resource_id
-        return self.get(f"/api/datasets/{dataset_id}/data/", params=params)
-
-    def get_prompt_data(
-        self,
-        dataset_id: str,
-        resource_id: Optional[str] = None,
-        filters: Optional[Dict[str, Any]] = None,
-        columns: Optional[List[str]] = None,
-        order_by: Optional[List[str]] = None,
-        limit: int = 100,
-        offset: int = 0,
-        count: bool = True,
-        prompt_contains: Optional[str] = None,
-        response_contains: Optional[str] = None,
-        min_length: Optional[int] = None,
-        max_length: Optional[int] = None,
-    ) -> Dict[str, Any]:
-        """Fetch indexed data for a PROMPT-typed dataset.
-
-        Same generic semantics as :meth:`get_dataset_data`, plus prompt-aware
-        shorthands that automatically map to the underlying prompt/response/
-        length columns when present:
-
-        Args:
-            prompt_contains: Substring (case-insensitive) match on the prompt
-                column (auto-detects ``prompt``/``input``/``instruction``/
-                ``question``).
-            response_contains: Substring match on the response column
-                (auto-detects ``response``/``completion``/``answer``/
-                ``output``).
-            min_length / max_length: Bounds on the length column
-                (auto-detects ``length``/``prompt_length``/``tokens``/
-                ``token_count``).
-
-        The response includes ``prompt_column``, ``response_column``, and
-        ``length_column`` indicating what was auto-detected.
-        """
-        params = self._build_data_params(filters, columns, order_by, limit, offset, count)
-        if resource_id:
-            params["resource_id"] = resource_id
-        if prompt_contains is not None:
-            params["prompt_contains"] = prompt_contains
-        if response_contains is not None:
-            params["response_contains"] = response_contains
-        if min_length is not None:
-            params["min_length"] = int(min_length)
-        if max_length is not None:
-            params["max_length"] = int(max_length)
-        return self.get(f"/api/datasets/{dataset_id}/prompts/", params=params)
-
-    def iter_resource_data(
-        self,
-        resource_id: str,
-        filters: Optional[Dict[str, Any]] = None,
-        columns: Optional[List[str]] = None,
-        order_by: Optional[List[str]] = None,
-        batch_size: int = 1000,
-    ) -> Iterator[Dict[str, Any]]:
-        """Yield rows as dicts, paging through the entire filtered result set.
-
-        Each yielded item is a ``{column: value}`` mapping. ``batch_size`` is
-        capped at 10000 by the server.
-        """
-        offset = 0
-        while True:
-            page = self.get_resource_data(
-                resource_id=resource_id,
-                filters=filters,
-                columns=columns,
-                order_by=order_by,
-                limit=batch_size,
-                offset=offset,
-                count=False,
-            )
-            cols: List[str] = page.get("columns", []) or []
-            rows: List[List[Any]] = page.get("rows", []) or []
-            if not rows:
-                return
-            for row in rows:
-                yield dict(zip(cols, row))
-            if len(rows) < batch_size:
-                return
-            offset += len(rows)
diff --git a/docs/dataset_data_api.md b/docs/dataset_data_api.md
deleted file mode 100644
index 5e970ea9..00000000
--- a/docs/dataset_data_api.md
+++ /dev/null
@@ -1,266 +0,0 @@
-# Indexed Dataset Data API
-
-This document describes the HTTP endpoints and SDK methods for fetching the
-*indexed tabular data* that DataSpace stores in the `data_db` PostgreSQL
-database. When a CSV / XLSX / Parquet / JSON resource is uploaded, its rows are
-indexed into a per-resource table so they can be queried, filtered, and
-streamed without re-downloading the source file.
-
-## Overview
-
-| Layer | Surface |
-|-------|---------|
-| Backend utility | `api.utils.data_indexing.fetch_resource_data(...)` |
-| HTTP API | `GET /api/resources/<resource_id>/data/`, `GET /api/datasets/<dataset_id>/data/`, `GET /api/datasets/<dataset_id>/prompts/` |
-| Python SDK | `DatasetClient.get_resource_data(...)`, `get_dataset_data(...)`, `get_prompt_data(...)`, `iter_resource_data(...)` |
-
-All three endpoints share the same query-parameter contract. The prompt
-endpoint adds prompt-specific shorthands.
-
-## Permissions
-
-- **PUBLISHED** datasets are publicly readable.
-- **DRAFT / ARCHIVED** datasets require the requesting user to be the dataset
-  owner, a superuser, or a member of the dataset's organization.
-
-## HTTP API
-
-### `GET /api/resources/<resource_id>/data/`
-
-Returns indexed data for a single resource.
-
-### `GET /api/datasets/<dataset_id>/data/`
-
-Returns indexed data for a dataset. Defaults to the dataset's first indexed
-resource. Use `?resource_id=<uuid>` to target a specific resource.
-
-### `GET /api/datasets/<dataset_id>/prompts/`
-
-Same semantics as `/data/`, but the dataset must be of `dataset_type=PROMPT`
-and the response includes the auto-detected prompt / response / length column
-names. Convenience filters:
-
-| Param | Maps to |
-|-------|---------|
-| `prompt_contains=<str>` | `<prompt_col>__icontains=<str>` |
-| `response_contains=<str>` | `<response_col>__icontains=<str>` |
-| `min_length=<int>` | `<length_col>__gte=<int>` |
-| `max_length=<int>` | `<length_col>__lte=<int>` |
-
-Auto-detected columns (case-insensitive, first match wins):
-
-- prompt: `prompt`, `input`, `instruction`, `question`
-- response: `response`, `completion`, `answer`, `output`
-- length: `length`, `prompt_length`, `tokens`, `token_count`
-
-If a candidate column is not present in the resource schema, the corresponding
-shorthand is silently ignored. You can always fall back to the explicit
-`<col>__<op>` form.
-
-### Query parameters
-
-Reserved (not interpreted as filters):
-
-| Param | Default | Notes |
-|-------|---------|-------|
-| `columns` | all | Comma-separated list of columns to project. |
-| `limit` | `100` | Capped at `10000`. |
-| `offset` | `0` | |
-| `order_by` | none | Comma-separated. Prefix with `-` for DESC. |
-| `count` | `true` | Set `false` to skip the `SELECT COUNT(*)` round-trip. |
-| `resource_id` | first indexed | Only on `/datasets/<id>/data/` and `/prompts/`. |
-
-Any other query param is treated as a column filter.
-
-### Filter operators
-
-Filters use Django-ORM-style suffixes: `?<col>__<op>=<value>`. Without a
-suffix, equality is assumed: `?<col>=<value>`.
-
-| Operator | SQL | Notes |
-|----------|-----|-------|
-| `eq` (default) | `=` | |
-| `ne` | `<>` | |
-| `gt`, `gte`, `lt`, `lte` | `>`, `>=`, `<`, `<=` | |
-| `in` | `= ANY(...)` | Repeat the param: `?col__in=a&col__in=b` (or `?col__in=a,b`). |
-| `nin` | `<> ALL(...)` | Same shape as `in`. |
-| `contains` / `icontains` | `LIKE` / `ILIKE` `'%v%'` | |
-| `startswith` / `istartswith` | `LIKE` / `ILIKE` `'v%'` | |
-| `endswith` / `iendswith` | `LIKE` / `ILIKE` `'%v'` | |
-| `isnull` | `IS NULL` (truthy) / `IS NOT NULL` (falsy) | Value is parsed as bool. |
-| `notnull` | inverse of `isnull` | |
-
-Unknown columns or unknown operators return HTTP **400** with a
-`{"error": "..."}` body. All identifiers are quoted via `psycopg2.sql`; values
-are bound as parameters — there is no string concatenation into the SQL.
-
-### Response shape
-
-```json
-{
-  "resource_id": "f1e2...",
-  "dataset_id": "abcd...",
-  "available_columns": ["id", "name", "price", "category"],
-  "max_limit": 10000,
-  "columns": ["id", "name"],
-  "rows": [[1, "alpha"], [2, "beta"]],
-  "total": 87,
-  "limit": 100,
-  "offset": 0
-}
-```
-
-The prompt endpoint additionally returns:
-
-```json
-{
-  "dataset_type": "PROMPT",
-  "prompt_column": "prompt",
-  "response_column": "response",
-  "length_column": "tokens"
-}
-```
-
-Set `?count=false` to avoid the count query for large tables; `total` will be
-`null`.
-
-### Examples
-
-```bash
-# Books over $10, sorted by descending price, page 2
-curl "https://api.example.com/api/resources/<rid>/data/?\
-category=books&price__gte=10&order_by=-price&limit=50&offset=50"
-
-# Multiple categories
-curl "https://api.example.com/api/resources/<rid>/data/?\
-category__in=books&category__in=media"
-
-# Prompt dataset: long English translation prompts
-curl "https://api.example.com/api/datasets/<did>/prompts/?\
-prompt_contains=translate&min_length=50&language=en"
-```
-
-## Python SDK
-
-```python
-from dataspace_sdk import DataSpaceClient
-
-client = DataSpaceClient(
-    base_url="https://dataspace.civicdatalab.in",
-    keycloak_url="https://opub-kc.civicdatalab.in",
-    keycloak_realm="DataSpace",
-    keycloak_client_id="dataspace",
-)
-client.login(username="...", password="...")
-```
-
-### `get_resource_data`
-
-```python
-page = client.datasets.get_resource_data(
-    resource_id="f1e2...",
-    filters={
-        "price__gte": 10,
-        "category__in": ["books", "media"],
-        "is_active": True,
-    },
-    columns=["id", "title", "price"],
-    order_by=["-price", "title"],
-    limit=200,
-    offset=0,
-    count=True,
-)
-print(page["total"], len(page["rows"]))
-```
-
-### `get_dataset_data`
-
-Same parameters as `get_resource_data`, plus an optional `resource_id`.
-Without `resource_id`, the dataset's first indexed resource is used.
-
-```python
-page = client.datasets.get_dataset_data(
-    dataset_id="abcd...",
-    resource_id="optional-uuid",
-    filters={"region": "south"},
-)
-```
-
-### `get_prompt_data`
-
-Adds prompt-aware shorthands on top of the generic interface:
-
-```python
-page = client.datasets.get_prompt_data(
-    dataset_id="abcd...",
-    prompt_contains="translate",
-    response_contains="bonjour",
-    min_length=20,
-    max_length=400,
-    filters={"language": "fr"},
-    columns=["prompt", "response", "tokens"],
-    order_by=["-tokens"],
-)
-print(page["prompt_column"], page["response_column"], page["length_column"])
-```
-
-### `iter_resource_data` — streaming all rows
-
-Transparently pages through the entire filtered result set, yielding each row
-as a `{column: value}` dict. The server caps `batch_size` at `10000`.
-
-```python
-for row in client.datasets.iter_resource_data(
-    resource_id="f1e2...",
-    filters={"is_active": True},
-    columns=["id", "title", "price"],
-    batch_size=2000,
-):
-    process(row)
-```
-
-## Backend utility
-
-When you need to fetch indexed data from inside the Django process (e.g. a
-GraphQL resolver or background task), call the underlying utility directly:
-
-```python
-from api.models import Resource
-from api.utils.data_indexing import fetch_resource_data, DataFetchError
-
-resource = Resource.objects.get(id=resource_id)
-try:
-    result = fetch_resource_data(
-        resource=resource,
-        filters={"price__gte": 10},
-        columns=["id", "title", "price"],
-        order_by=["-price"],
-        limit=100,
-        offset=0,
-        count=True,
-    )
-except DataFetchError as e:
-    # Unknown column / no indexed table / etc.
-    raise
-```
-
-The utility validates every column against `ResourceSchema` (or the live
-`information_schema` if no schema rows exist) and uses parameterised queries
-exclusively — passing a malicious column name returns `DataFetchError`,
-never a SQL injection.
-
-## Safety notes
-
-- Identifiers are quoted via `psycopg2.sql.Identifier`; values are passed as
-  query parameters. There is no string interpolation of user input into SQL.
-- `statement_timeout` is set to **10 seconds** on every fetch.
-- `limit` is clamped to **10000** rows. Use `iter_resource_data` to stream
-  larger result sets.
-- The `data_db` connection is read-only from this layer's perspective — the
-  utility never executes anything other than `SELECT` / `SET statement_timeout`.
-
-## Related
-
-- [SDK overview](sdk/OVERVIEW.md)
-- [SDK quick start](sdk/QUICKSTART.md)
-- [Unified search API](unified_search_api.md)
diff --git a/docs/sdk/README.md b/docs/sdk/README.md
index 37b72461..19a67473 100644
--- a/docs/sdk/README.md
+++ b/docs/sdk/README.md
@@ -206,50 +206,6 @@ org_datasets = client.datasets.get_organization_datasets(
 )
 ```
 
-### Fetch Indexed Dataset Data (filterable)
-
-For datasets whose resources have been indexed into `data_db` (CSV/XLSX/etc.),
-you can query the underlying rows with column-level filters, projection, and
-ordering. See the dedicated guide: [dataset_data_api.md](../dataset_data_api.md).
-
-```python
-# Per-resource fetch
-page = client.datasets.get_resource_data(
-    resource_id="f1e2...",
-    filters={"price__gte": 10, "category__in": ["books", "media"]},
-    columns=["id", "title", "price"],
-    order_by=["-price"],
-    limit=200,
-)
-print(page["total"], len(page["rows"]))
-
-# Per-dataset fetch (defaults to first indexed resource)
-page = client.datasets.get_dataset_data(
-    dataset_id="abcd...",
-    filters={"region": "south"},
-)
-
-# Prompt datasets — extra prompt-aware shorthands
-page = client.datasets.get_prompt_data(
-    dataset_id="abcd...",
-    prompt_contains="translate",
-    min_length=20,
-    filters={"language": "fr"},
-)
-
-# Stream all matching rows as dicts
-for row in client.datasets.iter_resource_data(
-    resource_id="f1e2...",
-    filters={"is_active": True},
-    batch_size=2000,
-):
-    process(row)
-```
-
-Supported filter operators (Django-style suffixes): `eq, ne, gt, gte, lt, lte,
-in, nin, contains, icontains, startswith, istartswith, endswith, iendswith,
-isnull, notnull`.
-
 ## Working with AI Models
 
 ### Search AI Models
diff --git a/tests/test_data_indexing_filters.py b/tests/test_data_indexing_filters.py
deleted file mode 100644
index e8db64ff..00000000
--- a/tests/test_data_indexing_filters.py
+++ /dev/null
@@ -1,174 +0,0 @@
-"""Unit tests for the SQL-builder / filter helpers in
-``api.utils.data_indexing`` and the request-parsing helpers in
-``api.views.dataset_data``.
-
-These tests deliberately avoid touching the actual ``data_db`` connection —
-they validate the logic that turns user input into safe SQL fragments and
-into normalised filter dicts.
-"""
-
-import unittest
-
-from django.http import QueryDict
-from psycopg2 import sql as pg_sql
-
-from api.utils.data_indexing import (
-    DataFetchError,
-    _build_order_by,
-    _build_where_clause,
-    _parse_filter_key,
-)
-from api.views.dataset_data import _extract_filters, _parse_bool, _parse_int
-
-
-def _render(composable: pg_sql.Composable) -> str:
-    """Stringify a Composable without needing a live DB connection.
-
-    Walks the Composed tree and concatenates the literal strings of each
-    leaf (SQL/Identifier). Identifiers are rendered as ``"name"``.
-    """
-    if isinstance(composable, pg_sql.SQL):
-        return composable.string
-    if isinstance(composable, pg_sql.Identifier):
-        # psycopg2 may store multiple components for schema-qualified idents
-        parts = (
-            composable.strings
-            if hasattr(composable, "strings")
-            else (composable._wrapped if hasattr(composable, "_wrapped") else [])
-        )
-        return ".".join(f'"{p}"' for p in parts)
-    if isinstance(composable, pg_sql.Composed):
-        return "".join(_render(c) for c in composable.seq)
-    if isinstance(composable, pg_sql.Placeholder):
-        return "%s"
-    return str(composable)
-
-
-class TestParseFilterKey(unittest.TestCase):
-    def test_no_op_defaults_to_eq(self) -> None:
-        self.assertEqual(_parse_filter_key("price"), ("price", "eq"))
-
-    def test_known_op_split(self) -> None:
-        self.assertEqual(_parse_filter_key("price__gte"), ("price", "gte"))
-        self.assertEqual(_parse_filter_key("name__icontains"), ("name", "icontains"))
-
-    def test_unknown_op_treated_as_column(self) -> None:
-        # Column may legitimately contain "__" — if suffix isn't a known op,
-        # fall back to equality on the full key.
-        col, op = _parse_filter_key("weird__suffix")
-        self.assertEqual((col, op), ("weird__suffix", "eq"))
-
-
-class TestBuildWhereClause(unittest.TestCase):
-    allowed = ["id", "price", "name", "active"]
-
-    def test_empty_filters(self) -> None:
-        sql, params = _build_where_clause({}, self.allowed)
-        self.assertEqual(params, [])
-        self.assertEqual(_render(sql), "")
-
-    def test_eq_and_gte(self) -> None:
-        sql, params = _build_where_clause({"price__gte": 10, "name": "abc"}, self.allowed)
-        rendered = _render(sql)
-        self.assertIn(" WHERE ", rendered)
-        self.assertIn('"price" >= %s', rendered)
-        self.assertIn('"name" = %s', rendered)
-        self.assertIn(10, params)
-        self.assertIn("abc", params)
-
-    def test_in_operator_normalises_to_list(self) -> None:
-        sql, params = _build_where_clause({"id__in": ("a", "b")}, self.allowed)
-        rendered = _render(sql)
-        self.assertIn("= ANY(%s)", rendered)
-        self.assertEqual(params, [["a", "b"]])
-
-    def test_isnull_truthy(self) -> None:
-        sql, params = _build_where_clause({"name__isnull": True}, self.allowed)
-        rendered = _render(sql)
-        self.assertIn("IS NULL", rendered)
-        self.assertEqual(params, [])
-
-    def test_isnull_false_means_not_null(self) -> None:
-        sql, _ = _build_where_clause({"name__isnull": "false"}, self.allowed)
-        self.assertIn("IS NOT NULL", _render(sql))
-
-    def test_unknown_column_rejected(self) -> None:
-        with self.assertRaises(DataFetchError):
-            _build_where_clause({"evil__gte": 1}, self.allowed)
-
-    def test_icontains_wraps_value(self) -> None:
-        _, params = _build_where_clause({"name__icontains": "foo"}, self.allowed)
-        self.assertEqual(params, ["%foo%"])
-
-    def test_startswith_wraps_value(self) -> None:
-        _, params = _build_where_clause({"name__startswith": "foo"}, self.allowed)
-        self.assertEqual(params, ["foo%"])
-
-
-class TestBuildOrderBy(unittest.TestCase):
-    allowed = ["id", "price"]
-
-    def test_none_returns_empty(self) -> None:
-        sql = _build_order_by(None, self.allowed)
-        self.assertEqual(_render(sql), "")
-
-    def test_asc_and_desc(self) -> None:
-        sql = _build_order_by(["-price", "id"], self.allowed)
-        rendered = _render(sql)
-        self.assertIn(" ORDER BY ", rendered)
-        self.assertIn('"price" DESC', rendered)
-        self.assertIn('"id" ASC', rendered)
-
-    def test_unknown_column_rejected(self) -> None:
-        with self.assertRaises(DataFetchError):
-            _build_order_by(["evil"], self.allowed)
-
-
-class TestViewQueryParamHelpers(unittest.TestCase):
-    def test_parse_bool(self) -> None:
-        self.assertTrue(_parse_bool("true"))
-        self.assertTrue(_parse_bool("YES"))
-        self.assertTrue(_parse_bool(True))
-        self.assertFalse(_parse_bool("0"))
-        self.assertFalse(_parse_bool(None, default=False))
-        self.assertTrue(_parse_bool(None, default=True))
-
-    def test_parse_int(self) -> None:
-        self.assertEqual(_parse_int("42", 0), 42)
-        self.assertEqual(_parse_int(None, 7), 7)
-        self.assertEqual(_parse_int("not-a-number", 9), 9)
-
-    def test_extract_filters_skips_reserved(self) -> None:
-        qd = QueryDict(mutable=True)
-        qd.update({"limit": "10", "offset": "0", "columns": "a,b"})
-        qd["price__gte"] = "5"
-        qd["name"] = "abc"
-        result = _extract_filters(qd)
-        self.assertEqual(result, {"price__gte": "5", "name": "abc"})
-
-    def test_extract_filters_in_collapses_to_list(self) -> None:
-        qd = QueryDict("col__in=a&col__in=b&col__in=c,d")
-        result = _extract_filters(qd)
-        self.assertIn("col__in", result)
-        self.assertEqual(sorted(result["col__in"]), ["a", "b", "c", "d"])
-
-    def test_extract_filters_custom_reserved(self) -> None:
-        qd = QueryDict("limit=10&prompt_contains=x&col=y")
-        result = _extract_filters(
-            qd,
-            reserved={
-                "limit",
-                "offset",
-                "columns",
-                "order_by",
-                "count",
-                "resource_id",
-                "format",
-                "prompt_contains",
-            },
-        )
-        self.assertEqual(result, {"col": "y"})
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 7155fa8e..74653975 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -140,117 +140,5 @@ def test_search_with_sorting(self, mock_request: MagicMock) -> None:
         mock_request.assert_called_once()
 
 
-class TestDatasetClientDataFetch(unittest.TestCase):
-    """Tests for indexed-data fetch methods on DatasetClient."""
-
-    def setUp(self) -> None:
-        self.client = DatasetClient("https://api.test.com", MagicMock())
-
-    def test_build_data_params_basic(self) -> None:
-        params = DatasetClient._build_data_params(
-            filters=None,
-            columns=None,
-            order_by=None,
-            limit=50,
-            offset=10,
-            count=True,
-        )
-        self.assertEqual(params["limit"], 50)
-        self.assertEqual(params["offset"], 10)
-        self.assertEqual(params["count"], "true")
-        self.assertNotIn("columns", params)
-
-    def test_build_data_params_filters_and_lists(self) -> None:
-        params = DatasetClient._build_data_params(
-            filters={"price__gte": 10, "tag__in": ["a", "b"], "active": True},
-            columns=["id", "name"],
-            order_by=["-price", "name"],
-            limit=100,
-            offset=0,
-            count=False,
-        )
-        self.assertEqual(params["columns"], "id,name")
-        self.assertEqual(params["order_by"], "-price,name")
-        self.assertEqual(params["count"], "false")
-        self.assertEqual(params["price__gte"], 10)
-        self.assertEqual(params["tag__in"], ["a", "b"])
-        self.assertEqual(params["active"], "true")
-
-    @patch.object(DatasetClient, "get")
-    def test_get_resource_data(self, mock_get: MagicMock) -> None:
-        mock_get.return_value = {
-            "columns": ["id"],
-            "rows": [[1]],
-            "total": 1,
-            "limit": 100,
-            "offset": 0,
-        }
-        result = self.client.get_resource_data(
-            "res-1",
-            filters={"id__gte": 1},
-            columns=["id"],
-            order_by=["id"],
-            limit=10,
-        )
-        self.assertEqual(result["total"], 1)
-        endpoint, kwargs = mock_get.call_args[0][0], mock_get.call_args.kwargs
-        self.assertEqual(endpoint, "/api/resources/res-1/data/")
-        self.assertEqual(kwargs["params"]["columns"], "id")
-        self.assertEqual(kwargs["params"]["id__gte"], 1)
-
-    @patch.object(DatasetClient, "get")
-    def test_get_dataset_data_with_resource_id(self, mock_get: MagicMock) -> None:
-        mock_get.return_value = {"rows": [], "columns": [], "total": 0}
-        self.client.get_dataset_data("ds-1", resource_id="res-9", limit=5)
-        endpoint = mock_get.call_args[0][0]
-        params = mock_get.call_args.kwargs["params"]
-        self.assertEqual(endpoint, "/api/datasets/ds-1/data/")
-        self.assertEqual(params["resource_id"], "res-9")
-        self.assertEqual(params["limit"], 5)
-
-    @patch.object(DatasetClient, "get")
-    def test_get_prompt_data_shorthands(self, mock_get: MagicMock) -> None:
-        mock_get.return_value = {"rows": [], "columns": [], "total": 0}
-        self.client.get_prompt_data(
-            "ds-1",
-            prompt_contains="translate",
-            response_contains="hello",
-            min_length=5,
-            max_length=100,
-        )
-        endpoint = mock_get.call_args[0][0]
-        params = mock_get.call_args.kwargs["params"]
-        self.assertEqual(endpoint, "/api/datasets/ds-1/prompts/")
-        self.assertEqual(params["prompt_contains"], "translate")
-        self.assertEqual(params["response_contains"], "hello")
-        self.assertEqual(params["min_length"], 5)
-        self.assertEqual(params["max_length"], 100)
-
-    @patch.object(DatasetClient, "get_resource_data")
-    def test_iter_resource_data_paginates(self, mock_get_data: MagicMock) -> None:
-        # Two pages: full batch then partial page (terminator)
-        mock_get_data.side_effect = [
-            {"columns": ["id", "name"], "rows": [[1, "a"], [2, "b"]]},
-            {"columns": ["id", "name"], "rows": [[3, "c"]]},
-        ]
-        rows = list(self.client.iter_resource_data("res-1", batch_size=2))
-        self.assertEqual(
-            rows,
-            [
-                {"id": 1, "name": "a"},
-                {"id": 2, "name": "b"},
-                {"id": 3, "name": "c"},
-            ],
-        )
-        self.assertEqual(mock_get_data.call_count, 2)
-        # Second call advances offset
-        self.assertEqual(mock_get_data.call_args_list[1].kwargs["offset"], 2)
-
-    @patch.object(DatasetClient, "get_resource_data")
-    def test_iter_resource_data_empty(self, mock_get_data: MagicMock) -> None:
-        mock_get_data.return_value = {"columns": ["id"], "rows": []}
-        self.assertEqual(list(self.client.iter_resource_data("res-1")), [])
-
-
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_settings.py b/tests/test_settings.py
index 751a62a9..2fee6f51 100644
--- a/tests/test_settings.py
+++ b/tests/test_settings.py
@@ -6,16 +6,16 @@
 import sys
 
 # Add the project root directory to Python path
-project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 sys.path.insert(0, project_root)
 
 from DataSpace.settings import *
 
 # Use an in-memory SQLite database for testing
 DATABASES = {
-    "default": {
-        "ENGINE": "django.db.backends.sqlite3",
-        "NAME": ":memory:",
+    'default': {
+        'ENGINE': 'django.db.backends.sqlite3',
+        'NAME': ':memory:',
     }
 }
 
@@ -24,10 +24,9 @@
 
 # Use a faster password hasher during tests
 PASSWORD_HASHERS = [
-    "django.contrib.auth.hashers.MD5PasswordHasher",
+    'django.contrib.auth.hashers.MD5PasswordHasher',
 ]
 
-
 # Disable migrations during tests
 class DisableMigrations:
     def __contains__(self, item):
@@ -36,43 +35,19 @@ def __contains__(self, item):
     def __getitem__(self, item):
         return None
 
-
 MIGRATION_MODULES = DisableMigrations()
 
 # Disable celery tasks during tests
 CELERY_ALWAYS_EAGER = True
 CELERY_EAGER_PROPAGATES_EXCEPTIONS = True
 
-# NOTE: We intentionally do NOT override INSTALLED_APPS — the real settings
-# already define AUTH_USER_MODEL = "authorization.User", so the
-# ``authorization`` app must be present for Django to bootstrap. Trimming the
-# list to a "minimal" set previously broke every test with
-# ``ImproperlyConfigured: AUTH_USER_MODEL refers to model 'authorization.User'
-# that has not been installed``. Use ``MIGRATION_MODULES`` above to keep
-# tests fast instead of stripping apps.
-
-# Drop middleware that requires live external services (Keycloak / rate
-# limiter / activity stream) so unit tests can boot without network access.
-MIDDLEWARE = [
-    m
-    for m in MIDDLEWARE  # noqa: F405 — imported via ``from DataSpace.settings import *``
-    if m
-    not in {
-        "authorization.middleware.KeycloakAuthenticationMiddleware",
-        "authorization.middleware.activity_consent.ActivityConsentMiddleware",
-        "api.middleware.rate_limit.rate_limit_middleware",
-        "api.middleware.request_validator.RequestValidationMiddleware",
-    }
+# Required apps for testing
+INSTALLED_APPS = [
+    'django.contrib.admin',
+    'django.contrib.auth',
+    'django.contrib.contenttypes',
+    'django.contrib.sessions',
+    'django.contrib.messages',
+    'django.contrib.staticfiles',
+    'api',
 ]
-
-# Elasticsearch is optional during unit tests — point the DSL at a dummy host
-# so module import doesn't try to connect.
-ELASTICSEARCH_DSL = {
-    "default": {"hosts": "localhost:9200"},
-}
-
-# Disable real Keycloak calls in tests.
-KEYCLOAK_SERVER_URL = "http://localhost:8080"
-KEYCLOAK_REALM = "test"
-KEYCLOAK_CLIENT_ID = "test-client"
-KEYCLOAK_CLIENT_SECRET = "test-secret"