From 6408941b625115ce0f4fb44fab30dc5b2150de39 Mon Sep 17 00:00:00 2001 From: psaesha Date: Wed, 24 Jun 2026 14:49:17 +0530 Subject: [PATCH] no of entries for file formats other than csv --- api/types/type_resource.py | 32 ++++++++++++++++++++++------- api/utils/data_indexing.py | 41 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 7 deletions(-) diff --git a/api/types/type_resource.py b/api/types/type_resource.py index 20532023..cd6d09c3 100644 --- a/api/types/type_resource.py +++ b/api/types/type_resource.py @@ -20,7 +20,13 @@ from api.types.type_preview_data import PreviewData from api.types.type_prompt_resource_details import TypePromptResourceDetails from api.types.type_resource_metadata import TypeResourceMetadata -from api.utils.data_indexing import get_preview_data, get_row_count +from api.utils.data_indexing import ( + FILE_COUNT_FORMATS, + INDEXED_FORMATS, + get_entry_count_from_file, + get_preview_data, + get_row_count, +) from api.utils.graphql_telemetry import trace_resolver logger = structlog.get_logger(__name__) @@ -177,14 +183,26 @@ def no_of_entries(self) -> int: if not file_details: return 0 - if not hasattr(file_details, "format") or file_details.format.lower() != "csv": + if not hasattr(file_details, "format"): return 0 - try: - return get_row_count(self) # type: ignore - except Exception as row_count_error: - logger.error(f"Error in get_row_count: {str(row_count_error)}") - return 0 + fmt = file_details.format.lower() + + if fmt in INDEXED_FORMATS: + try: + return get_row_count(self) # type: ignore + except Exception as row_count_error: + logger.error(f"Error in get_row_count: {str(row_count_error)}") + return 0 + + if fmt in FILE_COUNT_FORMATS: + try: + return get_entry_count_from_file(self) # type: ignore + except Exception as file_count_error: + logger.error(f"Error in get_entry_count_from_file: {str(file_count_error)}") + return 0 + + return 0 except Exception as e: logger.error(f"Error getting number of entries: {str(e)}") return 0 diff --git a/api/utils/data_indexing.py b/api/utils/data_indexing.py index 8e817b88..8d56da54 100644 --- a/api/utils/data_indexing.py +++ b/api/utils/data_indexing.py @@ -16,6 +16,11 @@ # Use a separate database for data tables DATA_DB = "data_db" # This should match the connection name in settings.py +# Formats indexed into ResourceDataTable (queryable via get_row_count) +INDEXED_FORMATS = {"csv", "xls", "xlsx", "ods", "parquet", "feather", "json", "tsv"} +# Formats counted by parsing the file directly (not indexed into DB) +FILE_COUNT_FORMATS = {"yml", "yaml", "xml"} + # Allowed comparison operators for column-based filtering on indexed data. # Maps operator suffix -> (sql_template_with_{ph}_placeholder, value_transformer) _FILTER_OPERATORS: Dict[str, Tuple[str, Any]] = { @@ -397,6 +402,42 @@ def get_row_count(resource: Resource) -> int: return 0 +def get_entry_count_from_file(resource: Resource) -> int: + """Count entries in yml/yaml/xml files by parsing the file directly.""" + try: + file_details = getattr(resource, "resourcefiledetails", None) + if not file_details: + return 0 + + fmt = file_details.format.lower() + filepath = file_details.file.path + + if fmt in ("yml", "yaml"): + import yaml + + with open(filepath, "r") as f: + data = yaml.safe_load(f) + if isinstance(data, list): + return len(data) + if isinstance(data, dict): + for v in data.values(): + if isinstance(v, list): + return len(v) + return len(data) + return 0 + + if fmt == "xml": + import xml.etree.ElementTree as ET + + root = ET.parse(filepath).getroot() + return len(list(root)) + + return 0 + except Exception as e: + logger.error(f"Error counting entries from file for resource {resource.id}: {str(e)}") + return 0 + + def get_preview_data(resource: Resource) -> Optional[PreviewData]: try: if not resource.preview_enabled: