From 1cbeef710bb069d95c3448d13e8c073cf1442c49 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 10 Jun 2026 16:24:00 +0200 Subject: [PATCH] GH-50149: [C++][Parquet] Avoid process abort when encoding fuzzer encounters OOM --- .../parquet/arrow/fuzz_encoding_internal.cc | 32 +++++++++++-------- testing | 2 +- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/cpp/src/parquet/arrow/fuzz_encoding_internal.cc b/cpp/src/parquet/arrow/fuzz_encoding_internal.cc index 4270eb543788..a0077391167e 100644 --- a/cpp/src/parquet/arrow/fuzz_encoding_internal.cc +++ b/cpp/src/parquet/arrow/fuzz_encoding_internal.cc @@ -290,23 +290,24 @@ struct TypedFuzzEncoding { } // Re-encode and re-decode using roundtrip encoding - { - auto compare_chunk = [&](int offset, std::span chunk_values) { - return CompareChunkAgainstReference(offset, chunk_values); - }; + auto compare_chunk = [&](int offset, std::span chunk_values) { + return CompareChunkAgainstReference(offset, chunk_values); + }; + auto do_roundtrip = [&]() -> Status { auto encoder = MakeEncoder(roundtrip_encoding_); BEGIN_PARQUET_CATCH_EXCEPTIONS if constexpr (arrow_supported()) { encoder->Put(*reference_array_); auto reencoded_buffer = encoder->FlushValues(); auto reencoded_data = reencoded_buffer->template span_as(); - auto array = DecodeArrow(roundtrip_encoding_, reencoded_data).ValueOrDie(); - ARROW_CHECK_OK(array->ValidateFull()); - ARROW_CHECK_OK(CompareAgainstReference(array)); + ARROW_ASSIGN_OR_RAISE(auto array, + DecodeArrow(roundtrip_encoding_, reencoded_data)); + RETURN_NOT_OK(array->ValidateFull()); + RETURN_NOT_OK(CompareAgainstReference(array)); // Compare with reading raw values for (const int chunk_size : chunk_sizes()) { - ARROW_CHECK_OK(RunOnDecodedChunks(roundtrip_encoding_, reencoded_data, - chunk_size, compare_chunk)); + RETURN_NOT_OK(RunOnDecodedChunks(roundtrip_encoding_, reencoded_data, + chunk_size, compare_chunk)); } } else { encoder->Put(reference_values_.data(), @@ -315,14 +316,19 @@ struct TypedFuzzEncoding { auto reencoded_data = reencoded_buffer->template span_as(); // Vary chunk sizes for (const int chunk_size : chunk_sizes()) { - ARROW_CHECK_OK(RunOnDecodedChunks(roundtrip_encoding_, reencoded_data, - chunk_size, compare_chunk)); + RETURN_NOT_OK(RunOnDecodedChunks(roundtrip_encoding_, reencoded_data, + chunk_size, compare_chunk)); } } END_PARQUET_CATCH_EXCEPTIONS + return Status::OK(); + }; + Status roundtrip_status = do_roundtrip(); + // OOM when attempting to roundtrip is not a hard failure, any other error is. + if (!roundtrip_status.IsOutOfMemory()) { + ARROW_CHECK_OK(roundtrip_status); } - - return Status::OK(); + return roundtrip_status; } protected: diff --git a/testing b/testing index 9cfebfef8982..1d74fce2b6bb 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit 9cfebfef8982fb8612e0a2c59059752bd32321a3 +Subproject commit 1d74fce2b6bb30158f254fc292252f4a87fc67a3