Browse Source

HADOOP-19204. VectorIO regression: empty ranges are now rejected (#6887)


- restore old outcome: no-op
- test this
- update spec

This is a critical fix for vector IO and MUST be cherrypicked to all branches with
that feature

Contributed by Steve Loughran
Steve Loughran 10 months ago
parent
commit
56c8aa5f1c

+ 8 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java

@@ -294,7 +294,14 @@ public final class VectoredReadUtils {
       final Optional<Long> fileLength) throws EOFException {
 
     requireNonNull(input, "Null input list");
-    checkArgument(!input.isEmpty(), "Empty input list");
+
+    if (input.isEmpty()) {
+      // this may seem a pathological case, but it was valid
+      // before and somehow Spark can call it through parquet.
+      LOG.debug("Empty input list");
+      return input;
+    }
+
     final List<? extends FileRange> sortedRanges;
 
     if (input.size() == 1) {

+ 0 - 1
hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md

@@ -474,7 +474,6 @@ No empty lists.
 
 ```python
 if ranges = null raise NullPointerException
-if ranges.len() = 0 raise IllegalArgumentException
 if allocate = null raise NullPointerException
 ```
 

+ 11 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java

@@ -340,6 +340,17 @@ public abstract class AbstractContractVectoredReadTest extends AbstractFSContrac
     }
   }
 
+  @Test
+  public void testEmptyRanges() throws Exception {
+    List<FileRange> fileRanges = new ArrayList<>();
+    try (FSDataInputStream in = openVectorFile()) {
+      in.readVectored(fileRanges, allocate);
+      Assertions.assertThat(fileRanges)
+          .describedAs("Empty ranges must stay empty")
+          .isEmpty();
+    }
+  }
+
   /**
    * Test to validate EOF ranges.
    * <p>

+ 3 - 4
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java

@@ -702,12 +702,11 @@ public class TestVectoredReadUtils extends HadoopTestBase {
   }
 
   /**
-   * Empty ranges cannot be sorted.
+   * Empty ranges are allowed.
    */
   @Test
-  public void testEmptyRangesRaisesIllegalArgument() throws Throwable {
-    intercept(IllegalArgumentException.class,
-        () -> validateAndSortRanges(Collections.emptyList(), Optional.empty()));
+  public void testEmptyRangesAllowed() throws Throwable {
+    validateAndSortRanges(Collections.emptyList(), Optional.empty());
   }
 
   /**