Skip to content

Commit

Permalink
Fix failure when reading missing fields in Parquet
Browse files Browse the repository at this point in the history
  • Loading branch information
ebyhr committed Nov 27, 2024
1 parent 63fe63a commit abe7084
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,11 @@ private ColumnChunk readArray(GroupField field)
{
List<Type> parameters = field.getType().getTypeParameters();
checkArgument(parameters.size() == 1, "Arrays must have a single type parameter, found %s", parameters.size());
Field elementField = field.getChildren().get(0).get();
Optional<Field> children = field.getChildren().get(0);
if (children.isEmpty()) {
return new ColumnChunk(field.getType().createNullBlock(), new int[] {}, new int[] {});
}
Field elementField = children.get();
ColumnChunk columnChunk = readColumnChunk(elementField);

ListColumnReader.BlockPositions collectionPositions = calculateCollectionOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
Expand All @@ -355,7 +359,8 @@ private ColumnChunk readMap(GroupField field)

ColumnChunk columnChunk = readColumnChunk(field.getChildren().get(0).get());
blocks[0] = columnChunk.getBlock();
blocks[1] = readColumnChunk(field.getChildren().get(1).get()).getBlock();
Optional<Field> valueField = field.getChildren().get(1);
blocks[1] = valueField.isPresent() ? readColumnChunk(valueField.get()).getBlock() : parameters.get(1).createNullBlock();
ListColumnReader.BlockPositions collectionPositions = calculateCollectionOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
Block mapBlock = ((MapType) field.getType()).createBlockFromKeyValue(collectionPositions.isNull(), collectionPositions.offsets(), blocks[0], blocks[1]);
return new ColumnChunk(mapBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8539,6 +8539,45 @@ public void testSetIllegalExtraPropertyKey()
}
}

// Regression test for https://github.com/trinodb/trino/issues/22922:
// reading an array whose element fields were all dropped and replaced must not fail.
@Test
void testArrayElementChange()
{
    try (TestTable testTable = new TestTable(
            getQueryRunner()::execute,
            "test_array_schema_change",
            "(col array(row(a varchar, b varchar)))",
            List.of("CAST(array[row('a', 'b')] AS array(row(a varchar, b varchar)))"))) {
        String tableName = testTable.getName();
        assertUpdate("ALTER TABLE " + tableName + " DROP COLUMN col.element.a");
        assertUpdate("ALTER TABLE " + tableName + " ADD COLUMN col.element.c varchar");
        assertUpdate("ALTER TABLE " + tableName + " DROP COLUMN col.element.b");

        // ORC surfaces the array with a NULL field; the other formats read the whole value as NULL
        String expected;
        if (format == ORC) {
            expected = "CAST(array[row(NULL)] AS array(row(c varchar)))";
        }
        else {
            expected = "CAST(NULL AS array(row(c varchar)))";
        }
        assertThat(query("SELECT * FROM " + tableName))
                .matches("VALUES " + expected);
    }
}

// MAP type is tested in TestIcebergV2.testMapValueSchemaChange

// Verifies that a row column stays readable after all of its original fields
// have been dropped and replaced with a new field.
@Test
void testRowFieldChange()
{
    try (TestTable testTable = new TestTable(
            getQueryRunner()::execute,
            "test_row_schema_change",
            "(col row(a varchar, b varchar))")) {
        String tableName = testTable.getName();
        assertUpdate("INSERT INTO " + tableName + " SELECT CAST(row('a', 'b') AS row(a varchar, b varchar))", 1);

        assertUpdate("ALTER TABLE " + tableName + " DROP COLUMN col.a");
        assertUpdate("ALTER TABLE " + tableName + " ADD COLUMN col.c varchar");
        assertUpdate("ALTER TABLE " + tableName + " DROP COLUMN col.b");

        // ORC and Avro retain the row with a NULL field; the other formats read the whole row as NULL
        String expected;
        if (format == ORC || format == AVRO) {
            expected = "CAST(row(NULL) AS row(c varchar))";
        }
        else {
            expected = "CAST(NULL AS row(c varchar))";
        }
        assertThat(query("SELECT * FROM " + tableName))
                .matches("SELECT " + expected);
    }
}

@Test
public void testObjectStoreLayoutEnabledAndDataLocation()
throws Exception
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1466,6 +1466,31 @@ public void testHighlyNestedFieldPartitioningWithTimestampTransform()
ImmutableSet.of("grandparent.parent.ts_hour=2021-01-01-01/", "grandparent.parent.ts_hour=2022-02-02-02/", "grandparent.parent.ts_hour=2023-03-03-03/"));
}

// Exercises reading a map whose value-row type lost its only original field after a
// schema change. Expected results differ per file format, as encoded in the arguments:
// Parquet reads a NULL value, ORC a row with a NULL field, Avro NULL for the whole column.
@Test
void testMapValueSchemaChange()
{
    testMapValueSchemaChange("PARQUET", "map(array[1], array[NULL])");
    testMapValueSchemaChange("ORC", "map(array[1], array[row(NULL)])");
    testMapValueSchemaChange("AVRO", "NULL");
}

/**
 * Creates a table storing a map with a single-field row value in the given file
 * format, replaces that field via the Iceberg schema-evolution API, and asserts
 * the table still reads back as {@code expectedValue}.
 */
private void testMapValueSchemaChange(String format, String expectedValue)
{
    String tableDefinition = "WITH (format = '" + format + "') AS SELECT CAST(map(array[1], array[row(2)]) AS map(integer, row(field integer))) col";
    try (TestTable table = new TestTable(
            getQueryRunner()::execute,
            "test_map_value_schema_change",
            tableDefinition)) {
        // Evolve the map's value type directly through the Iceberg API:
        // add the replacement field first, then drop the original one
        loadTable(table.getName())
                .updateSchema()
                .addColumn("col.value", "new_field", Types.IntegerType.get())
                .deleteColumn("col.value.field")
                .commit();
        assertThat(query("SELECT * FROM " + table.getName()))
                .as("Format: %s", format)
                .matches("SELECT CAST(" + expectedValue + " AS map(integer, row(new_field integer)))");
    }
}

@Test
public void testUpdateAfterEqualityDelete()
throws Exception
Expand Down

0 comments on commit abe7084

Please sign in to comment.