From c215ad5bf46dabb324a9ccac51c441abdf193295 Mon Sep 17 00:00:00 2001 From: Michal Foksa Date: Sun, 1 Aug 2021 17:01:25 +0200 Subject: [PATCH] logicalType switch. Logical types are not generated by default. --- avro/README.md | 102 ++++++++++++++---- .../avro/jsr310/AvroJavaTimeModule.java | 4 +- .../jsr310/ser/AvroInstantSerializer.java | 4 +- .../jsr310/ser/AvroLocalDateSerializer.java | 4 +- .../ser/AvroLocalDateTimeSerializer.java | 4 +- .../jsr310/ser/AvroLocalTimeSerializer.java | 6 +- .../avro/schema/AvroFormatVisitorWrapper.java | 30 +++++- .../avro/schema/DateTimeVisitor.java | 17 +-- ...AvroJavaTimeModule_schemaCreationTest.java | 1 + ...sitorWrapper_createVisitorWrapperTest.java | 26 +++++ .../DateTimeVisitor_builtAvroSchemaTest.java | 100 +++++++++++++++++ 11 files changed, 255 insertions(+), 43 deletions(-) create mode 100644 avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/AvroFormatVisitorWrapper_createVisitorWrapperTest.java create mode 100644 avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/DateTimeVisitor_builtAvroSchemaTest.java diff --git a/avro/README.md b/avro/README.md index aea4100c2..beb8e5df7 100644 --- a/avro/README.md +++ b/avro/README.md @@ -112,39 +112,97 @@ byte[] avroData = mapper.writer(schema) and that's about it, for now. +## Avro Logical Types + +Following is an extract from the [Logical Types](http://avro.apache.org/docs/current/spec.html#Logical+Types) paragraph in +the Avro schema specification: +> A logical type is an Avro primitive or complex type with extra attributes to represent a derived type. The attribute +> `logicalType` must always be present for a logical type, and is a string with the name of one of the logical types +> defined by Avro specification. + +Generation of logical types for a limited set of `java.time` classes is supported at the moment. See the table below. + +### Mapping to Logical Type + +Mapping to Avro type and logical type works in a few steps: +1. 
Serializer for particular Java type (or class) determines a Jackson type where the Java type will be serialized into. +2. `AvroSchemaGenerator` determines corresponding Avro type for that Jackson type. +2. If logical type generation is enabled, then `logicalType` is determined for the above combination of Java type and + Avro type. + +#### Java type to Avro Logical Type mapping + +| Java type | Serialization type | Generated Avro schema with Avro type and logical type +| ----------------------------- | ------------------ | ----------------------------------------------------- +| `java.time.OffsetDateTime` | NumberType.LONG | `{"type": "long", "logicalType": "timestamp-millis"}` +| `java.time.ZonedDateTime` | NumberType.LONG | `{"type": "long", "logicalType": "timestamp-millis"}` +| `java.time.Instant` | NumberType.LONG | `{"type": "long", "logicalType": "timestamp-millis"}` +| `java.time.LocalDate` | NumberType.INT | `{"type": "int", "logicalType": "date"}` +| `java.time.LocalTime` | NumberType.INT | `{"type": "int", "logicalType": "time-millis"}` +| `java.time.LocalDateTime` | NumberType.LONG | `{"type": "long", "logicalType": "local-timestamp-millis"}` + +_Provided Avro logical type generation is enabled._ + +### Usage + +Call `AvroSchemaGenerator.enableLogicalTypes()` method to enable Avro schema with logical type generation. + +```java +// Create and configure Avro mapper. With for example a module or a serializer. +AvroMapper mapper = AvroMapper.builder() + .build(); + +AvroSchemaGenerator gen = new AvroSchemaGenerator(); +// Enable logical types +gen.enableLogicalTypes(); + +mapper.acceptJsonFormatVisitor(RootType.class, gen); +Schema actualSchema = gen.getGeneratedSchema().getAvroSchema(); +``` + +_**Note:** For best performance with `java.time` classes configure `AvroMapper` to use `AvroJavaTimeModule`. 
More on +`AvroJavaTimeModule` below._ + ## Java Time Support -Serialization and deserialization support for limited set of `java.time` classes to Avro with [logical type](http://avro.apache.org/docs/current/spec.html#Logical+Types) is provided by `AvroJavaTimeModule`. -This module is to be used either: -- Instead of Java 8 date/time module (`com.fasterxml.jackson.datatype.jsr310.JavaTimeModule`) or +`AvroJavaTimeModule` is the best companion to enabled Avro logical types. It provides serialization and +deserialization for a set of `java.time` classes into a simple numerical value, e.g., `OffsetDateTime` to `long`, +`LocalTime` to `int`, etc. + +| WARNING: Time zone information is lost at serialization. After deserialization, time instant is reconstructed but not the original time zone.| +| --- | + +Because data is serialized into a simple numerical value (long or int), time zone information is lost at serialization. +Serialized values represent a point in time, independent of a particular time zone or calendar. Upon reading a value back, +the time instant is reconstructed but not the original time zone. + +`AvroJavaTimeModule` is to be used either as: +- replacement of Java 8 date/time module (`com.fasterxml.jackson.datatype.jsr310.JavaTimeModule`) or - to override Java 8 date/time module and for that, module must be registered AFTER Java 8 date/time module (last registration wins). 
+### Java types supported by AvroJavaTimeModule, and their mapping to Jackson types + +| Java type | Serialization type +| ----------------------------- | ------------------ +| `java.time.OffsetDateTime` | NumberType.LONG +| `java.time.ZonedDateTime` | NumberType.LONG +| `java.time.Instant` | NumberType.LONG +| `java.time.LocalDate` | NumberType.INT +| `java.time.LocalTime` | NumberType.INT +| `java.time.LocalDateTime` | NumberType.LONG + +### Usage + ```java AvroMapper mapper = AvroMapper.builder() .addModule(new AvroJavaTimeModule()) .build(); ``` - -#### Note -Please note that time zone information is lost at serialization. Serialized values represent point in time, -independent of a particular time zone or calendar. Upon reading a value back time instant is reconstructed but not the original time zone. - -#### Supported java.time types: - -Supported java.time types with Avro schema. - -| Type | Avro schema -| ------------------------------ | ------------- -| `java.time.OffsetDateTime` | `{"type": "long", "logicalType": "timestamp-millis"}` -| `java.time.ZonedDateTime` | `{"type": "long", "logicalType": "timestamp-millis"}` -| `java.time.Instant` | `{"type": "long", "logicalType": "timestamp-millis"}` -| `java.time.LocalDate` | `{"type": "int", "logicalType": "date"}` -| `java.time.LocalTime` | `{"type": "int", "logicalType": "time-millis"}` -| `java.time.LocalDateTime` | `{"type": "long", "logicalType": "local-timestamp-millis"}` -#### Precision +### Precision -Avro supports milliseconds and microseconds precision for date and time related LogicalTypes, but this module only supports millisecond precision. +Avro supports milliseconds and microseconds precision for date and time related logical types. `AvroJavaTimeModule` +supports millisecond precision only. 
## Generating Avro Schema from POJO definition diff --git a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/AvroJavaTimeModule.java b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/AvroJavaTimeModule.java index d901852d6..a380f1681 100644 --- a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/AvroJavaTimeModule.java +++ b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/AvroJavaTimeModule.java @@ -21,8 +21,8 @@ /** * A module that installs a collection of serializers and deserializers for java.time classes. * - * This module is to be used either: - * - Instead of Java 8 date/time module (com.fasterxml.jackson.datatype.jsr310.JavaTimeModule) or + * AvroJavaTimeModule module is to be used either as: + * - replacement of Java 8 date/time module (com.fasterxml.jackson.datatype.jsr310.JavaTimeModule) or * - to override Java 8 date/time module and for that, module must be registered AFTER Java 8 date/time module. */ public class AvroJavaTimeModule extends SimpleModule { diff --git a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroInstantSerializer.java b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroInstantSerializer.java index a816a37cd..b362c447e 100644 --- a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroInstantSerializer.java +++ b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroInstantSerializer.java @@ -24,8 +24,8 @@ * Please note that time zone information gets lost in this process. Upon reading a value back, we can only * reconstruct the instant, but not the original representation. 
* - * Note: In combination with {@link com.fasterxml.jackson.dataformat.avro.schema.DateTimeVisitor} it aims to produce - * Avro schema with type long and logicalType timestamp-millis: + * Note: In combination with {@link com.fasterxml.jackson.dataformat.avro.schema.AvroSchemaGenerator#enableLogicalTypes()} + * it aims to produce Avro schema with type long and logicalType timestamp-millis: * { * "type" : "long", * "logicalType" : "timestamp-millis" diff --git a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalDateSerializer.java b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalDateSerializer.java index 89d932344..1278e7148 100644 --- a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalDateSerializer.java +++ b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalDateSerializer.java @@ -18,8 +18,8 @@ * Serialized value represents number of days from the unix epoch, 1 January 1970 with no reference * to a particular time zone or time of day. 
* - * Note: In combination with {@link com.fasterxml.jackson.dataformat.avro.schema.DateTimeVisitor} it aims to produce - * Avro schema with type int and logicalType date: + * Note: In combination with {@link com.fasterxml.jackson.dataformat.avro.schema.AvroSchemaGenerator#enableLogicalTypes()} + * it aims to produce Avro schema with type int and logicalType date: * { * "type" : "int", * "logicalType" : "date" diff --git a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalDateTimeSerializer.java b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalDateTimeSerializer.java index 0c58c2d0f..d1c0f7f09 100644 --- a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalDateTimeSerializer.java +++ b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalDateTimeSerializer.java @@ -20,8 +20,8 @@ * Serialized value represents timestamp in a local timezone, regardless of what specific time zone * is considered local, with a precision of one millisecond from 1 January 1970 00:00:00.000. 
* - * Note: In combination with {@link com.fasterxml.jackson.dataformat.avro.schema.DateTimeVisitor} it aims to produce - * Avro schema with type long and logicalType local-timestamp-millis: + * Note: In combination with {@link com.fasterxml.jackson.dataformat.avro.schema.AvroSchemaGenerator#enableLogicalTypes()} + * it aims to produce Avro schema with type long and logicalType local-timestamp-millis: * { * "type" : "long", * "logicalType" : "local-timestamp-millis" diff --git a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalTimeSerializer.java b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalTimeSerializer.java index 5bc2485d2..3476b0b54 100644 --- a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalTimeSerializer.java +++ b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/jsr310/ser/AvroLocalTimeSerializer.java @@ -18,9 +18,9 @@ * Serialized value represents time of day, with no reference to a particular calendar, * time zone or date, where the int stores the number of milliseconds after midnight, 00:00:00.000. 
* - * Note: In combination with {@link com.fasterxml.jackson.dataformat.avro.schema.DateTimeVisitor} it aims to produce - * Avro schema with type int and logicalType time-millis: - * { + * Note: In combination with {@link com.fasterxml.jackson.dataformat.avro.schema.AvroSchemaGenerator#enableLogicalTypes()} + * it aims to produce Avro schema with type int and logicalType time-millis: + * { * "type" : "int", * "logicalType" : "time-millis" * } diff --git a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/AvroFormatVisitorWrapper.java b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/AvroFormatVisitorWrapper.java index 6f38a29ff..5ae98179a 100644 --- a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/AvroFormatVisitorWrapper.java +++ b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/AvroFormatVisitorWrapper.java @@ -26,6 +26,8 @@ public class AvroFormatVisitorWrapper implements JsonFormatVisitorWrapper { protected final DefinedSchemas _schemas; + protected boolean _logicalTypesEnabled = false; + /** * Visitor used for resolving actual Schema, if structured type * (or one with complex configuration) @@ -48,6 +50,12 @@ public AvroFormatVisitorWrapper(DefinedSchemas schemas, SerializerProvider p) { _provider = p; } + protected AvroFormatVisitorWrapper(AvroFormatVisitorWrapper src) { + this._schemas = src._schemas; + this._provider = src._provider; + this._logicalTypesEnabled = src._logicalTypesEnabled; + } + /** * Creates new {@link AvroFormatVisitorWrapper} instance with shared schemas, * serialization provider and same configuration. @@ -55,7 +63,7 @@ public AvroFormatVisitorWrapper(DefinedSchemas schemas, SerializerProvider p) { * @return new instance with shared properties and configuration. 
*/ protected AvroFormatVisitorWrapper createVisitorWrapper() { - return new AvroFormatVisitorWrapper(_schemas, _provider); + return new AvroFormatVisitorWrapper(this); } @Override @@ -90,6 +98,24 @@ public Schema getAvroSchema() { return _builder.builtAvroSchema(); } + /** + * Enables Avro schema with Logical Types generation. + */ + public void enableLogicalTypes() { + _logicalTypesEnabled = true; + } + + /** + * Disables Avro schema with Logical Types generation. + */ + public void disableLogicalTypes() { + _logicalTypesEnabled = false; + } + + public boolean isLogicalTypesEnabled() { + return _logicalTypesEnabled; + } + /* /********************************************************************** /* Callbacks @@ -170,7 +196,7 @@ public JsonIntegerFormatVisitor expectIntegerFormat(JavaType type) { return null; } - if (_isDateTimeType(type)) { + if (isLogicalTypesEnabled() && _isDateTimeType(type)) { DateTimeVisitor v = new DateTimeVisitor(type); _builder = v; return v; diff --git a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/DateTimeVisitor.java b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/DateTimeVisitor.java index 27a6beba7..059ef3338 100644 --- a/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/DateTimeVisitor.java +++ b/avro/src/main/java/com/fasterxml/jackson/dataformat/avro/schema/DateTimeVisitor.java @@ -39,7 +39,8 @@ public Schema builtAvroSchema() { Schema schema = AvroSchemaHelper.numericAvroSchema(_type); if (_hint != null) { - String logicalType = logicalType(_hint); + String logicalType = getLogicalType(schema.getType(), _hint); + if (logicalType != null) { schema.addProp(LogicalType.LOGICAL_TYPE_PROP, logicalType); } else { @@ -49,26 +50,26 @@ public Schema builtAvroSchema() { return schema; } - private String logicalType(JavaType hint) { + private String getLogicalType(Schema.Type avroType, JavaType hint) { Class clazz = hint.getRawClass(); - if (OffsetDateTime.class.isAssignableFrom(clazz)) { + 
if (OffsetDateTime.class.isAssignableFrom(clazz) && Schema.Type.LONG == avroType) { return TIMESTAMP_MILLIS; } - if (ZonedDateTime.class.isAssignableFrom(clazz)) { + if (ZonedDateTime.class.isAssignableFrom(clazz) && Schema.Type.LONG == avroType) { return TIMESTAMP_MILLIS; } - if (Instant.class.isAssignableFrom(clazz)) { + if (Instant.class.isAssignableFrom(clazz) && Schema.Type.LONG == avroType) { return TIMESTAMP_MILLIS; } - if (LocalDate.class.isAssignableFrom(clazz)) { + if (LocalDate.class.isAssignableFrom(clazz) && Schema.Type.INT == avroType) { return DATE; } - if (LocalTime.class.isAssignableFrom(clazz)) { + if (LocalTime.class.isAssignableFrom(clazz) && Schema.Type.INT == avroType) { return TIME_MILLIS; } - if (LocalDateTime.class.isAssignableFrom(clazz)) { + if (LocalDateTime.class.isAssignableFrom(clazz) && Schema.Type.LONG == avroType) { return LOCAL_TIMESTAMP_MILLIS; } diff --git a/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/jsr310/AvroJavaTimeModule_schemaCreationTest.java b/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/jsr310/AvroJavaTimeModule_schemaCreationTest.java index b5edb2ef5..6d9e22c4f 100644 --- a/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/jsr310/AvroJavaTimeModule_schemaCreationTest.java +++ b/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/jsr310/AvroJavaTimeModule_schemaCreationTest.java @@ -55,6 +55,7 @@ public void testSchemaCreation() throws JsonMappingException { .addModule(new AvroJavaTimeModule()) .build(); AvroSchemaGenerator gen = new AvroSchemaGenerator(); + gen.enableLogicalTypes(); // WHEN mapper.acceptJsonFormatVisitor(testClass, gen); diff --git a/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/AvroFormatVisitorWrapper_createVisitorWrapperTest.java b/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/AvroFormatVisitorWrapper_createVisitorWrapperTest.java new file mode 100644 index 000000000..b7c08b3db --- /dev/null +++ 
b/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/AvroFormatVisitorWrapper_createVisitorWrapperTest.java @@ -0,0 +1,26 @@ +package com.fasterxml.jackson.dataformat.avro.schema; + +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.ser.DefaultSerializerProvider; +import org.assertj.core.api.Assertions; + +public class AvroFormatVisitorWrapper_createVisitorWrapperTest { + + public void test () { + // GIVEN + SerializerProvider serializerProvider = new DefaultSerializerProvider.Impl(); + DefinedSchemas schemas = new DefinedSchemas(); + + AvroFormatVisitorWrapper src = new AvroFormatVisitorWrapper(schemas, serializerProvider); + src.enableLogicalTypes(); + + // WHEN + AvroFormatVisitorWrapper actual = src.createVisitorWrapper(); + + // THEN + // All settings are inherited from parent visitor wrapper. + Assertions.assertThat(actual.getSchemas()).isEqualTo(schemas); + Assertions.assertThat(actual.getProvider()).isEqualTo(serializerProvider); + Assertions.assertThat(actual.isLogicalTypesEnabled()).isTrue(); + } +} diff --git a/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/DateTimeVisitor_builtAvroSchemaTest.java b/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/DateTimeVisitor_builtAvroSchemaTest.java new file mode 100644 index 000000000..baa0918f3 --- /dev/null +++ b/avro/src/test/java/com/fasterxml/jackson/dataformat/avro/schema/DateTimeVisitor_builtAvroSchemaTest.java @@ -0,0 +1,100 @@ +package com.fasterxml.jackson.dataformat.avro.schema; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.type.TypeFactory; +import org.apache.avro.LogicalType; +import org.apache.avro.Schema; +import org.apache.avro.specific.SpecificData; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameter; +import org.junit.runners.Parameterized.Parameters; + +import 
java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.OffsetDateTime; +import java.time.ZonedDateTime; +import java.util.Arrays; +import java.util.Collection; + +import static org.assertj.core.api.Assertions.assertThat; + +@RunWith(Parameterized.class) +public class DateTimeVisitor_builtAvroSchemaTest { + + private static final TypeFactory TYPE_FACTORY = TypeFactory.defaultInstance(); + + @Parameter(0) + public Class testClass; + + @Parameter(1) + public JsonParser.NumberType givenNumberType; + + @Parameter(2) + public Schema.Type expectedAvroType; + + @Parameter(3) + public String expectedLogicalType; + + @Parameters(name = "With {0} and number type {1}") + public static Collection testData() { + return Arrays.asList(new Object[][]{ + // Java type | given number type, | expected Avro type | expected logicalType + { + Instant.class, + JsonParser.NumberType.LONG, + Schema.Type.LONG, + "timestamp-millis"}, + { + OffsetDateTime.class, + JsonParser.NumberType.LONG, + Schema.Type.LONG, + "timestamp-millis"}, + { + ZonedDateTime.class, + JsonParser.NumberType.LONG, + Schema.Type.LONG, + "timestamp-millis"}, + { + LocalDateTime.class, + JsonParser.NumberType.LONG, + Schema.Type.LONG, + "local-timestamp-millis"}, + { + LocalDate.class, + JsonParser.NumberType.INT, + Schema.Type.INT, + "date"}, + { + LocalTime.class, + JsonParser.NumberType.INT, + Schema.Type.INT, + "time-millis"}, + }); + } + + @Test + public void builtAvroSchemaTest() { + // GIVEN + DateTimeVisitor dateTimeVisitor = new DateTimeVisitor(TYPE_FACTORY.constructSimpleType(testClass, null)); + dateTimeVisitor.numberType(givenNumberType); + + // WHEN + Schema actualSchema = dateTimeVisitor.builtAvroSchema(); + + System.out.println(testClass.getName() + " schema:\n" + actualSchema.toString(true)); + + // THEN + assertThat(actualSchema.getType()).isEqualTo(expectedAvroType); + 
assertThat(actualSchema.getProp(LogicalType.LOGICAL_TYPE_PROP)).isEqualTo(expectedLogicalType); + /** + * Having logicalType and java-class is not valid according to + * {@link LogicalType#validate(Schema)} + */ + assertThat(actualSchema.getProp(SpecificData.CLASS_PROP)).isNull(); + } + +}