From 6f2d87a9d7c7c9ccc6f821ca0f6a8029b25a6918 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Tue, 11 Jul 2023 15:31:20 +0200 Subject: [PATCH 01/34] Add new file callback configs --- .../connect/s3/S3SinkConnectorConfig.java | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java index 4ab843906..a879cf3e1 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java @@ -24,6 +24,8 @@ import com.amazonaws.regions.Regions; import com.amazonaws.services.s3.model.CannedAccessControlList; import com.amazonaws.services.s3.model.SSEAlgorithm; +import io.confluent.connect.s3.callback.FileCallbackProvider; +import io.confluent.connect.s3.callback.KafkaFileCallbackProvider; import io.confluent.connect.storage.common.util.StringUtils; import org.apache.kafka.common.Configurable; import org.apache.kafka.common.config.AbstractConfig; @@ -194,6 +196,17 @@ public class S3SinkConnectorConfig extends StorageSinkConnectorConfig { public static final String TOMBSTONE_ENCODED_PARTITION = "tombstone.encoded.partition"; public static final String TOMBSTONE_ENCODED_PARTITION_DEFAULT = "tombstone"; + /** + * Callback configs + */ + public static final String FILE_CALLBACK_ENABLE = "s3.file.callback.enable"; + public static final boolean FILE_CALLBACK_ENABLE_DEFAULT = true; + public static final String FILE_CALLBACK_CLASS = "s3.file.callback.class"; + public static final Class FILE_CALLBACK_CLASS_DEFAULT = + KafkaFileCallbackProvider.class; + public static final String FILE_CALLBACK_CONFIG_JSON = "s3.file.callback.config.json"; + public static final String FILE_CALLBACK_CONFIG_JSON_DEFAULT = "{}"; + /** * Append schema name in s3-path */ @@ -775,6 +788,47 @@ public static ConfigDef newConfigDef() { "Elastic buffer initial capacity" ); + configDef.define( + FILE_CALLBACK_ENABLE, + Type.BOOLEAN, + FILE_CALLBACK_ENABLE_DEFAULT, + Importance.LOW, + "Enables the file callback to be specified and configured", + group, + ++orderInGroup, + Width.LONG, + "Enable s3 file callback" + ); + + configDef.define( + FILE_CALLBACK_CLASS, + Type.CLASS, + FILE_CALLBACK_CLASS_DEFAULT, + new FileCallbackProviderValidator(), + Importance.LOW, + "File callback to push notification for each file written on s3. By default " + + "the connector uses ``" + + FILE_CALLBACK_CLASS_DEFAULT.getSimpleName() + + "``.", + + group, + ++orderInGroup, + Width.LONG, + "File callback class" + ); + + configDef.define( + FILE_CALLBACK_CONFIG_JSON, + Type.STRING, + FILE_CALLBACK_CONFIG_JSON_DEFAULT, + Importance.LOW, + "File callback configuration as json format. 
By default an empty json.", + group, + ++orderInGroup, + Width.LONG, + "File callback config json" + ); + } return configDef; } @@ -953,6 +1007,18 @@ public int getElasticBufferInitCap() { return getInt(ELASTIC_BUFFER_INIT_CAPACITY); } + public boolean getFileCallbackEnable() { + return getBoolean(FILE_CALLBACK_ENABLE); + } + + public Class getFileCallbackClass() { + return getClass(FILE_CALLBACK_CLASS); + } + + public String getFileCallbackConfigJson() { + return getString(FILE_CALLBACK_CONFIG_JSON); + } + public boolean isTombstoneWriteEnabled() { return OutputWriteBehavior.WRITE.toString().equalsIgnoreCase(nullValueBehavior()); } @@ -1189,12 +1255,33 @@ public void ensureValid(String name, Object provider) { ); } + @Override public String toString() { return "Any class implementing: " + AWSCredentialsProvider.class; } } + private static class FileCallbackProviderValidator implements ConfigDef.Validator { + @Override + public void ensureValid(String name, Object provider) { + if (provider != null && provider instanceof Class + && FileCallbackProvider.class.isAssignableFrom((Class) provider)) { + return; + } + throw new ConfigException( + name, + provider, + "Class must extend: " + FileCallbackProvider.class + ); + } + + @Override + public String toString() { + return "Any class implementing: " + FileCallbackProvider.class; + } + } + private static class SseAlgorithmRecommender implements ConfigDef.Recommender { @Override public List validValues(String name, Map connectorConfigs) { From b95f4e01d8c0aff6e3424fab42e1e5c8adea7dad Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Tue, 11 Jul 2023 15:32:14 +0200 Subject: [PATCH 02/34] Add file callback interface/ dummy impl --- .../s3/callback/FileCallbackProvider.java | 22 +++++++++++++ .../callback/KafkaFileCallbackProvider.java | 31 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java create mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java new file mode 100644 index 000000000..6a587bfca --- /dev/null +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java @@ -0,0 +1,22 @@ +/* + * Copyright 2018 Confluent Inc. + * + * Licensed under the Confluent Community License (the "License"); you may not use + * this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.confluent.io/confluent-community-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ + +package io.confluent.connect.s3.callback; + +public interface FileCallbackProvider { + + void call(String topicName, String s3Partition, String filePath, int partition, + Long baseRecordTimestamp, Long currentTimestamp, int recordCount); +} diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java new file mode 100644 index 000000000..e2cf8326e --- /dev/null +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java @@ -0,0 +1,31 @@ +/* + * Copyright 2018 Confluent Inc. + * + * Licensed under the Confluent Community License (the "License"); you may not use + * this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.confluent.io/confluent-community-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package io.confluent.connect.s3.callback; + +public class KafkaFileCallbackProvider implements FileCallbackProvider { + private final String configJson; + + public KafkaFileCallbackProvider(String configJson) { + this.configJson = configJson; + } + + @Override + public void call(String topicName,String s3Partition, String filePath, int partition, + Long baseRecordTimestamp, Long currentTimestamp, int recordCount) { + System.out.println(this.configJson + filePath); + } + +} From 9b81126ddf026484e7c25823046a805a1243ea5f Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Tue, 11 Jul 2023 15:32:32 +0200 Subject: [PATCH 03/34] Add first callback call --- .../connect/s3/TopicPartitionWriter.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index cee677f4d..eb987de88 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -16,6 +16,7 @@ package io.confluent.connect.s3; import com.amazonaws.SdkClientException; +import io.confluent.connect.s3.callback.FileCallbackProvider; import io.confluent.connect.s3.storage.S3Storage; import io.confluent.connect.s3.util.FileRotationTracker; import io.confluent.connect.s3.util.RetryUtil; @@ -36,6 +37,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.lang.reflect.InvocationTargetException; import java.util.Comparator; import java.util.HashMap; import java.util.LinkedList; @@ -636,6 +638,8 @@ private void commitFiles() { for (Map.Entry entry : commitFiles.entrySet()) { String encodedPartition = entry.getKey(); commitFile(encodedPartition); + // apply callback if needed + callbackFile(encodedPartition); if (isTaggingEnabled) { RetryUtil.exponentialBackoffRetry(() -> tagFile(encodedPartition, entry.getValue()), ConnectException.class, @@ -672,6 +676,22 @@ private void commitFile(String encodedPartition) { } } + private void callbackFile(String encodedPartition) { + if (this.connectorConfig.getFileCallbackEnable()) { + try { + // TODO: instanciate the callback once instead of each call + FileCallbackProvider 
fileCallback = (FileCallbackProvider)this.connectorConfig + .getFileCallbackClass().getConstructor(String.class) + .newInstance(connectorConfig.getFileCallbackConfigJson()); + fileCallback.call(tp.topic(), encodedPartition, commitFiles.get(encodedPartition), + tp.partition(), baseRecordTimestamp, currentTimestamp, recordCount); + } catch (InstantiationException | IllegalAccessException + | InvocationTargetException | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } + } + private void tagFile(String encodedPartition, String s3ObjectPath) { Long startOffset = startOffsets.get(encodedPartition); Long endOffset = endOffsets.get(encodedPartition); From 06057ed9c08e8d19484254ea532148dc883d878c Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Tue, 11 Jul 2023 15:43:06 +0200 Subject: [PATCH 04/34] Init callback in constructor --- .../connect/s3/TopicPartitionWriter.java | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index eb987de88..eb660f484 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -107,6 +107,7 @@ public class TopicPartitionWriter { private ErrantRecordReporter reporter; private final FileRotationTracker fileRotationTracker; + private final Optional fileCallback; public TopicPartitionWriter(TopicPartition tp, S3Storage storage, @@ -190,6 +191,20 @@ public TopicPartitionWriter(TopicPartition tp, // Initialize scheduled rotation timer if applicable setNextScheduledRotation(); + + // Initialize callback if enabled + if (this.connectorConfig.getFileCallbackEnable()) { + try { + fileCallback = Optional.of((FileCallbackProvider)this.connectorConfig + .getFileCallbackClass().getConstructor(String.class) + .newInstance(connectorConfig.getFileCallbackConfigJson())); + } catch (InstantiationException | IllegalAccessException + | InvocationTargetException | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } else { + fileCallback = Optional.empty(); + } } private enum State { @@ -638,7 +653,6 @@ private void commitFiles() { for (Map.Entry entry : commitFiles.entrySet()) { String encodedPartition = entry.getKey(); commitFile(encodedPartition); - // apply callback if needed callbackFile(encodedPartition); if (isTaggingEnabled) { RetryUtil.exponentialBackoffRetry(() -> tagFile(encodedPartition, entry.getValue()), @@ -677,19 +691,9 @@ private void commitFile(String encodedPartition) { } private void callbackFile(String encodedPartition) { - if (this.connectorConfig.getFileCallbackEnable()) { - try { - // TODO: instanciate the callback once instead of each call - FileCallbackProvider fileCallback = (FileCallbackProvider)this.connectorConfig - .getFileCallbackClass().getConstructor(String.class) - .newInstance(connectorConfig.getFileCallbackConfigJson()); - fileCallback.call(tp.topic(), encodedPartition, commitFiles.get(encodedPartition), - tp.partition(), baseRecordTimestamp, currentTimestamp, recordCount); - } catch (InstantiationException | IllegalAccessException - | InvocationTargetException | NoSuchMethodException e) { - throw new RuntimeException(e); - } - } + fileCallback.ifPresent(fs -> fs.call(tp.topic(), encodedPartition, + commitFiles.get(encodedPartition), tp.partition(), baseRecordTimestamp, + currentTimestamp, 
recordCount)); } private void tagFile(String encodedPartition, String s3ObjectPath) { From cdd4a5eb7a711457ef12f12d6c55356923bc6141 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Tue, 11 Jul 2023 17:19:05 +0200 Subject: [PATCH 05/34] Add kafka configs --- .../s3/AbstractFileCallbackConfig.java | 47 +++++++++++++++++ .../connect/s3/KafkaFileCallbackConfig.java | 50 +++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/AbstractFileCallbackConfig.java create mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/AbstractFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/AbstractFileCallbackConfig.java new file mode 100644 index 000000000..5a1687011 --- /dev/null +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/AbstractFileCallbackConfig.java @@ -0,0 +1,47 @@ +/* + * Copyright 2018 Confluent Inc. + * + * Licensed under the Confluent Community License (the "License"); you may not use + * this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.confluent.io/confluent-community-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package io.confluent.connect.s3; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.PropertyNamingStrategy; + +import java.io.IOException; +import java.util.Properties; + +public abstract class AbstractFileCallbackConfig { + public static T fromJsonString(String jsonContent, + Class clazz) { + try { + if (jsonContent == null) { + return clazz.newInstance(); + } + ObjectMapper instanceMapper = new ObjectMapper(); + instanceMapper.setPropertyNamingStrategy( + PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); + instanceMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + instanceMapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true); + instanceMapper.enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS); + return instanceMapper.readValue(jsonContent, clazz); + } catch (IllegalAccessException | InstantiationException | IOException e) { + throw new RuntimeException(e); + } + } + + public abstract Properties toProps(); +} diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java new file mode 100644 index 000000000..f9d7e607e --- /dev/null +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java @@ -0,0 +1,50 @@ +/* + * Copyright 2018 Confluent Inc. + * + * Licensed under the Confluent Community License (the "License"); you may not use + * this file except in compliance with the License. 
You may obtain a copy of the + * License at + * + * http://www.confluent.io/confluent-community-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package io.confluent.connect.s3; + +import java.util.Properties; + +public class KafkaFileCallbackConfig extends AbstractFileCallbackConfig { + + private String topicName; + private String topicUser; + private String topicPassword; + private String bootstrapServers; + private String securityProtocols; + + public KafkaFileCallbackConfig() { + } + + public KafkaFileCallbackConfig(String topicName, String topicUser, String topicPassword, + String bootstrapServers, String securityProtocols) { + this.topicName = topicName; + this.topicUser = topicUser; + this.topicPassword = topicPassword; + this.bootstrapServers = bootstrapServers; + this.securityProtocols = securityProtocols; + } + + + @Override + public Properties toProps() { + Properties prop = new Properties(); + return prop; + } + + public String getTopicName() { + return topicName; + } +} From f9c12013b3fb41d3a1425dbfe778fb0252709a6a Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Tue, 11 Jul 2023 17:19:16 +0200 Subject: [PATCH 06/34] WIP: Add kafka producer --- .../s3/callback/KafkaFileCallbackProvider.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java index e2cf8326e..ccfe7b00d 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java @@ -15,17 +15,34 @@ package io.confluent.connect.s3.callback; +import io.confluent.connect.s3.KafkaFileCallbackConfig; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.clients.producer.ProducerRecord; + public class KafkaFileCallbackProvider implements FileCallbackProvider { private final String configJson; + private final KafkaFileCallbackConfig kafkaConfig; public KafkaFileCallbackProvider(String configJson) { this.configJson = configJson; + this.kafkaConfig = KafkaFileCallbackConfig.fromJsonString(configJson, + KafkaFileCallbackConfig.class); } @Override public void call(String topicName,String s3Partition, String filePath, int partition, Long baseRecordTimestamp, Long currentTimestamp, int recordCount) { System.out.println(this.configJson + filePath); + String value = topicName; + try (final Producer producer = new KafkaProducer<>(kafkaConfig.toProps())) { + producer.send(new ProducerRecord<>(kafkaConfig.getTopicName(), topicName, value), + (event, ex) -> { + if (ex != null) { + ex.printStackTrace(); + } + }); + } } } From a9728db2e8b5c485eb30ccd3d0130bea53df59d9 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Wed, 26 Jul 2023 14:33:18 +0200 Subject: [PATCH 07/34] Use MinioContainer as replacement of real S3 Squashed commit of the following: commit 509d64acbd22b6e73473dc7614fa19ec9a9f1c30 Author: Guillaume SANON Date: Tue Jul 11 15:12:41 2023 +0200 Use MinioContainer as replacement of real S3 commit 
b780ff4a26673c837708aa869ac65cd63d222362 Author: Guillaume SANON Date: Tue Jul 11 15:11:47 2023 +0200 Create Minio Container to simulate S3 commit 2fa0547e3c7e2e59db914c281824205deb7c723b Author: Guillaume SANON Date: Tue Jul 11 15:11:23 2023 +0200 Update testcontainers lib --- kafka-connect-s3/pom.xml | 2 +- .../s3/integration/BaseConnectorIT.java | 38 ++++++++++------ .../s3/integration/MinioContainer.java | 44 +++++++++++++++++++ .../s3/integration/S3SinkConnectorIT.java | 12 ++--- .../s3/integration/S3SinkDataFormatIT.java | 7 ++- 5 files changed, 79 insertions(+), 24 deletions(-) create mode 100644 kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/MinioContainer.java diff --git a/kafka-connect-s3/pom.xml b/kafka-connect-s3/pom.xml index 5746fa412..c49ebd663 100644 --- a/kafka-connect-s3/pom.xml +++ b/kafka-connect-s3/pom.xml @@ -39,7 +39,7 @@ 1.11.1 0.2.2 1.0.1 - 1.15.0 + 1.18.3 2.22.1 false 2.22.1 diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/BaseConnectorIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/BaseConnectorIT.java index 7c992ad3f..aece093e9 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/BaseConnectorIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/BaseConnectorIT.java @@ -21,6 +21,7 @@ import com.amazonaws.auth.AWSStaticCredentialsProvider; import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.GetObjectRequest; @@ -129,9 +130,13 @@ public abstract class BaseConnectorIT { protected EmbeddedConnectCluster connect; protected Map props; + protected static MinioContainer minioContainer; + @BeforeClass public static void setupClient() { log.info("Starting ITs..."); + minioContainer = new MinioContainer(); + minioContainer.start(); S3Client = getS3Client(); if (S3Client.doesBucketExistV2(TEST_BUCKET_NAME)) { clearBucket(TEST_BUCKET_NAME); @@ -141,9 +146,12 @@ public static void setupClient() { } @AfterClass - public static void deleteBucket() { + public static void cleanEnv() { S3Client.deleteBucket(TEST_BUCKET_NAME); log.info("Finished ITs, removed S3 bucket"); + // Stopping manually to avoid potential race condition with other IT classes + minioContainer.stop(); + log.info("Stopping Minio container"); } @Before @@ -375,19 +383,20 @@ protected Iterable
sampleHeaders() { * @return an authenticated S3 client */ protected static AmazonS3 getS3Client() { - Map creds = getAWSCredentialFromPath(); - // If AWS credentials found on AWS_CREDENTIALS_PATH, use them (Jenkins) - if (creds.size() == 2) { - BasicAWSCredentials awsCreds = new BasicAWSCredentials( - creds.get(AWS_ACCESS_KEY_ID_CONFIG), - creds.get(AWS_SECRET_ACCESS_KEY_CONFIG)); - return AmazonS3ClientBuilder.standard() - .withCredentials(new AWSStaticCredentialsProvider(awsCreds)) - .build(); - } - // DefaultAWSCredentialsProviderChain, - // For local testing, ~/.aws/credentials needs to be defined or other environment variables - return AmazonS3ClientBuilder.standard().withRegion(AWS_REGION).build(); + return AmazonS3ClientBuilder + .standard() + .withCredentials( + new AWSStaticCredentialsProvider( + new BasicAWSCredentials(MinioContainer.MINIO_USERNAME, MinioContainer.MINIO_PASSWORD)) + ) + .withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration( + minioContainer.getUrl(), + AWS_REGION + ) + ) + .withPathStyleAccessEnabled(true) + .build(); } /** @@ -576,6 +585,7 @@ private static List getContentsFromAvro(String filePath) { * @param filePath the path of the downloaded parquet file * @return the rows of the file as JsonNodes */ + @SuppressWarnings({"deprecation"}) private static List getContentsFromParquet(String filePath) { try (ParquetReader reader = ParquetReader .builder(new SimpleReadSupport(), new Path(filePath)).build()){ diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/MinioContainer.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/MinioContainer.java new file mode 100644 index 000000000..af3846cda --- /dev/null +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/MinioContainer.java @@ -0,0 +1,44 @@ +package io.confluent.connect.s3.integration; + +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.wait.strategy.HttpWaitStrategy; + +import java.time.Duration; +import java.util.Arrays; + +public class MinioContainer extends GenericContainer { + + private static final String DEFAULT_DOCKER_IMAGE = "minio/minio:latest"; + + private static final String HEALTH_ENDPOINT = "/minio/health/ready"; + + private static final int DEFAULT_SERVER_PORT = 9000; + + private static final int DEFAULT_CONSOLE_PORT = 9001; + + // Must be used as AWS_ACCESS_KEY and AWS_SECRET_KEY in AWS S3 Client + public static final String MINIO_USERNAME = "minioadmin"; + + public static final String MINIO_PASSWORD = "minioadmin"; + + public MinioContainer() { + this(DEFAULT_DOCKER_IMAGE); + } + + public MinioContainer(String dockerImageName) { + super(dockerImageName); + this.logger().info("Starting an Minio container using [{}]", dockerImageName); + this.setPortBindings(Arrays.asList(String.format("%d:%d", DEFAULT_SERVER_PORT, DEFAULT_SERVER_PORT), + String.format("%d:%d", DEFAULT_CONSOLE_PORT, DEFAULT_CONSOLE_PORT))); + this.withCommand(String.format("server /data --address :%d --console-address :%d", + DEFAULT_SERVER_PORT, DEFAULT_CONSOLE_PORT)); + setWaitStrategy(new HttpWaitStrategy() + .forPort(DEFAULT_SERVER_PORT) + .forPath(HEALTH_ENDPOINT) + .withStartupTimeout(Duration.ofMinutes(2))); + } + + public String getUrl() { + return String.format("http://%s:%s", this.getHost(), this.getMappedPort(DEFAULT_SERVER_PORT)); + } +} diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java 
b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java index 24b6296b4..3651f2320 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java @@ -15,14 +15,7 @@ package io.confluent.connect.s3.integration; -import static io.confluent.connect.s3.S3SinkConnectorConfig.BEHAVIOR_ON_NULL_VALUES_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.KEYS_FORMAT_CLASS_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.S3_BUCKET_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.STORE_KAFKA_HEADERS_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.STORE_KAFKA_KEYS_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.AWS_ACCESS_KEY_ID_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.AWS_SECRET_ACCESS_KEY_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.TOMBSTONE_ENCODED_PARTITION; +import static io.confluent.connect.s3.S3SinkConnectorConfig.*; import static io.confluent.connect.storage.StorageSinkConnectorConfig.FLUSH_SIZE_CONFIG; import static io.confluent.connect.storage.StorageSinkConnectorConfig.FORMAT_CLASS_CONFIG; import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG; @@ -109,6 +102,9 @@ public void before() throws InterruptedException { props.put(FORMAT_CLASS_CONFIG, AvroFormat.class.getName()); props.put(STORAGE_CLASS_CONFIG, S3Storage.class.getName()); props.put(S3_BUCKET_CONFIG, TEST_BUCKET_NAME); + props.put(S3_PROXY_URL_CONFIG, minioContainer.getUrl()); + props.put(AWS_ACCESS_KEY_ID_CONFIG, MinioContainer.MINIO_USERNAME); + props.put(AWS_SECRET_ACCESS_KEY_CONFIG, MinioContainer.MINIO_PASSWORD); // create topics in Kafka KAFKA_TOPICS.forEach(topic -> connect.kafka().createTopic(topic, 1)); } diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkDataFormatIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkDataFormatIT.java index 0b6414002..4e27b9df1 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkDataFormatIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkDataFormatIT.java @@ -15,7 +15,8 @@ package io.confluent.connect.s3.integration; -import static io.confluent.connect.s3.S3SinkConnectorConfig.S3_BUCKET_CONFIG; +import static io.confluent.connect.s3.S3SinkConnectorConfig.*; +import static io.confluent.connect.s3.S3SinkConnectorConfig.AWS_SECRET_ACCESS_KEY_CONFIG; import static io.confluent.connect.storage.StorageSinkConnectorConfig.FLUSH_SIZE_CONFIG; import static io.confluent.connect.storage.StorageSinkConnectorConfig.FORMAT_CLASS_CONFIG; import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG; @@ -123,6 +124,10 @@ public void before() throws InterruptedException { props.put(VALUE_CONVERTER_SCHEMA_REGISTRY_URL, restApp.restServer.getURI().toString()); props.put(VALUE_CONVERTER_SCRUB_INVALID_NAMES, "true"); + props.put(S3_PROXY_URL_CONFIG, minioContainer.getUrl()); + props.put(AWS_ACCESS_KEY_ID_CONFIG, MinioContainer.MINIO_USERNAME); + props.put(AWS_SECRET_ACCESS_KEY_CONFIG, MinioContainer.MINIO_PASSWORD); + // create topics in Kafka connect.kafka().createTopic(topicName, 1); } From 14cfc09c656beb26b5c326e323c034e4e65f3169 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Wed, 26 Jul 2023 
16:17:55 +0200 Subject: [PATCH 08/34] Test callback on embedded kafka w/ string serializer --- .../connect/s3/KafkaFileCallbackConfig.java | 37 +++++++++++++++++++ .../connect/s3/S3SinkConnectorConfig.java | 2 +- .../callback/KafkaFileCallbackProvider.java | 9 ++++- .../s3/integration/S3SinkConnectorIT.java | 20 ++++++++++ 4 files changed, 65 insertions(+), 3 deletions(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java index f9d7e607e..878b87772 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java @@ -24,6 +24,7 @@ public class KafkaFileCallbackConfig extends AbstractFileCallbackConfig { private String topicPassword; private String bootstrapServers; private String securityProtocols; + private String keySerializer = "org.apache.kafka.common.serialization.StringSerializer"; public KafkaFileCallbackConfig() { } @@ -41,10 +42,46 @@ public KafkaFileCallbackConfig(String topicName, String topicUser, String topicP @Override public Properties toProps() { Properties prop = new Properties(); + prop.setProperty("bootstrap.servers", bootstrapServers); + prop.setProperty("topic.name", topicName); + prop.setProperty("key.serializer", keySerializer); + prop.setProperty("value.serializer", keySerializer); return prop; } public String getTopicName() { return topicName; } + + public String getTopicUser() { + return topicUser; + } + + public String getTopicPassword() { + return topicPassword; + } + + public String getBootstrapServers() { + return bootstrapServers; + } + + public String getSecurityProtocols() { + return securityProtocols; + } + + public String getKeySerializer() { + return keySerializer; + } + + public String toJson() { + final StringBuffer sb = new StringBuffer("{"); + sb.append("\"topic_name\": \"").append(topicName).append('"'); + sb.append(", \"topic_user\": \"").append(topicUser).append('"'); + sb.append(", \"topic_password\": \"").append(topicPassword).append('"'); + sb.append(", \"bootstrap_servers\": \"").append(bootstrapServers).append('"'); + sb.append(", \"security_protocols\": \"").append(securityProtocols).append('"'); + sb.append(", \"key_serializer\": \"").append(keySerializer).append('"'); + sb.append('}'); + return sb.toString(); + } } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java index a879cf3e1..5b4b5512b 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java @@ -200,7 +200,7 @@ public class S3SinkConnectorConfig extends StorageSinkConnectorConfig { * Callback configs */ public static final String FILE_CALLBACK_ENABLE = "s3.file.callback.enable"; - public static final boolean FILE_CALLBACK_ENABLE_DEFAULT = true; + public static final boolean FILE_CALLBACK_ENABLE_DEFAULT = false; public static final String FILE_CALLBACK_CLASS = "s3.file.callback.class"; public static final Class FILE_CALLBACK_CLASS_DEFAULT = KafkaFileCallbackProvider.class; diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java index 
ccfe7b00d..83e4c166b 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java @@ -20,6 +20,8 @@ import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.clients.producer.ProducerRecord; +import java.util.Arrays; + public class KafkaFileCallbackProvider implements FileCallbackProvider { private final String configJson; private final KafkaFileCallbackConfig kafkaConfig; @@ -33,8 +35,9 @@ public KafkaFileCallbackProvider(String configJson) { @Override public void call(String topicName,String s3Partition, String filePath, int partition, Long baseRecordTimestamp, Long currentTimestamp, int recordCount) { - System.out.println(this.configJson + filePath); - String value = topicName; + String value = String.join("|", Arrays.asList(topicName, s3Partition, filePath, + String.valueOf(partition), String.valueOf(baseRecordTimestamp), + String.valueOf(currentTimestamp), String.valueOf(recordCount))); try (final Producer producer = new KafkaProducer<>(kafkaConfig.toProps())) { producer.send(new ProducerRecord<>(kafkaConfig.getTopicName(), topicName, value), (event, ex) -> { @@ -42,6 +45,8 @@ public void call(String topicName,String s3Partition, String filePath, int parti ex.printStackTrace(); } }); + } catch (Exception e) { + System.out.println("foo"); } } diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java index 3651f2320..ff46b0c5b 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java @@ -26,9 +26,11 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import io.confluent.connect.s3.KafkaFileCallbackConfig; import io.confluent.connect.s3.S3SinkConnector; import io.confluent.connect.s3.S3SinkConnectorConfig.IgnoreOrFailBehavior; import io.confluent.connect.s3.S3SinkConnectorConfig.OutputWriteBehavior; +import io.confluent.connect.s3.callback.KafkaFileCallbackProvider; import io.confluent.connect.s3.format.avro.AvroFormat; import io.confluent.connect.s3.format.json.JsonFormat; import io.confluent.connect.s3.format.parquet.ParquetFormat; @@ -61,6 +63,7 @@ import org.apache.kafka.connect.runtime.SinkConnectorConfig; import org.apache.kafka.connect.sink.SinkRecord; import org.apache.kafka.connect.storage.StringConverter; +import org.apache.kafka.connect.util.clusters.EmbeddedKafkaCluster; import org.apache.kafka.test.IntegrationTest; import org.junit.After; import org.junit.Before; @@ -448,4 +451,21 @@ private void setupProperties() { // aws credential if exists props.putAll(getAWSCredentialFromPath()); } + + @Test + public void testCallBackPartition() { + EmbeddedKafkaCluster kafka = connect.kafka(); + kafka.start(); + String bootstrapServers = kafka.bootstrapServers(); + String callbackTopic = "callback_topic"; + kafka.createTopic(callbackTopic); + KafkaFileCallbackConfig kafkaFileCallbackConfig = new KafkaFileCallbackConfig(callbackTopic, "", "", bootstrapServers, ""); + KafkaFileCallbackProvider callBack = new KafkaFileCallbackProvider(kafkaFileCallbackConfig.toJson()); + callBack.call("baz-topic", "version/event/hour", "hey.avro", 12, 1234L, 123L, 34); + ConsumerRecords res = kafka.consume(1, 1000L, callbackTopic); 
+ ConsumerRecord next = res.iterator().next(); + String key = new String(next.key()); + String value = new String(next.value()); + System.out.println(key + value); + } } From e2ac13015525c87a6151d13f38c2bb07d26689a2 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Wed, 26 Jul 2023 17:21:20 +0200 Subject: [PATCH 09/34] Turn value into avro serde --- .../connect/s3/KafkaFileCallbackConfig.java | 32 +- .../connect/s3/callback/Callback.java | 826 ++++++++++++++++++ .../callback/KafkaFileCallbackProvider.java | 8 +- .../s3/integration/S3SinkConnectorIT.java | 9 +- 4 files changed, 860 insertions(+), 15 deletions(-) create mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java index 878b87772..ed634c5a7 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java @@ -24,35 +24,34 @@ public class KafkaFileCallbackConfig extends AbstractFileCallbackConfig { private String topicPassword; private String bootstrapServers; private String securityProtocols; + + + private String schemaRegistryUrl; private String keySerializer = "org.apache.kafka.common.serialization.StringSerializer"; + private String valueSerializer = "io.confluent.kafka.serializers.KafkaAvroSerializer"; public KafkaFileCallbackConfig() { } public KafkaFileCallbackConfig(String topicName, String topicUser, String topicPassword, - String bootstrapServers, String securityProtocols) { + String bootstrapServers, String securityProtocols, String schemaRegistryUrl) { this.topicName = topicName; this.topicUser = topicUser; this.topicPassword = topicPassword; this.bootstrapServers = bootstrapServers; this.securityProtocols = securityProtocols; + this.schemaRegistryUrl = schemaRegistryUrl; } - @Override - public Properties toProps() { - Properties prop = new Properties(); - prop.setProperty("bootstrap.servers", bootstrapServers); - prop.setProperty("topic.name", topicName); - prop.setProperty("key.serializer", keySerializer); - prop.setProperty("value.serializer", keySerializer); - return prop; - } public String getTopicName() { return topicName; } + public String getSchemaRegistryUrl() { + return schemaRegistryUrl; + } public String getTopicUser() { return topicUser; } @@ -80,8 +79,19 @@ public String toJson() { sb.append(", \"topic_password\": \"").append(topicPassword).append('"'); sb.append(", \"bootstrap_servers\": \"").append(bootstrapServers).append('"'); sb.append(", \"security_protocols\": \"").append(securityProtocols).append('"'); - sb.append(", \"key_serializer\": \"").append(keySerializer).append('"'); + sb.append(", \"schema_registry_url\": \"").append(schemaRegistryUrl).append('"'); sb.append('}'); return sb.toString(); } + + @Override + public Properties toProps() { + Properties prop = new Properties(); + prop.setProperty("bootstrap.servers", bootstrapServers); + prop.setProperty("topic.name", topicName); + prop.setProperty("key.serializer", keySerializer); + prop.setProperty("value.serializer", valueSerializer); + prop.setProperty("schema.registry.url", schemaRegistryUrl); + return prop; + } } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java new file mode 100644 index 
000000000..158df1714 --- /dev/null +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java @@ -0,0 +1,826 @@ +/** + * Autogenerated by Avro + * + * DO NOT EDIT DIRECTLY + */ +package io.confluent.connect.s3.callback; + +import org.apache.avro.generic.GenericArray; +import org.apache.avro.specific.SpecificData; +import org.apache.avro.util.Utf8; +import org.apache.avro.message.BinaryMessageEncoder; +import org.apache.avro.message.BinaryMessageDecoder; +import org.apache.avro.message.SchemaStore; + +/** This event represents a callback Message */ +@org.apache.avro.specific.AvroGenerated +public class Callback extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { + private static final long serialVersionUID = 5786357167649199011L; + public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Callback\",\"namespace\":\"io.confluent.connect.s3.callback\",\"doc\":\"This event represents a callback Message\",\"fields\":[{\"name\":\"topicName\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"s3Partition\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"filePath\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"partition\",\"type\":\"int\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"baseRecordTimestamp\",\"type\":\"long\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"currentTimestamp\",\"type\":\"long\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"recordCount\",\"type\":\"int\",\"doc\":\"A unique event ID (UUID)\"}]}"); + public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } + + private static SpecificData MODEL$ = new SpecificData(); + + private static final BinaryMessageEncoder ENCODER = + new BinaryMessageEncoder(MODEL$, SCHEMA$); + + private static final BinaryMessageDecoder DECODER = + new BinaryMessageDecoder(MODEL$, SCHEMA$); + + /** + * Return the BinaryMessageEncoder instance used by this class. + * @return the message encoder used by this class + */ + public static BinaryMessageEncoder getEncoder() { + return ENCODER; + } + + /** + * Return the BinaryMessageDecoder instance used by this class. + * @return the message decoder used by this class + */ + public static BinaryMessageDecoder getDecoder() { + return DECODER; + } + + /** + * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}. + * @param resolver a {@link SchemaStore} used to find schemas by fingerprint + * @return a BinaryMessageDecoder instance for this class backed by the given SchemaStore + */ + public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { + return new BinaryMessageDecoder(MODEL$, SCHEMA$, resolver); + } + + /** + * Serializes this Callback to a ByteBuffer. + * @return a buffer holding the serialized data for this instance + * @throws java.io.IOException if this instance could not be serialized + */ + public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { + return ENCODER.encode(this); + } + + /** + * Deserializes a Callback from a ByteBuffer. 
+ * @param b a byte buffer holding serialized data for an instance of this class + * @return a Callback instance decoded from the given buffer + * @throws java.io.IOException if the given bytes could not be deserialized into an instance of this class + */ + public static Callback fromByteBuffer( + java.nio.ByteBuffer b) throws java.io.IOException { + return DECODER.decode(b); + } + + /** A unique event ID (UUID) */ + @Deprecated public java.lang.String topicName; + /** A unique event ID (UUID) */ + @Deprecated public java.lang.String s3Partition; + /** A unique event ID (UUID) */ + @Deprecated public java.lang.String filePath; + /** A unique event ID (UUID) */ + @Deprecated public int partition; + /** A unique event ID (UUID) */ + @Deprecated public long baseRecordTimestamp; + /** A unique event ID (UUID) */ + @Deprecated public long currentTimestamp; + /** A unique event ID (UUID) */ + @Deprecated public int recordCount; + + /** + * Default constructor. Note that this does not initialize fields + * to their default values from the schema. If that is desired then + * one should use newBuilder(). + */ + public Callback() {} + + /** + * All-args constructor. + * @param topicName A unique event ID (UUID) + * @param s3Partition A unique event ID (UUID) + * @param filePath A unique event ID (UUID) + * @param partition A unique event ID (UUID) + * @param baseRecordTimestamp A unique event ID (UUID) + * @param currentTimestamp A unique event ID (UUID) + * @param recordCount A unique event ID (UUID) + */ + public Callback(java.lang.String topicName, java.lang.String s3Partition, java.lang.String filePath, java.lang.Integer partition, java.lang.Long baseRecordTimestamp, java.lang.Long currentTimestamp, java.lang.Integer recordCount) { + this.topicName = topicName; + this.s3Partition = s3Partition; + this.filePath = filePath; + this.partition = partition; + this.baseRecordTimestamp = baseRecordTimestamp; + this.currentTimestamp = currentTimestamp; + this.recordCount = recordCount; + } + + public org.apache.avro.specific.SpecificData getSpecificData() { return MODEL$; } + public org.apache.avro.Schema getSchema() { return SCHEMA$; } + // Used by DatumWriter. Applications should not call. + public java.lang.Object get(int field$) { + switch (field$) { + case 0: return topicName; + case 1: return s3Partition; + case 2: return filePath; + case 3: return partition; + case 4: return baseRecordTimestamp; + case 5: return currentTimestamp; + case 6: return recordCount; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + + // Used by DatumReader. Applications should not call. + @SuppressWarnings(value="unchecked") + public void put(int field$, java.lang.Object value$) { + switch (field$) { + case 0: topicName = (java.lang.String)value$; break; + case 1: s3Partition = (java.lang.String)value$; break; + case 2: filePath = (java.lang.String)value$; break; + case 3: partition = (java.lang.Integer)value$; break; + case 4: baseRecordTimestamp = (java.lang.Long)value$; break; + case 5: currentTimestamp = (java.lang.Long)value$; break; + case 6: recordCount = (java.lang.Integer)value$; break; + default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + } + } + + /** + * Gets the value of the 'topicName' field. + * @return A unique event ID (UUID) + */ + public java.lang.String getTopicName() { + return topicName; + } + + + /** + * Sets the value of the 'topicName' field. + * A unique event ID (UUID) + * @param value the value to set. 
+ */ + public void setTopicName(java.lang.String value) { + this.topicName = value; + } + + /** + * Gets the value of the 's3Partition' field. + * @return A unique event ID (UUID) + */ + public java.lang.String getS3Partition() { + return s3Partition; + } + + + /** + * Sets the value of the 's3Partition' field. + * A unique event ID (UUID) + * @param value the value to set. + */ + public void setS3Partition(java.lang.String value) { + this.s3Partition = value; + } + + /** + * Gets the value of the 'filePath' field. + * @return A unique event ID (UUID) + */ + public java.lang.String getFilePath() { + return filePath; + } + + + /** + * Sets the value of the 'filePath' field. + * A unique event ID (UUID) + * @param value the value to set. + */ + public void setFilePath(java.lang.String value) { + this.filePath = value; + } + + /** + * Gets the value of the 'partition' field. + * @return A unique event ID (UUID) + */ + public int getPartition() { + return partition; + } + + + /** + * Sets the value of the 'partition' field. + * A unique event ID (UUID) + * @param value the value to set. + */ + public void setPartition(int value) { + this.partition = value; + } + + /** + * Gets the value of the 'baseRecordTimestamp' field. + * @return A unique event ID (UUID) + */ + public long getBaseRecordTimestamp() { + return baseRecordTimestamp; + } + + + /** + * Sets the value of the 'baseRecordTimestamp' field. + * A unique event ID (UUID) + * @param value the value to set. + */ + public void setBaseRecordTimestamp(long value) { + this.baseRecordTimestamp = value; + } + + /** + * Gets the value of the 'currentTimestamp' field. + * @return A unique event ID (UUID) + */ + public long getCurrentTimestamp() { + return currentTimestamp; + } + + + /** + * Sets the value of the 'currentTimestamp' field. + * A unique event ID (UUID) + * @param value the value to set. + */ + public void setCurrentTimestamp(long value) { + this.currentTimestamp = value; + } + + /** + * Gets the value of the 'recordCount' field. + * @return A unique event ID (UUID) + */ + public int getRecordCount() { + return recordCount; + } + + + /** + * Sets the value of the 'recordCount' field. + * A unique event ID (UUID) + * @param value the value to set. + */ + public void setRecordCount(int value) { + this.recordCount = value; + } + + /** + * Creates a new Callback RecordBuilder. + * @return A new Callback RecordBuilder + */ + public static io.confluent.connect.s3.callback.Callback.Builder newBuilder() { + return new io.confluent.connect.s3.callback.Callback.Builder(); + } + + /** + * Creates a new Callback RecordBuilder by copying an existing Builder. + * @param other The existing builder to copy. + * @return A new Callback RecordBuilder + */ + public static io.confluent.connect.s3.callback.Callback.Builder newBuilder(io.confluent.connect.s3.callback.Callback.Builder other) { + if (other == null) { + return new io.confluent.connect.s3.callback.Callback.Builder(); + } else { + return new io.confluent.connect.s3.callback.Callback.Builder(other); + } + } + + /** + * Creates a new Callback RecordBuilder by copying an existing Callback instance. + * @param other The existing instance to copy. 
+ * @return A new Callback RecordBuilder + */ + public static io.confluent.connect.s3.callback.Callback.Builder newBuilder(io.confluent.connect.s3.callback.Callback other) { + if (other == null) { + return new io.confluent.connect.s3.callback.Callback.Builder(); + } else { + return new io.confluent.connect.s3.callback.Callback.Builder(other); + } + } + + /** + * RecordBuilder for Callback instances. + */ + public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase + implements org.apache.avro.data.RecordBuilder { + + /** A unique event ID (UUID) */ + private java.lang.String topicName; + /** A unique event ID (UUID) */ + private java.lang.String s3Partition; + /** A unique event ID (UUID) */ + private java.lang.String filePath; + /** A unique event ID (UUID) */ + private int partition; + /** A unique event ID (UUID) */ + private long baseRecordTimestamp; + /** A unique event ID (UUID) */ + private long currentTimestamp; + /** A unique event ID (UUID) */ + private int recordCount; + + /** Creates a new Builder */ + private Builder() { + super(SCHEMA$); + } + + /** + * Creates a Builder by copying an existing Builder. + * @param other The existing Builder to copy. + */ + private Builder(io.confluent.connect.s3.callback.Callback.Builder other) { + super(other); + if (isValidValue(fields()[0], other.topicName)) { + this.topicName = data().deepCopy(fields()[0].schema(), other.topicName); + fieldSetFlags()[0] = other.fieldSetFlags()[0]; + } + if (isValidValue(fields()[1], other.s3Partition)) { + this.s3Partition = data().deepCopy(fields()[1].schema(), other.s3Partition); + fieldSetFlags()[1] = other.fieldSetFlags()[1]; + } + if (isValidValue(fields()[2], other.filePath)) { + this.filePath = data().deepCopy(fields()[2].schema(), other.filePath); + fieldSetFlags()[2] = other.fieldSetFlags()[2]; + } + if (isValidValue(fields()[3], other.partition)) { + this.partition = data().deepCopy(fields()[3].schema(), other.partition); + fieldSetFlags()[3] = other.fieldSetFlags()[3]; + } + if (isValidValue(fields()[4], other.baseRecordTimestamp)) { + this.baseRecordTimestamp = data().deepCopy(fields()[4].schema(), other.baseRecordTimestamp); + fieldSetFlags()[4] = other.fieldSetFlags()[4]; + } + if (isValidValue(fields()[5], other.currentTimestamp)) { + this.currentTimestamp = data().deepCopy(fields()[5].schema(), other.currentTimestamp); + fieldSetFlags()[5] = other.fieldSetFlags()[5]; + } + if (isValidValue(fields()[6], other.recordCount)) { + this.recordCount = data().deepCopy(fields()[6].schema(), other.recordCount); + fieldSetFlags()[6] = other.fieldSetFlags()[6]; + } + } + + /** + * Creates a Builder by copying an existing Callback instance + * @param other The existing instance to copy. 
+ */ + private Builder(io.confluent.connect.s3.callback.Callback other) { + super(SCHEMA$); + if (isValidValue(fields()[0], other.topicName)) { + this.topicName = data().deepCopy(fields()[0].schema(), other.topicName); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.s3Partition)) { + this.s3Partition = data().deepCopy(fields()[1].schema(), other.s3Partition); + fieldSetFlags()[1] = true; + } + if (isValidValue(fields()[2], other.filePath)) { + this.filePath = data().deepCopy(fields()[2].schema(), other.filePath); + fieldSetFlags()[2] = true; + } + if (isValidValue(fields()[3], other.partition)) { + this.partition = data().deepCopy(fields()[3].schema(), other.partition); + fieldSetFlags()[3] = true; + } + if (isValidValue(fields()[4], other.baseRecordTimestamp)) { + this.baseRecordTimestamp = data().deepCopy(fields()[4].schema(), other.baseRecordTimestamp); + fieldSetFlags()[4] = true; + } + if (isValidValue(fields()[5], other.currentTimestamp)) { + this.currentTimestamp = data().deepCopy(fields()[5].schema(), other.currentTimestamp); + fieldSetFlags()[5] = true; + } + if (isValidValue(fields()[6], other.recordCount)) { + this.recordCount = data().deepCopy(fields()[6].schema(), other.recordCount); + fieldSetFlags()[6] = true; + } + } + + /** + * Gets the value of the 'topicName' field. + * A unique event ID (UUID) + * @return The value. + */ + public java.lang.String getTopicName() { + return topicName; + } + + + /** + * Sets the value of the 'topicName' field. + * A unique event ID (UUID) + * @param value The value of 'topicName'. + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder setTopicName(java.lang.String value) { + validate(fields()[0], value); + this.topicName = value; + fieldSetFlags()[0] = true; + return this; + } + + /** + * Checks whether the 'topicName' field has been set. + * A unique event ID (UUID) + * @return True if the 'topicName' field has been set, false otherwise. + */ + public boolean hasTopicName() { + return fieldSetFlags()[0]; + } + + + /** + * Clears the value of the 'topicName' field. + * A unique event ID (UUID) + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder clearTopicName() { + topicName = null; + fieldSetFlags()[0] = false; + return this; + } + + /** + * Gets the value of the 's3Partition' field. + * A unique event ID (UUID) + * @return The value. + */ + public java.lang.String getS3Partition() { + return s3Partition; + } + + + /** + * Sets the value of the 's3Partition' field. + * A unique event ID (UUID) + * @param value The value of 's3Partition'. + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder setS3Partition(java.lang.String value) { + validate(fields()[1], value); + this.s3Partition = value; + fieldSetFlags()[1] = true; + return this; + } + + /** + * Checks whether the 's3Partition' field has been set. + * A unique event ID (UUID) + * @return True if the 's3Partition' field has been set, false otherwise. + */ + public boolean hasS3Partition() { + return fieldSetFlags()[1]; + } + + + /** + * Clears the value of the 's3Partition' field. + * A unique event ID (UUID) + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder clearS3Partition() { + s3Partition = null; + fieldSetFlags()[1] = false; + return this; + } + + /** + * Gets the value of the 'filePath' field. + * A unique event ID (UUID) + * @return The value. 
+ */ + public java.lang.String getFilePath() { + return filePath; + } + + + /** + * Sets the value of the 'filePath' field. + * A unique event ID (UUID) + * @param value The value of 'filePath'. + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder setFilePath(java.lang.String value) { + validate(fields()[2], value); + this.filePath = value; + fieldSetFlags()[2] = true; + return this; + } + + /** + * Checks whether the 'filePath' field has been set. + * A unique event ID (UUID) + * @return True if the 'filePath' field has been set, false otherwise. + */ + public boolean hasFilePath() { + return fieldSetFlags()[2]; + } + + + /** + * Clears the value of the 'filePath' field. + * A unique event ID (UUID) + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder clearFilePath() { + filePath = null; + fieldSetFlags()[2] = false; + return this; + } + + /** + * Gets the value of the 'partition' field. + * A unique event ID (UUID) + * @return The value. + */ + public int getPartition() { + return partition; + } + + + /** + * Sets the value of the 'partition' field. + * A unique event ID (UUID) + * @param value The value of 'partition'. + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder setPartition(int value) { + validate(fields()[3], value); + this.partition = value; + fieldSetFlags()[3] = true; + return this; + } + + /** + * Checks whether the 'partition' field has been set. + * A unique event ID (UUID) + * @return True if the 'partition' field has been set, false otherwise. + */ + public boolean hasPartition() { + return fieldSetFlags()[3]; + } + + + /** + * Clears the value of the 'partition' field. + * A unique event ID (UUID) + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder clearPartition() { + fieldSetFlags()[3] = false; + return this; + } + + /** + * Gets the value of the 'baseRecordTimestamp' field. + * A unique event ID (UUID) + * @return The value. + */ + public long getBaseRecordTimestamp() { + return baseRecordTimestamp; + } + + + /** + * Sets the value of the 'baseRecordTimestamp' field. + * A unique event ID (UUID) + * @param value The value of 'baseRecordTimestamp'. + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder setBaseRecordTimestamp(long value) { + validate(fields()[4], value); + this.baseRecordTimestamp = value; + fieldSetFlags()[4] = true; + return this; + } + + /** + * Checks whether the 'baseRecordTimestamp' field has been set. + * A unique event ID (UUID) + * @return True if the 'baseRecordTimestamp' field has been set, false otherwise. + */ + public boolean hasBaseRecordTimestamp() { + return fieldSetFlags()[4]; + } + + + /** + * Clears the value of the 'baseRecordTimestamp' field. + * A unique event ID (UUID) + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder clearBaseRecordTimestamp() { + fieldSetFlags()[4] = false; + return this; + } + + /** + * Gets the value of the 'currentTimestamp' field. + * A unique event ID (UUID) + * @return The value. + */ + public long getCurrentTimestamp() { + return currentTimestamp; + } + + + /** + * Sets the value of the 'currentTimestamp' field. + * A unique event ID (UUID) + * @param value The value of 'currentTimestamp'. + * @return This builder. 
+ */ + public io.confluent.connect.s3.callback.Callback.Builder setCurrentTimestamp(long value) { + validate(fields()[5], value); + this.currentTimestamp = value; + fieldSetFlags()[5] = true; + return this; + } + + /** + * Checks whether the 'currentTimestamp' field has been set. + * A unique event ID (UUID) + * @return True if the 'currentTimestamp' field has been set, false otherwise. + */ + public boolean hasCurrentTimestamp() { + return fieldSetFlags()[5]; + } + + + /** + * Clears the value of the 'currentTimestamp' field. + * A unique event ID (UUID) + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder clearCurrentTimestamp() { + fieldSetFlags()[5] = false; + return this; + } + + /** + * Gets the value of the 'recordCount' field. + * A unique event ID (UUID) + * @return The value. + */ + public int getRecordCount() { + return recordCount; + } + + + /** + * Sets the value of the 'recordCount' field. + * A unique event ID (UUID) + * @param value The value of 'recordCount'. + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder setRecordCount(int value) { + validate(fields()[6], value); + this.recordCount = value; + fieldSetFlags()[6] = true; + return this; + } + + /** + * Checks whether the 'recordCount' field has been set. + * A unique event ID (UUID) + * @return True if the 'recordCount' field has been set, false otherwise. + */ + public boolean hasRecordCount() { + return fieldSetFlags()[6]; + } + + + /** + * Clears the value of the 'recordCount' field. + * A unique event ID (UUID) + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder clearRecordCount() { + fieldSetFlags()[6] = false; + return this; + } + + @Override + @SuppressWarnings("unchecked") + public Callback build() { + try { + Callback record = new Callback(); + record.topicName = fieldSetFlags()[0] ? this.topicName : (java.lang.String) defaultValue(fields()[0]); + record.s3Partition = fieldSetFlags()[1] ? this.s3Partition : (java.lang.String) defaultValue(fields()[1]); + record.filePath = fieldSetFlags()[2] ? this.filePath : (java.lang.String) defaultValue(fields()[2]); + record.partition = fieldSetFlags()[3] ? this.partition : (java.lang.Integer) defaultValue(fields()[3]); + record.baseRecordTimestamp = fieldSetFlags()[4] ? this.baseRecordTimestamp : (java.lang.Long) defaultValue(fields()[4]); + record.currentTimestamp = fieldSetFlags()[5] ? this.currentTimestamp : (java.lang.Long) defaultValue(fields()[5]); + record.recordCount = fieldSetFlags()[6] ? 
this.recordCount : (java.lang.Integer) defaultValue(fields()[6]); + return record; + } catch (org.apache.avro.AvroMissingFieldException e) { + throw e; + } catch (java.lang.Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumWriter + WRITER$ = (org.apache.avro.io.DatumWriter)MODEL$.createDatumWriter(SCHEMA$); + + @Override public void writeExternal(java.io.ObjectOutput out) + throws java.io.IOException { + WRITER$.write(this, SpecificData.getEncoder(out)); + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumReader + READER$ = (org.apache.avro.io.DatumReader)MODEL$.createDatumReader(SCHEMA$); + + @Override public void readExternal(java.io.ObjectInput in) + throws java.io.IOException { + READER$.read(this, SpecificData.getDecoder(in)); + } + + @Override protected boolean hasCustomCoders() { return true; } + + @Override public void customEncode(org.apache.avro.io.Encoder out) + throws java.io.IOException + { + out.writeString(this.topicName); + + out.writeString(this.s3Partition); + + out.writeString(this.filePath); + + out.writeInt(this.partition); + + out.writeLong(this.baseRecordTimestamp); + + out.writeLong(this.currentTimestamp); + + out.writeInt(this.recordCount); + + } + + @Override public void customDecode(org.apache.avro.io.ResolvingDecoder in) + throws java.io.IOException + { + org.apache.avro.Schema.Field[] fieldOrder = in.readFieldOrderIfDiff(); + if (fieldOrder == null) { + this.topicName = in.readString(); + + this.s3Partition = in.readString(); + + this.filePath = in.readString(); + + this.partition = in.readInt(); + + this.baseRecordTimestamp = in.readLong(); + + this.currentTimestamp = in.readLong(); + + this.recordCount = in.readInt(); + + } else { + for (int i = 0; i < 7; i++) { + switch (fieldOrder[i].pos()) { + case 0: + this.topicName = in.readString(); + break; + + case 1: + this.s3Partition = in.readString(); + break; + + case 2: + this.filePath = in.readString(); + break; + + case 3: + this.partition = in.readInt(); + break; + + case 4: + this.baseRecordTimestamp = in.readLong(); + break; + + case 5: + this.currentTimestamp = in.readLong(); + break; + + case 6: + this.recordCount = in.readInt(); + break; + + default: + throw new java.io.IOException("Corrupt ResolvingDecoder."); + } + } + } + } +} diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java index 83e4c166b..7a12e4122 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java @@ -16,6 +16,7 @@ package io.confluent.connect.s3.callback; import io.confluent.connect.s3.KafkaFileCallbackConfig; +import org.apache.avro.specific.SpecificRecord; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.clients.producer.ProducerRecord; @@ -38,8 +39,11 @@ public void call(String topicName,String s3Partition, String filePath, int parti String value = String.join("|", Arrays.asList(topicName, s3Partition, filePath, String.valueOf(partition), String.valueOf(baseRecordTimestamp), String.valueOf(currentTimestamp), String.valueOf(recordCount))); - try (final Producer producer = new 
KafkaProducer<>(kafkaConfig.toProps())) { - producer.send(new ProducerRecord<>(kafkaConfig.getTopicName(), topicName, value), + Callback callback = new Callback(topicName, s3Partition, filePath, partition, + baseRecordTimestamp, currentTimestamp, recordCount); + try (final Producer producer = + new KafkaProducer<>(kafkaConfig.toProps())) { + producer.send(new ProducerRecord<>(kafkaConfig.getTopicName(), topicName, callback), (event, ex) -> { if (ex != null) { ex.printStackTrace(); diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java index ff46b0c5b..0075b811c 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java @@ -30,12 +30,15 @@ import io.confluent.connect.s3.S3SinkConnector; import io.confluent.connect.s3.S3SinkConnectorConfig.IgnoreOrFailBehavior; import io.confluent.connect.s3.S3SinkConnectorConfig.OutputWriteBehavior; +import io.confluent.connect.s3.callback.Callback; import io.confluent.connect.s3.callback.KafkaFileCallbackProvider; import io.confluent.connect.s3.format.avro.AvroFormat; import io.confluent.connect.s3.format.json.JsonFormat; import io.confluent.connect.s3.format.parquet.ParquetFormat; import io.confluent.connect.s3.storage.S3Storage; import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -48,6 +51,7 @@ import java.util.concurrent.TimeUnit; import io.confluent.connect.s3.util.EmbeddedConnectUtils; +import jdk.nashorn.internal.codegen.CompilerConstants; import org.apache.commons.io.FileUtils; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; @@ -453,13 +457,13 @@ private void setupProperties() { } @Test - public void testCallBackPartition() { + public void testCallBackPartition() throws IOException { EmbeddedKafkaCluster kafka = connect.kafka(); kafka.start(); String bootstrapServers = kafka.bootstrapServers(); String callbackTopic = "callback_topic"; kafka.createTopic(callbackTopic); - KafkaFileCallbackConfig kafkaFileCallbackConfig = new KafkaFileCallbackConfig(callbackTopic, "", "", bootstrapServers, ""); + KafkaFileCallbackConfig kafkaFileCallbackConfig = new KafkaFileCallbackConfig(callbackTopic, "", "", bootstrapServers, "", restApp.restServer.getURI().toString()); KafkaFileCallbackProvider callBack = new KafkaFileCallbackProvider(kafkaFileCallbackConfig.toJson()); callBack.call("baz-topic", "version/event/hour", "hey.avro", 12, 1234L, 123L, 34); ConsumerRecords res = kafka.consume(1, 1000L, callbackTopic); @@ -467,5 +471,6 @@ public void testCallBackPartition() { String key = new String(next.key()); String value = new String(next.value()); System.out.println(key + value); + } } From 34ceb091486828487df3d0f99a3687d9441875e1 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 28 Jul 2023 11:40:02 +0200 Subject: [PATCH 10/34] Build avsc from maven --- kafka-connect-s3/pom.xml | 18 + .../connect/s3/callback/Callback.java | 423 +++++++++--------- .../callback/KafkaFileCallbackProvider.java | 12 +- .../src/main/resources/callback.avsc | 43 ++ 4 files changed, 287 insertions(+), 209 deletions(-) create mode 100644 kafka-connect-s3/src/main/resources/callback.avsc diff --git a/kafka-connect-s3/pom.xml 
b/kafka-connect-s3/pom.xml index c49ebd663..f485deab2 100644 --- a/kafka-connect-s3/pom.xml +++ b/kafka-connect-s3/pom.xml @@ -41,6 +41,7 @@ 1.0.1 1.18.3 2.22.1 + 1.9.2 false 2.22.1 30.1.1-jre @@ -319,6 +320,23 @@ + + org.apache.avro + avro-maven-plugin + ${maven.avro.plugin.version} + + + generate-sources + + schema + + + ${project.basedir}/src/main/resources/ + ${project.basedir}/src/main/java/ + + + + diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java index 158df1714..2af520131 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java @@ -16,16 +16,18 @@ @org.apache.avro.specific.AvroGenerated public class Callback extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { private static final long serialVersionUID = 5786357167649199011L; - public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Callback\",\"namespace\":\"io.confluent.connect.s3.callback\",\"doc\":\"This event represents a callback Message\",\"fields\":[{\"name\":\"topicName\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"s3Partition\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"filePath\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"partition\",\"type\":\"int\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"baseRecordTimestamp\",\"type\":\"long\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"currentTimestamp\",\"type\":\"long\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"recordCount\",\"type\":\"int\",\"doc\":\"A unique event ID (UUID)\"}]}"); + + + public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Callback\",\"namespace\":\"io.confluent.connect.s3.callback\",\"doc\":\"This event represents a callback Message\",\"fields\":[{\"name\":\"topicName\",\"type\":\"string\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"s3Partition\",\"type\":\"string\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"filePath\",\"type\":\"string\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"partition\",\"type\":\"int\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"baseRecordTimestamp\",\"type\":\"long\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"currentTimestamp\",\"type\":\"long\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"recordCount\",\"type\":\"int\",\"doc\":\"A unique event ID (UUID)\"}]}"); public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } - private static SpecificData MODEL$ = new SpecificData(); + private static final SpecificData MODEL$ = new SpecificData(); private static final BinaryMessageEncoder ENCODER = - new BinaryMessageEncoder(MODEL$, SCHEMA$); + new BinaryMessageEncoder<>(MODEL$, SCHEMA$); private static final BinaryMessageDecoder DECODER = - new BinaryMessageDecoder(MODEL$, SCHEMA$); + new BinaryMessageDecoder<>(MODEL$, SCHEMA$); /** * Return the BinaryMessageEncoder instance used by this class. 
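
For reference, a minimal usage sketch of the Callback record and the Builder API regenerated below (illustrative only, not part of the patch; all field values are made up):

    Callback event = Callback.newBuilder()
        .setTopicName("orders")                                    // source topic (hypothetical)
        .setS3Partition("topics/orders/partition=0")               // S3 prefix the file was written under
        .setFilePath("topics/orders/partition=0/orders+0+0000000000.avro")
        .setPartition(0)
        .setBaseRecordTimestamp(1690000000000L)
        .setCurrentTimestamp(1690000001000L)
        .setRecordCount(100)
        .build();
    // event.toByteBuffer() would yield the Avro single-object encoding (throws java.io.IOException).
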
@@ -49,7 +51,7 @@ public static BinaryMessageDecoder getDecoder() { * @return a BinaryMessageDecoder instance for this class backed by the given SchemaStore */ public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { - return new BinaryMessageDecoder(MODEL$, SCHEMA$, resolver); + return new BinaryMessageDecoder<>(MODEL$, SCHEMA$, resolver); } /** @@ -68,24 +70,24 @@ public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { * @throws java.io.IOException if the given bytes could not be deserialized into an instance of this class */ public static Callback fromByteBuffer( - java.nio.ByteBuffer b) throws java.io.IOException { + java.nio.ByteBuffer b) throws java.io.IOException { return DECODER.decode(b); } /** A unique event ID (UUID) */ - @Deprecated public java.lang.String topicName; + private java.lang.CharSequence topicName; /** A unique event ID (UUID) */ - @Deprecated public java.lang.String s3Partition; + private java.lang.CharSequence s3Partition; /** A unique event ID (UUID) */ - @Deprecated public java.lang.String filePath; + private java.lang.CharSequence filePath; /** A unique event ID (UUID) */ - @Deprecated public int partition; + private int partition; /** A unique event ID (UUID) */ - @Deprecated public long baseRecordTimestamp; + private long baseRecordTimestamp; /** A unique event ID (UUID) */ - @Deprecated public long currentTimestamp; + private long currentTimestamp; /** A unique event ID (UUID) */ - @Deprecated public int recordCount; + private int recordCount; /** * Default constructor. Note that this does not initialize fields @@ -104,7 +106,7 @@ public Callback() {} * @param currentTimestamp A unique event ID (UUID) * @param recordCount A unique event ID (UUID) */ - public Callback(java.lang.String topicName, java.lang.String s3Partition, java.lang.String filePath, java.lang.Integer partition, java.lang.Long baseRecordTimestamp, java.lang.Long currentTimestamp, java.lang.Integer recordCount) { + public Callback(java.lang.CharSequence topicName, java.lang.CharSequence s3Partition, java.lang.CharSequence filePath, java.lang.Integer partition, java.lang.Long baseRecordTimestamp, java.lang.Long currentTimestamp, java.lang.Integer recordCount) { this.topicName = topicName; this.s3Partition = s3Partition; this.filePath = filePath; @@ -114,34 +116,40 @@ public Callback(java.lang.String topicName, java.lang.String s3Partition, java.l this.recordCount = recordCount; } + @Override public org.apache.avro.specific.SpecificData getSpecificData() { return MODEL$; } + + @Override public org.apache.avro.Schema getSchema() { return SCHEMA$; } + // Used by DatumWriter. Applications should not call. + @Override public java.lang.Object get(int field$) { switch (field$) { - case 0: return topicName; - case 1: return s3Partition; - case 2: return filePath; - case 3: return partition; - case 4: return baseRecordTimestamp; - case 5: return currentTimestamp; - case 6: return recordCount; - default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + case 0: return topicName; + case 1: return s3Partition; + case 2: return filePath; + case 3: return partition; + case 4: return baseRecordTimestamp; + case 5: return currentTimestamp; + case 6: return recordCount; + default: throw new IndexOutOfBoundsException("Invalid index: " + field$); } } // Used by DatumReader. Applications should not call. 
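
Note that the regenerated accessors now return java.lang.CharSequence rather than java.lang.String, and decoding may populate the fields with org.apache.avro.util.Utf8 instances (see the customDecode changes further below). A one-line sketch of the conversion callers need when a String is required (the variable names are illustrative):

    String topic = event.getTopicName().toString();   // CharSequence may be a String or a Utf8
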
+ @Override @SuppressWarnings(value="unchecked") public void put(int field$, java.lang.Object value$) { switch (field$) { - case 0: topicName = (java.lang.String)value$; break; - case 1: s3Partition = (java.lang.String)value$; break; - case 2: filePath = (java.lang.String)value$; break; - case 3: partition = (java.lang.Integer)value$; break; - case 4: baseRecordTimestamp = (java.lang.Long)value$; break; - case 5: currentTimestamp = (java.lang.Long)value$; break; - case 6: recordCount = (java.lang.Integer)value$; break; - default: throw new org.apache.avro.AvroRuntimeException("Bad index"); + case 0: topicName = (java.lang.CharSequence)value$; break; + case 1: s3Partition = (java.lang.CharSequence)value$; break; + case 2: filePath = (java.lang.CharSequence)value$; break; + case 3: partition = (java.lang.Integer)value$; break; + case 4: baseRecordTimestamp = (java.lang.Long)value$; break; + case 5: currentTimestamp = (java.lang.Long)value$; break; + case 6: recordCount = (java.lang.Integer)value$; break; + default: throw new IndexOutOfBoundsException("Invalid index: " + field$); } } @@ -149,7 +157,7 @@ public void put(int field$, java.lang.Object value$) { * Gets the value of the 'topicName' field. * @return A unique event ID (UUID) */ - public java.lang.String getTopicName() { + public java.lang.CharSequence getTopicName() { return topicName; } @@ -159,7 +167,7 @@ public java.lang.String getTopicName() { * A unique event ID (UUID) * @param value the value to set. */ - public void setTopicName(java.lang.String value) { + public void setTopicName(java.lang.CharSequence value) { this.topicName = value; } @@ -167,7 +175,7 @@ public void setTopicName(java.lang.String value) { * Gets the value of the 's3Partition' field. * @return A unique event ID (UUID) */ - public java.lang.String getS3Partition() { + public java.lang.CharSequence getS3Partition() { return s3Partition; } @@ -177,7 +185,7 @@ public java.lang.String getS3Partition() { * A unique event ID (UUID) * @param value the value to set. */ - public void setS3Partition(java.lang.String value) { + public void setS3Partition(java.lang.CharSequence value) { this.s3Partition = value; } @@ -185,7 +193,7 @@ public void setS3Partition(java.lang.String value) { * Gets the value of the 'filePath' field. * @return A unique event ID (UUID) */ - public java.lang.String getFilePath() { + public java.lang.CharSequence getFilePath() { return filePath; } @@ -195,7 +203,7 @@ public java.lang.String getFilePath() { * A unique event ID (UUID) * @param value the value to set. */ - public void setFilePath(java.lang.String value) { + public void setFilePath(java.lang.CharSequence value) { this.filePath = value; } @@ -308,15 +316,16 @@ public static io.confluent.connect.s3.callback.Callback.Builder newBuilder(io.co /** * RecordBuilder for Callback instances. 
*/ + @org.apache.avro.specific.AvroGenerated public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase - implements org.apache.avro.data.RecordBuilder { + implements org.apache.avro.data.RecordBuilder { /** A unique event ID (UUID) */ - private java.lang.String topicName; + private java.lang.CharSequence topicName; /** A unique event ID (UUID) */ - private java.lang.String s3Partition; + private java.lang.CharSequence s3Partition; /** A unique event ID (UUID) */ - private java.lang.String filePath; + private java.lang.CharSequence filePath; /** A unique event ID (UUID) */ private int partition; /** A unique event ID (UUID) */ @@ -328,7 +337,7 @@ public static class Builder extends org.apache.avro.specific.SpecificRecordBuild /** Creates a new Builder */ private Builder() { - super(SCHEMA$); + super(SCHEMA$, MODEL$); } /** @@ -372,7 +381,7 @@ private Builder(io.confluent.connect.s3.callback.Callback.Builder other) { * @param other The existing instance to copy. */ private Builder(io.confluent.connect.s3.callback.Callback other) { - super(SCHEMA$); + super(SCHEMA$, MODEL$); if (isValidValue(fields()[0], other.topicName)) { this.topicName = data().deepCopy(fields()[0].schema(), other.topicName); fieldSetFlags()[0] = true; @@ -404,22 +413,22 @@ private Builder(io.confluent.connect.s3.callback.Callback other) { } /** - * Gets the value of the 'topicName' field. - * A unique event ID (UUID) - * @return The value. - */ - public java.lang.String getTopicName() { + * Gets the value of the 'topicName' field. + * A unique event ID (UUID) + * @return The value. + */ + public java.lang.CharSequence getTopicName() { return topicName; } /** - * Sets the value of the 'topicName' field. - * A unique event ID (UUID) - * @param value The value of 'topicName'. - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder setTopicName(java.lang.String value) { + * Sets the value of the 'topicName' field. + * A unique event ID (UUID) + * @param value The value of 'topicName'. + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder setTopicName(java.lang.CharSequence value) { validate(fields()[0], value); this.topicName = value; fieldSetFlags()[0] = true; @@ -427,20 +436,20 @@ public io.confluent.connect.s3.callback.Callback.Builder setTopicName(java.lang. } /** - * Checks whether the 'topicName' field has been set. - * A unique event ID (UUID) - * @return True if the 'topicName' field has been set, false otherwise. - */ + * Checks whether the 'topicName' field has been set. + * A unique event ID (UUID) + * @return True if the 'topicName' field has been set, false otherwise. + */ public boolean hasTopicName() { return fieldSetFlags()[0]; } /** - * Clears the value of the 'topicName' field. - * A unique event ID (UUID) - * @return This builder. - */ + * Clears the value of the 'topicName' field. + * A unique event ID (UUID) + * @return This builder. + */ public io.confluent.connect.s3.callback.Callback.Builder clearTopicName() { topicName = null; fieldSetFlags()[0] = false; @@ -448,22 +457,22 @@ public io.confluent.connect.s3.callback.Callback.Builder clearTopicName() { } /** - * Gets the value of the 's3Partition' field. - * A unique event ID (UUID) - * @return The value. - */ - public java.lang.String getS3Partition() { + * Gets the value of the 's3Partition' field. + * A unique event ID (UUID) + * @return The value. 
+ */ + public java.lang.CharSequence getS3Partition() { return s3Partition; } /** - * Sets the value of the 's3Partition' field. - * A unique event ID (UUID) - * @param value The value of 's3Partition'. - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder setS3Partition(java.lang.String value) { + * Sets the value of the 's3Partition' field. + * A unique event ID (UUID) + * @param value The value of 's3Partition'. + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder setS3Partition(java.lang.CharSequence value) { validate(fields()[1], value); this.s3Partition = value; fieldSetFlags()[1] = true; @@ -471,20 +480,20 @@ public io.confluent.connect.s3.callback.Callback.Builder setS3Partition(java.lan } /** - * Checks whether the 's3Partition' field has been set. - * A unique event ID (UUID) - * @return True if the 's3Partition' field has been set, false otherwise. - */ + * Checks whether the 's3Partition' field has been set. + * A unique event ID (UUID) + * @return True if the 's3Partition' field has been set, false otherwise. + */ public boolean hasS3Partition() { return fieldSetFlags()[1]; } /** - * Clears the value of the 's3Partition' field. - * A unique event ID (UUID) - * @return This builder. - */ + * Clears the value of the 's3Partition' field. + * A unique event ID (UUID) + * @return This builder. + */ public io.confluent.connect.s3.callback.Callback.Builder clearS3Partition() { s3Partition = null; fieldSetFlags()[1] = false; @@ -492,22 +501,22 @@ public io.confluent.connect.s3.callback.Callback.Builder clearS3Partition() { } /** - * Gets the value of the 'filePath' field. - * A unique event ID (UUID) - * @return The value. - */ - public java.lang.String getFilePath() { + * Gets the value of the 'filePath' field. + * A unique event ID (UUID) + * @return The value. + */ + public java.lang.CharSequence getFilePath() { return filePath; } /** - * Sets the value of the 'filePath' field. - * A unique event ID (UUID) - * @param value The value of 'filePath'. - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder setFilePath(java.lang.String value) { + * Sets the value of the 'filePath' field. + * A unique event ID (UUID) + * @param value The value of 'filePath'. + * @return This builder. + */ + public io.confluent.connect.s3.callback.Callback.Builder setFilePath(java.lang.CharSequence value) { validate(fields()[2], value); this.filePath = value; fieldSetFlags()[2] = true; @@ -515,20 +524,20 @@ public io.confluent.connect.s3.callback.Callback.Builder setFilePath(java.lang.S } /** - * Checks whether the 'filePath' field has been set. - * A unique event ID (UUID) - * @return True if the 'filePath' field has been set, false otherwise. - */ + * Checks whether the 'filePath' field has been set. + * A unique event ID (UUID) + * @return True if the 'filePath' field has been set, false otherwise. + */ public boolean hasFilePath() { return fieldSetFlags()[2]; } /** - * Clears the value of the 'filePath' field. - * A unique event ID (UUID) - * @return This builder. - */ + * Clears the value of the 'filePath' field. + * A unique event ID (UUID) + * @return This builder. + */ public io.confluent.connect.s3.callback.Callback.Builder clearFilePath() { filePath = null; fieldSetFlags()[2] = false; @@ -536,21 +545,21 @@ public io.confluent.connect.s3.callback.Callback.Builder clearFilePath() { } /** - * Gets the value of the 'partition' field. - * A unique event ID (UUID) - * @return The value. 
- */ + * Gets the value of the 'partition' field. + * A unique event ID (UUID) + * @return The value. + */ public int getPartition() { return partition; } /** - * Sets the value of the 'partition' field. - * A unique event ID (UUID) - * @param value The value of 'partition'. - * @return This builder. - */ + * Sets the value of the 'partition' field. + * A unique event ID (UUID) + * @param value The value of 'partition'. + * @return This builder. + */ public io.confluent.connect.s3.callback.Callback.Builder setPartition(int value) { validate(fields()[3], value); this.partition = value; @@ -559,41 +568,41 @@ public io.confluent.connect.s3.callback.Callback.Builder setPartition(int value) } /** - * Checks whether the 'partition' field has been set. - * A unique event ID (UUID) - * @return True if the 'partition' field has been set, false otherwise. - */ + * Checks whether the 'partition' field has been set. + * A unique event ID (UUID) + * @return True if the 'partition' field has been set, false otherwise. + */ public boolean hasPartition() { return fieldSetFlags()[3]; } /** - * Clears the value of the 'partition' field. - * A unique event ID (UUID) - * @return This builder. - */ + * Clears the value of the 'partition' field. + * A unique event ID (UUID) + * @return This builder. + */ public io.confluent.connect.s3.callback.Callback.Builder clearPartition() { fieldSetFlags()[3] = false; return this; } /** - * Gets the value of the 'baseRecordTimestamp' field. - * A unique event ID (UUID) - * @return The value. - */ + * Gets the value of the 'baseRecordTimestamp' field. + * A unique event ID (UUID) + * @return The value. + */ public long getBaseRecordTimestamp() { return baseRecordTimestamp; } /** - * Sets the value of the 'baseRecordTimestamp' field. - * A unique event ID (UUID) - * @param value The value of 'baseRecordTimestamp'. - * @return This builder. - */ + * Sets the value of the 'baseRecordTimestamp' field. + * A unique event ID (UUID) + * @param value The value of 'baseRecordTimestamp'. + * @return This builder. + */ public io.confluent.connect.s3.callback.Callback.Builder setBaseRecordTimestamp(long value) { validate(fields()[4], value); this.baseRecordTimestamp = value; @@ -602,41 +611,41 @@ public io.confluent.connect.s3.callback.Callback.Builder setBaseRecordTimestamp( } /** - * Checks whether the 'baseRecordTimestamp' field has been set. - * A unique event ID (UUID) - * @return True if the 'baseRecordTimestamp' field has been set, false otherwise. - */ + * Checks whether the 'baseRecordTimestamp' field has been set. + * A unique event ID (UUID) + * @return True if the 'baseRecordTimestamp' field has been set, false otherwise. + */ public boolean hasBaseRecordTimestamp() { return fieldSetFlags()[4]; } /** - * Clears the value of the 'baseRecordTimestamp' field. - * A unique event ID (UUID) - * @return This builder. - */ + * Clears the value of the 'baseRecordTimestamp' field. + * A unique event ID (UUID) + * @return This builder. + */ public io.confluent.connect.s3.callback.Callback.Builder clearBaseRecordTimestamp() { fieldSetFlags()[4] = false; return this; } /** - * Gets the value of the 'currentTimestamp' field. - * A unique event ID (UUID) - * @return The value. - */ + * Gets the value of the 'currentTimestamp' field. + * A unique event ID (UUID) + * @return The value. + */ public long getCurrentTimestamp() { return currentTimestamp; } /** - * Sets the value of the 'currentTimestamp' field. - * A unique event ID (UUID) - * @param value The value of 'currentTimestamp'. 
- * @return This builder. - */ + * Sets the value of the 'currentTimestamp' field. + * A unique event ID (UUID) + * @param value The value of 'currentTimestamp'. + * @return This builder. + */ public io.confluent.connect.s3.callback.Callback.Builder setCurrentTimestamp(long value) { validate(fields()[5], value); this.currentTimestamp = value; @@ -645,41 +654,41 @@ public io.confluent.connect.s3.callback.Callback.Builder setCurrentTimestamp(lon } /** - * Checks whether the 'currentTimestamp' field has been set. - * A unique event ID (UUID) - * @return True if the 'currentTimestamp' field has been set, false otherwise. - */ + * Checks whether the 'currentTimestamp' field has been set. + * A unique event ID (UUID) + * @return True if the 'currentTimestamp' field has been set, false otherwise. + */ public boolean hasCurrentTimestamp() { return fieldSetFlags()[5]; } /** - * Clears the value of the 'currentTimestamp' field. - * A unique event ID (UUID) - * @return This builder. - */ + * Clears the value of the 'currentTimestamp' field. + * A unique event ID (UUID) + * @return This builder. + */ public io.confluent.connect.s3.callback.Callback.Builder clearCurrentTimestamp() { fieldSetFlags()[5] = false; return this; } /** - * Gets the value of the 'recordCount' field. - * A unique event ID (UUID) - * @return The value. - */ + * Gets the value of the 'recordCount' field. + * A unique event ID (UUID) + * @return The value. + */ public int getRecordCount() { return recordCount; } /** - * Sets the value of the 'recordCount' field. - * A unique event ID (UUID) - * @param value The value of 'recordCount'. - * @return This builder. - */ + * Sets the value of the 'recordCount' field. + * A unique event ID (UUID) + * @param value The value of 'recordCount'. + * @return This builder. + */ public io.confluent.connect.s3.callback.Callback.Builder setRecordCount(int value) { validate(fields()[6], value); this.recordCount = value; @@ -688,20 +697,20 @@ public io.confluent.connect.s3.callback.Callback.Builder setRecordCount(int valu } /** - * Checks whether the 'recordCount' field has been set. - * A unique event ID (UUID) - * @return True if the 'recordCount' field has been set, false otherwise. - */ + * Checks whether the 'recordCount' field has been set. + * A unique event ID (UUID) + * @return True if the 'recordCount' field has been set, false otherwise. + */ public boolean hasRecordCount() { return fieldSetFlags()[6]; } /** - * Clears the value of the 'recordCount' field. - * A unique event ID (UUID) - * @return This builder. - */ + * Clears the value of the 'recordCount' field. + * A unique event ID (UUID) + * @return This builder. + */ public io.confluent.connect.s3.callback.Callback.Builder clearRecordCount() { fieldSetFlags()[6] = false; return this; @@ -712,9 +721,9 @@ public io.confluent.connect.s3.callback.Callback.Builder clearRecordCount() { public Callback build() { try { Callback record = new Callback(); - record.topicName = fieldSetFlags()[0] ? this.topicName : (java.lang.String) defaultValue(fields()[0]); - record.s3Partition = fieldSetFlags()[1] ? this.s3Partition : (java.lang.String) defaultValue(fields()[1]); - record.filePath = fieldSetFlags()[2] ? this.filePath : (java.lang.String) defaultValue(fields()[2]); + record.topicName = fieldSetFlags()[0] ? this.topicName : (java.lang.CharSequence) defaultValue(fields()[0]); + record.s3Partition = fieldSetFlags()[1] ? this.s3Partition : (java.lang.CharSequence) defaultValue(fields()[1]); + record.filePath = fieldSetFlags()[2] ? 
this.filePath : (java.lang.CharSequence) defaultValue(fields()[2]); record.partition = fieldSetFlags()[3] ? this.partition : (java.lang.Integer) defaultValue(fields()[3]); record.baseRecordTimestamp = fieldSetFlags()[4] ? this.baseRecordTimestamp : (java.lang.Long) defaultValue(fields()[4]); record.currentTimestamp = fieldSetFlags()[5] ? this.currentTimestamp : (java.lang.Long) defaultValue(fields()[5]); @@ -730,26 +739,26 @@ public Callback build() { @SuppressWarnings("unchecked") private static final org.apache.avro.io.DatumWriter - WRITER$ = (org.apache.avro.io.DatumWriter)MODEL$.createDatumWriter(SCHEMA$); + WRITER$ = (org.apache.avro.io.DatumWriter)MODEL$.createDatumWriter(SCHEMA$); @Override public void writeExternal(java.io.ObjectOutput out) - throws java.io.IOException { + throws java.io.IOException { WRITER$.write(this, SpecificData.getEncoder(out)); } @SuppressWarnings("unchecked") private static final org.apache.avro.io.DatumReader - READER$ = (org.apache.avro.io.DatumReader)MODEL$.createDatumReader(SCHEMA$); + READER$ = (org.apache.avro.io.DatumReader)MODEL$.createDatumReader(SCHEMA$); @Override public void readExternal(java.io.ObjectInput in) - throws java.io.IOException { + throws java.io.IOException { READER$.read(this, SpecificData.getDecoder(in)); } @Override protected boolean hasCustomCoders() { return true; } @Override public void customEncode(org.apache.avro.io.Encoder out) - throws java.io.IOException + throws java.io.IOException { out.writeString(this.topicName); @@ -768,15 +777,15 @@ public Callback build() { } @Override public void customDecode(org.apache.avro.io.ResolvingDecoder in) - throws java.io.IOException + throws java.io.IOException { org.apache.avro.Schema.Field[] fieldOrder = in.readFieldOrderIfDiff(); if (fieldOrder == null) { - this.topicName = in.readString(); + this.topicName = in.readString(this.topicName instanceof Utf8 ? (Utf8)this.topicName : null); - this.s3Partition = in.readString(); + this.s3Partition = in.readString(this.s3Partition instanceof Utf8 ? (Utf8)this.s3Partition : null); - this.filePath = in.readString(); + this.filePath = in.readString(this.filePath instanceof Utf8 ? (Utf8)this.filePath : null); this.partition = in.readInt(); @@ -789,38 +798,48 @@ public Callback build() { } else { for (int i = 0; i < 7; i++) { switch (fieldOrder[i].pos()) { - case 0: - this.topicName = in.readString(); - break; + case 0: + this.topicName = in.readString(this.topicName instanceof Utf8 ? (Utf8)this.topicName : null); + break; - case 1: - this.s3Partition = in.readString(); - break; + case 1: + this.s3Partition = in.readString(this.s3Partition instanceof Utf8 ? (Utf8)this.s3Partition : null); + break; - case 2: - this.filePath = in.readString(); - break; + case 2: + this.filePath = in.readString(this.filePath instanceof Utf8 ? 
(Utf8)this.filePath : null); + break; - case 3: - this.partition = in.readInt(); - break; + case 3: + this.partition = in.readInt(); + break; - case 4: - this.baseRecordTimestamp = in.readLong(); - break; + case 4: + this.baseRecordTimestamp = in.readLong(); + break; - case 5: - this.currentTimestamp = in.readLong(); - break; + case 5: + this.currentTimestamp = in.readLong(); + break; - case 6: - this.recordCount = in.readInt(); - break; + case 6: + this.recordCount = in.readInt(); + break; - default: - throw new java.io.IOException("Corrupt ResolvingDecoder."); + default: + throw new java.io.IOException("Corrupt ResolvingDecoder."); } } } } } + + + + + + + + + + diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java index 7a12e4122..77b56337a 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java @@ -20,10 +20,11 @@ import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.clients.producer.ProducerRecord; - -import java.util.Arrays; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class KafkaFileCallbackProvider implements FileCallbackProvider { + private static final Logger log = LoggerFactory.getLogger(KafkaFileCallbackProvider.class); private final String configJson; private final KafkaFileCallbackConfig kafkaConfig; @@ -36,9 +37,6 @@ public KafkaFileCallbackProvider(String configJson) { @Override public void call(String topicName,String s3Partition, String filePath, int partition, Long baseRecordTimestamp, Long currentTimestamp, int recordCount) { - String value = String.join("|", Arrays.asList(topicName, s3Partition, filePath, - String.valueOf(partition), String.valueOf(baseRecordTimestamp), - String.valueOf(currentTimestamp), String.valueOf(recordCount))); Callback callback = new Callback(topicName, s3Partition, filePath, partition, baseRecordTimestamp, currentTimestamp, recordCount); try (final Producer producer = @@ -46,11 +44,11 @@ public void call(String topicName,String s3Partition, String filePath, int parti producer.send(new ProducerRecord<>(kafkaConfig.getTopicName(), topicName, callback), (event, ex) -> { if (ex != null) { - ex.printStackTrace(); + throw new RuntimeException(ex); } }); } catch (Exception e) { - System.out.println("foo"); + log.error(e.getMessage(), e); } } diff --git a/kafka-connect-s3/src/main/resources/callback.avsc b/kafka-connect-s3/src/main/resources/callback.avsc new file mode 100644 index 000000000..1865ec802 --- /dev/null +++ b/kafka-connect-s3/src/main/resources/callback.avsc @@ -0,0 +1,43 @@ +{ + "namespace": "io.confluent.connect.s3.callback", + "type": "record", + "name": "Callback", + "doc": "This event represents a callback Message", + "fields": [ + { + "name": "topicName", + "type": "string", + "doc": "A unique event ID (UUID)" + }, + { + "name": "s3Partition", + "type": "string", + "doc": "A unique event ID (UUID)" + }, + { + "name": "filePath", + "type": "string", + "doc": "A unique event ID (UUID)" + }, + { + "name": "partition", + "type": "int", + "doc": "A unique event ID (UUID)" + }, + { + "name": "baseRecordTimestamp", + "type": "long", + "doc": "A unique event ID (UUID)" + }, + { + "name": "currentTimestamp", + "type": "long", + "doc": "A unique 
event ID (UUID)" + }, + { + "name": "recordCount", + "type": "int", + "doc": "A unique event ID (UUID)" + } + ] +} \ No newline at end of file From f8fa534f23c38bd817c26b9dda5c7a493b929ecb Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 28 Jul 2023 11:55:13 +0200 Subject: [PATCH 11/34] Allow to skip errors also use abstract class for constructor spec --- .../connect/s3/S3SinkConnectorConfig.java | 18 ++++++++++++++++++ .../connect/s3/TopicPartitionWriter.java | 2 +- .../s3/callback/FileCallbackProvider.java | 11 +++++++++-- .../s3/callback/KafkaFileCallbackProvider.java | 12 +++++++----- .../s3/integration/S3SinkConnectorIT.java | 2 +- 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java index 5b4b5512b..ee5661057 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java @@ -201,6 +201,8 @@ public class S3SinkConnectorConfig extends StorageSinkConnectorConfig { */ public static final String FILE_CALLBACK_ENABLE = "s3.file.callback.enable"; public static final boolean FILE_CALLBACK_ENABLE_DEFAULT = false; + public static final String FILE_CALLBACK_SKIP_ERROR = "s3.file.callback.skip.error"; + public static final boolean FILE_CALLBACK_SKIP_ERROR_DEFAULT = false; public static final String FILE_CALLBACK_CLASS = "s3.file.callback.class"; public static final Class FILE_CALLBACK_CLASS_DEFAULT = KafkaFileCallbackProvider.class; @@ -800,6 +802,18 @@ public static ConfigDef newConfigDef() { "Enable s3 file callback" ); + configDef.define( + FILE_CALLBACK_SKIP_ERROR, + Type.BOOLEAN, + FILE_CALLBACK_SKIP_ERROR_DEFAULT, + Importance.LOW, + "In case of callback, then raise an error or fail silently. 
Default raise an error.", + group, + ++orderInGroup, + Width.LONG, + "Fail when s3 file callback error" + ); + configDef.define( FILE_CALLBACK_CLASS, Type.CLASS, @@ -1011,6 +1025,10 @@ public boolean getFileCallbackEnable() { return getBoolean(FILE_CALLBACK_ENABLE); } + public boolean getFileCallbackSkipError() { + return getBoolean(FILE_CALLBACK_SKIP_ERROR); + } + public Class getFileCallbackClass() { return getClass(FILE_CALLBACK_CLASS); } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index eb660f484..783eecbf8 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -197,7 +197,7 @@ public TopicPartitionWriter(TopicPartition tp, try { fileCallback = Optional.of((FileCallbackProvider)this.connectorConfig .getFileCallbackClass().getConstructor(String.class) - .newInstance(connectorConfig.getFileCallbackConfigJson())); + .newInstance(connectorConfig.getFileCallbackConfigJson(), connectorConfig.getFileCallbackSkipError())); } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { throw new RuntimeException(e); diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java index 6a587bfca..5484f2e90 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java @@ -15,8 +15,15 @@ package io.confluent.connect.s3.callback; -public interface FileCallbackProvider { +public abstract class FileCallbackProvider { + protected final String configJson; + protected final boolean skipError; - void call(String topicName, String s3Partition, String filePath, int partition, + public FileCallbackProvider(String configJson, boolean skipError) { + this.configJson = configJson; + this.skipError = skipError; + } + + abstract public void call(String topicName, String s3Partition, String filePath, int partition, Long baseRecordTimestamp, Long currentTimestamp, int recordCount); } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java index 77b56337a..e173ac5f0 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java @@ -23,13 +23,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class KafkaFileCallbackProvider implements FileCallbackProvider { +public class KafkaFileCallbackProvider extends FileCallbackProvider { private static final Logger log = LoggerFactory.getLogger(KafkaFileCallbackProvider.class); - private final String configJson; private final KafkaFileCallbackConfig kafkaConfig; - public KafkaFileCallbackProvider(String configJson) { - this.configJson = configJson; + public KafkaFileCallbackProvider(String configJson, boolean skipError) { + super(configJson, skipError); this.kafkaConfig = KafkaFileCallbackConfig.fromJsonString(configJson, KafkaFileCallbackConfig.class); } @@ -48,7 +47,10 @@ public void call(String 
topicName,String s3Partition, String filePath, int parti } }); } catch (Exception e) { - log.error(e.getMessage(), e); + if(skipError) + log.error(e.getMessage(), e); + else + throw new RuntimeException(e); } } diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java index 0075b811c..f976bade2 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java @@ -464,7 +464,7 @@ public void testCallBackPartition() throws IOException { String callbackTopic = "callback_topic"; kafka.createTopic(callbackTopic); KafkaFileCallbackConfig kafkaFileCallbackConfig = new KafkaFileCallbackConfig(callbackTopic, "", "", bootstrapServers, "", restApp.restServer.getURI().toString()); - KafkaFileCallbackProvider callBack = new KafkaFileCallbackProvider(kafkaFileCallbackConfig.toJson()); + KafkaFileCallbackProvider callBack = new KafkaFileCallbackProvider(kafkaFileCallbackConfig.toJson(), false); callBack.call("baz-topic", "version/event/hour", "hey.avro", 12, 1234L, 123L, 34); ConsumerRecords res = kafka.consume(1, 1000L, callbackTopic); ConsumerRecord next = res.iterator().next(); From ea1b58e763f6b4cc8d2f64c0f3ebc67ec38060dc Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 28 Jul 2023 14:07:59 +0200 Subject: [PATCH 12/34] Implement security --- .../s3/AbstractFileCallbackConfig.java | 2 +- .../connect/s3/KafkaFileCallbackConfig.java | 99 ++++++++++--------- .../connect/s3/TopicPartitionWriter.java | 2 +- .../callback/KafkaFileCallbackProvider.java | 5 +- .../s3/integration/S3SinkConnectorIT.java | 4 +- 5 files changed, 60 insertions(+), 52 deletions(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/AbstractFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/AbstractFileCallbackConfig.java index 5a1687011..b9aeb3b01 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/AbstractFileCallbackConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/AbstractFileCallbackConfig.java @@ -34,7 +34,7 @@ public static T fromJsonString(String jso ObjectMapper instanceMapper = new ObjectMapper(); instanceMapper.setPropertyNamingStrategy( PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); - instanceMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + instanceMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true); instanceMapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true); instanceMapper.enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS); return instanceMapper.readValue(jsonContent, clazz); diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java index ed634c5a7..649379996 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java @@ -19,67 +19,44 @@ public class KafkaFileCallbackConfig extends AbstractFileCallbackConfig { + private final String KEY_SERIALIZER = "org.apache.kafka.common.serialization.StringSerializer"; + private final String VALUE_SERIALIZER = "io.confluent.kafka.serializers.KafkaAvroSerializer"; + private String topicName; - 
private String topicUser; - private String topicPassword; private String bootstrapServers; - private String securityProtocols; - - + private String securityProtocol; private String schemaRegistryUrl; - private String keySerializer = "org.apache.kafka.common.serialization.StringSerializer"; - private String valueSerializer = "io.confluent.kafka.serializers.KafkaAvroSerializer"; + private String saslMecanism; + private String saslJaasConfig; + /** + * empty constructor for jackson + */ public KafkaFileCallbackConfig() { } - public KafkaFileCallbackConfig(String topicName, String topicUser, String topicPassword, - String bootstrapServers, String securityProtocols, String schemaRegistryUrl) { + public KafkaFileCallbackConfig(String topicName, String bootstrapServers, + String schemaRegistryUrl, String securityProtocol, + String saslMecanism, String saslJaasConfig) { this.topicName = topicName; - this.topicUser = topicUser; - this.topicPassword = topicPassword; this.bootstrapServers = bootstrapServers; - this.securityProtocols = securityProtocols; this.schemaRegistryUrl = schemaRegistryUrl; - } - - - - public String getTopicName() { - return topicName; - } - - public String getSchemaRegistryUrl() { - return schemaRegistryUrl; - } - public String getTopicUser() { - return topicUser; - } - - public String getTopicPassword() { - return topicPassword; - } - - public String getBootstrapServers() { - return bootstrapServers; - } - - public String getSecurityProtocols() { - return securityProtocols; - } - - public String getKeySerializer() { - return keySerializer; + this.securityProtocol = securityProtocol; + this.saslMecanism = saslMecanism; + this.saslJaasConfig = saslJaasConfig; } public String toJson() { final StringBuffer sb = new StringBuffer("{"); sb.append("\"topic_name\": \"").append(topicName).append('"'); - sb.append(", \"topic_user\": \"").append(topicUser).append('"'); - sb.append(", \"topic_password\": \"").append(topicPassword).append('"'); sb.append(", \"bootstrap_servers\": \"").append(bootstrapServers).append('"'); - sb.append(", \"security_protocols\": \"").append(securityProtocols).append('"'); sb.append(", \"schema_registry_url\": \"").append(schemaRegistryUrl).append('"'); + if(securityProtocol != null) + sb.append(", \"security_protocol\": \"").append(securityProtocol).append('"'); + if(saslMecanism != null) + sb.append(", \"sasl_mecanism\": \"").append(saslMecanism).append('"'); + if(saslJaasConfig != null) + sb.append(", \"sasl_jaas_config\": \"").append(saslJaasConfig).append('"'); sb.append('}'); return sb.toString(); } @@ -87,11 +64,41 @@ public String toJson() { @Override public Properties toProps() { Properties prop = new Properties(); + prop.setProperty("key.serializer", KEY_SERIALIZER); + prop.setProperty("value.serializer", VALUE_SERIALIZER); + // mandatory prop.setProperty("bootstrap.servers", bootstrapServers); prop.setProperty("topic.name", topicName); - prop.setProperty("key.serializer", keySerializer); - prop.setProperty("value.serializer", valueSerializer); prop.setProperty("schema.registry.url", schemaRegistryUrl); + // optional + if(saslMecanism != null) + prop.setProperty("sasl.mechanism", saslMecanism); + if(securityProtocol != null) + prop.setProperty("security.protocol", securityProtocol); + if(saslJaasConfig != null) + prop.setProperty("sasl.jaas.config", saslJaasConfig); return prop; } + + public String getTopicName() { + return topicName; + } + + public String getSchemaRegistryUrl() { + return schemaRegistryUrl; + } + public String getBootstrapServers() 
{ + return bootstrapServers; + } + + public String getSecurityProtocol() { + return securityProtocol; + } + public String getSaslMecanism() { + return saslMecanism; + } + + public String getSaslJaasConfig() { + return saslJaasConfig; + } } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index 783eecbf8..c03d22ffb 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -196,7 +196,7 @@ public TopicPartitionWriter(TopicPartition tp, if (this.connectorConfig.getFileCallbackEnable()) { try { fileCallback = Optional.of((FileCallbackProvider)this.connectorConfig - .getFileCallbackClass().getConstructor(String.class) + .getFileCallbackClass().getConstructor(FileCallbackProvider.class) .newInstance(connectorConfig.getFileCallbackConfigJson(), connectorConfig.getFileCallbackSkipError())); } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java index e173ac5f0..20672e6d7 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java @@ -36,11 +36,12 @@ public KafkaFileCallbackProvider(String configJson, boolean skipError) { @Override public void call(String topicName,String s3Partition, String filePath, int partition, Long baseRecordTimestamp, Long currentTimestamp, int recordCount) { - Callback callback = new Callback(topicName, s3Partition, filePath, partition, + String key = topicName; + Callback value = new Callback(topicName, s3Partition, filePath, partition, baseRecordTimestamp, currentTimestamp, recordCount); try (final Producer producer = new KafkaProducer<>(kafkaConfig.toProps())) { - producer.send(new ProducerRecord<>(kafkaConfig.getTopicName(), topicName, callback), + producer.send(new ProducerRecord<>(kafkaConfig.getTopicName(), key, value), (event, ex) -> { if (ex != null) { throw new RuntimeException(ex); diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java index f976bade2..02b4bb572 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java @@ -463,7 +463,8 @@ public void testCallBackPartition() throws IOException { String bootstrapServers = kafka.bootstrapServers(); String callbackTopic = "callback_topic"; kafka.createTopic(callbackTopic); - KafkaFileCallbackConfig kafkaFileCallbackConfig = new KafkaFileCallbackConfig(callbackTopic, "", "", bootstrapServers, "", restApp.restServer.getURI().toString()); + KafkaFileCallbackConfig kafkaFileCallbackConfig = + new KafkaFileCallbackConfig(callbackTopic, bootstrapServers, restApp.restServer.getURI().toString(), null, null, null); KafkaFileCallbackProvider callBack = new KafkaFileCallbackProvider(kafkaFileCallbackConfig.toJson(), false); callBack.call("baz-topic", "version/event/hour", "hey.avro", 12, 1234L, 
123L, 34); ConsumerRecords res = kafka.consume(1, 1000L, callbackTopic); @@ -471,6 +472,5 @@ public void testCallBackPartition() throws IOException { String key = new String(next.key()); String value = new String(next.value()); System.out.println(key + value); - } } From 71eb9f60c3c63519c9093e1cf42ce6d88523df1f Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 28 Jul 2023 14:54:38 +0200 Subject: [PATCH 13/34] Fix style --- checkstyle/suppressions.xml | 5 + kafka-connect-s3/pom.xml | 2 +- .../connect/s3/S3SinkConnectorConfig.java | 11 +- .../connect/s3/TopicPartitionWriter.java | 3 +- .../AbstractFileCallbackConfig.java | 11 +- .../connect/s3/callback/Callback.java | 845 ------------------ .../s3/callback/FileCallbackProvider.java | 10 +- .../KafkaFileCallbackConfig.java | 53 +- .../callback/KafkaFileCallbackProvider.java | 47 +- .../s3/integration/S3SinkConnectorIT.java | 32 +- 10 files changed, 115 insertions(+), 904 deletions(-) rename kafka-connect-s3/src/main/java/io/confluent/connect/s3/{ => callback}/AbstractFileCallbackConfig.java (87%) delete mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java rename kafka-connect-s3/src/main/java/io/confluent/connect/s3/{ => callback}/KafkaFileCallbackConfig.java (72%) diff --git a/checkstyle/suppressions.xml b/checkstyle/suppressions.xml index 6d1c521d7..9704e9d4f 100644 --- a/checkstyle/suppressions.xml +++ b/checkstyle/suppressions.xml @@ -21,4 +21,9 @@ files="(TopicPartitionWriter).java" /> + + diff --git a/kafka-connect-s3/pom.xml b/kafka-connect-s3/pom.xml index f485deab2..6cf8b6005 100644 --- a/kafka-connect-s3/pom.xml +++ b/kafka-connect-s3/pom.xml @@ -332,7 +332,7 @@ ${project.basedir}/src/main/resources/ - ${project.basedir}/src/main/java/ + ${project.build.directory}/generated-main-avro-java/ diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java index ee5661057..3cbef2344 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java @@ -789,6 +789,11 @@ public static ConfigDef newConfigDef() { Width.LONG, "Elastic buffer initial capacity" ); + } + + { + final String group = "File callback"; + int orderInGroup = 0; configDef.define( FILE_CALLBACK_ENABLE, @@ -836,13 +841,15 @@ public static ConfigDef newConfigDef() { Type.STRING, FILE_CALLBACK_CONFIG_JSON_DEFAULT, Importance.LOW, - "File callback configuration as json format. By default an empty json.", + "File callback configuration as json format. " + + "Mandatory Fields: bootstrap_servers, topic_name, schema_registry_url. " + + "Optional fields: sasl_mechanism, security_protocol, sasl_jaas_config. 
" + + "By default an empty json.", group, ++orderInGroup, Width.LONG, "File callback config json" ); - } return configDef; } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index c03d22ffb..61b9f1af6 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -197,7 +197,8 @@ public TopicPartitionWriter(TopicPartition tp, try { fileCallback = Optional.of((FileCallbackProvider)this.connectorConfig .getFileCallbackClass().getConstructor(FileCallbackProvider.class) - .newInstance(connectorConfig.getFileCallbackConfigJson(), connectorConfig.getFileCallbackSkipError())); + .newInstance(connectorConfig.getFileCallbackConfigJson(), + connectorConfig.getFileCallbackSkipError())); } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { throw new RuntimeException(e); diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/AbstractFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/AbstractFileCallbackConfig.java similarity index 87% rename from kafka-connect-s3/src/main/java/io/confluent/connect/s3/AbstractFileCallbackConfig.java rename to kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/AbstractFileCallbackConfig.java index b9aeb3b01..b68a0ad36 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/AbstractFileCallbackConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/AbstractFileCallbackConfig.java @@ -13,27 +13,26 @@ * specific language governing permissions and limitations under the License. 
*/ -package io.confluent.connect.s3; +package io.confluent.connect.s3.callback; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.MapperFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; - import java.io.IOException; import java.util.Properties; public abstract class AbstractFileCallbackConfig { - public static T fromJsonString(String jsonContent, - Class clazz) { + public static T fromJsonString( + String jsonContent, Class clazz) { try { if (jsonContent == null) { return clazz.newInstance(); } ObjectMapper instanceMapper = new ObjectMapper(); instanceMapper.setPropertyNamingStrategy( - PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); + PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); instanceMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true); instanceMapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true); instanceMapper.enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS); @@ -44,4 +43,6 @@ public static T fromJsonString(String jso } public abstract Properties toProps(); + + public abstract String toJson(); } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java deleted file mode 100644 index 2af520131..000000000 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/Callback.java +++ /dev/null @@ -1,845 +0,0 @@ -/** - * Autogenerated by Avro - * - * DO NOT EDIT DIRECTLY - */ -package io.confluent.connect.s3.callback; - -import org.apache.avro.generic.GenericArray; -import org.apache.avro.specific.SpecificData; -import org.apache.avro.util.Utf8; -import org.apache.avro.message.BinaryMessageEncoder; -import org.apache.avro.message.BinaryMessageDecoder; -import org.apache.avro.message.SchemaStore; - -/** This event represents a callback Message */ -@org.apache.avro.specific.AvroGenerated -public class Callback extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { - private static final long serialVersionUID = 5786357167649199011L; - - - public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Callback\",\"namespace\":\"io.confluent.connect.s3.callback\",\"doc\":\"This event represents a callback Message\",\"fields\":[{\"name\":\"topicName\",\"type\":\"string\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"s3Partition\",\"type\":\"string\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"filePath\",\"type\":\"string\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"partition\",\"type\":\"int\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"baseRecordTimestamp\",\"type\":\"long\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"currentTimestamp\",\"type\":\"long\",\"doc\":\"A unique event ID (UUID)\"},{\"name\":\"recordCount\",\"type\":\"int\",\"doc\":\"A unique event ID (UUID)\"}]}"); - public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } - - private static final SpecificData MODEL$ = new SpecificData(); - - private static final BinaryMessageEncoder ENCODER = - new BinaryMessageEncoder<>(MODEL$, SCHEMA$); - - private static final BinaryMessageDecoder DECODER = - new BinaryMessageDecoder<>(MODEL$, SCHEMA$); - - /** - * Return the BinaryMessageEncoder instance used by this class. 
- * @return the message encoder used by this class - */ - public static BinaryMessageEncoder getEncoder() { - return ENCODER; - } - - /** - * Return the BinaryMessageDecoder instance used by this class. - * @return the message decoder used by this class - */ - public static BinaryMessageDecoder getDecoder() { - return DECODER; - } - - /** - * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}. - * @param resolver a {@link SchemaStore} used to find schemas by fingerprint - * @return a BinaryMessageDecoder instance for this class backed by the given SchemaStore - */ - public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { - return new BinaryMessageDecoder<>(MODEL$, SCHEMA$, resolver); - } - - /** - * Serializes this Callback to a ByteBuffer. - * @return a buffer holding the serialized data for this instance - * @throws java.io.IOException if this instance could not be serialized - */ - public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { - return ENCODER.encode(this); - } - - /** - * Deserializes a Callback from a ByteBuffer. - * @param b a byte buffer holding serialized data for an instance of this class - * @return a Callback instance decoded from the given buffer - * @throws java.io.IOException if the given bytes could not be deserialized into an instance of this class - */ - public static Callback fromByteBuffer( - java.nio.ByteBuffer b) throws java.io.IOException { - return DECODER.decode(b); - } - - /** A unique event ID (UUID) */ - private java.lang.CharSequence topicName; - /** A unique event ID (UUID) */ - private java.lang.CharSequence s3Partition; - /** A unique event ID (UUID) */ - private java.lang.CharSequence filePath; - /** A unique event ID (UUID) */ - private int partition; - /** A unique event ID (UUID) */ - private long baseRecordTimestamp; - /** A unique event ID (UUID) */ - private long currentTimestamp; - /** A unique event ID (UUID) */ - private int recordCount; - - /** - * Default constructor. Note that this does not initialize fields - * to their default values from the schema. If that is desired then - * one should use newBuilder(). - */ - public Callback() {} - - /** - * All-args constructor. - * @param topicName A unique event ID (UUID) - * @param s3Partition A unique event ID (UUID) - * @param filePath A unique event ID (UUID) - * @param partition A unique event ID (UUID) - * @param baseRecordTimestamp A unique event ID (UUID) - * @param currentTimestamp A unique event ID (UUID) - * @param recordCount A unique event ID (UUID) - */ - public Callback(java.lang.CharSequence topicName, java.lang.CharSequence s3Partition, java.lang.CharSequence filePath, java.lang.Integer partition, java.lang.Long baseRecordTimestamp, java.lang.Long currentTimestamp, java.lang.Integer recordCount) { - this.topicName = topicName; - this.s3Partition = s3Partition; - this.filePath = filePath; - this.partition = partition; - this.baseRecordTimestamp = baseRecordTimestamp; - this.currentTimestamp = currentTimestamp; - this.recordCount = recordCount; - } - - @Override - public org.apache.avro.specific.SpecificData getSpecificData() { return MODEL$; } - - @Override - public org.apache.avro.Schema getSchema() { return SCHEMA$; } - - // Used by DatumWriter. Applications should not call. 
- @Override - public java.lang.Object get(int field$) { - switch (field$) { - case 0: return topicName; - case 1: return s3Partition; - case 2: return filePath; - case 3: return partition; - case 4: return baseRecordTimestamp; - case 5: return currentTimestamp; - case 6: return recordCount; - default: throw new IndexOutOfBoundsException("Invalid index: " + field$); - } - } - - // Used by DatumReader. Applications should not call. - @Override - @SuppressWarnings(value="unchecked") - public void put(int field$, java.lang.Object value$) { - switch (field$) { - case 0: topicName = (java.lang.CharSequence)value$; break; - case 1: s3Partition = (java.lang.CharSequence)value$; break; - case 2: filePath = (java.lang.CharSequence)value$; break; - case 3: partition = (java.lang.Integer)value$; break; - case 4: baseRecordTimestamp = (java.lang.Long)value$; break; - case 5: currentTimestamp = (java.lang.Long)value$; break; - case 6: recordCount = (java.lang.Integer)value$; break; - default: throw new IndexOutOfBoundsException("Invalid index: " + field$); - } - } - - /** - * Gets the value of the 'topicName' field. - * @return A unique event ID (UUID) - */ - public java.lang.CharSequence getTopicName() { - return topicName; - } - - - /** - * Sets the value of the 'topicName' field. - * A unique event ID (UUID) - * @param value the value to set. - */ - public void setTopicName(java.lang.CharSequence value) { - this.topicName = value; - } - - /** - * Gets the value of the 's3Partition' field. - * @return A unique event ID (UUID) - */ - public java.lang.CharSequence getS3Partition() { - return s3Partition; - } - - - /** - * Sets the value of the 's3Partition' field. - * A unique event ID (UUID) - * @param value the value to set. - */ - public void setS3Partition(java.lang.CharSequence value) { - this.s3Partition = value; - } - - /** - * Gets the value of the 'filePath' field. - * @return A unique event ID (UUID) - */ - public java.lang.CharSequence getFilePath() { - return filePath; - } - - - /** - * Sets the value of the 'filePath' field. - * A unique event ID (UUID) - * @param value the value to set. - */ - public void setFilePath(java.lang.CharSequence value) { - this.filePath = value; - } - - /** - * Gets the value of the 'partition' field. - * @return A unique event ID (UUID) - */ - public int getPartition() { - return partition; - } - - - /** - * Sets the value of the 'partition' field. - * A unique event ID (UUID) - * @param value the value to set. - */ - public void setPartition(int value) { - this.partition = value; - } - - /** - * Gets the value of the 'baseRecordTimestamp' field. - * @return A unique event ID (UUID) - */ - public long getBaseRecordTimestamp() { - return baseRecordTimestamp; - } - - - /** - * Sets the value of the 'baseRecordTimestamp' field. - * A unique event ID (UUID) - * @param value the value to set. - */ - public void setBaseRecordTimestamp(long value) { - this.baseRecordTimestamp = value; - } - - /** - * Gets the value of the 'currentTimestamp' field. - * @return A unique event ID (UUID) - */ - public long getCurrentTimestamp() { - return currentTimestamp; - } - - - /** - * Sets the value of the 'currentTimestamp' field. - * A unique event ID (UUID) - * @param value the value to set. - */ - public void setCurrentTimestamp(long value) { - this.currentTimestamp = value; - } - - /** - * Gets the value of the 'recordCount' field. 
- * @return A unique event ID (UUID) - */ - public int getRecordCount() { - return recordCount; - } - - - /** - * Sets the value of the 'recordCount' field. - * A unique event ID (UUID) - * @param value the value to set. - */ - public void setRecordCount(int value) { - this.recordCount = value; - } - - /** - * Creates a new Callback RecordBuilder. - * @return A new Callback RecordBuilder - */ - public static io.confluent.connect.s3.callback.Callback.Builder newBuilder() { - return new io.confluent.connect.s3.callback.Callback.Builder(); - } - - /** - * Creates a new Callback RecordBuilder by copying an existing Builder. - * @param other The existing builder to copy. - * @return A new Callback RecordBuilder - */ - public static io.confluent.connect.s3.callback.Callback.Builder newBuilder(io.confluent.connect.s3.callback.Callback.Builder other) { - if (other == null) { - return new io.confluent.connect.s3.callback.Callback.Builder(); - } else { - return new io.confluent.connect.s3.callback.Callback.Builder(other); - } - } - - /** - * Creates a new Callback RecordBuilder by copying an existing Callback instance. - * @param other The existing instance to copy. - * @return A new Callback RecordBuilder - */ - public static io.confluent.connect.s3.callback.Callback.Builder newBuilder(io.confluent.connect.s3.callback.Callback other) { - if (other == null) { - return new io.confluent.connect.s3.callback.Callback.Builder(); - } else { - return new io.confluent.connect.s3.callback.Callback.Builder(other); - } - } - - /** - * RecordBuilder for Callback instances. - */ - @org.apache.avro.specific.AvroGenerated - public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase - implements org.apache.avro.data.RecordBuilder { - - /** A unique event ID (UUID) */ - private java.lang.CharSequence topicName; - /** A unique event ID (UUID) */ - private java.lang.CharSequence s3Partition; - /** A unique event ID (UUID) */ - private java.lang.CharSequence filePath; - /** A unique event ID (UUID) */ - private int partition; - /** A unique event ID (UUID) */ - private long baseRecordTimestamp; - /** A unique event ID (UUID) */ - private long currentTimestamp; - /** A unique event ID (UUID) */ - private int recordCount; - - /** Creates a new Builder */ - private Builder() { - super(SCHEMA$, MODEL$); - } - - /** - * Creates a Builder by copying an existing Builder. - * @param other The existing Builder to copy. 
- */ - private Builder(io.confluent.connect.s3.callback.Callback.Builder other) { - super(other); - if (isValidValue(fields()[0], other.topicName)) { - this.topicName = data().deepCopy(fields()[0].schema(), other.topicName); - fieldSetFlags()[0] = other.fieldSetFlags()[0]; - } - if (isValidValue(fields()[1], other.s3Partition)) { - this.s3Partition = data().deepCopy(fields()[1].schema(), other.s3Partition); - fieldSetFlags()[1] = other.fieldSetFlags()[1]; - } - if (isValidValue(fields()[2], other.filePath)) { - this.filePath = data().deepCopy(fields()[2].schema(), other.filePath); - fieldSetFlags()[2] = other.fieldSetFlags()[2]; - } - if (isValidValue(fields()[3], other.partition)) { - this.partition = data().deepCopy(fields()[3].schema(), other.partition); - fieldSetFlags()[3] = other.fieldSetFlags()[3]; - } - if (isValidValue(fields()[4], other.baseRecordTimestamp)) { - this.baseRecordTimestamp = data().deepCopy(fields()[4].schema(), other.baseRecordTimestamp); - fieldSetFlags()[4] = other.fieldSetFlags()[4]; - } - if (isValidValue(fields()[5], other.currentTimestamp)) { - this.currentTimestamp = data().deepCopy(fields()[5].schema(), other.currentTimestamp); - fieldSetFlags()[5] = other.fieldSetFlags()[5]; - } - if (isValidValue(fields()[6], other.recordCount)) { - this.recordCount = data().deepCopy(fields()[6].schema(), other.recordCount); - fieldSetFlags()[6] = other.fieldSetFlags()[6]; - } - } - - /** - * Creates a Builder by copying an existing Callback instance - * @param other The existing instance to copy. - */ - private Builder(io.confluent.connect.s3.callback.Callback other) { - super(SCHEMA$, MODEL$); - if (isValidValue(fields()[0], other.topicName)) { - this.topicName = data().deepCopy(fields()[0].schema(), other.topicName); - fieldSetFlags()[0] = true; - } - if (isValidValue(fields()[1], other.s3Partition)) { - this.s3Partition = data().deepCopy(fields()[1].schema(), other.s3Partition); - fieldSetFlags()[1] = true; - } - if (isValidValue(fields()[2], other.filePath)) { - this.filePath = data().deepCopy(fields()[2].schema(), other.filePath); - fieldSetFlags()[2] = true; - } - if (isValidValue(fields()[3], other.partition)) { - this.partition = data().deepCopy(fields()[3].schema(), other.partition); - fieldSetFlags()[3] = true; - } - if (isValidValue(fields()[4], other.baseRecordTimestamp)) { - this.baseRecordTimestamp = data().deepCopy(fields()[4].schema(), other.baseRecordTimestamp); - fieldSetFlags()[4] = true; - } - if (isValidValue(fields()[5], other.currentTimestamp)) { - this.currentTimestamp = data().deepCopy(fields()[5].schema(), other.currentTimestamp); - fieldSetFlags()[5] = true; - } - if (isValidValue(fields()[6], other.recordCount)) { - this.recordCount = data().deepCopy(fields()[6].schema(), other.recordCount); - fieldSetFlags()[6] = true; - } - } - - /** - * Gets the value of the 'topicName' field. - * A unique event ID (UUID) - * @return The value. - */ - public java.lang.CharSequence getTopicName() { - return topicName; - } - - - /** - * Sets the value of the 'topicName' field. - * A unique event ID (UUID) - * @param value The value of 'topicName'. - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder setTopicName(java.lang.CharSequence value) { - validate(fields()[0], value); - this.topicName = value; - fieldSetFlags()[0] = true; - return this; - } - - /** - * Checks whether the 'topicName' field has been set. - * A unique event ID (UUID) - * @return True if the 'topicName' field has been set, false otherwise. 
- */ - public boolean hasTopicName() { - return fieldSetFlags()[0]; - } - - - /** - * Clears the value of the 'topicName' field. - * A unique event ID (UUID) - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder clearTopicName() { - topicName = null; - fieldSetFlags()[0] = false; - return this; - } - - /** - * Gets the value of the 's3Partition' field. - * A unique event ID (UUID) - * @return The value. - */ - public java.lang.CharSequence getS3Partition() { - return s3Partition; - } - - - /** - * Sets the value of the 's3Partition' field. - * A unique event ID (UUID) - * @param value The value of 's3Partition'. - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder setS3Partition(java.lang.CharSequence value) { - validate(fields()[1], value); - this.s3Partition = value; - fieldSetFlags()[1] = true; - return this; - } - - /** - * Checks whether the 's3Partition' field has been set. - * A unique event ID (UUID) - * @return True if the 's3Partition' field has been set, false otherwise. - */ - public boolean hasS3Partition() { - return fieldSetFlags()[1]; - } - - - /** - * Clears the value of the 's3Partition' field. - * A unique event ID (UUID) - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder clearS3Partition() { - s3Partition = null; - fieldSetFlags()[1] = false; - return this; - } - - /** - * Gets the value of the 'filePath' field. - * A unique event ID (UUID) - * @return The value. - */ - public java.lang.CharSequence getFilePath() { - return filePath; - } - - - /** - * Sets the value of the 'filePath' field. - * A unique event ID (UUID) - * @param value The value of 'filePath'. - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder setFilePath(java.lang.CharSequence value) { - validate(fields()[2], value); - this.filePath = value; - fieldSetFlags()[2] = true; - return this; - } - - /** - * Checks whether the 'filePath' field has been set. - * A unique event ID (UUID) - * @return True if the 'filePath' field has been set, false otherwise. - */ - public boolean hasFilePath() { - return fieldSetFlags()[2]; - } - - - /** - * Clears the value of the 'filePath' field. - * A unique event ID (UUID) - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder clearFilePath() { - filePath = null; - fieldSetFlags()[2] = false; - return this; - } - - /** - * Gets the value of the 'partition' field. - * A unique event ID (UUID) - * @return The value. - */ - public int getPartition() { - return partition; - } - - - /** - * Sets the value of the 'partition' field. - * A unique event ID (UUID) - * @param value The value of 'partition'. - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder setPartition(int value) { - validate(fields()[3], value); - this.partition = value; - fieldSetFlags()[3] = true; - return this; - } - - /** - * Checks whether the 'partition' field has been set. - * A unique event ID (UUID) - * @return True if the 'partition' field has been set, false otherwise. - */ - public boolean hasPartition() { - return fieldSetFlags()[3]; - } - - - /** - * Clears the value of the 'partition' field. - * A unique event ID (UUID) - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder clearPartition() { - fieldSetFlags()[3] = false; - return this; - } - - /** - * Gets the value of the 'baseRecordTimestamp' field. 
- * A unique event ID (UUID) - * @return The value. - */ - public long getBaseRecordTimestamp() { - return baseRecordTimestamp; - } - - - /** - * Sets the value of the 'baseRecordTimestamp' field. - * A unique event ID (UUID) - * @param value The value of 'baseRecordTimestamp'. - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder setBaseRecordTimestamp(long value) { - validate(fields()[4], value); - this.baseRecordTimestamp = value; - fieldSetFlags()[4] = true; - return this; - } - - /** - * Checks whether the 'baseRecordTimestamp' field has been set. - * A unique event ID (UUID) - * @return True if the 'baseRecordTimestamp' field has been set, false otherwise. - */ - public boolean hasBaseRecordTimestamp() { - return fieldSetFlags()[4]; - } - - - /** - * Clears the value of the 'baseRecordTimestamp' field. - * A unique event ID (UUID) - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder clearBaseRecordTimestamp() { - fieldSetFlags()[4] = false; - return this; - } - - /** - * Gets the value of the 'currentTimestamp' field. - * A unique event ID (UUID) - * @return The value. - */ - public long getCurrentTimestamp() { - return currentTimestamp; - } - - - /** - * Sets the value of the 'currentTimestamp' field. - * A unique event ID (UUID) - * @param value The value of 'currentTimestamp'. - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder setCurrentTimestamp(long value) { - validate(fields()[5], value); - this.currentTimestamp = value; - fieldSetFlags()[5] = true; - return this; - } - - /** - * Checks whether the 'currentTimestamp' field has been set. - * A unique event ID (UUID) - * @return True if the 'currentTimestamp' field has been set, false otherwise. - */ - public boolean hasCurrentTimestamp() { - return fieldSetFlags()[5]; - } - - - /** - * Clears the value of the 'currentTimestamp' field. - * A unique event ID (UUID) - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder clearCurrentTimestamp() { - fieldSetFlags()[5] = false; - return this; - } - - /** - * Gets the value of the 'recordCount' field. - * A unique event ID (UUID) - * @return The value. - */ - public int getRecordCount() { - return recordCount; - } - - - /** - * Sets the value of the 'recordCount' field. - * A unique event ID (UUID) - * @param value The value of 'recordCount'. - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder setRecordCount(int value) { - validate(fields()[6], value); - this.recordCount = value; - fieldSetFlags()[6] = true; - return this; - } - - /** - * Checks whether the 'recordCount' field has been set. - * A unique event ID (UUID) - * @return True if the 'recordCount' field has been set, false otherwise. - */ - public boolean hasRecordCount() { - return fieldSetFlags()[6]; - } - - - /** - * Clears the value of the 'recordCount' field. - * A unique event ID (UUID) - * @return This builder. - */ - public io.confluent.connect.s3.callback.Callback.Builder clearRecordCount() { - fieldSetFlags()[6] = false; - return this; - } - - @Override - @SuppressWarnings("unchecked") - public Callback build() { - try { - Callback record = new Callback(); - record.topicName = fieldSetFlags()[0] ? this.topicName : (java.lang.CharSequence) defaultValue(fields()[0]); - record.s3Partition = fieldSetFlags()[1] ? this.s3Partition : (java.lang.CharSequence) defaultValue(fields()[1]); - record.filePath = fieldSetFlags()[2] ? 
this.filePath : (java.lang.CharSequence) defaultValue(fields()[2]); - record.partition = fieldSetFlags()[3] ? this.partition : (java.lang.Integer) defaultValue(fields()[3]); - record.baseRecordTimestamp = fieldSetFlags()[4] ? this.baseRecordTimestamp : (java.lang.Long) defaultValue(fields()[4]); - record.currentTimestamp = fieldSetFlags()[5] ? this.currentTimestamp : (java.lang.Long) defaultValue(fields()[5]); - record.recordCount = fieldSetFlags()[6] ? this.recordCount : (java.lang.Integer) defaultValue(fields()[6]); - return record; - } catch (org.apache.avro.AvroMissingFieldException e) { - throw e; - } catch (java.lang.Exception e) { - throw new org.apache.avro.AvroRuntimeException(e); - } - } - } - - @SuppressWarnings("unchecked") - private static final org.apache.avro.io.DatumWriter - WRITER$ = (org.apache.avro.io.DatumWriter)MODEL$.createDatumWriter(SCHEMA$); - - @Override public void writeExternal(java.io.ObjectOutput out) - throws java.io.IOException { - WRITER$.write(this, SpecificData.getEncoder(out)); - } - - @SuppressWarnings("unchecked") - private static final org.apache.avro.io.DatumReader - READER$ = (org.apache.avro.io.DatumReader)MODEL$.createDatumReader(SCHEMA$); - - @Override public void readExternal(java.io.ObjectInput in) - throws java.io.IOException { - READER$.read(this, SpecificData.getDecoder(in)); - } - - @Override protected boolean hasCustomCoders() { return true; } - - @Override public void customEncode(org.apache.avro.io.Encoder out) - throws java.io.IOException - { - out.writeString(this.topicName); - - out.writeString(this.s3Partition); - - out.writeString(this.filePath); - - out.writeInt(this.partition); - - out.writeLong(this.baseRecordTimestamp); - - out.writeLong(this.currentTimestamp); - - out.writeInt(this.recordCount); - - } - - @Override public void customDecode(org.apache.avro.io.ResolvingDecoder in) - throws java.io.IOException - { - org.apache.avro.Schema.Field[] fieldOrder = in.readFieldOrderIfDiff(); - if (fieldOrder == null) { - this.topicName = in.readString(this.topicName instanceof Utf8 ? (Utf8)this.topicName : null); - - this.s3Partition = in.readString(this.s3Partition instanceof Utf8 ? (Utf8)this.s3Partition : null); - - this.filePath = in.readString(this.filePath instanceof Utf8 ? (Utf8)this.filePath : null); - - this.partition = in.readInt(); - - this.baseRecordTimestamp = in.readLong(); - - this.currentTimestamp = in.readLong(); - - this.recordCount = in.readInt(); - - } else { - for (int i = 0; i < 7; i++) { - switch (fieldOrder[i].pos()) { - case 0: - this.topicName = in.readString(this.topicName instanceof Utf8 ? (Utf8)this.topicName : null); - break; - - case 1: - this.s3Partition = in.readString(this.s3Partition instanceof Utf8 ? (Utf8)this.s3Partition : null); - break; - - case 2: - this.filePath = in.readString(this.filePath instanceof Utf8 ? 
(Utf8)this.filePath : null); - break; - - case 3: - this.partition = in.readInt(); - break; - - case 4: - this.baseRecordTimestamp = in.readLong(); - break; - - case 5: - this.currentTimestamp = in.readLong(); - break; - - case 6: - this.recordCount = in.readInt(); - break; - - default: - throw new java.io.IOException("Corrupt ResolvingDecoder."); - } - } - } - } -} - - - - - - - - - - diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java index 5484f2e90..e6cdf2fa1 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java @@ -24,6 +24,12 @@ public FileCallbackProvider(String configJson, boolean skipError) { this.skipError = skipError; } - abstract public void call(String topicName, String s3Partition, String filePath, int partition, - Long baseRecordTimestamp, Long currentTimestamp, int recordCount); + public abstract void call( + String topicName, + String s3Partition, + String filePath, + int partition, + Long baseRecordTimestamp, + Long currentTimestamp, + int recordCount); } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java similarity index 72% rename from kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java rename to kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java index 649379996..1dab3b947 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/KafkaFileCallbackConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java @@ -13,14 +13,16 @@ * specific language governing permissions and limitations under the License. 
*/ -package io.confluent.connect.s3; +package io.confluent.connect.s3.callback; import java.util.Properties; public class KafkaFileCallbackConfig extends AbstractFileCallbackConfig { - private final String KEY_SERIALIZER = "org.apache.kafka.common.serialization.StringSerializer"; - private final String VALUE_SERIALIZER = "io.confluent.kafka.serializers.KafkaAvroSerializer"; + private static final String KEY_SERIALIZER = + "org.apache.kafka.common.serialization.StringSerializer"; + private static final String VALUE_SERIALIZER = + "io.confluent.kafka.serializers.KafkaAvroSerializer"; private String topicName; private String bootstrapServers; @@ -29,15 +31,16 @@ public class KafkaFileCallbackConfig extends AbstractFileCallbackConfig { private String saslMecanism; private String saslJaasConfig; - /** - * empty constructor for jackson - */ - public KafkaFileCallbackConfig() { - } + /** empty constructor for jackson */ + public KafkaFileCallbackConfig() {} - public KafkaFileCallbackConfig(String topicName, String bootstrapServers, - String schemaRegistryUrl, String securityProtocol, - String saslMecanism, String saslJaasConfig) { + public KafkaFileCallbackConfig( + String topicName, + String bootstrapServers, + String schemaRegistryUrl, + String securityProtocol, + String saslMecanism, + String saslJaasConfig) { this.topicName = topicName; this.bootstrapServers = bootstrapServers; this.schemaRegistryUrl = schemaRegistryUrl; @@ -46,17 +49,28 @@ public KafkaFileCallbackConfig(String topicName, String bootstrapServers, this.saslJaasConfig = saslJaasConfig; } + private void validateFields() { + if (topicName == null || bootstrapServers == null || schemaRegistryUrl == null) { + throw new RuntimeException( + "topic_name, boostrap_servers and schema_registry_url shall be defined"); + } + } + + @Override public String toJson() { final StringBuffer sb = new StringBuffer("{"); sb.append("\"topic_name\": \"").append(topicName).append('"'); sb.append(", \"bootstrap_servers\": \"").append(bootstrapServers).append('"'); sb.append(", \"schema_registry_url\": \"").append(schemaRegistryUrl).append('"'); - if(securityProtocol != null) + if (securityProtocol != null) { sb.append(", \"security_protocol\": \"").append(securityProtocol).append('"'); - if(saslMecanism != null) + } + if (saslMecanism != null) { sb.append(", \"sasl_mecanism\": \"").append(saslMecanism).append('"'); - if(saslJaasConfig != null) + } + if (saslJaasConfig != null) { sb.append(", \"sasl_jaas_config\": \"").append(saslJaasConfig).append('"'); + } sb.append('}'); return sb.toString(); } @@ -71,12 +85,15 @@ public Properties toProps() { prop.setProperty("topic.name", topicName); prop.setProperty("schema.registry.url", schemaRegistryUrl); // optional - if(saslMecanism != null) + if (saslMecanism != null) { prop.setProperty("sasl.mechanism", saslMecanism); - if(securityProtocol != null) + } + if (securityProtocol != null) { prop.setProperty("security.protocol", securityProtocol); - if(saslJaasConfig != null) + } + if (saslJaasConfig != null) { prop.setProperty("sasl.jaas.config", saslJaasConfig); + } return prop; } @@ -87,6 +104,7 @@ public String getTopicName() { public String getSchemaRegistryUrl() { return schemaRegistryUrl; } + public String getBootstrapServers() { return bootstrapServers; } @@ -94,6 +112,7 @@ public String getBootstrapServers() { public String getSecurityProtocol() { return securityProtocol; } + public String getSaslMecanism() { return saslMecanism; } diff --git 
a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java index 20672e6d7..ee961352a 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java @@ -15,7 +15,6 @@ package io.confluent.connect.s3.callback; -import io.confluent.connect.s3.KafkaFileCallbackConfig; import org.apache.avro.specific.SpecificRecord; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.Producer; @@ -29,30 +28,44 @@ public class KafkaFileCallbackProvider extends FileCallbackProvider { public KafkaFileCallbackProvider(String configJson, boolean skipError) { super(configJson, skipError); - this.kafkaConfig = KafkaFileCallbackConfig.fromJsonString(configJson, - KafkaFileCallbackConfig.class); + this.kafkaConfig = + KafkaFileCallbackConfig.fromJsonString(configJson, KafkaFileCallbackConfig.class); } @Override - public void call(String topicName,String s3Partition, String filePath, int partition, - Long baseRecordTimestamp, Long currentTimestamp, int recordCount) { + public void call( + String topicName, + String s3Partition, + String filePath, + int partition, + Long baseRecordTimestamp, + Long currentTimestamp, + int recordCount) { String key = topicName; - Callback value = new Callback(topicName, s3Partition, filePath, partition, - baseRecordTimestamp, currentTimestamp, recordCount); + Callback value = + new Callback( + topicName, + s3Partition, + filePath, + partition, + baseRecordTimestamp, + currentTimestamp, + recordCount); try (final Producer producer = - new KafkaProducer<>(kafkaConfig.toProps())) { - producer.send(new ProducerRecord<>(kafkaConfig.getTopicName(), key, value), - (event, ex) -> { - if (ex != null) { - throw new RuntimeException(ex); - } - }); + new KafkaProducer<>(kafkaConfig.toProps())) { + producer.send( + new ProducerRecord<>(kafkaConfig.getTopicName(), key, value), + (event, ex) -> { + if (ex != null) { + throw new RuntimeException(ex); + } + }); } catch (Exception e) { - if(skipError) + if (skipError) { log.error(e.getMessage(), e); - else + } else { throw new RuntimeException(e); + } } } - } diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java index 02b4bb572..1f8712286 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java @@ -26,19 +26,16 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import io.confluent.connect.s3.KafkaFileCallbackConfig; +import io.confluent.connect.s3.callback.KafkaFileCallbackConfig; import io.confluent.connect.s3.S3SinkConnector; import io.confluent.connect.s3.S3SinkConnectorConfig.IgnoreOrFailBehavior; import io.confluent.connect.s3.S3SinkConnectorConfig.OutputWriteBehavior; -import io.confluent.connect.s3.callback.Callback; import io.confluent.connect.s3.callback.KafkaFileCallbackProvider; import io.confluent.connect.s3.format.avro.AvroFormat; import io.confluent.connect.s3.format.json.JsonFormat; import io.confluent.connect.s3.format.parquet.ParquetFormat; import io.confluent.connect.s3.storage.S3Storage; import 
java.io.File; -import java.io.IOException; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -51,7 +48,6 @@ import java.util.concurrent.TimeUnit; import io.confluent.connect.s3.util.EmbeddedConnectUtils; -import jdk.nashorn.internal.codegen.CompilerConstants; import org.apache.commons.io.FileUtils; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; @@ -457,20 +453,28 @@ private void setupProperties() { } @Test - public void testCallBackPartition() throws IOException { + public void testCallBackPartition() { EmbeddedKafkaCluster kafka = connect.kafka(); kafka.start(); String bootstrapServers = kafka.bootstrapServers(); String callbackTopic = "callback_topic"; kafka.createTopic(callbackTopic); KafkaFileCallbackConfig kafkaFileCallbackConfig = - new KafkaFileCallbackConfig(callbackTopic, bootstrapServers, restApp.restServer.getURI().toString(), null, null, null); - KafkaFileCallbackProvider callBack = new KafkaFileCallbackProvider(kafkaFileCallbackConfig.toJson(), false); - callBack.call("baz-topic", "version/event/hour", "hey.avro", 12, 1234L, 123L, 34); - ConsumerRecords res = kafka.consume(1, 1000L, callbackTopic); - ConsumerRecord next = res.iterator().next(); - String key = new String(next.key()); - String value = new String(next.value()); - System.out.println(key + value); + new KafkaFileCallbackConfig( + callbackTopic, + bootstrapServers, + restApp.restServer.getURI().toString(), + null, + null, + null); + KafkaFileCallbackProvider callBack = + new KafkaFileCallbackProvider(kafkaFileCallbackConfig.toJson(), false); + callBack.call("baz-topic", "version/event/hour", "file1.avro", 12, + 1234L, 123L, 34); + callBack.call("foo-topic", "version/event/hour", "fil2.avro", 8, + 12345L, 1234L, 12); + + // fails if two records are not present in kafka within 1s + kafka.consume(2, 1000L, callbackTopic); } } From 4dbe5f63038404de34e8a11f6920a3d0b0670559 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 28 Jul 2023 15:59:21 +0200 Subject: [PATCH 14/34] Add k-connect test and fix impl --- .../connect/s3/TopicPartitionWriter.java | 9 +- .../callback/AbstractFileCallbackConfig.java | 6 +- .../s3/callback/KafkaFileCallbackConfig.java | 7 +- .../src/main/resources/callback.avsc | 4 +- .../s3/integration/S3SinkCallbackIT.java | 277 ++++++++++++++++++ .../s3/integration/S3SinkConnectorIT.java | 29 -- 6 files changed, 293 insertions(+), 39 deletions(-) create mode 100644 kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index 61b9f1af6..f7db14991 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -107,7 +107,7 @@ public class TopicPartitionWriter { private ErrantRecordReporter reporter; private final FileRotationTracker fileRotationTracker; - private final Optional fileCallback; + private Optional fileCallback = Optional.empty(); public TopicPartitionWriter(TopicPartition tp, S3Storage storage, @@ -195,16 +195,15 @@ public TopicPartitionWriter(TopicPartition tp, // Initialize callback if enabled if (this.connectorConfig.getFileCallbackEnable()) { try { - fileCallback = Optional.of((FileCallbackProvider)this.connectorConfig - 
.getFileCallbackClass().getConstructor(FileCallbackProvider.class) + fileCallback = Optional.of((FileCallbackProvider) + this.connectorConfig + .getFileCallbackClass().getConstructor(String.class, boolean.class) .newInstance(connectorConfig.getFileCallbackConfigJson(), connectorConfig.getFileCallbackSkipError())); } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { throw new RuntimeException(e); } - } else { - fileCallback = Optional.empty(); } } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/AbstractFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/AbstractFileCallbackConfig.java index b68a0ad36..4a19e0ff4 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/AbstractFileCallbackConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/AbstractFileCallbackConfig.java @@ -36,12 +36,16 @@ public static T fromJsonString( instanceMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true); instanceMapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true); instanceMapper.enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS); - return instanceMapper.readValue(jsonContent, clazz); + T value = instanceMapper.readValue(jsonContent, clazz); + value.validateFields(); + return value; } catch (IllegalAccessException | InstantiationException | IOException e) { throw new RuntimeException(e); } } + protected abstract void validateFields() ; + public abstract Properties toProps(); public abstract String toJson(); diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java index 1dab3b947..7b0941cdc 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java @@ -32,7 +32,8 @@ public class KafkaFileCallbackConfig extends AbstractFileCallbackConfig { private String saslJaasConfig; /** empty constructor for jackson */ - public KafkaFileCallbackConfig() {} + public KafkaFileCallbackConfig() { + } public KafkaFileCallbackConfig( String topicName, @@ -49,7 +50,8 @@ public KafkaFileCallbackConfig( this.saslJaasConfig = saslJaasConfig; } - private void validateFields() { + @Override + protected void validateFields() { if (topicName == null || bootstrapServers == null || schemaRegistryUrl == null) { throw new RuntimeException( "topic_name, boostrap_servers and schema_registry_url shall be defined"); @@ -80,6 +82,7 @@ public Properties toProps() { Properties prop = new Properties(); prop.setProperty("key.serializer", KEY_SERIALIZER); prop.setProperty("value.serializer", VALUE_SERIALIZER); + prop.setProperty("auto.create.topics.enable", "true"); // mandatory prop.setProperty("bootstrap.servers", bootstrapServers); prop.setProperty("topic.name", topicName); diff --git a/kafka-connect-s3/src/main/resources/callback.avsc b/kafka-connect-s3/src/main/resources/callback.avsc index 1865ec802..980752dad 100644 --- a/kafka-connect-s3/src/main/resources/callback.avsc +++ b/kafka-connect-s3/src/main/resources/callback.avsc @@ -26,12 +26,12 @@ }, { "name": "baseRecordTimestamp", - "type": "long", + "type": ["null", "long"], "doc": "A unique event ID (UUID)" }, { "name": "currentTimestamp", - "type": "long", + "type": ["null", "long"], "doc": "A unique 
event ID (UUID)" }, { diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java new file mode 100644 index 000000000..f7129ada1 --- /dev/null +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java @@ -0,0 +1,277 @@ +/* + * Copyright 2018 Confluent Inc. + * + * Licensed under the Confluent Community License (the "License"); you may not use + * this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.confluent.io/confluent-community-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package io.confluent.connect.s3.integration; + +import static io.confluent.connect.s3.S3SinkConnectorConfig.*; +import static io.confluent.connect.storage.StorageSinkConnectorConfig.FLUSH_SIZE_CONFIG; +import static io.confluent.connect.storage.StorageSinkConnectorConfig.FORMAT_CLASS_CONFIG; +import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG; +import static org.apache.kafka.connect.runtime.ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG; +import static org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG; +import static org.apache.kafka.connect.runtime.ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG; +import static org.hamcrest.core.StringStartsWith.startsWith; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import io.confluent.connect.s3.S3SinkConnector; +import io.confluent.connect.s3.S3SinkConnectorConfig.IgnoreOrFailBehavior; +import io.confluent.connect.s3.S3SinkConnectorConfig.OutputWriteBehavior; +import io.confluent.connect.s3.callback.KafkaFileCallbackConfig; +import io.confluent.connect.s3.callback.KafkaFileCallbackProvider; +import io.confluent.connect.s3.format.avro.AvroFormat; +import io.confluent.connect.s3.format.json.JsonFormat; +import io.confluent.connect.s3.format.parquet.ParquetFormat; +import io.confluent.connect.s3.storage.S3Storage; +import io.confluent.connect.s3.util.EmbeddedConnectUtils; +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import org.apache.commons.io.FileUtils; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.header.Header; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.json.JsonConverter; +import org.apache.kafka.connect.runtime.SinkConnectorConfig; +import org.apache.kafka.connect.sink.SinkRecord; +import org.apache.kafka.connect.storage.StringConverter; +import 
org.apache.kafka.connect.util.clusters.EmbeddedKafkaCluster; +import org.apache.kafka.test.IntegrationTest; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@SuppressWarnings({"unchecked", "deprecation"}) +@Category(IntegrationTest.class) +public class S3SinkCallbackIT extends BaseConnectorIT { + + private static final Logger log = LoggerFactory.getLogger(S3SinkCallbackIT.class); + // connector and test configs + private static final String CONNECTOR_NAME = "s3-sink"; + private static final String DEFAULT_TEST_TOPIC_NAME = "TestTopic"; + + private static final List KAFKA_TOPICS = Collections.singletonList(DEFAULT_TEST_TOPIC_NAME); + + private JsonConverter jsonConverter; + // custom producer to enable sending records with headers + private Producer producer; + + @Before + public void before() throws InterruptedException { + initializeJsonConverter(); + initializeCustomProducer(); + setupProperties(); + waitForSchemaRegistryToStart(); + //add class specific props + props.put(SinkConnectorConfig.TOPICS_CONFIG, String.join(",", KAFKA_TOPICS)); + props.put(FLUSH_SIZE_CONFIG, Integer.toString(FLUSH_SIZE_STANDARD)); + props.put(FORMAT_CLASS_CONFIG, AvroFormat.class.getName()); + props.put(STORAGE_CLASS_CONFIG, S3Storage.class.getName()); + props.put(S3_BUCKET_CONFIG, TEST_BUCKET_NAME); + props.put(S3_PROXY_URL_CONFIG, minioContainer.getUrl()); + props.put(AWS_ACCESS_KEY_ID_CONFIG, MinioContainer.MINIO_USERNAME); + props.put(AWS_SECRET_ACCESS_KEY_CONFIG, MinioContainer.MINIO_PASSWORD); + // callback + props.put(FILE_CALLBACK_ENABLE, "true"); + // create topics in Kafka + KAFKA_TOPICS.forEach(topic -> connect.kafka().createTopic(topic, 1)); + } + + @After + public void after() throws Exception { + // delete the downloaded test file folder + FileUtils.deleteDirectory(new File(TEST_DOWNLOAD_PATH)); + // clear for next test + clearBucket(TEST_BUCKET_NAME); + // wait for bucket to clear + waitForFilesInBucket(TEST_BUCKET_NAME, 0); + } + + @Test + public void testCallBackPartition() { + String bootstrapServers = connect.kafka().bootstrapServers(); + String callbackTopic = "callback_topic"; + connect.kafka().createTopic(callbackTopic); + KafkaFileCallbackConfig kafkaFileCallbackConfig = + new KafkaFileCallbackConfig( + callbackTopic, + bootstrapServers, + restApp.restServer.getURI().toString(), + null, + null, + null); + KafkaFileCallbackProvider callBack = + new KafkaFileCallbackProvider(kafkaFileCallbackConfig.toJson(), false); + callBack.call("baz-topic", "version/event/hour", "file1.avro", 12, + 1234L, 123L, 34); + callBack.call("foo-topic", "version/event/hour", "fil2.avro", 8, + 12345L, 1234L, 12); + + // fails if two records are not present in kafka within 1s + connect.kafka().consume(2, 1000L, callbackTopic); + } + + @Test + public void testBasicRecordsWrittenParquet() throws Throwable { + // add test specific props + props.put(FORMAT_CLASS_CONFIG, ParquetFormat.class.getName()); + String topicCallback = "TopicCallback"; + props.put( + FILE_CALLBACK_CONFIG_JSON, + new KafkaFileCallbackConfig( + topicCallback, + connect.kafka().bootstrapServers(), + restApp.restServer.getURI().toString(), + null, + null, + null) + .toJson()); + connect.kafka().createTopic(topicCallback); + testBasicRecordsWritten(PARQUET_EXTENSION, topicCallback); + } + + /** + * Test that the expected records are written for a given file extension + * Optionally, test that topics which have 
"*.{expectedFileExtension}*" in them are processed + * and written. + * @param expectedFileExtension The file extension to test against + * @param callbackTopic The callback topic name + * @throws Throwable + */ + private void testBasicRecordsWritten( + String expectedFileExtension, + String callbackTopic + ) throws Throwable { + // Add an extra topic with this extension inside of the name + // Use a TreeSet for test determinism + Set topicNames = new TreeSet<>(KAFKA_TOPICS); + + // start sink connector + connect.configureConnector(CONNECTOR_NAME, props); + // wait for tasks to spin up + EmbeddedConnectUtils.waitForConnectorToStart(connect, CONNECTOR_NAME, Math.min(topicNames.size(), MAX_TASKS)); + + Schema recordValueSchema = getSampleStructSchema(); + Struct recordValueStruct = getSampleStructVal(recordValueSchema); + + for (String thisTopicName : topicNames) { + // Create and send records to Kafka using the topic name in the current 'thisTopicName' + SinkRecord sampleRecord = getSampleTopicRecord(thisTopicName, recordValueSchema, recordValueStruct); + produceRecordsNoHeaders(NUM_RECORDS_INSERT, sampleRecord); + } + + log.info("Waiting for files in S3..."); + int countPerTopic = NUM_RECORDS_INSERT / FLUSH_SIZE_STANDARD; + int expectedTotalFileCount = countPerTopic * topicNames.size(); + waitForFilesInBucket(TEST_BUCKET_NAME, expectedTotalFileCount); + + Set expectedTopicFilenames = new TreeSet<>(); + for (String thisTopicName : topicNames) { + List theseFiles = getExpectedFilenames( + thisTopicName, + TOPIC_PARTITION, + FLUSH_SIZE_STANDARD, + NUM_RECORDS_INSERT, + expectedFileExtension + ); + assertEquals(theseFiles.size(), countPerTopic); + expectedTopicFilenames.addAll(theseFiles); + } + // This check will catch any duplications + assertEquals(expectedTopicFilenames.size(), expectedTotalFileCount); + // Check wether we get same number of records in callback + connect.kafka().consume(expectedTotalFileCount, 1000L, callbackTopic); + } + + private void produceRecordsNoHeaders(int recordCount, SinkRecord record) + throws ExecutionException, InterruptedException { + produceRecords(record.topic(), recordCount, record, true, true, false); + } + + private void produceRecords( + String topic, + int recordCount, + SinkRecord record, + boolean withKey, + boolean withValue, + boolean withHeaders + ) throws ExecutionException, InterruptedException { + byte[] kafkaKey = null; + byte[] kafkaValue = null; + Iterable
headers = Collections.emptyList(); + if (withKey) { + kafkaKey = jsonConverter.fromConnectData(topic, Schema.STRING_SCHEMA, record.key()); + } + if (withValue) { + kafkaValue = jsonConverter.fromConnectData(record.topic(), record.valueSchema(), record.value()); + } + if (withHeaders) { + headers = sampleHeaders(); + } + ProducerRecord producerRecord = + new ProducerRecord<>(topic, TOPIC_PARTITION, kafkaKey, kafkaValue, headers); + for (long i = 0; i < recordCount; i++) { + producer.send(producerRecord).get(); + } + } + + private void initializeJsonConverter() { + Map jsonConverterProps = new HashMap<>(); + jsonConverterProps.put("schemas.enable", "true"); + jsonConverterProps.put("converter.type", "value"); + jsonConverter = new JsonConverter(); + jsonConverter.configure(jsonConverterProps); + } + + private void initializeCustomProducer() { + Map producerProps = new HashMap<>(); + producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, connect.kafka().bootstrapServers()); + producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, + org.apache.kafka.common.serialization.ByteArraySerializer.class.getName()); + producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, + org.apache.kafka.common.serialization.ByteArraySerializer.class.getName()); + producer = new KafkaProducer<>(producerProps); + } + + private void setupProperties() { + props = new HashMap<>(); + props.put(CONNECTOR_CLASS_CONFIG, S3SinkConnector.class.getName()); + props.put(TASKS_MAX_CONFIG, Integer.toString(MAX_TASKS)); + // converters + props.put(KEY_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName()); + props.put(VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName()); + // aws credential if exists + props.putAll(getAWSCredentialFromPath()); + } + +} diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java index 1f8712286..3651f2320 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java @@ -26,11 +26,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import io.confluent.connect.s3.callback.KafkaFileCallbackConfig; import io.confluent.connect.s3.S3SinkConnector; import io.confluent.connect.s3.S3SinkConnectorConfig.IgnoreOrFailBehavior; import io.confluent.connect.s3.S3SinkConnectorConfig.OutputWriteBehavior; -import io.confluent.connect.s3.callback.KafkaFileCallbackProvider; import io.confluent.connect.s3.format.avro.AvroFormat; import io.confluent.connect.s3.format.json.JsonFormat; import io.confluent.connect.s3.format.parquet.ParquetFormat; @@ -63,7 +61,6 @@ import org.apache.kafka.connect.runtime.SinkConnectorConfig; import org.apache.kafka.connect.sink.SinkRecord; import org.apache.kafka.connect.storage.StringConverter; -import org.apache.kafka.connect.util.clusters.EmbeddedKafkaCluster; import org.apache.kafka.test.IntegrationTest; import org.junit.After; import org.junit.Before; @@ -451,30 +448,4 @@ private void setupProperties() { // aws credential if exists props.putAll(getAWSCredentialFromPath()); } - - @Test - public void testCallBackPartition() { - EmbeddedKafkaCluster kafka = connect.kafka(); - kafka.start(); - String bootstrapServers = kafka.bootstrapServers(); - String callbackTopic = "callback_topic"; - kafka.createTopic(callbackTopic); - KafkaFileCallbackConfig 
kafkaFileCallbackConfig = - new KafkaFileCallbackConfig( - callbackTopic, - bootstrapServers, - restApp.restServer.getURI().toString(), - null, - null, - null); - KafkaFileCallbackProvider callBack = - new KafkaFileCallbackProvider(kafkaFileCallbackConfig.toJson(), false); - callBack.call("baz-topic", "version/event/hour", "file1.avro", 12, - 1234L, 123L, 34); - callBack.call("foo-topic", "version/event/hour", "fil2.avro", 8, - 12345L, 1234L, 12); - - // fails if two records are not present in kafka within 1s - kafka.consume(2, 1000L, callbackTopic); - } } From 3010b5471bdaaef100ff172150ac53e49ddd5d18 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 28 Jul 2023 16:57:51 +0200 Subject: [PATCH 15/34] Add timebasedpartitioner to get ts --- .../s3/integration/S3SinkCallbackIT.java | 62 +++++++++---------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java index f7129ada1..342d45267 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java @@ -22,23 +22,17 @@ import static org.apache.kafka.connect.runtime.ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG; import static org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG; import static org.apache.kafka.connect.runtime.ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG; -import static org.hamcrest.core.StringStartsWith.startsWith; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; import io.confluent.connect.s3.S3SinkConnector; -import io.confluent.connect.s3.S3SinkConnectorConfig.IgnoreOrFailBehavior; -import io.confluent.connect.s3.S3SinkConnectorConfig.OutputWriteBehavior; import io.confluent.connect.s3.callback.KafkaFileCallbackConfig; import io.confluent.connect.s3.callback.KafkaFileCallbackProvider; import io.confluent.connect.s3.format.avro.AvroFormat; -import io.confluent.connect.s3.format.json.JsonFormat; import io.confluent.connect.s3.format.parquet.ParquetFormat; import io.confluent.connect.s3.storage.S3Storage; import io.confluent.connect.s3.util.EmbeddedConnectUtils; +import io.confluent.connect.storage.partitioner.PartitionerConfig; import java.io.File; -import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -46,23 +40,17 @@ import java.util.Set; import java.util.TreeSet; import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; import org.apache.commons.io.FileUtils; -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.consumer.ConsumerRecords; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.common.header.Header; -import org.apache.kafka.common.serialization.StringDeserializer; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.json.JsonConverter; import org.apache.kafka.connect.runtime.SinkConnectorConfig; import org.apache.kafka.connect.sink.SinkRecord; -import org.apache.kafka.connect.storage.StringConverter; -import 
org.apache.kafka.connect.util.clusters.EmbeddedKafkaCluster; import org.apache.kafka.test.IntegrationTest; import org.junit.After; import org.junit.Before; @@ -103,6 +91,12 @@ public void before() throws InterruptedException { props.put(AWS_SECRET_ACCESS_KEY_CONFIG, MinioContainer.MINIO_PASSWORD); // callback props.put(FILE_CALLBACK_ENABLE, "true"); + // TimeBasedPartitioner + props.put(PartitionerConfig.PARTITIONER_CLASS_CONFIG, "io.confluent.connect.storage.partitioner.TimeBasedPartitioner"); + props.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, "100"); + props.put(PartitionerConfig.PATH_FORMAT_CONFIG, "'event_date'=YYYY-MM-dd/'event_hour'=HH"); + props.put(PartitionerConfig.LOCALE_CONFIG, "FR_fr"); + props.put(PartitionerConfig.TIMEZONE_CONFIG, "UTC"); // create topics in Kafka KAFKA_TOPICS.forEach(topic -> connect.kafka().createTopic(topic, 1)); } @@ -117,6 +111,26 @@ public void after() throws Exception { waitForFilesInBucket(TEST_BUCKET_NAME, 0); } + + @Test + public void testBasicRecordsWrittenParquetAndRelatedCallbacks() throws Throwable { + // add test specific props + props.put(FORMAT_CLASS_CONFIG, ParquetFormat.class.getName()); + String topicCallback = "TopicCallback"; + props.put( + FILE_CALLBACK_CONFIG_JSON, + new KafkaFileCallbackConfig( + topicCallback, + connect.kafka().bootstrapServers(), + restApp.restServer.getURI().toString(), + null, + null, + null) + .toJson()); + connect.kafka().createTopic(topicCallback); + testBasicRecordsWrittenAndRelatedCallbacks(PARQUET_EXTENSION, topicCallback); + } + @Test public void testCallBackPartition() { String bootstrapServers = connect.kafka().bootstrapServers(); @@ -140,26 +154,6 @@ public void testCallBackPartition() { // fails if two records are not present in kafka within 1s connect.kafka().consume(2, 1000L, callbackTopic); } - - @Test - public void testBasicRecordsWrittenParquet() throws Throwable { - // add test specific props - props.put(FORMAT_CLASS_CONFIG, ParquetFormat.class.getName()); - String topicCallback = "TopicCallback"; - props.put( - FILE_CALLBACK_CONFIG_JSON, - new KafkaFileCallbackConfig( - topicCallback, - connect.kafka().bootstrapServers(), - restApp.restServer.getURI().toString(), - null, - null, - null) - .toJson()); - connect.kafka().createTopic(topicCallback); - testBasicRecordsWritten(PARQUET_EXTENSION, topicCallback); - } - /** * Test that the expected records are written for a given file extension * Optionally, test that topics which have "*.{expectedFileExtension}*" in them are processed @@ -168,7 +162,7 @@ public void testBasicRecordsWrittenParquet() throws Throwable { * @param callbackTopic The callback topic name * @throws Throwable */ - private void testBasicRecordsWritten( + private void testBasicRecordsWrittenAndRelatedCallbacks( String expectedFileExtension, String callbackTopic ) throws Throwable { From b1dc3881d6f958e630417f183ac227b6d63ed2ac Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 28 Jul 2023 17:15:59 +0200 Subject: [PATCH 16/34] Document the avro record --- kafka-connect-s3/src/main/resources/callback.avsc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kafka-connect-s3/src/main/resources/callback.avsc b/kafka-connect-s3/src/main/resources/callback.avsc index 980752dad..83a4f7012 100644 --- a/kafka-connect-s3/src/main/resources/callback.avsc +++ b/kafka-connect-s3/src/main/resources/callback.avsc @@ -7,37 +7,37 @@ { "name": "topicName", "type": "string", - "doc": "A unique event ID (UUID)" + "doc": "The topic name of the record being 
written" }, { "name": "s3Partition", "type": "string", - "doc": "A unique event ID (UUID)" + "doc": "The s3 partition produced by the partitioner" }, { "name": "filePath", "type": "string", - "doc": "A unique event ID (UUID)" + "doc": "Current file path, including partition and file name" }, { "name": "partition", "type": "int", - "doc": "A unique event ID (UUID)" + "doc": "The kafka partition being recorded" }, { "name": "baseRecordTimestamp", "type": ["null", "long"], - "doc": "A unique event ID (UUID)" + "doc": "Time of the first record written in the file. Defined when partitioner is time based only." }, { "name": "currentTimestamp", "type": ["null", "long"], - "doc": "A unique event ID (UUID)" + "doc": "Time of the last record written in the file. Defined when partitioner is time based only." }, { "name": "recordCount", "type": "int", - "doc": "A unique event ID (UUID)" + "doc": "Number of records within the written file" } ] } \ No newline at end of file From dae74e2976f79fe2eb961ac819e25c50218e81cd Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Thu, 17 Aug 2023 16:10:15 +0200 Subject: [PATCH 17/34] Use avro to serialize key --- .../confluent/connect/s3/callback/KafkaFileCallbackConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java index 7b0941cdc..c10313180 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java @@ -20,7 +20,7 @@ public class KafkaFileCallbackConfig extends AbstractFileCallbackConfig { private static final String KEY_SERIALIZER = - "org.apache.kafka.common.serialization.StringSerializer"; + "io.confluent.kafka.serializers.KafkaAvroSerializer"; private static final String VALUE_SERIALIZER = "io.confluent.kafka.serializers.KafkaAvroSerializer"; From cec27e752f21aca3f3a5ffae00ba0454fe6af9b0 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Thu, 17 Aug 2023 16:18:42 +0200 Subject: [PATCH 18/34] Add event datetime --- .../java/io/confluent/connect/s3/TopicPartitionWriter.java | 2 +- .../confluent/connect/s3/callback/FileCallbackProvider.java | 2 +- .../connect/s3/callback/KafkaFileCallbackProvider.java | 6 ++++-- kafka-connect-s3/src/main/resources/callback.avsc | 5 +++++ 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index f7db14991..2e8419289 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -693,7 +693,7 @@ private void commitFile(String encodedPartition) { private void callbackFile(String encodedPartition) { fileCallback.ifPresent(fs -> fs.call(tp.topic(), encodedPartition, commitFiles.get(encodedPartition), tp.partition(), baseRecordTimestamp, - currentTimestamp, recordCount)); + currentTimestamp, recordCount, time.milliseconds())); } private void tagFile(String encodedPartition, String s3ObjectPath) { diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java index 
e6cdf2fa1..9c914a704 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java @@ -31,5 +31,5 @@ public abstract void call( int partition, Long baseRecordTimestamp, Long currentTimestamp, - int recordCount); + int recordCount, Long eventDatetime); } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java index ee961352a..3ffe58044 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java @@ -40,7 +40,8 @@ public void call( int partition, Long baseRecordTimestamp, Long currentTimestamp, - int recordCount) { + int recordCount, + Long eventDatetime) { String key = topicName; Callback value = new Callback( @@ -50,7 +51,8 @@ public void call( partition, baseRecordTimestamp, currentTimestamp, - recordCount); + recordCount, + eventDatetime); try (final Producer producer = new KafkaProducer<>(kafkaConfig.toProps())) { producer.send( diff --git a/kafka-connect-s3/src/main/resources/callback.avsc b/kafka-connect-s3/src/main/resources/callback.avsc index 83a4f7012..aa11923ed 100644 --- a/kafka-connect-s3/src/main/resources/callback.avsc +++ b/kafka-connect-s3/src/main/resources/callback.avsc @@ -38,6 +38,11 @@ "name": "recordCount", "type": "int", "doc": "Number of records within the written file" + }, + { + "name": "event_datetime", + "type": "long", + "doc": "The time of the callback event" } ] } \ No newline at end of file From 48f853680509a033073b2186dbd130f86b6f6120 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Tue, 22 Aug 2023 17:00:02 +0200 Subject: [PATCH 19/34] Add logs and fix it --- .../connect/s3/TopicPartitionWriter.java | 1 + .../s3/callback/FileCallbackProvider.java | 26 ++++++++++++++++++- .../callback/KafkaFileCallbackProvider.java | 9 +------ .../s3/integration/S3SinkCallbackIT.java | 4 +-- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index 2e8419289..6cfb0afca 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -195,6 +195,7 @@ public TopicPartitionWriter(TopicPartition tp, // Initialize callback if enabled if (this.connectorConfig.getFileCallbackEnable()) { try { + log.info("File callback enabled"); fileCallback = Optional.of((FileCallbackProvider) this.connectorConfig .getFileCallbackClass().getConstructor(String.class, boolean.class) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java index 9c914a704..b7cd8e75d 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java @@ -15,7 +15,11 @@ package io.confluent.connect.s3.callback; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public abstract class FileCallbackProvider { + private static final 
Logger log = LoggerFactory.getLogger(FileCallbackProvider.class); protected final String configJson; protected final boolean skipError; @@ -24,7 +28,27 @@ public FileCallbackProvider(String configJson, boolean skipError) { this.skipError = skipError; } - public abstract void call( + public void call( + String topicName, + String s3Partition, + String filePath, + int partition, + Long baseRecordTimestamp, + Long currentTimestamp, + int recordCount, + Long eventDatetime) { + try { + log.info("Running file callback : {}, {}", topicName, filePath); + callImpl(topicName, s3Partition, filePath, partition, baseRecordTimestamp, currentTimestamp, recordCount, eventDatetime); + } catch (Exception e) { + if (skipError) { + log.error(e.getMessage(), e); + } else { + throw new RuntimeException(e); + } + } + } + public abstract void callImpl( String topicName, String s3Partition, String filePath, diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java index 3ffe58044..8ad9a333d 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java @@ -23,7 +23,6 @@ import org.slf4j.LoggerFactory; public class KafkaFileCallbackProvider extends FileCallbackProvider { - private static final Logger log = LoggerFactory.getLogger(KafkaFileCallbackProvider.class); private final KafkaFileCallbackConfig kafkaConfig; public KafkaFileCallbackProvider(String configJson, boolean skipError) { @@ -33,7 +32,7 @@ public KafkaFileCallbackProvider(String configJson, boolean skipError) { } @Override - public void call( + public void callImpl( String topicName, String s3Partition, String filePath, @@ -62,12 +61,6 @@ public void call( throw new RuntimeException(ex); } }); - } catch (Exception e) { - if (skipError) { - log.error(e.getMessage(), e); - } else { - throw new RuntimeException(e); - } } } } diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java index 342d45267..e34add267 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java @@ -147,9 +147,9 @@ public void testCallBackPartition() { KafkaFileCallbackProvider callBack = new KafkaFileCallbackProvider(kafkaFileCallbackConfig.toJson(), false); callBack.call("baz-topic", "version/event/hour", "file1.avro", 12, - 1234L, 123L, 34); + 1234L, 123L, 34, 1234L); callBack.call("foo-topic", "version/event/hour", "fil2.avro", 8, - 12345L, 1234L, 12); + 12345L, 1234L, 12, 12345L); // fails if two records are not present in kafka within 1s connect.kafka().consume(2, 1000L, callbackTopic); From 5379081fb0cdf72109cd5dcb94a0c360f7b4a388 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 25 Aug 2023 11:12:01 +0200 Subject: [PATCH 20/34] Rm useless imports --- .../connect/s3/callback/KafkaFileCallbackProvider.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java index 8ad9a333d..7aecfccb6 100644 --- 
a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java @@ -19,8 +19,6 @@ import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.clients.producer.ProducerRecord; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class KafkaFileCallbackProvider extends FileCallbackProvider { private final KafkaFileCallbackConfig kafkaConfig; From a9288c780229bf09468029b25726757e218fb2d6 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Thu, 31 Aug 2023 15:56:03 +0200 Subject: [PATCH 21/34] Rename callback to file-event --- .../connect/s3/S3SinkConnectorConfig.java | 86 +++++++++---------- .../connect/s3/TopicPartitionWriter.java | 24 +++--- .../AbstractFileEventConfig.java} | 6 +- .../FileEventProvider.java} | 31 ++++--- .../KafkaFileEventConfig.java} | 8 +- .../KafkaFileEventProvider.java} | 27 +++--- .../{callback.avsc => file-event.avsc} | 18 ++-- ...CallbackIT.java => S3SinkFileEventIT.java} | 65 +++++++------- 8 files changed, 140 insertions(+), 125 deletions(-) rename kafka-connect-s3/src/main/java/io/confluent/connect/s3/{callback/AbstractFileCallbackConfig.java => file/AbstractFileEventConfig.java} (91%) rename kafka-connect-s3/src/main/java/io/confluent/connect/s3/{callback/FileCallbackProvider.java => file/FileEventProvider.java} (65%) rename kafka-connect-s3/src/main/java/io/confluent/connect/s3/{callback/KafkaFileCallbackConfig.java => file/KafkaFileEventConfig.java} (95%) rename kafka-connect-s3/src/main/java/io/confluent/connect/s3/{callback/KafkaFileCallbackProvider.java => file/KafkaFileEventProvider.java} (70%) rename kafka-connect-s3/src/main/resources/{callback.avsc => file-event.avsc} (60%) rename kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/{S3SinkCallbackIT.java => S3SinkFileEventIT.java} (84%) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java index 3cbef2344..626e1bcee 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java @@ -24,8 +24,8 @@ import com.amazonaws.regions.Regions; import com.amazonaws.services.s3.model.CannedAccessControlList; import com.amazonaws.services.s3.model.SSEAlgorithm; -import io.confluent.connect.s3.callback.FileCallbackProvider; -import io.confluent.connect.s3.callback.KafkaFileCallbackProvider; +import io.confluent.connect.s3.file.FileEventProvider; +import io.confluent.connect.s3.file.KafkaFileEventProvider; import io.confluent.connect.storage.common.util.StringUtils; import org.apache.kafka.common.Configurable; import org.apache.kafka.common.config.AbstractConfig; @@ -197,17 +197,17 @@ public class S3SinkConnectorConfig extends StorageSinkConnectorConfig { public static final String TOMBSTONE_ENCODED_PARTITION_DEFAULT = "tombstone"; /** - * Callback configs + * File event configs */ - public static final String FILE_CALLBACK_ENABLE = "s3.file.callback.enable"; - public static final boolean FILE_CALLBACK_ENABLE_DEFAULT = false; - public static final String FILE_CALLBACK_SKIP_ERROR = "s3.file.callback.skip.error"; - public static final boolean FILE_CALLBACK_SKIP_ERROR_DEFAULT = false; - public static final String FILE_CALLBACK_CLASS = 
"s3.file.callback.class"; - public static final Class FILE_CALLBACK_CLASS_DEFAULT = - KafkaFileCallbackProvider.class; - public static final String FILE_CALLBACK_CONFIG_JSON = "s3.file.callback.config.json"; - public static final String FILE_CALLBACK_CONFIG_JSON_DEFAULT = "{}"; + public static final String FILE_EVENT_ENABLE = "s3.file.event.enable"; + public static final boolean FILE_EVENT_ENABLE_DEFAULT = false; + public static final String FILE_EVENT_SKIP_ERROR = "s3.file.event.skip.error"; + public static final boolean FILE_EVENT_SKIP_ERROR_DEFAULT = false; + public static final String FILE_EVENT_CLASS = "s3.file.event.class"; + public static final Class FILE_EVENT_CLASS_DEFAULT = + KafkaFileEventProvider.class; + public static final String FILE_EVENT_CONFIG_JSON = "s3.file.event.config.json"; + public static final String FILE_EVENT_CONFIG_JSON_DEFAULT = "{}"; /** * Append schema name in s3-path @@ -792,63 +792,63 @@ public static ConfigDef newConfigDef() { } { - final String group = "File callback"; + final String group = "File event"; int orderInGroup = 0; configDef.define( - FILE_CALLBACK_ENABLE, + FILE_EVENT_ENABLE, Type.BOOLEAN, - FILE_CALLBACK_ENABLE_DEFAULT, + FILE_EVENT_ENABLE_DEFAULT, Importance.LOW, - "Enables the file callback to be specified and configured", + "Enables the file event to be specified and configured", group, ++orderInGroup, Width.LONG, - "Enable s3 file callback" + "Enable s3 file event" ); configDef.define( - FILE_CALLBACK_SKIP_ERROR, + FILE_EVENT_SKIP_ERROR, Type.BOOLEAN, - FILE_CALLBACK_SKIP_ERROR_DEFAULT, + FILE_EVENT_SKIP_ERROR_DEFAULT, Importance.LOW, - "In case of callback, then raise an error or fail silently. Default raise an error.", + "In case of file event error, then raise or fail silently. Default raise an error.", group, ++orderInGroup, Width.LONG, - "Fail when s3 file callback error" + "Fail when s3 file event error" ); configDef.define( - FILE_CALLBACK_CLASS, + FILE_EVENT_CLASS, Type.CLASS, - FILE_CALLBACK_CLASS_DEFAULT, - new FileCallbackProviderValidator(), + FILE_EVENT_CLASS_DEFAULT, + new FileEventProviderValidator(), Importance.LOW, - "File callback to push notification for each file written on s3. By default " + "File event to push notification for each file written on s3. By default " + "the connector uses ``" - + FILE_CALLBACK_CLASS_DEFAULT.getSimpleName() + + FILE_EVENT_CLASS_DEFAULT.getSimpleName() + "``.", group, ++orderInGroup, Width.LONG, - "File callback class" + "File event class" ); configDef.define( - FILE_CALLBACK_CONFIG_JSON, + FILE_EVENT_CONFIG_JSON, Type.STRING, - FILE_CALLBACK_CONFIG_JSON_DEFAULT, + FILE_EVENT_CONFIG_JSON_DEFAULT, Importance.LOW, - "File callback configuration as json format. " + "File event configuration as json format. " + "Mandatory Fields: bootstrap_servers, topic_name, schema_registry_url. " + "Optional fields: sasl_mechanism, security_protocol, sasl_jaas_config. 
" + "By default an empty json.", group, ++orderInGroup, Width.LONG, - "File callback config json" + "File event config json" ); } return configDef; @@ -1028,20 +1028,20 @@ public int getElasticBufferInitCap() { return getInt(ELASTIC_BUFFER_INIT_CAPACITY); } - public boolean getFileCallbackEnable() { - return getBoolean(FILE_CALLBACK_ENABLE); + public boolean getFileEventEnable() { + return getBoolean(FILE_EVENT_ENABLE); } - public boolean getFileCallbackSkipError() { - return getBoolean(FILE_CALLBACK_SKIP_ERROR); + public boolean getFileEventSkipError() { + return getBoolean(FILE_EVENT_SKIP_ERROR); } - public Class getFileCallbackClass() { - return getClass(FILE_CALLBACK_CLASS); + public Class getFileEventClass() { + return getClass(FILE_EVENT_CLASS); } - public String getFileCallbackConfigJson() { - return getString(FILE_CALLBACK_CONFIG_JSON); + public String getFileEventConfigJson() { + return getString(FILE_EVENT_CONFIG_JSON); } public boolean isTombstoneWriteEnabled() { @@ -1287,23 +1287,23 @@ public String toString() { } } - private static class FileCallbackProviderValidator implements ConfigDef.Validator { + private static class FileEventProviderValidator implements ConfigDef.Validator { @Override public void ensureValid(String name, Object provider) { if (provider != null && provider instanceof Class - && FileCallbackProvider.class.isAssignableFrom((Class) provider)) { + && FileEventProvider.class.isAssignableFrom((Class) provider)) { return; } throw new ConfigException( name, provider, - "Class must extend: " + FileCallbackProvider.class + "Class must extend: " + FileEventProvider.class ); } @Override public String toString() { - return "Any class implementing: " + FileCallbackProvider.class; + return "Any class implementing: " + FileEventProvider.class; } } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index 6cfb0afca..fed74cd98 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -16,7 +16,7 @@ package io.confluent.connect.s3; import com.amazonaws.SdkClientException; -import io.confluent.connect.s3.callback.FileCallbackProvider; +import io.confluent.connect.s3.file.FileEventProvider; import io.confluent.connect.s3.storage.S3Storage; import io.confluent.connect.s3.util.FileRotationTracker; import io.confluent.connect.s3.util.RetryUtil; @@ -107,7 +107,7 @@ public class TopicPartitionWriter { private ErrantRecordReporter reporter; private final FileRotationTracker fileRotationTracker; - private Optional fileCallback = Optional.empty(); + private Optional fileCallback = Optional.empty(); public TopicPartitionWriter(TopicPartition tp, S3Storage storage, @@ -192,15 +192,15 @@ public TopicPartitionWriter(TopicPartition tp, // Initialize scheduled rotation timer if applicable setNextScheduledRotation(); - // Initialize callback if enabled - if (this.connectorConfig.getFileCallbackEnable()) { + // Initialize fileEvent if enabled + if (this.connectorConfig.getFileEventEnable()) { try { - log.info("File callback enabled"); - fileCallback = Optional.of((FileCallbackProvider) + log.info("File event enabled"); + fileCallback = Optional.of((FileEventProvider) this.connectorConfig - .getFileCallbackClass().getConstructor(String.class, boolean.class) - .newInstance(connectorConfig.getFileCallbackConfigJson(), - 
connectorConfig.getFileCallbackSkipError())); + .getFileEventClass().getConstructor(String.class, boolean.class) + .newInstance(connectorConfig.getFileEventConfigJson(), + connectorConfig.getFileEventSkipError())); } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { throw new RuntimeException(e); @@ -693,8 +693,10 @@ private void commitFile(String encodedPartition) { private void callbackFile(String encodedPartition) { fileCallback.ifPresent(fs -> fs.call(tp.topic(), encodedPartition, - commitFiles.get(encodedPartition), tp.partition(), baseRecordTimestamp, - currentTimestamp, recordCount, time.milliseconds())); + commitFiles.get(encodedPartition), tp.partition(), + new DateTime(baseRecordTimestamp).withZone(timeZone), + new DateTime(currentTimestamp).withZone(timeZone), recordCount, + new DateTime(time.milliseconds()).withZone(timeZone))); } private void tagFile(String encodedPartition, String s3ObjectPath) { diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/AbstractFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/AbstractFileEventConfig.java similarity index 91% rename from kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/AbstractFileCallbackConfig.java rename to kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/AbstractFileEventConfig.java index 4a19e0ff4..a9b63026a 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/AbstractFileCallbackConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/AbstractFileEventConfig.java @@ -13,7 +13,7 @@ * specific language governing permissions and limitations under the License. */ -package io.confluent.connect.s3.callback; +package io.confluent.connect.s3.file; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.DeserializationFeature; @@ -23,8 +23,8 @@ import java.io.IOException; import java.util.Properties; -public abstract class AbstractFileCallbackConfig { - public static T fromJsonString( +public abstract class AbstractFileEventConfig { + public static T fromJsonString( String jsonContent, Class clazz) { try { if (jsonContent == null) { diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java similarity index 65% rename from kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java rename to kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java index b7cd8e75d..d8b4106c8 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/FileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java @@ -13,32 +13,40 @@ * specific language governing permissions and limitations under the License. 
*/ -package io.confluent.connect.s3.callback; +package io.confluent.connect.s3.file; +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormatter; +import org.joda.time.format.ISODateTimeFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public abstract class FileCallbackProvider { - private static final Logger log = LoggerFactory.getLogger(FileCallbackProvider.class); +public abstract class FileEventProvider { + private static final Logger log = LoggerFactory.getLogger(FileEventProvider.class); protected final String configJson; protected final boolean skipError; - public FileCallbackProvider(String configJson, boolean skipError) { + public FileEventProvider(String configJson, boolean skipError) { this.configJson = configJson; this.skipError = skipError; } + public String formatDateRFC3339(DateTime timestamp){ + DateTimeFormatter fmt = ISODateTimeFormat.dateTime(); + return fmt.print(timestamp); + } + public void call( String topicName, String s3Partition, String filePath, int partition, - Long baseRecordTimestamp, - Long currentTimestamp, + DateTime baseRecordTimestamp, + DateTime currentTimestamp, int recordCount, - Long eventDatetime) { + DateTime eventDatetime) { try { - log.info("Running file callback : {}, {}", topicName, filePath); + log.info("Running file event : {}, {}", topicName, filePath); callImpl(topicName, s3Partition, filePath, partition, baseRecordTimestamp, currentTimestamp, recordCount, eventDatetime); } catch (Exception e) { if (skipError) { @@ -53,7 +61,8 @@ public abstract void callImpl( String s3Partition, String filePath, int partition, - Long baseRecordTimestamp, - Long currentTimestamp, - int recordCount, Long eventDatetime); + DateTime baseRecordTimestamp, + DateTime currentTimestamp, + int recordCount, + DateTime eventDatetime); } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java similarity index 95% rename from kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java rename to kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java index c10313180..658a2c640 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java @@ -13,11 +13,11 @@ * specific language governing permissions and limitations under the License. 
*/ -package io.confluent.connect.s3.callback; +package io.confluent.connect.s3.file; import java.util.Properties; -public class KafkaFileCallbackConfig extends AbstractFileCallbackConfig { +public class KafkaFileEventConfig extends AbstractFileEventConfig { private static final String KEY_SERIALIZER = "io.confluent.kafka.serializers.KafkaAvroSerializer"; @@ -32,10 +32,10 @@ public class KafkaFileCallbackConfig extends AbstractFileCallbackConfig { private String saslJaasConfig; /** empty constructor for jackson */ - public KafkaFileCallbackConfig() { + public KafkaFileEventConfig() { } - public KafkaFileCallbackConfig( + public KafkaFileEventConfig( String topicName, String bootstrapServers, String schemaRegistryUrl, diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java similarity index 70% rename from kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java rename to kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java index 7aecfccb6..62e9d2402 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/callback/KafkaFileCallbackProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java @@ -13,20 +13,21 @@ * specific language governing permissions and limitations under the License. */ -package io.confluent.connect.s3.callback; +package io.confluent.connect.s3.file; import org.apache.avro.specific.SpecificRecord; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.clients.producer.ProducerRecord; +import org.joda.time.DateTime; -public class KafkaFileCallbackProvider extends FileCallbackProvider { - private final KafkaFileCallbackConfig kafkaConfig; +public class KafkaFileEventProvider extends FileEventProvider { + private final KafkaFileEventConfig kafkaConfig; - public KafkaFileCallbackProvider(String configJson, boolean skipError) { + public KafkaFileEventProvider(String configJson, boolean skipError) { super(configJson, skipError); this.kafkaConfig = - KafkaFileCallbackConfig.fromJsonString(configJson, KafkaFileCallbackConfig.class); + KafkaFileEventConfig.fromJsonString(configJson, KafkaFileEventConfig.class); } @Override @@ -35,21 +36,21 @@ public void callImpl( String s3Partition, String filePath, int partition, - Long baseRecordTimestamp, - Long currentTimestamp, + DateTime baseRecordTimestamp, + DateTime currentTimestamp, int recordCount, - Long eventDatetime) { + DateTime eventDatetime) { String key = topicName; - Callback value = - new Callback( + FileEvent value = + new FileEvent( topicName, s3Partition, filePath, partition, - baseRecordTimestamp, - currentTimestamp, + formatDateRFC3339(baseRecordTimestamp), + formatDateRFC3339(currentTimestamp), recordCount, - eventDatetime); + formatDateRFC3339(eventDatetime)); try (final Producer producer = new KafkaProducer<>(kafkaConfig.toProps())) { producer.send( diff --git a/kafka-connect-s3/src/main/resources/callback.avsc b/kafka-connect-s3/src/main/resources/file-event.avsc similarity index 60% rename from kafka-connect-s3/src/main/resources/callback.avsc rename to kafka-connect-s3/src/main/resources/file-event.avsc index aa11923ed..6d355b2d5 100644 --- a/kafka-connect-s3/src/main/resources/callback.avsc +++ b/kafka-connect-s3/src/main/resources/file-event.avsc @@ -1,8 +1,8 @@ { - 
"namespace": "io.confluent.connect.s3.callback", + "namespace": "io.confluent.connect.s3.file", "type": "record", - "name": "Callback", - "doc": "This event represents a callback Message", + "name": "FileEvent", + "doc": "This event represents a fileEvent Message", "fields": [ { "name": "topicName", @@ -26,13 +26,13 @@ }, { "name": "baseRecordTimestamp", - "type": ["null", "long"], - "doc": "Time of the first record written in the file. Defined when partitioner is time based only." + "type": ["null", "string"], + "doc": "Time of the first record written in the file, in RFC 3339. Defined when partitioner is time based only." }, { "name": "currentTimestamp", - "type": ["null", "long"], - "doc": "Time of the last record written in the file. Defined when partitioner is time based only." + "type": ["null", "string"], + "doc": "Time of the last record written in the file, in RFC 3339. Defined when partitioner is time based only." }, { "name": "recordCount", @@ -41,8 +41,8 @@ }, { "name": "event_datetime", - "type": "long", - "doc": "The time of the callback event" + "type": "string", + "doc": "The time of the file event, in RFC 3339" } ] } \ No newline at end of file diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java similarity index 84% rename from kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java rename to kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java index e34add267..bb591d38d 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkCallbackIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java @@ -25,8 +25,8 @@ import static org.junit.Assert.assertEquals; import io.confluent.connect.s3.S3SinkConnector; -import io.confluent.connect.s3.callback.KafkaFileCallbackConfig; -import io.confluent.connect.s3.callback.KafkaFileCallbackProvider; +import io.confluent.connect.s3.file.KafkaFileEventConfig; +import io.confluent.connect.s3.file.KafkaFileEventProvider; import io.confluent.connect.s3.format.avro.AvroFormat; import io.confluent.connect.s3.format.parquet.ParquetFormat; import io.confluent.connect.s3.storage.S3Storage; @@ -52,6 +52,8 @@ import org.apache.kafka.connect.runtime.SinkConnectorConfig; import org.apache.kafka.connect.sink.SinkRecord; import org.apache.kafka.test.IntegrationTest; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -61,9 +63,9 @@ @SuppressWarnings({"unchecked", "deprecation"}) @Category(IntegrationTest.class) -public class S3SinkCallbackIT extends BaseConnectorIT { +public class S3SinkFileEventIT extends BaseConnectorIT { - private static final Logger log = LoggerFactory.getLogger(S3SinkCallbackIT.class); + private static final Logger log = LoggerFactory.getLogger(S3SinkFileEventIT.class); // connector and test configs private static final String CONNECTOR_NAME = "s3-sink"; private static final String DEFAULT_TEST_TOPIC_NAME = "TestTopic"; @@ -89,8 +91,8 @@ public void before() throws InterruptedException { props.put(S3_PROXY_URL_CONFIG, minioContainer.getUrl()); props.put(AWS_ACCESS_KEY_ID_CONFIG, MinioContainer.MINIO_USERNAME); props.put(AWS_SECRET_ACCESS_KEY_CONFIG, MinioContainer.MINIO_PASSWORD); - // callback - props.put(FILE_CALLBACK_ENABLE, "true"); + // file event + 
props.put(FILE_EVENT_ENABLE, "true"); // TimeBasedPartitioner props.put(PartitionerConfig.PARTITIONER_CLASS_CONFIG, "io.confluent.connect.storage.partitioner.TimeBasedPartitioner"); props.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, "100"); @@ -113,58 +115,59 @@ public void after() throws Exception { @Test - public void testBasicRecordsWrittenParquetAndRelatedCallbacks() throws Throwable { + public void testBasicRecordsWrittenParquetAndRelatedFileEvents() throws Throwable { // add test specific props props.put(FORMAT_CLASS_CONFIG, ParquetFormat.class.getName()); - String topicCallback = "TopicCallback"; + String topicFileEvent = "TopicFileEvent"; props.put( - FILE_CALLBACK_CONFIG_JSON, - new KafkaFileCallbackConfig( - topicCallback, + FILE_EVENT_CONFIG_JSON, + new KafkaFileEventConfig( + topicFileEvent, connect.kafka().bootstrapServers(), restApp.restServer.getURI().toString(), null, null, null) .toJson()); - connect.kafka().createTopic(topicCallback); - testBasicRecordsWrittenAndRelatedCallbacks(PARQUET_EXTENSION, topicCallback); + connect.kafka().createTopic(topicFileEvent); + testBasicRecordsWrittenAndRelatedFileEvents(PARQUET_EXTENSION, topicFileEvent); } @Test - public void testCallBackPartition() { + public void testFileEventPartition() { String bootstrapServers = connect.kafka().bootstrapServers(); - String callbackTopic = "callback_topic"; - connect.kafka().createTopic(callbackTopic); - KafkaFileCallbackConfig kafkaFileCallbackConfig = - new KafkaFileCallbackConfig( - callbackTopic, + String fileEventTopic = "file_event_topic"; + connect.kafka().createTopic(fileEventTopic); + KafkaFileEventConfig kafkaFileEventConfig = + new KafkaFileEventConfig( + fileEventTopic, bootstrapServers, restApp.restServer.getURI().toString(), null, null, null); - KafkaFileCallbackProvider callBack = - new KafkaFileCallbackProvider(kafkaFileCallbackConfig.toJson(), false); - callBack.call("baz-topic", "version/event/hour", "file1.avro", 12, - 1234L, 123L, 34, 1234L); - callBack.call("foo-topic", "version/event/hour", "fil2.avro", 8, - 12345L, 1234L, 12, 12345L); + KafkaFileEventProvider fileEvent = + new KafkaFileEventProvider(kafkaFileEventConfig.toJson(), false); + fileEvent.call("baz-topic", "version/event/hour", "file1.avro", 12, + new DateTime(1234L), new DateTime(123L), + 34, new DateTime(1234L).withZone(DateTimeZone.UTC)); + fileEvent.call("foo-topic", "version/event/hour", "fil2.avro", 8, + new DateTime(12345L), new DateTime(1234L), 12, new DateTime(12345L)); // fails if two records are not present in kafka within 1s - connect.kafka().consume(2, 1000L, callbackTopic); + connect.kafka().consume(2, 1000L, fileEventTopic); } /** * Test that the expected records are written for a given file extension * Optionally, test that topics which have "*.{expectedFileExtension}*" in them are processed * and written. 
* @param expectedFileExtension The file extension to test against - * @param callbackTopic The callback topic name + * @param fileEventTopic The fileEvent topic name * @throws Throwable */ - private void testBasicRecordsWrittenAndRelatedCallbacks( + private void testBasicRecordsWrittenAndRelatedFileEvents( String expectedFileExtension, - String callbackTopic + String fileEventTopic ) throws Throwable { // Add an extra topic with this extension inside of the name // Use a TreeSet for test determinism @@ -203,8 +206,8 @@ private void testBasicRecordsWrittenAndRelatedCallbacks( } // This check will catch any duplications assertEquals(expectedTopicFilenames.size(), expectedTotalFileCount); - // Check wether we get same number of records in callback - connect.kafka().consume(expectedTotalFileCount, 1000L, callbackTopic); + // Check whether we get same number of records in fileEvent + connect.kafka().consume(expectedTotalFileCount, 1000L, fileEventTopic); } private void produceRecordsNoHeaders(int recordCount, SinkRecord record) From 62ef7ddd141376a8cd882b2b4967a66625aa61a1 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 1 Sep 2023 10:49:47 +0200 Subject: [PATCH 22/34] Use snake case --- kafka-connect-s3/src/main/resources/file-event.avsc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kafka-connect-s3/src/main/resources/file-event.avsc b/kafka-connect-s3/src/main/resources/file-event.avsc index 6d355b2d5..a33c1b119 100644 --- a/kafka-connect-s3/src/main/resources/file-event.avsc +++ b/kafka-connect-s3/src/main/resources/file-event.avsc @@ -5,17 +5,17 @@ "doc": "This event represents a fileEvent Message", "fields": [ { - "name": "topicName", + "name": "topic_name", "type": "string", "doc": "The topic name of the record being written" }, { - "name": "s3Partition", + "name": "s3_partition", "type": "string", "doc": "The s3 partition produced by the partitioner" }, { - "name": "filePath", + "name": "file_path", "type": "string", "doc": "Current file path, including partition and file name" }, @@ -25,17 +25,17 @@ "doc": "The kafka partition being recorded" }, { - "name": "baseRecordTimestamp", + "name": "base_record_timestamp", "type": ["null", "string"], "doc": "Time of the first record written in the file, in RFC 3339. Defined when partitioner is time based only." }, { - "name": "currentTimestamp", + "name": "current_timestamp", "type": ["null", "string"], "doc": "Time of the last record written in the file, in RFC 3339. Defined when partitioner is time based only." 
}, { - "name": "recordCount", + "name": "record_count", "type": "int", "doc": "Number of records within the written file" }, From 48da5792c11c0c77f4fe7d6ad33f1f7d2ca04e42 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 1 Sep 2023 11:39:22 +0200 Subject: [PATCH 23/34] Separate config and security file event --- .../connect/s3/S3SinkConnectorConfig.java | 22 ++++- .../connect/s3/TopicPartitionWriter.java | 3 +- .../connect/s3/file/FileEventProvider.java | 4 +- .../connect/s3/file/KafkaFileEventConfig.java | 42 +-------- .../s3/file/KafkaFileEventProvider.java | 15 ++- .../s3/file/KafkaFileEventSecurity.java | 91 +++++++++++++++++++ .../s3/integration/S3SinkFileEventIT.java | 12 +-- 7 files changed, 131 insertions(+), 58 deletions(-) create mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventSecurity.java diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java index 626e1bcee..ef7462d2a 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java @@ -208,6 +208,8 @@ public class S3SinkConnectorConfig extends StorageSinkConnectorConfig { KafkaFileEventProvider.class; public static final String FILE_EVENT_CONFIG_JSON = "s3.file.event.config.json"; public static final String FILE_EVENT_CONFIG_JSON_DEFAULT = "{}"; + public static final String FILE_EVENT_SECURITY_JSON = "s3.file.event.security.json"; + public static final String FILE_EVENT_SECURITY_JSON_DEFAULT = "{}"; /** * Append schema name in s3-path @@ -842,14 +844,27 @@ public static ConfigDef newConfigDef() { FILE_EVENT_CONFIG_JSON_DEFAULT, Importance.LOW, "File event configuration as json format. " - + "Mandatory Fields: bootstrap_servers, topic_name, schema_registry_url. " - + "Optional fields: sasl_mechanism, security_protocol, sasl_jaas_config. " + + "Content depends on the FileEvent implementation. " + "By default an empty json.", group, ++orderInGroup, Width.LONG, "File event config json" ); + + configDef.define( + FILE_EVENT_SECURITY_JSON, + Type.STRING, + FILE_EVENT_SECURITY_JSON_DEFAULT, + Importance.LOW, + "File event configuration as json format. 
" + + "content depends on the FileEvent implementation" + + "By default an empty json.", + group, + ++orderInGroup, + Width.LONG, + "File event security json" + ); } return configDef; } @@ -1043,6 +1058,9 @@ public Class getFileEventClass() { public String getFileEventConfigJson() { return getString(FILE_EVENT_CONFIG_JSON); } + public String getFileEventSecurityJson() { + return getString(FILE_EVENT_SECURITY_JSON); + } public boolean isTombstoneWriteEnabled() { return OutputWriteBehavior.WRITE.toString().equalsIgnoreCase(nullValueBehavior()); diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index fed74cd98..b3bef2a0d 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -198,8 +198,9 @@ public TopicPartitionWriter(TopicPartition tp, log.info("File event enabled"); fileCallback = Optional.of((FileEventProvider) this.connectorConfig - .getFileEventClass().getConstructor(String.class, boolean.class) + .getFileEventClass().getConstructor(String.class, String.class, boolean.class) .newInstance(connectorConfig.getFileEventConfigJson(), + connectorConfig.getFileEventSecurityJson(), connectorConfig.getFileEventSkipError())); } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java index d8b4106c8..ac4a44c60 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java @@ -24,10 +24,12 @@ public abstract class FileEventProvider { private static final Logger log = LoggerFactory.getLogger(FileEventProvider.class); protected final String configJson; + protected final String securityJson; protected final boolean skipError; - public FileEventProvider(String configJson, boolean skipError) { + public FileEventProvider(String configJson, String securityJson, boolean skipError) { this.configJson = configJson; + this.securityJson = securityJson; this.skipError = skipError; } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java index 658a2c640..adbc0ba17 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java @@ -26,10 +26,7 @@ public class KafkaFileEventConfig extends AbstractFileEventConfig { private String topicName; private String bootstrapServers; - private String securityProtocol; private String schemaRegistryUrl; - private String saslMecanism; - private String saslJaasConfig; /** empty constructor for jackson */ public KafkaFileEventConfig() { @@ -38,16 +35,10 @@ public KafkaFileEventConfig() { public KafkaFileEventConfig( String topicName, String bootstrapServers, - String schemaRegistryUrl, - String securityProtocol, - String saslMecanism, - String saslJaasConfig) { + String schemaRegistryUrl) { this.topicName = topicName; this.bootstrapServers = bootstrapServers; this.schemaRegistryUrl = schemaRegistryUrl; - 
this.securityProtocol = securityProtocol; - this.saslMecanism = saslMecanism; - this.saslJaasConfig = saslJaasConfig; } @Override @@ -64,15 +55,6 @@ public String toJson() { sb.append("\"topic_name\": \"").append(topicName).append('"'); sb.append(", \"bootstrap_servers\": \"").append(bootstrapServers).append('"'); sb.append(", \"schema_registry_url\": \"").append(schemaRegistryUrl).append('"'); - if (securityProtocol != null) { - sb.append(", \"security_protocol\": \"").append(securityProtocol).append('"'); - } - if (saslMecanism != null) { - sb.append(", \"sasl_mecanism\": \"").append(saslMecanism).append('"'); - } - if (saslJaasConfig != null) { - sb.append(", \"sasl_jaas_config\": \"").append(saslJaasConfig).append('"'); - } sb.append('}'); return sb.toString(); } @@ -82,21 +64,10 @@ public Properties toProps() { Properties prop = new Properties(); prop.setProperty("key.serializer", KEY_SERIALIZER); prop.setProperty("value.serializer", VALUE_SERIALIZER); - prop.setProperty("auto.create.topics.enable", "true"); // mandatory prop.setProperty("bootstrap.servers", bootstrapServers); prop.setProperty("topic.name", topicName); prop.setProperty("schema.registry.url", schemaRegistryUrl); - // optional - if (saslMecanism != null) { - prop.setProperty("sasl.mechanism", saslMecanism); - } - if (securityProtocol != null) { - prop.setProperty("security.protocol", securityProtocol); - } - if (saslJaasConfig != null) { - prop.setProperty("sasl.jaas.config", saslJaasConfig); - } return prop; } @@ -112,15 +83,4 @@ public String getBootstrapServers() { return bootstrapServers; } - public String getSecurityProtocol() { - return securityProtocol; - } - - public String getSaslMecanism() { - return saslMecanism; - } - - public String getSaslJaasConfig() { - return saslJaasConfig; - } } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java index 62e9d2402..cb009008a 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java @@ -21,13 +21,17 @@ import org.apache.kafka.clients.producer.ProducerRecord; import org.joda.time.DateTime; +import java.util.Properties; + public class KafkaFileEventProvider extends FileEventProvider { private final KafkaFileEventConfig kafkaConfig; + private final KafkaFileEventSecurity kafkaSecurity; - public KafkaFileEventProvider(String configJson, boolean skipError) { - super(configJson, skipError); + public KafkaFileEventProvider(String configJson, String securityJson, boolean skipError) { + super(configJson, securityJson, skipError); this.kafkaConfig = - KafkaFileEventConfig.fromJsonString(configJson, KafkaFileEventConfig.class); + AbstractFileEventConfig.fromJsonString(configJson, KafkaFileEventConfig.class); + this.kafkaSecurity = AbstractFileEventConfig.fromJsonString(securityJson, KafkaFileEventSecurity.class); } @Override @@ -51,8 +55,11 @@ public void callImpl( formatDateRFC3339(currentTimestamp), recordCount, formatDateRFC3339(eventDatetime)); + Properties combinedProperties = new Properties(); + combinedProperties.putAll(kafkaConfig.toProps()); + combinedProperties.putAll(kafkaSecurity.toProps()); try (final Producer producer = - new KafkaProducer<>(kafkaConfig.toProps())) { + new KafkaProducer<>(combinedProperties)) { producer.send( new ProducerRecord<>(kafkaConfig.getTopicName(), key, value), (event, 
ex) -> { diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventSecurity.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventSecurity.java new file mode 100644 index 000000000..9f4927d3a --- /dev/null +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventSecurity.java @@ -0,0 +1,91 @@ +/* + * Copyright 2018 Confluent Inc. + * + * Licensed under the Confluent Community License (the "License"); you may not use + * this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.confluent.io/confluent-community-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package io.confluent.connect.s3.file; + +import java.util.Properties; + +public class KafkaFileEventSecurity extends AbstractFileEventConfig { + + private static final String KEY_SERIALIZER = + "io.confluent.kafka.serializers.KafkaAvroSerializer"; + private static final String VALUE_SERIALIZER = + "io.confluent.kafka.serializers.KafkaAvroSerializer"; + + private String securityProtocol; + private String saslMecanism; + private String saslJaasConfig; + + /** empty constructor for jackson */ + public KafkaFileEventSecurity() { + } + + public KafkaFileEventSecurity( + String securityProtocol, + String saslMecanism, + String saslJaasConfig) { + this.securityProtocol = securityProtocol; + this.saslMecanism = saslMecanism; + this.saslJaasConfig = saslJaasConfig; + } + + @Override + protected void validateFields() { + // pass + } + + @Override + public String toJson() { + final StringBuffer sb = new StringBuffer("{"); + if (securityProtocol != null) { + sb.append(", \"security_protocol\": \"").append(securityProtocol).append('"'); + } + if (saslMecanism != null) { + sb.append(", \"sasl_mecanism\": \"").append(saslMecanism).append('"'); + } + if (saslJaasConfig != null) { + sb.append(", \"sasl_jaas_config\": \"").append(saslJaasConfig).append('"'); + } + sb.append('}'); + return sb.toString(); + } + + @Override + public Properties toProps() { + Properties prop = new Properties(); + if (saslMecanism != null) { + prop.setProperty("sasl.mechanism", saslMecanism); + } + if (securityProtocol != null) { + prop.setProperty("security.protocol", securityProtocol); + } + if (saslJaasConfig != null) { + prop.setProperty("sasl.jaas.config", saslJaasConfig); + } + return prop; + } + + public String getSecurityProtocol() { + return securityProtocol; + } + + public String getSaslMecanism() { + return saslMecanism; + } + + public String getSaslJaasConfig() { + return saslJaasConfig; + } +} diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java index bb591d38d..65b1a45a2 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java @@ -124,10 +124,7 @@ public void testBasicRecordsWrittenParquetAndRelatedFileEvents() throws Throwabl new KafkaFileEventConfig( topicFileEvent, connect.kafka().bootstrapServers(), - restApp.restServer.getURI().toString(), - null, - null, - null) + 
restApp.restServer.getURI().toString()) .toJson()); connect.kafka().createTopic(topicFileEvent); testBasicRecordsWrittenAndRelatedFileEvents(PARQUET_EXTENSION, topicFileEvent); @@ -142,12 +139,9 @@ public void testFileEventPartition() { new KafkaFileEventConfig( fileEventTopic, bootstrapServers, - restApp.restServer.getURI().toString(), - null, - null, - null); + restApp.restServer.getURI().toString()); KafkaFileEventProvider fileEvent = - new KafkaFileEventProvider(kafkaFileEventConfig.toJson(), false); + new KafkaFileEventProvider(kafkaFileEventConfig.toJson(),"{}", false); fileEvent.call("baz-topic", "version/event/hour", "file1.avro", 12, new DateTime(1234L), new DateTime(123L), 34, new DateTime(1234L).withZone(DateTimeZone.UTC)); From b3e0eaa4f84ce970e7dc3e6eaf50be5003a1fae8 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 1 Sep 2023 16:05:25 +0200 Subject: [PATCH 24/34] Revert "Separate config and security file event" This reverts commit 48da5792c11c0c77f4fe7d6ad33f1f7d2ca04e42. --- .../connect/s3/S3SinkConnectorConfig.java | 22 +---- .../connect/s3/TopicPartitionWriter.java | 3 +- .../connect/s3/file/FileEventProvider.java | 4 +- .../connect/s3/file/KafkaFileEventConfig.java | 42 ++++++++- .../s3/file/KafkaFileEventProvider.java | 15 +-- .../s3/file/KafkaFileEventSecurity.java | 91 ------------------- .../s3/integration/S3SinkFileEventIT.java | 12 ++- 7 files changed, 58 insertions(+), 131 deletions(-) delete mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventSecurity.java diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java index ef7462d2a..626e1bcee 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java @@ -208,8 +208,6 @@ public class S3SinkConnectorConfig extends StorageSinkConnectorConfig { KafkaFileEventProvider.class; public static final String FILE_EVENT_CONFIG_JSON = "s3.file.event.config.json"; public static final String FILE_EVENT_CONFIG_JSON_DEFAULT = "{}"; - public static final String FILE_EVENT_SECURITY_JSON = "s3.file.event.security.json"; - public static final String FILE_EVENT_SECURITY_JSON_DEFAULT = "{}"; /** * Append schema name in s3-path @@ -844,27 +842,14 @@ public static ConfigDef newConfigDef() { FILE_EVENT_CONFIG_JSON_DEFAULT, Importance.LOW, "File event configuration as json format. " - + "Content depends on the FileEvent implementation. " + + "Mandatory Fields: bootstrap_servers, topic_name, schema_registry_url. " + + "Optional fields: sasl_mechanism, security_protocol, sasl_jaas_config. " + "By default an empty json.", group, ++orderInGroup, Width.LONG, "File event config json" ); - - configDef.define( - FILE_EVENT_SECURITY_JSON, - Type.STRING, - FILE_EVENT_SECURITY_JSON_DEFAULT, - Importance.LOW, - "File event configuration as json format. 
" - + "content depends on the FileEvent implementation" - + "By default an empty json.", - group, - ++orderInGroup, - Width.LONG, - "File event security json" - ); } return configDef; } @@ -1058,9 +1043,6 @@ public Class getFileEventClass() { public String getFileEventConfigJson() { return getString(FILE_EVENT_CONFIG_JSON); } - public String getFileEventSecurityJson() { - return getString(FILE_EVENT_SECURITY_JSON); - } public boolean isTombstoneWriteEnabled() { return OutputWriteBehavior.WRITE.toString().equalsIgnoreCase(nullValueBehavior()); diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index b3bef2a0d..fed74cd98 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -198,9 +198,8 @@ public TopicPartitionWriter(TopicPartition tp, log.info("File event enabled"); fileCallback = Optional.of((FileEventProvider) this.connectorConfig - .getFileEventClass().getConstructor(String.class, String.class, boolean.class) + .getFileEventClass().getConstructor(String.class, boolean.class) .newInstance(connectorConfig.getFileEventConfigJson(), - connectorConfig.getFileEventSecurityJson(), connectorConfig.getFileEventSkipError())); } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java index ac4a44c60..d8b4106c8 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java @@ -24,12 +24,10 @@ public abstract class FileEventProvider { private static final Logger log = LoggerFactory.getLogger(FileEventProvider.class); protected final String configJson; - protected final String securityJson; protected final boolean skipError; - public FileEventProvider(String configJson, String securityJson, boolean skipError) { + public FileEventProvider(String configJson, boolean skipError) { this.configJson = configJson; - this.securityJson = securityJson; this.skipError = skipError; } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java index adbc0ba17..658a2c640 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java @@ -26,7 +26,10 @@ public class KafkaFileEventConfig extends AbstractFileEventConfig { private String topicName; private String bootstrapServers; + private String securityProtocol; private String schemaRegistryUrl; + private String saslMecanism; + private String saslJaasConfig; /** empty constructor for jackson */ public KafkaFileEventConfig() { @@ -35,10 +38,16 @@ public KafkaFileEventConfig() { public KafkaFileEventConfig( String topicName, String bootstrapServers, - String schemaRegistryUrl) { + String schemaRegistryUrl, + String securityProtocol, + String saslMecanism, + String saslJaasConfig) { this.topicName = topicName; this.bootstrapServers = bootstrapServers; this.schemaRegistryUrl = schemaRegistryUrl; + 
this.securityProtocol = securityProtocol; + this.saslMecanism = saslMecanism; + this.saslJaasConfig = saslJaasConfig; } @Override @@ -55,6 +64,15 @@ public String toJson() { sb.append("\"topic_name\": \"").append(topicName).append('"'); sb.append(", \"bootstrap_servers\": \"").append(bootstrapServers).append('"'); sb.append(", \"schema_registry_url\": \"").append(schemaRegistryUrl).append('"'); + if (securityProtocol != null) { + sb.append(", \"security_protocol\": \"").append(securityProtocol).append('"'); + } + if (saslMecanism != null) { + sb.append(", \"sasl_mecanism\": \"").append(saslMecanism).append('"'); + } + if (saslJaasConfig != null) { + sb.append(", \"sasl_jaas_config\": \"").append(saslJaasConfig).append('"'); + } sb.append('}'); return sb.toString(); } @@ -64,10 +82,21 @@ public Properties toProps() { Properties prop = new Properties(); prop.setProperty("key.serializer", KEY_SERIALIZER); prop.setProperty("value.serializer", VALUE_SERIALIZER); + prop.setProperty("auto.create.topics.enable", "true"); // mandatory prop.setProperty("bootstrap.servers", bootstrapServers); prop.setProperty("topic.name", topicName); prop.setProperty("schema.registry.url", schemaRegistryUrl); + // optional + if (saslMecanism != null) { + prop.setProperty("sasl.mechanism", saslMecanism); + } + if (securityProtocol != null) { + prop.setProperty("security.protocol", securityProtocol); + } + if (saslJaasConfig != null) { + prop.setProperty("sasl.jaas.config", saslJaasConfig); + } return prop; } @@ -83,4 +112,15 @@ public String getBootstrapServers() { return bootstrapServers; } + public String getSecurityProtocol() { + return securityProtocol; + } + + public String getSaslMecanism() { + return saslMecanism; + } + + public String getSaslJaasConfig() { + return saslJaasConfig; + } } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java index cb009008a..62e9d2402 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java @@ -21,17 +21,13 @@ import org.apache.kafka.clients.producer.ProducerRecord; import org.joda.time.DateTime; -import java.util.Properties; - public class KafkaFileEventProvider extends FileEventProvider { private final KafkaFileEventConfig kafkaConfig; - private final KafkaFileEventSecurity kafkaSecurity; - public KafkaFileEventProvider(String configJson, String securityJson, boolean skipError) { - super(configJson, securityJson, skipError); + public KafkaFileEventProvider(String configJson, boolean skipError) { + super(configJson, skipError); this.kafkaConfig = - AbstractFileEventConfig.fromJsonString(configJson, KafkaFileEventConfig.class); - this.kafkaSecurity = AbstractFileEventConfig.fromJsonString(securityJson, KafkaFileEventSecurity.class); + KafkaFileEventConfig.fromJsonString(configJson, KafkaFileEventConfig.class); } @Override @@ -55,11 +51,8 @@ public void callImpl( formatDateRFC3339(currentTimestamp), recordCount, formatDateRFC3339(eventDatetime)); - Properties combinedProperties = new Properties(); - combinedProperties.putAll(kafkaConfig.toProps()); - combinedProperties.putAll(kafkaSecurity.toProps()); try (final Producer producer = - new KafkaProducer<>(combinedProperties)) { + new KafkaProducer<>(kafkaConfig.toProps())) { producer.send( new ProducerRecord<>(kafkaConfig.getTopicName(), key, value), (event, 
ex) -> { diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventSecurity.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventSecurity.java deleted file mode 100644 index 9f4927d3a..000000000 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventSecurity.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright 2018 Confluent Inc. - * - * Licensed under the Confluent Community License (the "License"); you may not use - * this file except in compliance with the License. You may obtain a copy of the - * License at - * - * http://www.confluent.io/confluent-community-license - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. - */ - -package io.confluent.connect.s3.file; - -import java.util.Properties; - -public class KafkaFileEventSecurity extends AbstractFileEventConfig { - - private static final String KEY_SERIALIZER = - "io.confluent.kafka.serializers.KafkaAvroSerializer"; - private static final String VALUE_SERIALIZER = - "io.confluent.kafka.serializers.KafkaAvroSerializer"; - - private String securityProtocol; - private String saslMecanism; - private String saslJaasConfig; - - /** empty constructor for jackson */ - public KafkaFileEventSecurity() { - } - - public KafkaFileEventSecurity( - String securityProtocol, - String saslMecanism, - String saslJaasConfig) { - this.securityProtocol = securityProtocol; - this.saslMecanism = saslMecanism; - this.saslJaasConfig = saslJaasConfig; - } - - @Override - protected void validateFields() { - // pass - } - - @Override - public String toJson() { - final StringBuffer sb = new StringBuffer("{"); - if (securityProtocol != null) { - sb.append(", \"security_protocol\": \"").append(securityProtocol).append('"'); - } - if (saslMecanism != null) { - sb.append(", \"sasl_mecanism\": \"").append(saslMecanism).append('"'); - } - if (saslJaasConfig != null) { - sb.append(", \"sasl_jaas_config\": \"").append(saslJaasConfig).append('"'); - } - sb.append('}'); - return sb.toString(); - } - - @Override - public Properties toProps() { - Properties prop = new Properties(); - if (saslMecanism != null) { - prop.setProperty("sasl.mechanism", saslMecanism); - } - if (securityProtocol != null) { - prop.setProperty("security.protocol", securityProtocol); - } - if (saslJaasConfig != null) { - prop.setProperty("sasl.jaas.config", saslJaasConfig); - } - return prop; - } - - public String getSecurityProtocol() { - return securityProtocol; - } - - public String getSaslMecanism() { - return saslMecanism; - } - - public String getSaslJaasConfig() { - return saslJaasConfig; - } -} diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java index 65b1a45a2..bb591d38d 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java @@ -124,7 +124,10 @@ public void testBasicRecordsWrittenParquetAndRelatedFileEvents() throws Throwabl new KafkaFileEventConfig( topicFileEvent, connect.kafka().bootstrapServers(), - restApp.restServer.getURI().toString()) + 
restApp.restServer.getURI().toString(), + null, + null, + null) .toJson()); connect.kafka().createTopic(topicFileEvent); testBasicRecordsWrittenAndRelatedFileEvents(PARQUET_EXTENSION, topicFileEvent); @@ -139,9 +142,12 @@ public void testFileEventPartition() { new KafkaFileEventConfig( fileEventTopic, bootstrapServers, - restApp.restServer.getURI().toString()); + restApp.restServer.getURI().toString(), + null, + null, + null); KafkaFileEventProvider fileEvent = - new KafkaFileEventProvider(kafkaFileEventConfig.toJson(),"{}", false); + new KafkaFileEventProvider(kafkaFileEventConfig.toJson(), false); fileEvent.call("baz-topic", "version/event/hour", "file1.avro", 12, new DateTime(1234L), new DateTime(123L), 34, new DateTime(1234L).withZone(DateTimeZone.UTC)); From 2505921ff4019d2784420b3d2d2629973e7a06f2 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Mon, 4 Sep 2023 14:19:43 +0200 Subject: [PATCH 25/34] File event (#1) --- checkstyle/suppressions.xml | 5 + kafka-connect-s3/pom.xml | 20 +- .../connect/s3/S3SinkConnectorConfig.java | 112 +++++++ .../connect/s3/TopicPartitionWriter.java | 27 ++ .../s3/file/AbstractFileEventConfig.java | 52 ++++ .../connect/s3/file/FileEventProvider.java | 68 +++++ .../connect/s3/file/KafkaFileEventConfig.java | 126 ++++++++ .../s3/file/KafkaFileEventProvider.java | 65 +++++ .../src/main/resources/file-event.avsc | 48 +++ .../s3/integration/BaseConnectorIT.java | 38 ++- .../s3/integration/MinioContainer.java | 44 +++ .../s3/integration/S3SinkConnectorIT.java | 12 +- .../s3/integration/S3SinkDataFormatIT.java | 7 +- .../s3/integration/S3SinkFileEventIT.java | 274 ++++++++++++++++++ 14 files changed, 874 insertions(+), 24 deletions(-) create mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/AbstractFileEventConfig.java create mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java create mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java create mode 100644 kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java create mode 100644 kafka-connect-s3/src/main/resources/file-event.avsc create mode 100644 kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/MinioContainer.java create mode 100644 kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java diff --git a/checkstyle/suppressions.xml b/checkstyle/suppressions.xml index 6d1c521d7..9704e9d4f 100644 --- a/checkstyle/suppressions.xml +++ b/checkstyle/suppressions.xml @@ -21,4 +21,9 @@ files="(TopicPartitionWriter).java" /> + + diff --git a/kafka-connect-s3/pom.xml b/kafka-connect-s3/pom.xml index 2cc30ca5e..ed779a39e 100644 --- a/kafka-connect-s3/pom.xml +++ b/kafka-connect-s3/pom.xml @@ -39,8 +39,9 @@ 1.11.1 0.2.2 1.0.1 - 1.15.0 + 1.18.3 2.22.1 + 1.9.2 false 2.22.1 32.1.2-jre @@ -310,6 +311,23 @@ + + org.apache.avro + avro-maven-plugin + ${maven.avro.plugin.version} + + + generate-sources + + schema + + + ${project.basedir}/src/main/resources/ + ${project.build.directory}/generated-main-avro-java/ + + + + diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java index 4ab843906..626e1bcee 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java @@ -24,6 +24,8 @@ import 
com.amazonaws.regions.Regions; import com.amazonaws.services.s3.model.CannedAccessControlList; import com.amazonaws.services.s3.model.SSEAlgorithm; +import io.confluent.connect.s3.file.FileEventProvider; +import io.confluent.connect.s3.file.KafkaFileEventProvider; import io.confluent.connect.storage.common.util.StringUtils; import org.apache.kafka.common.Configurable; import org.apache.kafka.common.config.AbstractConfig; @@ -194,6 +196,19 @@ public class S3SinkConnectorConfig extends StorageSinkConnectorConfig { public static final String TOMBSTONE_ENCODED_PARTITION = "tombstone.encoded.partition"; public static final String TOMBSTONE_ENCODED_PARTITION_DEFAULT = "tombstone"; + /** + * File event configs + */ + public static final String FILE_EVENT_ENABLE = "s3.file.event.enable"; + public static final boolean FILE_EVENT_ENABLE_DEFAULT = false; + public static final String FILE_EVENT_SKIP_ERROR = "s3.file.event.skip.error"; + public static final boolean FILE_EVENT_SKIP_ERROR_DEFAULT = false; + public static final String FILE_EVENT_CLASS = "s3.file.event.class"; + public static final Class FILE_EVENT_CLASS_DEFAULT = + KafkaFileEventProvider.class; + public static final String FILE_EVENT_CONFIG_JSON = "s3.file.event.config.json"; + public static final String FILE_EVENT_CONFIG_JSON_DEFAULT = "{}"; + /** * Append schema name in s3-path */ @@ -774,7 +789,67 @@ public static ConfigDef newConfigDef() { Width.LONG, "Elastic buffer initial capacity" ); + } + + { + final String group = "File event"; + int orderInGroup = 0; + + configDef.define( + FILE_EVENT_ENABLE, + Type.BOOLEAN, + FILE_EVENT_ENABLE_DEFAULT, + Importance.LOW, + "Enables the file event to be specified and configured", + group, + ++orderInGroup, + Width.LONG, + "Enable s3 file event" + ); + + configDef.define( + FILE_EVENT_SKIP_ERROR, + Type.BOOLEAN, + FILE_EVENT_SKIP_ERROR_DEFAULT, + Importance.LOW, + "In case of file event error, then raise or fail silently. Default raise an error.", + group, + ++orderInGroup, + Width.LONG, + "Fail when s3 file event error" + ); + + configDef.define( + FILE_EVENT_CLASS, + Type.CLASS, + FILE_EVENT_CLASS_DEFAULT, + new FileEventProviderValidator(), + Importance.LOW, + "File event to push notification for each file written on s3. By default " + + "the connector uses ``" + + FILE_EVENT_CLASS_DEFAULT.getSimpleName() + + "``.", + + group, + ++orderInGroup, + Width.LONG, + "File event class" + ); + configDef.define( + FILE_EVENT_CONFIG_JSON, + Type.STRING, + FILE_EVENT_CONFIG_JSON_DEFAULT, + Importance.LOW, + "File event configuration as json format. " + + "Mandatory Fields: bootstrap_servers, topic_name, schema_registry_url. " + + "Optional fields: sasl_mechanism, security_protocol, sasl_jaas_config. 
" + + "By default an empty json.", + group, + ++orderInGroup, + Width.LONG, + "File event config json" + ); } return configDef; } @@ -953,6 +1028,22 @@ public int getElasticBufferInitCap() { return getInt(ELASTIC_BUFFER_INIT_CAPACITY); } + public boolean getFileEventEnable() { + return getBoolean(FILE_EVENT_ENABLE); + } + + public boolean getFileEventSkipError() { + return getBoolean(FILE_EVENT_SKIP_ERROR); + } + + public Class getFileEventClass() { + return getClass(FILE_EVENT_CLASS); + } + + public String getFileEventConfigJson() { + return getString(FILE_EVENT_CONFIG_JSON); + } + public boolean isTombstoneWriteEnabled() { return OutputWriteBehavior.WRITE.toString().equalsIgnoreCase(nullValueBehavior()); } @@ -1189,12 +1280,33 @@ public void ensureValid(String name, Object provider) { ); } + @Override public String toString() { return "Any class implementing: " + AWSCredentialsProvider.class; } } + private static class FileEventProviderValidator implements ConfigDef.Validator { + @Override + public void ensureValid(String name, Object provider) { + if (provider != null && provider instanceof Class + && FileEventProvider.class.isAssignableFrom((Class) provider)) { + return; + } + throw new ConfigException( + name, + provider, + "Class must extend: " + FileEventProvider.class + ); + } + + @Override + public String toString() { + return "Any class implementing: " + FileEventProvider.class; + } + } + private static class SseAlgorithmRecommender implements ConfigDef.Recommender { @Override public List validValues(String name, Map connectorConfigs) { diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index cee677f4d..fed74cd98 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -16,6 +16,7 @@ package io.confluent.connect.s3; import com.amazonaws.SdkClientException; +import io.confluent.connect.s3.file.FileEventProvider; import io.confluent.connect.s3.storage.S3Storage; import io.confluent.connect.s3.util.FileRotationTracker; import io.confluent.connect.s3.util.RetryUtil; @@ -36,6 +37,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.lang.reflect.InvocationTargetException; import java.util.Comparator; import java.util.HashMap; import java.util.LinkedList; @@ -105,6 +107,7 @@ public class TopicPartitionWriter { private ErrantRecordReporter reporter; private final FileRotationTracker fileRotationTracker; + private Optional fileCallback = Optional.empty(); public TopicPartitionWriter(TopicPartition tp, S3Storage storage, @@ -188,6 +191,21 @@ public TopicPartitionWriter(TopicPartition tp, // Initialize scheduled rotation timer if applicable setNextScheduledRotation(); + + // Initialize fileEvent if enabled + if (this.connectorConfig.getFileEventEnable()) { + try { + log.info("File event enabled"); + fileCallback = Optional.of((FileEventProvider) + this.connectorConfig + .getFileEventClass().getConstructor(String.class, boolean.class) + .newInstance(connectorConfig.getFileEventConfigJson(), + connectorConfig.getFileEventSkipError())); + } catch (InstantiationException | IllegalAccessException + | InvocationTargetException | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } } private enum State { @@ -636,6 +654,7 @@ private void commitFiles() { for (Map.Entry entry : commitFiles.entrySet()) { String 
encodedPartition = entry.getKey(); commitFile(encodedPartition); + callbackFile(encodedPartition); if (isTaggingEnabled) { RetryUtil.exponentialBackoffRetry(() -> tagFile(encodedPartition, entry.getValue()), ConnectException.class, @@ -672,6 +691,14 @@ private void commitFile(String encodedPartition) { } } + private void callbackFile(String encodedPartition) { + fileCallback.ifPresent(fs -> fs.call(tp.topic(), encodedPartition, + commitFiles.get(encodedPartition), tp.partition(), + new DateTime(baseRecordTimestamp).withZone(timeZone), + new DateTime(currentTimestamp).withZone(timeZone), recordCount, + new DateTime(time.milliseconds()).withZone(timeZone))); + } + private void tagFile(String encodedPartition, String s3ObjectPath) { Long startOffset = startOffsets.get(encodedPartition); Long endOffset = endOffsets.get(encodedPartition); diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/AbstractFileEventConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/AbstractFileEventConfig.java new file mode 100644 index 000000000..a9b63026a --- /dev/null +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/AbstractFileEventConfig.java @@ -0,0 +1,52 @@ +/* + * Copyright 2018 Confluent Inc. + * + * Licensed under the Confluent Community License (the "License"); you may not use + * this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.confluent.io/confluent-community-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package io.confluent.connect.s3.file; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.PropertyNamingStrategy; +import java.io.IOException; +import java.util.Properties; + +public abstract class AbstractFileEventConfig { + public static T fromJsonString( + String jsonContent, Class clazz) { + try { + if (jsonContent == null) { + return clazz.newInstance(); + } + ObjectMapper instanceMapper = new ObjectMapper(); + instanceMapper.setPropertyNamingStrategy( + PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); + instanceMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true); + instanceMapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true); + instanceMapper.enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS); + T value = instanceMapper.readValue(jsonContent, clazz); + value.validateFields(); + return value; + } catch (IllegalAccessException | InstantiationException | IOException e) { + throw new RuntimeException(e); + } + } + + protected abstract void validateFields() ; + + public abstract Properties toProps(); + + public abstract String toJson(); +} diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java new file mode 100644 index 000000000..d8b4106c8 --- /dev/null +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java @@ -0,0 +1,68 @@ +/* + * Copyright 2018 Confluent Inc. 
+ * + * Licensed under the Confluent Community License (the "License"); you may not use + * this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.confluent.io/confluent-community-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package io.confluent.connect.s3.file; + +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormatter; +import org.joda.time.format.ISODateTimeFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class FileEventProvider { + private static final Logger log = LoggerFactory.getLogger(FileEventProvider.class); + protected final String configJson; + protected final boolean skipError; + + public FileEventProvider(String configJson, boolean skipError) { + this.configJson = configJson; + this.skipError = skipError; + } + + public String formatDateRFC3339(DateTime timestamp){ + DateTimeFormatter fmt = ISODateTimeFormat.dateTime(); + return fmt.print(timestamp); + } + + public void call( + String topicName, + String s3Partition, + String filePath, + int partition, + DateTime baseRecordTimestamp, + DateTime currentTimestamp, + int recordCount, + DateTime eventDatetime) { + try { + log.info("Running file event : {}, {}", topicName, filePath); + callImpl(topicName, s3Partition, filePath, partition, baseRecordTimestamp, currentTimestamp, recordCount, eventDatetime); + } catch (Exception e) { + if (skipError) { + log.error(e.getMessage(), e); + } else { + throw new RuntimeException(e); + } + } + } + public abstract void callImpl( + String topicName, + String s3Partition, + String filePath, + int partition, + DateTime baseRecordTimestamp, + DateTime currentTimestamp, + int recordCount, + DateTime eventDatetime); +} diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java new file mode 100644 index 000000000..658a2c640 --- /dev/null +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java @@ -0,0 +1,126 @@ +/* + * Copyright 2018 Confluent Inc. + * + * Licensed under the Confluent Community License (the "License"); you may not use + * this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.confluent.io/confluent-community-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ + +package io.confluent.connect.s3.file; + +import java.util.Properties; + +public class KafkaFileEventConfig extends AbstractFileEventConfig { + + private static final String KEY_SERIALIZER = + "io.confluent.kafka.serializers.KafkaAvroSerializer"; + private static final String VALUE_SERIALIZER = + "io.confluent.kafka.serializers.KafkaAvroSerializer"; + + private String topicName; + private String bootstrapServers; + private String securityProtocol; + private String schemaRegistryUrl; + private String saslMecanism; + private String saslJaasConfig; + + /** empty constructor for jackson */ + public KafkaFileEventConfig() { + } + + public KafkaFileEventConfig( + String topicName, + String bootstrapServers, + String schemaRegistryUrl, + String securityProtocol, + String saslMecanism, + String saslJaasConfig) { + this.topicName = topicName; + this.bootstrapServers = bootstrapServers; + this.schemaRegistryUrl = schemaRegistryUrl; + this.securityProtocol = securityProtocol; + this.saslMecanism = saslMecanism; + this.saslJaasConfig = saslJaasConfig; + } + + @Override + protected void validateFields() { + if (topicName == null || bootstrapServers == null || schemaRegistryUrl == null) { + throw new RuntimeException( + "topic_name, boostrap_servers and schema_registry_url shall be defined"); + } + } + + @Override + public String toJson() { + final StringBuffer sb = new StringBuffer("{"); + sb.append("\"topic_name\": \"").append(topicName).append('"'); + sb.append(", \"bootstrap_servers\": \"").append(bootstrapServers).append('"'); + sb.append(", \"schema_registry_url\": \"").append(schemaRegistryUrl).append('"'); + if (securityProtocol != null) { + sb.append(", \"security_protocol\": \"").append(securityProtocol).append('"'); + } + if (saslMecanism != null) { + sb.append(", \"sasl_mecanism\": \"").append(saslMecanism).append('"'); + } + if (saslJaasConfig != null) { + sb.append(", \"sasl_jaas_config\": \"").append(saslJaasConfig).append('"'); + } + sb.append('}'); + return sb.toString(); + } + + @Override + public Properties toProps() { + Properties prop = new Properties(); + prop.setProperty("key.serializer", KEY_SERIALIZER); + prop.setProperty("value.serializer", VALUE_SERIALIZER); + prop.setProperty("auto.create.topics.enable", "true"); + // mandatory + prop.setProperty("bootstrap.servers", bootstrapServers); + prop.setProperty("topic.name", topicName); + prop.setProperty("schema.registry.url", schemaRegistryUrl); + // optional + if (saslMecanism != null) { + prop.setProperty("sasl.mechanism", saslMecanism); + } + if (securityProtocol != null) { + prop.setProperty("security.protocol", securityProtocol); + } + if (saslJaasConfig != null) { + prop.setProperty("sasl.jaas.config", saslJaasConfig); + } + return prop; + } + + public String getTopicName() { + return topicName; + } + + public String getSchemaRegistryUrl() { + return schemaRegistryUrl; + } + + public String getBootstrapServers() { + return bootstrapServers; + } + + public String getSecurityProtocol() { + return securityProtocol; + } + + public String getSaslMecanism() { + return saslMecanism; + } + + public String getSaslJaasConfig() { + return saslJaasConfig; + } +} diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java new file mode 100644 index 000000000..62e9d2402 --- /dev/null +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java @@ -0,0 +1,65 @@ +/* + * 
Copyright 2018 Confluent Inc. + * + * Licensed under the Confluent Community License (the "License"); you may not use + * this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.confluent.io/confluent-community-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package io.confluent.connect.s3.file; + +import org.apache.avro.specific.SpecificRecord; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.joda.time.DateTime; + +public class KafkaFileEventProvider extends FileEventProvider { + private final KafkaFileEventConfig kafkaConfig; + + public KafkaFileEventProvider(String configJson, boolean skipError) { + super(configJson, skipError); + this.kafkaConfig = + KafkaFileEventConfig.fromJsonString(configJson, KafkaFileEventConfig.class); + } + + @Override + public void callImpl( + String topicName, + String s3Partition, + String filePath, + int partition, + DateTime baseRecordTimestamp, + DateTime currentTimestamp, + int recordCount, + DateTime eventDatetime) { + String key = topicName; + FileEvent value = + new FileEvent( + topicName, + s3Partition, + filePath, + partition, + formatDateRFC3339(baseRecordTimestamp), + formatDateRFC3339(currentTimestamp), + recordCount, + formatDateRFC3339(eventDatetime)); + try (final Producer producer = + new KafkaProducer<>(kafkaConfig.toProps())) { + producer.send( + new ProducerRecord<>(kafkaConfig.getTopicName(), key, value), + (event, ex) -> { + if (ex != null) { + throw new RuntimeException(ex); + } + }); + } + } +} diff --git a/kafka-connect-s3/src/main/resources/file-event.avsc b/kafka-connect-s3/src/main/resources/file-event.avsc new file mode 100644 index 000000000..a33c1b119 --- /dev/null +++ b/kafka-connect-s3/src/main/resources/file-event.avsc @@ -0,0 +1,48 @@ +{ + "namespace": "io.confluent.connect.s3.file", + "type": "record", + "name": "FileEvent", + "doc": "This event represents a fileEvent Message", + "fields": [ + { + "name": "topic_name", + "type": "string", + "doc": "The topic name of the record being written" + }, + { + "name": "s3_partition", + "type": "string", + "doc": "The s3 partition produced by the partitioner" + }, + { + "name": "file_path", + "type": "string", + "doc": "Current file path, including partition and file name" + }, + { + "name": "partition", + "type": "int", + "doc": "The kafka partition being recorded" + }, + { + "name": "base_record_timestamp", + "type": ["null", "string"], + "doc": "Time of the first record written in the file, in RFC 3339. Defined when partitioner is time based only." + }, + { + "name": "current_timestamp", + "type": ["null", "string"], + "doc": "Time of the last record written in the file, in RFC 3339. Defined when partitioner is time based only." 
+ }, + { + "name": "record_count", + "type": "int", + "doc": "Number of records within the written file" + }, + { + "name": "event_datetime", + "type": "string", + "doc": "The time of the file event, in RFC 3339" + } + ] +} \ No newline at end of file diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/BaseConnectorIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/BaseConnectorIT.java index 7c992ad3f..aece093e9 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/BaseConnectorIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/BaseConnectorIT.java @@ -21,6 +21,7 @@ import com.amazonaws.auth.AWSStaticCredentialsProvider; import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.GetObjectRequest; @@ -129,9 +130,13 @@ public abstract class BaseConnectorIT { protected EmbeddedConnectCluster connect; protected Map props; + protected static MinioContainer minioContainer; + @BeforeClass public static void setupClient() { log.info("Starting ITs..."); + minioContainer = new MinioContainer(); + minioContainer.start(); S3Client = getS3Client(); if (S3Client.doesBucketExistV2(TEST_BUCKET_NAME)) { clearBucket(TEST_BUCKET_NAME); @@ -141,9 +146,12 @@ public static void setupClient() { } @AfterClass - public static void deleteBucket() { + public static void cleanEnv() { S3Client.deleteBucket(TEST_BUCKET_NAME); log.info("Finished ITs, removed S3 bucket"); + // Stopping manually to avoid potential race condition with other IT classes + minioContainer.stop(); + log.info("Stopping Minio container"); } @Before @@ -375,19 +383,20 @@ protected Iterable
sampleHeaders() { * @return an authenticated S3 client */ protected static AmazonS3 getS3Client() { - Map creds = getAWSCredentialFromPath(); - // If AWS credentials found on AWS_CREDENTIALS_PATH, use them (Jenkins) - if (creds.size() == 2) { - BasicAWSCredentials awsCreds = new BasicAWSCredentials( - creds.get(AWS_ACCESS_KEY_ID_CONFIG), - creds.get(AWS_SECRET_ACCESS_KEY_CONFIG)); - return AmazonS3ClientBuilder.standard() - .withCredentials(new AWSStaticCredentialsProvider(awsCreds)) - .build(); - } - // DefaultAWSCredentialsProviderChain, - // For local testing, ~/.aws/credentials needs to be defined or other environment variables - return AmazonS3ClientBuilder.standard().withRegion(AWS_REGION).build(); + return AmazonS3ClientBuilder + .standard() + .withCredentials( + new AWSStaticCredentialsProvider( + new BasicAWSCredentials(MinioContainer.MINIO_USERNAME, MinioContainer.MINIO_PASSWORD)) + ) + .withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration( + minioContainer.getUrl(), + AWS_REGION + ) + ) + .withPathStyleAccessEnabled(true) + .build(); } /** @@ -576,6 +585,7 @@ private static List getContentsFromAvro(String filePath) { * @param filePath the path of the downloaded parquet file * @return the rows of the file as JsonNodes */ + @SuppressWarnings({"deprecation"}) private static List getContentsFromParquet(String filePath) { try (ParquetReader reader = ParquetReader .builder(new SimpleReadSupport(), new Path(filePath)).build()){ diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/MinioContainer.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/MinioContainer.java new file mode 100644 index 000000000..af3846cda --- /dev/null +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/MinioContainer.java @@ -0,0 +1,44 @@ +package io.confluent.connect.s3.integration; + +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.wait.strategy.HttpWaitStrategy; + +import java.time.Duration; +import java.util.Arrays; + +public class MinioContainer extends GenericContainer { + + private static final String DEFAULT_DOCKER_IMAGE = "minio/minio:latest"; + + private static final String HEALTH_ENDPOINT = "/minio/health/ready"; + + private static final int DEFAULT_SERVER_PORT = 9000; + + private static final int DEFAULT_CONSOLE_PORT = 9001; + + // Must be used as AWS_ACCESS_KEY and AWS_SECRET_KEY in AWS S3 Client + public static final String MINIO_USERNAME = "minioadmin"; + + public static final String MINIO_PASSWORD = "minioadmin"; + + public MinioContainer() { + this(DEFAULT_DOCKER_IMAGE); + } + + public MinioContainer(String dockerImageName) { + super(dockerImageName); + this.logger().info("Starting an Minio container using [{}]", dockerImageName); + this.setPortBindings(Arrays.asList(String.format("%d:%d", DEFAULT_SERVER_PORT, DEFAULT_SERVER_PORT), + String.format("%d:%d", DEFAULT_CONSOLE_PORT, DEFAULT_CONSOLE_PORT))); + this.withCommand(String.format("server /data --address :%d --console-address :%d", + DEFAULT_SERVER_PORT, DEFAULT_CONSOLE_PORT)); + setWaitStrategy(new HttpWaitStrategy() + .forPort(DEFAULT_SERVER_PORT) + .forPath(HEALTH_ENDPOINT) + .withStartupTimeout(Duration.ofMinutes(2))); + } + + public String getUrl() { + return String.format("http://%s:%s", this.getHost(), this.getMappedPort(DEFAULT_SERVER_PORT)); + } +} diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java 
b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java index 24b6296b4..3651f2320 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkConnectorIT.java @@ -15,14 +15,7 @@ package io.confluent.connect.s3.integration; -import static io.confluent.connect.s3.S3SinkConnectorConfig.BEHAVIOR_ON_NULL_VALUES_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.KEYS_FORMAT_CLASS_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.S3_BUCKET_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.STORE_KAFKA_HEADERS_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.STORE_KAFKA_KEYS_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.AWS_ACCESS_KEY_ID_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.AWS_SECRET_ACCESS_KEY_CONFIG; -import static io.confluent.connect.s3.S3SinkConnectorConfig.TOMBSTONE_ENCODED_PARTITION; +import static io.confluent.connect.s3.S3SinkConnectorConfig.*; import static io.confluent.connect.storage.StorageSinkConnectorConfig.FLUSH_SIZE_CONFIG; import static io.confluent.connect.storage.StorageSinkConnectorConfig.FORMAT_CLASS_CONFIG; import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG; @@ -109,6 +102,9 @@ public void before() throws InterruptedException { props.put(FORMAT_CLASS_CONFIG, AvroFormat.class.getName()); props.put(STORAGE_CLASS_CONFIG, S3Storage.class.getName()); props.put(S3_BUCKET_CONFIG, TEST_BUCKET_NAME); + props.put(S3_PROXY_URL_CONFIG, minioContainer.getUrl()); + props.put(AWS_ACCESS_KEY_ID_CONFIG, MinioContainer.MINIO_USERNAME); + props.put(AWS_SECRET_ACCESS_KEY_CONFIG, MinioContainer.MINIO_PASSWORD); // create topics in Kafka KAFKA_TOPICS.forEach(topic -> connect.kafka().createTopic(topic, 1)); } diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkDataFormatIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkDataFormatIT.java index 0b6414002..4e27b9df1 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkDataFormatIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkDataFormatIT.java @@ -15,7 +15,8 @@ package io.confluent.connect.s3.integration; -import static io.confluent.connect.s3.S3SinkConnectorConfig.S3_BUCKET_CONFIG; +import static io.confluent.connect.s3.S3SinkConnectorConfig.*; +import static io.confluent.connect.s3.S3SinkConnectorConfig.AWS_SECRET_ACCESS_KEY_CONFIG; import static io.confluent.connect.storage.StorageSinkConnectorConfig.FLUSH_SIZE_CONFIG; import static io.confluent.connect.storage.StorageSinkConnectorConfig.FORMAT_CLASS_CONFIG; import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG; @@ -123,6 +124,10 @@ public void before() throws InterruptedException { props.put(VALUE_CONVERTER_SCHEMA_REGISTRY_URL, restApp.restServer.getURI().toString()); props.put(VALUE_CONVERTER_SCRUB_INVALID_NAMES, "true"); + props.put(S3_PROXY_URL_CONFIG, minioContainer.getUrl()); + props.put(AWS_ACCESS_KEY_ID_CONFIG, MinioContainer.MINIO_USERNAME); + props.put(AWS_SECRET_ACCESS_KEY_CONFIG, MinioContainer.MINIO_PASSWORD); + // create topics in Kafka connect.kafka().createTopic(topicName, 1); } diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java 
b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java new file mode 100644 index 000000000..bb591d38d --- /dev/null +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java @@ -0,0 +1,274 @@ +/* + * Copyright 2018 Confluent Inc. + * + * Licensed under the Confluent Community License (the "License"); you may not use + * this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.confluent.io/confluent-community-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +package io.confluent.connect.s3.integration; + +import static io.confluent.connect.s3.S3SinkConnectorConfig.*; +import static io.confluent.connect.storage.StorageSinkConnectorConfig.FLUSH_SIZE_CONFIG; +import static io.confluent.connect.storage.StorageSinkConnectorConfig.FORMAT_CLASS_CONFIG; +import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG; +import static org.apache.kafka.connect.runtime.ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG; +import static org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG; +import static org.apache.kafka.connect.runtime.ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG; +import static org.junit.Assert.assertEquals; + +import io.confluent.connect.s3.S3SinkConnector; +import io.confluent.connect.s3.file.KafkaFileEventConfig; +import io.confluent.connect.s3.file.KafkaFileEventProvider; +import io.confluent.connect.s3.format.avro.AvroFormat; +import io.confluent.connect.s3.format.parquet.ParquetFormat; +import io.confluent.connect.s3.storage.S3Storage; +import io.confluent.connect.s3.util.EmbeddedConnectUtils; +import io.confluent.connect.storage.partitioner.PartitionerConfig; +import java.io.File; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.ExecutionException; +import org.apache.commons.io.FileUtils; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.header.Header; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.json.JsonConverter; +import org.apache.kafka.connect.runtime.SinkConnectorConfig; +import org.apache.kafka.connect.sink.SinkRecord; +import org.apache.kafka.test.IntegrationTest; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@SuppressWarnings({"unchecked", "deprecation"}) +@Category(IntegrationTest.class) +public class S3SinkFileEventIT extends BaseConnectorIT { + + private static final Logger log = LoggerFactory.getLogger(S3SinkFileEventIT.class); + // connector and test configs + private static final String CONNECTOR_NAME = "s3-sink"; + private static final String DEFAULT_TEST_TOPIC_NAME = "TestTopic"; + + private static final List 
KAFKA_TOPICS = Collections.singletonList(DEFAULT_TEST_TOPIC_NAME); + + private JsonConverter jsonConverter; + // custom producer to enable sending records with headers + private Producer producer; + + @Before + public void before() throws InterruptedException { + initializeJsonConverter(); + initializeCustomProducer(); + setupProperties(); + waitForSchemaRegistryToStart(); + //add class specific props + props.put(SinkConnectorConfig.TOPICS_CONFIG, String.join(",", KAFKA_TOPICS)); + props.put(FLUSH_SIZE_CONFIG, Integer.toString(FLUSH_SIZE_STANDARD)); + props.put(FORMAT_CLASS_CONFIG, AvroFormat.class.getName()); + props.put(STORAGE_CLASS_CONFIG, S3Storage.class.getName()); + props.put(S3_BUCKET_CONFIG, TEST_BUCKET_NAME); + props.put(S3_PROXY_URL_CONFIG, minioContainer.getUrl()); + props.put(AWS_ACCESS_KEY_ID_CONFIG, MinioContainer.MINIO_USERNAME); + props.put(AWS_SECRET_ACCESS_KEY_CONFIG, MinioContainer.MINIO_PASSWORD); + // file event + props.put(FILE_EVENT_ENABLE, "true"); + // TimeBasedPartitioner + props.put(PartitionerConfig.PARTITIONER_CLASS_CONFIG, "io.confluent.connect.storage.partitioner.TimeBasedPartitioner"); + props.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, "100"); + props.put(PartitionerConfig.PATH_FORMAT_CONFIG, "'event_date'=YYYY-MM-dd/'event_hour'=HH"); + props.put(PartitionerConfig.LOCALE_CONFIG, "FR_fr"); + props.put(PartitionerConfig.TIMEZONE_CONFIG, "UTC"); + // create topics in Kafka + KAFKA_TOPICS.forEach(topic -> connect.kafka().createTopic(topic, 1)); + } + + @After + public void after() throws Exception { + // delete the downloaded test file folder + FileUtils.deleteDirectory(new File(TEST_DOWNLOAD_PATH)); + // clear for next test + clearBucket(TEST_BUCKET_NAME); + // wait for bucket to clear + waitForFilesInBucket(TEST_BUCKET_NAME, 0); + } + + + @Test + public void testBasicRecordsWrittenParquetAndRelatedFileEvents() throws Throwable { + // add test specific props + props.put(FORMAT_CLASS_CONFIG, ParquetFormat.class.getName()); + String topicFileEvent = "TopicFileEvent"; + props.put( + FILE_EVENT_CONFIG_JSON, + new KafkaFileEventConfig( + topicFileEvent, + connect.kafka().bootstrapServers(), + restApp.restServer.getURI().toString(), + null, + null, + null) + .toJson()); + connect.kafka().createTopic(topicFileEvent); + testBasicRecordsWrittenAndRelatedFileEvents(PARQUET_EXTENSION, topicFileEvent); + } + + @Test + public void testFileEventPartition() { + String bootstrapServers = connect.kafka().bootstrapServers(); + String fileEventTopic = "file_event_topic"; + connect.kafka().createTopic(fileEventTopic); + KafkaFileEventConfig kafkaFileEventConfig = + new KafkaFileEventConfig( + fileEventTopic, + bootstrapServers, + restApp.restServer.getURI().toString(), + null, + null, + null); + KafkaFileEventProvider fileEvent = + new KafkaFileEventProvider(kafkaFileEventConfig.toJson(), false); + fileEvent.call("baz-topic", "version/event/hour", "file1.avro", 12, + new DateTime(1234L), new DateTime(123L), + 34, new DateTime(1234L).withZone(DateTimeZone.UTC)); + fileEvent.call("foo-topic", "version/event/hour", "fil2.avro", 8, + new DateTime(12345L), new DateTime(1234L), 12, new DateTime(12345L)); + + // fails if two records are not present in kafka within 1s + connect.kafka().consume(2, 1000L, fileEventTopic); + } + /** + * Test that the expected records are written for a given file extension + * Optionally, test that topics which have "*.{expectedFileExtension}*" in them are processed + * and written. 
+ * @param expectedFileExtension The file extension to test against + * @param fileEventTopic The fileEvent topic name + * @throws Throwable + */ + private void testBasicRecordsWrittenAndRelatedFileEvents( + String expectedFileExtension, + String fileEventTopic + ) throws Throwable { + // Add an extra topic with this extension inside of the name + // Use a TreeSet for test determinism + Set topicNames = new TreeSet<>(KAFKA_TOPICS); + + // start sink connector + connect.configureConnector(CONNECTOR_NAME, props); + // wait for tasks to spin up + EmbeddedConnectUtils.waitForConnectorToStart(connect, CONNECTOR_NAME, Math.min(topicNames.size(), MAX_TASKS)); + + Schema recordValueSchema = getSampleStructSchema(); + Struct recordValueStruct = getSampleStructVal(recordValueSchema); + + for (String thisTopicName : topicNames) { + // Create and send records to Kafka using the topic name in the current 'thisTopicName' + SinkRecord sampleRecord = getSampleTopicRecord(thisTopicName, recordValueSchema, recordValueStruct); + produceRecordsNoHeaders(NUM_RECORDS_INSERT, sampleRecord); + } + + log.info("Waiting for files in S3..."); + int countPerTopic = NUM_RECORDS_INSERT / FLUSH_SIZE_STANDARD; + int expectedTotalFileCount = countPerTopic * topicNames.size(); + waitForFilesInBucket(TEST_BUCKET_NAME, expectedTotalFileCount); + + Set expectedTopicFilenames = new TreeSet<>(); + for (String thisTopicName : topicNames) { + List theseFiles = getExpectedFilenames( + thisTopicName, + TOPIC_PARTITION, + FLUSH_SIZE_STANDARD, + NUM_RECORDS_INSERT, + expectedFileExtension + ); + assertEquals(theseFiles.size(), countPerTopic); + expectedTopicFilenames.addAll(theseFiles); + } + // This check will catch any duplications + assertEquals(expectedTopicFilenames.size(), expectedTotalFileCount); + // Check whether we get same number of records in fileEvent + connect.kafka().consume(expectedTotalFileCount, 1000L, fileEventTopic); + } + + private void produceRecordsNoHeaders(int recordCount, SinkRecord record) + throws ExecutionException, InterruptedException { + produceRecords(record.topic(), recordCount, record, true, true, false); + } + + private void produceRecords( + String topic, + int recordCount, + SinkRecord record, + boolean withKey, + boolean withValue, + boolean withHeaders + ) throws ExecutionException, InterruptedException { + byte[] kafkaKey = null; + byte[] kafkaValue = null; + Iterable
headers = Collections.emptyList(); + if (withKey) { + kafkaKey = jsonConverter.fromConnectData(topic, Schema.STRING_SCHEMA, record.key()); + } + if (withValue) { + kafkaValue = jsonConverter.fromConnectData(record.topic(), record.valueSchema(), record.value()); + } + if (withHeaders) { + headers = sampleHeaders(); + } + ProducerRecord producerRecord = + new ProducerRecord<>(topic, TOPIC_PARTITION, kafkaKey, kafkaValue, headers); + for (long i = 0; i < recordCount; i++) { + producer.send(producerRecord).get(); + } + } + + private void initializeJsonConverter() { + Map jsonConverterProps = new HashMap<>(); + jsonConverterProps.put("schemas.enable", "true"); + jsonConverterProps.put("converter.type", "value"); + jsonConverter = new JsonConverter(); + jsonConverter.configure(jsonConverterProps); + } + + private void initializeCustomProducer() { + Map producerProps = new HashMap<>(); + producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, connect.kafka().bootstrapServers()); + producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, + org.apache.kafka.common.serialization.ByteArraySerializer.class.getName()); + producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, + org.apache.kafka.common.serialization.ByteArraySerializer.class.getName()); + producer = new KafkaProducer<>(producerProps); + } + + private void setupProperties() { + props = new HashMap<>(); + props.put(CONNECTOR_CLASS_CONFIG, S3SinkConnector.class.getName()); + props.put(TASKS_MAX_CONFIG, Integer.toString(MAX_TASKS)); + // converters + props.put(KEY_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName()); + props.put(VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName()); + // aws credential if exists + props.putAll(getAWSCredentialFromPath()); + } + +} From b854cdf58b94de839c07a06d7278e29279ff825b Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Thu, 7 Sep 2023 20:39:51 +0200 Subject: [PATCH 26/34] Fix typo sasl mechanism --- .../connect/s3/file/KafkaFileEventConfig.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java index 658a2c640..cef9c193e 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java @@ -28,7 +28,7 @@ public class KafkaFileEventConfig extends AbstractFileEventConfig { private String bootstrapServers; private String securityProtocol; private String schemaRegistryUrl; - private String saslMecanism; + private String saslMechanism; private String saslJaasConfig; /** empty constructor for jackson */ @@ -40,13 +40,13 @@ public KafkaFileEventConfig( String bootstrapServers, String schemaRegistryUrl, String securityProtocol, - String saslMecanism, + String saslMechanism, String saslJaasConfig) { this.topicName = topicName; this.bootstrapServers = bootstrapServers; this.schemaRegistryUrl = schemaRegistryUrl; this.securityProtocol = securityProtocol; - this.saslMecanism = saslMecanism; + this.saslMechanism = saslMechanism; this.saslJaasConfig = saslJaasConfig; } @@ -67,8 +67,8 @@ public String toJson() { if (securityProtocol != null) { sb.append(", \"security_protocol\": \"").append(securityProtocol).append('"'); } - if (saslMecanism != null) { - sb.append(", \"sasl_mecanism\": \"").append(saslMecanism).append('"'); + if (saslMechanism != null) { + 
sb.append(", \"sasl_mechanism\": \"").append(saslMechanism).append('"'); } if (saslJaasConfig != null) { sb.append(", \"sasl_jaas_config\": \"").append(saslJaasConfig).append('"'); @@ -88,8 +88,8 @@ public Properties toProps() { prop.setProperty("topic.name", topicName); prop.setProperty("schema.registry.url", schemaRegistryUrl); // optional - if (saslMecanism != null) { - prop.setProperty("sasl.mechanism", saslMecanism); + if (saslMechanism != null) { + prop.setProperty("sasl.mechanism", saslMechanism); } if (securityProtocol != null) { prop.setProperty("security.protocol", securityProtocol); @@ -116,8 +116,8 @@ public String getSecurityProtocol() { return securityProtocol; } - public String getSaslMecanism() { - return saslMecanism; + public String getSaslMechanism() { + return saslMechanism; } public String getSaslJaasConfig() { From f38d201a38efc56ec09287c2323fdd2f9195eb6d Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 8 Sep 2023 10:14:42 +0200 Subject: [PATCH 27/34] Use string serializer for key --- .../java/io/confluent/connect/s3/file/KafkaFileEventConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java index cef9c193e..eb5524358 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java @@ -20,7 +20,7 @@ public class KafkaFileEventConfig extends AbstractFileEventConfig { private static final String KEY_SERIALIZER = - "io.confluent.kafka.serializers.KafkaAvroSerializer"; + "org.apache.kafka.common.serialization.StringSerializer"; private static final String VALUE_SERIALIZER = "io.confluent.kafka.serializers.KafkaAvroSerializer"; From d46be933e39d1a530e353c983efc5b6a49fec122 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 8 Sep 2023 11:38:59 +0200 Subject: [PATCH 28/34] Disable auto create topic/registry schema --- .../io/confluent/connect/s3/file/KafkaFileEventConfig.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java index eb5524358..e023249d6 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java @@ -82,7 +82,8 @@ public Properties toProps() { Properties prop = new Properties(); prop.setProperty("key.serializer", KEY_SERIALIZER); prop.setProperty("value.serializer", VALUE_SERIALIZER); - prop.setProperty("auto.create.topics.enable", "true"); + prop.setProperty("use.latest.version", "true"); + prop.setProperty("auto.register.schemas", "false"); // mandatory prop.setProperty("bootstrap.servers", bootstrapServers); prop.setProperty("topic.name", topicName); From d22dc906eb28091563f17b882d4c4ceac4f33a52 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Mon, 11 Sep 2023 18:04:37 +0200 Subject: [PATCH 29/34] Allow to provide custom kafka configs --- .../connect/s3/S3SinkConnectorConfig.java | 2 +- .../connect/s3/file/KafkaFileEventConfig.java | 59 ++++------- .../s3/integration/S3SinkFileEventIT.java | 99 +++++++++++-------- 3 files changed, 79 insertions(+), 81 deletions(-) diff --git 
a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java index 626e1bcee..687b9ab31 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkConnectorConfig.java @@ -843,7 +843,7 @@ public static ConfigDef newConfigDef() { Importance.LOW, "File event configuration as json format. " + "Mandatory Fields: bootstrap_servers, topic_name, schema_registry_url. " - + "Optional fields: sasl_mechanism, security_protocol, sasl_jaas_config. " + + "Custom fields can be added in the \"custom\" field as a map of attribute" + "By default an empty json.", group, ++orderInGroup, diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java index e023249d6..6656fef9f 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventConfig.java @@ -15,10 +15,12 @@ package io.confluent.connect.s3.file; +import java.util.Map; import java.util.Properties; public class KafkaFileEventConfig extends AbstractFileEventConfig { + Map custom; private static final String KEY_SERIALIZER = "org.apache.kafka.common.serialization.StringSerializer"; private static final String VALUE_SERIALIZER = @@ -26,28 +28,20 @@ public class KafkaFileEventConfig extends AbstractFileEventConfig { private String topicName; private String bootstrapServers; - private String securityProtocol; private String schemaRegistryUrl; - private String saslMechanism; - private String saslJaasConfig; /** empty constructor for jackson */ - public KafkaFileEventConfig() { - } + public KafkaFileEventConfig() {} public KafkaFileEventConfig( String topicName, String bootstrapServers, String schemaRegistryUrl, - String securityProtocol, - String saslMechanism, - String saslJaasConfig) { + Map custom) { this.topicName = topicName; this.bootstrapServers = bootstrapServers; this.schemaRegistryUrl = schemaRegistryUrl; - this.securityProtocol = securityProtocol; - this.saslMechanism = saslMechanism; - this.saslJaasConfig = saslJaasConfig; + this.custom = custom; } @Override @@ -64,16 +58,15 @@ public String toJson() { sb.append("\"topic_name\": \"").append(topicName).append('"'); sb.append(", \"bootstrap_servers\": \"").append(bootstrapServers).append('"'); sb.append(", \"schema_registry_url\": \"").append(schemaRegistryUrl).append('"'); - if (securityProtocol != null) { - sb.append(", \"security_protocol\": \"").append(securityProtocol).append('"'); - } - if (saslMechanism != null) { - sb.append(", \"sasl_mechanism\": \"").append(saslMechanism).append('"'); - } - if (saslJaasConfig != null) { - sb.append(", \"sasl_jaas_config\": \"").append(saslJaasConfig).append('"'); + sb.append(", \"custom\": {"); + String customIncrement = ""; + for (Map.Entry custom : custom.entrySet()) { + sb.append( + String.format( + "%s \"%s\": \"%s\"", customIncrement, custom.getKey(), custom.getValue().toString())); + customIncrement = ","; } - sb.append('}'); + sb.append("}}"); return sb.toString(); } @@ -82,21 +75,13 @@ public Properties toProps() { Properties prop = new Properties(); prop.setProperty("key.serializer", KEY_SERIALIZER); prop.setProperty("value.serializer", VALUE_SERIALIZER); - prop.setProperty("use.latest.version", "true"); 
- prop.setProperty("auto.register.schemas", "false"); // mandatory prop.setProperty("bootstrap.servers", bootstrapServers); prop.setProperty("topic.name", topicName); prop.setProperty("schema.registry.url", schemaRegistryUrl); - // optional - if (saslMechanism != null) { - prop.setProperty("sasl.mechanism", saslMechanism); - } - if (securityProtocol != null) { - prop.setProperty("security.protocol", securityProtocol); - } - if (saslJaasConfig != null) { - prop.setProperty("sasl.jaas.config", saslJaasConfig); + // custom + for (Map.Entry custom : custom.entrySet()) { + prop.setProperty(custom.getKey(), custom.getValue().toString()); } return prop; } @@ -113,15 +98,7 @@ public String getBootstrapServers() { return bootstrapServers; } - public String getSecurityProtocol() { - return securityProtocol; - } - - public String getSaslMechanism() { - return saslMechanism; - } - - public String getSaslJaasConfig() { - return saslJaasConfig; + public Map getCustom() { + return custom; } } diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java index bb591d38d..ba52d1f9c 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java @@ -70,11 +70,20 @@ public class S3SinkFileEventIT extends BaseConnectorIT { private static final String CONNECTOR_NAME = "s3-sink"; private static final String DEFAULT_TEST_TOPIC_NAME = "TestTopic"; - private static final List KAFKA_TOPICS = Collections.singletonList(DEFAULT_TEST_TOPIC_NAME); + private static final List KAFKA_TOPICS = + Collections.singletonList(DEFAULT_TEST_TOPIC_NAME); private JsonConverter jsonConverter; // custom producer to enable sending records with headers private Producer producer; + private Map autoCreate = + new HashMap() { + { + put("auto.register.schemas", "true"); + put("auto.create.topics.enable", "true"); + } + }; + ; @Before public void before() throws InterruptedException { @@ -82,7 +91,7 @@ public void before() throws InterruptedException { initializeCustomProducer(); setupProperties(); waitForSchemaRegistryToStart(); - //add class specific props + // add class specific props props.put(SinkConnectorConfig.TOPICS_CONFIG, String.join(",", KAFKA_TOPICS)); props.put(FLUSH_SIZE_CONFIG, Integer.toString(FLUSH_SIZE_STANDARD)); props.put(FORMAT_CLASS_CONFIG, AvroFormat.class.getName()); @@ -94,7 +103,9 @@ public void before() throws InterruptedException { // file event props.put(FILE_EVENT_ENABLE, "true"); // TimeBasedPartitioner - props.put(PartitionerConfig.PARTITIONER_CLASS_CONFIG, "io.confluent.connect.storage.partitioner.TimeBasedPartitioner"); + props.put( + PartitionerConfig.PARTITIONER_CLASS_CONFIG, + "io.confluent.connect.storage.partitioner.TimeBasedPartitioner"); props.put(PartitionerConfig.PARTITION_DURATION_MS_CONFIG, "100"); props.put(PartitionerConfig.PATH_FORMAT_CONFIG, "'event_date'=YYYY-MM-dd/'event_hour'=HH"); props.put(PartitionerConfig.LOCALE_CONFIG, "FR_fr"); @@ -113,21 +124,18 @@ public void after() throws Exception { waitForFilesInBucket(TEST_BUCKET_NAME, 0); } - @Test public void testBasicRecordsWrittenParquetAndRelatedFileEvents() throws Throwable { // add test specific props props.put(FORMAT_CLASS_CONFIG, ParquetFormat.class.getName()); String topicFileEvent = "TopicFileEvent"; props.put( - FILE_EVENT_CONFIG_JSON, + FILE_EVENT_CONFIG_JSON, new 
KafkaFileEventConfig( topicFileEvent, connect.kafka().bootstrapServers(), restApp.restServer.getURI().toString(), - null, - null, - null) + this.autoCreate) .toJson()); connect.kafka().createTopic(topicFileEvent); testBasicRecordsWrittenAndRelatedFileEvents(PARQUET_EXTENSION, topicFileEvent); @@ -139,36 +147,45 @@ public void testFileEventPartition() { String fileEventTopic = "file_event_topic"; connect.kafka().createTopic(fileEventTopic); KafkaFileEventConfig kafkaFileEventConfig = - new KafkaFileEventConfig( - fileEventTopic, - bootstrapServers, - restApp.restServer.getURI().toString(), - null, - null, - null); + new KafkaFileEventConfig( + fileEventTopic, + bootstrapServers, + restApp.restServer.getURI().toString(), + this.autoCreate); KafkaFileEventProvider fileEvent = - new KafkaFileEventProvider(kafkaFileEventConfig.toJson(), false); - fileEvent.call("baz-topic", "version/event/hour", "file1.avro", 12, - new DateTime(1234L), new DateTime(123L), - 34, new DateTime(1234L).withZone(DateTimeZone.UTC)); - fileEvent.call("foo-topic", "version/event/hour", "fil2.avro", 8, - new DateTime(12345L), new DateTime(1234L), 12, new DateTime(12345L)); + new KafkaFileEventProvider(kafkaFileEventConfig.toJson(), false); + fileEvent.call( + "baz-topic", + "version/event/hour", + "file1.avro", + 12, + new DateTime(1234L), + new DateTime(123L), + 34, + new DateTime(1234L).withZone(DateTimeZone.UTC)); + fileEvent.call( + "foo-topic", + "version/event/hour", + "fil2.avro", + 8, + new DateTime(12345L), + new DateTime(1234L), + 12, + new DateTime(12345L)); // fails if two records are not present in kafka within 1s connect.kafka().consume(2, 1000L, fileEventTopic); } /** - * Test that the expected records are written for a given file extension - * Optionally, test that topics which have "*.{expectedFileExtension}*" in them are processed - * and written. + * Test that the expected records are written for a given file extension Optionally, test that + * topics which have "*.{expectedFileExtension}*" in them are processed and written. 
+ * * @param expectedFileExtension The file extension to test against * @param fileEventTopic The fileEvent topic name * @throws Throwable */ private void testBasicRecordsWrittenAndRelatedFileEvents( - String expectedFileExtension, - String fileEventTopic - ) throws Throwable { + String expectedFileExtension, String fileEventTopic) throws Throwable { // Add an extra topic with this extension inside of the name // Use a TreeSet for test determinism Set topicNames = new TreeSet<>(KAFKA_TOPICS); @@ -176,14 +193,16 @@ private void testBasicRecordsWrittenAndRelatedFileEvents( // start sink connector connect.configureConnector(CONNECTOR_NAME, props); // wait for tasks to spin up - EmbeddedConnectUtils.waitForConnectorToStart(connect, CONNECTOR_NAME, Math.min(topicNames.size(), MAX_TASKS)); + EmbeddedConnectUtils.waitForConnectorToStart( + connect, CONNECTOR_NAME, Math.min(topicNames.size(), MAX_TASKS)); Schema recordValueSchema = getSampleStructSchema(); Struct recordValueStruct = getSampleStructVal(recordValueSchema); for (String thisTopicName : topicNames) { // Create and send records to Kafka using the topic name in the current 'thisTopicName' - SinkRecord sampleRecord = getSampleTopicRecord(thisTopicName, recordValueSchema, recordValueStruct); + SinkRecord sampleRecord = + getSampleTopicRecord(thisTopicName, recordValueSchema, recordValueStruct); produceRecordsNoHeaders(NUM_RECORDS_INSERT, sampleRecord); } @@ -194,13 +213,13 @@ private void testBasicRecordsWrittenAndRelatedFileEvents( Set expectedTopicFilenames = new TreeSet<>(); for (String thisTopicName : topicNames) { - List theseFiles = getExpectedFilenames( + List theseFiles = + getExpectedFilenames( thisTopicName, TOPIC_PARTITION, FLUSH_SIZE_STANDARD, NUM_RECORDS_INSERT, - expectedFileExtension - ); + expectedFileExtension); assertEquals(theseFiles.size(), countPerTopic); expectedTopicFilenames.addAll(theseFiles); } @@ -221,8 +240,8 @@ private void produceRecords( SinkRecord record, boolean withKey, boolean withValue, - boolean withHeaders - ) throws ExecutionException, InterruptedException { + boolean withHeaders) + throws ExecutionException, InterruptedException { byte[] kafkaKey = null; byte[] kafkaValue = null; Iterable
headers = Collections.emptyList(); @@ -230,12 +249,13 @@ private void produceRecords( kafkaKey = jsonConverter.fromConnectData(topic, Schema.STRING_SCHEMA, record.key()); } if (withValue) { - kafkaValue = jsonConverter.fromConnectData(record.topic(), record.valueSchema(), record.value()); + kafkaValue = + jsonConverter.fromConnectData(record.topic(), record.valueSchema(), record.value()); } if (withHeaders) { headers = sampleHeaders(); } - ProducerRecord producerRecord = + ProducerRecord producerRecord = new ProducerRecord<>(topic, TOPIC_PARTITION, kafkaKey, kafkaValue, headers); for (long i = 0; i < recordCount; i++) { producer.send(producerRecord).get(); @@ -253,9 +273,11 @@ private void initializeJsonConverter() { private void initializeCustomProducer() { Map producerProps = new HashMap<>(); producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, connect.kafka().bootstrapServers()); - producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, + producerProps.put( + ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.ByteArraySerializer.class.getName()); - producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, + producerProps.put( + ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.ByteArraySerializer.class.getName()); producer = new KafkaProducer<>(producerProps); } @@ -270,5 +292,4 @@ private void setupProperties() { // aws credential if exists props.putAll(getAWSCredentialFromPath()); } - } From 3d1a18021a7b37140195e95fc695218bad09e9b8 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Tue, 12 Sep 2023 11:53:23 +0200 Subject: [PATCH 30/34] Reuse kafka producer at task level --- .../io/confluent/connect/s3/S3SinkTask.java | 40 ++++++++++++++++++- .../connect/s3/TopicPartitionWriter.java | 20 ++-------- .../connect/s3/file/FileEventProvider.java | 5 ++- .../s3/file/KafkaFileEventProvider.java | 31 ++++++++------ 4 files changed, 66 insertions(+), 30 deletions(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkTask.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkTask.java index 590c9c5e8..0dea123b0 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkTask.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkTask.java @@ -17,6 +17,7 @@ import com.amazonaws.AmazonClientException; import io.confluent.connect.s3.S3SinkConnectorConfig.OutputWriteBehavior; +import io.confluent.connect.s3.file.FileEventProvider; import io.confluent.connect.s3.util.TombstoneSupportedPartitioner; import io.confluent.connect.s3.util.SchemaPartitioner; import org.apache.kafka.clients.consumer.OffsetAndMetadata; @@ -31,10 +32,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.util.Collection; import java.util.HashMap; import java.util.Map; +import java.util.Optional; import io.confluent.common.utils.SystemTime; import io.confluent.common.utils.Time; @@ -64,6 +67,7 @@ public class S3SinkTask extends SinkTask { private RecordWriterProvider writerProvider; private final Time time; private ErrantRecordReporter reporter; + private Optional fileEventProvider; /** * No-arg constructor. Used by Connect framework. 
@@ -150,10 +154,36 @@ public String version() { @Override public void open(Collection partitions) { + initFileEventProvider(); for (TopicPartition tp : partitions) { topicPartitionWriters.put(tp, newTopicPartitionWriter(tp)); } } + private void initFileEventProvider() { + // Initialize fileEvent if enabled + if (this.connectorConfig.getFileEventEnable()) { + try { + log.info("File event enabled"); + if (this.fileEventProvider == null) // only if not yet instanciated + this.fileEventProvider = + Optional.of( + (FileEventProvider) + this.connectorConfig + .getFileEventClass() + .getConstructor(String.class, boolean.class) + .newInstance( + connectorConfig.getFileEventConfigJson(), + connectorConfig.getFileEventSkipError())); + } catch (InstantiationException + | IllegalAccessException + | InvocationTargetException + | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } else { + this.fileEventProvider = Optional.empty(); + } + } @SuppressWarnings("unchecked") private Format newFormat(String formatClassConfig) @@ -322,6 +352,14 @@ public void close(Collection partitions) { } } topicPartitionWriters.clear(); + this.fileEventProvider.ifPresent( + fc -> { + try { + fc.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); } @Override @@ -350,6 +388,6 @@ private TopicPartitionWriter newTopicPartitionWriter(TopicPartition tp) { context, time, reporter - ); + ).withFileEventProvider(fileEventProvider); } } diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java index fed74cd98..b22ccc3be 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java @@ -37,7 +37,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.lang.reflect.InvocationTargetException; import java.util.Comparator; import java.util.HashMap; import java.util.LinkedList; @@ -191,21 +190,10 @@ public TopicPartitionWriter(TopicPartition tp, // Initialize scheduled rotation timer if applicable setNextScheduledRotation(); - - // Initialize fileEvent if enabled - if (this.connectorConfig.getFileEventEnable()) { - try { - log.info("File event enabled"); - fileCallback = Optional.of((FileEventProvider) - this.connectorConfig - .getFileEventClass().getConstructor(String.class, boolean.class) - .newInstance(connectorConfig.getFileEventConfigJson(), - connectorConfig.getFileEventSkipError())); - } catch (InstantiationException | IllegalAccessException - | InvocationTargetException | NoSuchMethodException e) { - throw new RuntimeException(e); - } - } + } + public TopicPartitionWriter withFileEventProvider(Optional fileEventProvider){ + this.fileCallback = fileEventProvider; + return this; } private enum State { diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java index d8b4106c8..961839951 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/FileEventProvider.java @@ -21,7 +21,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public abstract class FileEventProvider { +import java.io.Closeable; +import java.io.IOException; + +public abstract class FileEventProvider implements Closeable { private static 
final Logger log = LoggerFactory.getLogger(FileEventProvider.class); protected final String configJson; protected final boolean skipError; diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java index 62e9d2402..a6829e2f3 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java @@ -21,13 +21,17 @@ import org.apache.kafka.clients.producer.ProducerRecord; import org.joda.time.DateTime; +import java.io.Closeable; +import java.io.IOException; + public class KafkaFileEventProvider extends FileEventProvider { private final KafkaFileEventConfig kafkaConfig; + private Producer producer; public KafkaFileEventProvider(String configJson, boolean skipError) { super(configJson, skipError); - this.kafkaConfig = - KafkaFileEventConfig.fromJsonString(configJson, KafkaFileEventConfig.class); + this.kafkaConfig = KafkaFileEventConfig.fromJsonString(configJson, KafkaFileEventConfig.class); + producer = new KafkaProducer<>(kafkaConfig.toProps()); } @Override @@ -51,15 +55,18 @@ public void callImpl( formatDateRFC3339(currentTimestamp), recordCount, formatDateRFC3339(eventDatetime)); - try (final Producer producer = - new KafkaProducer<>(kafkaConfig.toProps())) { - producer.send( - new ProducerRecord<>(kafkaConfig.getTopicName(), key, value), - (event, ex) -> { - if (ex != null) { - throw new RuntimeException(ex); - } - }); - } + producer.send( + new ProducerRecord<>(kafkaConfig.getTopicName(), key, value), + (event, ex) -> { + if (ex != null) { + throw new RuntimeException(ex); + } + }); + } + + @Override + public void close() throws IOException { + this.producer.flush(); + this.producer.close(); } } From b5de512d1e9eda4a2fca54175b2012c87ee3e052 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 15 Sep 2023 11:11:00 +0200 Subject: [PATCH 31/34] Fix the file event life cycle --- .../src/main/java/io/confluent/connect/s3/S3SinkTask.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkTask.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkTask.java index 0dea123b0..f2fd7ecae 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkTask.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/S3SinkTask.java @@ -351,15 +351,16 @@ public void close(Collection partitions) { log.error("Error closing writer for {}. Error: {}", tp, e.getMessage()); } } - topicPartitionWriters.clear(); this.fileEventProvider.ifPresent( fc -> { try { fc.close(); + this.fileEventProvider = null; } catch (IOException e) { throw new RuntimeException(e); } }); + topicPartitionWriters.clear(); } @Override From 1fd0c657811650fc9035121fb945a3e7bb997400 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 9 Feb 2024 17:15:43 +0100 Subject: [PATCH 32/34] Fix record counts. 
Previously the record count could be wrong and reflect how much data the worker was writing, independently of the partition
---
 .../main/java/io/confluent/connect/s3/TopicPartitionWriter.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java
index b22ccc3be..f19fc88dd 100644
--- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java
+++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/TopicPartitionWriter.java
@@ -683,7 +683,7 @@ private void callbackFile(String encodedPartition) {
     fileCallback.ifPresent(fs -> fs.call(tp.topic(), encodedPartition,
         commitFiles.get(encodedPartition), tp.partition(),
         new DateTime(baseRecordTimestamp).withZone(timeZone),
-        new DateTime(currentTimestamp).withZone(timeZone), recordCount,
+        new DateTime(currentTimestamp).withZone(timeZone), (recordCounts.get(encodedPartition)).intValue(),
         new DateTime(time.milliseconds()).withZone(timeZone)));
   }
 

From 74c8f2d6a32dc600ef0141156d158989d5e68855 Mon Sep 17 00:00:00 2001
From: Nicolas Paris
Date: Wed, 13 Mar 2024 10:03:27 +0100
Subject: [PATCH 33/34] Add db/tbl to the output topic

---
 .../connect/s3/file/KafkaFileEventConfig.java | 18 ++++++++++++++++++
 .../s3/file/KafkaFileEventProvider.java       |  5 +++--
 .../src/main/resources/file-event.avsc        | 12 ++++++++++++
 .../s3/integration/S3SinkFileEventIT.java     |  4 ++++
 4 files changed, 37 insertions(+), 2 deletions(-)

diff --git 
a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java index a6829e2f3..1f7c6ee30 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/KafkaFileEventProvider.java @@ -21,7 +21,6 @@ import org.apache.kafka.clients.producer.ProducerRecord; import org.joda.time.DateTime; -import java.io.Closeable; import java.io.IOException; public class KafkaFileEventProvider extends FileEventProvider { @@ -54,7 +53,9 @@ public void callImpl( formatDateRFC3339(baseRecordTimestamp), formatDateRFC3339(currentTimestamp), recordCount, - formatDateRFC3339(eventDatetime)); + formatDateRFC3339(eventDatetime), + kafkaConfig.getDatabaseName(), + kafkaConfig.getTableName()); producer.send( new ProducerRecord<>(kafkaConfig.getTopicName(), key, value), (event, ex) -> { diff --git a/kafka-connect-s3/src/main/resources/file-event.avsc b/kafka-connect-s3/src/main/resources/file-event.avsc index a33c1b119..13d3aaac8 100644 --- a/kafka-connect-s3/src/main/resources/file-event.avsc +++ b/kafka-connect-s3/src/main/resources/file-event.avsc @@ -43,6 +43,18 @@ "name": "event_datetime", "type": "string", "doc": "The time of the file event, in RFC 3339" + }, + { + "name": "database_name", + "type": ["null", "string"], + "doc": "The database name of the record being written", + "default": null + }, + { + "name": "table_name", + "type": ["null", "string"], + "doc": "The table name of the record being written", + "default": null } ] } \ No newline at end of file diff --git a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java index ba52d1f9c..307e93ebd 100644 --- a/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java +++ b/kafka-connect-s3/src/test/java/io/confluent/connect/s3/integration/S3SinkFileEventIT.java @@ -133,6 +133,8 @@ public void testBasicRecordsWrittenParquetAndRelatedFileEvents() throws Throwabl FILE_EVENT_CONFIG_JSON, new KafkaFileEventConfig( topicFileEvent, + null, + null, connect.kafka().bootstrapServers(), restApp.restServer.getURI().toString(), this.autoCreate) @@ -149,6 +151,8 @@ public void testFileEventPartition() { KafkaFileEventConfig kafkaFileEventConfig = new KafkaFileEventConfig( fileEventTopic, + null, + null, bootstrapServers, restApp.restServer.getURI().toString(), this.autoCreate); From 961852b10a5e676639dc9ec3840aa4d40b295910 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Thu, 14 Mar 2024 11:09:47 +0100 Subject: [PATCH 34/34] Don't fail on unknown properties --- .../io/confluent/connect/s3/file/AbstractFileEventConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/AbstractFileEventConfig.java b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/AbstractFileEventConfig.java index a9b63026a..4a0dea112 100644 --- a/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/AbstractFileEventConfig.java +++ b/kafka-connect-s3/src/main/java/io/confluent/connect/s3/file/AbstractFileEventConfig.java @@ -33,7 +33,7 @@ public static T fromJsonString( ObjectMapper instanceMapper = new ObjectMapper(); instanceMapper.setPropertyNamingStrategy( PropertyNamingStrategy.CAMEL_CASE_TO_LOWER_CASE_WITH_UNDERSCORES); - 
instanceMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true);
+    instanceMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
     instanceMapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true);
     instanceMapper.enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS);
     T value = instanceMapper.readValue(jsonContent, clazz);
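
Taken together, the series lets each file committed to S3 be announced on a side Kafka topic via the KafkaFileEventConfig and KafkaFileEventProvider classes added above. A rough usage sketch only: the constructor shape follows PATCH 33, while every literal value below (topic, brokers, registry URL, database/table names, SASL properties) is a placeholder assumed for illustration, not something defined by these patches.

import io.confluent.connect.s3.file.KafkaFileEventConfig;

import java.util.HashMap;
import java.util.Map;

public class FileEventConfigExample {
  public static void main(String[] args) {
    // Extra producer properties travel through the "custom" map; the SASL values
    // below are illustrative assumptions, not defaults shipped by the connector.
    Map<String, Object> custom = new HashMap<>();
    custom.put("security.protocol", "SASL_SSL");
    custom.put("sasl.mechanism", "PLAIN");

    // Constructor as of PATCH 33: topic, database, table, brokers, registry, custom map.
    KafkaFileEventConfig cfg = new KafkaFileEventConfig(
        "s3-file-events",              // topic_name receiving one event per committed file
        "analytics",                   // database_name (optional, may be null)
        "orders",                      // table_name (optional, may be null)
        "broker-1:9092",               // bootstrap_servers
        "http://schema-registry:8081", // schema_registry_url
        custom);

    // The resulting JSON string is the kind of value the connector expects in its
    // file event configuration property (as used in S3SinkFileEventIT).
    System.out.println(cfg.toJson());
  }
}

The printed JSON mirrors what the integration test passes through FILE_EVENT_CONFIG_JSON, and once PATCH 34 disables FAIL_ON_UNKNOWN_PROPERTIES, unrecognized keys in that JSON are ignored during deserialization instead of failing the task.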