From: Kai Moritz Date: Sun, 24 Jul 2022 10:34:53 +0000 (+0200) Subject: Merge der Refaktorisierung des EndlessConsumer (Branch 'deserialization') X-Git-Tag: sumup-requests---lvm-2-tage~5^2^2^2~2 X-Git-Url: http://juplo.de/gitweb/?a=commitdiff_plain;h=83a4bf324f5a7ec6010a7921118ec7d6e8f997cf;hp=581d0b3851f2db9b52fd049b64ca237ef0ba3515;p=demos%2Fkafka%2Ftraining Merge der Refaktorisierung des EndlessConsumer (Branch 'deserialization') * Um die Implementierung besser testen zu können, wurde die Anwendung in dem Branch 'deserialization' refaktorisiert. * Diese Refaktorisierung wird hier zusammen mit den eingeführten Tests gemerged. * Der so verfügbar gemachte Test wurde so angepasst, dass er das Speichern des Zustands in einer MongoDB berücksichtigt. --- diff --git a/docker-compose.yml b/docker-compose.yml index e30a7bb..30ae3b4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,7 +25,7 @@ services: - zookeeper mongo: - image: mongo:4.4 + image: mongo:4.4.13 ports: - 27017:27017 environment: diff --git a/pom.xml b/pom.xml index 0fbe7e6..701704d 100644 --- a/pom.xml +++ b/pom.xml @@ -51,6 +51,21 @@ spring-boot-starter-test test + + org.springframework.kafka + spring-kafka-test + test + + + org.awaitility + awaitility + test + + + de.flapdoodle.embed + de.flapdoodle.embed.mongo + test + diff --git a/src/main/java/de/juplo/kafka/Application.java b/src/main/java/de/juplo/kafka/Application.java index 2f6e4f2..d280aa6 100644 --- a/src/main/java/de/juplo/kafka/Application.java +++ b/src/main/java/de/juplo/kafka/Application.java @@ -1,40 +1,62 @@ package de.juplo.kafka; +import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.ApplicationArguments; +import org.springframework.boot.ApplicationRunner; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; -import 
org.springframework.boot.context.properties.EnableConfigurationProperties; -import org.springframework.context.annotation.Bean; -import java.util.concurrent.Executors; +import javax.annotation.PreDestroy; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; @SpringBootApplication -@EnableConfigurationProperties(ApplicationProperties.class) -public class Application +@Slf4j +public class Application implements ApplicationRunner { @Autowired - ApplicationProperties properties; + EndlessConsumer endlessConsumer; + @Autowired + ExecutorService executor; - @Bean - public EndlessConsumer consumer(PartitionStatisticsRepository repository) + @Override + public void run(ApplicationArguments args) throws Exception { - EndlessConsumer consumer = - new EndlessConsumer( - Executors.newFixedThreadPool(1), - repository, - properties.getBootstrapServer(), - properties.getGroupId(), - properties.getClientId(), - properties.getTopic(), - properties.getAutoOffsetReset()); - - consumer.start(); - - return consumer; + log.info("Starting EndlessConsumer"); + endlessConsumer.start(); } + @PreDestroy + public void stopExecutor() + { + try + { + log.info("Shutting down the ExecutorService."); + executor.shutdown(); + log.info("Waiting 5 seconds for the ExecutorService to terminate..."); + executor.awaitTermination(5, TimeUnit.SECONDS); + } + catch (InterruptedException e) + { + log.error("Exception while waiting for the termination of the ExecutorService: {}", e.toString()); + } + finally + { + if (!executor.isTerminated()) + { + log.warn("Forcing shutdown of ExecutorService!"); + executor + .shutdownNow() + .forEach(runnable -> log.warn("Unprocessed task: {}", runnable.getClass().getSimpleName())); + } + log.info("Shutdow of ExecutorService finished"); + } + } + + public static void main(String[] args) { SpringApplication.run(Application.class, args); diff --git a/src/main/java/de/juplo/kafka/ApplicationConfiguration.java 
b/src/main/java/de/juplo/kafka/ApplicationConfiguration.java new file mode 100644 index 0000000..1ba9d5b --- /dev/null +++ b/src/main/java/de/juplo/kafka/ApplicationConfiguration.java @@ -0,0 +1,69 @@ +package de.juplo.kafka; + +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.common.serialization.LongDeserializer; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import java.util.Properties; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.function.Consumer; + + +@Configuration +@EnableConfigurationProperties(ApplicationProperties.class) +public class ApplicationConfiguration +{ + @Bean + public Consumer> consumer() + { + return (record) -> + { + // Handle record + }; + } + + @Bean + public EndlessConsumer endlessConsumer( + KafkaConsumer kafkaConsumer, + ExecutorService executor, + Consumer> handler, + PartitionStatisticsRepository repository, + ApplicationProperties properties) + { + return + new EndlessConsumer<>( + executor, + repository, + properties.getClientId(), + properties.getTopic(), + kafkaConsumer, + handler); + } + + @Bean + public ExecutorService executor() + { + return Executors.newSingleThreadExecutor(); + } + + @Bean(destroyMethod = "close") + public KafkaConsumer kafkaConsumer(ApplicationProperties properties) + { + Properties props = new Properties(); + + props.put("bootstrap.servers", properties.getBootstrapServer()); + props.put("group.id", properties.getGroupId()); + props.put("client.id", properties.getClientId()); + props.put("auto.offset.reset", properties.getAutoOffsetReset()); + props.put("metadata.max.age.ms", "1000"); + props.put("key.deserializer", 
StringDeserializer.class.getName()); + props.put("value.deserializer", LongDeserializer.class.getName()); + + return new KafkaConsumer<>(props); + } +} diff --git a/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java b/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java index ab9782c..dc3a26e 100644 --- a/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java +++ b/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java @@ -10,7 +10,7 @@ import org.springframework.stereotype.Component; @RequiredArgsConstructor public class ApplicationHealthIndicator implements HealthIndicator { - private final EndlessConsumer consumer; + private final EndlessConsumer consumer; @Override diff --git a/src/main/java/de/juplo/kafka/DriverController.java b/src/main/java/de/juplo/kafka/DriverController.java index 1fb2a1b..ed38080 100644 --- a/src/main/java/de/juplo/kafka/DriverController.java +++ b/src/main/java/de/juplo/kafka/DriverController.java @@ -33,7 +33,7 @@ public class DriverController @GetMapping("seen") - public Map> seen() + public Map> seen() { return consumer.getSeen(); } diff --git a/src/main/java/de/juplo/kafka/EndlessConsumer.java b/src/main/java/de/juplo/kafka/EndlessConsumer.java index e5ef7d0..a21dd86 100644 --- a/src/main/java/de/juplo/kafka/EndlessConsumer.java +++ b/src/main/java/de/juplo/kafka/EndlessConsumer.java @@ -1,13 +1,11 @@ package de.juplo.kafka; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.consumer.*; import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.RecordDeserializationException; import org.apache.kafka.common.errors.WakeupException; -import 
org.apache.kafka.common.serialization.StringDeserializer; import javax.annotation.PreDestroy; import java.time.Duration; @@ -20,110 +18,91 @@ import java.util.concurrent.locks.ReentrantLock; @Slf4j -public class EndlessConsumer implements Runnable +@RequiredArgsConstructor +public class EndlessConsumer implements ConsumerRebalanceListener, Runnable { private final ExecutorService executor; private final PartitionStatisticsRepository repository; - private final String bootstrapServer; - private final String groupId; private final String id; private final String topic; - private final String autoOffsetReset; + private final Consumer consumer; + private final java.util.function.Consumer> handler; private final Lock lock = new ReentrantLock(); private final Condition condition = lock.newCondition(); private boolean running = false; private Exception exception; private long consumed = 0; - private KafkaConsumer consumer = null; + private final Map> seen = new HashMap<>(); + private final Map offsets = new HashMap<>(); - private final Map> seen = new HashMap<>(); + @Override + public void onPartitionsRevoked(Collection partitions) + { + partitions.forEach(tp -> + { + Integer partition = tp.partition(); + Long newOffset = consumer.position(tp); + Long oldOffset = offsets.remove(partition); + log.info( + "{} - removing partition: {}, consumed {} records (offset {} -> {})", + id, + partition, + newOffset - oldOffset, + oldOffset, + newOffset); + Map removed = seen.remove(partition); + for (String key : removed.keySet()) + { + log.info( + "{} - Seen {} messages for partition={}|key={}", + id, + removed.get(key), + partition, + key); + } + repository.save(new StatisticsDocument(partition, removed)); + }); + } - public EndlessConsumer( - ExecutorService executor, - PartitionStatisticsRepository repository, - String bootstrapServer, - String groupId, - String clientId, - String topic, - String autoOffsetReset) + @Override + public void onPartitionsAssigned(Collection 
partitions) { - this.executor = executor; - this.repository = repository; - this.bootstrapServer = bootstrapServer; - this.groupId = groupId; - this.id = clientId; - this.topic = topic; - this.autoOffsetReset = autoOffsetReset; + partitions.forEach(tp -> + { + Integer partition = tp.partition(); + Long offset = consumer.position(tp); + log.info("{} - adding partition: {}, offset={}", id, partition, offset); + offsets.put(partition, offset); + seen.put( + partition, + repository + .findById(Integer.toString(tp.partition())) + .map(document -> document.statistics) + .orElse(new HashMap<>())); + }); } + @Override public void run() { try { - Properties props = new Properties(); - props.put("bootstrap.servers", bootstrapServer); - props.put("group.id", groupId); - props.put("client.id", id); - props.put("auto.offset.reset", autoOffsetReset); - props.put("metadata.max.age.ms", "1000"); - props.put("key.deserializer", StringDeserializer.class.getName()); - props.put("value.deserializer", StringDeserializer.class.getName()); - - this.consumer = new KafkaConsumer<>(props); - log.info("{} - Subscribing to topic {}", id, topic); - consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener() - { - @Override - public void onPartitionsRevoked(Collection partitions) - { - partitions.forEach(tp -> - { - log.info("{} - removing partition: {}", id, tp); - Map removed = seen.remove(tp.partition()); - for (String key : removed.keySet()) - { - log.info( - "{} - Seen {} messages for partition={}|key={}", - id, - removed.get(key), - tp.partition(), - key); - } - repository.save(new StatisticsDocument(tp.partition(), removed)); - }); - } - - @Override - public void onPartitionsAssigned(Collection partitions) - { - partitions.forEach(tp -> - { - log.info("{} - adding partition: {}", id, tp); - seen.put( - tp.partition(), - repository - .findById(Integer.toString(tp.partition())) - .map(document -> document.statistics) - .orElse(new HashMap<>())); - }); - } - }); + 
consumer.subscribe(Arrays.asList(topic), this); while (true) { - ConsumerRecords records = + ConsumerRecords records = consumer.poll(Duration.ofSeconds(1)); // Do something with the data... log.info("{} - Received {} messages", id, records.count()); - for (ConsumerRecord record : records) + for (ConsumerRecord record : records) { - consumed++; log.info( "{} - {}: {}/{} - {}={}", id, @@ -134,14 +113,18 @@ public class EndlessConsumer implements Runnable record.value() ); + handler.accept(record); + + consumed++; + Integer partition = record.partition(); - String key = record.key() == null ? "NULL" : record.key(); - Map byKey = seen.get(partition); + String key = record.key() == null ? "NULL" : record.key().toString(); + Map byKey = seen.get(partition); if (!byKey.containsKey(key)) - byKey.put(key, 0); + byKey.put(key, 0l); - int seenByKey = byKey.get(key); + long seenByKey = byKey.get(key); seenByKey++; byKey.put(key, seenByKey); } @@ -149,9 +132,24 @@ public class EndlessConsumer implements Runnable } catch(WakeupException e) { - log.info("{} - RIIING!", id); + log.info("{} - RIIING! 
Request to stop consumption - commiting current offsets!", id); + consumer.commitSync(); shutdown(); } + catch(RecordDeserializationException e) + { + TopicPartition tp = e.topicPartition(); + long offset = e.offset(); + log.error( + "{} - Could not deserialize message on topic {} with offset={}: {}", + id, + tp, + offset, + e.getCause().toString()); + + consumer.commitSync(); + shutdown(e); + } catch(Exception e) { log.error("{} - Unexpected error: {}", id, e.toString(), e); @@ -159,8 +157,6 @@ public class EndlessConsumer implements Runnable } finally { - log.info("{} - Closing the KafkaConsumer", id); - consumer.close(); log.info("{} - Consumer-Thread exiting", id); } } @@ -175,9 +171,25 @@ public class EndlessConsumer implements Runnable lock.lock(); try { - running = false; - exception = e; - condition.signal(); + try + { + log.info("{} - Unsubscribing from topic {}", id, topic); + consumer.unsubscribe(); + } + catch (Exception ue) + { + log.error( + "{} - Error while unsubscribing from topic {}: {}", + id, + topic, + ue.toString()); + } + finally + { + running = false; + exception = e; + condition.signal(); + } } finally { @@ -185,7 +197,7 @@ public class EndlessConsumer implements Runnable } } - public Map> getSeen() + public Map> getSeen() { return seen; } diff --git a/src/main/java/de/juplo/kafka/StatisticsDocument.java b/src/main/java/de/juplo/kafka/StatisticsDocument.java index be998ca..2416253 100644 --- a/src/main/java/de/juplo/kafka/StatisticsDocument.java +++ b/src/main/java/de/juplo/kafka/StatisticsDocument.java @@ -14,13 +14,13 @@ public class StatisticsDocument { @Id public String id; - public Map statistics; + public Map statistics; public StatisticsDocument() { } - public StatisticsDocument(Integer partition, Map statistics) + public StatisticsDocument(Integer partition, Map statistics) { this.id = Integer.toString(partition); this.statistics = statistics; diff --git a/src/test/java/de/juplo/kafka/ApplicationTests.java 
b/src/test/java/de/juplo/kafka/ApplicationTests.java new file mode 100644 index 0000000..caa25c5 --- /dev/null +++ b/src/test/java/de/juplo/kafka/ApplicationTests.java @@ -0,0 +1,323 @@ +package de.juplo.kafka; + +import lombok.extern.slf4j.Slf4j; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.RecordDeserializationException; +import org.apache.kafka.common.serialization.*; +import org.apache.kafka.common.utils.Bytes; +import org.junit.jupiter.api.*; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.test.autoconfigure.data.mongo.AutoConfigureDataMongo; +import org.springframework.boot.test.context.ConfigDataApplicationContextInitializer; +import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Import; +import org.springframework.kafka.test.context.EmbeddedKafka; +import org.springframework.test.context.TestPropertySource; +import org.springframework.test.context.junit.jupiter.SpringJUnitConfig; + +import java.time.Duration; +import java.util.*; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static de.juplo.kafka.ApplicationTests.PARTITIONS; +import static de.juplo.kafka.ApplicationTests.TOPIC; +import static org.assertj.core.api.Assertions.*; +import static org.awaitility.Awaitility.*; + + +@SpringJUnitConfig(initializers = 
ConfigDataApplicationContextInitializer.class) +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +@TestPropertySource( + properties = { + "consumer.bootstrap-server=${spring.embedded.kafka.brokers}", + "consumer.topic=" + TOPIC, + "spring.mongodb.embedded.version=4.4.13" }) +@EmbeddedKafka(topics = TOPIC, partitions = PARTITIONS) +@EnableAutoConfiguration +@AutoConfigureDataMongo +@Slf4j +class ApplicationTests +{ + public static final String TOPIC = "FOO"; + public static final int PARTITIONS = 10; + + + StringSerializer stringSerializer = new StringSerializer(); + + @Autowired + Serializer valueSerializer; + @Autowired + KafkaProducer kafkaProducer; + @Autowired + KafkaConsumer kafkaConsumer; + @Autowired + KafkaConsumer offsetConsumer; + @Autowired + ApplicationProperties properties; + @Autowired + ExecutorService executor; + @Autowired + PartitionStatisticsRepository repository; + + Consumer> testHandler; + EndlessConsumer endlessConsumer; + Map oldOffsets; + Map newOffsets; + Set> receivedRecords; + + + /** Test methods */ + + @Test + @Order(1) // << The poison pill is not skipped. Hence, this test must run first + void commitsCurrentOffsetsOnSuccess() throws ExecutionException, InterruptedException + { + send100Messages(i -> new Bytes(valueSerializer.serialize(TOPIC, i))); + + await("100 records received") + .atMost(Duration.ofSeconds(30)) + .until(() -> receivedRecords.size() >= 100); + + await("Offsets committed") + .atMost(Duration.ofSeconds(10)) + .untilAsserted(() -> + { + checkSeenOffsetsForProgress(); + compareToCommitedOffsets(newOffsets); + }); + + assertThatExceptionOfType(IllegalStateException.class) + .isThrownBy(() -> endlessConsumer.exitStatus()) + .describedAs("Consumer should still be running"); + } + + @Test + @Order(2) + void commitsOffsetOfErrorForReprocessingOnError() + { + send100Messages(counter -> + counter == 77 + ? 
new Bytes(stringSerializer.serialize(TOPIC, "BOOM!")) + : new Bytes(valueSerializer.serialize(TOPIC, counter))); + + await("Consumer failed") + .atMost(Duration.ofSeconds(30)) + .until(() -> !endlessConsumer.running()); + + checkSeenOffsetsForProgress(); + compareToCommitedOffsets(newOffsets); + + endlessConsumer.start(); + await("Consumer failed") + .atMost(Duration.ofSeconds(30)) + .until(() -> !endlessConsumer.running()); + + checkSeenOffsetsForProgress(); + compareToCommitedOffsets(newOffsets); + assertThat(receivedRecords.size()) + .describedAs("Received not all sent events") + .isLessThan(100); + + assertThatNoException() + .describedAs("Consumer should not be running") + .isThrownBy(() -> endlessConsumer.exitStatus()); + assertThat(endlessConsumer.exitStatus()) + .describedAs("Consumer should have exited abnormally") + .containsInstanceOf(RecordDeserializationException.class); + } + + + /** Helper methods for the verification of expectations */ + + void compareToCommitedOffsets(Map offsetsToCheck) + { + doForCurrentOffsets((tp, offset) -> + { + Long expected = offsetsToCheck.get(tp) + 1; + log.debug("Checking, if the offset for {} is {}", tp, expected); + assertThat(offset) + .describedAs("Committed offset corresponds to the offset of the consumer") + .isEqualTo(expected); + }); + } + + void checkSeenOffsetsForProgress() + { + // Be sure, that some messages were consumed...! 
+ Set withProgress = new HashSet<>(); + partitions().forEach(tp -> + { + Long oldOffset = oldOffsets.get(tp); + Long newOffset = newOffsets.get(tp); + if (!oldOffset.equals(newOffset)) + { + log.debug("Progress for {}: {} -> {}", tp, oldOffset, newOffset); + withProgress.add(tp); + } + }); + assertThat(withProgress) + .describedAs("Some offsets must have changed, compared to the old offset-positions") + .isNotEmpty(); + } + + + /** Helper methods for setting up and running the tests */ + + void doForCurrentOffsets(BiConsumer consumer) + { + offsetConsumer.assign(partitions()); + partitions().forEach(tp -> consumer.accept(tp, offsetConsumer.position(tp))); + offsetConsumer.unsubscribe(); + } + + List partitions() + { + return + IntStream + .range(0, PARTITIONS) + .mapToObj(partition -> new TopicPartition(TOPIC, partition)) + .collect(Collectors.toList()); + } + + + void send100Messages(Function messageGenerator) + { + long i = 0; + + for (int partition = 0; partition < 10; partition++) + { + for (int key = 0; key < 10; key++) + { + Bytes value = messageGenerator.apply(++i); + + ProducerRecord record = + new ProducerRecord<>( + TOPIC, + partition, + Integer.toString(key%2), + value); + + kafkaProducer.send(record, (metadata, e) -> + { + if (metadata != null) + { + log.debug( + "{}|{} - {}={}", + metadata.partition(), + metadata.offset(), + record.key(), + record.value()); + } + else + { + log.warn( + "Exception for {}={}: {}", + record.key(), + record.value(), + e.toString()); + } + }); + } + } + } + + + @BeforeEach + public void init() + { + testHandler = record -> {} ; + + oldOffsets = new HashMap<>(); + newOffsets = new HashMap<>(); + receivedRecords = new HashSet<>(); + + doForCurrentOffsets((tp, offset) -> + { + oldOffsets.put(tp, offset - 1); + newOffsets.put(tp, offset - 1); + }); + + Consumer> captureOffsetAndExecuteTestHandler = + record -> + { + newOffsets.put( + new TopicPartition(record.topic(), record.partition()), + record.offset()); + 
receivedRecords.add(record); + testHandler.accept(record); + }; + + endlessConsumer = + new EndlessConsumer<>( + executor, + repository, + properties.getClientId(), + properties.getTopic(), + kafkaConsumer, + captureOffsetAndExecuteTestHandler); + + endlessConsumer.start(); + } + + @AfterEach + public void deinit() + { + try + { + endlessConsumer.stop(); + } + catch (Exception e) + { + log.info("Exception while stopping the consumer: {}", e.toString()); + } + } + + + @TestConfiguration + @Import(ApplicationConfiguration.class) + public static class Configuration + { + @Bean + Serializer serializer() + { + return new LongSerializer(); + } + + @Bean + KafkaProducer kafkaProducer(ApplicationProperties properties) + { + Properties props = new Properties(); + props.put("bootstrap.servers", properties.getBootstrapServer()); + props.put("linger.ms", 100); + props.put("key.serializer", StringSerializer.class.getName()); + props.put("value.serializer", BytesSerializer.class.getName()); + + return new KafkaProducer<>(props); + } + + @Bean + KafkaConsumer offsetConsumer(ApplicationProperties properties) + { + Properties props = new Properties(); + props.put("bootstrap.servers", properties.getBootstrapServer()); + props.put("client.id", "OFFSET-CONSUMER"); + props.put("group.id", properties.getGroupId()); + props.put("key.deserializer", BytesDeserializer.class.getName()); + props.put("value.deserializer", BytesDeserializer.class.getName()); + + return new KafkaConsumer<>(props); + } + } +}