From: Kai Moritz
Date: Thu, 11 Aug 2022 18:52:35 +0000 (+0200)
Subject: refactor: Adapted the implementation to the branch `stored-offsets`
X-Git-Tag: endless-stream-consumer-DEPRECATED^2^2^2~1^2~4^2~3
X-Git-Url: http://juplo.de/gitweb/?a=commitdiff_plain;h=2d84eda74475aaffff11ddfebe56d309b9aff2e9;p=demos%2Fkafka%2Ftraining

refactor: Adapted the implementation to the branch `stored-offsets`
---

diff --git a/src/main/java/de/juplo/kafka/ApplicationConfiguration.java b/src/main/java/de/juplo/kafka/ApplicationConfiguration.java
index 2cf263e..b077a90 100644
--- a/src/main/java/de/juplo/kafka/ApplicationConfiguration.java
+++ b/src/main/java/de/juplo/kafka/ApplicationConfiguration.java
@@ -1,6 +1,6 @@
 package de.juplo.kafka;
 
-import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.Consumer;
 import org.apache.kafka.clients.consumer.KafkaConsumer;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.springframework.boot.context.properties.EnableConfigurationProperties;
@@ -11,7 +11,6 @@ import java.time.Clock;
 import java.util.Properties;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
-import java.util.function.Consumer;
 
 
 @Configuration
@@ -19,21 +18,34 @@ import java.util.function.Consumer;
 public class ApplicationConfiguration
 {
   @Bean
-  public EndlessConsumer endlessConsumer(
+  public WordcountRecordHandler wordcountRecordHandler(
+      PartitionStatisticsRepository repository,
+      Consumer<String, String> consumer,
+      ApplicationProperties properties)
+  {
+    return new WordcountRecordHandler(
+        repository,
+        properties.getClientId(),
+        properties.getTopic(),
+        Clock.systemDefaultZone(),
+        properties.getCommitInterval(),
+        consumer);
+  }
+
+  @Bean
+  public EndlessConsumer<String, String> endlessConsumer(
       KafkaConsumer<String, String> kafkaConsumer,
       ExecutorService executor,
-      PartitionStatisticsRepository repository,
+      WordcountRecordHandler wordcountRecordHandler,
       ApplicationProperties properties)
   {
     return
-        new EndlessConsumer(
+        new EndlessConsumer<>(
             executor,
-            repository,
             properties.getClientId(),
             properties.getTopic(),
-            Clock.systemDefaultZone(),
-            properties.getCommitInterval(),
-            kafkaConsumer);
+            kafkaConsumer,
+            wordcountRecordHandler);
   }
 
   @Bean
diff --git a/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java b/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java
index ab9782c..df4e653 100644
--- a/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java
+++ b/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java
@@ -10,7 +10,7 @@ import org.springframework.stereotype.Component;
 @RequiredArgsConstructor
 public class ApplicationHealthIndicator implements HealthIndicator
 {
-  private final EndlessConsumer consumer;
+  private final EndlessConsumer<String, String> consumer;
 
 
   @Override
diff --git a/src/main/java/de/juplo/kafka/DriverController.java b/src/main/java/de/juplo/kafka/DriverController.java
index e64d6b8..5d6c1a8 100644
--- a/src/main/java/de/juplo/kafka/DriverController.java
+++ b/src/main/java/de/juplo/kafka/DriverController.java
@@ -14,6 +14,7 @@ import java.util.concurrent.ExecutionException;
 public class DriverController
 {
   private final EndlessConsumer consumer;
+  private final WordcountRecordHandler wordcount;
 
 
   @PostMapping("start")
@@ -32,13 +33,13 @@ public class DriverController
   @GetMapping("seen")
   public Map<Integer, Map<String, Map<String, Long>>> seen()
   {
-    return consumer.getSeen();
+    return wordcount.getSeen();
   }
 
   @GetMapping("seen/{user}")
   public ResponseEntity<Map<String, Long>> seen(@PathVariable String user)
   {
-    for (Map<String, Map<String, Long>> users : consumer.getSeen().values())
+    for (Map<String, Map<String, Long>> users : wordcount.getSeen().values())
     {
       Map<String, Long> words = users.get(user);
       if (words != null)
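The configuration above replaces the single, monolithic consumer bean with two beans: the word-count logic moves into a `WordcountRecordHandler`, and the now generic `EndlessConsumer<String, String>` merely receives it as its `RecordHandler`. As a rough sketch of the resulting object graph, the equivalent hand-wiring without Spring might look as follows (`props`, `clientId`, `topic`, `commitInterval`, `executor` and `repository` are assumed to be defined elsewhere):

    // Hypothetical hand-wiring, mirroring the @Bean methods above
    KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(props);
    WordcountRecordHandler handler =
        new WordcountRecordHandler(
            repository,
            clientId,
            topic,
            Clock.systemDefaultZone(),
            commitInterval,
            kafkaConsumer);
    EndlessConsumer<String, String> endlessConsumer =
        new EndlessConsumer<>(
            executor,
            clientId,
            topic,
            kafkaConsumer,
            handler);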
diff --git a/src/main/java/de/juplo/kafka/EndlessConsumer.java b/src/main/java/de/juplo/kafka/EndlessConsumer.java
index 01f9057..0c107f3 100644
--- a/src/main/java/de/juplo/kafka/EndlessConsumer.java
+++ b/src/main/java/de/juplo/kafka/EndlessConsumer.java
@@ -8,32 +8,24 @@ import org.apache.kafka.common.errors.RecordDeserializationException;
 import org.apache.kafka.common.errors.WakeupException;
 
 import javax.annotation.PreDestroy;
-import java.time.Clock;
 import java.time.Duration;
-import java.time.Instant;
 import java.util.*;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
-import java.util.regex.Pattern;
 
 
 @Slf4j
 @RequiredArgsConstructor
-public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
+public class EndlessConsumer<K, V> implements ConsumerRebalanceListener, Runnable
 {
-  final static Pattern PATTERN = Pattern.compile("\\W+");
-
-
   private final ExecutorService executor;
-  private final PartitionStatisticsRepository repository;
   private final String id;
   private final String topic;
-  private final Clock clock;
-  private final Duration commitInterval;
-  private final Consumer<String, String> consumer;
+  private final Consumer<K, V> consumer;
+  private final RecordHandler<K, V> handler;
 
   private final Lock lock = new ReentrantLock();
   private final Condition condition = lock.newCondition();
@@ -41,46 +33,17 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
   private Exception exception;
   private long consumed = 0;
 
-  private final Map<Integer, Map<String, Map<String, Long>>> seen = new HashMap<>();
-
 
   @Override
   public void onPartitionsRevoked(Collection<TopicPartition> partitions)
   {
-    partitions.forEach(tp ->
-    {
-      Integer partition = tp.partition();
-      Long newOffset = consumer.position(tp);
-      log.info(
-          "{} - removing partition: {}, offset of next message {})",
-          id,
-          partition,
-          newOffset);
-      Map<String, Map<String, Long>> removed = seen.remove(partition);
-      repository.save(new StatisticsDocument(partition, removed, consumer.position(tp)));
-    });
+    partitions.forEach(tp -> handler.onPartitionRevoked(tp));
   }
 
 
   @Override
   public void onPartitionsAssigned(Collection<TopicPartition> partitions)
   {
-    partitions.forEach(tp ->
-    {
-      Integer partition = tp.partition();
-      Long offset = consumer.position(tp);
-      log.info("{} - adding partition: {}, offset={}", id, partition, offset);
-      StatisticsDocument document =
-          repository
-              .findById(Integer.toString(partition))
-              .orElse(new StatisticsDocument(partition));
-      if (document.offset >= 0)
-      {
-        // Only seek, if a stored offset was found
-        // Otherwise: Use initial offset, generated by Kafka
-        consumer.seek(tp, document.offset);
-      }
-      seen.put(partition, document.statistics);
-    });
+    partitions.forEach(tp -> handler.onPartitionAssigned(tp));
   }
 
@@ -92,16 +55,14 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
       log.info("{} - Subscribing to topic {}", id, topic);
       consumer.subscribe(Arrays.asList(topic), this);
 
-      Instant lastCommit = clock.instant();
-
       while (true)
       {
-        ConsumerRecords<String, String> records =
+        ConsumerRecords<K, V> records =
             consumer.poll(Duration.ofSeconds(1));
 
         // Do something with the data...
         log.info("{} - Received {} messages", id, records.count());
-        for (ConsumerRecord<String, String> record : records)
+        for (ConsumerRecord<K, V> record : records)
         {
           log.info(
               "{} - {}: {}/{} - {}={}",
@@ -113,44 +74,12 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
               record.value()
           );
 
-          consumed++;
-
-          Integer partition = record.partition();
-          String user = record.key();
-          Map<String, Map<String, Long>> users = seen.get(partition);
-
-          Map<String, Long> words = users.get(user);
-          if (words == null)
-          {
-            words = new HashMap<>();
-            users.put(user, words);
-          }
+          handler.accept(record);
 
-          for (String word : PATTERN.split(record.value()))
-          {
-            Long num = words.get(word);
-            if (num == null)
-            {
-              num = 1l;
-            }
-            else
-            {
-              num++;
-            }
-            words.put(word, num);
-          }
+          consumed++;
         }
 
-        if (lastCommit.plus(commitInterval).isBefore(clock.instant()))
-        {
-          log.debug("Storing data and offsets, last commit: {}", lastCommit);
-          seen.forEach((partiton, statistics) -> repository.save(
-              new StatisticsDocument(
-                  partiton,
-                  statistics,
-                  consumer.position(new TopicPartition(topic, partiton)))));
-          lastCommit = clock.instant();
-        }
+        handler.beforeNextPoll();
       }
     }
     catch(WakeupException e)
@@ -218,11 +147,6 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
     }
   }
 
-  public Map<Integer, Map<String, Map<String, Long>>> getSeen()
-  {
-    return seen;
-  }
-
   public void start()
   {
     lock.lock();
diff --git a/src/main/java/de/juplo/kafka/RecordHandler.java b/src/main/java/de/juplo/kafka/RecordHandler.java
new file mode 100644
index 0000000..ff2f193
--- /dev/null
+++ b/src/main/java/de/juplo/kafka/RecordHandler.java
@@ -0,0 +1,16 @@
+package de.juplo.kafka;
+
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.TopicPartition;
+
+import java.util.function.Consumer;
+
+
+public interface RecordHandler<K, V> extends Consumer<ConsumerRecord<K, V>>
+{
+  default void beforeNextPoll() {}
+
+  default void onPartitionAssigned(TopicPartition tp) {}
+
+  default void onPartitionRevoked(TopicPartition tp) {}
+}
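The new interface `RecordHandler` is the seam introduced by this refactoring: it extends `java.util.function.Consumer<ConsumerRecord<K, V>>`, and all additional callbacks come with empty default implementations, so `accept()` remains the only abstract method and any lambda qualifies as a handler. A hypothetical example, not part of this commit:

    // Minimal handler that just prints every record
    RecordHandler<String, String> printingHandler =
        record ->
            System.out.printf(
                "%s/%d - %s=%s%n",
                record.topic(),
                record.partition(),
                record.key(),
                record.value());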
log.info("{} - Received {} messages", id, records.count()); - for (ConsumerRecord record : records) + for (ConsumerRecord record : records) { log.info( "{} - {}: {}/{} - {}={}", @@ -113,44 +74,12 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnable record.value() ); - consumed++; - - Integer partition = record.partition(); - String user = record.key(); - Map> users = seen.get(partition); - - Map words = users.get(user); - if (words == null) - { - words = new HashMap<>(); - users.put(user, words); - } + handler.accept(record); - for (String word : PATTERN.split(record.value())) - { - Long num = words.get(word); - if (num == null) - { - num = 1l; - } - else - { - num++; - } - words.put(word, num); - } + consumed++; } - if (lastCommit.plus(commitInterval).isBefore(clock.instant())) - { - log.debug("Storing data and offsets, last commit: {}", lastCommit); - seen.forEach((partiton, statistics) -> repository.save( - new StatisticsDocument( - partiton, - statistics, - consumer.position(new TopicPartition(topic, partiton))))); - lastCommit = clock.instant(); - } + handler.beforeNextPoll(); } } catch(WakeupException e) @@ -218,11 +147,6 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnable } } - public Map>> getSeen() - { - return seen; - } - public void start() { lock.lock(); diff --git a/src/main/java/de/juplo/kafka/RecordHandler.java b/src/main/java/de/juplo/kafka/RecordHandler.java new file mode 100644 index 0000000..ff2f193 --- /dev/null +++ b/src/main/java/de/juplo/kafka/RecordHandler.java @@ -0,0 +1,16 @@ +package de.juplo.kafka; + +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.common.TopicPartition; + +import java.util.function.Consumer; + + +public interface RecordHandler extends Consumer> +{ + default void beforeNextPoll() {} + + default void onPartitionAssigned(TopicPartition tp) {} + + default void onPartitionRevoked(TopicPartition tp) {} +} diff --git a/src/main/java/de/juplo/kafka/WordcountRecordHandler.java b/src/main/java/de/juplo/kafka/WordcountRecordHandler.java new file mode 100644 index 0000000..5981c7d --- /dev/null +++ b/src/main/java/de/juplo/kafka/WordcountRecordHandler.java @@ -0,0 +1,119 @@ +package de.juplo.kafka; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.common.TopicPartition; + +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Pattern; + + +@RequiredArgsConstructor +@Slf4j +public class WordcountRecordHandler implements RecordHandler +{ + final static Pattern PATTERN = Pattern.compile("\\W+"); + + + private final PartitionStatisticsRepository repository; + private final String id; + private final String topic; + private final Clock clock; + private final Duration commitInterval; + private final Consumer consumer; + + private final Map>> seen = new HashMap<>(); + + private Instant lastCommit = Instant.EPOCH; + + + @Override + public void accept(ConsumerRecord record) + { + Integer partition = record.partition(); + String user = record.key(); + Map> users = seen.get(partition); + + Map words = users.get(user); + if (words == null) + { + words = new HashMap<>(); + users.put(user, words); + } + + for (String word : PATTERN.split(record.value())) + { + Long num = words.get(word); + if (num == null) + { + num = 1l; + } + 
diff --git a/src/test/java/de/juplo/kafka/ApplicationTests.java b/src/test/java/de/juplo/kafka/ApplicationTests.java
index aa6dd4d..408a826 100644
--- a/src/test/java/de/juplo/kafka/ApplicationTests.java
+++ b/src/test/java/de/juplo/kafka/ApplicationTests.java
@@ -26,7 +26,6 @@ import java.util.*;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.function.BiConsumer;
-import java.util.function.Consumer;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
@@ -71,9 +70,10 @@ class ApplicationTests
   ExecutorService executor;
   @Autowired
   PartitionStatisticsRepository repository;
+  @Autowired
+  WordcountRecordHandler wordcountRecordHandler;
 
-  Consumer<ConsumerRecord<String, String>> testHandler;
-  EndlessConsumer endlessConsumer;
+  EndlessConsumer<String, String> endlessConsumer;
   Map<TopicPartition, Long> oldOffsets;
   Map<TopicPartition, Long> newOffsets;
   Set<ConsumerRecord<String, String>> receivedRecords;
@@ -205,8 +205,6 @@ class ApplicationTests
   @BeforeEach
   public void init()
   {
-    testHandler = record -> {} ;
-
     oldOffsets = new HashMap<>();
     newOffsets = new HashMap<>();
     receivedRecords = new HashSet<>();
@@ -217,25 +215,25 @@ class ApplicationTests
       newOffsets.put(tp, offset - 1);
     });
 
-    Consumer<ConsumerRecord<String, String>> captureOffsetAndExecuteTestHandler =
-        record ->
-        {
-          newOffsets.put(
-              new TopicPartition(record.topic(), record.partition()),
-              record.offset());
-          receivedRecords.add(record);
-          testHandler.accept(record);
+    TestRecordHandler<String, String> captureOffsetAndExecuteTestHandler =
+        new TestRecordHandler<String, String>(wordcountRecordHandler) {
+          @Override
+          public void onNewRecord(ConsumerRecord<String, String> record)
+          {
+            newOffsets.put(
+                new TopicPartition(record.topic(), record.partition()),
+                record.offset());
+            receivedRecords.add(record);
+          }
         };
 
     endlessConsumer =
-        new EndlessConsumer(
+        new EndlessConsumer<>(
            executor,
-            repository,
            properties.getClientId(),
            properties.getTopic(),
-            Clock.systemDefaultZone(),
-            properties.getCommitInterval(),
-            kafkaConsumer);
+            kafkaConsumer,
+            captureOffsetAndExecuteTestHandler);
 
     endlessConsumer.start();
   }
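The test no longer hands a plain lambda to the consumer: it wraps the real `WordcountRecordHandler` in a `TestRecordHandler` (new file below), so the production logic still runs while the test additionally captures every offset and record it has seen. Per record, the delegation works out to:

    // Sketch of the call order per record:
    // captureOffsetAndExecuteTestHandler.accept(record)
    //   -> onNewRecord(record)                    // test hook: stores offset, collects record
    //   -> wordcountRecordHandler.accept(record)  // production word-count logic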
diff --git a/src/test/java/de/juplo/kafka/TestRecordHandler.java b/src/test/java/de/juplo/kafka/TestRecordHandler.java
new file mode 100644
index 0000000..4047093
--- /dev/null
+++ b/src/test/java/de/juplo/kafka/TestRecordHandler.java
@@ -0,0 +1,41 @@
+package de.juplo.kafka;
+
+import lombok.RequiredArgsConstructor;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.TopicPartition;
+
+
+@RequiredArgsConstructor
+public abstract class TestRecordHandler<K, V> implements RecordHandler<K, V>
+{
+  private final RecordHandler<K, V> handler;
+
+
+  public abstract void onNewRecord(ConsumerRecord<K, V> record);
+
+
+  @Override
+  public void accept(ConsumerRecord<K, V> record)
+  {
+    this.onNewRecord(record);
+    handler.accept(record);
+  }
+
+  @Override
+  public void beforeNextPoll()
+  {
+    handler.beforeNextPoll();
+  }
+
+  @Override
+  public void onPartitionAssigned(TopicPartition tp)
+  {
+    handler.onPartitionAssigned(tp);
+  }
+
+  @Override
+  public void onPartitionRevoked(TopicPartition tp)
+  {
+    handler.onPartitionRevoked(tp);
+  }
+}
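`TestRecordHandler` is a plain decorator: every `RecordHandler` callback is forwarded to the wrapped handler, and `onNewRecord()` is the only hook a test has to implement. Since `RecordHandler` has a single abstract method, any lambda can serve as the delegate; a minimal hypothetical stand-alone use:

    // Hypothetical: probe a no-op handler outside of the Spring test
    RecordHandler<String, String> noop = record -> {};
    TestRecordHandler<String, String> probe =
        new TestRecordHandler<String, String>(noop)
        {
          @Override
          public void onNewRecord(ConsumerRecord<String, String> record)
          {
            System.out.println("offset: " + record.offset());
          }
        };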