refactor: adapted the implementation to the `stored-offsets` branch
author Kai Moritz <kai@juplo.de>
Thu, 11 Aug 2022 18:52:35 +0000 (20:52 +0200)
committer Kai Moritz <kai@juplo.de>
Fri, 12 Aug 2022 14:45:21 +0000 (16:45 +0200)
src/main/java/de/juplo/kafka/ApplicationConfiguration.java
src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java
src/main/java/de/juplo/kafka/DriverController.java
src/main/java/de/juplo/kafka/EndlessConsumer.java
src/main/java/de/juplo/kafka/RecordHandler.java [new file with mode: 0644]
src/main/java/de/juplo/kafka/WordcountRecordHandler.java [new file with mode: 0644]
src/test/java/de/juplo/kafka/ApplicationTests.java
src/test/java/de/juplo/kafka/TestRecordHandler.java [new file with mode: 0644]

diff --git a/src/main/java/de/juplo/kafka/ApplicationConfiguration.java b/src/main/java/de/juplo/kafka/ApplicationConfiguration.java
index 2cf263e..b077a90 100644
--- a/src/main/java/de/juplo/kafka/ApplicationConfiguration.java
+++ b/src/main/java/de/juplo/kafka/ApplicationConfiguration.java
@@ -1,6 +1,6 @@
 package de.juplo.kafka;
 
-import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.Consumer;
 import org.apache.kafka.clients.consumer.KafkaConsumer;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.springframework.boot.context.properties.EnableConfigurationProperties;
@@ -11,7 +11,6 @@ import java.time.Clock;
 import java.util.Properties;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
-import java.util.function.Consumer;
 
 
 @Configuration
@@ -19,21 +18,34 @@ import java.util.function.Consumer;
 public class ApplicationConfiguration
 {
   @Bean
-  public EndlessConsumer endlessConsumer(
+  public WordcountRecordHandler wordcountRecordHandler(
+      PartitionStatisticsRepository repository,
+      Consumer<String, String> consumer,
+      ApplicationProperties properties)
+  {
+    return new WordcountRecordHandler(
+        repository,
+        properties.getClientId(),
+        properties.getTopic(),
+        Clock.systemDefaultZone(),
+        properties.getCommitInterval(),
+        consumer);
+  }
+
+  @Bean
+  public EndlessConsumer<String, String> endlessConsumer(
       KafkaConsumer<String, String> kafkaConsumer,
       ExecutorService executor,
-      PartitionStatisticsRepository repository,
+      WordcountRecordHandler wordcountRecordHandler,
       ApplicationProperties properties)
   {
     return
-        new EndlessConsumer(
+        new EndlessConsumer<>(
             executor,
-            repository,
             properties.getClientId(),
             properties.getTopic(),
-            Clock.systemDefaultZone(),
-            properties.getCommitInterval(),
-            kafkaConsumer);
+            kafkaConsumer,
+            wordcountRecordHandler);
   }
 
   @Bean
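
Worth noting before the next file: the changed import at the top swaps java.util.function.Consumer for org.apache.kafka.clients.consumer.Consumer. A small illustration (not part of the commit; the broker address is a placeholder) of why the existing KafkaConsumer bean satisfies the new injection point:

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.util.Properties;

Properties props = new Properties();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());

// KafkaConsumer implements Kafka's Consumer interface, so the same bean
// can be injected wherever a Consumer<String, String> is required:
Consumer<String, String> consumer = new KafkaConsumer<>(props);
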
diff --git a/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java b/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java
index ab9782c..df4e653 100644
--- a/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java
+++ b/src/main/java/de/juplo/kafka/ApplicationHealthIndicator.java
@@ -10,7 +10,7 @@ import org.springframework.stereotype.Component;
 @RequiredArgsConstructor
 public class ApplicationHealthIndicator implements HealthIndicator
 {
-  private final EndlessConsumer consumer;
+  private final EndlessConsumer<String, String> consumer;
 
 
   @Override
diff --git a/src/main/java/de/juplo/kafka/DriverController.java b/src/main/java/de/juplo/kafka/DriverController.java
index e64d6b8..5d6c1a8 100644
--- a/src/main/java/de/juplo/kafka/DriverController.java
+++ b/src/main/java/de/juplo/kafka/DriverController.java
@@ -14,6 +14,7 @@ import java.util.concurrent.ExecutionException;
 public class DriverController
 {
   private final EndlessConsumer consumer;
+  private final WordcountRecordHandler wordcount;
 
 
   @PostMapping("start")
@@ -32,13 +33,13 @@ public class DriverController
   @GetMapping("seen")
   public Map<Integer, Map<String, Map<String, Long>>> seen()
   {
-    return consumer.getSeen();
+    return wordcount.getSeen();
   }
 
   @GetMapping("seen/{user}")
   public ResponseEntity<Map<String, Long>> seen(@PathVariable String user)
   {
-    for (Map<String, Map<String, Long>> users : consumer.getSeen().values())
+    for (Map<String, Map<String, Long>> users : wordcount.getSeen().values())
     {
       Map<String, Long> words = users.get(user);
       if (words != null)
diff --git a/src/main/java/de/juplo/kafka/EndlessConsumer.java b/src/main/java/de/juplo/kafka/EndlessConsumer.java
index 01f9057..0c107f3 100644
--- a/src/main/java/de/juplo/kafka/EndlessConsumer.java
+++ b/src/main/java/de/juplo/kafka/EndlessConsumer.java
@@ -8,32 +8,24 @@ import org.apache.kafka.common.errors.RecordDeserializationException;
 import org.apache.kafka.common.errors.WakeupException;
 
 import javax.annotation.PreDestroy;
-import java.time.Clock;
 import java.time.Duration;
-import java.time.Instant;
 import java.util.*;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
-import java.util.regex.Pattern;
 
 
 @Slf4j
 @RequiredArgsConstructor
-public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
+public class EndlessConsumer<K, V> implements ConsumerRebalanceListener, Runnable
 {
-  final static Pattern PATTERN = Pattern.compile("\\W+");
-
-
   private final ExecutorService executor;
-  private final PartitionStatisticsRepository repository;
   private final String id;
   private final String topic;
-  private final Clock clock;
-  private final Duration commitInterval;
-  private final Consumer<String, String> consumer;
+  private final Consumer<K, V> consumer;
+  private final RecordHandler<K, V> handler;
 
   private final Lock lock = new ReentrantLock();
   private final Condition condition = lock.newCondition();
@@ -41,46 +33,17 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
   private Exception exception;
   private long consumed = 0;
 
-  private final Map<Integer, Map<String, Map<String, Long>>> seen = new HashMap<>();
-
 
   @Override
   public void onPartitionsRevoked(Collection<TopicPartition> partitions)
   {
-    partitions.forEach(tp ->
-    {
-      Integer partition = tp.partition();
-      Long newOffset = consumer.position(tp);
-      log.info(
-          "{} - removing partition: {}, offset of next message {})",
-          id,
-          partition,
-          newOffset);
-      Map<String, Map<String, Long>> removed = seen.remove(partition);
-      repository.save(new StatisticsDocument(partition, removed, consumer.position(tp)));
-    });
+    partitions.forEach(tp -> handler.onPartitionRevoked(tp));
   }
 
   @Override
   public void onPartitionsAssigned(Collection<TopicPartition> partitions)
   {
-    partitions.forEach(tp ->
-    {
-      Integer partition = tp.partition();
-      Long offset = consumer.position(tp);
-      log.info("{} - adding partition: {}, offset={}", id, partition, offset);
-      StatisticsDocument document =
-          repository
-              .findById(Integer.toString(partition))
-              .orElse(new StatisticsDocument(partition));
-      if (document.offset >= 0)
-      {
-        // Only seek, if a stored offset was found
-        // Otherwise: Use initial offset, generated by Kafka
-        consumer.seek(tp, document.offset);
-      }
-      seen.put(partition, document.statistics);
-    });
+    partitions.forEach(tp -> handler.onPartitionAssigned(tp));
   }
 
 
@@ -92,16 +55,14 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
       log.info("{} - Subscribing to topic {}", id, topic);
       consumer.subscribe(Arrays.asList(topic), this);
 
-      Instant lastCommit = clock.instant();
-
       while (true)
       {
-        ConsumerRecords<String, String> records =
+        ConsumerRecords<K, V> records =
             consumer.poll(Duration.ofSeconds(1));
 
         // Do something with the data...
         log.info("{} - Received {} messages", id, records.count());
-        for (ConsumerRecord<String, String> record : records)
+        for (ConsumerRecord<K, V> record : records)
         {
           log.info(
               "{} - {}: {}/{} - {}={}",
@@ -113,44 +74,12 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
               record.value()
           );
 
-          consumed++;
-
-          Integer partition = record.partition();
-          String user = record.key();
-          Map<String, Map<String, Long>> users = seen.get(partition);
-
-          Map<String, Long> words = users.get(user);
-          if (words == null)
-          {
-            words = new HashMap<>();
-            users.put(user, words);
-          }
+          handler.accept(record);
 
-          for (String word : PATTERN.split(record.value()))
-          {
-            Long num = words.get(word);
-            if (num == null)
-            {
-              num = 1l;
-            }
-            else
-            {
-              num++;
-            }
-            words.put(word, num);
-          }
+          consumed++;
         }
 
-        if (lastCommit.plus(commitInterval).isBefore(clock.instant()))
-        {
-          log.debug("Storing data and offsets, last commit: {}", lastCommit);
-          seen.forEach((partiton, statistics) -> repository.save(
-              new StatisticsDocument(
-                  partiton,
-                  statistics,
-                  consumer.position(new TopicPartition(topic, partiton)))));
-          lastCommit = clock.instant();
-        }
+        handler.beforeNextPoll();
       }
     }
     catch(WakeupException e)
@@ -218,11 +147,6 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
     }
   }
 
-  public Map<Integer, Map<String, Map<String, Long>>> getSeen()
-  {
-    return seen;
-  }
-
   public void start()
   {
     lock.lock();
diff --git a/src/main/java/de/juplo/kafka/RecordHandler.java b/src/main/java/de/juplo/kafka/RecordHandler.java
new file mode 100644
index 0000000..ff2f193
--- /dev/null
+++ b/src/main/java/de/juplo/kafka/RecordHandler.java
@@ -0,0 +1,16 @@
+package de.juplo.kafka;
+
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.TopicPartition;
+
+import java.util.function.Consumer;
+
+
+public interface RecordHandler<K, V> extends Consumer<ConsumerRecord<K,V>>
+{
+  default void beforeNextPoll() {}
+
+  default void onPartitionAssigned(TopicPartition tp) {}
+
+  default void onPartitionRevoked(TopicPartition tp) {}
+}
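
For orientation, a minimal sketch (not part of the commit; class name and log output are made up) of what an implementation of this new interface has to provide — only accept(), since the three callbacks above come with no-op defaults:

package de.juplo.kafka;

import org.apache.kafka.clients.consumer.ConsumerRecord;


// Hypothetical minimal implementation: log each record and rely on the
// no-op defaults for beforeNextPoll() and the partition callbacks.
public class LoggingRecordHandler implements RecordHandler<String, String>
{
  @Override
  public void accept(ConsumerRecord<String, String> record)
  {
    System.out.printf(
        "%s/%d@%d: %s=%s%n",
        record.topic(),
        record.partition(),
        record.offset(),
        record.key(),
        record.value());
  }
}
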
diff --git a/src/main/java/de/juplo/kafka/WordcountRecordHandler.java b/src/main/java/de/juplo/kafka/WordcountRecordHandler.java
new file mode 100644
index 0000000..5981c7d
--- /dev/null
+++ b/src/main/java/de/juplo/kafka/WordcountRecordHandler.java
@@ -0,0 +1,119 @@
+package de.juplo.kafka;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.TopicPartition;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+
+@RequiredArgsConstructor
+@Slf4j
+public class WordcountRecordHandler implements RecordHandler<String, String>
+{
+  final static Pattern PATTERN = Pattern.compile("\\W+");
+
+
+  private final PartitionStatisticsRepository repository;
+  private final String id;
+  private final String topic;
+  private final Clock clock;
+  private final Duration commitInterval;
+  private final Consumer<String, String> consumer;
+
+  private final Map<Integer, Map<String, Map<String, Long>>> seen = new HashMap<>();
+
+  private Instant lastCommit = Instant.EPOCH;
+
+
+  @Override
+  public void accept(ConsumerRecord<String, String> record)
+  {
+    Integer partition = record.partition();
+    String user = record.key();
+    Map<String, Map<String, Long>> users = seen.get(partition);
+
+    Map<String, Long> words = users.get(user);
+    if (words == null)
+    {
+      words = new HashMap<>();
+      users.put(user, words);
+    }
+
+    for (String word : PATTERN.split(record.value()))
+    {
+      Long num = words.get(word);
+      if (num == null)
+      {
+        num = 1L;
+      }
+      else
+      {
+        num++;
+      }
+      words.put(word, num);
+    }
+  }
+
+
+  @Override
+  public void beforeNextPoll()
+  {
+    if (lastCommit.plus(commitInterval).isBefore(clock.instant()))
+    {
+      log.debug("Storing data and offsets, last commit: {}", lastCommit);
+      seen.forEach((partition, statistics) -> repository.save(
+          new StatisticsDocument(
+              partition,
+              statistics,
+              consumer.position(new TopicPartition(topic, partition)))));
+      lastCommit = clock.instant();
+    }
+  }
+
+  @Override
+  public void onPartitionAssigned(TopicPartition tp)
+  {
+    Integer partition = tp.partition();
+    Long offset = consumer.position(tp);
+    log.info("{} - adding partition: {}, offset={}", id, partition, offset);
+    StatisticsDocument document =
+        repository
+            .findById(Integer.toString(partition))
+            .orElse(new StatisticsDocument(partition));
+    if (document.offset >= 0)
+    {
+      // Only seek, if a stored offset was found
+      // Otherwise: Use initial offset, generated by Kafka
+      consumer.seek(tp, document.offset);
+    }
+    seen.put(partition, document.statistics);
+  }
+
+  @Override
+  public void onPartitionRevoked(TopicPartition tp)
+  {
+    Integer partition = tp.partition();
+    Long newOffset = consumer.position(tp);
+    log.info(
+        "{} - removing partition: {}, offset of next message {})",
+        id,
+        partition,
+        newOffset);
+    Map<String, Map<String, Long>> removed = seen.remove(partition);
+    repository.save(new StatisticsDocument(partition, removed, consumer.position(tp)));
+  }
+
+
+  public Map<Integer, Map<String, Map<String, Long>>> getSeen()
+  {
+    return seen;
+  }
+}
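
To see how the refactored pieces now plug together outside of Spring, here is a rough wiring sketch (the variables repository, executor and kafkaConsumer are assumed to exist; client-id, topic and commit interval are illustrative values):

// Assumed manual wiring, mirroring the ApplicationConfiguration beans above.
WordcountRecordHandler handler =
    new WordcountRecordHandler(
        repository,              // PartitionStatisticsRepository (assumed)
        "wordcount-client",      // client-id, illustrative
        "test-topic",            // topic, illustrative
        Clock.systemDefaultZone(),
        Duration.ofSeconds(10),  // commit interval, illustrative
        kafkaConsumer);          // org.apache.kafka.clients.consumer.Consumer (assumed)

EndlessConsumer<String, String> endlessConsumer =
    new EndlessConsumer<>(
        executor,                // ExecutorService driving the poll loop (assumed)
        "wordcount-client",
        "test-topic",
        kafkaConsumer,
        handler);                // all record processing is delegated here

endlessConsumer.start();

The design choice: EndlessConsumer keeps only the poll/rebalance/lifecycle plumbing, while everything wordcount-specific — the seen map, the MongoDB snapshots and the seek to stored offsets — now lives behind the RecordHandler interface.
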
diff --git a/src/test/java/de/juplo/kafka/ApplicationTests.java b/src/test/java/de/juplo/kafka/ApplicationTests.java
index aa6dd4d..408a826 100644
--- a/src/test/java/de/juplo/kafka/ApplicationTests.java
+++ b/src/test/java/de/juplo/kafka/ApplicationTests.java
@@ -26,7 +26,6 @@ import java.util.*;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.function.BiConsumer;
-import java.util.function.Consumer;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
@@ -71,9 +70,10 @@ class ApplicationTests
        ExecutorService executor;
        @Autowired
        PartitionStatisticsRepository repository;
+       @Autowired
+       WordcountRecordHandler wordcountRecordHandler;
 
-       Consumer<ConsumerRecord<String, String>> testHandler;
-       EndlessConsumer endlessConsumer;
+       EndlessConsumer<String, String> endlessConsumer;
        Map<TopicPartition, Long> oldOffsets;
        Map<TopicPartition, Long> newOffsets;
        Set<ConsumerRecord<String, String>> receivedRecords;
@@ -205,8 +205,6 @@ class ApplicationTests
        @BeforeEach
        public void init()
        {
-               testHandler = record -> {} ;
-
                oldOffsets = new HashMap<>();
                newOffsets = new HashMap<>();
                receivedRecords = new HashSet<>();
@@ -217,25 +215,25 @@ class ApplicationTests
                        newOffsets.put(tp, offset - 1);
                });
 
-               Consumer<ConsumerRecord<String, String>> captureOffsetAndExecuteTestHandler =
-                               record ->
-                               {
-                                       newOffsets.put(
-                                                       new TopicPartition(record.topic(), record.partition()),
-                                                       record.offset());
-                                       receivedRecords.add(record);
-                                       testHandler.accept(record);
+               TestRecordHandler<String, String> captureOffsetAndExecuteTestHandler =
+                               new TestRecordHandler<String, String>(wordcountRecordHandler) {
+                                       @Override
+                                       public void onNewRecord(ConsumerRecord<String, String> record)
+                                       {
+                                               newOffsets.put(
+                                                               new TopicPartition(record.topic(), record.partition()),
+                                                               record.offset());
+                                               receivedRecords.add(record);
+                                       }
                                };
 
                endlessConsumer =
-                               new EndlessConsumer(
+                               new EndlessConsumer<>(
                                                executor,
-                                               repository,
                                                properties.getClientId(),
                                                properties.getTopic(),
-                                               Clock.systemDefaultZone(),
-                                               properties.getCommitInterval(),
-                                               kafkaConsumer);
+                                               kafkaConsumer,
+                                               captureOffsetAndExecuteTestHandler);
 
                endlessConsumer.start();
        }
diff --git a/src/test/java/de/juplo/kafka/TestRecordHandler.java b/src/test/java/de/juplo/kafka/TestRecordHandler.java
new file mode 100644
index 0000000..4047093
--- /dev/null
+++ b/src/test/java/de/juplo/kafka/TestRecordHandler.java
@@ -0,0 +1,41 @@
+package de.juplo.kafka;
+
+import lombok.RequiredArgsConstructor;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.TopicPartition;
+
+
+@RequiredArgsConstructor
+public abstract class TestRecordHandler<K, V> implements RecordHandler<K, V>
+{
+  private final RecordHandler<K, V> handler;
+
+
+  public abstract void onNewRecord(ConsumerRecord<K, V> record);
+
+
+  @Override
+  public void accept(ConsumerRecord<K, V> record)
+  {
+    this.onNewRecord(record);
+    handler.accept(record);
+  }
+
+  @Override
+  public void beforeNextPoll()
+  {
+    handler.beforeNextPoll();
+  }
+
+  @Override
+  public void onPartitionAssigned(TopicPartition tp)
+  {
+    handler.onPartitionAssigned(tp);
+  }
+
+  @Override
+  public void onPartitionRevoked(TopicPartition tp)
+  {
+    handler.onPartitionRevoked(tp);
+  }
+}
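
Taken together, TestRecordHandler is a plain decorator: every RecordHandler callback is forwarded to the wrapped handler, and onNewRecord() gives the test a hook that runs before the delegate sees the record — which is how ApplicationTests above captures offsets while the production WordcountRecordHandler keeps doing its work.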