Wordcount-Implementierung mit Kafka-Bordmitteln und MongoDB als Storage
[demos/kafka/training] / src / main / java / de / juplo / kafka / EndlessConsumer.java
index 0bf5925..01f9057 100644 (file)
@@ -4,25 +4,35 @@ import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.kafka.clients.consumer.*;
 import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.errors.RecordDeserializationException;
 import org.apache.kafka.common.errors.WakeupException;
 
 import javax.annotation.PreDestroy;
+import java.time.Clock;
 import java.time.Duration;
+import java.time.Instant;
 import java.util.*;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
+import java.util.regex.Pattern;
 
 
 @Slf4j
 @RequiredArgsConstructor
-public class EndlessConsumer implements Runnable
+public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
 {
+  final static Pattern PATTERN = Pattern.compile("\\W+");
+
+
   private final ExecutorService executor;
+  private final PartitionStatisticsRepository repository;
   private final String id;
   private final String topic;
+  private final Clock clock;
+  private final Duration commitInterval;
   private final Consumer<String, String> consumer;
 
   private final Lock lock = new ReentrantLock();
@@ -31,8 +41,47 @@ public class EndlessConsumer implements Runnable
   private Exception exception;
   private long consumed = 0;
 
-  private final Map<Integer, Map<String, Long>> seen = new HashMap<>();
-  private final Map<Integer, Long> offsets = new HashMap<>();
+  private final Map<Integer, Map<String, Map<String, Long>>> seen = new HashMap<>();
+
+
+  @Override
+  public void onPartitionsRevoked(Collection<TopicPartition> partitions)
+  {
+    partitions.forEach(tp ->
+    {
+      Integer partition = tp.partition();
+      Long newOffset = consumer.position(tp);
+      log.info(
+          "{} - removing partition: {}, offset of next message {})",
+          id,
+          partition,
+          newOffset);
+      Map<String, Map<String, Long>> removed = seen.remove(partition);
+      repository.save(new StatisticsDocument(partition, removed, consumer.position(tp)));
+    });
+  }
+
+  @Override
+  public void onPartitionsAssigned(Collection<TopicPartition> partitions)
+  {
+    partitions.forEach(tp ->
+    {
+      Integer partition = tp.partition();
+      Long offset = consumer.position(tp);
+      log.info("{} - adding partition: {}, offset={}", id, partition, offset);
+      StatisticsDocument document =
+          repository
+              .findById(Integer.toString(partition))
+              .orElse(new StatisticsDocument(partition));
+      if (document.offset >= 0)
+      {
+        // Only seek if a stored offset was found.
+        // Otherwise, use the initial offset provided by Kafka.
+        consumer.seek(tp, document.offset);
+      }
+      seen.put(partition, document.statistics);
+    });
+  }
 
 
   @Override
@@ -41,49 +90,9 @@ public class EndlessConsumer implements Runnable
     try
     {
       log.info("{} - Subscribing to topic {}", id, topic);
-      consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener()
-      {
-        @Override
-        public void onPartitionsRevoked(Collection<TopicPartition> partitions)
-        {
-          partitions.forEach(tp ->
-          {
-            Integer partition = tp.partition();
-            Long newOffset = consumer.position(tp);
-            Long oldOffset = offsets.remove(partition);
-            log.info(
-                "{} - removing partition: {}, consumed {} records (offset {} -> {})",
-                id,
-                partition,
-                newOffset - oldOffset,
-                oldOffset,
-                newOffset);
-            Map<String, Long> removed = seen.remove(partition);
-            for (String key : removed.keySet())
-            {
-              log.info(
-                  "{} - Seen {} messages for partition={}|key={}",
-                  id,
-                  removed.get(key),
-                  partition,
-                  key);
-            }
-          });
-        }
+      consumer.subscribe(Arrays.asList(topic), this);
 
-        @Override
-        public void onPartitionsAssigned(Collection<TopicPartition> partitions)
-        {
-          partitions.forEach(tp ->
-          {
-            Integer partition = tp.partition();
-            Long offset = consumer.position(tp);
-            log.info("{} - adding partition: {}, offset={}", id, partition, offset);
-            offsets.put(partition, offset);
-            seen.put(partition, new HashMap<>());
-          });
-        }
-      });
+      Instant lastCommit = clock.instant();
 
       while (true)
       {
@@ -94,7 +103,6 @@ public class EndlessConsumer implements Runnable
         log.info("{} - Received {} messages", id, records.count());
         for (ConsumerRecord<String, String> record : records)
         {
-          consumed++;
           log.info(
               "{} - {}: {}/{} - {}={}",
               id,
@@ -105,25 +113,64 @@ public class EndlessConsumer implements Runnable
               record.value()
           );
 
+          consumed++;
+
           Integer partition = record.partition();
-          String key = record.key() == null ? "NULL" : record.key();
-          Map<String, Long> byKey = seen.get(partition);
+          String user = record.key();
+          Map<String, Map<String, Long>> users = seen.get(partition);
 
-          if (!byKey.containsKey(key))
-            byKey.put(key, 0l);
+          Map<String, Long> words = users.get(user);
+          if (words == null)
+          {
+            words = new HashMap<>();
+            users.put(user, words);
+          }
 
-          long seenByKey = byKey.get(key);
-          seenByKey++;
-          byKey.put(key, seenByKey);
+          for (String word : PATTERN.split(record.value()))
+          {
+            Long num = words.get(word);
+            if (num == null)
+            {
+              num = 1l;
+            }
+            else
+            {
+              num++;
+            }
+            words.put(word, num);
+          }
+        }
+
+        if (lastCommit.plus(commitInterval).isBefore(clock.instant()))
+        {
+          log.debug("Storing data and offsets, last commit: {}", lastCommit);
+          seen.forEach((partiton, statistics) -> repository.save(
+              new StatisticsDocument(
+                  partiton,
+                  statistics,
+                  consumer.position(new TopicPartition(topic, partiton)))));
+          lastCommit = clock.instant();
         }
       }
     }
     catch(WakeupException e)
     {
       log.info("{} - RIIING! Request to stop consumption - commiting current offsets!", id);
-      consumer.commitSync();
       shutdown();
     }
+    catch(RecordDeserializationException e)
+    {
+      TopicPartition tp = e.topicPartition();
+      long offset = e.offset();
+      log.error(
+          "{} - Could not deserialize  message on topic {} with offset={}: {}",
+          id,
+          tp,
+          offset,
+          e.getCause().toString());
+
+      shutdown(e);
+    }
     catch(Exception e)
     {
       log.error("{} - Unexpected error: {}", id, e.toString(), e);
@@ -171,7 +218,7 @@ public class EndlessConsumer implements Runnable
     }
   }
 
-  public Map<Integer, Map<String, Long>> getSeen()
+  public Map<Integer, Map<String, Map<String, Long>>> getSeen()
   {
     return seen;
   }
@@ -195,7 +242,7 @@ public class EndlessConsumer implements Runnable
     }
   }
 
-  public synchronized void stop() throws ExecutionException, InterruptedException
+  public synchronized void stop() throws InterruptedException
   {
     lock.lock();
     try
@@ -218,22 +265,7 @@ public class EndlessConsumer implements Runnable
   public void destroy() throws ExecutionException, InterruptedException
   {
     log.info("{} - Destroy!", id);
-    try
-    {
-      stop();
-    }
-    catch (IllegalStateException e)
-    {
-      log.info("{} - Was already stopped", id);
-    }
-    catch (Exception e)
-    {
-      log.error("{} - Unexpected exception while trying to stop the consumer", id, e);
-    }
-    finally
-    {
-      log.info("{}: Consumed {} messages in total, exiting!", id, consumed);
-    }
+    log.info("{}: Consumed {} messages in total, exiting!", id, consumed);
   }
 
   public boolean running()