fix: Errors during shard-publishing should not kill the instance
[demos/kafka/chat] / src / main / java / de / juplo / kafka / chat / backend / implementation / kafka / DataChannel.java
index 381152b..b4cc33f 100644 (file)
@@ -7,10 +7,7 @@ import de.juplo.kafka.chat.backend.implementation.kafka.messages.AbstractMessage
 import de.juplo.kafka.chat.backend.implementation.kafka.messages.data.EventChatMessageReceivedTo;
 import lombok.Getter;
 import lombok.extern.slf4j.Slf4j;
-import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
-import org.apache.kafka.clients.consumer.ConsumerRecord;
-import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.*;
 import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerRecord;
 import org.apache.kafka.common.TopicPartition;
@@ -19,18 +16,19 @@ import reactor.core.publisher.Mono;
 
 import java.time.*;
 import java.util.*;
-import java.util.function.Function;
 import java.util.stream.IntStream;
 
 
 @Slf4j
 public class DataChannel implements Runnable, ConsumerRebalanceListener
 {
+  private final String instanceId;
   private final String topic;
   private final Producer<String, AbstractMessageTo> producer;
   private final Consumer<String, AbstractMessageTo> consumer;
   private final ZoneId zoneId;
   private final int numShards;
+  private final Duration pollingInterval;
   private final int bufferSize;
   private final Clock clock;
   private final boolean[] isShardOwned;
@@ -38,6 +36,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
   private final long[] nextOffset;
   private final Map<UUID, ChatRoomData>[] chatRoomData;
   private final InfoChannel infoChannel;
+  private final ShardingPublisherStrategy shardingPublisherStrategy;
 
   private boolean running;
   @Getter
@@ -45,24 +44,30 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
 
 
   public DataChannel(
+    String instanceId,
     String topic,
     Producer<String, AbstractMessageTo> producer,
     Consumer<String, AbstractMessageTo> dataChannelConsumer,
     ZoneId zoneId,
     int numShards,
+    Duration pollingInterval,
     int bufferSize,
     Clock clock,
-    InfoChannel infoChannel)
+    InfoChannel infoChannel,
+    ShardingPublisherStrategy shardingPublisherStrategy)
   {
     log.debug(
-        "Creating DataChannel for topic {} with {} partitions",
+        "{}: Creating DataChannel for topic {} with {} partitions",
+        instanceId,
         topic,
         numShards);
+    this.instanceId = instanceId;
     this.topic = topic;
     this.consumer = dataChannelConsumer;
     this.producer = producer;
     this.zoneId = zoneId;
     this.numShards = numShards;
+    this.pollingInterval = pollingInterval;
     this.bufferSize = bufferSize;
     this.clock = clock;
     this.isShardOwned = new boolean[numShards];
@@ -73,6 +78,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
         .range(0, numShards)
         .forEach(shard -> this.chatRoomData[shard] = new HashMap<>());
     this.infoChannel = infoChannel;
+    this.shardingPublisherStrategy = shardingPublisherStrategy;
   }
 
 
@@ -138,6 +144,20 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
           currentOffset);
 
       consumer.seek(topicPartition, nextOffset[partition]);
+      infoChannel.sendShardAssignedEvent(partition);
+      shardingPublisherStrategy
+          .publishOwnership(partition)
+          .doOnSuccess(instanceId -> log.info(
+              "Successfully published instance {} as owner of shard {}",
+              instanceId,
+              partition))
+          .doOnError(throwable -> log.error(
+              "Could not publish instance {} as owner of shard {}: {}",
+              instanceId,
+              partition,
+              throwable.toString()))
+          .onErrorComplete()
+          .block();
     });
 
     consumer.resume(partitions);
@@ -150,7 +170,9 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     {
       int partition = topicPartition.partition();
       isShardOwned[partition] = false;
+      nextOffset[partition] = consumer.position(topicPartition);
       log.info("Partition revoked: {} - next={}", partition, nextOffset[partition]);
+      infoChannel.sendShardRevokedEvent(partition);
     });
   }
 
@@ -171,7 +193,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     {
       try
       {
-        ConsumerRecords<String, AbstractMessageTo> records = consumer.poll(Duration.ofMinutes(1));
+        ConsumerRecords<String, AbstractMessageTo> records = consumer.poll(pollingInterval);
         log.info("Fetched {} messages", records.count());
 
         if (loadInProgress)
@@ -251,6 +273,11 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     KafkaChatMessageService kafkaChatRoomService =
         (KafkaChatMessageService) chatRoomData.getChatRoomService();
 
+    log.debug(
+        "Loaded message from partition={} at offset={}: {}",
+        partition,
+        offset,
+        message);
     kafkaChatRoomService.persistMessage(message);
   }
 
@@ -259,7 +286,12 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     return IntStream
         .range(0, numShards)
         .filter(shard -> isShardOwned[shard])
-        .allMatch(shard -> nextOffset[shard] >= currentOffset[shard]);
+        .allMatch(shard ->
+        {
+          TopicPartition partition = new TopicPartition(topic, shard);
+          long position = consumer.position(partition);
+          return position >= currentOffset[shard];
+        });
   }
 
   private void pauseAllOwnedPartions()
@@ -289,7 +321,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
 
     if (!isShardOwned[shard])
     {
-      return Mono.error(new ShardNotOwnedException(shard));
+      return Mono.error(new ShardNotOwnedException(instanceId, shard));
     }
 
     return infoChannel
@@ -304,4 +336,9 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     KafkaChatMessageService service = new KafkaChatMessageService(this, chatRoomId);
     return new ChatRoomData(clock, service, bufferSize);
   }
+
+  ConsumerGroupMetadata getConsumerGroupMetadata() // package-private accessor (no access modifier); takes no arguments
+  {
+    return consumer.groupMetadata(); // delegates directly to the `consumer` field; the result is returned unmodified
+  }
 }