fix: Errors during shard-publishing should not kill the instance
[demos/kafka/chat] / src / main / java / de / juplo / kafka / chat / backend / implementation / kafka / DataChannel.java
index 2fa4998..b4cc33f 100644 (file)
@@ -7,10 +7,7 @@ import de.juplo.kafka.chat.backend.implementation.kafka.messages.AbstractMessage
 import de.juplo.kafka.chat.backend.implementation.kafka.messages.data.EventChatMessageReceivedTo;
 import lombok.Getter;
 import lombok.extern.slf4j.Slf4j;
-import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
-import org.apache.kafka.clients.consumer.ConsumerRecord;
-import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.*;
 import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerRecord;
 import org.apache.kafka.common.TopicPartition;
@@ -31,6 +28,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
   private final Consumer<String, AbstractMessageTo> consumer;
   private final ZoneId zoneId;
   private final int numShards;
+  private final Duration pollingInterval;
   private final int bufferSize;
   private final Clock clock;
   private final boolean[] isShardOwned;
@@ -52,6 +50,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     Consumer<String, AbstractMessageTo> dataChannelConsumer,
     ZoneId zoneId,
     int numShards,
+    Duration pollingInterval,
     int bufferSize,
     Clock clock,
     InfoChannel infoChannel,
@@ -68,6 +67,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     this.producer = producer;
     this.zoneId = zoneId;
     this.numShards = numShards;
+    this.pollingInterval = pollingInterval;
     this.bufferSize = bufferSize;
     this.clock = clock;
     this.isShardOwned = new boolean[numShards];
@@ -147,11 +147,17 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
       infoChannel.sendShardAssignedEvent(partition);
       shardingPublisherStrategy
           .publishOwnership(partition)
-          .doOnNext(instanceId -> log.info(
-              "Instance {} was published as owner of shard {}",
+          .doOnSuccess(instanceId -> log.info(
+              "Successfully published instance {} as owner of shard {}",
               instanceId,
               partition))
-          .subscribe();
+          .doOnError(throwable -> log.error(
+              "Could not publish instance {} as owner of shard {}: {}",
+              instanceId,
+              partition,
+              throwable.toString()))
+          .onErrorComplete()
+          .block();
     });
 
     consumer.resume(partitions);
@@ -164,6 +170,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     {
       int partition = topicPartition.partition();
       isShardOwned[partition] = false;
+      nextOffset[partition] = consumer.position(topicPartition);
       log.info("Partition revoked: {} - next={}", partition, nextOffset[partition]);
       infoChannel.sendShardRevokedEvent(partition);
     });
@@ -186,7 +193,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     {
       try
       {
-        ConsumerRecords<String, AbstractMessageTo> records = consumer.poll(Duration.ofMinutes(1));
+        ConsumerRecords<String, AbstractMessageTo> records = consumer.poll(pollingInterval);
         log.info("Fetched {} messages", records.count());
 
         if (loadInProgress)
@@ -266,6 +273,11 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     KafkaChatMessageService kafkaChatRoomService =
         (KafkaChatMessageService) chatRoomData.getChatRoomService();
 
+    log.debug(
+        "Loaded message from partition={} at offset={}: {}",
+        partition,
+        offset,
+        message);
     kafkaChatRoomService.persistMessage(message);
   }
 
@@ -274,7 +286,12 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     return IntStream
         .range(0, numShards)
         .filter(shard -> isShardOwned[shard])
-        .allMatch(shard -> nextOffset[shard] >= currentOffset[shard]);
+        .allMatch(shard ->
+        {
+          TopicPartition partition = new TopicPartition(topic, shard);
+          long position = consumer.position(partition);
+          return position >= currentOffset[shard];
+        });
   }
 
   private void pauseAllOwnedPartions()
@@ -319,4 +336,9 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     KafkaChatMessageService service = new KafkaChatMessageService(this, chatRoomId);
     return new ChatRoomData(clock, service, bufferSize);
   }
+
+  ConsumerGroupMetadata getConsumerGroupMetadata()
+  {
+    return consumer.groupMetadata();
+  }
 }