fix: Errors during shard-publishing should not kill the instance
[demos/kafka/chat] / src / main / java / de / juplo / kafka / chat / backend / implementation / kafka / DataChannel.java
index c13f713..b4cc33f 100644 (file)
@@ -7,10 +7,7 @@ import de.juplo.kafka.chat.backend.implementation.kafka.messages.AbstractMessage
 import de.juplo.kafka.chat.backend.implementation.kafka.messages.data.EventChatMessageReceivedTo;
 import lombok.Getter;
 import lombok.extern.slf4j.Slf4j;
-import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
-import org.apache.kafka.clients.consumer.ConsumerRecord;
-import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.*;
 import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerRecord;
 import org.apache.kafka.common.TopicPartition;
@@ -31,6 +28,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
   private final Consumer<String, AbstractMessageTo> consumer;
   private final ZoneId zoneId;
   private final int numShards;
+  private final Duration pollingInterval;
   private final int bufferSize;
   private final Clock clock;
   private final boolean[] isShardOwned;
@@ -52,6 +50,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     Consumer<String, AbstractMessageTo> dataChannelConsumer,
     ZoneId zoneId,
     int numShards,
+    Duration pollingInterval,
     int bufferSize,
     Clock clock,
     InfoChannel infoChannel,
@@ -68,6 +67,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     this.producer = producer;
     this.zoneId = zoneId;
     this.numShards = numShards;
+    this.pollingInterval = pollingInterval;
     this.bufferSize = bufferSize;
     this.clock = clock;
     this.isShardOwned = new boolean[numShards];
@@ -155,7 +155,8 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
               "Could not publish instance {} as owner of shard {}: {}",
               instanceId,
               partition,
-              throwable))
+              throwable.toString()))
+          .onErrorComplete()
           .block();
     });
 
@@ -169,6 +170,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     {
       int partition = topicPartition.partition();
       isShardOwned[partition] = false;
+      nextOffset[partition] = consumer.position(topicPartition);
       log.info("Partition revoked: {} - next={}", partition, nextOffset[partition]);
       infoChannel.sendShardRevokedEvent(partition);
     });
@@ -191,7 +193,7 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     {
       try
       {
-        ConsumerRecords<String, AbstractMessageTo> records = consumer.poll(Duration.ofMinutes(1));
+        ConsumerRecords<String, AbstractMessageTo> records = consumer.poll(pollingInterval);
         log.info("Fetched {} messages", records.count());
 
         if (loadInProgress)
@@ -334,4 +336,9 @@ public class DataChannel implements Runnable, ConsumerRebalanceListener
     KafkaChatMessageService service = new KafkaChatMessageService(this, chatRoomId);
     return new ChatRoomData(clock, service, bufferSize);
   }
+
+  ConsumerGroupMetadata getConsumerGroupMetadata() // package-private accessor: no access modifier is declared
+  {
+    return consumer.groupMetadata(); // delegates directly to the Kafka consumer held by this channel
+  }
 }