Merge of the refactoring of the EndlessConsumer (branch 'stored-state')
author Kai Moritz <kai@juplo.de>
Sun, 24 Jul 2022 13:35:14 +0000 (15:35 +0200)
committer Kai Moritz <kai@juplo.de>
Fri, 12 Aug 2022 14:44:01 +0000 (16:44 +0200)
* The `commitSync()` calls no longer make sense now that the offsets are
  stored outside of Kafka.
* The test case had to be adapted to the externally stored offsets: the
  seen offsets have to be read from MongoDB instead of from Kafka via a
  separate consumer.
* The test added by this merge fails, because it uncovers a bug (an NPE
  in a log statement during offset handling; see the sketch below).
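
The NPE mentioned above most likely stems from the merged `EndlessConsumer` below:
`onPartitionsAssigned()` now seeks to the offset stored in MongoDB, but no longer
records the starting position in the `offsets` map, so the subtraction inside the
`onPartitionsRevoked()` log statement unboxes a null. The following stand-alone
sketch only illustrates that suspected failure mode; the class name and the
concrete values are made up, only the map and the arithmetic mirror the merged code:

    import java.util.HashMap;
    import java.util.Map;

    // Hypothetical sketch of the suspected NullPointerException: after the merge,
    // onPartitionsAssigned() never calls offsets.put(...), so the map consulted
    // by onPartitionsRevoked() stays empty.
    public class OffsetLoggingNpeSketch
    {
      public static void main(String[] args)
      {
        Map<Integer, Long> offsets = new HashMap<>(); // never populated after the merge

        Integer partition = 0;
        Long newOffset = 42L;                          // stands in for consumer.position(tp)
        Long oldOffset = offsets.remove(partition);    // returns null

        // Mirrors the expression passed to log.info(...) in onPartitionsRevoked():
        // auto-unboxing the null oldOffset throws a NullPointerException.
        System.out.println(newOffset - oldOffset);
      }
    }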

docker-compose.yml
src/main/java/de/juplo/kafka/ApplicationConfiguration.java
src/main/java/de/juplo/kafka/EndlessConsumer.java
src/main/java/de/juplo/kafka/StatisticsDocument.java
src/test/java/de/juplo/kafka/ApplicationTests.java

Simple merge
index 0000000,1ba9d5b..54e9b89
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,69 +1,70 @@@
+ package de.juplo.kafka;
+ import org.apache.kafka.clients.consumer.ConsumerRecord;
+ import org.apache.kafka.clients.consumer.KafkaConsumer;
+ import org.apache.kafka.common.serialization.LongDeserializer;
+ import org.apache.kafka.common.serialization.StringDeserializer;
+ import org.springframework.boot.context.properties.EnableConfigurationProperties;
+ import org.springframework.context.annotation.Bean;
+ import org.springframework.context.annotation.Configuration;
+ import java.util.Properties;
+ import java.util.concurrent.ExecutorService;
+ import java.util.concurrent.Executors;
+ import java.util.function.Consumer;
+ @Configuration
+ @EnableConfigurationProperties(ApplicationProperties.class)
+ public class ApplicationConfiguration
+ {
+   @Bean
+   public Consumer<ConsumerRecord<String, Long>> consumer()
+   {
+     return (record) ->
+     {
+       // Handle record
+     };
+   }
+   @Bean
+   public EndlessConsumer<String, Long> endlessConsumer(
+       KafkaConsumer<String, Long> kafkaConsumer,
+       ExecutorService executor,
+       Consumer<ConsumerRecord<String, Long>> handler,
+       PartitionStatisticsRepository repository,
+       ApplicationProperties properties)
+   {
+     return
+         new EndlessConsumer<>(
+             executor,
+             repository,
+             properties.getClientId(),
+             properties.getTopic(),
+             kafkaConsumer,
+             handler);
+   }
+   @Bean
+   public ExecutorService executor()
+   {
+     return Executors.newSingleThreadExecutor();
+   }
+   @Bean(destroyMethod = "close")
+   public KafkaConsumer<String, Long> kafkaConsumer(ApplicationProperties properties)
+   {
+     Properties props = new Properties();
+     props.put("bootstrap.servers", properties.getBootstrapServer());
+     props.put("group.id", properties.getGroupId());
+     props.put("client.id", properties.getClientId());
++    props.put("enable.auto.commit", false);
+     props.put("auto.offset.reset", properties.getAutoOffsetReset());
+     props.put("metadata.max.age.ms", "1000");
+     props.put("key.deserializer", StringDeserializer.class.getName());
+     props.put("value.deserializer", LongDeserializer.class.getName());
+     return new KafkaConsumer<>(props);
+   }
+ }
@@@ -35,30 -33,59 +33,58 @@@ public class EndlessConsumer<K, V> impl
    private boolean running = false;
    private Exception exception;
    private long consumed = 0;
-   private KafkaConsumer<String, String> consumer = null;
  
+   private final Map<Integer, Map<String, Long>> seen = new HashMap<>();
+   private final Map<Integer, Long> offsets = new HashMap<>();
  
-   private final Map<Integer, Map<String, Integer>> seen = new HashMap<>();
  
 -      repository.save(new StatisticsDocument(partition, removed));
+   @Override
+   public void onPartitionsRevoked(Collection<TopicPartition> partitions)
+   {
+     partitions.forEach(tp ->
+     {
+       Integer partition = tp.partition();
+       Long newOffset = consumer.position(tp);
+       Long oldOffset = offsets.remove(partition);
+       log.info(
+           "{} - removing partition: {}, consumed {} records (offset {} -> {})",
+           id,
+           partition,
+           newOffset - oldOffset,
+           oldOffset,
+           newOffset);
+       Map<String, Long> removed = seen.remove(partition);
+       for (String key : removed.keySet())
+       {
+         log.info(
+             "{} - Seen {} messages for partition={}|key={}",
+             id,
+             removed.get(key),
+             partition,
+             key);
+       }
++      repository.save(new StatisticsDocument(partition, removed, consumer.position(tp)));
+     });
+   }
  
-   public EndlessConsumer(
-       ExecutorService executor,
-       PartitionStatisticsRepository repository,
-       String bootstrapServer,
-       String groupId,
-       String clientId,
-       String topic,
-       String autoOffsetReset)
+   @Override
+   public void onPartitionsAssigned(Collection<TopicPartition> partitions)
    {
-     this.executor = executor;
-     this.repository = repository;
-     this.bootstrapServer = bootstrapServer;
-     this.groupId = groupId;
-     this.id = clientId;
-     this.topic = topic;
-     this.autoOffsetReset = autoOffsetReset;
+     partitions.forEach(tp ->
+     {
+       Integer partition = tp.partition();
+       Long offset = consumer.position(tp);
+       log.info("{} - adding partition: {}, offset={}", id, partition, offset);
 -      offsets.put(partition, offset);
 -      seen.put(
 -          partition,
++      StatisticsDocument document =
+           repository
 -              .findById(Integer.toString(tp.partition()))
 -              .map(document -> document.statistics)
 -              .orElse(new HashMap<>()));
++              .findById(Integer.toString(partition))
++              .orElse(new StatisticsDocument(partition));
++      consumer.seek(tp, document.offset);
++      seen.put(partition, document.statistics);
+     });
    }
  
    @Override
    public void run()
    {
      }
      catch(WakeupException e)
      {
-       log.info("{} - RIIING!", id);
+       log.info("{} - RIIING! Request to stop consumption - committing current offsets!", id);
 -      consumer.commitSync();
        shutdown();
      }
 -      consumer.commitSync();
+     catch(RecordDeserializationException e)
+     {
+       TopicPartition tp = e.topicPartition();
+       long offset = e.offset();
+       log.error(
+           "{} - Could not deserialize message on topic {} with offset={}: {}",
+           id,
+           tp,
+           offset,
+           e.getCause().toString());
+       shutdown(e);
+     }
      catch(Exception e)
      {
        log.error("{} - Unexpected error: {}", id, e.toString(), e);
@@@ -14,20 -14,13 +14,20 @@@ public class StatisticsDocumen
  {
    @Id
    public String id;
-   public Map<String, Integer> statistics;
 +  public long offset;
+   public Map<String, Long> statistics;
  
    public StatisticsDocument()
    {
    }
  
 -  public StatisticsDocument(Integer partition, Map<String, Long> statistics)
 +  public StatisticsDocument(Integer partition)
 +  {
 +    this.id = Integer.toString(partition);
 +    this.statistics = new HashMap<>();
 +  }
 +
-   public StatisticsDocument(Integer partition, Map<String, Integer> statistics, long offset)
++  public StatisticsDocument(Integer partition, Map<String, Long> statistics, long offset)
    {
      this.id = Integer.toString(partition);
      this.statistics = statistics;
index 0000000,caa25c5..4b7ef36
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,323 +1,313 @@@
 -      KafkaConsumer<Bytes, Bytes> offsetConsumer;
+ package de.juplo.kafka;
+ import lombok.extern.slf4j.Slf4j;
+ import org.apache.kafka.clients.consumer.ConsumerRecord;
+ import org.apache.kafka.clients.consumer.KafkaConsumer;
+ import org.apache.kafka.clients.producer.KafkaProducer;
+ import org.apache.kafka.clients.producer.ProducerRecord;
+ import org.apache.kafka.common.TopicPartition;
+ import org.apache.kafka.common.errors.RecordDeserializationException;
+ import org.apache.kafka.common.serialization.*;
+ import org.apache.kafka.common.utils.Bytes;
+ import org.junit.jupiter.api.*;
+ import org.springframework.beans.factory.annotation.Autowired;
+ import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+ import org.springframework.boot.test.autoconfigure.data.mongo.AutoConfigureDataMongo;
+ import org.springframework.boot.test.context.ConfigDataApplicationContextInitializer;
+ import org.springframework.boot.test.context.TestConfiguration;
+ import org.springframework.context.annotation.Bean;
+ import org.springframework.context.annotation.Import;
+ import org.springframework.kafka.test.context.EmbeddedKafka;
+ import org.springframework.test.context.TestPropertySource;
+ import org.springframework.test.context.junit.jupiter.SpringJUnitConfig;
+ import java.time.Duration;
+ import java.util.*;
+ import java.util.concurrent.ExecutionException;
+ import java.util.concurrent.ExecutorService;
+ import java.util.function.BiConsumer;
+ import java.util.function.Consumer;
+ import java.util.function.Function;
+ import java.util.stream.Collectors;
+ import java.util.stream.IntStream;
+ import static de.juplo.kafka.ApplicationTests.PARTITIONS;
+ import static de.juplo.kafka.ApplicationTests.TOPIC;
+ import static org.assertj.core.api.Assertions.*;
+ import static org.awaitility.Awaitility.*;
+ @SpringJUnitConfig(initializers = ConfigDataApplicationContextInitializer.class)
+ @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
+ @TestPropertySource(
+               properties = {
+                               "consumer.bootstrap-server=${spring.embedded.kafka.brokers}",
+                               "consumer.topic=" + TOPIC,
+                               "spring.mongodb.embedded.version=4.4.13" })
+ @EmbeddedKafka(topics = TOPIC, partitions = PARTITIONS)
+ @EnableAutoConfiguration
+ @AutoConfigureDataMongo
+ @Slf4j
+ class ApplicationTests
+ {
+       public static final String TOPIC = "FOO";
+       public static final int PARTITIONS = 10;
+       StringSerializer stringSerializer = new StringSerializer();
+       @Autowired
+       Serializer valueSerializer;
+       @Autowired
+       KafkaProducer<String, Bytes> kafkaProducer;
+       @Autowired
+       KafkaConsumer<String, Long> kafkaConsumer;
+       @Autowired
 -              offsetConsumer.assign(partitions());
 -              partitions().forEach(tp -> consumer.accept(tp, offsetConsumer.position(tp)));
 -              offsetConsumer.unsubscribe();
++      PartitionStatisticsRepository partitionStatisticsRepository;
+       @Autowired
+       ApplicationProperties properties;
+       @Autowired
+       ExecutorService executor;
+       @Autowired
+       PartitionStatisticsRepository repository;
+       Consumer<ConsumerRecord<String, Long>> testHandler;
+       EndlessConsumer<String, Long> endlessConsumer;
+       Map<TopicPartition, Long> oldOffsets;
+       Map<TopicPartition, Long> newOffsets;
+       Set<ConsumerRecord<String, Long>> receivedRecords;
+       /** Test methods */
+       @Test
+       @Order(1) // << The poison pill is not skipped. Hence, this test must run first
+       void commitsCurrentOffsetsOnSuccess() throws ExecutionException, InterruptedException
+       {
+               send100Messages(i ->  new Bytes(valueSerializer.serialize(TOPIC, i)));
+               await("100 records received")
+                               .atMost(Duration.ofSeconds(30))
+                               .until(() -> receivedRecords.size() >= 100);
+               await("Offsets committed")
+                               .atMost(Duration.ofSeconds(10))
+                               .untilAsserted(() ->
+                               {
+                                       checkSeenOffsetsForProgress();
+                                       compareToCommitedOffsets(newOffsets);
+                               });
+               assertThatExceptionOfType(IllegalStateException.class)
+                               .isThrownBy(() -> endlessConsumer.exitStatus())
+                               .describedAs("Consumer should still be running");
+       }
+       @Test
+       @Order(2)
+       void commitsOffsetOfErrorForReprocessingOnError()
+       {
+               send100Messages(counter ->
+                               counter == 77
+                                               ? new Bytes(stringSerializer.serialize(TOPIC, "BOOM!"))
+                                               : new Bytes(valueSerializer.serialize(TOPIC, counter)));
+               await("Consumer failed")
+                               .atMost(Duration.ofSeconds(30))
+                               .until(() -> !endlessConsumer.running());
+               checkSeenOffsetsForProgress();
+               compareToCommitedOffsets(newOffsets);
+               endlessConsumer.start();
+               await("Consumer failed")
+                               .atMost(Duration.ofSeconds(30))
+                               .until(() -> !endlessConsumer.running());
+               checkSeenOffsetsForProgress();
+               compareToCommitedOffsets(newOffsets);
+               assertThat(receivedRecords.size())
+                               .describedAs("Received not all sent events")
+                               .isLessThan(100);
+               assertThatNoException()
+                               .describedAs("Consumer should not be running")
+                               .isThrownBy(() -> endlessConsumer.exitStatus());
+               assertThat(endlessConsumer.exitStatus())
+                               .describedAs("Consumer should have exited abnormally")
+                               .containsInstanceOf(RecordDeserializationException.class);
+       }
+       /** Helper methods for the verification of expectations */
+       void compareToCommitedOffsets(Map<TopicPartition, Long> offsetsToCheck)
+       {
+               doForCurrentOffsets((tp, offset) ->
+               {
+                       Long expected = offsetsToCheck.get(tp) + 1;
+                       log.debug("Checking, if the offset for {} is {}", tp, expected);
+                       assertThat(offset)
+                                       .describedAs("Committed offset corresponds to the offset of the consumer")
+                                       .isEqualTo(expected);
+               });
+       }
+       void checkSeenOffsetsForProgress()
+       {
+               // Be sure, that some messages were consumed...!
+               Set<TopicPartition> withProgress = new HashSet<>();
+               partitions().forEach(tp ->
+               {
+                       Long oldOffset = oldOffsets.get(tp);
+                       Long newOffset = newOffsets.get(tp);
+                       if (!oldOffset.equals(newOffset))
+                       {
+                               log.debug("Progress for {}: {} -> {}", tp, oldOffset, newOffset);
+                               withProgress.add(tp);
+                       }
+               });
+               assertThat(withProgress)
+                               .describedAs("Some offsets must have changed, compared to the old offset-positions")
+                               .isNotEmpty();
+       }
+       /** Helper methods for setting up and running the tests */
+       void doForCurrentOffsets(BiConsumer<TopicPartition, Long> consumer)
+       {
 -
 -              @Bean
 -              KafkaConsumer<Bytes, Bytes> offsetConsumer(ApplicationProperties properties)
 -              {
 -                      Properties props = new Properties();
 -                      props.put("bootstrap.servers", properties.getBootstrapServer());
 -                      props.put("client.id", "OFFSET-CONSUMER");
 -                      props.put("group.id", properties.getGroupId());
 -                      props.put("key.deserializer", BytesDeserializer.class.getName());
 -                      props.put("value.deserializer", BytesDeserializer.class.getName());
 -
 -                      return new KafkaConsumer<>(props);
 -              }
++              partitions().forEach(tp ->
++              {
++                      String partition = Integer.toString(tp.partition());
++                      Optional<Long> offset = partitionStatisticsRepository.findById(partition).map(document -> document.offset);
++                      consumer.accept(tp, offset.orElse(0L));
++              });
+       }
+       List<TopicPartition> partitions()
+       {
+               return
+                               IntStream
+                                               .range(0, PARTITIONS)
+                                               .mapToObj(partition -> new TopicPartition(TOPIC, partition))
+                                               .collect(Collectors.toList());
+       }
+       void send100Messages(Function<Long, Bytes> messageGenerator)
+       {
+               long i = 0;
+               for (int partition = 0; partition < 10; partition++)
+               {
+                       for (int key = 0; key < 10; key++)
+                       {
+                               Bytes value = messageGenerator.apply(++i);
+                               ProducerRecord<String, Bytes> record =
+                                               new ProducerRecord<>(
+                                                               TOPIC,
+                                                               partition,
+                                                               Integer.toString(key%2),
+                                                               value);
+                               kafkaProducer.send(record, (metadata, e) ->
+                               {
+                                       if (metadata != null)
+                                       {
+                                               log.debug(
+                                                               "{}|{} - {}={}",
+                                                               metadata.partition(),
+                                                               metadata.offset(),
+                                                               record.key(),
+                                                               record.value());
+                                       }
+                                       else
+                                       {
+                                               log.warn(
+                                                               "Exception for {}={}: {}",
+                                                               record.key(),
+                                                               record.value(),
+                                                               e.toString());
+                                       }
+                               });
+                       }
+               }
+       }
+       @BeforeEach
+       public void init()
+       {
+               testHandler = record -> {} ;
+               oldOffsets = new HashMap<>();
+               newOffsets = new HashMap<>();
+               receivedRecords = new HashSet<>();
+               doForCurrentOffsets((tp, offset) ->
+               {
+                       oldOffsets.put(tp, offset - 1);
+                       newOffsets.put(tp, offset - 1);
+               });
+               Consumer<ConsumerRecord<String, Long>> captureOffsetAndExecuteTestHandler =
+                               record ->
+                               {
+                                       newOffsets.put(
+                                                       new TopicPartition(record.topic(), record.partition()),
+                                                       record.offset());
+                                       receivedRecords.add(record);
+                                       testHandler.accept(record);
+                               };
+               endlessConsumer =
+                               new EndlessConsumer<>(
+                                               executor,
+                                               repository,
+                                               properties.getClientId(),
+                                               properties.getTopic(),
+                                               kafkaConsumer,
+                                               captureOffsetAndExecuteTestHandler);
+               endlessConsumer.start();
+       }
+       @AfterEach
+       public void deinit()
+       {
+               try
+               {
+                       endlessConsumer.stop();
+               }
+               catch (Exception e)
+               {
+                       log.info("Exception while stopping the consumer: {}", e.toString());
+               }
+       }
+       @TestConfiguration
+       @Import(ApplicationConfiguration.class)
+       public static class Configuration
+       {
+               @Bean
+               Serializer<Long> serializer()
+               {
+                       return new LongSerializer();
+               }
+               @Bean
+               KafkaProducer<String, Bytes> kafkaProducer(ApplicationProperties properties)
+               {
+                       Properties props = new Properties();
+                       props.put("bootstrap.servers", properties.getBootstrapServer());
+                       props.put("linger.ms", 100);
+                       props.put("key.serializer", StringSerializer.class.getName());
+                       props.put("value.serializer", BytesSerializer.class.getName());
+                       return new KafkaProducer<>(props);
+               }
+       }
+ }