Merge der Refaktorisierung des EndlessConsumer (Branch 'deserialization')
authorKai Moritz <kai@juplo.de>
Sun, 24 Jul 2022 10:34:53 +0000 (12:34 +0200)
committerKai Moritz <kai@juplo.de>
Sun, 24 Jul 2022 13:26:23 +0000 (15:26 +0200)
* Um die Implementierung besser testen zu können, wurde die Anwendung
  in dem Branch 'deserialization' refaktorisiert.
* Diese Refaktorisierung wird hier zusammen mit den eingeführten
  Tests gemerged.
* Der so verfügbar gemachte Test wurde so angepasst, dass er das Speichern
  des Zustands in einer MongoDB berücksichtigt.

1  2 
docker-compose.yml
pom.xml
src/main/java/de/juplo/kafka/Application.java
src/main/java/de/juplo/kafka/ApplicationConfiguration.java
src/main/java/de/juplo/kafka/EndlessConsumer.java
src/main/java/de/juplo/kafka/StatisticsDocument.java
src/test/java/de/juplo/kafka/ApplicationTests.java

@@@ -24,30 -24,13 +24,30 @@@ services
      depends_on:
        - zookeeper
  
 -  setup:
 -    image: juplo/toolbox
 -    command: >
 -      bash -c "
 -        kafka-topics --bootstrap-server kafka:9092 --delete --if-exists --topic test
 -        kafka-topics --bootstrap-server kafka:9092 --create --topic test --partitions 2
 -      "
 +  mongo:
-     image: mongo:4.4
++    image: mongo:4.4.13
 +    ports:
 +      - 27017:27017
 +    environment:
 +      MONGO_INITDB_ROOT_USERNAME: juplo
 +      MONGO_INITDB_ROOT_PASSWORD: training
 +
 +  express:
 +    image: mongo-express
 +    ports:
 +      - 8090:8081
 +    environment:
 +      ME_CONFIG_MONGODB_ADMINUSERNAME: juplo
 +      ME_CONFIG_MONGODB_ADMINPASSWORD: training
 +      ME_CONFIG_MONGODB_URL: mongodb://juplo:training@mongo:27017/
 +
 +  kafka-ui:
 +    image: provectuslabs/kafka-ui:0.3.3
 +    ports:
 +      - 8080:8080
 +    environment:
 +      KAFKA_CLUSTERS_0_NAME: local
 +      KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092
  
    cli:
      image: juplo/toolbox
diff --cc pom.xml
+++ b/pom.xml
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
      </dependency>
+     <dependency>
+       <groupId>org.springframework.kafka</groupId>
+       <artifactId>spring-kafka-test</artifactId>
+       <scope>test</scope>
+     </dependency>
+     <dependency>
+       <groupId>org.awaitility</groupId>
+       <artifactId>awaitility</artifactId>
+       <scope>test</scope>
+     </dependency>
++    <dependency>
++      <groupId>de.flapdoodle.embed</groupId>
++      <artifactId>de.flapdoodle.embed.mongo</artifactId>
++      <scope>test</scope>
++    </dependency>
    </dependencies>
  
    <build>
@@@ -1,12 -1,16 +1,15 @@@
  package de.juplo.kafka;
  
+ import lombok.extern.slf4j.Slf4j;
  import org.springframework.beans.factory.annotation.Autowired;
+ import org.springframework.boot.ApplicationArguments;
+ import org.springframework.boot.ApplicationRunner;
  import org.springframework.boot.SpringApplication;
  import org.springframework.boot.autoconfigure.SpringBootApplication;
- import org.springframework.boot.context.properties.EnableConfigurationProperties;
- import org.springframework.context.annotation.Bean;
  
- import java.util.concurrent.Executors;
+ import javax.annotation.PreDestroy;
 -import java.util.List;
+ import java.util.concurrent.ExecutorService;
+ import java.util.concurrent.TimeUnit;
  
  
  @SpringBootApplication
index 0000000,4054e93..1ba9d5b
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,67 +1,69 @@@
+ package de.juplo.kafka;
+ import org.apache.kafka.clients.consumer.ConsumerRecord;
+ import org.apache.kafka.clients.consumer.KafkaConsumer;
+ import org.apache.kafka.common.serialization.LongDeserializer;
+ import org.apache.kafka.common.serialization.StringDeserializer;
+ import org.springframework.boot.context.properties.EnableConfigurationProperties;
+ import org.springframework.context.annotation.Bean;
+ import org.springframework.context.annotation.Configuration;
+ import java.util.Properties;
+ import java.util.concurrent.ExecutorService;
+ import java.util.concurrent.Executors;
+ import java.util.function.Consumer;
+ // Spring configuration that wires the EndlessConsumer together with the
+ // beans it depends on. Settings are read from ApplicationProperties.
+ @Configuration
+ @EnableConfigurationProperties(ApplicationProperties.class)
+ public class ApplicationConfiguration
+ {
+   // Default record handler: a placeholder that does nothing with the record.
+   @Bean
+   public Consumer<ConsumerRecord<String, Long>> consumer()
+   {
+     return (record) ->
+     {
+       // Handle record
+     };
+   }
+   // Creates the EndlessConsumer from the injected beans; client-id and topic
+   // are taken from the application properties.
+   @Bean
+   public EndlessConsumer<String, Long> endlessConsumer(
+       KafkaConsumer<String, Long> kafkaConsumer,
+       ExecutorService executor,
+       Consumer<ConsumerRecord<String, Long>> handler,
++      PartitionStatisticsRepository repository,
+       ApplicationProperties properties)
+   {
+     return
+         new EndlessConsumer<>(
+             executor,
++            repository,
+             properties.getClientId(),
+             properties.getTopic(),
+             kafkaConsumer,
+             handler);
+   }
+   // Single-threaded executor that is handed to the EndlessConsumer.
+   @Bean
+   public ExecutorService executor()
+   {
+     return Executors.newSingleThreadExecutor();
+   }
+   // KafkaConsumer with String keys and Long values. The destroy method
+   // "close" is registered, so the container closes the consumer on shutdown.
+   @Bean(destroyMethod = "close")
+   public KafkaConsumer<String, Long> kafkaConsumer(ApplicationProperties properties)
+   {
+     Properties props = new Properties();
+     props.put("bootstrap.servers", properties.getBootstrapServer());
+     props.put("group.id", properties.getGroupId());
+     props.put("client.id", properties.getClientId());
+     props.put("auto.offset.reset", properties.getAutoOffsetReset());
+     // NOTE(review): presumably a short metadata age, so that topic changes
+     // are noticed quickly -- confirm the intent
+     props.put("metadata.max.age.ms", "1000");
+     props.put("key.deserializer", StringDeserializer.class.getName());
+     props.put("value.deserializer", LongDeserializer.class.getName());
+     return new KafkaConsumer<>(props);
+   }
+ }
@@@ -20,45 -18,67 +18,74 @@@ import java.util.concurrent.locks.Reent
  
  
  @Slf4j
- public class EndlessConsumer implements Runnable
+ @RequiredArgsConstructor
+ public class EndlessConsumer<K, V> implements ConsumerRebalanceListener, Runnable
  {
    private final ExecutorService executor;
-   private final String bootstrapServer;
-   private final String groupId;
 +  private final PartitionStatisticsRepository repository;
    private final String id;
    private final String topic;
-   private final String autoOffsetReset;
+   private final Consumer<K, V> consumer;
+   private final java.util.function.Consumer<ConsumerRecord<K, V>> handler;
  
    private final Lock lock = new ReentrantLock();
    private final Condition condition = lock.newCondition();
    private boolean running = false;
    private Exception exception;
    private long consumed = 0;
-   private KafkaConsumer<String, String> consumer = null;
  
+   private final Map<Integer, Map<String, Long>> seen = new HashMap<>();
+   private final Map<Integer, Long> offsets = new HashMap<>();
  
-   private final Map<Integer, Map<String, Integer>> seen = new HashMap<>();
  
+   // Rebalance callback: for every revoked partition, logs the progress made
+   // since the assignment, logs the per-key counters and stores the counters
+   // through the repository.
+   @Override
+   public void onPartitionsRevoked(Collection<TopicPartition> partitions)
+   {
+     partitions.forEach(tp ->
+     {
+       Integer partition = tp.partition();
+       Long newOffset = consumer.position(tp);
+       // NOTE(review): assumes the partition was registered before in
+       // onPartitionsAssigned() -- otherwise oldOffset is null and the
+       // subtraction below fails while unboxing; the same holds for "removed"
+       Long oldOffset = offsets.remove(partition);
+       log.info(
+           "{} - removing partition: {}, consumed {} records (offset {} -> {})",
+           id,
+           partition,
+           newOffset - oldOffset,
+           oldOffset,
+           newOffset);
+       // The counters are dropped from the in-memory state...
+       Map<String, Long> removed = seen.remove(partition);
+       for (String key : removed.keySet())
+       {
+         log.info(
+             "{} - Seen {} messages for partition={}|key={}",
+             id,
+             removed.get(key),
+             partition,
+             key);
+       }
+       // ...and saved as a StatisticsDocument for that partition
++      repository.save(new StatisticsDocument(partition, removed));
+     });
+   }
  
-   public EndlessConsumer(
-       ExecutorService executor,
-       PartitionStatisticsRepository repository,
-       String bootstrapServer,
-       String groupId,
-       String clientId,
-       String topic,
-       String autoOffsetReset)
+   @Override
+   public void onPartitionsAssigned(Collection<TopicPartition> partitions)
    {
-     this.executor = executor;
-     this.repository = repository;
-     this.bootstrapServer = bootstrapServer;
-     this.groupId = groupId;
-     this.id = clientId;
-     this.topic = topic;
-     this.autoOffsetReset = autoOffsetReset;
+     partitions.forEach(tp ->
+     {
+       Integer partition = tp.partition();
+       Long offset = consumer.position(tp);
+       log.info("{} - adding partition: {}, offset={}", id, partition, offset);
+       offsets.put(partition, offset);
 -      seen.put(partition, new HashMap<>());
++      seen.put(
++          partition,
++          repository
++              .findById(Integer.toString(tp.partition()))
++              .map(document -> document.statistics)
++              .orElse(new HashMap<>()));
+     });
    }
  
    @Override
    public void run()
    {
index be998ca,0000000..2416253
mode 100644,000000..100644
--- /dev/null
@@@ -1,28 -1,0 +1,28 @@@
-   public Map<String, Integer> statistics;
 +package de.juplo.kafka;
 +
 +import lombok.ToString;
 +import org.springframework.data.annotation.Id;
 +import org.springframework.data.mongodb.core.mapping.Document;
 +
 +import java.util.HashMap;
 +import java.util.Map;
 +
 +
 +@Document(collection = "statistics")
 +@ToString
 +public class StatisticsDocument
 +{
 +  @Id
 +  public String id;
-   public StatisticsDocument(Integer partition, Map<String, Integer> statistics)
++  public Map<String, Long> statistics;
 +
 +  public StatisticsDocument()
 +  {
 +  }
 +
++  public StatisticsDocument(Integer partition, Map<String, Long> statistics)
 +  {
 +    this.id = Integer.toString(partition);
 +    this.statistics = statistics;
 +  }
 +}
index 0000000,40dc149..caa25c5
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,315 +1,323 @@@
 -                              "consumer.topic=" + TOPIC })
+ package de.juplo.kafka;
+ import lombok.extern.slf4j.Slf4j;
+ import org.apache.kafka.clients.consumer.ConsumerRecord;
+ import org.apache.kafka.clients.consumer.KafkaConsumer;
+ import org.apache.kafka.clients.producer.KafkaProducer;
+ import org.apache.kafka.clients.producer.ProducerRecord;
+ import org.apache.kafka.common.TopicPartition;
+ import org.apache.kafka.common.errors.RecordDeserializationException;
+ import org.apache.kafka.common.serialization.*;
+ import org.apache.kafka.common.utils.Bytes;
+ import org.junit.jupiter.api.*;
+ import org.springframework.beans.factory.annotation.Autowired;
++import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
++import org.springframework.boot.test.autoconfigure.data.mongo.AutoConfigureDataMongo;
+ import org.springframework.boot.test.context.ConfigDataApplicationContextInitializer;
+ import org.springframework.boot.test.context.TestConfiguration;
+ import org.springframework.context.annotation.Bean;
+ import org.springframework.context.annotation.Import;
+ import org.springframework.kafka.test.context.EmbeddedKafka;
+ import org.springframework.test.context.TestPropertySource;
+ import org.springframework.test.context.junit.jupiter.SpringJUnitConfig;
+ import java.time.Duration;
+ import java.util.*;
+ import java.util.concurrent.ExecutionException;
+ import java.util.concurrent.ExecutorService;
+ import java.util.function.BiConsumer;
+ import java.util.function.Consumer;
+ import java.util.function.Function;
+ import java.util.stream.Collectors;
+ import java.util.stream.IntStream;
+ import static de.juplo.kafka.ApplicationTests.PARTITIONS;
+ import static de.juplo.kafka.ApplicationTests.TOPIC;
+ import static org.assertj.core.api.Assertions.*;
+ import static org.awaitility.Awaitility.*;
+ @SpringJUnitConfig(initializers = ConfigDataApplicationContextInitializer.class)
+ @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
+ @TestPropertySource(
+               properties = {
+                               "consumer.bootstrap-server=${spring.embedded.kafka.brokers}",
++                              "consumer.topic=" + TOPIC,
++                              "spring.mongodb.embedded.version=4.4.13" })
+ @EmbeddedKafka(topics = TOPIC, partitions = PARTITIONS)
++@EnableAutoConfiguration
++@AutoConfigureDataMongo
+ @Slf4j
+ class ApplicationTests
+ {
+       public static final String TOPIC = "FOO";
+       public static final int PARTITIONS = 10;
+       StringSerializer stringSerializer = new StringSerializer();
+       @Autowired
+       Serializer valueSerializer;
+       @Autowired
+       KafkaProducer<String, Bytes> kafkaProducer;
+       @Autowired
+       KafkaConsumer<String, Long> kafkaConsumer;
+       @Autowired
+       KafkaConsumer<Bytes, Bytes> offsetConsumer;
+       @Autowired
+       ApplicationProperties properties;
+       @Autowired
+       ExecutorService executor;
++      @Autowired
++      PartitionStatisticsRepository repository;
+       Consumer<ConsumerRecord<String, Long>> testHandler;
+       EndlessConsumer<String, Long> endlessConsumer;
+       Map<TopicPartition, Long> oldOffsets;
+       Map<TopicPartition, Long> newOffsets;
+       Set<ConsumerRecord<String, Long>> receivedRecords;
+       /** Test methods */
+       // Sends 100 valid messages, waits until all of them were received and
+       // until the committed offsets match the offsets seen by the handler.
+       // Finally asserts that exitStatus() throws an IllegalStateException.
+       @Test
+       @Order(1) // << The poison pill is not skipped. Hence, this test must run first
+       void commitsCurrentOffsetsOnSuccess() throws ExecutionException, InterruptedException
+       {
+               send100Messages(i ->  new Bytes(valueSerializer.serialize(TOPIC, i)));
+               await("100 records received")
+                               .atMost(Duration.ofSeconds(30))
+                               .until(() -> receivedRecords.size() >= 100);
+               await("Offsets committed")
+                               .atMost(Duration.ofSeconds(10))
+                               .untilAsserted(() ->
+                               {
+                                       checkSeenOffsetsForProgress();
+                                       compareToCommitedOffsets(newOffsets);
+                               });
+               // The description has to be set before isThrownBy() is called:
+               // a description set afterwards is not used for the assertion
+               assertThatExceptionOfType(IllegalStateException.class)
+                               .describedAs("Consumer should still be running")
+                               .isThrownBy(() -> endlessConsumer.exitStatus());
+       }
+       // Sends 100 messages, of which message 77 carries a serialized String
+       // instead of a serialized Long. Waits until the consumer has stopped,
+       // restarts it and waits until it has stopped again. After each stop, the
+       // committed offsets are compared to the offsets seen by the handler.
+       @Test
+       @Order(2)
+       void commitsOffsetOfErrorForReprocessingOnError()
+       {
+               send100Messages(counter ->
+                               counter == 77
+                                               ? new Bytes(stringSerializer.serialize(TOPIC, "BOOM!"))
+                                               : new Bytes(valueSerializer.serialize(TOPIC, counter)));
+               await("Consumer failed")
+                               .atMost(Duration.ofSeconds(30))
+                               .until(() -> !endlessConsumer.running());
+               checkSeenOffsetsForProgress();
+               compareToCommitedOffsets(newOffsets);
+               // Restart: the consumer is expected to stop once more
+               endlessConsumer.start();
+               await("Consumer failed")
+                               .atMost(Duration.ofSeconds(30))
+                               .until(() -> !endlessConsumer.running());
+               checkSeenOffsetsForProgress();
+               compareToCommitedOffsets(newOffsets);
+               // Not all of the 100 sent records must have reached the handler
+               assertThat(receivedRecords.size())
+                               .describedAs("Received not all sent events")
+                               .isLessThan(100);
+               assertThatNoException()
+                               .describedAs("Consumer should not be running")
+                               .isThrownBy(() -> endlessConsumer.exitStatus());
+               assertThat(endlessConsumer.exitStatus())
+                               .describedAs("Consumer should have exited abnormally")
+                               .containsInstanceOf(RecordDeserializationException.class);
+       }
+       /** Helper methods for the verification of expectations */
+       // Asserts for every partition, that the position reported by the
+       // offset-consumer equals the given offset plus one. The given map holds
+       // offsets of records, hence the expected position is one ahead.
+       void compareToCommitedOffsets(Map<TopicPartition, Long> offsetsToCheck)
+       {
+               doForCurrentOffsets((tp, offset) ->
+               {
+                       Long expected = offsetsToCheck.get(tp) + 1;
+                       log.debug("Checking, if the offset for {} is {}", tp, expected);
+                       assertThat(offset)
+                                       .describedAs("Committed offset corresponds to the offset of the consumer")
+                                       .isEqualTo(expected);
+               });
+       }
+       // Asserts that for at least one partition the offset in newOffsets
+       // differs from the offset in oldOffsets.
+       void checkSeenOffsetsForProgress()
+       {
+               // Be sure, that some messages were consumed...!
+               Set<TopicPartition> withProgress = new HashSet<>();
+               partitions().forEach(tp ->
+               {
+                       Long oldOffset = oldOffsets.get(tp);
+                       Long newOffset = newOffsets.get(tp);
+                       if (!oldOffset.equals(newOffset))
+                       {
+                               log.debug("Progress for {}: {} -> {}", tp, oldOffset, newOffset);
+                               withProgress.add(tp);
+                       }
+               });
+               assertThat(withProgress)
+                               .describedAs("Some offsets must have changed, compared to the old offset-positions")
+                               .isNotEmpty();
+       }
+       /** Helper methods for setting up and running the tests */
+       // Assigns all partitions of the topic to the offset-consumer, passes the
+       // position of each partition to the given callback and releases the
+       // assignment afterwards.
+       void doForCurrentOffsets(BiConsumer<TopicPartition, Long> consumer)
+       {
+               offsetConsumer.assign(partitions());
+               partitions().forEach(tp -> consumer.accept(tp, offsetConsumer.position(tp)));
+               offsetConsumer.unsubscribe();
+       }
+       // Returns one TopicPartition of TOPIC for each partition number in the
+       // range from 0 (inclusive) to PARTITIONS (exclusive).
+       List<TopicPartition> partitions()
+       {
+               return
+                               IntStream
+                                               .range(0, PARTITIONS)
+                                               .mapToObj(partition -> new TopicPartition(TOPIC, partition))
+                                               .collect(Collectors.toList());
+       }
+       // Sends 10 messages to each of 10 partitions. The keys alternate between
+       // "0" and "1". The value of each message is created by the given
+       // generator, which is called with a counter that runs from 1 to 100.
+       void send100Messages(Function<Long, Bytes> messageGenerator)
+       {
+               long i = 0;
+               // NOTE(review): the literal 10 presumably mirrors PARTITIONS -- confirm
+               for (int partition = 0; partition < 10; partition++)
+               {
+                       for (int key = 0; key < 10; key++)
+                       {
+                               Bytes value = messageGenerator.apply(++i);
+                               ProducerRecord<String, Bytes> record =
+                                               new ProducerRecord<>(
+                                                               TOPIC,
+                                                               partition,
+                                                               Integer.toString(key%2),
+                                                               value);
+                               // The callback only logs the outcome of the send
+                               kafkaProducer.send(record, (metadata, e) ->
+                               {
+                                       if (metadata != null)
+                                       {
+                                               log.debug(
+                                                               "{}|{} - {}={}",
+                                                               metadata.partition(),
+                                                               metadata.offset(),
+                                                               record.key(),
+                                                               record.value());
+                                       }
+                                       else
+                                       {
+                                               log.warn(
+                                                               "Exception for {}={}: {}",
+                                                               record.key(),
+                                                               record.value(),
+                                                               e.toString());
+                                       }
+                               });
+                       }
+               }
+       }
+       // Resets the state of the test, records the current offsets and starts
+       // a fresh EndlessConsumer with a handler that captures every record.
+       @BeforeEach
+       public void init()
+       {
+               testHandler = record -> {} ;
+               oldOffsets = new HashMap<>();
+               newOffsets = new HashMap<>();
+               receivedRecords = new HashSet<>();
+               // Both maps start with position - 1, the counterpart of the + 1
+               // that compareToCommitedOffsets() applies
+               doForCurrentOffsets((tp, offset) ->
+               {
+                       oldOffsets.put(tp, offset - 1);
+                       newOffsets.put(tp, offset - 1);
+               });
+               // Remembers offset and record, before the test handler is called
+               Consumer<ConsumerRecord<String, Long>> captureOffsetAndExecuteTestHandler =
+                               record ->
+                               {
+                                       newOffsets.put(
+                                                       new TopicPartition(record.topic(), record.partition()),
+                                                       record.offset());
+                                       receivedRecords.add(record);
+                                       testHandler.accept(record);
+                               };
+               endlessConsumer =
+                               new EndlessConsumer<>(
+                                               executor,
++                                              repository,
+                                               properties.getClientId(),
+                                               properties.getTopic(),
+                                               kafkaConsumer,
+                                               captureOffsetAndExecuteTestHandler);
+               endlessConsumer.start();
+       }
+       // Stops the consumer after each test. An exception thrown by stop() is
+       // only logged, so that it does not fail the test.
+       @AfterEach
+       public void deinit()
+       {
+               try
+               {
+                       endlessConsumer.stop();
+               }
+               catch (Exception e)
+               {
+                       log.info("Exception while stopping the consumer: {}", e.toString());
+               }
+       }
+       // Test configuration: imports the ApplicationConfiguration and adds the
+       // beans that are only needed by the tests.
+       @TestConfiguration
+       @Import(ApplicationConfiguration.class)
+       public static class Configuration
+       {
+               // Serializer for the Long values of valid messages
+               @Bean
+               Serializer<Long> serializer()
+               {
+                       return new LongSerializer();
+               }
+               // Producer that sends the values as raw bytes, so that a test can
+               // send payloads that were serialized with different serializers
+               @Bean
+               KafkaProducer<String, Bytes> kafkaProducer(ApplicationProperties properties)
+               {
+                       Properties props = new Properties();
+                       props.put("bootstrap.servers", properties.getBootstrapServer());
+                       props.put("linger.ms", 100);
+                       props.put("key.serializer", StringSerializer.class.getName());
+                       props.put("value.serializer", BytesSerializer.class.getName());
+                       return new KafkaProducer<>(props);
+               }
+               // Consumer that is used to look up offsets. It is configured with
+               // the group-id from the application properties.
+               // NOTE(review): presumably the shared group-id makes position()
+               // reflect the offsets committed by the application -- confirm
+               @Bean
+               KafkaConsumer<Bytes, Bytes> offsetConsumer(ApplicationProperties properties)
+               {
+                       Properties props = new Properties();
+                       props.put("bootstrap.servers", properties.getBootstrapServer());
+                       props.put("client.id", "OFFSET-CONSUMER");
+                       props.put("group.id", properties.getGroupId());
+                       props.put("key.deserializer", BytesDeserializer.class.getName());
+                       props.put("value.deserializer", BytesDeserializer.class.getName());
+                       return new KafkaConsumer<>(props);
+               }
+       }
+ }