- zookeeper
mongo:
- image: mongo:4.4
+ image: mongo:4.4.13
ports:
- 27017:27017
environment:
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.springframework.kafka</groupId>
+ <artifactId>spring-kafka-test</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.awaitility</groupId>
+ <artifactId>awaitility</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>de.flapdoodle.embed</groupId>
+ <artifactId>de.flapdoodle.embed.mongo</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
package de.juplo.kafka;
+import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.ApplicationArguments;
+import org.springframework.boot.ApplicationRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
-import org.springframework.boot.context.properties.EnableConfigurationProperties;
-import org.springframework.context.annotation.Bean;
-import java.util.concurrent.Executors;
+import javax.annotation.PreDestroy;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
@SpringBootApplication
-@EnableConfigurationProperties(ApplicationProperties.class)
-public class Application
+@Slf4j
+public class Application implements ApplicationRunner
{
@Autowired
- ApplicationProperties properties;
+ EndlessConsumer endlessConsumer;
+ @Autowired
+ ExecutorService executor;
- @Bean
- public EndlessConsumer consumer(PartitionStatisticsRepository repository)
+ /**
+  * {@link ApplicationRunner} callback: logs the start and then starts the
+  * injected {@link EndlessConsumer}.
+  *
+  * @param args the application arguments (not used)
+  */
+ @Override
+ public void run(ApplicationArguments args) throws Exception
{
- EndlessConsumer consumer =
- new EndlessConsumer(
- Executors.newFixedThreadPool(1),
- repository,
- properties.getBootstrapServer(),
- properties.getGroupId(),
- properties.getClientId(),
- properties.getTopic(),
- properties.getAutoOffsetReset());
-
- consumer.start();
-
- return consumer;
+ log.info("Starting EndlessConsumer");
+ endlessConsumer.start();
}
+  /**
+   * Shuts down the injected {@link ExecutorService} before the bean is destroyed.
+   * <p>
+   * An orderly shutdown is requested first and the method then waits up to
+   * 5 seconds for termination. If the executor has still not terminated
+   * afterwards (or the wait was interrupted), the shutdown is forced and every
+   * task that never got to run is logged.
+   */
+  @PreDestroy
+  public void stopExecutor()
+  {
+    try
+    {
+      log.info("Shutting down the ExecutorService.");
+      executor.shutdown();
+      log.info("Waiting 5 seconds for the ExecutorService to terminate...");
+      executor.awaitTermination(5, TimeUnit.SECONDS);
+    }
+    catch (InterruptedException e)
+    {
+      log.error("Exception while waiting for the termination of the ExecutorService: {}", e.toString());
+      // Catching InterruptedException clears the interrupt flag: restore it,
+      // so that the code further up the call stack can still see the interruption.
+      Thread.currentThread().interrupt();
+    }
+    finally
+    {
+      if (!executor.isTerminated())
+      {
+        log.warn("Forcing shutdown of ExecutorService!");
+        executor
+            .shutdownNow()
+            .forEach(runnable -> log.warn("Unprocessed task: {}", runnable.getClass().getSimpleName()));
+      }
+      log.info("Shutdow of ExecutorService finished");
+    }
+  }
+
+
public static void main(String[] args)
{
SpringApplication.run(Application.class, args);
--- /dev/null
+package de.juplo.kafka;
+
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.common.serialization.LongDeserializer;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.springframework.boot.context.properties.EnableConfigurationProperties;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+import java.util.Properties;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.function.Consumer;
+
+
+/**
+ * Spring configuration that assembles the consumer stack of the application:
+ * the record handler, the {@link EndlessConsumer}, the executor that is handed
+ * to it and the underlying {@link KafkaConsumer}.
+ */
+@Configuration
+@EnableConfigurationProperties(ApplicationProperties.class)
+public class ApplicationConfiguration
+{
+  /**
+   * Provides the handler that is passed to the {@link EndlessConsumer}.
+   * This default implementation ignores every record.
+   */
+  @Bean
+  public Consumer<ConsumerRecord<String, Long>> consumer()
+  {
+    return ignored ->
+    {
+      // Handle record
+    };
+  }
+
+  /**
+   * Creates the {@link EndlessConsumer} from the collaborating beans and the
+   * client-ID and topic found in the application properties.
+   */
+  @Bean
+  public EndlessConsumer<String, Long> endlessConsumer(
+      KafkaConsumer<String, Long> kafkaConsumer,
+      ExecutorService executor,
+      Consumer<ConsumerRecord<String, Long>> handler,
+      PartitionStatisticsRepository repository,
+      ApplicationProperties properties)
+  {
+    String clientId = properties.getClientId();
+    String topic = properties.getTopic();
+
+    return new EndlessConsumer<>(executor, repository, clientId, topic, kafkaConsumer, handler);
+  }
+
+  /**
+   * Provides a single-threaded executor.
+   */
+  @Bean
+  public ExecutorService executor()
+  {
+    ExecutorService singleThreaded = Executors.newSingleThreadExecutor();
+    return singleThreaded;
+  }
+
+  /**
+   * Creates the {@link KafkaConsumer} for String keys and Long values.
+   * Auto-commit is disabled; {@code close()} is registered as the destroy
+   * method of the bean.
+   */
+  @Bean(destroyMethod = "close")
+  public KafkaConsumer<String, Long> kafkaConsumer(ApplicationProperties properties)
+  {
+    Properties config = new Properties();
+
+    // Connection and identity
+    config.put("bootstrap.servers", properties.getBootstrapServer());
+    config.put("group.id", properties.getGroupId());
+    config.put("client.id", properties.getClientId());
+
+    // Offset handling and metadata refresh
+    config.put("enable.auto.commit", false);
+    config.put("auto.offset.reset", properties.getAutoOffsetReset());
+    config.put("metadata.max.age.ms", "1000");
+
+    // Deserialization
+    config.put("key.deserializer", StringDeserializer.class.getName());
+    config.put("value.deserializer", LongDeserializer.class.getName());
+
+    KafkaConsumer<String, Long> kafkaConsumer = new KafkaConsumer<>(config);
+    return kafkaConsumer;
+  }
+}
@RequiredArgsConstructor
public class ApplicationHealthIndicator implements HealthIndicator
{
- private final EndlessConsumer consumer;
+ private final EndlessConsumer<String, Long> consumer;
@Override
+ /** Returns the counters that the consumer exposes via {@code getSeen()}. */
@GetMapping("seen")
- public Map<Integer, Map<String, Integer>> seen()
+ public Map<Integer, Map<String, Long>> seen()
{
return consumer.getSeen();
}
package de.juplo.kafka;
+import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
-import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
-import org.apache.kafka.clients.consumer.ConsumerRecord;
-import org.apache.kafka.clients.consumer.ConsumerRecords;
-import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.errors.RecordDeserializationException;
import org.apache.kafka.common.errors.WakeupException;
-import org.apache.kafka.common.serialization.StringDeserializer;
import javax.annotation.PreDestroy;
import java.time.Duration;
@Slf4j
-public class EndlessConsumer implements Runnable
+@RequiredArgsConstructor
+public class EndlessConsumer<K, V> implements ConsumerRebalanceListener, Runnable
{
private final ExecutorService executor;
private final PartitionStatisticsRepository repository;
- private final String bootstrapServer;
- private final String groupId;
private final String id;
private final String topic;
- private final String autoOffsetReset;
+ private final Consumer<K, V> consumer;
+ private final java.util.function.Consumer<ConsumerRecord<K, V>> handler;
private final Lock lock = new ReentrantLock();
private final Condition condition = lock.newCondition();
private boolean running = false;
private Exception exception;
private long consumed = 0;
- private KafkaConsumer<String, String> consumer = null;
+ private final Map<Integer, Map<String, Long>> seen = new HashMap<>();
+ private final Map<Integer, Long> offsets = new HashMap<>();
- private final Map<Integer, Map<String, Integer>> seen = new HashMap<>();
+  /**
+   * Rebalance callback for partitions that are taken away from this consumer.
+   * <p>
+   * For each revoked partition the progress is logged, the per-key counters
+   * are logged and removed from {@code seen}, and the counters are stored in
+   * the repository together with the current position of the consumer.
+   *
+   * @param partitions the partitions that are revoked
+   */
+  @Override
+  public void onPartitionsRevoked(Collection<TopicPartition> partitions)
+  {
+    partitions.forEach(tp ->
+    {
+      Integer partition = tp.partition();
+      // Ask for the position only once, so that the logged and the stored offset agree
+      long newOffset = consumer.position(tp);
+      // May be null, if no start offset was recorded for the partition:
+      // never unbox it without a check!
+      Long oldOffset = offsets.remove(partition);
+      if (oldOffset == null)
+      {
+        log.info(
+            "{} - removing partition: {}, start offset unknown (current offset {})",
+            id,
+            partition,
+            newOffset);
+      }
+      else
+      {
+        log.info(
+            "{} - removing partition: {}, consumed {} records (offset {} -> {})",
+            id,
+            partition,
+            newOffset - oldOffset,
+            oldOffset,
+            newOffset);
+      }
+      Map<String, Long> removed = seen.remove(partition);
+      if (removed == null)
+      {
+        // Nothing was counted for this partition: do not overwrite stored statistics
+        log.warn("{} - no statistics found for revoked partition {}", id, partition);
+        return;
+      }
+      for (Map.Entry<String, Long> entry : removed.entrySet())
+      {
+        log.info(
+            "{} - Seen {} messages for partition={}|key={}",
+            id,
+            entry.getValue(),
+            partition,
+            entry.getKey());
+      }
+      repository.save(new StatisticsDocument(partition, removed, newOffset));
+    });
+  }
- public EndlessConsumer(
- ExecutorService executor,
- PartitionStatisticsRepository repository,
- String bootstrapServer,
- String groupId,
- String clientId,
- String topic,
- String autoOffsetReset)
+  /**
+   * Rebalance callback for partitions that are newly assigned to this consumer.
+   * <p>
+   * For each partition the stored {@link StatisticsDocument} is loaded (or a
+   * fresh one is created), the consumer is positioned at the stored offset,
+   * and that offset and the stored counters are remembered in {@code offsets}
+   * and {@code seen}.
+   *
+   * @param partitions the partitions that are assigned
+   */
+  @Override
+  public void onPartitionsAssigned(Collection<TopicPartition> partitions)
{
- this.executor = executor;
- this.repository = repository;
- this.bootstrapServer = bootstrapServer;
- this.groupId = groupId;
- this.id = clientId;
- this.topic = topic;
- this.autoOffsetReset = autoOffsetReset;
+    partitions.forEach(tp ->
+    {
+      Integer partition = tp.partition();
+      Long offset = consumer.position(tp);
+      log.info("{} - adding partition: {}, offset={}", id, partition, offset);
+      StatisticsDocument document =
+          repository
+              .findById(Integer.toString(partition))
+              .orElse(new StatisticsDocument(partition));
+      consumer.seek(tp, document.offset);
+      // Remember where the consumption starts, so that the number of consumed
+      // records can be computed when the partition is revoked
+      offsets.put(partition, document.offset);
+      seen.put(partition, document.statistics);
+    });
}
+
@Override
public void run()
{
try
{
- Properties props = new Properties();
- props.put("bootstrap.servers", bootstrapServer);
- props.put("group.id", groupId);
- props.put("client.id", id);
- props.put("enable.auto.commit", false);
- props.put("auto.offset.reset", autoOffsetReset);
- props.put("metadata.max.age.ms", "1000");
- props.put("key.deserializer", StringDeserializer.class.getName());
- props.put("value.deserializer", StringDeserializer.class.getName());
-
- this.consumer = new KafkaConsumer<>(props);
-
log.info("{} - Subscribing to topic {}", id, topic);
- consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener()
- {
- @Override
- public void onPartitionsRevoked(Collection<TopicPartition> partitions)
- {
- partitions.forEach(tp ->
- {
- log.info("{} - removing partition: {}", id, tp);
- Map<String, Integer> removed = seen.remove(tp.partition());
- for (String key : removed.keySet())
- {
- log.info(
- "{} - Seen {} messages for partition={}|key={}",
- id,
- removed.get(key),
- tp.partition(),
- key);
- }
- repository.save(new StatisticsDocument(tp.partition(), removed, consumer.position(tp)));
- });
- }
-
- @Override
- public void onPartitionsAssigned(Collection<TopicPartition> partitions)
- {
- partitions.forEach(tp ->
- {
- log.info("{} - adding partition: {}", id, tp);
- StatisticsDocument document =
- repository
- .findById(Integer.toString(tp.partition()))
- .orElse(new StatisticsDocument(tp.partition()));
- consumer.seek(tp, document.offset);
- seen.put(tp.partition(), document.statistics);
- });
- }
- });
+ consumer.subscribe(Arrays.asList(topic), this);
while (true)
{
- ConsumerRecords<String, String> records =
+ ConsumerRecords<K, V> records =
consumer.poll(Duration.ofSeconds(1));
// Do something with the data...
log.info("{} - Received {} messages", id, records.count());
- for (ConsumerRecord<String, String> record : records)
+ for (ConsumerRecord<K, V> record : records)
{
- consumed++;
log.info(
"{} - {}: {}/{} - {}={}",
id,
record.value()
);
+ handler.accept(record);
+
+ consumed++;
+
Integer partition = record.partition();
- String key = record.key() == null ? "NULL" : record.key();
- Map<String, Integer> byKey = seen.get(partition);
+ String key = record.key() == null ? "NULL" : record.key().toString();
+ Map<String, Long> byKey = seen.get(partition);
if (!byKey.containsKey(key))
- byKey.put(key, 0);
+ byKey.put(key, 0l);
- int seenByKey = byKey.get(key);
+ long seenByKey = byKey.get(key);
seenByKey++;
byKey.put(key, seenByKey);
}
}
catch(WakeupException e)
{
- log.info("{} - RIIING!", id);
+ log.info("{} - RIIING! Request to stop consumption - commiting current offsets!", id);
shutdown();
}
+ catch(RecordDeserializationException e)
+ {
+ TopicPartition tp = e.topicPartition();
+ long offset = e.offset();
+ log.error(
+ "{} - Could not deserialize message on topic {} with offset={}: {}",
+ id,
+ tp,
+ offset,
+ e.getCause().toString());
+
+ shutdown(e);
+ }
catch(Exception e)
{
log.error("{} - Unexpected error: {}", id, e.toString(), e);
}
finally
{
- log.info("{} - Closing the KafkaConsumer", id);
- consumer.close();
log.info("{} - Consumer-Thread exiting", id);
}
}
lock.lock();
try
{
- running = false;
- exception = e;
- condition.signal();
+ try
+ {
+ log.info("{} - Unsubscribing from topic {}", id, topic);
+ consumer.unsubscribe();
+ }
+ catch (Exception ue)
+ {
+ log.error(
+ "{} - Error while unsubscribing from topic {}: {}",
+ id,
+ topic,
+ ue.toString());
+ }
+ finally
+ {
+ running = false;
+ exception = e;
+ condition.signal();
+ }
}
finally
{
}
}
+ /**
+  * Returns the counters, keyed first by partition and then by record key.
+  * NOTE(review): this hands out the internal map itself, not a copy.
+  */
- public Map<Integer, Map<String, Integer>> getSeen()
+ public Map<Integer, Map<String, Long>> getSeen()
{
return seen;
}
@Id
public String id;
public long offset;
- public Map<String, Integer> statistics;
+ public Map<String, Long> statistics;
public StatisticsDocument()
{
this.statistics = new HashMap<>();
}
- public StatisticsDocument(Integer partition, Map<String, Integer> statistics, long offset)
+ public StatisticsDocument(Integer partition, Map<String, Long> statistics, long offset)
{
this.id = Integer.toString(partition);
this.statistics = statistics;
--- /dev/null
+package de.juplo.kafka;
+
+import lombok.extern.slf4j.Slf4j;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.errors.RecordDeserializationException;
+import org.apache.kafka.common.serialization.*;
+import org.apache.kafka.common.utils.Bytes;
+import org.junit.jupiter.api.*;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.test.autoconfigure.data.mongo.AutoConfigureDataMongo;
+import org.springframework.boot.test.context.ConfigDataApplicationContextInitializer;
+import org.springframework.boot.test.context.TestConfiguration;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Import;
+import org.springframework.kafka.test.context.EmbeddedKafka;
+import org.springframework.test.context.TestPropertySource;
+import org.springframework.test.context.junit.jupiter.SpringJUnitConfig;
+
+import java.time.Duration;
+import java.util.*;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static de.juplo.kafka.ApplicationTests.PARTITIONS;
+import static de.juplo.kafka.ApplicationTests.TOPIC;
+import static org.assertj.core.api.Assertions.*;
+import static org.awaitility.Awaitility.*;
+
+
+/**
+ * Integration test for the {@link EndlessConsumer}, run against an embedded
+ * Kafka broker and an auto-configured MongoDB.
+ * <p>
+ * Before each test a fresh {@link EndlessConsumer} is created and started with
+ * a handler that captures the received records and their offsets; after each
+ * test it is stopped again.
+ */
+@SpringJUnitConfig(initializers = ConfigDataApplicationContextInitializer.class)
+@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
+@TestPropertySource(
+    properties = {
+        "consumer.bootstrap-server=${spring.embedded.kafka.brokers}",
+        "consumer.topic=" + TOPIC,
+        "spring.mongodb.embedded.version=4.4.13" })
+@EmbeddedKafka(topics = TOPIC, partitions = PARTITIONS)
+@EnableAutoConfiguration
+@AutoConfigureDataMongo
+@Slf4j
+class ApplicationTests
+{
+  public static final String TOPIC = "FOO";
+  public static final int PARTITIONS = 10;
+
+
+  StringSerializer stringSerializer = new StringSerializer();
+
+  @Autowired
+  Serializer<Long> valueSerializer;
+  @Autowired
+  KafkaProducer<String, Bytes> kafkaProducer;
+  @Autowired
+  KafkaConsumer<String, Long> kafkaConsumer;
+  @Autowired
+  PartitionStatisticsRepository partitionStatisticsRepository;
+  @Autowired
+  ApplicationProperties properties;
+  @Autowired
+  ExecutorService executor;
+  @Autowired
+  PartitionStatisticsRepository repository;
+
+  Consumer<ConsumerRecord<String, Long>> testHandler;
+  EndlessConsumer<String, Long> endlessConsumer;
+  Map<TopicPartition, Long> oldOffsets;
+  Map<TopicPartition, Long> newOffsets;
+  Set<ConsumerRecord<String, Long>> receivedRecords;
+
+
+  /** Tests methods */
+
+  /**
+   * Sends 100 valid messages and expects that all of them are received, that
+   * the stored offsets catch up with the offsets seen by the handler and that
+   * the consumer is still running afterwards.
+   */
+  @Test
+  @Order(1) // << The poison pill is not skipped. Hence, this test must run first
+  void commitsCurrentOffsetsOnSuccess() throws ExecutionException, InterruptedException
+  {
+    send100Messages(i -> new Bytes(valueSerializer.serialize(TOPIC, i)));
+
+    await("100 records received")
+        .atMost(Duration.ofSeconds(30))
+        .until(() -> receivedRecords.size() >= 100);
+
+    await("Offsets committed")
+        .atMost(Duration.ofSeconds(10))
+        .untilAsserted(() ->
+        {
+          checkSeenOffsetsForProgress();
+          compareToCommitedOffsets(newOffsets);
+        });
+
+    assertThatExceptionOfType(IllegalStateException.class)
+        .isThrownBy(() -> endlessConsumer.exitStatus())
+        .describedAs("Consumer should still be running");
+  }
+
+  /**
+   * Sends 100 messages, of which message 77 carries a serialized String
+   * instead of a Long. Expects the consumer to stop with a
+   * {@link RecordDeserializationException}, both on the first run and after
+   * a restart, with the stored offsets matching the last offsets seen.
+   */
+  @Test
+  @Order(2)
+  void commitsOffsetOfErrorForReprocessingOnError()
+  {
+    send100Messages(counter ->
+        counter == 77
+            ? new Bytes(stringSerializer.serialize(TOPIC, "BOOM!"))
+            : new Bytes(valueSerializer.serialize(TOPIC, counter)));
+
+    await("Consumer failed")
+        .atMost(Duration.ofSeconds(30))
+        .until(() -> !endlessConsumer.running());
+
+    checkSeenOffsetsForProgress();
+    compareToCommitedOffsets(newOffsets);
+
+    endlessConsumer.start();
+    await("Consumer failed")
+        .atMost(Duration.ofSeconds(30))
+        .until(() -> !endlessConsumer.running());
+
+    checkSeenOffsetsForProgress();
+    compareToCommitedOffsets(newOffsets);
+    assertThat(receivedRecords.size())
+        .describedAs("Received not all sent events")
+        .isLessThan(100);
+
+    assertThatNoException()
+        .describedAs("Consumer should not be running")
+        .isThrownBy(() -> endlessConsumer.exitStatus());
+    assertThat(endlessConsumer.exitStatus())
+        .describedAs("Consumer should have exited abnormally")
+        .containsInstanceOf(RecordDeserializationException.class);
+  }
+
+
+  /** Helper methods for the verification of expectations */
+
+  /**
+   * Asserts for every partition, that the offset stored in the repository is
+   * the given offset plus one.
+   *
+   * @param offsetsToCheck the last seen offset for each partition
+   */
+  void compareToCommitedOffsets(Map<TopicPartition, Long> offsetsToCheck)
+  {
+    doForCurrentOffsets((tp, offset) ->
+    {
+      Long expected = offsetsToCheck.get(tp) + 1;
+      log.debug("Checking, if the offset for {} is {}", tp, expected);
+      assertThat(offset)
+          .describedAs("Committed offset corresponds to the offset of the consumer")
+          .isEqualTo(expected);
+    });
+  }
+
+  /**
+   * Asserts, that the offset of at least one partition differs from the
+   * offset that was recorded before the test started.
+   */
+  void checkSeenOffsetsForProgress()
+  {
+    // Be sure, that some messages were consumed...!
+    Set<TopicPartition> withProgress = new HashSet<>();
+    partitions().forEach(tp ->
+    {
+      Long oldOffset = oldOffsets.get(tp);
+      Long newOffset = newOffsets.get(tp);
+      if (!oldOffset.equals(newOffset))
+      {
+        log.debug("Progress for {}: {} -> {}", tp, oldOffset, newOffset);
+        withProgress.add(tp);
+      }
+    });
+    assertThat(withProgress)
+        .describedAs("Some offsets must have changed, compared to the old offset-positions")
+        .isNotEmpty();
+  }
+
+
+  /** Helper methods for setting up and running the tests */
+
+  /**
+   * Calls the given consumer for every partition of the topic with the offset
+   * that is stored for it in the repository, or with 0 if nothing is stored.
+   */
+  void doForCurrentOffsets(BiConsumer<TopicPartition, Long> consumer)
+  {
+    partitions().forEach(tp ->
+    {
+      String partition = Integer.toString(tp.partition());
+      Optional<Long> offset = partitionStatisticsRepository.findById(partition).map(document -> document.offset);
+      consumer.accept(tp, offset.orElse(0L));
+    });
+  }
+
+  /** Returns one {@link TopicPartition} for each partition of the test topic. */
+  List<TopicPartition> partitions()
+  {
+    return
+        IntStream
+            .range(0, PARTITIONS)
+            .mapToObj(partition -> new TopicPartition(TOPIC, partition))
+            .collect(Collectors.toList());
+  }
+
+
+  /**
+   * Sends 10 messages to each partition of the topic, alternating between
+   * the keys "0" and "1".
+   *
+   * @param messageGenerator creates the value for the n-th message (counted from 1)
+   */
+  void send100Messages(Function<Long, Bytes> messageGenerator)
+  {
+    long i = 0;
+
+    // Iterate over the partitions the topic was actually created with
+    for (int partition = 0; partition < PARTITIONS; partition++)
+    {
+      for (int key = 0; key < 10; key++)
+      {
+        Bytes value = messageGenerator.apply(++i);
+
+        ProducerRecord<String, Bytes> record =
+            new ProducerRecord<>(
+                TOPIC,
+                partition,
+                Integer.toString(key%2),
+                value);
+
+        kafkaProducer.send(record, (metadata, e) ->
+        {
+          if (metadata != null)
+          {
+            log.debug(
+                "{}|{} - {}={}",
+                metadata.partition(),
+                metadata.offset(),
+                record.key(),
+                record.value());
+          }
+          else
+          {
+            log.warn(
+                "Exception for {}={}: {}",
+                record.key(),
+                record.value(),
+                e.toString());
+          }
+        });
+      }
+    }
+  }
+
+
+  /**
+   * Resets the captured state, records the currently stored offsets and
+   * starts a fresh {@link EndlessConsumer} whose handler captures every
+   * received record before delegating to {@code testHandler}.
+   */
+  @BeforeEach
+  public void init()
+  {
+    testHandler = record -> {} ;
+
+    oldOffsets = new HashMap<>();
+    // Written by the handler on the consumer thread and read by the test
+    // thread while waiting: these two must be safe for concurrent access
+    newOffsets = Collections.synchronizedMap(new HashMap<>());
+    receivedRecords = Collections.synchronizedSet(new HashSet<>());
+
+    doForCurrentOffsets((tp, offset) ->
+    {
+      oldOffsets.put(tp, offset - 1);
+      newOffsets.put(tp, offset - 1);
+    });
+
+    Consumer<ConsumerRecord<String, Long>> captureOffsetAndExecuteTestHandler =
+        record ->
+        {
+          newOffsets.put(
+              new TopicPartition(record.topic(), record.partition()),
+              record.offset());
+          receivedRecords.add(record);
+          testHandler.accept(record);
+        };
+
+    endlessConsumer =
+        new EndlessConsumer<>(
+            executor,
+            repository,
+            properties.getClientId(),
+            properties.getTopic(),
+            kafkaConsumer,
+            captureOffsetAndExecuteTestHandler);
+
+    endlessConsumer.start();
+  }
+
+  /**
+   * Stops the consumer. An exception thrown while stopping is only logged,
+   * because a test may already have left the consumer in a stopped state.
+   */
+  @AfterEach
+  public void deinit()
+  {
+    try
+    {
+      endlessConsumer.stop();
+    }
+    catch (Exception e)
+    {
+      log.info("Exception while stopping the consumer: {}", e.toString());
+    }
+  }
+
+
+  /**
+   * Test configuration: imports the {@link ApplicationConfiguration} and adds
+   * the serializer and the producer that are needed to send the test messages.
+   */
+  @TestConfiguration
+  @Import(ApplicationConfiguration.class)
+  public static class Configuration
+  {
+    @Bean
+    Serializer<Long> serializer()
+    {
+      return new LongSerializer();
+    }
+
+    @Bean
+    KafkaProducer<String, Bytes> kafkaProducer(ApplicationProperties properties)
+    {
+      Properties props = new Properties();
+      props.put("bootstrap.servers", properties.getBootstrapServer());
+      props.put("linger.ms", 100);
+      props.put("key.serializer", StringSerializer.class.getName());
+      props.put("value.serializer", BytesSerializer.class.getName());
+
+      return new KafkaProducer<>(props);
+    }
+  }
+}