      ME_CONFIG_MONGODB_ADMINUSERNAME: juplo
      ME_CONFIG_MONGODB_ADMINPASSWORD: training
      ME_CONFIG_MONGODB_URL: mongodb://juplo:training@mongo:27017/
+     depends_on:
+       - mongo
-   setup:
-     image: juplo/toolbox
-     command: >
-       bash -c "
-         kafka-topics --bootstrap-server kafka:9092 --delete --if-exists --topic test
-         kafka-topics --bootstrap-server kafka:9092 --create --topic test --partitions 2
-       "
+   kafka-ui:
+     image: provectuslabs/kafka-ui:0.3.3
+     ports:
+       - 8080:8080
+     environment:
+       KAFKA_CLUSTERS_0_NAME: local
+       KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092
  cli:
    image: juplo/toolbox
    command: sleep infinity
  producer:
-     image: juplo/endless-producer:1.0-SNAPSHOT
+     image: juplo/endless-long-producer:1.0-SNAPSHOT
    ports:
      - 8080:8080
    environment:
      producer.bootstrap-server: kafka:9092
      producer.client-id: producer
      producer.topic: test
-       producer.throttle-ms: 500
+       producer.throttle-ms: 10
-   peter:
+   consumer:
    image: juplo/endless-consumer:1.0-SNAPSHOT
    ports:
      - 8081:8080
    environment:
      server.port: 8080
      consumer.bootstrap-server: kafka:9092
-       consumer.client-id: peter
-       consumer.topic: test
-       spring.data.mongodb.uri: mongodb://juplo:training@mongo:27017
-       spring.data.mongodb.database: juplo
-
-   beate:
-     image: juplo/endless-consumer:1.0-SNAPSHOT
-     ports:
-       - 8082:8080
-     environment:
-       server.port: 8080
-       consumer.bootstrap-server: kafka:9092
-       consumer.client-id: beate
+       consumer.client-id: consumer
      consumer.topic: test
      spring.data.mongodb.uri: mongodb://juplo:training@mongo:27017
      spring.data.mongodb.database: juplo
package de.juplo.kafka;
- import org.apache.kafka.clients.consumer.ConsumerRecord;
-import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
+ import java.time.Clock;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
- import java.util.function.Consumer;
@Configuration
public class ApplicationConfiguration
{
@Bean
- public Consumer<ConsumerRecord<String, Long>> consumer()
- public KeyCountingRecordHandler messageCountingRecordHandler()
++ public KeyCountingRecordHandler keyCountingRecordHandler()
{
- return (record) ->
- {
- // Handle record
- };
+ return new KeyCountingRecordHandler();
+ }
+
+ @Bean
- public KeyCountingRebalanceListener wordcountRebalanceListener(
++ public KeyCountingRebalanceListener keyCountingRebalanceListener(
+ KeyCountingRecordHandler keyCountingRecordHandler,
+ PartitionStatisticsRepository repository,
- Consumer<String, Long> consumer,
+ ApplicationProperties properties)
+ {
+ return new KeyCountingRebalanceListener(
+ keyCountingRecordHandler,
+ repository,
+ properties.getClientId(),
- properties.getTopic(),
+ Clock.systemDefaultZone(),
- properties.getCommitInterval(),
- consumer);
++ properties.getCommitInterval());
}
@Bean
public EndlessConsumer<String, Long> endlessConsumer(
KafkaConsumer<String, Long> kafkaConsumer,
ExecutorService executor,
- Consumer<ConsumerRecord<String, Long>> handler,
- PartitionStatisticsRepository repository,
+ KeyCountingRebalanceListener keyCountingRebalanceListener,
+ KeyCountingRecordHandler keyCountingRecordHandler,
ApplicationProperties properties)
{
return
new EndlessConsumer<>(
executor,
- repository,
properties.getClientId(),
properties.getTopic(),
kafkaConsumer,
- handler);
+ keyCountingRebalanceListener,
+ keyCountingRecordHandler);
}
@Bean
Properties props = new Properties();
props.put("bootstrap.servers", properties.getBootstrapServer());
+ props.put("partition.assignment.strategy", "org.apache.kafka.clients.consumer.CooperativeStickyAssignor");
props.put("group.id", properties.getGroupId());
props.put("client.id", properties.getClientId());
- props.put("enable.auto.commit", false);
props.put("auto.offset.reset", properties.getAutoOffsetReset());
+ props.put("auto.commit.interval.ms", (int)properties.getCommitInterval().toMillis());
props.put("metadata.max.age.ms", "1000");
props.put("key.deserializer", StringDeserializer.class.getName());
props.put("value.deserializer", LongDeserializer.class.getName());
@Slf4j
@RequiredArgsConstructor
- public class EndlessConsumer<K, V> implements ConsumerRebalanceListener, Runnable
+ public class EndlessConsumer<K, V> implements Runnable
{
private final ExecutorService executor;
- private final PartitionStatisticsRepository repository;
private final String id;
private final String topic;
private final Consumer<K, V> consumer;
- private final java.util.function.Consumer<ConsumerRecord<K, V>> handler;
+ private final PollIntervalAwareConsumerRebalanceListener pollIntervalAwareRebalanceListener;
+ private final RecordHandler<K, V> handler;
private final Lock lock = new ReentrantLock();
private final Condition condition = lock.newCondition();
private Exception exception;
private long consumed = 0;
- private final Map<Integer, Map<String, Long>> seen = new HashMap<>();
- private final Map<Integer, Long> offsets = new HashMap<>();
-
-
- @Override
- public void onPartitionsRevoked(Collection<TopicPartition> partitions)
- {
- partitions.forEach(tp ->
- {
- Integer partition = tp.partition();
- Long newOffset = consumer.position(tp);
- Long oldOffset = offsets.remove(partition);
- log.info(
- "{} - removing partition: {}, consumed {} records (offset {} -> {})",
- id,
- partition,
- newOffset - oldOffset,
- oldOffset,
- newOffset);
- Map<String, Long> removed = seen.remove(partition);
- for (String key : removed.keySet())
- {
- log.info(
- "{} - Seen {} messages for partition={}|key={}",
- id,
- removed.get(key),
- partition,
- key);
- }
- repository.save(new StatisticsDocument(partition, removed));
- });
- }
-
- @Override
- public void onPartitionsAssigned(Collection<TopicPartition> partitions)
- {
- partitions.forEach(tp ->
- {
- Integer partition = tp.partition();
- Long offset = consumer.position(tp);
- log.info("{} - adding partition: {}, offset={}", id, partition, offset);
- offsets.put(partition, offset);
- seen.put(
- partition,
- repository
- .findById(Integer.toString(tp.partition()))
- .map(document -> document.statistics)
- .orElse(new HashMap<>()));
- });
- }
@Override
try
{
log.info("{} - Subscribing to topic {}", id, topic);
- consumer.subscribe(Arrays.asList(topic), this);
+ consumer.subscribe(Arrays.asList(topic), pollIntervalAwareRebalanceListener);
while (true)
{
handler.accept(record);
consumed++;
-
- Integer partition = record.partition();
- String key = record.key() == null ? "NULL" : record.key().toString();
- Map<String, Long> byKey = seen.get(partition);
-
- if (!byKey.containsKey(key))
- byKey.put(key, 0l);
-
- long seenByKey = byKey.get(key);
- seenByKey++;
- byKey.put(key, seenByKey);
}
+
+ pollIntervalAwareRebalanceListener.beforeNextPoll();
}
}
catch(WakeupException e)
{
log.info("{} - RIIING! Request to stop consumption - commiting current offsets!", id);
+ consumer.commitSync();
shutdown();
}
catch(RecordDeserializationException e)
offset,
e.getCause().toString());
+ consumer.commitSync();
shutdown(e);
}
catch(Exception e)
}
}
- public Map<Integer, Map<String, Long>> getSeen()
- {
- return seen;
- }
-
public void start()
{
lock.lock();
}
}
- public synchronized void stop() throws ExecutionException, InterruptedException
+ public synchronized void stop() throws InterruptedException
{
lock.lock();
try
public void destroy() throws ExecutionException, InterruptedException
{
log.info("{} - Destroy!", id);
- try
- {
- stop();
- }
- catch (IllegalStateException e)
- {
- log.info("{} - Was already stopped", id);
- }
- catch (Exception e)
- {
- log.error("{} - Unexpected exception while trying to stop the consumer", id, e);
- }
- finally
- {
- log.info("{}: Consumed {} messages in total, exiting!", id, consumed);
- }
+ log.info("{}: Consumed {} messages in total, exiting!", id, consumed);
}
public boolean running()
--- /dev/null
-import org.apache.kafka.clients.consumer.Consumer;
+ package de.juplo.kafka;
+
+ import lombok.RequiredArgsConstructor;
+ import lombok.extern.slf4j.Slf4j;
- private final String topic;
+ import org.apache.kafka.common.TopicPartition;
+
+ import java.time.Clock;
+ import java.time.Duration;
+ import java.time.Instant;
+ import java.util.Collection;
+ import java.util.Map;
+
+
+ @RequiredArgsConstructor
+ @Slf4j
+ public class KeyCountingRebalanceListener implements PollIntervalAwareConsumerRebalanceListener
+ {
+ private final KeyCountingRecordHandler handler;
+ private final PartitionStatisticsRepository repository;
+ private final String id;
- private final Consumer<String, Long> consumer;
+ private final Clock clock;
+ private final Duration commitInterval;
- Long offset = consumer.position(tp);
- log.info("{} - adding partition: {}, offset={}", id, partition, offset);
+
+ private Instant lastCommit = Instant.EPOCH;
+
+ @Override
+ public void onPartitionsAssigned(Collection<TopicPartition> partitions)
+ {
+ partitions.forEach(tp ->
+ {
+ Integer partition = tp.partition();
- if (document.offset >= 0)
- {
- // Only seek, if a stored offset was found
- // Otherwise: Use initial offset, generated by Kafka
- consumer.seek(tp, document.offset);
- }
++ log.info("{} - adding partition: {}", id, partition);
+ StatisticsDocument document =
+ repository
+ .findById(Integer.toString(partition))
+ .orElse(new StatisticsDocument(partition));
- Long newOffset = consumer.position(tp);
- log.info(
- "{} - removing partition: {}, offset of next message {})",
- id,
- partition,
- newOffset);
+ handler.addPartition(partition, document.statistics);
+ });
+ }
+
+ @Override
+ public void onPartitionsRevoked(Collection<TopicPartition> partitions)
+ {
+ partitions.forEach(tp ->
+ {
+ Integer partition = tp.partition();
- repository.save(new StatisticsDocument(partition, removed, consumer.position(tp)));
++ log.info("{} - removing partition: {}", id, partition);
+ Map<String, Long> removed = handler.removePartition(partition);
- log.debug("Storing data and offsets, last commit: {}", lastCommit);
++ for (String key : removed.keySet())
++ {
++ log.info(
++ "{} - Seen {} messages for partition={}|key={}",
++ id,
++ removed.get(key),
++ partition,
++ key);
++ }
++ repository.save(new StatisticsDocument(partition, removed));
+ });
+ }
+
+
+ @Override
+ public void beforeNextPoll()
+ {
+ if (lastCommit.plus(commitInterval).isBefore(clock.instant()))
+ {
- statistics,
- consumer.position(new TopicPartition(topic, partiton)))));
++ log.debug("Storing data, last commit: {}", lastCommit);
+ handler.getSeen().forEach((partition, statistics) -> repository.save(
+ new StatisticsDocument(
+ partition,
++ statistics)));
+ lastCommit = clock.instant();
+ }
+ }
+ }
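The PollIntervalAwareConsumerRebalanceListener interface implemented here is not shown in this diff either. Judging from its usage — it is handed to consumer.subscribe() in EndlessConsumer, and beforeNextPoll() is invoked at the end of each poll-loop iteration — it is presumably just a ConsumerRebalanceListener extended by a no-op hook:

// PollIntervalAwareConsumerRebalanceListener.java (sketch, assumed from usage)
import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;

public interface PollIntervalAwareConsumerRebalanceListener extends ConsumerRebalanceListener
{
  // Called once per poll-loop iteration; implementations may flush state periodically
  default void beforeNextPoll() {}
}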
{
@Id
public String id;
- public long offset = -1l;
public Map<String, Long> statistics;
public StatisticsDocument()
{
}
- public StatisticsDocument(Integer partition, Map<String, Long> statistics, long offset)
+ public StatisticsDocument(Integer partition)
+ {
+ this.id = Integer.toString(partition);
+ this.statistics = new HashMap<>();
+ }
+
+ public StatisticsDocument(Integer partition, Map<String, Long> statistics)
{
this.id = Integer.toString(partition);
this.statistics = statistics;
- this.offset = offset;
}
}
--- /dev/null
- public void testApplicationStartup()
+ package de.juplo.kafka;
+
+ import org.junit.jupiter.api.Test;
+ import org.springframework.beans.factory.annotation.Autowired;
+ import org.springframework.boot.test.autoconfigure.data.mongo.AutoConfigureDataMongo;
+ import org.springframework.boot.test.context.SpringBootTest;
+ import org.springframework.boot.test.web.client.TestRestTemplate;
+ import org.springframework.boot.test.web.server.LocalServerPort;
+ import org.springframework.kafka.test.context.EmbeddedKafka;
+
+ import static de.juplo.kafka.ApplicationTests.TOPIC;
+
+
+ @SpringBootTest(
+ webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
+ properties = {
+ "consumer.bootstrap-server=${spring.embedded.kafka.brokers}",
+ "consumer.topic=" + TOPIC,
+ "spring.mongodb.embedded.version=4.4.13" })
+ @EmbeddedKafka(topics = TOPIC)
+ @AutoConfigureDataMongo
+ public class ApplicationIT
+ {
+ public static final String TOPIC = "FOO";
+
+ @LocalServerPort
+ private int port;
+
+ @Autowired
+ private TestRestTemplate restTemplate;
+
+
+
+ @Test
++ public void testApplicationStartup()
+ {
+ restTemplate.getForObject(
+ "http://localhost:" + port + "/actuator/health",
+ String.class
+ )
+ .contains("UP");
+ }
+ }
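The test setup further below wraps the KeyCountingRecordHandler in a TestRecordHandler, another class this diff does not show. From the way it is subclassed in init(), it is presumably a small decorator that lets the test observe each record before delegating to the wrapped handler:

// TestRecordHandler.java (sketch, assumed from its usage in ApplicationTests)
import org.apache.kafka.clients.consumer.ConsumerRecord;

public abstract class TestRecordHandler<K, V> implements RecordHandler<K, V>
{
  private final RecordHandler<K, V> handler;

  public TestRecordHandler(RecordHandler<K, V> handler)
  {
    this.handler = handler;
  }

  // Hook for the test to observe every record before normal processing
  public abstract void onNewRecord(ConsumerRecord<K, V> record);

  @Override
  public void accept(ConsumerRecord<K, V> record)
  {
    this.onNewRecord(record);
    handler.accept(record);
  }
}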
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.function.BiConsumer;
- import java.util.function.Consumer;
- import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
properties = {
"consumer.bootstrap-server=${spring.embedded.kafka.brokers}",
"consumer.topic=" + TOPIC,
+ "consumer.commit-interval=1s",
"spring.mongodb.embedded.version=4.4.13" })
@EmbeddedKafka(topics = TOPIC, partitions = PARTITIONS)
@EnableAutoConfiguration
@Autowired
KafkaConsumer<Bytes, Bytes> offsetConsumer;
@Autowired
- PartitionStatisticsRepository partitionStatisticsRepository;
- @Autowired
ApplicationProperties properties;
@Autowired
ExecutorService executor;
@Autowired
-- PartitionStatisticsRepository repository;
- @Autowired
+ KeyCountingRebalanceListener keyCountingRebalanceListener;
+ @Autowired
+ KeyCountingRecordHandler keyCountingRecordHandler;
- Consumer<ConsumerRecord<String, Long>> testHandler;
EndlessConsumer<String, Long> endlessConsumer;
Map<TopicPartition, Long> oldOffsets;
Map<TopicPartition, Long> newOffsets;
/** Tests methods */
@Test
- @Order(1) // << The poison pill is not skipped. Hence, this test must run first
void commitsCurrentOffsetsOnSuccess() throws ExecutionException, InterruptedException
{
- send100Messages(i -> new Bytes(valueSerializer.serialize(TOPIC, i)));
+ send100Messages((partition, key, counter) ->
+ {
+ Bytes value = new Bytes(valueSerializer.serialize(TOPIC, counter));
+ return new ProducerRecord<>(TOPIC, partition, key, value);
+ });
await("100 records received")
.atMost(Duration.ofSeconds(30))
+ .pollInterval(Duration.ofSeconds(1))
.until(() -> receivedRecords.size() >= 100);
await("Offsets committed")
.atMost(Duration.ofSeconds(10))
+ .pollInterval(Duration.ofSeconds(1))
.untilAsserted(() ->
{
checkSeenOffsetsForProgress();
}
@Test
- @Order(2)
- void commitsOffsetOfErrorForReprocessingOnError()
+ void commitsOffsetOfErrorForReprocessingOnDeserializationError()
{
- send100Messages(counter ->
- counter == 77
- ? new Bytes(stringSerializer.serialize(TOPIC, "BOOM!"))
- : new Bytes(valueSerializer.serialize(TOPIC, counter)));
+ send100Messages((partition, key, counter) ->
+ {
+ Bytes value = counter == 77
+ ? new Bytes(stringSerializer.serialize(TOPIC, "BOOM!"))
+ : new Bytes(valueSerializer.serialize(TOPIC, counter));
+ return new ProducerRecord<>(TOPIC, partition, key, value);
+ });
await("Consumer failed")
.atMost(Duration.ofSeconds(30))
+ .pollInterval(Duration.ofSeconds(1))
.until(() -> !endlessConsumer.running());
checkSeenOffsetsForProgress();
endlessConsumer.start();
await("Consumer failed")
.atMost(Duration.ofSeconds(30))
+ .pollInterval(Duration.ofSeconds(1))
.until(() -> !endlessConsumer.running());
checkSeenOffsetsForProgress();
Set<TopicPartition> withProgress = new HashSet<>();
partitions().forEach(tp ->
{
- Long oldOffset = oldOffsets.get(tp);
- Long newOffset = newOffsets.get(tp);
+ Long oldOffset = oldOffsets.get(tp) + 1;
+ Long newOffset = newOffsets.get(tp) + 1;
if (!oldOffset.equals(newOffset))
{
log.debug("Progress for {}: {} -> {}", tp, oldOffset, newOffset);
/** Helper methods for setting up and running the tests */
- Integer partition = tp.partition();
- StatisticsDocument document =
- partitionStatisticsRepository
- .findById(partition.toString())
- .orElse(new StatisticsDocument(partition));
- document.offset = offset;
- partitionStatisticsRepository.save(document);
+ void seekToEnd()
+ {
+ offsetConsumer.assign(partitions());
++ offsetConsumer.seekToEnd(partitions());
+ partitions().forEach(tp ->
+ {
++ // seekToEnd() works lazily: it only takes effect on poll()/position()
+ Long offset = offsetConsumer.position(tp);
+ log.info("New position for {}: {}", tp, offset);
+ });
++ // The new positions must be committed!
++ offsetConsumer.commitSync();
+ offsetConsumer.unsubscribe();
+ }
+
void doForCurrentOffsets(BiConsumer<TopicPartition, Long> consumer)
{
- partitions().forEach(tp ->
- {
- String partition = Integer.toString(tp.partition());
- Optional<Long> offset = partitionStatisticsRepository.findById(partition).map(document -> document.offset);
- consumer.accept(tp, offset.orElse(0l));
- });
- }
+ offsetConsumer.assign(partitions());
+ partitions().forEach(tp -> consumer.accept(tp, offsetConsumer.position(tp)));
+ offsetConsumer.unsubscribe();
+ }
List<TopicPartition> partitions()
{
}
- void send100Messages(Function<Long, Bytes> messageGenerator)
+ public interface RecordGenerator<K, V>
+ {
+ public ProducerRecord<String, Bytes> generate(int partition, String key, long counter);
+ }
+
+ void send100Messages(RecordGenerator recordGenerator)
{
long i = 0;
{
for (int key = 0; key < 10; key++)
{
- Bytes value = messageGenerator.apply(++i);
-
ProducerRecord<String, Bytes> record =
- new ProducerRecord<>(
- TOPIC,
- partition,
- Integer.toString(key%2),
- value);
+ recordGenerator.generate(partition, Integer.toString(partition*10+key%2), ++i);
kafkaProducer.send(record, (metadata, e) ->
{
@BeforeEach
public void init()
{
- testHandler = record -> {} ;
+ seekToEnd();
oldOffsets = new HashMap<>();
newOffsets = new HashMap<>();
newOffsets.put(tp, offset - 1);
});
- Consumer<ConsumerRecord<String, Long>> captureOffsetAndExecuteTestHandler =
- record ->
- {
- newOffsets.put(
- new TopicPartition(record.topic(), record.partition()),
- record.offset());
- receivedRecords.add(record);
- testHandler.accept(record);
+ TestRecordHandler<String, Long> captureOffsetAndExecuteTestHandler =
+ new TestRecordHandler<String, Long>(keyCountingRecordHandler) {
+ @Override
+ public void onNewRecord(ConsumerRecord<String, Long> record)
+ {
+ newOffsets.put(
+ new TopicPartition(record.topic(), record.partition()),
+ record.offset());
+ receivedRecords.add(record);
+ }
};
endlessConsumer =
new EndlessConsumer<>(
executor,
- repository,
properties.getClientId(),
properties.getTopic(),
kafkaConsumer,
+ keyCountingRebalanceListener,
captureOffsetAndExecuteTestHandler);
endlessConsumer.start();
Properties props = new Properties();
props.put("bootstrap.servers", properties.getBootstrapServer());
props.put("client.id", "OFFSET-CONSUMER");
- props.put("enable.auto.commit", false);
- props.put("auto.offset.reset", "latest");
+ props.put("group.id", properties.getGroupId());
props.put("key.deserializer", BytesDeserializer.class.getName());
props.put("value.deserializer", BytesDeserializer.class.getName());