import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.errors.RecordDeserializationException;
import org.apache.kafka.common.errors.WakeupException;
import javax.annotation.PreDestroy;
+import java.time.Clock;
import java.time.Duration;
+import java.time.Instant;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
@Slf4j
@RequiredArgsConstructor
-public class EndlessConsumer implements Runnable
+public class EndlessConsumer<K, V> implements ConsumerRebalanceListener, Runnable
{
private final ExecutorService executor;
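+ // Assumption: a Spring-Data-style repository that persists per-partition
+ // statistics and offsets outside of Kafka (e.g. in MongoDB).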
+ private final PartitionStatisticsRepository repository;
private final String id;
private final String topic;
- private final Consumer<String, String> consumer;
- private final java.util.function.Consumer<ConsumerRecord<String, String>> handler;
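+ // Clock and commitInterval control how often the collected statistics and
+ // offsets are flushed to the repository (see the poll-loop below).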
+ private final Clock clock;
+ private final Duration commitInterval;
+ private final Consumer<K, V> consumer;
+ private final java.util.function.Consumer<ConsumerRecord<K, V>> handler;
private final Lock lock = new ReentrantLock();
private final Condition condition = lock.newCondition();
private long consumed = 0;
private final Map<Integer, Map<String, Long>> seen = new HashMap<>();
- private final Map<Integer, Long> offsets = new HashMap<>();
+
+
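+ // Rebalance callback: when a partition is revoked, log the per-key counts
+ // gathered for it and persist them together with the offset of the next
+ // unconsumed message.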
+ @Override
+ public void onPartitionsRevoked(Collection<TopicPartition> partitions)
+ {
+ partitions.forEach(tp ->
+ {
+ Integer partition = tp.partition();
+ Long newOffset = consumer.position(tp);
+ log.info(
+ "{} - removing partition: {}, offset of next message {})",
+ id,
+ partition,
+ newOffset);
+ Map<String, Long> removed = seen.remove(partition);
+ for (String key : removed.keySet())
+ {
+ log.info(
+ "{} - Seen {} messages for partition={}|key={}",
+ id,
+ removed.get(key),
+ partition,
+ key);
+ }
+ repository.save(new StatisticsDocument(partition, removed, newOffset));
+ });
+ }
+
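+ // Rebalance callback: when a partition is (re-)assigned, restore its stored
+ // statistics and seek to the stored offset; without a stored offset the
+ // consumer keeps the initial position chosen by Kafka.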
+ @Override
+ public void onPartitionsAssigned(Collection<TopicPartition> partitions)
+ {
+ partitions.forEach(tp ->
+ {
+ Integer partition = tp.partition();
+ Long offset = consumer.position(tp);
+ log.info("{} - adding partition: {}, offset={}", id, partition, offset);
+ StatisticsDocument document =
+ repository
+ .findById(Integer.toString(partition))
+ .orElse(new StatisticsDocument(partition));
+ if (document.offset >= 0)
+ {
+ // Only seek, if a stored offset was found
+ // Otherwise: Use initial offset, generated by Kafka
+ consumer.seek(tp, document.offset);
+ }
+ seen.put(partition, document.statistics);
+ });
+ }
@Override
public void run()
{
try
{
log.info("{} - Subscribing to topic {}", id, topic);
- consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener()
- {
- @Override
- public void onPartitionsRevoked(Collection<TopicPartition> partitions)
- {
- partitions.forEach(tp ->
- {
- Integer partition = tp.partition();
- Long newOffset = consumer.position(tp);
- Long oldOffset = offsets.remove(partition);
- log.info(
- "{} - removing partition: {}, consumed {} records (offset {} -> {})",
- id,
- partition,
- newOffset - oldOffset,
- oldOffset,
- newOffset);
- Map<String, Long> removed = seen.remove(partition);
- for (String key : removed.keySet())
- {
- log.info(
- "{} - Seen {} messages for partition={}|key={}",
- id,
- removed.get(key),
- partition,
- key);
- }
- });
- }
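+ // The consumer registers itself as the rebalance listener,
+ // replacing the anonymous ConsumerRebalanceListener.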
+ consumer.subscribe(Arrays.asList(topic), this);
- @Override
- public void onPartitionsAssigned(Collection<TopicPartition> partitions)
- {
- partitions.forEach(tp ->
- {
- Integer partition = tp.partition();
- Long offset = consumer.position(tp);
- log.info("{} - adding partition: {}, offset={}", id, partition, offset);
- offsets.put(partition, offset);
- seen.put(partition, new HashMap<>());
- });
- }
- });
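+ // Remember when statistics and offsets were last written to the repository.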
+ Instant lastCommit = clock.instant();
while (true)
{
- ConsumerRecords<String, String> records =
+ ConsumerRecords<K, V> records =
consumer.poll(Duration.ofSeconds(1));
// Do something with the data...
log.info("{} - Received {} messages", id, records.count());
- for (ConsumerRecord<String, String> record : records)
+ for (ConsumerRecord<K, V> record : records)
{
log.info(
"{} - {}: {}/{} - {}={}",
id, record.offset(), record.topic(), record.partition(), record.key(), record.value());
consumed++;
Integer partition = record.partition();
- String key = record.key() == null ? "NULL" : record.key();
+ String key = record.key() == null ? "NULL" : record.key().toString();
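// Count how many messages have been seen per partition and key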
Map<String, Long> byKey = seen.get(partition);
if (!byKey.containsKey(key))
byKey.put(key, 0L);
Long seenByKey = byKey.get(key);
seenByKey++;
byKey.put(key, seenByKey);
}
+
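+ // Once the commit interval has elapsed, flush the statistics and the next
+ // offset of every assigned partition to the repository.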
+ if (lastCommit.plus(commitInterval).isBefore(clock.instant()))
+ {
+ log.debug("Storing data and offsets, last commit: {}", lastCommit);
+ seen.forEach((partiton, statistics) -> repository.save(
+ new StatisticsDocument(
+ partiton,
+ statistics,
+ consumer.position(new TopicPartition(topic, partiton)))));
+ lastCommit = clock.instant();
+ }
}
}
catch(WakeupException e)
{
log.info("{} - RIIING! Request to stop consumption - commiting current offsets!", id);
- consumer.commitSync();
shutdown();
}
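+ // A record that cannot be deserialized raises a RecordDeserializationException;
+ // log the affected partition and offset and shut the consumer down instead of
+ // skipping the record.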
+ catch(RecordDeserializationException e)
+ {
+ TopicPartition tp = e.topicPartition();
+ long offset = e.offset();
+ log.error(
+ "{} - Could not deserialize message on topic {} with offset={}: {}",
+ id,
+ tp,
+ offset,
+ e.getCause().toString());
+
+ shutdown(e);
+ }
catch(Exception e)
{
log.error("{} - Unexpected error: {}", id, e.toString(), e);
shutdown(e);
}
}
- public synchronized void stop() throws ExecutionException, InterruptedException
+ public synchronized void stop() throws InterruptedException
{
lock.lock();
try
public void destroy() throws ExecutionException, InterruptedException
{
log.info("{} - Destroy!", id);
- try
- {
- stop();
- }
- catch (IllegalStateException e)
- {
- log.info("{} - Was already stopped", id);
- }
- catch (Exception e)
- {
- log.error("{} - Unexpected exception while trying to stop the consumer", id, e);
- }
- finally
- {
- log.info("{}: Consumed {} messages in total, exiting!", id, consumed);
- }
+ log.info("{}: Consumed {} messages in total, exiting!", id, consumed);
}
public boolean running()