1 package de.juplo.kafka;
3 import lombok.RequiredArgsConstructor;
4 import lombok.extern.slf4j.Slf4j;
5 import org.apache.kafka.clients.consumer.*;
6 import org.apache.kafka.common.TopicPartition;
7 import org.apache.kafka.common.errors.RecordDeserializationException;
8 import org.apache.kafka.common.errors.WakeupException;
10 import javax.annotation.PreDestroy;
11 import java.time.Clock;
12 import java.time.Duration;
13 import java.time.Instant;
15 import java.util.concurrent.ExecutionException;
16 import java.util.concurrent.ExecutorService;
17 import java.util.concurrent.locks.Condition;
18 import java.util.concurrent.locks.Lock;
19 import java.util.concurrent.locks.ReentrantLock;
20 import java.util.regex.Pattern;
/**
 * Kafka consumer that subscribes to a single topic and accumulates, per
 * partition and per record key, statistics about the words found in the
 * record values. The instance registers itself as the rebalance listener
 * of its subscription and is handed to an executor as a {@link Runnable}.
 *
 * NOTE(review): only part of the class is visible in this excerpt (braces
 * and short statements are missing), so the notes below describe only what
 * the visible lines show.
 */
24 @RequiredArgsConstructor
25 public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
// Matches runs of one or more non-word characters; used to split record values into words.
27 final static Pattern PATTERN = Pattern.compile("\\W+");
// Executor this instance is submitted to when it is started.
30 private final ExecutorService executor;
// Store for StatisticsDocument instances (looked up on assignment, saved on revocation and periodically).
31 private final PartitionStatisticsRepository repository;
// Identifier of this consumer instance; it is the first argument of every log message.
32 private final String id;
// Name of the topic that is subscribed to.
33 private final String topic;
// Time source used for the periodic storing of statistics.
34 private final Clock clock;
// Time that has to elapse after the last store before statistics are stored again.
35 private final Duration commitInterval;
// The underlying Kafka consumer (String keys and String values).
36 private final Consumer<String, String> consumer;
// NOTE(review): the code using lock/condition is not visible in this excerpt -
// presumably they guard `running`/`exception` and signal a finished stop; confirm.
38 private final Lock lock = new ReentrantLock();
39 private final Condition condition = lock.newCondition();
// Initially false; the assignments are not visible here (presumably toggled on start and exit - confirm).
40 private boolean running = false;
// Exposed through exitStatus() as an Optional; null means that none was recorded.
41 private Exception exception;
// Counter reported in log messages as the number of consumed messages.
42 private long consumed = 0;
// partition number -> record key -> word -> Long value
// (presumably an occurrence count; the update step is not fully visible here).
44 private final Map<Integer, Map<String, Map<String, Long>>> seen = new HashMap<>();
/**
 * Rebalance callback: for every revoked partition, removes its in-memory
 * statistics from {@code seen} and saves them to the repository together
 * with the consumer's current position for that partition.
 *
 * @param partitions the partitions that are taken away from this consumer
 */
48 public void onPartitionsRevoked(Collection<TopicPartition> partitions)
50 partitions.forEach(tp ->
52 Integer partition = tp.partition();
// Current position of the consumer for this partition (logged as "offset of next message").
53 Long newOffset = consumer.position(tp);
55 "{} - removing partition: {}, offset of next message {})",
// Drop the in-memory statistics; `removed` is null if the partition had no entry in `seen`.
59 Map<String, Map<String, Long>> removed = seen.remove(partition);
// NOTE(review): the position is queried a second time here although it was
// already read into newOffset above - consider reusing that value; confirm
// that nothing can move the position in between.
60 repository.save(new StatisticsDocument(partition, removed, consumer.position(tp)));
/**
 * Rebalance callback: for every newly assigned partition, looks up a stored
 * StatisticsDocument by the partition number (as a string), seeks to its
 * offset if that offset is non-negative, and registers its statistics in
 * {@code seen}.
 *
 * @param partitions the partitions that are assigned to this consumer
 */
65 public void onPartitionsAssigned(Collection<TopicPartition> partitions)
67 partitions.forEach(tp ->
69 Integer partition = tp.partition();
70 Long offset = consumer.position(tp);
71 log.info("{} - adding partition: {}, offset={}", id, partition, offset);
// NOTE(review): the receiver of findById is not visible in this excerpt - presumably `repository`; confirm.
72 StatisticsDocument document =
74 .findById(Integer.toString(partition))
// NOTE(review): orElse(...) constructs the fallback document even when a
// stored one was found; orElseGet(() -> ...) would build it only when needed.
75 .orElse(new StatisticsDocument(partition));
76 if (document.offset >= 0)
78 // Only seek, if a stored offset was found
79 // Otherwise: Use initial offset, generated by Kafka
80 consumer.seek(tp, document.offset);
// From here on, records of this partition are accounted in the loaded (or fresh) statistics.
82 seen.put(partition, document.statistics);
// Consume loop. NOTE(review): the method header is not visible in this excerpt -
// presumably this is run() from Runnable; confirm.
// Visible behaviour: subscribes to the topic, polls for records, updates the
// per-partition statistics in `seen` for every record, and stores statistics
// and positions in the repository once more than commitInterval has elapsed.
92 log.info("{} - Subscribing to topic {}", id, topic);
// `this` is passed as the rebalance listener of the subscription.
93 consumer.subscribe(Arrays.asList(topic), this);
// Reference point for the periodic storing of statistics.
95 Instant lastCommit = clock.instant();
// poll() is given a timeout of one second.
99 ConsumerRecords<String, String> records =
100 consumer.poll(Duration.ofSeconds(1));
102 // Do something with the data...
103 log.info("{} - Received {} messages", id, records.count());
104 for (ConsumerRecord<String, String> record : records)
107 "{} - {}: {}/{} - {}={}",
118 Integer partition = record.partition();
// The record key is used as the user name.
119 String user = record.key();
// NOTE(review): `users` is null if `seen` has no entry for this partition; the
// visible code relies on onPartitionsAssigned having registered it - confirm.
120 Map<String, Map<String, Long>> users = seen.get(partition);
122 Map<String, Long> words = users.get(user);
// Fresh word map for a user; the guarding condition is not visible here
// (presumably `words == null`).
125 words = new HashMap<>();
126 users.put(user, words);
// Split the record value on runs of non-word characters.
// NOTE(review): a record with a null value would make split(...) throw a
// NullPointerException - confirm that such records cannot occur on this topic.
129 for (String word : PATTERN.split(record.value()))
// The lines between get and put are not visible; presumably `num` is
// initialised or incremented there.
131 Long num = words.get(word);
140 words.put(word, num);
// True once more than commitInterval has passed since lastCommit.
144 if (lastCommit.plus(commitInterval).isBefore(clock.instant()))
146 log.debug("Storing data and offsets, last commit: {}", lastCommit);
// Saves one document per partition in `seen`, including the consumer's current
// position for it. NOTE(review): the lambda parameter `partiton` is misspelled ("partition").
147 seen.forEach((partiton, statistics) -> repository.save(
148 new StatisticsDocument(
151 consumer.position(new TopicPartition(topic, partiton)))));
152 lastCommit = clock.instant();
// A WakeupException is treated as the request to stop consuming.
// NOTE(review): the log message below spells "commiting" (should be "committing").
156 catch(WakeupException e)
158 log.info("{} - RIIING! Request to stop consumption - commiting current offsets!", id);
// A record that could not be deserialized: partition and offset are taken from the exception.
161 catch(RecordDeserializationException e)
163 TopicPartition tp = e.topicPartition();
164 long offset = e.offset();
166 "{} - Could not deserialize message on topic {} with offset={}: {}",
170 e.getCause().toString());
// Any other error is logged together with the exception itself (passed as last argument).
176 log.error("{} - Unexpected error: {}", id, e.toString(), e);
181 log.info("{} - Consumer-Thread exiting", id);
// Shutdown helpers. Only fragments of their bodies are visible in this excerpt.
// NOTE(review): presumably shutdown() delegates to shutdown(Exception) without an exception - confirm.
185 private void shutdown()
// Visible part: unsubscribes the consumer from the topic; a failure while
// unsubscribing is reported with the error message further below.
// NOTE(review): how the argument `e` is recorded is not visible here.
190 private void shutdown(Exception e)
197 log.info("{} - Unsubscribing from topic {}", id, topic);
198 consumer.unsubscribe();
203 "{} - Error while unsubscribing from topic {}: {}",
// Accessor for the statistics: partition number -> record key -> word -> Long value.
// NOTE(review): the body is not visible here - presumably it returns `seen`;
// confirm whether callers get the live map or a copy.
221 public Map<Integer, Map<String, Map<String, Long>>> getSeen()
// Start logic. NOTE(review): the method header and the guarding condition are
// not visible in this excerpt - presumably start() checking `running`; confirm.
// Rejects the call with an IllegalStateException if the instance is already running.
232 throw new IllegalStateException("Consumer instance " + id + " is already running!");
234 log.info("{} - Starting - consumed {} messages before", id, consumed);
// Hands this instance (a Runnable) to the executor.
237 executor.submit(this);
/**
 * Stops the consumer and logs the number of messages consumed so far.
 *
 * NOTE(review): the statements between the two log messages are not visible in
 * this excerpt. The method is {@code synchronized} while the class also
 * declares an explicit {@code Lock} - confirm that both mechanisms are intended.
 *
 * @throws IllegalStateException if the instance is not running (per the message below; the condition is not visible)
 * @throws InterruptedException declared by the signature; the blocking call that can raise it is not visible here
 */
245 public synchronized void stop() throws InterruptedException
251 throw new IllegalStateException("Consumer instance " + id + " is not running!");
253 log.info("{} - Stopping", id);
256 log.info("{} - Stopped - consumed {} messages so far", id, consumed);
/**
 * Logs the destruction of this instance and the total number of consumed messages.
 *
 * NOTE(review): any statements besides the two log calls are not visible in this
 * excerpt; the declared exceptions suggest that more happens here - confirm.
 * Presumably annotated as a pre-destroy hook (javax.annotation.PreDestroy is imported).
 */
265 public void destroy() throws ExecutionException, InterruptedException
267 log.info("{} - Destroy!", id);
268 log.info("{}: Consumed {} messages in total, exiting!", id, consumed);
// Reports a boolean state of this instance.
// NOTE(review): the body is not visible here - presumably it returns the `running` flag; confirm.
271 public boolean running()
/**
 * Returns the recorded exception wrapped in an {@link Optional}; the Optional
 * is empty if the {@code exception} field is null.
 *
 * @throws IllegalStateException if the instance is still running (per the message below; the condition is not visible)
 */
284 public Optional<Exception> exitStatus()
290 throw new IllegalStateException("No exit-status available: Consumer instance " + id + " is running!");
292 return Optional.ofNullable(exception);