X-Git-Url: http://juplo.de/gitweb/?a=blobdiff_plain;f=src%2Fmain%2Fjava%2Fde%2Fjuplo%2Fkafka%2FEndlessConsumer.java;h=01f9057a055aa360bb8d98cca1641073df19afe0;hb=a6a0a22a5fa34a01b0e8b2bc1e0e2b82d7b60f33;hp=2a3445c87a5d96837a77b6a2619a5ccede523f30;hpb=e53004f3133b737699e995a3b18fff28203a0e8c;p=demos%2Fkafka%2Ftraining diff --git a/src/main/java/de/juplo/kafka/EndlessConsumer.java b/src/main/java/de/juplo/kafka/EndlessConsumer.java index 2a3445c..01f9057 100644 --- a/src/main/java/de/juplo/kafka/EndlessConsumer.java +++ b/src/main/java/de/juplo/kafka/EndlessConsumer.java @@ -8,25 +8,32 @@ import org.apache.kafka.common.errors.RecordDeserializationException; import org.apache.kafka.common.errors.WakeupException; import javax.annotation.PreDestroy; +import java.time.Clock; import java.time.Duration; +import java.time.Instant; import java.util.*; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.regex.Pattern; @Slf4j @RequiredArgsConstructor -public class EndlessConsumer implements ConsumerRebalanceListener, Runnable +public class EndlessConsumer implements ConsumerRebalanceListener, Runnable { + final static Pattern PATTERN = Pattern.compile("\\W+"); + + private final ExecutorService executor; private final PartitionStatisticsRepository repository; private final String id; private final String topic; - private final Consumer consumer; - private final java.util.function.Consumer> handler; + private final Clock clock; + private final Duration commitInterval; + private final Consumer consumer; private final Lock lock = new ReentrantLock(); private final Condition condition = lock.newCondition(); @@ -34,8 +41,7 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnabl private Exception exception; private long consumed = 0; - private final Map> seen = new HashMap<>(); - private final Map offsets = new HashMap<>(); + private final Map>> seen = new HashMap<>(); @Override @@ -45,24 +51,12 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnabl { Integer partition = tp.partition(); Long newOffset = consumer.position(tp); - Long oldOffset = offsets.remove(partition); log.info( - "{} - removing partition: {}, consumed {} records (offset {} -> {})", + "{} - removing partition: {}, offset of next message {})", id, partition, - newOffset - oldOffset, - oldOffset, newOffset); - Map removed = seen.remove(partition); - for (String key : removed.keySet()) - { - log.info( - "{} - Seen {} messages for partition={}|key={}", - id, - removed.get(key), - partition, - key); - } + Map> removed = seen.remove(partition); repository.save(new StatisticsDocument(partition, removed, consumer.position(tp))); }); } @@ -79,7 +73,12 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnabl repository .findById(Integer.toString(partition)) .orElse(new StatisticsDocument(partition)); - consumer.seek(tp, document.offset); + if (document.offset >= 0) + { + // Only seek, if a stored offset was found + // Otherwise: Use initial offset, generated by Kafka + consumer.seek(tp, document.offset); + } seen.put(partition, document.statistics); }); } @@ -93,14 +92,16 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnabl log.info("{} - Subscribing to topic {}", id, topic); consumer.subscribe(Arrays.asList(topic), this); + Instant lastCommit = clock.instant(); + while (true) { - ConsumerRecords records = + ConsumerRecords records = consumer.poll(Duration.ofSeconds(1)); // Do something with the data... log.info("{} - Received {} messages", id, records.count()); - for (ConsumerRecord record : records) + for (ConsumerRecord record : records) { log.info( "{} - {}: {}/{} - {}={}", @@ -112,27 +113,44 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnabl record.value() ); - handler.accept(record); - consumed++; Integer partition = record.partition(); - String key = record.key() == null ? "NULL" : record.key().toString(); - Map byKey = seen.get(partition); + String user = record.key(); + Map> users = seen.get(partition); - if (!byKey.containsKey(key)) - byKey.put(key, 0l); + Map words = users.get(user); + if (words == null) + { + words = new HashMap<>(); + users.put(user, words); + } - long seenByKey = byKey.get(key); - seenByKey++; - byKey.put(key, seenByKey); + for (String word : PATTERN.split(record.value())) + { + Long num = words.get(word); + if (num == null) + { + num = 1l; + } + else + { + num++; + } + words.put(word, num); + } } - seen.forEach((partiton, statistics) -> repository.save( - new StatisticsDocument( - partiton, - statistics, - consumer.position(new TopicPartition(topic, partiton))))); + if (lastCommit.plus(commitInterval).isBefore(clock.instant())) + { + log.debug("Storing data and offsets, last commit: {}", lastCommit); + seen.forEach((partiton, statistics) -> repository.save( + new StatisticsDocument( + partiton, + statistics, + consumer.position(new TopicPartition(topic, partiton))))); + lastCommit = clock.instant(); + } } } catch(WakeupException e) @@ -200,7 +218,7 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnabl } } - public Map> getSeen() + public Map>> getSeen() { return seen; } @@ -224,7 +242,7 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnabl } } - public synchronized void stop() throws ExecutionException, InterruptedException + public synchronized void stop() throws InterruptedException { lock.lock(); try @@ -247,22 +265,7 @@ public class EndlessConsumer implements ConsumerRebalanceListener, Runnabl public void destroy() throws ExecutionException, InterruptedException { log.info("{} - Destroy!", id); - try - { - stop(); - } - catch (IllegalStateException e) - { - log.info("{} - Was already stopped", id); - } - catch (Exception e) - { - log.error("{} - Unexpected exception while trying to stop the consumer", id, e); - } - finally - { - log.info("{}: Consumed {} messages in total, exiting!", id, consumed); - } + log.info("{}: Consumed {} messages in total, exiting!", id, consumed); } public boolean running()