X-Git-Url: http://juplo.de/gitweb/?a=blobdiff_plain;f=src%2Fmain%2Fjava%2Fde%2Fjuplo%2Fkafka%2FEndlessConsumer.java;h=01f9057a055aa360bb8d98cca1641073df19afe0;hb=a6a0a22a5fa34a01b0e8b2bc1e0e2b82d7b60f33;hp=25632048aaa0bf637bfacb9acdeb4ea11aab30b6;hpb=620191782035383e0083dc348e4941c9cec0d994;p=demos%2Fkafka%2Ftraining diff --git a/src/main/java/de/juplo/kafka/EndlessConsumer.java b/src/main/java/de/juplo/kafka/EndlessConsumer.java index 2563204..01f9057 100644 --- a/src/main/java/de/juplo/kafka/EndlessConsumer.java +++ b/src/main/java/de/juplo/kafka/EndlessConsumer.java @@ -1,115 +1,98 @@ package de.juplo.kafka; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; -import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.consumer.*; import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.RecordDeserializationException; import org.apache.kafka.common.errors.WakeupException; -import org.apache.kafka.common.serialization.StringDeserializer; import javax.annotation.PreDestroy; +import java.time.Clock; import java.time.Duration; +import java.time.Instant; import java.util.*; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; +import java.util.regex.Pattern; @Slf4j -public class EndlessConsumer implements Runnable +@RequiredArgsConstructor +public class EndlessConsumer implements ConsumerRebalanceListener, Runnable { + final static Pattern PATTERN = Pattern.compile("\\W+"); + + private final ExecutorService executor; private final PartitionStatisticsRepository repository; - private final String bootstrapServer; - private final String groupId; private final String id; private final String topic; - private final String autoOffsetReset; + private final Clock clock; + private final Duration commitInterval; + private final Consumer consumer; - private AtomicBoolean running = new AtomicBoolean(); + private final Lock lock = new ReentrantLock(); + private final Condition condition = lock.newCondition(); + private boolean running = false; + private Exception exception; private long consumed = 0; - private KafkaConsumer consumer = null; - private Future future = null; - private final Map> seen = new HashMap<>(); + private final Map>> seen = new HashMap<>(); - public EndlessConsumer( - ExecutorService executor, - PartitionStatisticsRepository repository, - String bootstrapServer, - String groupId, - String clientId, - String topic, - String autoOffsetReset) + @Override + public void onPartitionsRevoked(Collection partitions) { - this.executor = executor; - this.repository = repository; - this.bootstrapServer = bootstrapServer; - this.groupId = groupId; - this.id = clientId; - this.topic = topic; - this.autoOffsetReset = autoOffsetReset; + partitions.forEach(tp -> + { + Integer partition = tp.partition(); + Long newOffset = consumer.position(tp); + log.info( + "{} - removing partition: {}, offset of next message {})", + id, + partition, + newOffset); + Map> removed = seen.remove(partition); + repository.save(new StatisticsDocument(partition, removed, consumer.position(tp))); + }); } @Override - public void run() + public void onPartitionsAssigned(Collection partitions) { - try + partitions.forEach(tp -> { - Properties props = new Properties(); - props.put("bootstrap.servers", bootstrapServer); - props.put("group.id", groupId); - props.put("client.id", id); - props.put("enable.auto.commit", false); - props.put("auto.offset.reset", autoOffsetReset); - props.put("metadata.max.age.ms", "1000"); - props.put("key.deserializer", StringDeserializer.class.getName()); - props.put("value.deserializer", StringDeserializer.class.getName()); + Integer partition = tp.partition(); + Long offset = consumer.position(tp); + log.info("{} - adding partition: {}, offset={}", id, partition, offset); + StatisticsDocument document = + repository + .findById(Integer.toString(partition)) + .orElse(new StatisticsDocument(partition)); + if (document.offset >= 0) + { + // Only seek, if a stored offset was found + // Otherwise: Use initial offset, generated by Kafka + consumer.seek(tp, document.offset); + } + seen.put(partition, document.statistics); + }); + } - this.consumer = new KafkaConsumer<>(props); + @Override + public void run() + { + try + { log.info("{} - Subscribing to topic {}", id, topic); - consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener() - { - @Override - public void onPartitionsRevoked(Collection partitions) - { - partitions.forEach(tp -> - { - log.info("{} - removing partition: {}", id, tp); - Map removed = seen.remove(tp.partition()); - for (String key : removed.keySet()) - { - log.info( - "{} - Seen {} messages for partition={}|key={}", - id, - removed.get(key), - tp.partition(), - key); - } - repository.save(new StatisticsDocument(tp.partition(), removed, consumer.position(tp))); - }); - } + consumer.subscribe(Arrays.asList(topic), this); - @Override - public void onPartitionsAssigned(Collection partitions) - { - partitions.forEach(tp -> - { - log.info("{} - adding partition: {}", id, tp); - StatisticsDocument document = - repository - .findById(Integer.toString(tp.partition())) - .orElse(new StatisticsDocument(tp.partition())); - consumer.seek(tp, document.offset); - seen.put(tp.partition(), document.statistics); - }); - } - }); + Instant lastCommit = clock.instant(); while (true) { @@ -120,7 +103,6 @@ public class EndlessConsumer implements Runnable log.info("{} - Received {} messages", id, records.count()); for (ConsumerRecord record : records) { - consumed++; log.info( "{} - {}: {}/{} - {}={}", id, @@ -131,84 +113,187 @@ public class EndlessConsumer implements Runnable record.value() ); + consumed++; + Integer partition = record.partition(); - String key = record.key() == null ? "NULL" : record.key(); - Map byKey = seen.get(partition); + String user = record.key(); + Map> users = seen.get(partition); - if (!byKey.containsKey(key)) - byKey.put(key, 0); + Map words = users.get(user); + if (words == null) + { + words = new HashMap<>(); + users.put(user, words); + } - int seenByKey = byKey.get(key); - seenByKey++; - byKey.put(key, seenByKey); + for (String word : PATTERN.split(record.value())) + { + Long num = words.get(word); + if (num == null) + { + num = 1l; + } + else + { + num++; + } + words.put(word, num); + } } - seen.forEach((partiton, statistics) -> repository.save( - new StatisticsDocument( - partiton, - statistics, - consumer.position(new TopicPartition(topic, partiton))))); + if (lastCommit.plus(commitInterval).isBefore(clock.instant())) + { + log.debug("Storing data and offsets, last commit: {}", lastCommit); + seen.forEach((partiton, statistics) -> repository.save( + new StatisticsDocument( + partiton, + statistics, + consumer.position(new TopicPartition(topic, partiton))))); + lastCommit = clock.instant(); + } } } catch(WakeupException e) { - log.info("{} - RIIING!", id); + log.info("{} - RIIING! Request to stop consumption - commiting current offsets!", id); + shutdown(); + } + catch(RecordDeserializationException e) + { + TopicPartition tp = e.topicPartition(); + long offset = e.offset(); + log.error( + "{} - Could not deserialize message on topic {} with offset={}: {}", + id, + tp, + offset, + e.getCause().toString()); + + shutdown(e); } catch(Exception e) { log.error("{} - Unexpected error: {}", id, e.toString(), e); - running.set(false); // Mark the instance as not running + shutdown(e); } finally { - log.info("{} - Closing the KafkaConsumer", id); - consumer.close(); log.info("{} - Consumer-Thread exiting", id); } } - public Map> getSeen() + private void shutdown() + { + shutdown(null); + } + + private void shutdown(Exception e) + { + lock.lock(); + try + { + try + { + log.info("{} - Unsubscribing from topic {}", id, topic); + consumer.unsubscribe(); + } + catch (Exception ue) + { + log.error( + "{} - Error while unsubscribing from topic {}: {}", + id, + topic, + ue.toString()); + } + finally + { + running = false; + exception = e; + condition.signal(); + } + } + finally + { + lock.unlock(); + } + } + + public Map>> getSeen() { return seen; } - public synchronized void start() + public void start() { - boolean stateChanged = running.compareAndSet(false, true); - if (!stateChanged) - throw new RuntimeException("Consumer instance " + id + " is already running!"); + lock.lock(); + try + { + if (running) + throw new IllegalStateException("Consumer instance " + id + " is already running!"); - log.info("{} - Starting - consumed {} messages before", id, consumed); - future = executor.submit(this); + log.info("{} - Starting - consumed {} messages before", id, consumed); + running = true; + exception = null; + executor.submit(this); + } + finally + { + lock.unlock(); + } } - public synchronized void stop() throws ExecutionException, InterruptedException + public synchronized void stop() throws InterruptedException { - boolean stateChanged = running.compareAndSet(true, false); - if (!stateChanged) - throw new RuntimeException("Consumer instance " + id + " is not running!"); - - log.info("{} - Stopping", id); - consumer.wakeup(); - future.get(); - log.info("{} - Stopped - consumed {} messages so far", id, consumed); + lock.lock(); + try + { + if (!running) + throw new IllegalStateException("Consumer instance " + id + " is not running!"); + + log.info("{} - Stopping", id); + consumer.wakeup(); + condition.await(); + log.info("{} - Stopped - consumed {} messages so far", id, consumed); + } + finally + { + lock.unlock(); + } } @PreDestroy public void destroy() throws ExecutionException, InterruptedException { log.info("{} - Destroy!", id); + log.info("{}: Consumed {} messages in total, exiting!", id, consumed); + } + + public boolean running() + { + lock.lock(); try { - stop(); + return running; } - catch (IllegalStateException e) + finally { - log.info("{} - Was already stopped", id); + lock.unlock(); + } + } + + public Optional exitStatus() + { + lock.lock(); + try + { + if (running) + throw new IllegalStateException("No exit-status available: Consumer instance " + id + " is running!"); + + return Optional.ofNullable(exception); } finally { - log.info("{}: Consumed {} messages in total, exiting!", id, consumed); + lock.unlock(); } } }