import org.apache.kafka.common.errors.WakeupException;
import javax.annotation.PreDestroy;
+import java.time.Clock;
import java.time.Duration;
+import java.time.Instant;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
+import java.util.regex.Pattern;
@Slf4j
@RequiredArgsConstructor
-public class EndlessConsumer<K, V> implements ConsumerRebalanceListener, Runnable
+public class EndlessConsumer implements ConsumerRebalanceListener, Runnable
{
+  // Tokeniser for message values: splits on runs of non-word characters,
+  // so each token fed into the per-user word counts is a single word.
+  static final Pattern PATTERN = Pattern.compile("\\W+");
+
+
private final ExecutorService executor;
private final PartitionStatisticsRepository repository;
private final String id;
private final String topic;
- private final Consumer<K, V> consumer;
- private final java.util.function.Consumer<ConsumerRecord<K, V>> handler;
+ private final Clock clock;
+ private final Duration commitInterval;
+ private final Consumer<String, String> consumer;
private final Lock lock = new ReentrantLock();
private final Condition condition = lock.newCondition();
private Exception exception;
private long consumed = 0;
- private final Map<Integer, Map<String, Long>> seen = new HashMap<>();
+ private final Map<Integer, Map<String, Map<String, Long>>> seen = new HashMap<>();
@Override
id,
partition,
newOffset);
-        Map<String, Long> removed = seen.remove(partition);
-        for (String key : removed.keySet())
-        {
-          log.info(
-              "{} - Seen {} messages for partition={}|key={}",
-              id,
-              removed.get(key),
-              partition,
-              key);
-        }
+        // On revocation, persist the per-user statistics for this partition
+        // together with the current consumer position, so the next owner can
+        // resume from consistent state (replaces the old log-only summary).
+        Map<String, Map<String, Long>> removed = seen.remove(partition);
        repository.save(new StatisticsDocument(partition, removed, consumer.position(tp)));
});
}
repository
.findById(Integer.toString(partition))
.orElse(new StatisticsDocument(partition));
- consumer.seek(tp, document.offset);
+ if (document.offset >= 0)
+ {
+ // Only seek, if a stored offset was found
+ // Otherwise: Use initial offset, generated by Kafka
+ consumer.seek(tp, document.offset);
+ }
seen.put(partition, document.statistics);
});
}
log.info("{} - Subscribing to topic {}", id, topic);
consumer.subscribe(Arrays.asList(topic), this);
+ Instant lastCommit = clock.instant();
+
while (true)
{
- ConsumerRecords<K, V> records =
+ ConsumerRecords<String, String> records =
consumer.poll(Duration.ofSeconds(1));
// Do something with the data...
log.info("{} - Received {} messages", id, records.count());
- for (ConsumerRecord<K, V> record : records)
+ for (ConsumerRecord<String, String> record : records)
{
log.info(
"{} - {}: {}/{} - {}={}",
record.value()
);
- handler.accept(record);
-
consumed++;
Integer partition = record.partition();
-            String key = record.key() == null ? "NULL" : record.key().toString();
-            Map<String, Long> byKey = seen.get(partition);
+            // The record key identifies the user whose words are counted.
+            // NOTE(review): unlike the removed code there is no null-key
+            // fallback here — confirm producers always set a key, otherwise
+            // a null map key ends up in the stored statistics.
+            String user = record.key();
+            Map<String, Map<String, Long>> users = seen.get(partition);
-            if (!byKey.containsKey(key))
-              byKey.put(key, 0l);
+            // Lazily create the per-user word-count map; computeIfAbsent is
+            // the idiomatic replacement for the explicit null-check/put pair.
+            Map<String, Long> words = users.computeIfAbsent(user, k -> new HashMap<>());
- long seenByKey = byKey.get(key);
- seenByKey++;
- byKey.put(key, seenByKey);
+            // Count every word of the message value for this user.
+            // Map.merge replaces the manual get/null-check/increment/put
+            // sequence, and 1L fixes the lowercase long literal (1l) that
+            // is easily misread as the number 11.
+            for (String word : PATTERN.split(record.value()))
+            {
+              words.merge(word, 1L, Long::sum);
+            }
}
-        seen.forEach((partiton, statistics) -> repository.save(
-            new StatisticsDocument(
-                partiton,
-                statistics,
-                consumer.position(new TopicPartition(topic, partiton)))));
+        // Persist statistics together with the matching consumer position at
+        // most once per commitInterval. Time comes from the injected Clock,
+        // which keeps this logic testable.
+        if (lastCommit.plus(commitInterval).isBefore(clock.instant()))
+        {
+          log.debug("Storing data and offsets, last commit: {}", lastCommit);
+          seen.forEach((partition, statistics) -> repository.save(
+              new StatisticsDocument(
+                  partition,
+                  statistics,
+                  consumer.position(new TopicPartition(topic, partition)))));
+          lastCommit = clock.instant();
+        }
}
}
catch(WakeupException e)
}
}
-  public Map<Integer, Map<String, Long>> getSeen()
+  // Returns the live partition -> user -> word-count statistics.
+  // NOTE(review): this exposes the internal mutable map, so callers can
+  // corrupt consumer state — consider returning an unmodifiable view.
+  public Map<Integer, Map<String, Map<String, Long>>> getSeen()
  {
    return seen;
  }
}
}
- public synchronized void stop() throws ExecutionException, InterruptedException
+ public synchronized void stop() throws InterruptedException
{
lock.lock();
try