package de.juplo.kafka.wordcount.top10;
-import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
+import de.juplo.kafka.wordcount.avro.Entry;
+import de.juplo.kafka.wordcount.avro.Key;
+import de.juplo.kafka.wordcount.avro.Ranking;
+import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
+import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde;
+import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerializer;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
-import org.apache.kafka.streams.KafkaStreams;
-import org.apache.kafka.streams.KeyValue;
-import org.apache.kafka.streams.StreamsBuilder;
-import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.*;
+import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.Grouped;
+import org.apache.kafka.streams.kstream.Materialized;
+import org.apache.kafka.streams.kstream.Produced;
import org.springframework.boot.SpringApplication;
import org.springframework.context.ConfigurableApplicationContext;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
import java.util.Properties;
import java.util.concurrent.CompletableFuture;
import java.util.regex.Pattern;
StreamsBuilder builder = new StreamsBuilder();
builder
- .<String, String>stream(properties.getInputTopic())
- .map((keyJson, countStr) ->
- {
- try
- {
- Key key = mapper.readValue(keyJson, Key.class);
- Long count = Long.parseLong(countStr);
- Entry entry = Entry.of(key.getWord(), count);
- String entryJson = mapper.writeValueAsString(entry);
- return new KeyValue<>(key.getUsername(), entryJson);
- }
- catch (JsonProcessingException e)
- {
- throw new RuntimeException(e);
- }
- })
- .groupByKey()
+ // A null key serde makes Kafka Streams fall back to the configured default key serde; counts are read as longs.
+ .<Key, Long>stream(properties.getInputTopic(), Consumed.with(null, Serdes.Long()))
+ // Re-key every (username, word) count by username, so that all words of one user are aggregated together.
+ .map((key, count) -> new KeyValue<>(
+ key.getUsername(),
+ Entry.newBuilder().setWord(key.getWord()).setCount(count).build()))
+ .groupByKey(Grouped.keySerde(Serdes.String()))
.aggregate(
- () -> "{\"entries\" : []}",
- (username, entryJson, rankingJson) ->
- {
- try
- {
- Ranking ranking = mapper.readValue(rankingJson, Ranking.class);
- ranking.add(mapper.readValue(entryJson, Entry.class));
- return mapper.writeValueAsString(ranking);
- }
- catch (JsonProcessingException e)
- {
- throw new RuntimeException(e);
- }
- }
- )
+ // Every user starts with an empty ranking.
+ () -> Ranking.newBuilder().setEntries(new LinkedList<Entry>()).build(),
+ // Merges one updated word count into the ranking of the user: at most ten entries, highest count first.
+ (username, newEntry, ranking) -> {
+ List<Entry> entries = new LinkedList<>(ranking.getEntries());
+
+ // Drop a previous entry for the same word, so that each word is ranked at most once.
+ entries.removeIf(entry -> entry.getWord().equals(newEntry.getWord()));
+
+ // Skip all entries that outrank the new one; on equal counts the new entry is placed first.
+ int position = 0;
+ while (position < entries.size() && entries.get(position).getCount() > newEntry.getCount())
+ {
+ position++;
+ }
+ // Insert exactly once. If no entry is outranked, this appends to the end of the ranking.
+ entries.add(position, newEntry);
+
+ // Keep only the ten highest-ranked entries.
+ if (entries.size() > 10)
+ {
+ entries.subList(10, entries.size()).clear();
+ }
+
+ ranking.setEntries(entries);
+ return ranking;
+ })
.toStream()
- .to(properties.getOutputTopic());
+ .to(properties.getOutputTopic(), Produced.keySerde(Serdes.String()));
// Kafka Streams configuration; application id and bootstrap servers are taken from the application properties.
Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, properties.getApplicationId());
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, properties.getBootstrapServer());
// Default key/value serdes: the String serdes are replaced by SpecificAvroSerde, configured with the schema registry URL.
- props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
- props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
+ props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, SpecificAvroSerde.class);
+ props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, SpecificAvroSerde.class);
+ props.put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, properties.getSchemaRegistry());
// Consumer offset reset policy is set to "earliest".
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// The topology is built once, so that its description can be logged before the KafkaStreams instance is created from it.
- streams = new KafkaStreams(builder.build(), props);
+ Topology topology = builder.build();
+ log.info("Topology:\n-----------------\n\n{}-----------------", topology.describe());
+
+ streams = new KafkaStreams(topology, props);
streams.setUncaughtExceptionHandler((Throwable e) ->
{
log.error("Unexpected error!", e);