<name>Wordcount-Top-10</name>
<description>Top-10 stream-processor of the multi-user wordcount-example</description>
<properties>
+ <avro.version>1.10.2</avro.version>
<docker-maven-plugin.version>0.33.0</docker-maven-plugin.version>
<java.version>11</java.version>
<kafka.version>2.8.0</kafka.version>
+ <confluent.version>6.2.1</confluent.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-streams</artifactId>
</dependency>
+ <dependency>
+ <groupId>io.confluent</groupId>
+ <artifactId>kafka-streams-avro-serde</artifactId>
+ <version>${confluent.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro</artifactId>
+ <version>${avro.version}</version>
+ </dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
</images>
</configuration>
</plugin>
+ <!-- Generates Java classes from the Avro schemas (*.avsc) before compilation. -->
+ <plugin>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro-maven-plugin</artifactId>
+ <version>${avro.version}</version>
+ <executions>
+ <execution>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>schema</goal>
+ </goals>
+ <configuration>
+ <sourceDirectory>${project.basedir}/src/main/resources/avro</sourceDirectory>
+ <!-- Use the configured build directory instead of a hard-coded "target", -->
+ <!-- and a dedicated sub-directory so other generators are not mixed in. -->
+ <outputDirectory>${project.build.directory}/generated-sources/avro</outputDirectory>
+ <fieldVisibility>PRIVATE</fieldVisibility>
+ <!-- Map Avro "string" to java.lang.String instead of CharSequence/Utf8. -->
+ <stringType>String</stringType>
+ <includes>
+ <include>*.avsc</include>
+ </includes>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
+ <!-- Additional repository used to resolve the io.confluent artifacts -->
+ <!-- (e.g. kafka-streams-avro-serde) declared in the dependencies. -->
+ <repositories>
+ <repository>
+ <id>confluent</id>
+ <url>https://packages.confluent.io/maven/</url>
+ </repository>
+ </repositories>
+
</project>
package de.juplo.kafka.wordcount.top10;
-import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
+import de.juplo.kafka.wordcount.avro.Entry;
+import de.juplo.kafka.wordcount.avro.Key;
+import de.juplo.kafka.wordcount.avro.Ranking;
+import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
+import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
StreamsBuilder builder = new StreamsBuilder();
+ // The default serdes of this application (Avro key, Long value) only match
+ // the input topic. Once map() re-keys the stream to <String, Entry>, the
+ // repartition topic, the state store and the output topic need explicit
+ // serdes; otherwise the default serializers are applied to types they
+ // cannot handle and the topology fails with a ClassCastException.
+ java.util.Map<String, Object> serdeConfig = java.util.Map.of(
+ AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG,
+ properties.getSchemaRegistry());
+ SpecificAvroSerde<Entry> entrySerde = new SpecificAvroSerde<>();
+ entrySerde.configure(serdeConfig, false); // false: configured as a value serde
+ SpecificAvroSerde<Ranking> rankingSerde = new SpecificAvroSerde<>();
+ rankingSerde.configure(serdeConfig, false); // false: configured as a value serde
builder
- .<String, String>stream(properties.getInputTopic())
- .map((keyJson, countStr) ->
- {
- try
- {
- Key key = mapper.readValue(keyJson, Key.class);
- Long count = Long.parseLong(countStr);
- Entry entry = Entry.of(key.getWord(), count);
- String entryJson = mapper.writeValueAsString(entry);
- return new KeyValue<>(key.getUsername(), entryJson);
- }
- catch (JsonProcessingException e)
- {
- throw new RuntimeException(e);
- }
- })
+ // Input records: key = Avro Key (username and word), value = count.
+ .<Key, Long>stream(properties.getInputTopic())
+ // Re-key by username and wrap word + count into an Avro Entry.
+ .map((key, count) -> new KeyValue<>(
+ key.getUsername(),
+ Entry.newBuilder().setWord(key.getWord()).setCount(count).build()))
- .groupByKey()
+ .groupByKey(org.apache.kafka.streams.kstream.Grouped.with(Serdes.String(), entrySerde))
.aggregate(
- () -> "{\"entries\" : []}",
- (username, entryJson, rankingJson) ->
- {
- try
- {
- Ranking ranking = mapper.readValue(rankingJson, Ranking.class);
- ranking.add(mapper.readValue(entryJson, Entry.class));
- return mapper.writeValueAsString(ranking);
- }
- catch (JsonProcessingException e)
- {
- throw new RuntimeException(e);
- }
- }
- )
+ // NOTE(review): build() presumably relies on the Ranking schema declaring
+ // a default for "entries" - confirm against the .avsc file.
+ () -> Ranking.newBuilder().build(),
+ // NOTE(review): the entry is only appended here. If the former
+ // Ranking.add(...) sorted or capped the list, that behavior is no
+ // longer applied - confirm this is intended.
+ (username, entry, ranking) -> {
+ ranking.getEntries().add(entry);
+ return ranking;
+ },
+ org.apache.kafka.streams.kstream.Materialized.with(Serdes.String(), rankingSerde))
.toStream()
- .to(properties.getOutputTopic());
+ .to(
+ properties.getOutputTopic(),
+ org.apache.kafka.streams.kstream.Produced.with(Serdes.String(), rankingSerde));
Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, properties.getApplicationId());
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, properties.getBootstrapServer());
- props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
- props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
+ // Default serdes: Avro for keys, Long for values. These are the types the
+ // topology reads from the input topic (stream of <Key, Long>).
+ // Both are given as class literals so the two entries are configured alike.
+ props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, SpecificAvroSerde.class);
+ props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.LongSerde.class);
+ // The Avro serde needs the schema registry to look up and register schemas.
+ props.put(AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, properties.getSchemaRegistry());
+ // Without committed offsets, start reading from the beginning of the input topic.
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
streams = new KafkaStreams(builder.build(), props);
--- /dev/null
+{
+ "type": "record",
+ "namespace": "de.juplo.kafka.wordcount.avro",
+ "name": "User",
+ "fields": [
+ {
+ "name": "username", "type": "string"
+ },
+ {
+ "name": "firstName", "type": "string", "default": ""
+ },
+ {
+ "name": "lastName", "type": "string", "default": ""
+ },
+ { "name": "sex", "type":
+ {
+ "type": "enum", "name": "Sex",
+ "symbols": [ "UNKNOWN", "FEMALE", "MALE", "OTHER" ], "default": "UNKNOWN"
+ },
+ "default": "UNKNOWN"
+ }
+ ]
+}