X-Git-Url: https://juplo.de/gitweb/?a=blobdiff_plain;f=src%2Fmain%2Fjava%2Fde%2Fjuplo%2Fkafka%2Fwordcount%2Fsplitter%2FSplitterStreamProcessor.java;h=66188a719e2815887ae4b9585c02bc25611a9103;hb=520528638e7487c845d7fb0f39066ce7d249297b;hp=3eca8f4cef62a7584b2a930ff11e81db2f56312d;hpb=e8eee1b8a20b4aeadcb5384dd8473bc8a543a259;p=demos%2Fkafka%2Fwordcount

Review notes (text before the first "diff --git" line is ignored by patch tools):
- Line breaks, indentation and the generic type arguments of KStream were
  reconstructed from a whitespace-collapsed copy of this diff. If context
  lines do not match the target file, apply with
  `git apply --ignore-whitespace`.
- JsonDeserializer.TRUSTED_PACKAGES now receives the package name of Recording
  instead of its fully qualified class name; one explanatory comment line was
  added, so the last hunk covers 24 new lines instead of 23.

diff --git a/src/main/java/de/juplo/kafka/wordcount/splitter/SplitterStreamProcessor.java b/src/main/java/de/juplo/kafka/wordcount/splitter/SplitterStreamProcessor.java
index 3eca8f4..66188a7 100644
--- a/src/main/java/de/juplo/kafka/wordcount/splitter/SplitterStreamProcessor.java
+++ b/src/main/java/de/juplo/kafka/wordcount/splitter/SplitterStreamProcessor.java
@@ -9,6 +9,9 @@ import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.kstream.KStream;
 import org.springframework.boot.SpringApplication;
 import org.springframework.context.ConfigurableApplicationContext;
+import org.springframework.kafka.support.serializer.JsonDeserializer;
+import org.springframework.kafka.support.serializer.JsonSerde;
+import org.springframework.kafka.support.serializer.JsonSerializer;
 import org.springframework.stereotype.Component;
 
 import jakarta.annotation.PostConstruct;
@@ -25,7 +28,7 @@ import static org.apache.kafka.streams.errors.StreamsUncaughtExceptionHandler.St
 @Component
 public class SplitterStreamProcessor
 {
-  final static Pattern PATTERN = Pattern.compile("\\W+");
+  final static Pattern PATTERN = Pattern.compile("[^\\p{IsAlphabetic}]+");
 
   public final KafkaStreams streams;
 
@@ -36,16 +39,24 @@ public class SplitterStreamProcessor
   {
     StreamsBuilder builder = new StreamsBuilder();
 
-    KStream<String, String> source = builder.stream(properties.getInputTopic());
+    KStream<String, Recording> source = builder.stream(properties.getInputTopic());
+
     source
-        .flatMapValues(sentence -> Arrays.asList(PATTERN.split(sentence)))
+        .flatMapValues(recording -> Arrays
+            .stream(PATTERN.split(recording.getSentence()))
+            .map(word -> Word.of(recording.getUser(), word))
+            .toList())
         .to(properties.getOutputTopic());
 
     Properties props = new Properties();
     props.put(StreamsConfig.APPLICATION_ID_CONFIG, properties.getApplicationId());
     props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, properties.getBootstrapServer());
-    props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
-    props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
+    props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.StringSerde.class.getName());
+    props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, JsonSerde.class.getName());
+    // Trusted packages are matched against package names, so pass the package of Recording, not its class name.
+    props.put(JsonDeserializer.TRUSTED_PACKAGES, Recording.class.getPackageName());
+    props.put(JsonDeserializer.VALUE_DEFAULT_TYPE, Recording.class.getName());
+    props.put(JsonSerializer.ADD_TYPE_INFO_HEADERS, false);
     props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
 
     streams = new KafkaStreams(builder.build(), props);