Simple processing pipeline
authorKai Moritz <kai@juplo.de>
Wed, 1 Sep 2021 20:51:08 +0000 (22:51 +0200)
committerKai Moritz <kai@juplo.de>
Mon, 11 Oct 2021 19:12:42 +0000 (21:12 +0200)
* Input: Sentences, that are correlated to usernames
* Words are counted by username
* Top-10 rankings can by queried by usernam

.gitignore [new file with mode: 0644]
.gitmodules [new file with mode: 0644]
README.sh [new file with mode: 0755]
counter [new submodule]
docker-compose.yml [new file with mode: 0644]
pom.xml [new file with mode: 0644]
query [new submodule]
recorder [new submodule]
top10 [new submodule]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..7339146
--- /dev/null
@@ -0,0 +1,31 @@
+target/
+!**/src/main/**/target/
+!**/src/test/**/target/
+
+### STS ###
+.apt_generated
+.classpath
+.factorypath
+.project
+.settings
+.springBeans
+.sts4-cache
+
+### IntelliJ IDEA ###
+.idea
+*.iws
+*.iml
+*.ipr
+
+### NetBeans ###
+/nbproject/private/
+/nbbuild/
+/dist/
+/nbdist/
+/.nb-gradle/
+build/
+!**/src/main/**/build/
+!**/src/test/**/build/
+
+### VS Code ###
+.vscode/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644 (file)
index 0000000..bc2ebe1
--- /dev/null
@@ -0,0 +1,20 @@
+[submodule "recorder"]
+       path = recorder
+       url = ./
+       branch = recorder
+[submodule "query"]
+       path = query
+       url = ./
+       branch = query
+[submodule "top10"]
+       path = top10
+       url = ./
+       branch = top10
+[submodule "wordcount"]
+       path = wordcount
+       url = ./
+       branch = wordcount
+[submodule "counter"]
+       path = counter
+       url = ./
+       branch = counter
diff --git a/README.sh b/README.sh
new file mode 100755 (executable)
index 0000000..6b0e566
--- /dev/null
+++ b/README.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+RECORDER=juplo/wordcount--recorder:1.0.0
+COUNTER=juplo/wordcount--counter:1.0.0
+TOP10=juplo/wordcount--top10:1.0.0
+QUERY=juplo/wordcount--query:1.0.0
+
+if [ "$1" = "cleanup" ]
+then
+  docker-compose down -v
+  docker image rm $RECORDER
+  docker image rm $COUNTER
+  docker image rm $TOP10
+  docker image rm $QUERY
+  exit
+fi
+
+if [[
+  $(docker image ls -q $RECORDER) == "" ||
+  $(docker image ls -q $COUNTER) == "" ||
+  $(docker image ls -q $TOP10) == "" ||
+  $(docker image ls -q $QUERY) == "" ||
+  "$1" = "build"
+]]
+then
+  git submodule update --init
+  mvn clean package || exit
+  docker-compose rm -svf recorder counter top10 query
+  mvn -f recorder/pom.xml docker:build
+  mvn -f counter/pom.xml docker:build
+  mvn -f top10/pom.xml docker:build
+  mvn -f query/pom.xml docker:build
+else
+  echo "Using existing images:"
+  docker image ls $RECORDER
+  docker image ls $COUNTER
+  docker image ls $TOP10
+  docker image ls $QUERY
+fi
+
+
+docker-compose up -d zookeeper kafka cli kafkacat
+
+echo "Waiting for the Kafka-Cluster to become ready..."
+docker-compose exec cli cub kafka-ready -b kafka:9092 1 60 > /dev/null 2>&1 || exit 1
+docker-compose exec cli zookeeper-shell zookeeper:2181 ls /brokers/ids
+
+echo "Starting all instances..."
+docker-compose up -d
diff --git a/counter b/counter
new file mode 160000 (submodule)
index 0000000..02ab54c
--- /dev/null
+++ b/counter
@@ -0,0 +1 @@
+Subproject commit 02ab54c6b3c099f5b0bd420fc0a37034badf1c71
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644 (file)
index 0000000..01181a7
--- /dev/null
@@ -0,0 +1,89 @@
+version: '3.2'
+services:
+  zookeeper:
+    image: confluentinc/cp-zookeeper:6.2.0
+    environment:
+      ZOOKEEPER_CLIENT_PORT: 2181
+    ports:
+      - 2181:2181
+
+  kafka:
+    image: confluentinc/cp-kafka:6.2.0
+    environment:
+      KAFKA_BROKER_ID: 1
+      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+      KAFKA_LISTENERS: DOCKER://:9092, LOCALHOST://:9082
+      KAFKA_ADVERTISED_LISTENERS: DOCKER://kafka:9092, LOCALHOST://localhost:9082
+      KAFKA_INTER_BROKER_LISTENER_NAME: DOCKER
+      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: DOCKER:PLAINTEXT, LOCALHOST:PLAINTEXT
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "false"
+    ports:
+      - 9092:9082
+      - 9082:9082
+    depends_on:
+      - zookeeper
+
+  recorder:
+    image: juplo/wordcount--recorder:1.0.0
+    environment:
+      juplo.wordcount.recorder.bootstrap-server: kafka:9092
+      juplo.wordcount.recorder.topic: recordings
+    ports:
+      - 8081:8080
+    depends_on:
+      - kafka
+
+  counter:
+    image: juplo/wordcount--counter:1.0.0
+    environment:
+      juplo.wordcount.counter.bootstrap-server: kafka:9092
+      juplo.wordcount.counter.application-id: counter
+      juplo.wordcount.counter.input-topic: recordings
+      juplo.wordcount.counter.output-topic: countings
+    ports:
+      - 8083:8080
+    depends_on:
+      - kafka
+
+  top10:
+    image: juplo/wordcount--top10:1.0.0
+    environment:
+      juplo.wordcount.top10.bootstrap-server: kafka:9092
+      juplo.wordcount.top10.application-id: top10
+      juplo.wordcount.top10.input-topic: countings
+      juplo.wordcount.top10.output-topic: top10
+    ports:
+      - 8084:8080
+    depends_on:
+      - kafka
+
+  query:
+    image: juplo/wordcount--query:1.0.0
+    environment:
+      juplo.wordcount.query.bootstrap-server: kafka:9092
+      juplo.wordcount.query.application-id: query
+      juplo.wordcount.query.ranking-input-topic: top10
+    ports:
+      - 8085:8080
+    depends_on:
+      - kafka
+
+  cli:
+    image: confluentinc/cp-kafka:6.2.0
+    command: bash -c "
+      cub kafka-ready -b kafka:9092 1 60 ;
+      kafka-topics --bootstrap-server kafka:9092 --create --partitions 10 --topic recordings ;
+      kafka-topics --bootstrap-server kafka:9092 --create --partitions 10 --topic users ;
+      kafka-topics --bootstrap-server kafka:9092 --create --partitions 10 --topic countings ;
+      kafka-topics --bootstrap-server kafka:9092 --create --partitions 10 --topic top10 ;
+      sleep infinity"
+
+  kafkacat:
+    image: confluentinc/cp-kafkacat:6.2.0
+    command: sleep infinity
+
+networks:
+  default:
+    external:
+      name: trion
diff --git a/pom.xml b/pom.xml
new file mode 100644 (file)
index 0000000..fb4601a
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+       <modelVersion>4.0.0</modelVersion>
+       <groupId>de.juplo.kafka.wordcount</groupId>
+       <artifactId>docker</artifactId>
+       <packaging>pom</packaging>
+       <version>1.0.0</version>
+       <name>Docker-Compose Setup</name>
+       <description>Docker-Compose Setup for the multiuser-wordcount example</description>
+
+       <modules>
+               <module>recorder</module>
+               <module>counter</module>
+               <module>top10</module>
+               <module>query</module>
+       </modules>
+</project>
diff --git a/query b/query
new file mode 160000 (submodule)
index 0000000..3bfe34e
--- /dev/null
+++ b/query
@@ -0,0 +1 @@
+Subproject commit 3bfe34e8bc0539932cf93361fe6f710738b37897
diff --git a/recorder b/recorder
new file mode 160000 (submodule)
index 0000000..338279a
--- /dev/null
+++ b/recorder
@@ -0,0 +1 @@
+Subproject commit 338279a329a06be7a141a3930d80b2a2805719dc
diff --git a/top10 b/top10
new file mode 160000 (submodule)
index 0000000..4b94d31
--- /dev/null
+++ b/top10
@@ -0,0 +1 @@
+Subproject commit 4b94d31fbd663cb277276def106be9873ec4a246