From: Kai Moritz
Date: Sat, 10 Oct 2020 16:30:17 +0000 (+0200)
Subject: Using names instead of numbers as key for the messages
X-Git-Tag: streams-deduplicator-1.0.0~4
X-Git-Url: http://juplo.de/gitweb/?a=commitdiff_plain;h=d0dd1937d8dbb0b540559dbd397506654c50de0c;p=demos%2Fkafka%2Fdeduplication

Using names instead of numbers as key for the messages
---

diff --git a/README.sh b/README.sh
index dcd632a..3b0e897 100755
--- a/README.sh
+++ b/README.sh
@@ -46,10 +46,10 @@ cat data.txt | kafkacat -K: -b localhost:9092 -t input
 kafkacat -C -b localhost:9092 -t input -e | wc -l
 kafkacat -C -b localhost:9092 -t output -e | wc -l
 
-kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | grep ^0 > result_0.txt
-kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | grep ^1 > result_1.txt
-kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | grep ^2 > result_2.txt
-kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | grep ^3 > result_3.txt
-kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | grep ^4 > result_4.txt
-kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | grep ^5 > result_5.txt
-kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | grep ^6 > result_6.txt
+kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | awk -F: '/^peter/ { print $2 }' > result_peter.txt
+kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | awk -F: '/^franz/ { print $2 }' > result_franz.txt
+kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | awk -F: '/^ute/ { print $2 }' > result_ute.txt
+kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | awk -F: '/^klaus/ { print $2 }' > result_klaus.txt
+kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | awk -F: '/^paul/ { print $2 }' > result_paul.txt
+kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | awk -F: '/^petra/ { print $2 }' > result_petra.txt
+kafkacat -C -b localhost:9092 -t output -e -f'%k:%s\n' | awk -F: '/^siggi/ { print $2 }' > result_siggi.txt
diff --git a/create-data.sh b/create-data.sh
index d64ca41..ea429a3 100755
--- a/create-data.sh
+++ b/create-data.sh
@@ -1,17 +1,38 @@
 #!/bin/bash
 
-for i in `seq 1 333`; do echo $(($i%7)):$i; done > data.txt
-for i in `seq 70 578`; do echo $(($i%7)):$i; done >> data.txt
-for i in `seq 400 1211`; do echo $(($i%7)):$i; done >> data.txt
-for i in `seq 1000 1111`; do echo $(($i%7)):$i; done >> data.txt
-for i in `seq 1200 1711`; do echo $(($i%7)):$i; done >> data.txt
-for i in `seq 1688 3333`; do echo $(($i%7)):$i; done >> data.txt
-for i in `seq 2567 3500`; do echo $(($i%7)):$i; done >> data.txt
+function name()
+{
+  case $(($1%7)) in
+    0) echo "peter"
+      ;;
+    1) echo "franz"
+      ;;
+    2) echo "ute"
+      ;;
+    3) echo "klaus"
+      ;;
+    4) echo "paul"
+      ;;
+    5) echo "petra"
+      ;;
+    6) echo "siggi"
+      ;;
+  esac
+}
+
+for i in `seq 1 333`; do echo $(name $i):$i; done > data.txt
+for i in `seq 70 578`; do echo $(name $i):$i; done >> data.txt
+for i in `seq 400 1211`; do echo $(name $i):$i; done >> data.txt
+for i in `seq 1000 1111`; do echo $(name $i):$i; done >> data.txt
+for i in `seq 1200 1711`; do echo $(name $i):$i; done >> data.txt
+for i in `seq 1688 3333`; do echo $(name $i):$i; done >> data.txt
+for i in `seq 2567 3500`; do echo $(name $i):$i; done >> data.txt
+
+for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^peter/ { print $2 }'; done > expected_peter.txt
+for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^franz/ { print $2 }'; done > expected_franz.txt
+for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^ute/ { print $2 }'; done > expected_ute.txt
+for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^klaus/ { print $2 }'; done > expected_klaus.txt
+for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^paul/ { print $2 }'; done > expected_paul.txt
+for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^petra/ { print $2 }'; done > expected_petra.txt
+for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^siggi/ { print $2 }'; done > expected_siggi.txt
 
-for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^0 > expected_0.txt
-for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^1 > expected_1.txt
-for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^2 > expected_2.txt
-for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^3 > expected_3.txt
-for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^4 > expected_4.txt
-for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^5 > expected_5.txt
-for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^6 > expected_6.txt