X-Git-Url: https://juplo.de/gitweb/?p=demos%2Fkafka%2Fdeduplication;a=blobdiff_plain;f=create-data.sh;h=ea429a393b47e7ad28385d558a806c04a2b479e2;hp=d64ca41dbc0910580624798b2684c1d533b53630;hb=5ad8c11b5c507e85b256888683b9508999e1b135;hpb=a84868373c1803dca9f6203085240b107af44257 diff --git a/create-data.sh b/create-data.sh index d64ca41..ea429a3 100755 --- a/create-data.sh +++ b/create-data.sh @@ -1,17 +1,38 @@ #!/bin/bash -for i in `seq 1 333`; do echo $(($i%7)):$i; done > data.txt -for i in `seq 70 578`; do echo $(($i%7)):$i; done >> data.txt -for i in `seq 400 1211`; do echo $(($i%7)):$i; done >> data.txt -for i in `seq 1000 1111`; do echo $(($i%7)):$i; done >> data.txt -for i in `seq 1200 1711`; do echo $(($i%7)):$i; done >> data.txt -for i in `seq 1688 3333`; do echo $(($i%7)):$i; done >> data.txt -for i in `seq 2567 3500`; do echo $(($i%7)):$i; done >> data.txt +function name() +{ + case $(($1%7)) in + 0) echo "peter" + ;; + 1) echo "franz" + ;; + 2) echo "ute" + ;; + 3) echo "klaus" + ;; + 4) echo "paul" + ;; + 5) echo "petra" + ;; + 6) echo "siggi" + ;; + esac +} + +for i in `seq 1 333`; do echo $(name $i):$i; done > data.txt +for i in `seq 70 578`; do echo $(name $i):$i; done >> data.txt +for i in `seq 400 1211`; do echo $(name $i):$i; done >> data.txt +for i in `seq 1000 1111`; do echo $(name $i):$i; done >> data.txt +for i in `seq 1200 1711`; do echo $(name $i):$i; done >> data.txt +for i in `seq 1688 3333`; do echo $(name $i):$i; done >> data.txt +for i in `seq 2567 3500`; do echo $(name $i):$i; done >> data.txt + +for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^peter/ { print $2 }'; done > expected_peter.txt +for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^franz/ { print $2 }'; done > expected_franz.txt +for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^ute/ { print $2 }'; done > expected_ute.txt +for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^klaus/ { print $2 }'; done > expected_klaus.txt +for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^paul/ { print $2 }'; done > expected_paul.txt +for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^petra/ { print $2 }'; done > expected_petra.txt +for i in `seq 1 3500`; do echo $(name $i):$i | awk -F: '/^siggi/ { print $2 }'; done > expected_siggi.txt -for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^0 > expected_0.txt -for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^1 > expected_1.txt -for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^2 > expected_2.txt -for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^3 > expected_3.txt -for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^4 > expected_4.txt -for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^5 > expected_5.txt -for i in `seq 1 3500`; do echo $(($i%7)):$i; done | grep ^6 > expected_6.txt