]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/rgw-gap-list-comparator
import quincy 17.2.0
[ceph.git] / ceph / src / rgw / rgw-gap-list-comparator
CommitLineData
f67539c2
TL
1#!/usr/bin/awk -f
2
#
# Version 1
#
# This awk script takes two similarly sorted lists and outputs
# only the lines which exist in both lists. The script takes
# three inputs:
#
# ./rgw-gap-list-comparator \
#     -v filetwo=gap-list-B.txt \
#     -v matchout=matched_lines.txt \
#     gap-list-A.txt
#
15
# usage() -- print the recommended workflow on stderr and terminate the
# script with exit status 1.
#
# The text walks the operator through running 'rgw-gap-list' twice, sorting
# both outputs under LC_ALL=C, and feeding the sorted files to this script.
# The file names used in step 2 and step 3 must agree, otherwise the
# instructions cannot be followed literally.
function usage() {
    print "" >> "/dev/stderr"
    print "" >> "/dev/stderr"
    print "The idea behind the script is to eliminate false positive hits" >> "/dev/stderr"
    print "from the rgw-gap-list tool which are due to upload timing of new" >> "/dev/stderr"
    print "objects during the tool's execution. To use the tool properly," >> "/dev/stderr"
    print "the following process should be followed:" >> "/dev/stderr"
    print "" >> "/dev/stderr"
    print "" >> "/dev/stderr"
    print " 1: Run the 'rgw-gap-list' tool twice" >> "/dev/stderr"
    print "" >> "/dev/stderr"
    print " 2: Sort the resulting map files:" >> "/dev/stderr"
    print " $ export LC_ALL=C" >> "/dev/stderr"
    print " $ sort gap-list-A.gap > gap-list-A.sorted.gap" >> "/dev/stderr"
    # Output name matches the input name used in step 3 below.
    print " $ sort gap-list-B.gap > gap-list-B.sorted.gap" >> "/dev/stderr"
    print " -- Where the A / B in the gap-list file names are the date/time associated with each of the respective 'rgw-gap-list' outputs" >> "/dev/stderr"
    print "" >> "/dev/stderr"
    # Refer to the script by the same name the example command line uses.
    print " 3: Run the 'rgw-gap-list-comparator' script over the two files:" >> "/dev/stderr"
    print " $ rm matched_lines.txt" >> "/dev/stderr"
    print " $ ./rgw-gap-list-comparator -v filetwo=gap-list-B.sorted.gap -v matchout=matched_lines.txt gap-list-A.sorted.gap" >> "/dev/stderr"
    print " -- Where the A / B in the gap-list file names are the date/time associated with each of the respective 'rgw-gap-list' outputs" >> "/dev/stderr"
    print "" >> "/dev/stderr"
    print " The resulting 'matched_lines.txt' will be a high confidence list of impacted objects with little to no false positives." >> "/dev/stderr"
    print "" >> "/dev/stderr"
    print "" >> "/dev/stderr"
    exit 1
}
43
# advance_f2() -- read the next line of the second list into f2line.
#
# Reads from the file named by the global `filetwo`.
#   - line read:      f2_count is incremented.
#   - no line read:   f2_eof is set to 1.
#   - read error:     (getline result below zero, e.g. the file cannot be
#                     opened) f2_eof is set, a message is written to stderr
#                     and the script exits with status 1. Treating this as
#                     a plain end of file would silently produce an empty
#                     match list.
#
# `rc` is a function-local variable (extra awk parameter); callers pass
# no arguments.
function advance_f2(    rc) {
    rc = (getline f2line < filetwo)
    if (rc > 0) {
        f2_count++
        return
    }

    f2_eof = 1
    if (rc < 0) {
        print "ERROR: unable to read second list '" filetwo "'" >> "/dev/stderr"
        exit 1
    }
}
51
# test_lines() -- compare the current record ($0) with the current line of
# the second list (f2line).
#
# Returns:
#   0  the lines are identical; the line has been appended to `matchout`,
#      lineoutcount incremented and the second list advanced by one line
#   1  $0 sorts before f2line
#   2  $0 sorts after f2line
#
# Both operands are concatenated with "" so that awk always performs a
# string comparison. Without this, two lines that both look like numbers
# would be compared numerically, which disagrees with the byte-wise sort
# order of the input lists.
#
# `cur` and `other` are function-local variables (extra awk parameters);
# callers pass no arguments.
function test_lines(    cur, other) {
    cur = $0 ""
    other = f2line ""

    if (cur == other) {
        print $0 >> matchout
        lineoutcount++
        advance_f2()
        return 0
    }

    return (cur > other) ? 2 : 1
}
64
# status_out() -- write one progress line to stderr: the current date/time
# followed by the number of lines read from each list (f1_count, f2_count)
# and the number of matched lines written so far (lineoutcount), in
# fixed-width columns.
function status_out() {
    printf("%s % 17d\t% 17d\t% 12d\n",
           get_date_time(),
           f1_count,
           f2_count,
           lineoutcount) >> "/dev/stderr"
}
68
# get_date_time() -- return the output of the external `date +%F\ %T`
# command as a string.
#
# The command is closed after every call so that the next call runs it
# again instead of reading from an exhausted pipe. The result is reset
# before reading, so a failed command yields an empty string rather than
# the value left over from a previous call.
#
# `cmd` and `stamp` are function-local variables (extra awk parameters);
# callers pass no arguments.
function get_date_time(    cmd, stamp) {
    # The doubled backslash yields a single backslash in the command line,
    # which escapes the space between the two format specifiers.
    cmd = "date +%F\\ %T"
    stamp = ""
    cmd | getline stamp
    close(cmd)
    return stamp
}
75
# Start-up: validate the required -v parameters, initialise the counters,
# prime the second list with its first line and print the status header.
BEGIN {
    if (filetwo == "" || matchout == "") {
        # All diagnostics go to stderr, including the headline message, so
        # nothing ends up on stdout when the invocation is wrong.
        print "" >> "/dev/stderr"
        print "" >> "/dev/stderr"
        print "Missing parameter." >> "/dev/stderr"
        print "" >> "/dev/stderr"
        print "" >> "/dev/stderr"
        usage()    # exits with status 1
    }

    f1_count = 0          # lines read from the first list (main input)
    f2_count = 0          # lines read from the second list (filetwo)
    lineoutcount = 0      # lines written to matchout
    f2_eof = 0            # set to 1 once the second list is exhausted
    statusevery = 100000  # print a status line every this many input records

    advance_f2()
    printf("%s File 1 Line Count\tFile 2 Line Count\tPotentially Impacted Objects\n", get_date_time()) >> "/dev/stderr"
    status_out()
}
95
96
# Per-record rule: merge step over the two sorted lists.
#
# For each line of the first list, stop as soon as the second list is
# exhausted (no further match is possible). Otherwise compare against the
# current line of the second list; if the first list is ahead, skip forward
# in the second list until it catches up, then compare once more.
{
    f1_count++

    if (f2_eof != 0) {
        exit 0
    }

    if (test_lines() == 2) {
        # Concatenating with "" forces a string comparison; two lines that
        # both look numeric would otherwise be compared as numbers, which
        # does not match the byte-wise sort order of the inputs.
        while (f2_eof == 0 && ($0 "") > (f2line "")) {
            advance_f2()
        }
        test_lines()
    }

    if ((f1_count % statusevery) == 0) {
        status_out()
    }
}
113
# Final status line; printed only when at least one record of the first
# list was read.
END {
    if (f1_count > 0)
        status_out()
}
119