]>
git.proxmox.com Git - ceph.git/blob - ceph/src/fmt/support/printable.py
3 # This script is based on
4 # https://github.com/rust-lang/rust/blob/master/library/core/src/unicode/printable.py
5 # distributed under https://github.com/rust-lang/rust/blob/master/LICENSE-MIT.
7 # This script uses the following Unicode tables:
11 from collections
import namedtuple
16 NUM_CODEPOINTS
=0x110000
21 if current
is None or i
!= current
[1] or i
in (0x10000, 0x20000):
22 if current
is not None:
27 if current
is not None:
30 def get_escaped(codepoints
):
32 if (c
.class_
or "Cn") in "Cc Cf Cs Co Cn Zl Zp Zs".split() and c
.value
!= ord(' '):
37 return open(os
.path
.basename(f
))
38 except FileNotFoundError
:
39 subprocess
.run(["curl", "-O", f
], check
=True)
40 return open(os
.path
.basename(f
))
42 Codepoint
= namedtuple('Codepoint', 'value class_')
44 def get_codepoints(f
):
45 r
= csv
.reader(f
, delimiter
=";")
49 codepoint
= int(row
[0], 16)
53 if class_first
is not None:
54 if not name
.endswith("Last>"):
55 raise ValueError("Missing Last after First")
57 for c
in range(prev_codepoint
+ 1, codepoint
):
58 yield Codepoint(c
, class_first
)
61 if name
.endswith("First>"):
64 yield Codepoint(codepoint
, class_
)
65 prev_codepoint
= codepoint
67 if class_first
is not None:
68 raise ValueError("Missing Last after First")
70 for c
in range(prev_codepoint
+ 1, NUM_CODEPOINTS
):
71 yield Codepoint(c
, None)
73 def compress_singletons(singletons
):
74 uppers
= [] # (upper, # items in lowers)
80 if len(uppers
) == 0 or uppers
[-1][0] != upper
:
81 uppers
.append((upper
, 1))
83 upper
, count
= uppers
[-1]
84 uppers
[-1] = upper
, count
+ 1
89 def compress_normal(normal
):
90 # lengths 0x00..0x7f are encoded as 00, 01, ..., 7e, 7f
91 # lengths 0x80..0x7fff are encoded as 80 80, 80 81, ..., ff fe, ff ff
92 compressed
= [] # [truelen, (truelenaux), falselen, (falselenaux)]
95 for start
, count
in normal
:
96 truelen
= start
- prev_start
98 prev_start
= start
+ count
100 assert truelen
< 0x8000 and falselen
< 0x8000
103 entry
.append(0x80 |
(truelen
>> 8))
104 entry
.append(truelen
& 0xff)
106 entry
.append(truelen
& 0x7f)
108 entry
.append(0x80 |
(falselen
>> 8))
109 entry
.append(falselen
& 0xff)
111 entry
.append(falselen
& 0x7f)
113 compressed
.append(entry
)
117 def print_singletons(uppers
, lowers
, uppersname
, lowersname
):
118 print(" static constexpr singleton {}[] = {{".format(uppersname
))
120 print(" {{{:#04x}, {}}},".format(u
, c
))
122 print(" static constexpr unsigned char {}[] = {{".format(lowersname
))
123 for i
in range(0, len(lowers
), 8):
124 print(" {}".format(" ".join("{:#04x},".format(l
) for l
in lowers
[i
:i
+8])))
127 def print_normal(normal
, normalname
):
128 print(" static constexpr unsigned char {}[] = {{".format(normalname
))
130 print(" {}".format(" ".join("{:#04x},".format(i
) for i
in v
)))
134 file = get_file("https://www.unicode.org/Public/UNIDATA/UnicodeData.txt")
136 codepoints
= get_codepoints(file)
145 for a
, b
in to_ranges(get_escaped(codepoints
)):
147 extra
.append((a
, b
- a
))
150 singletons1
.append(a
& ~CUTOFF
)
152 singletons0
.append(a
)
155 singletons1
.append(a
& ~CUTOFF
)
156 singletons1
.append((a
+ 1) & ~CUTOFF
)
158 singletons0
.append(a
)
159 singletons0
.append(a
+ 1)
162 extra
.append((a
, b
- a
))
164 normal1
.append((a
& ~CUTOFF
, b
- a
))
166 normal0
.append((a
, b
- a
))
168 singletons0u
, singletons0l
= compress_singletons(singletons0
)
169 singletons1u
, singletons1l
= compress_singletons(singletons1
)
170 normal0
= compress_normal(normal0
)
171 normal1
= compress_normal(normal1
)
174 FMT_FUNC auto is_printable(uint32_t cp) -> bool {\
176 print_singletons(singletons0u
, singletons0l
, 'singletons0', 'singletons0_lower')
177 print_singletons(singletons1u
, singletons1l
, 'singletons1', 'singletons1_lower')
178 print_normal(normal0
, 'normal0')
179 print_normal(normal1
, 'normal1')
181 auto lower = static_cast<uint16_t>(cp);
183 return is_printable(lower, singletons0,
184 sizeof(singletons0) / sizeof(*singletons0),
185 singletons0_lower, normal0, sizeof(normal0));
188 return is_printable(lower, singletons1,
189 sizeof(singletons1) / sizeof(*singletons1),
190 singletons1_lower, normal1, sizeof(normal1));
194 print(" if (0x{:x} <= cp && cp < 0x{:x}) return false;".format(a
, a
+ b
))
198 """.format(NUM_CODEPOINTS
))
200 if __name__
== '__main__':