]>
Commit | Line | Data |
---|---|---|
11fdf7f2 | 1 | /* |
7c673cae FG |
2 | * Copyright (c) 2016-present, Facebook, Inc. |
3 | * All rights reserved. | |
4 | * | |
11fdf7f2 TL |
5 | * This source code is licensed under both the BSD-style license (found in the |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |
7 | * in the COPYING file in the root directory of this source tree). | |
7c673cae FG |
8 | */ |
9 | #include "Options.h" | |
11fdf7f2 | 10 | #include "util.h" |
7c673cae FG |
11 | #include "utils/ScopeGuard.h" |
12 | ||
13 | #include <algorithm> | |
14 | #include <cassert> | |
15 | #include <cstdio> | |
16 | #include <cstring> | |
17 | #include <iterator> | |
18 | #include <thread> | |
7c673cae FG |
19 | #include <vector> |
20 | ||
7c673cae FG |
21 | |
22 | namespace pzstd { | |
23 | ||
24 | namespace { | |
/// Returns the number of worker threads to use when `-p` is not given.
///
/// A build-time override (PZSTD_NUM_THREADS) takes precedence; otherwise the
/// value reported by the standard library is used, which may be 0 when the
/// hardware concurrency cannot be determined.
unsigned defaultNumThreads() {
#if defined(PZSTD_NUM_THREADS)
  const unsigned threadCount = PZSTD_NUM_THREADS;
#else
  const unsigned threadCount = std::thread::hardware_concurrency();
#endif
  return threadCount;
}
32 | ||
/// Parses a run of leading decimal digits from `*arg`.
///
/// On return `*arg` points at the first character that is not a digit (all
/// digits are consumed even when the value does not fit). If there are no
/// leading digits the result is 0 and `*arg` is left untouched.
///
/// Values too large for `unsigned` saturate at the maximum `unsigned` value
/// instead of silently wrapping around, so that callers' range checks reject
/// them rather than accepting an unrelated small number.
unsigned parseUnsigned(const char **arg) {
  constexpr unsigned kMaxValue = static_cast<unsigned>(-1);
  unsigned result = 0;
  bool overflowed = false;
  for (; **arg >= '0' && **arg <= '9'; ++(*arg)) {
    const unsigned digit = static_cast<unsigned>(**arg - '0');
    // result * 10 + digit fits iff result <= (kMaxValue - digit) / 10.
    if (overflowed || result > (kMaxValue - digit) / 10) {
      overflowed = true;
      continue; // keep consuming digits so *arg ends up past the number
    }
    result = result * 10 + digit;
  }
  return overflowed ? kMaxValue : result;
}
42 | ||
/// Fetches the argument of a short option.
///
/// `options` points at the option letter. When more characters follow the
/// letter in the same token (e.g. `-p4`), they are the argument. Otherwise the
/// next element of `argv` is consumed and `i` is advanced to it.
///
/// Returns nullptr (after printing an error) when the option is the last
/// command line token and therefore has no argument.
const char *getArgument(const char *options, const char **argv, int &i,
                        int argc) {
  const bool attached = options[1] != 0;
  if (attached) {
    return options + 1;
  }
  // The argument must be the following command line token.
  ++i;
  if (i != argc) {
    return argv[i];
  }
  std::fprintf(stderr, "Option -%c requires an argument, but none provided\n",
               *options);
  return nullptr;
}
56 | ||
// Compression level used when none is given, and the highest level accepted
// without --ultra.
constexpr unsigned kDefaultCompressionLevel = 3;
constexpr unsigned kMaxNonUltraCompressionLevel = 19;

// "-" names standard input when used as an input file and standard output
// when used as the output file.
constexpr char kStdIn[] = "-";
constexpr char kStdOut[] = "-";

// File name suffix appended on compression and stripped on decompression.
const std::string kZstdExtension = ".zst";

// Platform-specific sink that discards everything written to it; used as the
// output file in test mode.
#if defined(_WIN32)
const char nullOutput[] = "nul";
#else
const char nullOutput[] = "/dev/null";
#endif
68 | ||
/// Reports on stderr that `option` names a feature pzstd does not implement.
void notSupported(const char *option) {
  FILE *const errorStream = stderr;
  std::fprintf(errorStream, "Operation not supported: %s\n", option);
}
72 | ||
73 | void usage() { | |
74 | std::fprintf(stderr, "Usage:\n"); | |
75 | std::fprintf(stderr, " pzstd [args] [FILE(s)]\n"); | |
76 | std::fprintf(stderr, "Parallel ZSTD options:\n"); | |
9f95a23c | 77 | std::fprintf(stderr, " -p, --processes # : number of threads to use for (de)compression (default:<numcpus>)\n"); |
7c673cae FG |
78 | |
79 | std::fprintf(stderr, "ZSTD options:\n"); | |
80 | std::fprintf(stderr, " -# : # compression level (1-%d, default:%d)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel); | |
81 | std::fprintf(stderr, " -d, --decompress : decompression\n"); | |
82 | std::fprintf(stderr, " -o file : result stored into `file` (only if 1 input file)\n"); | |
11fdf7f2 | 83 | std::fprintf(stderr, " -f, --force : overwrite output without prompting, (de)compress links\n"); |
7c673cae FG |
84 | std::fprintf(stderr, " --rm : remove source file(s) after successful (de)compression\n"); |
85 | std::fprintf(stderr, " -k, --keep : preserve source file(s) (default)\n"); | |
86 | std::fprintf(stderr, " -h, --help : display help and exit\n"); | |
87 | std::fprintf(stderr, " -V, --version : display version number and exit\n"); | |
88 | std::fprintf(stderr, " -v, --verbose : verbose mode; specify multiple times to increase log level (default:2)\n"); | |
89 | std::fprintf(stderr, " -q, --quiet : suppress warnings; specify twice to suppress errors too\n"); | |
90 | std::fprintf(stderr, " -c, --stdout : force write to standard output, even if it is the console\n"); | |
91 | #ifdef UTIL_HAS_CREATEFILELIST | |
92 | std::fprintf(stderr, " -r : operate recursively on directories\n"); | |
93 | #endif | |
94 | std::fprintf(stderr, " --ultra : enable levels beyond %i, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel()); | |
95 | std::fprintf(stderr, " -C, --check : integrity check (default)\n"); | |
96 | std::fprintf(stderr, " --no-check : no integrity check\n"); | |
97 | std::fprintf(stderr, " -t, --test : test compressed file integrity\n"); | |
98 | std::fprintf(stderr, " -- : all arguments after \"--\" are treated as files\n"); | |
99 | } | |
100 | } // anonymous namespace | |
101 | ||
102 | Options::Options() | |
103 | : numThreads(defaultNumThreads()), maxWindowLog(23), | |
104 | compressionLevel(kDefaultCompressionLevel), decompress(false), | |
105 | overwrite(false), keepSource(true), writeMode(WriteMode::Auto), | |
106 | checksum(true), verbosity(2) {} | |
107 | ||
108 | Options::Status Options::parse(int argc, const char **argv) { | |
109 | bool test = false; | |
110 | bool recursive = false; | |
111 | bool ultra = false; | |
112 | bool forceStdout = false; | |
11fdf7f2 | 113 | bool followLinks = false; |
7c673cae FG |
114 | // Local copy of input files, which are pointers into argv. |
115 | std::vector<const char *> localInputFiles; | |
116 | for (int i = 1; i < argc; ++i) { | |
117 | const char *arg = argv[i]; | |
118 | // Protect against empty arguments | |
119 | if (arg[0] == 0) { | |
120 | continue; | |
121 | } | |
122 | // Everything after "--" is an input file | |
123 | if (!std::strcmp(arg, "--")) { | |
124 | ++i; | |
125 | std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles)); | |
126 | break; | |
127 | } | |
128 | // Long arguments that don't have a short option | |
129 | { | |
130 | bool isLongOption = true; | |
131 | if (!std::strcmp(arg, "--rm")) { | |
132 | keepSource = false; | |
133 | } else if (!std::strcmp(arg, "--ultra")) { | |
134 | ultra = true; | |
135 | maxWindowLog = 0; | |
136 | } else if (!std::strcmp(arg, "--no-check")) { | |
137 | checksum = false; | |
138 | } else if (!std::strcmp(arg, "--sparse")) { | |
139 | writeMode = WriteMode::Sparse; | |
140 | notSupported("Sparse mode"); | |
141 | return Status::Failure; | |
142 | } else if (!std::strcmp(arg, "--no-sparse")) { | |
143 | writeMode = WriteMode::Regular; | |
144 | notSupported("Sparse mode"); | |
145 | return Status::Failure; | |
146 | } else if (!std::strcmp(arg, "--dictID")) { | |
147 | notSupported(arg); | |
148 | return Status::Failure; | |
149 | } else if (!std::strcmp(arg, "--no-dictID")) { | |
150 | notSupported(arg); | |
151 | return Status::Failure; | |
152 | } else { | |
153 | isLongOption = false; | |
154 | } | |
155 | if (isLongOption) { | |
156 | continue; | |
157 | } | |
158 | } | |
159 | // Arguments with a short option simply set their short option. | |
160 | const char *options = nullptr; | |
161 | if (!std::strcmp(arg, "--processes")) { | |
162 | options = "p"; | |
163 | } else if (!std::strcmp(arg, "--version")) { | |
164 | options = "V"; | |
165 | } else if (!std::strcmp(arg, "--help")) { | |
166 | options = "h"; | |
167 | } else if (!std::strcmp(arg, "--decompress")) { | |
168 | options = "d"; | |
169 | } else if (!std::strcmp(arg, "--force")) { | |
170 | options = "f"; | |
171 | } else if (!std::strcmp(arg, "--stdout")) { | |
172 | options = "c"; | |
173 | } else if (!std::strcmp(arg, "--keep")) { | |
174 | options = "k"; | |
175 | } else if (!std::strcmp(arg, "--verbose")) { | |
176 | options = "v"; | |
177 | } else if (!std::strcmp(arg, "--quiet")) { | |
178 | options = "q"; | |
179 | } else if (!std::strcmp(arg, "--check")) { | |
180 | options = "C"; | |
181 | } else if (!std::strcmp(arg, "--test")) { | |
182 | options = "t"; | |
183 | } else if (arg[0] == '-' && arg[1] != 0) { | |
184 | options = arg + 1; | |
185 | } else { | |
186 | localInputFiles.emplace_back(arg); | |
187 | continue; | |
188 | } | |
189 | assert(options != nullptr); | |
190 | ||
191 | bool finished = false; | |
192 | while (!finished && *options != 0) { | |
193 | // Parse the compression level | |
194 | if (*options >= '0' && *options <= '9') { | |
195 | compressionLevel = parseUnsigned(&options); | |
196 | continue; | |
197 | } | |
198 | ||
199 | switch (*options) { | |
200 | case 'h': | |
201 | case 'H': | |
202 | usage(); | |
203 | return Status::Message; | |
204 | case 'V': | |
205 | std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING); | |
206 | return Status::Message; | |
207 | case 'p': { | |
208 | finished = true; | |
209 | const char *optionArgument = getArgument(options, argv, i, argc); | |
210 | if (optionArgument == nullptr) { | |
211 | return Status::Failure; | |
212 | } | |
213 | if (*optionArgument < '0' || *optionArgument > '9') { | |
214 | std::fprintf(stderr, "Option -p expects a number, but %s provided\n", | |
215 | optionArgument); | |
216 | return Status::Failure; | |
217 | } | |
218 | numThreads = parseUnsigned(&optionArgument); | |
219 | if (*optionArgument != 0) { | |
220 | std::fprintf(stderr, | |
221 | "Option -p expects a number, but %u%s provided\n", | |
222 | numThreads, optionArgument); | |
223 | return Status::Failure; | |
224 | } | |
225 | break; | |
226 | } | |
227 | case 'o': { | |
228 | finished = true; | |
229 | const char *optionArgument = getArgument(options, argv, i, argc); | |
230 | if (optionArgument == nullptr) { | |
231 | return Status::Failure; | |
232 | } | |
233 | outputFile = optionArgument; | |
234 | break; | |
235 | } | |
236 | case 'C': | |
237 | checksum = true; | |
238 | break; | |
239 | case 'k': | |
240 | keepSource = true; | |
241 | break; | |
242 | case 'd': | |
243 | decompress = true; | |
244 | break; | |
245 | case 'f': | |
246 | overwrite = true; | |
247 | forceStdout = true; | |
11fdf7f2 | 248 | followLinks = true; |
7c673cae FG |
249 | break; |
250 | case 't': | |
251 | test = true; | |
252 | decompress = true; | |
253 | break; | |
254 | #ifdef UTIL_HAS_CREATEFILELIST | |
255 | case 'r': | |
256 | recursive = true; | |
257 | break; | |
258 | #endif | |
259 | case 'c': | |
260 | outputFile = kStdOut; | |
261 | forceStdout = true; | |
262 | break; | |
263 | case 'v': | |
264 | ++verbosity; | |
265 | break; | |
266 | case 'q': | |
267 | --verbosity; | |
268 | // Ignore them for now | |
269 | break; | |
270 | // Unsupported options from Zstd | |
271 | case 'D': | |
272 | case 's': | |
273 | notSupported("Zstd dictionaries."); | |
274 | return Status::Failure; | |
275 | case 'b': | |
276 | case 'e': | |
277 | case 'i': | |
278 | case 'B': | |
279 | notSupported("Zstd benchmarking options."); | |
280 | return Status::Failure; | |
281 | default: | |
282 | std::fprintf(stderr, "Invalid argument: %s\n", arg); | |
283 | return Status::Failure; | |
284 | } | |
285 | if (!finished) { | |
286 | ++options; | |
287 | } | |
288 | } // while (*options != 0); | |
289 | } // for (int i = 1; i < argc; ++i); | |
290 | ||
291 | // Set options for test mode | |
292 | if (test) { | |
293 | outputFile = nullOutput; | |
294 | keepSource = true; | |
295 | } | |
296 | ||
297 | // Input file defaults to standard input if not provided. | |
298 | if (localInputFiles.empty()) { | |
299 | localInputFiles.emplace_back(kStdIn); | |
300 | } | |
301 | ||
302 | // Check validity of input files | |
303 | if (localInputFiles.size() > 1) { | |
304 | const auto it = std::find(localInputFiles.begin(), localInputFiles.end(), | |
305 | std::string{kStdIn}); | |
306 | if (it != localInputFiles.end()) { | |
307 | std::fprintf( | |
308 | stderr, | |
309 | "Cannot specify standard input when handling multiple files\n"); | |
310 | return Status::Failure; | |
311 | } | |
312 | } | |
313 | if (localInputFiles.size() > 1 || recursive) { | |
314 | if (!outputFile.empty() && outputFile != nullOutput) { | |
315 | std::fprintf( | |
316 | stderr, | |
317 | "Cannot specify an output file when handling multiple inputs\n"); | |
318 | return Status::Failure; | |
319 | } | |
320 | } | |
321 | ||
11fdf7f2 TL |
322 | g_utilDisplayLevel = verbosity; |
323 | // Remove local input files that are symbolic links | |
324 | if (!followLinks) { | |
325 | std::remove_if(localInputFiles.begin(), localInputFiles.end(), | |
326 | [&](const char *path) { | |
327 | bool isLink = UTIL_isLink(path); | |
328 | if (isLink && verbosity >= 2) { | |
329 | std::fprintf( | |
330 | stderr, | |
331 | "Warning : %s is symbolic link, ignoring\n", | |
332 | path); | |
333 | } | |
334 | return isLink; | |
335 | }); | |
336 | } | |
337 | ||
7c673cae FG |
338 | // Translate input files/directories into files to (de)compress |
339 | if (recursive) { | |
340 | char *scratchBuffer = nullptr; | |
341 | unsigned numFiles = 0; | |
342 | const char **files = | |
343 | UTIL_createFileList(localInputFiles.data(), localInputFiles.size(), | |
11fdf7f2 | 344 | &scratchBuffer, &numFiles, followLinks); |
7c673cae FG |
345 | if (files == nullptr) { |
346 | std::fprintf(stderr, "Error traversing directories\n"); | |
347 | return Status::Failure; | |
348 | } | |
349 | auto guard = | |
350 | makeScopeGuard([&] { UTIL_freeFileList(files, scratchBuffer); }); | |
351 | if (numFiles == 0) { | |
352 | std::fprintf(stderr, "No files found\n"); | |
353 | return Status::Failure; | |
354 | } | |
355 | inputFiles.resize(numFiles); | |
356 | std::copy(files, files + numFiles, inputFiles.begin()); | |
357 | } else { | |
358 | inputFiles.resize(localInputFiles.size()); | |
359 | std::copy(localInputFiles.begin(), localInputFiles.end(), | |
360 | inputFiles.begin()); | |
361 | } | |
362 | localInputFiles.clear(); | |
363 | assert(!inputFiles.empty()); | |
364 | ||
365 | // If reading from standard input, default to standard output | |
366 | if (inputFiles[0] == kStdIn && outputFile.empty()) { | |
367 | assert(inputFiles.size() == 1); | |
368 | outputFile = "-"; | |
369 | } | |
370 | ||
371 | if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) { | |
372 | assert(inputFiles.size() == 1); | |
373 | std::fprintf(stderr, "Cannot read input from interactive console\n"); | |
374 | return Status::Failure; | |
375 | } | |
376 | if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) { | |
377 | std::fprintf(stderr, "Will not write to console stdout unless -c or -f is " | |
378 | "specified and decompressing\n"); | |
379 | return Status::Failure; | |
380 | } | |
381 | ||
382 | // Check compression level | |
383 | { | |
384 | unsigned maxCLevel = | |
385 | ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel; | |
386 | if (compressionLevel > maxCLevel || compressionLevel == 0) { | |
387 | std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel); | |
388 | return Status::Failure; | |
389 | } | |
390 | } | |
391 | ||
392 | // Check that numThreads is set | |
393 | if (numThreads == 0) { | |
394 | std::fprintf(stderr, "Invalid arguments: # of threads not specified " | |
395 | "and unable to determine hardware concurrency.\n"); | |
396 | return Status::Failure; | |
397 | } | |
398 | ||
399 | // Modify verbosity | |
400 | // If we are piping input and output, turn off interaction | |
401 | if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) { | |
402 | verbosity = 1; | |
403 | } | |
404 | // If we are in multi-file mode, turn off interaction | |
405 | if (inputFiles.size() > 1 && verbosity == 2) { | |
406 | verbosity = 1; | |
407 | } | |
408 | ||
409 | return Status::Success; | |
410 | } | |
411 | ||
412 | std::string Options::getOutputFile(const std::string &inputFile) const { | |
413 | if (!outputFile.empty()) { | |
414 | return outputFile; | |
415 | } | |
416 | // Attempt to add/remove zstd extension from the input file | |
417 | if (decompress) { | |
418 | int stemSize = inputFile.size() - kZstdExtension.size(); | |
419 | if (stemSize > 0 && inputFile.substr(stemSize) == kZstdExtension) { | |
420 | return inputFile.substr(0, stemSize); | |
421 | } else { | |
422 | return ""; | |
423 | } | |
424 | } else { | |
425 | return inputFile + kZstdExtension; | |
426 | } | |
427 | } | |
428 | } |