]>
git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/ipc/generate_fuzz_corpus.cc
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// A command line executable that generates a bunch of valid IPC files
// containing example record batches. Those are used as fuzzing seeds
// to make fuzzing more efficient.
28 #include "arrow/io/file.h"
29 #include "arrow/io/memory.h"
30 #include "arrow/ipc/json_simple.h"
31 #include "arrow/ipc/test_common.h"
32 #include "arrow/ipc/writer.h"
33 #include "arrow/record_batch.h"
34 #include "arrow/result.h"
35 #include "arrow/testing/extension_type.h"
36 #include "arrow/util/io_util.h"
37 #include "arrow/util/key_value_metadata.h"
42 using ::arrow::internal::CreateDir
;
43 using ::arrow::internal::PlatformFilename
;
44 using internal::json::ArrayFromJSON
;
46 Result
<std::shared_ptr
<RecordBatch
>> MakeExtensionBatch() {
47 auto array
= ExampleUuid();
48 auto md
= key_value_metadata({"key1", "key2"}, {"value1", ""});
49 auto schema
= ::arrow::schema({field("f0", array
->type())}, md
);
50 return RecordBatch::Make(schema
, array
->length(), {array
});
// Builds a one-column record batch whose column "f0" is an array of type
// map(int16, int32), parsed from an inline JSON literal with ArrayFromJSON.
// Returns the batch, or the error Status if the JSON cannot be parsed.
// NOTE(review): only part of the JSON literal (and no closing brace) is
// visible in this view -- confirm against the full file before editing.
53 Result
<std::shared_ptr
<RecordBatch
>> MakeMapBatch() {
54 std::shared_ptr
<Array
> array
;
55 const char* json_input
= R
"(
57 [[0, 1], [1, 1], [2, 2], [3, 3], [4, 5], [5, 8]],
59 [[0, null], [1, null], [2, 0], [3, 1], [4, null], [5, 2]],
63 RETURN_NOT_OK(ArrayFromJSON(map(int16(), int32()), json_input
, &array
));
64 auto schema
= ::arrow::schema({field("f0", array
->type())});
65 return RecordBatch::Make(schema
, array
->length(), {array
});
68 Result
<std::vector
<std::shared_ptr
<RecordBatch
>>> Batches() {
69 std::vector
<std::shared_ptr
<RecordBatch
>> batches
;
70 std::shared_ptr
<RecordBatch
> batch
;
71 std::shared_ptr
<Array
> array
;
73 RETURN_NOT_OK(test::MakeNullRecordBatch(&batch
));
74 batches
.push_back(batch
);
75 RETURN_NOT_OK(test::MakeListRecordBatch(&batch
));
76 batches
.push_back(batch
);
77 RETURN_NOT_OK(test::MakeDictionary(&batch
));
78 batches
.push_back(batch
);
79 RETURN_NOT_OK(test::MakeTimestamps(&batch
));
80 batches
.push_back(batch
);
81 RETURN_NOT_OK(test::MakeFWBinary(&batch
));
82 batches
.push_back(batch
);
83 RETURN_NOT_OK(test::MakeStruct(&batch
));
84 batches
.push_back(batch
);
85 RETURN_NOT_OK(test::MakeUnion(&batch
));
86 batches
.push_back(batch
);
87 RETURN_NOT_OK(test::MakeFixedSizeListRecordBatch(&batch
));
88 batches
.push_back(batch
);
89 ARROW_ASSIGN_OR_RAISE(batch
, MakeExtensionBatch());
90 batches
.push_back(batch
);
91 ARROW_ASSIGN_OR_RAISE(batch
, MakeMapBatch());
92 batches
.push_back(batch
);
97 Result
<std::shared_ptr
<Buffer
>> SerializeRecordBatch(
98 const std::shared_ptr
<RecordBatch
>& batch
, bool is_stream_format
) {
99 ARROW_ASSIGN_OR_RAISE(auto sink
, io::BufferOutputStream::Create(1024));
100 std::shared_ptr
<RecordBatchWriter
> writer
;
101 if (is_stream_format
) {
102 ARROW_ASSIGN_OR_RAISE(writer
, MakeStreamWriter(sink
, batch
->schema()));
104 ARROW_ASSIGN_OR_RAISE(writer
, MakeFileWriter(sink
, batch
->schema()));
106 RETURN_NOT_OK(writer
->WriteRecordBatch(*batch
));
107 RETURN_NOT_OK(writer
->Close());
108 return sink
->Finish();
// Writes one seed file per example batch into `out_dir`.
//
// Creates `out_dir` via CreateDir, obtains the batches from Batches(), and for
// each one: validates it, serializes it (stream format if `is_stream_format`
// is true, file format otherwise), and writes the bytes to a file named
// "batch-<N>" inside the directory. Each output path is echoed to stderr.
// Any failing step returns its Status immediately.
// NOTE(review): the declaration (and starting value) of `sample_num`, the end
// of the lambda, and the tail of this function are not visible in this view.
111 Status
DoMain(bool is_stream_format
, const std::string
& out_dir
) {
112 ARROW_ASSIGN_OR_RAISE(auto dir_fn
, PlatformFilename::FromString(out_dir
));
113 RETURN_NOT_OK(CreateDir(dir_fn
));
// Produces the next file name; each call post-increments sample_num.
116 auto sample_name
= [&]() -> std::string
{
117 return "batch-" + std::to_string(sample_num
++);
120 ARROW_ASSIGN_OR_RAISE(auto batches
, Batches());
122 for (const auto& batch
: batches
) {
// Fully validate the batch before it is serialized.
123 RETURN_NOT_OK(batch
->ValidateFull());
124 ARROW_ASSIGN_OR_RAISE(auto buf
, SerializeRecordBatch(batch
, is_stream_format
));
125 ARROW_ASSIGN_OR_RAISE(auto sample_fn
, dir_fn
.Join(sample_name()));
// Echo the output path to stderr.
126 std::cerr
<< sample_fn
.ToString() << std::endl
;
127 ARROW_ASSIGN_OR_RAISE(auto file
, io::FileOutputStream::Open(sample_fn
.ToString()));
128 RETURN_NOT_OK(file
->Write(buf
));
129 RETURN_NOT_OK(file
->Close());
// Prints the command-line usage message to stderr.
// NOTE(review): the function is marked ARROW_NORETURN, but the statement that
// ends the process is not visible in this view -- confirm in the full file.
134 ARROW_NORETURN
void Usage() {
135 std::cerr
<< "Usage: arrow-ipc-generate-fuzz-corpus "
136 << "[-stream|-file] <output directory>" << std::endl
;
// Command-line driver: reads the format flag from argv[1] ("-stream" or
// "-file") and the output directory from argv[2], then runs DoMain with
// stream format selected when the flag is "-stream". The resulting Status is
// printed to stderr.
// NOTE(review): the argument-count check, the bodies of the branches, and the
// return statements are not visible in this view -- confirm in the full file.
140 int Main(int argc
, char** argv
) {
144 auto opt
= std::string(argv
[1]);
// Any flag other than "-stream" or "-file" is rejected.
145 if (opt
!= "-stream" && opt
!= "-file") {
148 auto out_dir
= std::string(argv
[2]);
150 Status st
= DoMain(opt
== "-stream", out_dir
);
152 std::cerr
<< st
.ToString() << std::endl
;
161 int main(int argc
, char** argv
) { return arrow::ipc::Main(argc
, argv
); }