]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include "plasma/malloc.h" | |
19 | ||
20 | #include <assert.h> | |
21 | #include <stddef.h> | |
22 | #include <stdio.h> | |
23 | #include <stdlib.h> | |
24 | #include <string.h> | |
25 | #include <sys/mman.h> | |
26 | #include <unistd.h> | |
27 | ||
28 | #include <cerrno> | |
29 | #include <string> | |
30 | #include <vector> | |
31 | ||
32 | #include "plasma/common.h" | |
33 | #include "plasma/plasma.h" | |
34 | ||
35 | namespace plasma { | |
36 | ||
37 | void* fake_mmap(size_t); | |
38 | int fake_munmap(void*, int64_t); | |
39 | ||
40 | #define MMAP(s) fake_mmap(s) | |
41 | #define MUNMAP(a, s) fake_munmap(a, s) | |
42 | #define DIRECT_MMAP(s) fake_mmap(s) | |
43 | #define DIRECT_MUNMAP(a, s) fake_munmap(a, s) | |
44 | #define USE_DL_PREFIX | |
45 | #define HAVE_MORECORE 0 | |
46 | #define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T | |
47 | #define DEFAULT_GRANULARITY ((size_t)128U * 1024U) | |
48 | ||
49 | #include "plasma/thirdparty/dlmalloc.c" // NOLINT | |
50 | ||
51 | #undef MMAP | |
52 | #undef MUNMAP | |
53 | #undef DIRECT_MMAP | |
54 | #undef DIRECT_MUNMAP | |
55 | #undef USE_DL_PREFIX | |
56 | #undef HAVE_MORECORE | |
57 | #undef DEFAULT_GRANULARITY | |
58 | ||
59 | // dlmalloc.c defined DEBUG which will conflict with ARROW_LOG(DEBUG). | |
60 | #ifdef DEBUG | |
61 | #undef DEBUG | |
62 | #endif | |
63 | ||
64 | constexpr int GRANULARITY_MULTIPLIER = 2; | |
65 | ||
// Returns the address that lies `n` bytes after `p`. `n` may be negative, in
// which case the result lies before `p`.
static void* pointer_advance(void* p, ptrdiff_t n) {
  auto* bytes = static_cast<unsigned char*>(p);
  return bytes + n;
}
67 | ||
// Returns the address that lies `n` bytes before `p`. `n` may be negative, in
// which case the result lies after `p`.
static void* pointer_retreat(void* p, ptrdiff_t n) {
  auto* bytes = static_cast<unsigned char*>(p);
  return bytes - n;
}
69 | ||
70 | // Create a buffer. This is creating a temporary file and then | |
71 | // immediately unlinking it so we do not leave traces in the system. | |
72 | int create_buffer(int64_t size) { | |
73 | int fd; | |
74 | std::string file_template = plasma_config->directory; | |
75 | #ifdef _WIN32 | |
76 | if (!CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, | |
77 | (DWORD)((uint64_t)size >> (CHAR_BIT * sizeof(DWORD))), | |
78 | (DWORD)(uint64_t)size, NULL)) { | |
79 | fd = -1; | |
80 | } | |
81 | #else | |
82 | file_template += "/plasmaXXXXXX"; | |
83 | std::vector<char> file_name(file_template.begin(), file_template.end()); | |
84 | file_name.push_back('\0'); | |
85 | fd = mkstemp(&file_name[0]); | |
86 | if (fd < 0) { | |
87 | ARROW_LOG(FATAL) << "create_buffer failed to open file " << &file_name[0]; | |
88 | return -1; | |
89 | } | |
90 | // Immediately unlink the file so we do not leave traces in the system. | |
91 | if (unlink(&file_name[0]) != 0) { | |
92 | ARROW_LOG(FATAL) << "failed to unlink file " << &file_name[0]; | |
93 | return -1; | |
94 | } | |
95 | if (!plasma_config->hugepages_enabled) { | |
96 | // Increase the size of the file to the desired size. This seems not to be | |
97 | // needed for files that are backed by the huge page fs, see also | |
98 | // http://www.mail-archive.com/kvm-devel@lists.sourceforge.net/msg14737.html | |
99 | if (ftruncate(fd, (off_t)size) != 0) { | |
100 | ARROW_LOG(FATAL) << "failed to ftruncate file " << &file_name[0]; | |
101 | return -1; | |
102 | } | |
103 | } | |
104 | #endif | |
105 | return fd; | |
106 | } | |
107 | ||
108 | void* fake_mmap(size_t size) { | |
109 | // Add kMmapRegionsGap so that the returned pointer is deliberately not | |
110 | // page-aligned. This ensures that the segments of memory returned by | |
111 | // fake_mmap are never contiguous. | |
112 | size += kMmapRegionsGap; | |
113 | ||
114 | int fd = create_buffer(size); | |
115 | ARROW_CHECK(fd >= 0) << "Failed to create buffer during mmap"; | |
116 | // MAP_POPULATE can be used to pre-populate the page tables for this memory region | |
117 | // which avoids work when accessing the pages later. However it causes long pauses | |
118 | // when mmapping the files. Only supported on Linux. | |
119 | void* pointer = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | |
120 | if (pointer == MAP_FAILED) { | |
121 | ARROW_LOG(ERROR) << "mmap failed with error: " << std::strerror(errno); | |
122 | if (errno == ENOMEM && plasma_config->hugepages_enabled) { | |
123 | ARROW_LOG(ERROR) | |
124 | << " (this probably means you have to increase /proc/sys/vm/nr_hugepages)"; | |
125 | } | |
126 | return pointer; | |
127 | } | |
128 | ||
129 | // Increase dlmalloc's allocation granularity directly. | |
130 | mparams.granularity *= GRANULARITY_MULTIPLIER; | |
131 | ||
132 | MmapRecord& record = mmap_records[pointer]; | |
133 | record.fd = fd; | |
134 | record.size = size; | |
135 | ||
136 | // We lie to dlmalloc about where mapped memory actually lives. | |
137 | pointer = pointer_advance(pointer, kMmapRegionsGap); | |
138 | ARROW_LOG(DEBUG) << pointer << " = fake_mmap(" << size << ")"; | |
139 | return pointer; | |
140 | } | |
141 | ||
142 | int fake_munmap(void* addr, int64_t size) { | |
143 | ARROW_LOG(DEBUG) << "fake_munmap(" << addr << ", " << size << ")"; | |
144 | addr = pointer_retreat(addr, kMmapRegionsGap); | |
145 | size += kMmapRegionsGap; | |
146 | ||
147 | auto entry = mmap_records.find(addr); | |
148 | ||
149 | if (entry == mmap_records.end() || entry->second.size != size) { | |
150 | // Reject requests to munmap that don't directly match previous | |
151 | // calls to mmap, to prevent dlmalloc from trimming. | |
152 | return -1; | |
153 | } | |
154 | ||
155 | int r = munmap(addr, size); | |
156 | if (r == 0) { | |
157 | close(entry->second.fd); | |
158 | } | |
159 | ||
160 | mmap_records.erase(entry); | |
161 | return r; | |
162 | } | |
163 | ||
164 | void SetMallocGranularity(int value) { change_mparam(M_GRANULARITY, value); } | |
165 | ||
166 | } // namespace plasma |