//! This is an implementation of a global allocator on the wasm32 platform when
//! emscripten is not in use. In that situation there's no actual runtime for us
//! to lean on for allocation, so instead we provide our own!
//!
//! The wasm32 instruction set has two instructions for getting the current
//! amount of memory and growing the amount of memory. These instructions are the
//! foundation on which we're able to build an allocator, so we do so! Note that
//! the instructions are also pretty "global" and this is the "global" allocator
//! after all!
//!
//! The current allocator here is the `dlmalloc` crate which we've got included
//! in the rust-lang/rust repository as a submodule. The crate is a port of
//! dlmalloc.c from C to Rust and is basically just so we can have "pure Rust"
//! for now which is currently technically required (can't link with C yet).
//!
//! The crate itself provides a global allocator which on wasm has no
//! synchronization as there are no threads!
use crate::alloc::{GlobalAlloc, Layout, System};

// The single global instance of dlmalloc's allocator state. Using `static mut`
// is sound here only because every access below is serialized: the
// `atomics`-enabled `lock` module takes a global spin lock before each call,
// and without the `atomics` target feature there is exactly one thread.
static mut DLMALLOC: dlmalloc::Dlmalloc = dlmalloc::DLMALLOC_INIT;
23 | #[stable(feature = "alloc_system_type", since = "1.28.0")] | |
24 | unsafe impl GlobalAlloc for System { | |
25 | #[inline] | |
26 | unsafe fn alloc(&self, layout: Layout) -> *mut u8 { | |
27 | let _lock = lock::lock(); | |
28 | DLMALLOC.malloc(layout.size(), layout.align()) | |
29 | } | |
30 | ||
31 | #[inline] | |
32 | unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { | |
33 | let _lock = lock::lock(); | |
34 | DLMALLOC.calloc(layout.size(), layout.align()) | |
35 | } | |
36 | ||
37 | #[inline] | |
38 | unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { | |
39 | let _lock = lock::lock(); | |
40 | DLMALLOC.free(ptr, layout.size(), layout.align()) | |
41 | } | |
42 | ||
43 | #[inline] | |
44 | unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { | |
45 | let _lock = lock::lock(); | |
46 | DLMALLOC.realloc(ptr, layout.size(), layout.align(), new_size) | |
47 | } | |
48 | } | |
49 | ||
#[cfg(target_feature = "atomics")]
mod lock {
    use crate::sync::atomic::{AtomicI32, Ordering::SeqCst};

    // 0 = unlocked, 1 = held. Guards all access to the global dlmalloc state.
    static LOCKED: AtomicI32 = AtomicI32::new(0);

    /// RAII guard returned by [`lock`]; releases the lock in `Drop`.
    pub struct DropLock;

    /// Acquire the global allocator lock, spinning until it is available.
    pub fn lock() -> DropLock {
        // Spin until our `swap` observes the unlocked state (previous value 0),
        // at which point we own the lock.
        //
        // Why spin instead of `i32.atomic.wait`? The natural implementation
        // would be roughly:
        //
        //     unsafe {
        //         let r = core::arch::wasm32::i32_atomic_wait(
        //             LOCKED.as_mut_ptr(),
        //             1,  // expected value
        //             -1, // timeout
        //         );
        //         debug_assert!(r == 0 || r == 1);
        //     }
        //
        // Unfortunately the main thread in a web browser *cannot ever block*,
        // no exceptions, so it can't execute `i32.atomic.wait` at all. Until
        // there's an allocation scheme that handles contention on the main
        // thread differently (e.g. falling back to `memory.grow`, or a
        // two-level per-thread allocator — both of which have significant
        // downsides), spinning is the simple strategy that is semantically
        // correct and fair to all threads.
        //
        // To be clear, spinning is not great: if the lock holder is stuck in
        // `memory.grow` or evicted from the CPU for a ~10ms timeslice, we sit
        // here burning CPU to no one's benefit. Wasm also has no spin-loop
        // "hint" instruction to at least keep the CPU cool.
        //
        // FIXME: if we could detect whether the current wasm thread is the
        // browser main thread, every *other* thread could block properly and
        // only the main thread would need to spin, mitigating most of this
        // cost. We currently have no way to make that determination.
        while LOCKED.swap(1, SeqCst) != 0 {}
        DropLock
    }

    impl Drop for DropLock {
        fn drop(&mut self) {
            // Release by storing 0; the previous value must have been 1 since
            // we held the lock.
            let prev = LOCKED.swap(0, SeqCst);
            debug_assert_eq!(prev, 1);

            // Because waiters spin rather than `i32.atomic.wait`, nobody needs
            // to be woken here. If they did block, this would look like:
            //
            //     unsafe {
            //         core::arch::wasm32::atomic_notify(
            //             LOCKED.as_mut_ptr(),
            //             1, // only one thread
            //         );
            //     }
        }
    }
}
#[cfg(not(target_feature = "atomics"))]
mod lock {
    /// Without the `atomics` target feature there is exactly one thread of
    /// execution, so "locking" the allocator is a free no-op.
    #[inline]
    pub fn lock() {}
}