2 #include "pthread_impl.h"
3 #include "stdio_impl.h"
6 #ifdef __wasilibc_unmodified_upstream
11 #ifndef __wasilibc_unmodified_upstream
12 #include <stdatomic.h>
/* Overridable no-op hooks: each aliases dummy_0 and is replaced by the
 * strong definition in the translation unit that implements it (e.g.
 * __pthread_tsd_run_dtors in pthread_key_create.c).
 * NOTE(review): this extraction fuses original line numbers into the
 * text and has dropped lines; code is not compilable as-is. */
20 weak_alias(dummy_0
, __acquire_ptc
);
21 weak_alias(dummy_0
, __release_ptc
);
22 weak_alias(dummy_0
, __pthread_tsd_run_dtors
);
23 weak_alias(dummy_0
, __do_orphaned_stdio_locks
);
/* Upstream(Linux)-only hooks. NOTE(review): the matching #endif is not
 * visible here — presumably dropped by the extraction. */
24 #ifdef __wasilibc_unmodified_upstream
25 weak_alias(dummy_0
, __dl_thread_cleanup
);
26 weak_alias(dummy_0
, __membarrier_init
);
/* Waiter/recursion bookkeeping for the global __thread_list_lock. */
29 static int tl_lock_count
;
30 static int tl_lock_waiters
;
/* Body fragment of __tl_lock (the signature and the tl_lock_count
 * recursion check are not visible in this extraction — lines appear
 * to have been dropped). Acquires the global thread-list lock. */
34 int tid
= __pthread_self()->tid
;
35 int val
= __thread_list_lock
;
/* Claim the lock word with our tid via CAS; while another tid holds
 * it, futex-wait with waiter accounting in tl_lock_waiters. */
40 while ((val
= a_cas(&__thread_list_lock
, 0, tid
)))
41 __wait(&__thread_list_lock
, &tl_lock_waiters
, val
, 0);
/* Release the thread-list lock: clear the lock word, then wake one
 * futex waiter if any are recorded.
 * NOTE(review): braces and the tl_lock_count recursive-unlock path are
 * not visible in this extraction — presumably dropped lines. */
44 void __tl_unlock(void)
50 a_store(&__thread_list_lock
, 0);
51 if (tl_lock_waiters
) __wake(&__thread_list_lock
, 1, 0);
/* Wait for the thread-list lock to be released, then propagate the
 * wake to the next waiter. (td is unused in the visible fragment.)
 * NOTE(review): braces and presumably a guard around the wait are not
 * visible in this extraction — verify against upstream. */
54 void __tl_sync(pthread_t td
)
57 int val
= __thread_list_lock
;
59 __wait(&__thread_list_lock
, &tl_lock_waiters
, val
, 0);
60 if (tl_lock_waiters
) __wake(&__thread_list_lock
, 1, 0);
/* Thread-termination path: runs cleanup handlers and TSD destructors,
 * resolves the detach race, walks the robust mutex list, unlinks the
 * thread from the global list, and finally exits.
 * NOTE(review): this extraction has dropped many lines (braces,
 * #else/#endif, some statements) and fuses original line numbers into
 * the text — it is not compilable as-is; comments below hedge where
 * code is missing. */
63 #ifdef __wasilibc_unmodified_upstream
64 _Noreturn
void __pthread_exit(void *result
)
66 static void __pthread_exit(void *result
)
69 pthread_t self
= __pthread_self();
/* Disable cancellation for the remainder of teardown and record the
 * thread's result for any joiner. */
72 self
->canceldisable
= 1;
73 self
->cancelasync
= 0;
74 self
->result
= result
;
/* Pop and run every pending cleanup handler (pthread_cleanup_push
 * stack). NOTE(review): the actual f(x) invocation is not visible in
 * this extraction — presumably a dropped line. */
76 while (self
->cancelbuf
) {
77 void (*f
)(void *) = self
->cancelbuf
->__f
;
78 void *x
= self
->cancelbuf
->__x
;
79 self
->cancelbuf
= self
->cancelbuf
->__next
;
/* Run thread-specific-data destructors (weak hook above). */
83 __pthread_tsd_run_dtors();
85 #ifdef __wasilibc_unmodified_upstream
86 __block_app_sigs(&set
);
89 /* This atomic potentially competes with a concurrent pthread_detach
90 * call; the loser is responsible for freeing thread resources. */
91 int state
= a_cas(&self
->detach_state
, DT_JOINABLE
, DT_EXITING
);
93 if (state
==DT_DETACHED
&& self
->map_base
) {
94 /* Since __unmapself bypasses the normal munmap code path,
95 * explicitly wait for vmlock holders first. This must be
96 * done before any locks are taken, to avoid lock ordering
97 * issues that could lead to deadlock. */
98 #ifdef __wasilibc_unmodified_upstream
103 /* Access to target the exiting thread with syscalls that use
104 * its kernel tid is controlled by killlock. For detached threads,
105 * any use past this point would have undefined behavior, but for
106 * joinable threads it's a valid usage that must be handled.
107 * Signals must be blocked since pthread_kill must be AS-safe. */
108 LOCK(self
->killlock
);
110 /* The thread list lock must be AS-safe, and thus depends on
111 * application signals being blocked above. */
114 /* If this is the only thread in the list, don't proceed with
115 * termination of the thread, but restore the previous lock and
116 * signal state to prepare for exit to call atexit handlers. */
117 if (self
->next
== self
) {
119 UNLOCK(self
->killlock
);
120 self
->detach_state
= state
;
121 #ifdef __wasilibc_unmodified_upstream
122 __restore_sigs(&set
);
127 /* At this point we are committed to thread termination. */
129 #ifdef __wasilibc_unmodified_upstream
130 /* Process robust list in userspace to handle non-pshared mutexes
131 * and the detached thread case where the robust list head will
132 * be invalid when the kernel would process it. */
135 volatile void *volatile *rp
;
/* Walk the circular robust list; head pointing at itself terminates. */
136 while ((rp
=self
->robust_list
.head
) && rp
!= &self
->robust_list
.head
) {
137 pthread_mutex_t
*m
= (void *)((char *)rp
138 - offsetof(pthread_mutex_t
, _m_next
));
139 int waiters
= m
->_m_waiters
;
140 int priv
= (m
->_m_type
& 128) ^ 128;
141 self
->robust_list
.pending
= rp
;
142 self
->robust_list
.head
= *rp
;
/* Mark the mutex owner-died (0x40000000) and wake a waiter if the
 * lock was contended or had recorded waiters. */
143 int cont
= a_swap(&m
->_m_lock
, 0x40000000);
144 self
->robust_list
.pending
= 0;
145 if (cont
< 0 || waiters
)
146 __wake(&m
->_m_lock
, 1, priv
);
148 #ifdef __wasilibc_unmodified_upstream
152 __do_orphaned_stdio_locks();
153 #ifdef __wasilibc_unmodified_upstream
154 __dl_thread_cleanup();
157 /* Last, unlink thread from the list. This change will not be visible
158 * until the lock is released, which only happens after SYS_exit
159 * has been called, via the exit futex address pointing at the lock.
160 * This needs to happen after any possible calls to LOCK() that might
161 * skip locking if process appears single-threaded. */
162 if (!--libc
.threads_minus_1
) libc
.need_locks
= -1;
163 self
->next
->prev
= self
->prev
;
164 self
->prev
->next
= self
->next
;
165 self
->prev
= self
->next
= self
;
167 #ifndef __wasilibc_unmodified_upstream
168 /* On Linux, the thread is created with CLONE_CHILD_CLEARTID,
169 * and this lock will unlock by kernel when this thread terminates.
170 * So we should unlock it here in WebAssembly.
171 * See also set_tid_address(2) */
175 #ifdef __wasilibc_unmodified_upstream
176 if (state
==DT_DETACHED
&& self
->map_base
) {
177 /* Detached threads must block even implementation-internal
178 * signals, since they will not have a stack in their last
179 * moments of existence. */
180 __block_all_sigs(&set
);
182 /* Robust list will no longer be valid, and was already
183 * processed above, so unregister it with the kernel. */
184 if (self
->robust_list
.off
)
185 __syscall(SYS_set_robust_list
, 0, 3*sizeof(long));
187 /* The following call unmaps the thread's stack mapping
188 * and then exits without touching the stack. */
189 __unmapself(self
->map_base
, self
->map_size
);
/* WASI detached path: the stack came from malloc, so free it. */
192 if (state
==DT_DETACHED
&& self
->map_base
) {
193 // __syscall(SYS_exit) would unlock the thread, list
194 // do it manually here
196 free(self
->map_base
);
197 // Can't use `exit()` here, because it is too high level
202 /* Wake any joiner. */
203 a_store(&self
->detach_state
, DT_EXITED
);
204 __wake(&self
->detach_state
, 1, 1);
206 /* After the kernel thread exits, its tid may be reused. Clear it
207 * to prevent inadvertent use and inform functions that would use
208 * it that it's no longer available. */
210 UNLOCK(self
->killlock
);
212 #ifdef __wasilibc_unmodified_upstream
213 for (;;) __syscall(SYS_exit
, 0);
215 // __syscall(SYS_exit) would unlock the thread, list
216 // do it manually here
218 // Can't use `exit()` here, because it is too high level
222 void __do_cleanup_push(struct __ptcb
*cb
)
224 struct pthread
*self
= __pthread_self();
225 cb
->__next
= self
->cancelbuf
;
226 self
->cancelbuf
= cb
;
229 void __do_cleanup_pop(struct __ptcb
*cb
)
231 __pthread_self()->cancelbuf
= cb
->__next
;
/* Argument block placed on the new thread's stack by __pthread_create
 * and consumed by its entry function. Upstream carries a futex-based
 * control word and a signal mask; the WASI variant (second set of
 * fields) carries the TLS base instead.
 * NOTE(review): the struct headers, start_arg/tls_base members, and
 * #else/#endif lines are not visible in this extraction. */
235 #ifdef __wasilibc_unmodified_upstream
236 void *(*start_func
)(void *);
238 volatile int control
;
239 unsigned long sig_mask
[_NSIG
/8/sizeof(long)];
241 void *(*start_func
)(void *);
/* Upstream (Linux) new-thread entry point; WASI instead enters via
 * wasi_thread_start below.
 * NOTE(review): braces and #else/#endif lines are not visible in this
 * extraction — presumably dropped. */
247 #ifdef __wasilibc_unmodified_upstream
248 static int start(void *p
)
250 struct start_args
*args
= p
;
/* control==1 means the parent still has explicit scheduling to apply:
 * advance to 2 and futex-wait until the parent stores the outcome. */
251 int state
= args
->control
;
253 if (a_cas(&args
->control
, 1, 2)==1)
254 __wait(&args
->control
, 0, 2, 1);
256 #ifdef __wasilibc_unmodified_upstream
257 __syscall(SYS_set_tid_address
, &args
->control
);
258 for (;;) __syscall(SYS_exit
, 0);
262 #ifdef __wasilibc_unmodified_upstream
/* Install the thread's initial signal mask before entering user code. */
263 __syscall(SYS_rt_sigprocmask
, SIG_SETMASK
, &args
->sig_mask
, 0, _NSIG
/8);
/* Run the user start function; its return value is the exit result. */
265 __pthread_exit(args
->start_func(args
->start_arg
));
/* C11 thrd_create entry shim: the stored start_func actually has the
 * int(*)(void*) signature, so cast it back and widen the int result
 * to void* for __pthread_exit.
 * NOTE(review): the trailing return/closing brace are not visible in
 * this extraction. */
269 static int start_c11(void *p
)
271 struct start_args
*args
= p
;
272 int (*start
)(void*) = (int(*)(void*)) args
->start_func
;
273 __pthread_exit((void *)(uintptr_t)start(args
->start_arg
));
/* Exported entry point invoked by the wasi-threads host with the
 * host-assigned tid and the start_args pointer passed to
 * __wasi_thread_spawn. Establishes TLS and the C stack before any
 * normal C code can run.
 * NOTE(review): this extraction is missing lines, including the asm
 * "local.get" operands and comment delimiters — not compilable as-is. */
277 __attribute__((export_name("wasi_thread_start")))
278 void wasi_thread_start(int tid
, void *p
)
281 * Note: it's fragile to implement wasi_thread_start in C.
282 * On entry, we don't even have C stack (__stack_pointer)
283 * set up. Be careful when modifying this function.
285 struct start_args
*args
= p
;
/* Point __tls_base at this thread's TLS block allocated by the parent. */
286 __asm__(".globaltype __tls_base, i32\n"
288 "global.set __tls_base\n"
289 :: "r"(args
->tls_base
));
290 pthread_t self
= __pthread_self();
291 // Set the thread ID (TID) on the pthread structure. The TID is stored
292 // atomically since it is also stored by the parent thread; this way,
293 // whichever thread (parent or child) reaches this point first can proceed
295 atomic_store((atomic_int
*) &(self
->tid
), tid
);
296 // Set the stack pointer.
297 __asm__(".globaltype __stack_pointer, i32\n"
299 "global.set __stack_pointer\n"
300 :: "r"(self
->stack
));
301 // Execute the user's start function.
302 __pthread_exit(args
->start_func(args
->start_arg
));
/* ROUND: round a size up to allocation granularity — PAGE_SIZE
 * upstream (mmap-backed stacks), 16 bytes on WASI where stacks come
 * from malloc(). NOTE(review): #else/#endif and the opening comment
 * delimiter are not visible in this extraction. */
306 #ifdef __wasilibc_unmodified_upstream
307 #define ROUND(x) (((x)+PAGE_SIZE-1)&-PAGE_SIZE)
310 * As we allocate stack with malloc() instead of mmap/mprotect,
311 * there is no point to round it up to PAGE_SIZE.
312 * Instead, round up to a sane alignment.
313 * Note: PAGE_SIZE is rather big on WASM. (65536)
315 #define ROUND(x) (((x)+16-1)&-16)
318 /* pthread_key_create.c overrides this */
319 static volatile size_t dummy
= 0;
320 weak_alias(dummy
, __pthread_tsd_size
);
321 static void *dummy_tsd
[1] = { 0 };
322 weak_alias(dummy_tsd
, __pthread_tsd_main
);
/* Weak stdio stream handles: real definitions are pulled in by stdio
 * when stdin/stdout/stderr are actually used; these NULL fallbacks let
 * init_file_lock below be called unconditionally. */
324 static FILE *volatile dummy_file
= 0;
325 weak_alias(dummy_file
, __stdin_used
);
326 weak_alias(dummy_file
, __stdout_used
);
327 weak_alias(dummy_file
, __stderr_used
);
329 static void init_file_lock(FILE *f
)
331 if (f
&& f
->lock
<0) f
->lock
= 0;
/* Create a new thread: size and allocate the stack/TLS/TSD block, set
 * up the new struct pthread and its start_args, link it into the
 * thread list, and spawn it (clone upstream, __wasi_thread_spawn on
 * WASI). Returns 0 or an errno value (EAGAIN on spawn failure).
 * NOTE(review): this extraction has dropped many lines (braces,
 * #else/#endif, declarations such as size/guard/new_tls_base, and the
 * fail/error paths) and fuses original line numbers into the text —
 * not compilable as-is; comments hedge where code is missing. */
334 int __pthread_create(pthread_t
*restrict res
, const pthread_attr_t
*restrict attrp
, void *(*entry
)(void *), void *restrict arg
)
336 int ret
, c11
= (attrp
== __ATTRP_C11_THREAD
);
338 struct pthread
*self
, *new;
339 unsigned char *map
= 0, *stack
= 0, *tsd
= 0, *stack_limit
;
340 #ifdef __wasilibc_unmodified_upstream
341 unsigned flags
= CLONE_VM
| CLONE_FS
| CLONE_FILES
| CLONE_SIGHAND
342 | CLONE_THREAD
| CLONE_SYSVSEM
| CLONE_SETTLS
343 | CLONE_PARENT_SETTID
| CLONE_CHILD_CLEARTID
| CLONE_DETACHED
;
345 pthread_attr_t attr
= { 0 };
/* WASI: TLS geometry comes from wasm builtins; pad size by the
 * alignment so the block can be aligned after allocation. */
347 #ifndef __wasilibc_unmodified_upstream
348 size_t tls_size
= __builtin_wasm_tls_size();
349 size_t tls_align
= __builtin_wasm_tls_align();
350 void* tls_base
= __builtin_wasm_tls_base();
353 tls_size
+= tls_align
;
356 #ifdef __wasilibc_unmodified_upstream
357 if (!libc
.can_do_threads
) return ENOSYS
;
359 self
= __pthread_self();
/* First thread creation: initialize stdio FILE locks so existing
 * streams become lockable before the process goes multithreaded. */
360 if (!libc
.threaded
) {
361 for (FILE *f
=*__ofl_lock(); f
; f
=f
->next
)
364 init_file_lock(__stdin_used
);
365 init_file_lock(__stdout_used
);
366 init_file_lock(__stderr_used
);
367 #ifdef __wasilibc_unmodified_upstream
368 __syscall(SYS_rt_sigprocmask
, SIG_UNBLOCK
, SIGPT_SET
, 0, _NSIG
/8);
370 self
->tsd
= (void **)__pthread_tsd_main
;
371 #ifdef __wasilibc_unmodified_upstream
/* C11 threads ignore attributes entirely (attrp is the sentinel). */
376 if (attrp
&& !c11
) attr
= *attrp
;
380 attr
._a_stacksize
= __default_stacksize
;
381 attr
._a_guardsize
= __default_guardsize
;
/* Caller-provided stack: carve TSD/TLS out of it only when that costs
 * little; otherwise they are allocated with the mapping below. */
384 if (attr
._a_stackaddr
) {
385 #ifdef __wasilibc_unmodified_upstream
386 size_t need
= libc
.tls_size
+ __pthread_tsd_size
;
388 size_t need
= tls_size
+ __pthread_tsd_size
;
390 size
= attr
._a_stacksize
;
391 stack
= (void *)(attr
._a_stackaddr
& -16);
392 stack_limit
= (void *)(attr
._a_stackaddr
- size
);
393 /* Use application-provided stack for TLS only when
394 * it does not take more than ~12% or 2k of the
395 * application's stack space. */
396 if (need
< size
/8 && need
< 2048) {
397 tsd
= stack
- __pthread_tsd_size
;
398 #ifdef __wasilibc_unmodified_upstream
399 stack
= tsd
- libc
.tls_size
;
401 stack
= tsd
- tls_size
;
403 memset(stack
, 0, need
);
/* Library-allocated stack: total size = guard + stack + TLS + TSD. */
409 guard
= ROUND(attr
._a_guardsize
);
410 size
= guard
+ ROUND(attr
._a_stacksize
411 #ifdef __wasilibc_unmodified_upstream
412 + libc
.tls_size
+ __pthread_tsd_size
);
414 + tls_size
+ __pthread_tsd_size
);
/* Upstream maps PROT_NONE then opens all but the guard pages;
 * kernels without mprotect fall back to a fully-writable map. */
419 #ifdef __wasilibc_unmodified_upstream
421 map
= __mmap(0, size
, PROT_NONE
, MAP_PRIVATE
|MAP_ANON
, -1, 0);
422 if (map
== MAP_FAILED
) goto fail
;
423 if (__mprotect(map
+guard
, size
-guard
, PROT_READ
|PROT_WRITE
)
424 && errno
!= ENOSYS
) {
429 map
= __mmap(0, size
, PROT_READ
|PROT_WRITE
, MAP_PRIVATE
|MAP_ANON
, -1, 0);
430 if (map
== MAP_FAILED
) goto fail
;
/* Block layout (top down): TSD at the top, TLS below it, stack grows
 * down from there; guard occupies the bottom. */
436 tsd
= map
+ size
- __pthread_tsd_size
;
438 #ifdef __wasilibc_unmodified_upstream
439 stack
= tsd
- libc
.tls_size
;
441 stack
= tsd
- tls_size
;
443 stack_limit
= map
+ guard
;
/* Upstream: the TCB lives inside the TLS image. WASI: the TCB address
 * is derived from self plus the delta between TLS bases. */
447 #ifdef __wasilibc_unmodified_upstream
448 new = __copy_tls(tsd
- libc
.tls_size
);
450 new_tls_base
= __copy_tls(tsd
- tls_size
);
451 tls_offset
= new_tls_base
- tls_base
;
452 new = (void*)((uintptr_t)self
+ tls_offset
);
455 new->map_size
= size
;
457 new->stack_size
= stack
- stack_limit
;
458 new->guard_size
= guard
;
460 new->tsd
= (void *)tsd
;
461 new->locale
= &libc
.global_locale
;
462 if (attr
._a_detach
) {
463 new->detach_state
= DT_DETACHED
;
465 new->detach_state
= DT_JOINABLE
;
467 new->robust_list
.head
= &new->robust_list
.head
;
468 new->canary
= self
->canary
;
469 new->sysinfo
= self
->sysinfo
;
471 /* Setup argument structure for the new thread on its stack.
472 * It's safe to access from the caller only until the thread
473 * list is unlocked. */
474 #ifdef __wasilibc_unmodified_upstream
475 stack
-= (uintptr_t)stack
% sizeof(uintptr_t);
476 stack
-= sizeof(struct start_args
);
477 struct start_args
*args
= (void *)stack
;
478 args
->start_func
= entry
;
479 args
->start_arg
= arg
;
480 args
->control
= attr
._a_sched
? 1 : 0;
482 /* Application signals (but not the synccall signal) must be
483 * blocked before the thread list lock can be taken, to ensure
484 * that the lock is AS-safe. */
485 __block_app_sigs(&set
);
487 /* Ensure SIGCANCEL is unblocked in new thread. This requires
488 * working with a copy of the set so we can restore the
489 * original mask in the calling thread. */
490 memcpy(&args
->sig_mask
, &set
, sizeof args
->sig_mask
);
491 args
->sig_mask
[(SIGCANCEL
-1)/8/sizeof(long)] &=
492 ~(1UL<<((SIGCANCEL
-1)%(8*sizeof(long))));
494 /* Align the stack to struct start_args */
495 stack
-= sizeof(struct start_args
);
496 stack
-= (uintptr_t)stack
% alignof(struct start_args
);
497 struct start_args
*args
= (void *)stack
;
499 /* Align the stack to 16 and store it */
500 new->stack
= (void *)((uintptr_t) stack
& -16);
501 /* Correct the stack size */
502 new->stack_size
= stack
- stack_limit
;
504 args
->start_func
= entry
;
505 args
->start_arg
= arg
;
506 args
->tls_base
= (void*)new_tls_base
;
/* Going multithreaded: first successful increment turns locking on. */
510 if (!libc
.threads_minus_1
++) libc
.need_locks
= 1;
511 #ifdef __wasilibc_unmodified_upstream
512 ret
= __clone((c11
? start_c11
: start
), stack
, flags
, args
, &new->tid
, TP_ADJ(new), &__thread_list_lock
);
514 /* Instead of `__clone`, WASI uses a host API to instantiate a new version
515 * of the current module and start executing the entry function. The
516 * wasi-threads specification requires the module to export a
517 * `wasi_thread_start` function, which is invoked with `args`. */
518 ret
= __wasi_thread_spawn((void *) args
);
521 #ifdef __wasilibc_unmodified_upstream
522 /* All clone failures translate to EAGAIN. If explicit scheduling
523 * was requested, attempt it before unlocking the thread list so
524 * that the failed thread is never exposed and so that we can
525 * clean up all transient resource usage before returning. */
528 } else if (attr
._a_sched
) {
529 ret
= __syscall(SYS_sched_setscheduler
,
530 new->tid
, attr
._a_policy
, &attr
._a_prio
);
531 if (a_swap(&args
->control
, ret
? 3 : 0)==2)
532 __wake(&args
->control
, 1, 1);
534 __wait(&args
->control
, 0, 3, 0);
537 /* `wasi_thread_spawn` will either return a host-provided thread ID (TID)
538 * (`>= 0`) or an error code (`< 0`). As in the unmodified version, all
539 * spawn failures translate to EAGAIN; unlike the modified version, there is
540 * no need to "start up" the child thread--the host does this. If the spawn
541 * did succeed, then we store the TID atomically, since this parent thread
542 * is racing with the child thread to set this field; this way, whichever
543 * thread reaches this point first can continue without waiting. */
547 atomic_store((atomic_int
*) &(new->tid
), ret
);
/* Link the new thread into the circular thread list after self.
 * NOTE(review): the new->prev assignment is not visible in this
 * extraction — presumably a dropped line. */
552 new->next
= self
->next
;
554 new->next
->prev
= new;
555 new->prev
->next
= new;
/* Failure path: undo the thread count (and locking if it was just
 * enabled by us). */
557 if (!--libc
.threads_minus_1
) libc
.need_locks
= 0;
560 #ifdef __wasilibc_unmodified_upstream
561 __restore_sigs(&set
);
566 #ifdef __wasilibc_unmodified_upstream
567 if (map
) __munmap(map
, size
);
/* Expose the internal implementations under their public POSIX names.
 * NOTE(review): the #endif for this conditional is not visible in this
 * extraction. */
581 #ifdef __wasilibc_unmodified_upstream
582 weak_alias(__pthread_exit
, pthread_exit
);
584 weak_alias(__pthread_create
, pthread_create
);