+#ifdef __wasilibc_unmodified_upstream
#define SYSCALL_NO_TLS 1
#include <elf.h>
+#endif
#include <limits.h>
+#ifdef __wasilibc_unmodified_upstream
#include <sys/mman.h>
+#endif
#include <string.h>
#include <stddef.h>
#include "pthread_impl.h"
volatile int __thread_list_lock;
+#ifndef __wasilibc_unmodified_upstream
+void __wasi_init_tp() { /* WASI-only entry point: initializes the thread structure located at __get_tp(). */
+ __init_tp((void *)__get_tp()); /* int result is discarded; in the non-upstream build __init_tp only ever returns 0 */
+}
+#endif
+
int __init_tp(void *p)
{
pthread_t td = p;
td->self = td;
+#ifdef __wasilibc_unmodified_upstream
int r = __set_thread_area(TP_ADJ(p));
if (r < 0) return -1;
if (!r) libc.can_do_threads = 1;
td->detach_state = DT_JOINABLE;
td->tid = __syscall(SYS_set_tid_address, &__thread_list_lock);
+#endif
td->locale = &libc.global_locale;
td->robust_list.head = &td->robust_list.head;
td->sysinfo = __sysinfo;
return 0;
}
+#ifdef __wasilibc_unmodified_upstream
+
static struct builtin_tls {
char c;
struct pthread pt;
#define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
static struct tls_module main_tls;
+#endif
+
+#ifndef __wasilibc_unmodified_upstream
+extern void __wasm_init_tls(void*);
+#endif
void *__copy_tls(unsigned char *mem)
{
+#ifdef __wasilibc_unmodified_upstream
pthread_t td;
struct tls_module *p;
size_t i;
dtv[0] = libc.tls_cnt;
td->dtv = dtv;
return td;
+#else
+ size_t tls_align = __builtin_wasm_tls_align();
+ volatile void* tls_base = __builtin_wasm_tls_base();
+ mem += tls_align;
+ mem -= (uintptr_t)mem & (tls_align-1);
+ __wasm_init_tls(mem);
+ __asm__("local.get %0\n"
+ "global.set __tls_base\n"
+ :: "r"(tls_base));
+ return mem;
+#endif
}
+#ifdef __wasilibc_unmodified_upstream
#if ULONG_MAX == 0xffffffff
typedef Elf32_Phdr Phdr;
#else
}
weak_alias(static_init_tls, __init_tls);
+#endif
#include <stdatomic.h>
#endif
+#include <stdalign.h>
+
static void dummy_0()
{
}
self->prev->next = self->next;
self->prev = self->next = self;
+#ifndef __wasilibc_unmodified_upstream
+ /* On Linux, the thread is created with CLONE_CHILD_CLEARTID,
+ * so the kernel releases this lock when the thread terminates.
+ * In WebAssembly we have to unlock it here ourselves.
+ * See also set_tid_address(2). */
+ __tl_unlock();
+#endif
+
#ifdef __wasilibc_unmodified_upstream
if (state==DT_DETACHED && self->map_base) {
/* Detached threads must block even implementation-internal
* and then exits without touching the stack. */
__unmapself(self->map_base, self->map_size);
}
+#else
+ if (state==DT_DETACHED && self->map_base) {
+ // __syscall(SYS_exit) would unlock the thread list;
+ // do it manually here.
+ __tl_unlock();
+ free(self->map_base);
+ // Can't use `exit()` here, because it is too high level
+ for (;;) __wasi_proc_exit(0);
+ }
#endif
/* Wake any joiner. */
#ifdef __wasilibc_unmodified_upstream
for (;;) __syscall(SYS_exit, 0);
#else
- for (;;) exit(0);
+ // __syscall(SYS_exit) would unlock the thread list;
+ // do it manually here.
+ __tl_unlock();
+ // Can't use `exit()` here, because it is too high level
+ for (;;) __wasi_proc_exit(0);
#endif
}
#else
void *(*start_func)(void *);
void *start_arg;
- struct pthread *thread;
+ void *tls_base;
#endif
};
}
#else
__attribute__((export_name("wasi_thread_start")))
-int wasi_thread_start(int tid, void *p)
+_Noreturn void wasi_thread_start(int tid, void *p)
{
struct start_args *args = p;
+ __asm__(".globaltype __tls_base, i32\n"
+ "local.get %0\n"
+ "global.set __tls_base\n"
+ :: "r"(args->tls_base));
+ pthread_t self = __pthread_self();
// Set the thread ID (TID) on the pthread structure. The TID is stored
// atomically since it is also stored by the parent thread; this way,
// whichever thread (parent or child) reaches this point first can proceed
// without waiting.
- atomic_store((atomic_int *) &(args->thread->tid), tid);
- // Save the pointer to the pthread structure as the global `pthread_self`.
- __asm__("local.set %0\n"
- "global.set __wasilibc_pthread_self\n"
- : "=r"(args->thread));
+ atomic_store((atomic_int *) &(self->tid), tid);
+ // Set the stack pointer.
+ __asm__(".globaltype __stack_pointer, i32\n"
+ "local.get %0\n"
+ "global.set __stack_pointer\n"
+ :: "r"(self->stack));
// Execute the user's start function.
int (*start)(void*) = (int(*)(void*)) args->start_func;
__pthread_exit((void *)(uintptr_t)start(args->start_arg));
- return 0;
}
#endif
#endif
pthread_attr_t attr = { 0 };
sigset_t set;
+#ifndef __wasilibc_unmodified_upstream
+ size_t tls_size = __builtin_wasm_tls_size();
+ size_t tls_align = __builtin_wasm_tls_align();
+ void* tls_base = __builtin_wasm_tls_base();
+ void* new_tls_base;
+ size_t tls_offset;
+ tls_size += tls_align;
+#endif
+#ifdef __wasilibc_unmodified_upstream
if (!libc.can_do_threads) return ENOSYS;
+#endif
self = __pthread_self();
if (!libc.threaded) {
for (FILE *f=*__ofl_lock(); f; f=f->next)
}
if (attr._a_stackaddr) {
+#ifdef __wasilibc_unmodified_upstream
size_t need = libc.tls_size + __pthread_tsd_size;
+#else
+ size_t need = tls_size + __pthread_tsd_size;
+#endif
size = attr._a_stacksize;
stack = (void *)(attr._a_stackaddr & -16);
stack_limit = (void *)(attr._a_stackaddr - size);
* application's stack space. */
if (need < size/8 && need < 2048) {
tsd = stack - __pthread_tsd_size;
+#ifdef __wasilibc_unmodified_upstream
stack = tsd - libc.tls_size;
+#else
+ stack = tsd - tls_size;
+#endif
memset(stack, 0, need);
} else {
size = ROUND(need);
} else {
guard = ROUND(attr._a_guardsize);
size = guard + ROUND(attr._a_stacksize
+#ifdef __wasilibc_unmodified_upstream
+ libc.tls_size + __pthread_tsd_size);
+#else
+ + tls_size + __pthread_tsd_size);
+#endif
}
if (!tsd) {
#endif
tsd = map + size - __pthread_tsd_size;
if (!stack) {
+#ifdef __wasilibc_unmodified_upstream
stack = tsd - libc.tls_size;
+#else
+ stack = tsd - tls_size;
+#endif
stack_limit = map + guard;
}
}
+#ifdef __wasilibc_unmodified_upstream
new = __copy_tls(tsd - libc.tls_size);
+#else
+ new_tls_base = __copy_tls(tsd - tls_size);
+ tls_offset = new_tls_base - tls_base;
+ new = (void*)((uintptr_t)self + tls_offset);
+#endif
new->map_base = map;
new->map_size = size;
new->stack = stack;
/* Setup argument structure for the new thread on its stack.
* It's safe to access from the caller only until the thread
* list is unlocked. */
+#ifdef __wasilibc_unmodified_upstream
stack -= (uintptr_t)stack % sizeof(uintptr_t);
stack -= sizeof(struct start_args);
struct start_args *args = (void *)stack;
args->start_func = entry;
args->start_arg = arg;
-#ifdef __wasilibc_unmodified_upstream
args->control = attr._a_sched ? 1 : 0;
/* Application signals (but not the synccall signal) must be
args->sig_mask[(SIGCANCEL-1)/8/sizeof(long)] &=
~(1UL<<((SIGCANCEL-1)%(8*sizeof(long))));
#else
- /* The new thread needs a pointer to the pthread struct so that it can set
- * up its `wasilibc_pthread_self` global. */
- args->thread = new;
+ /* Align the stack to struct start_args */
+ stack -= sizeof(struct start_args);
+ stack -= (uintptr_t)stack % alignof(struct start_args);
+ struct start_args *args = (void *)stack;
+
+ /* Align the stack to 16 and store it */
+ new->stack = (void *)((uintptr_t) stack & -16);
+ /* Correct the stack size */
+ new->stack_size = stack - stack_limit;
+
+ args->start_func = entry;
+ args->start_arg = arg;
+ args->tls_base = (void*)new_tls_base;
#endif
__tl_lock();
if (ret < 0) {
ret = -EAGAIN;
} else {
- atomic_store((atomic_int *) &(args->thread->tid), ret);
+ atomic_store((atomic_int *) &(new->tid), ret);
}
#endif