git.proxmox.com Git - wasi-libc.git/commitdiff
threads: implement init of TLS and stack pointer (#342)
author: 韩朴宇 <w12101111@gmail.com>
Thu, 10 Nov 2022 22:12:53 +0000 (06:12 +0800)
committer: GitHub <noreply@github.com>
Thu, 10 Nov 2022 22:12:53 +0000 (14:12 -0800)
* threads: implement init of TLS and stack pointer

* fix: rename wasi_snapshot_preview2_thread_spawn to wasi_thread_spawn

Signed-off-by: Harald Hoyer <harald@profian.com>
* fix: change signature of wasi_thread_start

Signed-off-by: Harald Hoyer <harald@profian.com>
* fix: pthread_exit for WASI

Can't use `exit()` because it is too high level.
Have to unlock the thread list.

Signed-off-by: Harald Hoyer <harald@profian.com>
* fix: initialize struct pthread for the main thread

Signed-off-by: Harald Hoyer <harald@profian.com>
* fix: store the aligned stack minus `struct start_args`

Signed-off-by: Harald Hoyer <harald@profian.com>
Signed-off-by: Harald Hoyer <harald@profian.com>
Co-authored-by: Harald Hoyer <harald@profian.com>
Makefile
expected/wasm32-wasi/posix/defined-symbols.txt
expected/wasm32-wasi/posix/undefined-symbols.txt
libc-bottom-half/crt/crt1-command.c
libc-bottom-half/sources/__wasilibc_real.c
libc-top-half/musl/arch/wasm32/pthread_arch.h
libc-top-half/musl/src/env/__init_tls.c
libc-top-half/musl/src/internal/libc.h
libc-top-half/musl/src/internal/pthread_impl.h
libc-top-half/musl/src/thread/pthread_create.c
libc-top-half/musl/src/thread/pthread_self.c

index 11fa91ae16244c97c3b86e4d8998d44579f847ef..59598d467dad4be4c5181aaf630c5c16bff44f39 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -192,9 +192,16 @@ LIBC_TOP_HALF_MUSL_SOURCES = \
 ifeq ($(THREAD_MODEL), posix)
 LIBC_TOP_HALF_MUSL_SOURCES += \
     $(addprefix $(LIBC_TOP_HALF_MUSL_SRC_DIR)/, \
+        env/__init_tls.c \
+        stdio/__lockfile.c \
         thread/__lock.c \
         thread/__wait.c \
         thread/__timedwait.c \
+        thread/default_attr.c \
+        thread/pthread_attr_destroy.c \
+        thread/pthread_attr_init.c \
+        thread/pthread_attr_setstack.c \
+        thread/pthread_attr_setstacksize.c \
         thread/pthread_cleanup_push.c \
         thread/pthread_cond_broadcast.c \
         thread/pthread_cond_destroy.c \
@@ -235,6 +242,7 @@ LIBC_TOP_HALF_MUSL_SOURCES += \
         thread/pthread_rwlockattr_init.c \
         thread/pthread_rwlockattr_setpshared.c \
         thread/pthread_setcancelstate.c \
+        thread/pthread_self.c \
         thread/pthread_testcancel.c \
         thread/sem_destroy.c \
         thread/sem_getvalue.c \
index fe29f30b4cf0d8a140fe42af446a6bdc414872c0..d92bcbd88a8c6edf75777e20ade1864591237765 100644 (file)
@@ -23,6 +23,7 @@ __c_locale
 __clock
 __clock_gettime
 __clock_nanosleep
+__copy_tls
 __cos
 __cosdf
 __cosl
@@ -38,6 +39,8 @@ __ctype_tolower_loc
 __ctype_toupper_loc
 __cxa_atexit
 __cxa_finalize
+__default_guardsize
+__default_stacksize
 __des_setkey
 __do_cleanup_pop
 __do_cleanup_push
@@ -87,6 +90,7 @@ __getopt_msg
 __gmtime_r
 __hwcap
 __inet_aton
+__init_tp
 __intscan
 __invtrigl_R
 __isalnum_l
@@ -144,6 +148,7 @@ __locale_lock
 __locale_lockptr
 __localtime_r
 __lock
+__lockfile
 __log2_data
 __log2f_data
 __log_data
@@ -265,6 +270,7 @@ __tan
 __tandf
 __tanl
 __testcancel
+__thread_list_lock
 __timedwait
 __timedwait_cp
 __tl_lock
@@ -288,6 +294,7 @@ __tsearch_balance
 __uflow
 __unlist_locked_file
 __unlock
+__unlockfile
 __uselocale
 __utc
 __wait
@@ -318,6 +325,7 @@ __wasi_fd_seek
 __wasi_fd_sync
 __wasi_fd_tell
 __wasi_fd_write
+__wasi_init_tp
 __wasi_path_create_directory
 __wasi_path_filestat_get
 __wasi_path_filestat_set_times
@@ -371,6 +379,7 @@ __wasilibc_nocwd_scandirat
 __wasilibc_nocwd_symlinkat
 __wasilibc_nocwd_utimensat
 __wasilibc_open_nomode
+__wasilibc_pthread_self
 __wasilibc_register_preopened_fd
 __wasilibc_rename_newat
 __wasilibc_rename_oldat
@@ -953,6 +962,10 @@ program_invocation_name
 program_invocation_short_name
 pselect
 psignal
+pthread_attr_destroy
+pthread_attr_init
+pthread_attr_setstack
+pthread_attr_setstacksize
 pthread_cond_broadcast
 pthread_cond_destroy
 pthread_cond_init
@@ -992,6 +1005,7 @@ pthread_rwlock_wrlock
 pthread_rwlockattr_destroy
 pthread_rwlockattr_init
 pthread_rwlockattr_setpshared
+pthread_self
 pthread_setcancelstate
 pthread_testcancel
 pthread_timedjoin_np
@@ -1182,6 +1196,7 @@ tfind
 tgamma
 tgammaf
 tgammal
+thrd_current
 thrd_sleep
 time
 timegm
index 407a6b71bddd8533887ada3f6fce65c2c8eb50dc..7def0a9f4e4c20f7a5b5ba3f62b8058bfdc6bd24 100644 (file)
@@ -1,7 +1,4 @@
 __addtf3
-__copy_tls
-__default_guardsize
-__default_stacksize
 __divtf3
 __eqtf2
 __extenddftf2
@@ -59,19 +56,18 @@ __imported_wasi_snapshot_preview1_sock_accept
 __imported_wasi_snapshot_preview1_sock_recv
 __imported_wasi_snapshot_preview1_sock_send
 __imported_wasi_snapshot_preview1_sock_shutdown
-__imported_wasi_snapshot_preview2_thread_spawn
+__imported_wasi_thread_spawn
 __letf2
-__lockfile
 __lttf2
 __main_argc_argv
 __netf2
 __stack_pointer
 __subtf3
-__thread_list_lock
+__tls_align
 __tls_base
+__tls_size
 __trunctfdf2
 __trunctfsf2
-__unlockfile
 __unordtf2
-__wasilibc_pthread_self
 __wasm_call_ctors
+__wasm_init_tls
index 48be79f7ef586e6e256db32f5aaff0ee58ed9c81..fb9ee71fb40e7912e24812d324f5b08007a916cb 100644 (file)
@@ -1,5 +1,6 @@
 #ifdef _REENTRANT
 #include <stdatomic.h>
+extern void __wasi_init_tp(void);
 #endif
 #include <wasi/api.h>
 extern void __wasm_call_ctors(void);
@@ -29,6 +30,10 @@ void _start(void) {
     started = 1;
 #endif
 
+#ifdef _REENTRANT
+       __wasi_init_tp();
+#endif
+
     // The linker synthesizes this to call constructors.
     __wasm_call_ctors();
 
index 2648ac9faf51dfdb289435a6d78234f1347b007d..855a2c6ddd2d878ea49fa8fc0ba057ea963bbdb5 100644 (file)
@@ -660,13 +660,13 @@ __wasi_errno_t __wasi_sock_shutdown(
 }
 
 #ifdef _REENTRANT
-int32_t __imported_wasi_snapshot_preview2_thread_spawn(int32_t arg0) __attribute__((
-    __import_module__("wasi_snapshot_preview2"),
+int32_t __imported_wasi_thread_spawn(int32_t arg0) __attribute__((
+    __import_module__("wasi"),
     __import_name__("thread_spawn")
 ));
 
 __wasi_errno_t __wasi_thread_spawn(void* start_arg) {
-       int32_t ret = __imported_wasi_snapshot_preview2_thread_spawn((int32_t) start_arg);
+       int32_t ret = __imported_wasi_thread_spawn((int32_t) start_arg);
     return (uint16_t) ret;
 }
 #endif
index e23eaf8f2a6eec2776b5737aad5fe5cf9536948f..58e76ab0a5ff2383e6cb05f50a70179b1e67918d 100644 (file)
@@ -1,11 +1,5 @@
-static inline uintptr_t __get_tp(void) {
-#if _REENTRANT
-  int val;
-  __asm__("global.get __wasilibc_pthread_self\n"
-          "local.set %0"
-          : "=r"(val));
-  return val;
-#else
-  return 0;
-#endif
+extern _Thread_local struct __pthread __wasilibc_pthread_self;
+
+static inline uintptr_t __get_tp() {
+  return (uintptr_t)&__wasilibc_pthread_self;
 }
index a93141ed36a8d7d06d73100304a068f94a8011bc..ee785bc11e4c517d93c10b1e3a750366724497f3 100644 (file)
@@ -1,7 +1,11 @@
+#ifdef __wasilibc_unmodified_upstream
 #define SYSCALL_NO_TLS 1
 #include <elf.h>
+#endif
 #include <limits.h>
+#ifdef __wasilibc_unmodified_upstream
 #include <sys/mman.h>
+#endif
 #include <string.h>
 #include <stddef.h>
 #include "pthread_impl.h"
 
 volatile int __thread_list_lock;
 
+#ifndef __wasilibc_unmodified_upstream
+void __wasi_init_tp() {
+       __init_tp((void *)__get_tp());
+}
+#endif
+
 int __init_tp(void *p)
 {
        pthread_t td = p;
        td->self = td;
+#ifdef __wasilibc_unmodified_upstream
        int r = __set_thread_area(TP_ADJ(p));
        if (r < 0) return -1;
        if (!r) libc.can_do_threads = 1;
        td->detach_state = DT_JOINABLE;
        td->tid = __syscall(SYS_set_tid_address, &__thread_list_lock);
+#endif
        td->locale = &libc.global_locale;
        td->robust_list.head = &td->robust_list.head;
        td->sysinfo = __sysinfo;
@@ -27,6 +39,8 @@ int __init_tp(void *p)
        return 0;
 }
 
+#ifdef __wasilibc_unmodified_upstream
+
 static struct builtin_tls {
        char c;
        struct pthread pt;
@@ -35,9 +49,15 @@ static struct builtin_tls {
 #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
 
 static struct tls_module main_tls;
+#endif
+
+#ifndef __wasilibc_unmodified_upstream
+extern void __wasm_init_tls(void*);
+#endif
 
 void *__copy_tls(unsigned char *mem)
 {
+#ifdef __wasilibc_unmodified_upstream
        pthread_t td;
        struct tls_module *p;
        size_t i;
@@ -69,8 +89,20 @@ void *__copy_tls(unsigned char *mem)
        dtv[0] = libc.tls_cnt;
        td->dtv = dtv;
        return td;
+#else
+       size_t tls_align = __builtin_wasm_tls_align();
+       volatile void* tls_base = __builtin_wasm_tls_base();
+       mem += tls_align;
+       mem -= (uintptr_t)mem & (tls_align-1);
+       __wasm_init_tls(mem);
+       __asm__("local.get %0\n"
+                       "global.set __tls_base\n"
+                       :: "r"(tls_base));
+       return mem;
+#endif
 }
 
+#ifdef __wasilibc_unmodified_upstream
 #if ULONG_MAX == 0xffffffff
 typedef Elf32_Phdr Phdr;
 #else
@@ -151,3 +183,4 @@ static void static_init_tls(size_t *aux)
 }
 
 weak_alias(static_init_tls, __init_tls);
+#endif
index 9b3984742b4bf1b46555436e25aec042a8a6d337..355c3a4e2398d9b6c8ea2805d9a010ffc489ebc9 100644 (file)
@@ -18,8 +18,10 @@ struct tls_module {
 };
 
 struct __libc {
-#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT)
+#ifdef __wasilibc_unmodified_upstream
        char can_do_threads;
+#endif
+#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT)
        char threaded;
 #endif
 #ifdef __wasilibc_unmodified_upstream // WASI doesn't currently use any code that needs "secure" mode
@@ -32,7 +34,7 @@ struct __libc {
 #ifdef __wasilibc_unmodified_upstream // WASI has no auxv
        size_t *auxv;
 #endif
-#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT)
+#ifdef __wasilibc_unmodified_upstream // WASI use different TLS implement
        struct tls_module *tls_head;
        size_t tls_size, tls_align, tls_cnt;
 #endif
index 22e557d580095cd1520ede709aacbb86f4ab1c74..a6d188bb3cfbbdcc1f91dba26edd6cb44dbb3ad9 100644 (file)
@@ -25,8 +25,10 @@ struct pthread {
        /* Part 1 -- these fields may be external or
         * internal (accessed via asm) ABI. Do not change. */
        struct pthread *self;
+#ifdef __wasilibc_unmodified_upstream
 #ifndef TLS_ABOVE_TP
        uintptr_t *dtv;
+#endif
 #endif
        struct pthread *prev, *next; /* non-ABI */
        uintptr_t sysinfo;
index d0168987f3e07f0719b02ac4923284ebee405806..1aa7be71a57d5edd3adcb26684bd8de75aa2604e 100644 (file)
@@ -12,6 +12,8 @@
 #include <stdatomic.h>
 #endif
 
+#include <stdalign.h>
+
 static void dummy_0()
 {
 }
@@ -158,6 +160,14 @@ _Noreturn void __pthread_exit(void *result)
        self->prev->next = self->next;
        self->prev = self->next = self;
 
+#ifndef __wasilibc_unmodified_upstream
+       /* On Linux, the thread is created with CLONE_CHILD_CLEARTID,
+        * and this lock will unlock by kernel when this thread terminates.
+        * So we should unlock it here in WebAssembly.
+        * See also set_tid_address(2) */
+       __tl_unlock();
+#endif
+
 #ifdef __wasilibc_unmodified_upstream
        if (state==DT_DETACHED && self->map_base) {
                /* Detached threads must block even implementation-internal
@@ -174,6 +184,15 @@ _Noreturn void __pthread_exit(void *result)
                 * and then exits without touching the stack. */
                __unmapself(self->map_base, self->map_size);
        }
+#else
+       if (state==DT_DETACHED && self->map_base) {
+               // __syscall(SYS_exit) would unlock the thread, list
+               // do it manually here
+               __tl_unlock();
+               free(self->map_base);
+               // Can't use `exit()` here, because it is too high level
+               for (;;) __wasi_proc_exit(0);
+       }
 #endif
 
        /* Wake any joiner. */
@@ -189,7 +208,11 @@ _Noreturn void __pthread_exit(void *result)
 #ifdef __wasilibc_unmodified_upstream
        for (;;) __syscall(SYS_exit, 0);
 #else
-       for (;;) exit(0);
+       // __syscall(SYS_exit) would unlock the thread, list
+       // do it manually here
+       __tl_unlock();
+       // Can't use `exit()` here, because it is too high level
+       for (;;) __wasi_proc_exit(0);
 #endif
 }
 
@@ -214,7 +237,7 @@ struct start_args {
 #else
        void *(*start_func)(void *);
        void *start_arg;
-       struct pthread *thread;
+       void *tls_base;
 #endif
 };
 
@@ -249,22 +272,27 @@ static int start_c11(void *p)
 }
 #else
 __attribute__((export_name("wasi_thread_start")))
-int wasi_thread_start(int tid, void *p)
+_Noreturn void wasi_thread_start(int tid, void *p)
 {
        struct start_args *args = p;
+       __asm__(".globaltype __tls_base, i32\n"
+                       "local.get %0\n"
+                       "global.set __tls_base\n"
+                       :: "r"(args->tls_base));
+       pthread_t self = __pthread_self();
        // Set the thread ID (TID) on the pthread structure. The TID is stored
        // atomically since it is also stored by the parent thread; this way,
        // whichever thread (parent or child) reaches this point first can proceed
        // without waiting.
-       atomic_store((atomic_int *) &(args->thread->tid), tid);
-       // Save the pointer to the pthread structure as the global `pthread_self`.
-       __asm__("local.set %0\n"
-                 "global.set __wasilibc_pthread_self\n"
-          : "=r"(args->thread));
+       atomic_store((atomic_int *) &(self->tid), tid);
+       // Set the stack pointer.
+       __asm__(".globaltype __stack_pointer, i32\n"
+                       "local.get %0\n"
+                       "global.set __stack_pointer\n"
+                       :: "r"(self->stack));
        // Execute the user's start function.
        int (*start)(void*) = (int(*)(void*)) args->start_func;
        __pthread_exit((void *)(uintptr_t)start(args->start_arg));
-       return 0;
 }
 #endif
 
@@ -299,8 +327,18 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 #endif
        pthread_attr_t attr = { 0 };
        sigset_t set;
+#ifndef __wasilibc_unmodified_upstream
+       size_t tls_size = __builtin_wasm_tls_size();
+       size_t tls_align = __builtin_wasm_tls_align();
+       void* tls_base = __builtin_wasm_tls_base();
+       void* new_tls_base;
+       size_t tls_offset;
+       tls_size += tls_align;
+#endif
 
+#ifdef __wasilibc_unmodified_upstream
        if (!libc.can_do_threads) return ENOSYS;
+#endif
        self = __pthread_self();
        if (!libc.threaded) {
                for (FILE *f=*__ofl_lock(); f; f=f->next)
@@ -327,7 +365,11 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
        }
 
        if (attr._a_stackaddr) {
+#ifdef __wasilibc_unmodified_upstream
                size_t need = libc.tls_size + __pthread_tsd_size;
+#else
+               size_t need = tls_size + __pthread_tsd_size;
+#endif
                size = attr._a_stacksize;
                stack = (void *)(attr._a_stackaddr & -16);
                stack_limit = (void *)(attr._a_stackaddr - size);
@@ -336,7 +378,11 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
                 * application's stack space. */
                if (need < size/8 && need < 2048) {
                        tsd = stack - __pthread_tsd_size;
+#ifdef __wasilibc_unmodified_upstream
                        stack = tsd - libc.tls_size;
+#else
+                       stack = tsd - tls_size;
+#endif
                        memset(stack, 0, need);
                } else {
                        size = ROUND(need);
@@ -345,7 +391,11 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
        } else {
                guard = ROUND(attr._a_guardsize);
                size = guard + ROUND(attr._a_stacksize
+#ifdef __wasilibc_unmodified_upstream
                        + libc.tls_size +  __pthread_tsd_size);
+#else
+                       + tls_size +  __pthread_tsd_size);
+#endif
        }
 
        if (!tsd) {
@@ -368,12 +418,22 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 #endif
                tsd = map + size - __pthread_tsd_size;
                if (!stack) {
+#ifdef __wasilibc_unmodified_upstream
                        stack = tsd - libc.tls_size;
+#else
+                       stack = tsd - tls_size;
+#endif
                        stack_limit = map + guard;
                }
        }
 
+#ifdef __wasilibc_unmodified_upstream
        new = __copy_tls(tsd - libc.tls_size);
+#else
+       new_tls_base = __copy_tls(tsd - tls_size);
+       tls_offset = new_tls_base - tls_base;
+       new = (void*)((uintptr_t)self + tls_offset);
+#endif
        new->map_base = map;
        new->map_size = size;
        new->stack = stack;
@@ -394,12 +454,12 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
        /* Setup argument structure for the new thread on its stack.
         * It's safe to access from the caller only until the thread
         * list is unlocked. */
+#ifdef __wasilibc_unmodified_upstream
        stack -= (uintptr_t)stack % sizeof(uintptr_t);
        stack -= sizeof(struct start_args);
        struct start_args *args = (void *)stack;
        args->start_func = entry;
        args->start_arg = arg;
-#ifdef __wasilibc_unmodified_upstream
        args->control = attr._a_sched ? 1 : 0;
 
        /* Application signals (but not the synccall signal) must be
@@ -414,9 +474,19 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
        args->sig_mask[(SIGCANCEL-1)/8/sizeof(long)] &=
                ~(1UL<<((SIGCANCEL-1)%(8*sizeof(long))));
 #else
-       /* The new thread needs a pointer to the pthread struct so that it can set
-        * up its `wasilibc_pthread_self` global. */
-       args->thread = new;
+       /* Align the stack to struct start_args */
+       stack -= sizeof(struct start_args);
+       stack -= (uintptr_t)stack % alignof(struct start_args);
+       struct start_args *args = (void *)stack;
+
+       /* Align the stack to 16 and store it */
+       new->stack = (void *)((uintptr_t) stack & -16);
+       /* Correct the stack size */
+       new->stack_size = stack - stack_limit;
+
+       args->start_func = entry;
+       args->start_arg = arg;
+       args->tls_base = (void*)new_tls_base;
 #endif
 
        __tl_lock();
@@ -457,7 +527,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
        if (ret < 0) {
                ret = -EAGAIN;
        } else {
-               atomic_store((atomic_int *) &(args->thread->tid), ret);
+               atomic_store((atomic_int *) &(new->tid), ret);
        }
 #endif
 
index 197c6830b28c19d185adc1c226819e723f44e87f..1f3eee1d16d076a8cb2f43a5fa420555ea8b91f8 100644 (file)
@@ -3,9 +3,7 @@
 
 #if !defined(__wasilibc_unmodified_upstream) && defined(__wasm__) &&           \
     defined(_REENTRANT)
-// We need some place to store the thread ID. This WebAssembly global fits the
-// bill and is used by `__get_tp` elsewhere.
-__asm__(".globaltype __wasilibc_pthread_self, i32\n");
+_Thread_local struct pthread __wasilibc_pthread_self;
 #endif
 
 static pthread_t __pthread_self_internal()