author     normal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>  2018-07-29 20:47:33 +0000
committer  normal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>  2018-07-29 20:47:33 +0000
commit     9986268b7ea94e8d09a7396ffe06562138c70dfa (patch)
tree       3a29a7e3edf47618e8cf8f1159d27993e12e66ce /process.c
parent     ca9217e95b293991ff7da1253b1d47d61d0ba7dd (diff)
download   ruby-9986268b7ea94e8d09a7396ffe06562138c70dfa.tar.gz
thread_pthread: remove timer-thread by restructuring GVL
To reduce resource use and CI failure, remove the timer-thread. Single-threaded Ruby processes (including forked children) will never see extra thread overhead. This prevents glibc and jemalloc from going into multi-threaded mode and initializing locks or causing fragmentation via arena explosion.

The GVL now implements its own wait-queue as a ccan/list to permit controlling wakeup order. Timeslice under contention is handled by a designated timer thread (similar to choosing a "patrol_thread" for current deadlock checking).

There is only one self-pipe now, as wakeups for timeslice are done independently using condition variables. This reduces FD pressure slightly.

Signal handling is done directly by a Ruby Thread (instead of the timer-thread) by exposing the signal self-pipe to callers of rb_thread_fd_select, native_sleep, rb_wait_for_single_fd, etc. Acquiring, using, and releasing the self-pipe is exposed via 4 new internal functions:

1) rb_sigwait_fd_get - exclusively acquire timer_thread_pipe.normal[0]
2) rb_sigwait_fd_sleep - sleep and wait for signal (and no other FDs)
3) rb_sigwait_fd_put - release acquired result from rb_sigwait_fd_get
4) rb_sigwait_fd_migrate - migrate signal handling to another thread after calling rb_sigwait_fd_put

rb_sigwait_fd_migrate is necessary for waitpid callers because only one thread can wait on the self-pipe at a time; otherwise a deadlock will occur if threads fight over it.

TRAP_INTERRUPT_MASK is now set for the main thread directly in the signal handler via rb_thread_wakeup_timer_thread.

Originally, I wanted to use POSIX timers (timer_create/timer_settime) for this. Unfortunately, that proved infeasible, as Mutex#sleep resumes on spurious wakeups and test/thread/test_cv.rb::test_condvar_timed_wait failed. Using pthread_sigmask to mask out SIGVTALRM fixed that test, but test/fiddle/test_function.rb::test_nogvl_poll showed there would be some unavoidable (and frequent) incompatibilities from that approach.

Finally, this allows us to drop thread_destruct_lock and interrupt the current ec directly. We no longer need to rely on vm->thread_destruct_lock or a coherent vm->running_thread on any platform. A separate timer-thread for time slicing and signal handling is relegated to thread_win32.c now.

[ruby-core:88088] [Misc #14937]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@64107 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
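[Editorial sketch] A minimal, self-contained model of the ownership/migration protocol those four functions describe. This program is not part of the commit: the names sigfd_get, sigfd_put, and worker are invented here, and a plain pipe stands in for the signal self-pipe. Exactly one thread at a time owns the pipe's read end and polls it; the others sleep on a condition variable until ownership is handed off, mirroring rb_sigwait_fd_get / rb_sigwait_fd_put / rb_sigwait_fd_migrate.

    #include <pthread.h>
    #include <poll.h>
    #include <unistd.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static int pipe_fds[2];
    static int owned; /* guarded by lock: does some thread own the read end? */

    /* try to become the exclusive pipe watcher (cf. rb_sigwait_fd_get) */
    static int
    sigfd_get(void)
    {
        int fd = -1;

        pthread_mutex_lock(&lock);
        if (!owned) {
            owned = 1;
            fd = pipe_fds[0];
        }
        pthread_mutex_unlock(&lock);
        return fd;
    }

    /* release ownership and wake one sleeper so the watcher role migrates
     * (cf. rb_sigwait_fd_put followed by rb_sigwait_fd_migrate) */
    static void
    sigfd_put(void)
    {
        pthread_mutex_lock(&lock);
        owned = 0;
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
    }

    static void *
    worker(void *arg)
    {
        int fd = sigfd_get();

        (void)arg;
        if (fd >= 0) {
            /* owner: poll the pipe, as rb_sigwait_sleep polls the self-pipe;
             * cap the wait at 100ms, cf. sigwait_sleep_time() in the diff */
            struct pollfd pfd = { 0, POLLIN, 0 };
            pfd.fd = fd;
            poll(&pfd, 1, 100);
            sigfd_put();
        }
        else {
            /* non-owner: sleep on the condvar; a real caller would retry
             * sigfd_get() after waking to take over the watcher role */
            pthread_mutex_lock(&lock);
            while (owned)
                pthread_cond_wait(&cond, &lock);
            pthread_mutex_unlock(&lock);
        }
        return NULL;
    }

    int
    main(void)
    {
        pthread_t th[2];
        int i;

        pipe(pipe_fds);
        write(pipe_fds[1], "x", 1); /* pretend a signal arrived */
        for (i = 0; i < 2; i++) pthread_create(&th[i], 0, worker, 0);
        for (i = 0; i < 2; i++) pthread_join(th[i], 0);
        puts("watcher role handed off without a deadlock");
        return 0;
    }

Only one thread ever blocks on the pipe, so two sleepers can never fight over a single readable event; the condvar handoff is what makes releasing ownership safe.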
Diffstat (limited to 'process.c')
-rw-r--r--  process.c  103
1 file changed, 95 insertions, 8 deletions
diff --git a/process.c b/process.c
index dd70c7104c..57ad802631 100644
--- a/process.c
+++ b/process.c
@@ -936,13 +936,51 @@ void rb_native_cond_signal(rb_nativethread_cond_t *);
 void rb_native_cond_wait(rb_nativethread_cond_t *, rb_nativethread_lock_t *);
 rb_nativethread_cond_t *rb_sleep_cond_get(const rb_execution_context_t *);
 void rb_sleep_cond_put(rb_nativethread_cond_t *);
+int rb_sigwait_fd_get(const rb_thread_t *);
+void rb_sigwait_sleep(const rb_thread_t *, int fd, const struct timespec *);
+void rb_sigwait_fd_put(const rb_thread_t *, int fd);
+
+/*
+ * When a thread is done using sigwait_fd and there are other threads
+ * sleeping on waitpid, we must kick one of the threads out of
+ * rb_native_cond_wait so it can switch to rb_sigwait_sleep
+ */
+static void
+sigwait_fd_migrate_sleeper(rb_vm_t *vm)
+{
+    struct waitpid_state *w = 0;
+
+    list_for_each(&vm->waiting_pids, w, wnode) {
+        if (!w->cond) continue; /* somebody else already got sigwait_fd */
+        rb_native_cond_signal(w->cond);
+        return;
+    }
+    list_for_each(&vm->waiting_grps, w, wnode) {
+        if (!w->cond) continue; /* somebody else already got sigwait_fd */
+        rb_native_cond_signal(w->cond);
+        return;
+    }
+}
+
+void
+rb_sigwait_fd_migrate(rb_vm_t *vm)
+{
+    rb_native_mutex_lock(&vm->waitpid_lock);
+    sigwait_fd_migrate_sleeper(vm);
+    rb_native_mutex_unlock(&vm->waitpid_lock);
+}
 
 static void
 waitpid_notify(struct waitpid_state *w, rb_pid_t ret)
 {
     w->ret = ret;
     list_del_init(&w->wnode);
-    rb_native_cond_signal(w->cond);
+    if (w->cond) {
+        rb_native_cond_signal(w->cond);
+    }
+    else {
+        /* w is owned by this thread */
+    }
 }
 
 #ifdef _WIN32 /* for spawnvp result from mjit.c */
@@ -954,7 +992,7 @@ waitpid_notify(struct waitpid_state *w, rb_pid_t ret)
 #endif
 extern volatile unsigned int ruby_nocldwait; /* signal.c */
 
-/* called by timer thread */
+/* called by timer thread or thread which acquired sigwait_fd */
 static void
 waitpid_each(struct list_head *head)
 {
@@ -1008,6 +1046,17 @@ waitpid_state_init(struct waitpid_state *w, rb_pid_t pid, int options)
     w->options = options;
 }
 
+static const struct timespec *
+sigwait_sleep_time(void)
+{
+    if (SIGCHLD_LOSSY) {
+        static const struct timespec busy_wait = { 0, 100000000 };
+
+        return &busy_wait;
+    }
+    return 0;
+}
+
 /*
  * must be called with vm->waitpid_lock held, this is not interruptible
  */
@@ -1026,13 +1075,30 @@ ruby_waitpid_locked(rb_vm_t *vm, rb_pid_t pid, int *status, int options,
         if (w.ret == -1) w.errnum = errno;
     }
     else {
-        w.cond = cond;
+        int sigwait_fd;
+
         w.ec = 0;
         list_add(w.pid > 0 ? &vm->waiting_pids : &vm->waiting_grps, &w.wnode);
         do {
-            rb_native_cond_wait(w.cond, &vm->waitpid_lock);
+            sigwait_fd = rb_sigwait_fd_get(0);
+
+            if (sigwait_fd >= 0) {
+                w.cond = 0;
+                rb_native_mutex_unlock(&vm->waitpid_lock);
+                rb_sigwait_sleep(0, sigwait_fd, sigwait_sleep_time());
+                rb_native_mutex_lock(&vm->waitpid_lock);
+                rb_sigwait_fd_put(0, sigwait_fd);
+            }
+            else {
+                w.cond = cond;
+                rb_native_cond_wait(w.cond, &vm->waitpid_lock);
+            }
         } while (!w.ret);
         list_del(&w.wnode);
+
+        /* we're done, maybe other waitpid callers are not: */
+        if (sigwait_fd >= 0)
+            sigwait_fd_migrate_sleeper(vm);
     }
     if (status) {
         *status = w.status;
@@ -1047,7 +1113,10 @@ waitpid_wake(void *x)
     struct waitpid_state *w = x;
 
     /* th->interrupt_lock is already held by rb_threadptr_interrupt_common */
-    rb_native_cond_signal(w->cond);
+    if (w->cond)
+        rb_native_cond_signal(w->cond);
+    else
+        rb_thread_wakeup_timer_thread(0); /* kick sigwait_fd */
 }
 
 static void *
@@ -1055,6 +1124,7 @@ waitpid_nogvl(void *x)
 {
     struct waitpid_state *w = x;
     rb_thread_t *th = rb_ec_thread_ptr(w->ec);
+    int sigwait_fd = -1;
 
     rb_native_mutex_lock(&th->interrupt_lock);
     /*
@@ -1062,13 +1132,30 @@ waitpid_nogvl(void *x)
      * by the time we enter this. And we may also be interrupted.
      */
     if (!w->ret && !RUBY_VM_INTERRUPTED_ANY(w->ec)) {
-        if (SIGCHLD_LOSSY) {
-            rb_thread_wakeup_timer_thread();
+        sigwait_fd = rb_sigwait_fd_get(th);
+        if (sigwait_fd >= 0) {
+            rb_nativethread_cond_t *cond = w->cond;
+
+            w->cond = 0;
+            rb_native_mutex_unlock(&th->interrupt_lock);
+            rb_sigwait_sleep(th, sigwait_fd, sigwait_sleep_time());
+            rb_native_mutex_lock(&th->interrupt_lock);
+            w->cond = cond;
+            rb_sigwait_fd_put(th, sigwait_fd);
+        }
+        else {
+            /* another thread calling rb_sigwait_sleep will process
+             * signals for us */
+            if (SIGCHLD_LOSSY) {
+                rb_thread_wakeup_timer_thread(0);
+            }
+            rb_native_cond_wait(w->cond, &th->interrupt_lock);
         }
-        rb_native_cond_wait(w->cond, &th->interrupt_lock);
     }
     rb_native_mutex_unlock(&th->interrupt_lock);
 
+    if (sigwait_fd >= 0)
+        rb_sigwait_fd_migrate(th->vm);
+
     return 0;
 }
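[Editorial sketch] The SIGCHLD_LOSSY path never sleeps longer than the { 0, 100000000 } (100ms) timespec returned by sigwait_sleep_time(), so a lost SIGCHLD delays reaping by at most one timeslice instead of blocking forever. Below is a standalone sketch of that busy-wait idea, assuming nothing from the Ruby source: the forked child, the poll()-based sleep, and the WNOHANG loop are illustration only.

    #include <sys/wait.h>
    #include <unistd.h>
    #include <poll.h>
    #include <stdio.h>

    int
    main(void)
    {
        pid_t pid = fork();

        if (pid == 0) {
            /* child: exit after a short delay so the parent must wait */
            usleep(250000);
            _exit(0);
        }
        for (;;) {
            int status;
            pid_t r = waitpid(pid, &status, WNOHANG); /* non-blocking reap */

            if (r == pid) break;
            /* nothing reaped yet: sleep at most 100ms (the { 0, 100000000 }
             * timespec above) before re-checking, in case SIGCHLD was lost */
            poll(NULL, 0, 100);
        }
        puts("child reaped despite a possibly-lost SIGCHLD");
        return 0;
    }

The cap trades a little idle wakeup churn on lossy platforms for a hard upper bound on how long a dead child can go unreaped; on platforms with reliable SIGCHLD, sigwait_sleep_time() returns 0 and the sleep is unbounded.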