Welcome to little lamb

Code » anopa » commit f24e4e3

start/stop: Fix setting longruns as timed out

author Olivier Brunel
2016-09-16 20:41:27 UTC
committer Olivier Brunel
2016-09-16 20:42:29 UTC
parent 49454019c5630611b3510c7a231b21f0a1f99d30

start/stop: Fix setting longruns as timed out

Only oneshots should be marked as timed out, longruns should not have
their status modified because it would be "take over" the s6 status (at
least from aa-status) while it shouldn't.

Most obvious example is when timing out while waiting for readiness:
surely the status of the services remains "Up", not "Timed out"

src/anopa/start-stop.c +5 -9
src/libanopa/service.c +6 -1

diff --git a/src/anopa/start-stop.c b/src/anopa/start-stop.c
index 8d7f776..70b2778 100644
--- a/src/anopa/start-stop.c
+++ b/src/anopa/start-stop.c
@@ -1145,19 +1145,15 @@ process_timeouts (aa_mode mode, aa_scan_cb scan_cb)
                 /* timeout expired? */
                 if (tain_less (&ts, &STAMP))
                 {
-                    aa_service_status *svst = &aa_service (si)->st;
-
+                    /* flag it to avoid race: by the time it'll be processed for
+                     * dependencies, s6 state could have changed, especially if
+                     * this is a readiness timeout, and change doesn't imply
+                     * success (service could have gone down), hence the flag */
+                    aa_service (si)->timedout = 1;
                     aa_unsubscribe_for (aa_service (si)->ft_id);
                     aa_service (si)->ft_id = 0;
                     --nb_wait_longrun;
 
-                    svst->event = (mode & AA_MODE_START) ? AA_EVT_STARTING_FAILED: AA_EVT_STOPPING_FAILED;
-                    svst->code = ERR_TIMEDOUT;
-                    tain_copynow (&svst->stamp);
-                    aa_service_status_set_msg (svst, "");
-                    if (aa_service_status_write (svst, aa_service_name (aa_service (si))) < 0)
-                        aa_strerr_warnu2sys ("write service status file for ", aa_service_name (aa_service (si)));
-
                     put_err_service (aa_service_name (aa_service (si)), ERR_TIMEDOUT, 1);
                     genalloc_append (int, &ga_timedout, &si);
                     if (mode & AA_MODE_START)
diff --git a/src/libanopa/service.c b/src/libanopa/service.c
index 2fc2d50..34ebb01 100644
--- a/src/libanopa/service.c
+++ b/src/libanopa/service.c
@@ -567,6 +567,11 @@ service_is_ok (aa_mode mode, aa_service *s)
 
     /* TYPE_LONGRUN -- we make assumptions here:
      * - we have a local status, since we started the service
+     * - if it's flagged timedout, that's a fail (flag is used to avoid possible
+     *   race condition: it was processed as timedout (might be for readiness)
+     *   and by the time we're checking here, s6 state has changed (e.g. it now
+     *   is ready, or down...) This should obviously remain a fail, not assume
+     *   it was good (e.g. ready) & process it as success.)
      * - if there's no s6 status, that's a fail (probably fail to even exec run)
      * - we compare stamp, if s6 is more recent, it's good (since we got the
      *   event we were waiting for); else it's a fail (must be our
@@ -575,7 +580,7 @@ service_is_ok (aa_mode mode, aa_service *s)
      *   EVT_STARTING because there's a possible race condition there.
      */
     event = (mode & AA_MODE_START) ? AA_EVT_STARTING : AA_EVT_STOPPING;
-    if (s6_svstatus_read (aa_service_name (s), &st6)
+    if (!s->timedout && s6_svstatus_read (aa_service_name (s), &st6)
             && (tain_less (&svst->stamp, &st6.stamp) || svst->event == event))
         r = 1;
     else