author | Olivier Brunel
<jjk@jjacky.com> 2016-09-16 20:41:27 UTC |
committer | Olivier Brunel
<jjk@jjacky.com> 2016-09-16 20:42:29 UTC |
parent | 49454019c5630611b3510c7a231b21f0a1f99d30 |
src/anopa/start-stop.c | +5 | -9 |
src/libanopa/service.c | +6 | -1 |
diff --git a/src/anopa/start-stop.c b/src/anopa/start-stop.c index 8d7f776..70b2778 100644 --- a/src/anopa/start-stop.c +++ b/src/anopa/start-stop.c @@ -1145,19 +1145,15 @@ process_timeouts (aa_mode mode, aa_scan_cb scan_cb) /* timeout expired? */ if (tain_less (&ts, &STAMP)) { - aa_service_status *svst = &aa_service (si)->st; - + /* flag it to avoid race: by the time it'll be processed for + * dependencies, s6 state could have changed, especially if + * this is a readiness timeout, and change doesn't imply + * success (service could have gone down), hence the flag */ + aa_service (si)->timedout = 1; aa_unsubscribe_for (aa_service (si)->ft_id); aa_service (si)->ft_id = 0; --nb_wait_longrun; - svst->event = (mode & AA_MODE_START) ? AA_EVT_STARTING_FAILED: AA_EVT_STOPPING_FAILED; - svst->code = ERR_TIMEDOUT; - tain_copynow (&svst->stamp); - aa_service_status_set_msg (svst, ""); - if (aa_service_status_write (svst, aa_service_name (aa_service (si))) < 0) - aa_strerr_warnu2sys ("write service status file for ", aa_service_name (aa_service (si))); - put_err_service (aa_service_name (aa_service (si)), ERR_TIMEDOUT, 1); genalloc_append (int, &ga_timedout, &si); if (mode & AA_MODE_START) diff --git a/src/libanopa/service.c b/src/libanopa/service.c index 2fc2d50..34ebb01 100644 --- a/src/libanopa/service.c +++ b/src/libanopa/service.c @@ -567,6 +567,11 @@ service_is_ok (aa_mode mode, aa_service *s) /* TYPE_LONGRUN -- we make assumptions here: * - we have a local status, since we started the service + * - if it's flagged timedout, that's a fail (flag is used to avoid possible + * race condition: it was processed as timedout (might be for readiness) + * and by the time we're checking here, s6 state has changed (e.g. it now + * is ready, or down...) This should obviously remain a fail, not assume + * it was good (e.g. ready) & process it as success.) * - if there's no s6 status, that's a fail (probably fail to even exec run) * - we compare stamp, if s6 is more recent, it's good (since we got the * event we were waiting for); else it's a fail (must be our @@ -575,7 +580,7 @@ service_is_ok (aa_mode mode, aa_service *s) * EVT_STARTING because there's a possible race condition there. */ event = (mode & AA_MODE_START) ? AA_EVT_STARTING : AA_EVT_STOPPING; - if (s6_svstatus_read (aa_service_name (s), &st6) + if (!s->timedout && s6_svstatus_read (aa_service_name (s), &st6) && (tain_less (&svst->stamp, &st6.stamp) || svst->event == event)) r = 1; else