/*
* anopa - Copyright (C) 2015-2017 Olivier Brunel
*
* service.c
* Copyright (C) 2015-2017 Olivier Brunel <jjk@jjacky.com>
*
* This file is part of anopa.
*
* anopa is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* anopa is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* anopa. If not, see http://www.gnu.org/licenses/
*/
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#include <skalibs/djbunix.h> /* fd_close() */
#include <skalibs/stralloc.h>
#include <skalibs/genalloc.h>
#include <skalibs/bytestr.h>
#include <skalibs/direntry.h>
#include <skalibs/types.h>
#include <skalibs/tai.h>
#include <s6/s6-supervise.h>
#include <s6/ftrigr.h>
#include <anopa/service.h>
#include <anopa/ga_int_list.h>
#include <anopa/scan_dir.h>
#include <anopa/err.h>
#include <anopa/output.h>
#include "service_internal.h"
/* name of the file, inside a longrun's service directory, that
 * aa_preload_service() looks for (and validates via contains_fd()) to decide
 * whether the service supports readiness notification */
#define NOTIFICATION_FILENAME "notification-fd"
/* function used by free_service() to close a service's file descriptors; it is
 * (re)set by aa_free_services() right before the services are freed */
static aa_close_fd_fn close_fd;
/* Release everything held by one service: its needs/wants/after lists, its
 * status, any descriptor still open (closed through the close_fd handler set by
 * aa_free_services()) and its output buffer. */
static void
free_service (aa_service *s)
{
    genalloc *deps[] = { &s->needs, &s->wants, &s->after };
    size_t n;

    for (n = 0; n < sizeof (deps) / sizeof (*deps); ++n)
        genalloc_free (int, deps[n]);

    aa_service_status_free (&s->st);

    /* only strictly positive descriptors are considered open */
    if (s->fd_out > 0)
        close_fd (s->fd_out);
    if (s->fd_progress > 0)
        close_fd (s->fd_progress);

    stralloc_free (&s->sa_out);
}
/* Free all services (and the array holding them).
 *
 * _close_fd: function to call to close each service's open descriptors; when
 *            NULL, fd_close() is used instead. */
void
aa_free_services (aa_close_fd_fn _close_fd)
{
    close_fd = (_close_fd) ? _close_fd : (aa_close_fd_fn) fd_close;
    genalloc_deepfree (aa_service, &aa_services, free_service);
}
/* Append name (terminating NUL included) to aa_names.
 *
 * Returns the offset at which the name was stored, or (size_t) -1 if it could
 * not be appended. */
size_t
aa_add_name (const char *name)
{
    const size_t pos = aa_names.len;

    if (stralloc_catb (&aa_names, name, strlen (name) + 1))
        return pos;

    return (size_t) -1;
}
static int
get_new_service (const char *name)
{
aa_service s = {
.nb_mark = 0,
.needs = GENALLOC_ZERO,
.wants = GENALLOC_ZERO,
.after = GENALLOC_ZERO,
.ls = AA_LOAD_NOT,
.st.event = AA_EVT_NONE,
.st.sa = STRALLOC_ZERO,
.st.type = AA_TYPE_UNKNOWN,
.ft_id = 0,
.sa_out = STRALLOC_ZERO,
.pi = -1
};
struct stat st;
if (!_is_valid_service_name (name, strlen (name)))
return -ERR_INVALID_NAME;
if (stat (name, &st) < 0)
{
if (errno == ENOENT)
return -ERR_UNKNOWN;
else
return -ERR_IO;
}
else if (!S_ISDIR (st.st_mode))
return (errno = ENOTDIR, -ERR_IO);
s.offset_name = aa_add_name (name);
if (s.offset_name == (size_t) -1)
return (errno = ENOMEM, -ERR_UNKNOWN);
genalloc_append (aa_service, &aa_services, &s);
return genalloc_len (aa_service, &aa_services) - 1;
}
/* Look for a service called `name` amongst the service indexes stored in list.
 *
 * Returns the index of the (first) matching service, or -1 if there's none. */
static int
get_from_list (genalloc *list, const char *name)
{
    const size_t count = genalloc_len (int, list);
    size_t pos = 0;

    while (pos < count)
    {
        int candidate = list_get (list, pos);

        if (str_diff (name, aa_service_name (aa_service (candidate))) == 0)
            return candidate;
        ++pos;
    }

    return -1;
}
int
aa_get_service (const char *name, int *si, int new_in_main)
{
*si = get_from_list (&aa_main_list, name);
if (*si >= 0)
return AA_SERVICE_FROM_MAIN;
*si = get_from_list (&aa_tmp_list, name);
if (*si >= 0)
return AA_SERVICE_FROM_TMP;
*si = get_new_service (name);
if (*si < 0)
return *si;
if (new_in_main)
{
add_to_list (&aa_main_list, *si, 0);
return AA_SERVICE_FROM_MAIN;
}
else
{
add_to_list (&aa_tmp_list, *si, 0);
return AA_SERVICE_FROM_TMP;
}
}
/* Check whether file `filename` holds an unsigned integer (only its first line
 * is looked at).
 *
 * Returns 1 if so, else 0. A warning is emitted if the content is invalid, or
 * if the file couldn't be read for any reason other than ENOENT. */
static int
contains_fd (const char *filename)
{
    char data[UINT_FMT + 1];
    unsigned int value;
    ssize_t got = openreadnclose_nb (filename, data, UINT_FMT);

    if (got < 0)
    {
        if (errno != ENOENT)
            aa_strerr_warnu2sys ("open ", filename);
        return 0;
    }

    /* end the string at the first LF, or at the end of what was read */
    data[byte_chr (data, (unsigned int) got, '\n')] = '\0';
    if (uint0_scan (data, &value))
        return 1;

    aa_strerr_warn2x ("invalid ", filename);
    return 0;
}
int
aa_preload_service (int si)
{
aa_service_status *svst = &aa_service (si)->st;
size_t l_sn = strlen (aa_service_name (aa_service (si)));
char buf[l_sn + 1 + sizeof (NOTIFICATION_FILENAME)];
byte_copy (buf, l_sn, aa_service_name (aa_service (si)));
byte_copy (buf + l_sn, 5, "/run");
if (access (buf, F_OK) < 0)
{
if (errno != ENOENT)
return -ERR_IO;
else
svst->type = AA_TYPE_ONESHOT;
}
else
{
svst->type = AA_TYPE_LONGRUN;
aa_service (si)->gets_ready = 0;
byte_copy (buf + l_sn, 1 + sizeof (AA_GETS_READY_FILENAME), "/" AA_GETS_READY_FILENAME);
if (access (buf, F_OK) == 0)
aa_service (si)->gets_ready = 1;
else
{
byte_copy (buf + l_sn, 1 + sizeof (NOTIFICATION_FILENAME), "/" NOTIFICATION_FILENAME);
if (access (buf, F_OK) == 0 && contains_fd (buf))
aa_service (si)->gets_ready = 1;
}
}
return 0;
}
/* Make sure service si is loaded: determine its type & current state, then
 * process its dependencies/ordering (logger, needs, wants, after, before) and
 * read its settings (timeout, retries).
 *
 * si:       index of the service to load
 * mode:     what is being done (AA_MODE_START, AA_MODE_STOP, AA_MODE_STOP_ALL,
 *           AA_MODE_IS_DRY_FULL are looked at here); it selects the iterator
 *           functions used on the service's directories
 * no_wants: if set, the "wants" directory is not processed
 * al_cb:    handed to the iterator functions through it_data
 *
 * Returns 0 if the service is loaded (or was already loaded/being loaded), else
 * a negative error code (-ERR_*). -ERR_ALREADY_UP and -ERR_NOT_UP aren't actual
 * failures: they indicate the service already is in the wanted state. When the
 * service had already failed to load, minus its status' code is returned.
 */
int
aa_ensure_service_loaded (int si, aa_mode mode, int no_wants, aa_autoload_cb al_cb)
{
    stralloc sa = STRALLOC_ZERO;
    struct it_data it_data = {
        .mode = mode,
        .si = si,
        .no_wants = no_wants,
        .al_cb = al_cb
    };
    int r;

    if (aa_service (si)->ls == AA_LOAD_DONE || aa_service (si)->ls == AA_LOAD_ING)
        return 0;
    else if (aa_service (si)->ls == AA_LOAD_FAIL)
        return -aa_service (si)->st.code;

    r = aa_preload_service (si);
    if (r < 0)
        return r;

    {
        aa_service_status *svst = &aa_service (si)->st;
        int chk_st;
        int is_up;

        /* chk_st: whether is_up shall be determined from our own status file */
        chk_st = aa_service_status_read (svst, aa_service_name (aa_service (si))) == 0;
        /* is_up: 0 = down, 1 = up, 2 = up & ready */
        is_up = 0;
        if (svst->type == AA_TYPE_LONGRUN)
        {
            s6_svstatus_t st6 = S6_SVSTATUS_ZERO;

            if (s6_svstatus_read (aa_service_name (aa_service (si)), &st6))
            {
                /* we have the s6 status, which is what we go by then */
                chk_st = 0;
                is_up = st6.pid && !st6.flagfinishing;
                if (is_up && aa_service (si)->gets_ready && st6.flagready)
                    is_up = 2;
                else if ((mode & (AA_MODE_STOP | AA_MODE_STOP_ALL))
                        && !is_up && st6.flagwant && st6.flagwantup)
                    /* it is down, but to be restarted soon by s6-supervise; so
                     * for our intent & purposes, it shall be considered up, so
                     * that we stop the restart and place the down file.
                     * (When starting, it's ok to send the up command, so we
                     * should still consider it down then.) */
                    is_up = 1;
            }
            else if (errno != ENOENT)
            {
                /* most likely a permission error on supervise folder */
                r = -ERR_IO;
                goto err;
            }
            tain_now_g ();
        }

        if (chk_st)
            is_up = (svst->event == AA_EVT_STARTED || svst->event == AA_EVT_STARTING
                    || svst->event == AA_EVT_STOPPING_FAILED
                    || svst->event == AA_EVT_STOP_FAILED);

        /* DRY_FULL means process (i.e. list) even services that are already in
         * the right state, so skip that bit then */
        if (!(mode & AA_MODE_IS_DRY_FULL))
        {
            if (mode & AA_MODE_START)
            {
                /* if it is a longrun w/ readiness support that isn't yet ready,
                 * we load the service to add it to the "transaction" since
                 * we'll need to wait for its readiness.
                 * We set the code to 0 or ERR_ALREADY_UP to indicate whether it
                 * was already up or not, so when starting it (in exec_cb) it
                 * can actually be said "Starting" or "Getting ready" as needed.
                 */
                if (svst->type == AA_TYPE_LONGRUN && aa_service (si)->gets_ready && is_up < 2)
                    svst->code = (is_up == 1) ? ERR_ALREADY_UP : 0;
                else if (is_up)
                {
                    /* if already good, we "fail" because there's no need to
                     * load the service, it's already good. This error will be
                     * silently ignored */
                    aa_service (si)->ls = AA_LOAD_FAIL;
                    /* this isn't actually true, but we won't save it to file */
                    svst->code = ERR_ALREADY_UP;
                    return -ERR_ALREADY_UP;
                }
            }
            else if ((mode & (AA_MODE_STOP | AA_MODE_STOP_ALL)) && !is_up)
            {
                /* if not up, we "fail" because we can't stop it */
                aa_service (si)->ls = AA_LOAD_FAIL;
                /* this isn't actually true, but we won't save it to file */
                svst->code = ERR_NOT_UP;
                return -ERR_NOT_UP;
            }
        }
    }

    /* flagged as being loaded, so that loading it again (e.g. from one of the
     * iterators below) simply returns 0 */
    aa_service (si)->ls = AA_LOAD_ING;

    /* sa is used to construct the path of every file/directory we look at */
    if (!stralloc_cats (&sa, aa_service_name (aa_service (si))))
        goto nomem;

    /* special case: for a longrun that's not a logger, we check if it has one,
     * and if so auto-add needs & after on said logger */
    if (aa_service (si)->st.type == AA_TYPE_LONGRUN
            /* because sa.s is the service name, and the only slashes allowed
             * are for loggers, i.e. xxxx/log */
            && (sa.len < 5 || sa.s[sa.len - 4] != '/'))
    {
        if (!stralloc_catb (&sa, "/log/run", strlen ("/log/run") + 1))
            goto nomem;
        r = access (sa.s, F_OK);
        if (r < 0 && (errno != ENOTDIR && errno != ENOENT))
        {
            /* access() only gave us -1, which isn't one of our error codes */
            r = -ERR_IO;
            goto err;
        }

        if (r == 0)
        {
            /* cut the "/run" off, so sa.s is the name of the logger */
            sa.s[sa.len - 5] = '\0';
            if (mode & AA_MODE_START)
                r = _name_start_needs (sa.s, &it_data);
            else
                r = _name_stop_needs (sa.s, &it_data);
            if (r < 0)
                goto err;
        }
        /* back to the service name only */
        sa.len -= strlen ("/log/run") + 1;
    }

    if (!stralloc_catb (&sa, "/needs", strlen ("/needs") + 1))
        goto nomem;
    r = aa_scan_dir (&sa, 1,
            (mode & AA_MODE_START) ? _it_start_needs : _it_stop_needs,
            &it_data);
    /* we can get ERR_IO either from aa_scan_dir() itself, or from the iterator
     * function. But since we haven't checked that the directory (needs) does
     * exist, ERR_IO w/ ENOENT simply means it doesn't, and isn't an error.
     * This works because there's no ENOENT from aa_get_service(), since that
     * won't be an ERR_IO but an ERR_UNKNOWN */
    if (r < 0 && (r != -ERR_IO || errno != ENOENT))
        goto err;

    /* from here on the trailing slash is preserved, i.e. we have "name/" */
    sa.len -= strlen ("needs") + 1;
    if ((mode & AA_MODE_START) && !no_wants)
    {
        if (!stralloc_catb (&sa, "wants", strlen ("wants") + 1))
            goto nomem;
        r = aa_scan_dir (&sa, 1, _it_start_wants, &it_data);
        if (r < 0 && (r != -ERR_IO || errno != ENOENT))
            goto err;
        sa.len -= strlen ("wants") + 1;
    }

    if (!stralloc_catb (&sa, "after", strlen ("after") + 1))
        goto nomem;
    r = aa_scan_dir (&sa, 1,
            (mode & AA_MODE_START) ? _it_start_after : _it_stop_after,
            &it_data);
    if (r < 0 && (r != -ERR_IO || errno != ENOENT))
        goto err;

    sa.len -= strlen ("after") + 1;
    if (!stralloc_catb (&sa, "before", strlen ("before") + 1))
        goto nomem;
    r = aa_scan_dir (&sa, 1,
            (mode & AA_MODE_START) ? _it_start_before : _it_stop_before,
            &it_data);
    if (r < 0 && (r != -ERR_IO || errno != ENOENT))
        goto err;

    /* timeout: any problem only gets a warning & the default is used */
    {
        char buf[UINT_FMT + 1];
        ssize_t rr;

        sa.len -= strlen ("before") + 1;
        if (!stralloc_catb (&sa, "timeout", strlen ("timeout") + 1))
            goto nomem;
        rr = openreadnclose_nb (sa.s, buf, UINT_FMT);
        if (rr < 0 && errno != ENOENT)
            aa_strerr_warnu3sys ("read timeout for ", aa_service_name (aa_service (si)), "; using default");
        if (rr >= 0)
        {
            unsigned int i = rr;

            /* only the first line is used */
            buf[byte_chr (buf, i, '\n')] = '\0';
            if (!uint0_scan (buf, &i))
            {
                aa_strerr_warn3x ("invalid timeout for ", aa_service_name (aa_service (si)), "; using default");
                aa_service (si)->secs_timeout = aa_secs_timeout;
            }
            /* in STOP_ALL the default is also a maximum */
            else if ((mode & AA_MODE_STOP_ALL)
                    && (i > aa_secs_timeout || i == 0))
                aa_service (si)->secs_timeout = aa_secs_timeout;
            else
                aa_service (si)->secs_timeout = i;
        }
        else
            aa_service (si)->secs_timeout = aa_secs_timeout;
    }

    /* retries: any problem only gets a warning & 0 is used */
    {
        char buf[UINT_FMT + 1];
        ssize_t rr;

        sa.len -= strlen ("timeout") + 1;
        if (!stralloc_catb (&sa, "retries", strlen ("retries") + 1))
            goto nomem;
        rr = openreadnclose_nb (sa.s, buf, UINT_FMT);
        if (rr < 0 && errno != ENOENT)
            aa_strerr_warnu3sys ("read retries for ", aa_service_name (aa_service (si)), "; using default");
        if (rr >= 0)
        {
            unsigned int i = rr;

            /* only the first line is used */
            buf[byte_chr (buf, i, '\n')] = '\0';
            if (!uint0_scan (buf, &i))
            {
                aa_strerr_warn3x ("invalid retries for ", aa_service_name (aa_service (si)), "; using default");
                aa_service (si)->retries = 0;
            }
            else
                aa_service (si)->retries = (uint16_t) i;
        }
        else
            aa_service (si)->retries = 0;
    }

    stralloc_free (&sa);
    aa_service (si)->ls = AA_LOAD_DONE;
    tain_now_g ();
    return 0;

nomem:
    /* failed to grow sa; we must stop here since everything that follows
     * relies on sa.len having been updated */
    errno = ENOMEM;
    r = -ERR_IO;
err:
    aa_service (si)->ls = AA_LOAD_FAIL;
    stralloc_free (&sa);
    tain_now_g ();
    return r;
}
/* Recursively check the after-s of service si: those that aren't loaded or
 * aren't in aa_main_list are removed, the others get checked in turn.
 *
 * While going down, services are added to aa_tmp_list; failing to add one (no
 * doubles allowed) means there's a loop.
 *
 * si:          index of the service to check
 * sli:         set to the index of the service that was found twice, on loop
 * has_longrun: set to 1 (if it was 0) when a checked service is a longrun
 *
 * Returns 0 when service was checked (then flagged AA_LOAD_DONE_CHECKED, and
 * aa_tmp_list restored to its original length), -1 if a loop was found (then
 * aa_tmp_list is left as it was when that happened). */
static int
check_afters (int si, int *sli, int *has_longrun)
{
    aa_service *svc = aa_service (si);
    const size_t saved_len = genalloc_len (int, &aa_tmp_list);
    size_t pos = 0;

    if (svc->ls == AA_LOAD_DONE_CHECKED)
        return 0;

    if (!add_to_list (&aa_tmp_list, si, 1))
    {
        *sli = si;
        return -1;
    }

    while (pos < genalloc_len (int, &svc->after))
    {
        int dep = list_get (&svc->after, pos);
        int usable = (aa_service (dep)->ls == AA_LOAD_DONE
                || aa_service (dep)->ls == AA_LOAD_DONE_CHECKED)
            && is_in_list (&aa_main_list, dep);

        if (!usable)
        {
            /* pos isn't moved: we now have the next one (if any) there */
            remove_from_list (&svc->after, dep);
            continue;
        }

        if (check_afters (dep, sli, has_longrun) < 0)
            return -1;
        ++pos;
    }

    if (svc->st.type == AA_TYPE_LONGRUN && !*has_longrun)
        *has_longrun = 1;

    genalloc_setlen (int, &aa_tmp_list, saved_len);
    svc->ls = AA_LOAD_DONE_CHECKED;
    return 0;
}
/* Prepare aa_main_list for processing: remove the after-s that are of no use,
 * and break any loop found.
 *
 * prepare_cb: if set, called whenever a link is removed to break a loop, with
 *             both services of the link, whether it was a needs (1) or only an
 *             after (0), and the position in aa_tmp_list where the loop begins
 * exec_cb:    stored in _exec_cb
 *
 * Returns 0 if there's no longrun involved; else the ftrigr connection gets
 * started and its file descriptor is returned, or -1 if that failed. */
int
aa_prepare_mainlist (aa_prepare_cb prepare_cb, aa_exec_cb exec_cb)
{
    int has_longrun = 0;
    size_t pos = 0;

    _exec_cb = exec_cb;
    aa_tmp_list.len = 0;

    /* scan main_list to remove unneeded afters and check for loops */
    while (pos < genalloc_len (int, &aa_main_list))
    {
        int si = list_get (&aa_main_list, pos);
        int sli;

        /* check the after-s of the service, recursively. It will remove any
         * after that's not loaded or in the main list, i.e. that won't be
         * started.
         * It also constructs a list going down, to find any loop (e.g. a after
         * b after a), placing it in aa_tmp_list. Should be noted that the list
         * might be "a,b,c,d" with sli set to c if the loop is actually c->d->c
         * but was found from b which was itself after a; hence we need to find
         * the "real" start of the loop.
         */
        if (check_afters (si, &sli, &has_longrun) >= 0)
        {
            /* all good, move on to the next service */
            ++pos;
            aa_tmp_list.len = 0;
            continue;
        }

        /* a loop was found; break it, then process the same service again */
        {
            size_t len;
            size_t k;
            size_t start = 0;
            int broken = 0;

            /* add it again, so the list shows the full loop */
            add_to_list (&aa_tmp_list, sli, 0);
            len = genalloc_len (int, &aa_tmp_list);

            for (k = 0; k < len - 1; ++k)
            {
                int cur = list_get (&aa_tmp_list, k);
                int next;

                if (!start)
                {
                    /* loop begins with the first occurrence of sli */
                    if (cur != sli)
                        continue;
                    start = k + 1;
                }

                next = list_get (&aa_tmp_list, k + 1);
                if (is_in_list (&aa_service (cur)->needs, next))
                    continue;

                /* remove the first after link that's not a need as well */
                remove_from_list (&aa_service (cur)->after, next);
                if (prepare_cb)
                    prepare_cb (cur, next, 0, start - 1);
                broken = 1;
                break;
            }

            /* this is actually a loop of needs */
            if (!broken)
            {
                /* we'll remove the last one (both needs & after) on the loop,
                 * so the further one away from the explicitly asked to start
                 * service, so it might break it less... though that really
                 * doesn't mean much, plus it might also have been explicitly
                 * asked as well. Either way, major config error, fix it user! */
                int cur = list_get (&aa_tmp_list, len - 2);
                int next = list_get (&aa_tmp_list, len - 1);

                remove_from_list (&aa_service (cur)->needs, next);
                remove_from_list (&aa_service (cur)->after, next);
                if (prepare_cb)
                    prepare_cb (cur, next, 1, start - 1);
            }
        }

        aa_tmp_list.len = 0;
    }

    if (!has_longrun)
        return 0;

    /* longruns are involved, so the ftrigr connection is needed */
    {
        tain_t deadline;

        tain_addsec_g (&deadline, 1);
        if (ftrigr_startf_g (&_aa_ft, &deadline))
            return ftrigr_fd (&_aa_ft);
        return -1;
    }
}
/* Check whether service s is in the state expected given mode, i.e. whether
 * it was successfully started/stopped.
 *
 * Returns 1 if it is (or is assumed to be), else 0. */
static int
service_is_ok (aa_mode mode, aa_service *s)
{
    s6_svstatus_t st6 = S6_SVSTATUS_ZERO;
    int ok = 0;

    /* if DRY we assume it's ok, since it wasn't really started/stopped.
     * if STOP_ALL we pretend it's ok since we're trying to stop everything. */
    if (mode & (AA_MODE_IS_DRY | AA_MODE_STOP_ALL))
        return 1;

    /* for a oneshot our own status has to be the expected final event */
    if (s->st.type == AA_TYPE_ONESHOT)
    {
        if (mode & AA_MODE_START)
            return (s->st.event == AA_EVT_STARTED) ? 1 : 0;
        return (s->st.event == AA_EVT_STOPPED) ? 1 : 0;
    }

    /* TYPE_LONGRUN -- we make assumptions here:
     * - we have a local status, since we started the service
     * - if it's flagged timedout, that's a fail (flag is used to avoid possible
     *   race condition: it was processed as timedout (might be for readiness)
     *   and by the time we're checking here, s6 state has changed (e.g. it now
     *   is ready, or down...) This should obviously remain a fail, not assume
     *   it was good (e.g. ready) & process it as success.)
     * - if there's no s6 status, that's a fail (probably fail to even exec run)
     * - we compare stamp, if s6 is more recent, it's good (since we got the
     *   event we were waiting for); else it's a fail (must be our
     *   EVT_STARTING_FAILED, might be an ERR_TIMEDOUT if we're still waiting
     *   for the 'U' event (ready)). Actually we'll allow for our event to be
     *   EVT_STARTING because there's a possible race condition there.
     */
    if (!s->timedout && s6_svstatus_read (aa_service_name (s), &st6))
    {
        aa_evt pending = (mode & AA_MODE_START) ? AA_EVT_STARTING : AA_EVT_STOPPING;

        if (tain_less (&s->st.stamp, &st6.stamp) || s->st.event == pending)
            ok = 1;
    }

    tain_now_g ();
    return ok;
}
/* Go through aa_main_list and, for each service:
 * - check its needs that aren't in the main list: those that are ok (see
 *   service_is_ok()) are removed from its needs & after; if one is not, the
 *   service is failed with ERR_DEPEND (status written to file), removed from
 *   the main list, scan_cb (if set) is called with the service & the failed
 *   need, and the scan restarts from the top;
 * - remove its after-s that aren't in the main list;
 * - if it then has no after left and isn't already being started/stopped (or
 *   we're in a dry mode), execute it via aa_exec_service(); should that fail
 *   the scan restarts from the top.
 *
 * scan_cb: callback for services failed because of a dependency; can be NULL
 * mode:    mode of operation (start, stop, etc)
 */
void
aa_scan_mainlist (aa_scan_cb scan_cb, aa_mode mode)
{
size_t i;
/* no increment in the for statement: it is done (or not) in the body */
for (i = 0; i < genalloc_len (int, &aa_main_list); )
{
aa_service *s;
int si;
size_t j;
si = list_get (&aa_main_list, i);
s = aa_service (si);
/* j only moves forward when the need is kept, since removing an item means
 * the next one (if any) is now at the same position */
for (j = 0; j < genalloc_len (int, &s->needs); )
{
int sni;
aa_service_status *svst;
sni = list_get (&s->needs, j);
/* still in main list, i.e. not yet done: we keep waiting for it */
if (is_in_list (&aa_main_list, sni))
{
++j;
continue;
}
/* done & successful: no need to wait for it anymore */
if (service_is_ok (mode, aa_service (sni)))
{
remove_from_list (&s->needs, sni);
remove_from_list (&s->after, sni);
continue;
}
/* done & failed: service fails as well, with the name of the failed
 * dependency as message */
svst = &s->st;
svst->event = (mode & AA_MODE_START) ? AA_EVT_STARTING_FAILED: AA_EVT_STOPPING_FAILED;
svst->code = ERR_DEPEND;
tain_copynow (&svst->stamp);
aa_service_status_set_msg (svst, aa_service_name (aa_service (sni)));
if (aa_service_status_write (svst, aa_service_name (s)) < 0)
aa_strerr_warnu2sys ("write service status file for ", aa_service_name (s));
remove_from_list (&aa_main_list, si);
if (scan_cb)
scan_cb (si, sni);
/* flag that the service was removed from main list */
si = -1;
break;
}
/* main list was changed, so we need to rescan from top */
if (si < 0)
{
i = 0;
continue;
}
for (j = 0; j < genalloc_len (int, &s->after); )
{
int sai;
sai = list_get (&s->after, j);
if (is_in_list (&aa_main_list, sai))
++j;
else
remove_from_list (&s->after, sai);
}
if (genalloc_len (int, &s->after) == 0
&& (
/* either we're in DRY mode (i.e. we should start it) */
(mode & AA_MODE_IS_DRY)
||
/* or make sure it's in the right state */
(((mode & AA_MODE_START) && s->st.event != AA_EVT_STARTING)
|| ((mode & (AA_MODE_STOP | AA_MODE_STOP_ALL))
&& s->st.event != AA_EVT_STOPPING))
)
&& aa_exec_service (si, mode) < 0)
/* failed to exec service, was removed from main_list, so we need to
 * rescan from top */
/* (i is a size_t, so this wraps around; the ++i that follows then brings it
 * back to 0) */
i = -1;
++i;
}
}
/* Execute (i.e. start or stop, as per mode) service si.
 *
 * _exec_cb (if set) is called first, to announce the operation, and the
 * service's ts_exec is set to the current time. Nothing more is done in dry
 * mode.
 *
 * Returns 0 in dry mode, else the return value of _exec_oneshot() or
 * _exec_longrun(); when negative (i.e. failure) the service has been removed
 * from aa_main_list. */
int
aa_exec_service (int si, aa_mode mode)
{
    int r;

    if (_exec_cb)
        /* ugly hack to announce "Starting/Stopping foobar..."; needed because
         * we use common code for aa-start & aa-stop, so... yeah */
        _exec_cb (si, 0, (pid_t) mode);

    tain_now_g ();
    tain_copynow (&aa_service (si)->ts_exec);

    if (mode & AA_MODE_IS_DRY)
        return 0;

    r = (aa_service (si)->st.type == AA_TYPE_ONESHOT)
        ? _exec_oneshot (si, mode)
        : _exec_longrun (si, mode);
    if (r < 0)
        remove_from_list (&aa_main_list, si);

    return r;
}