Welcome to little lamb

Code » limb » commit 692e59d

Add parseopt.h & parseopt() to parse command-line args

author Olivier Brunel
2023-03-24 16:07:16 UTC
committer Olivier Brunel
2023-03-26 14:03:03 UTC
parent c2431d70fecc05ab8ccdf209b7c8b0a8526f35f4

Add parseopt.h & parseopt() to parse command-line args

Somewhat similar to getopt(3)

doc/parseopt.3.md +157 -0
doc/parseopt.h.0.md +49 -0
include/limb/parseopt.h +51 -0
meta/libs/limb +2 -0
src/parseopt.c +120 -0

diff --git a/doc/parseopt.3.md b/doc/parseopt.3.md
new file mode 100644
index 0000000..148001a
--- /dev/null
+++ b/doc/parseopt.3.md
@@ -0,0 +1,157 @@
+% limb manual
+% parseopt(3)
+
+# NAME
+
+parseopt - parse command-line options
+
+# SYNOPSIS
+
+    #include <limb/parseopt.h>
+
+```pre hl
+int parseopt(int *<em>first</em>, int <em>argc</em>, const char **<em>argv</em>, const struct option *<em>options</em>,
+             unsigned int <em>flags</em>, struct parseopt *<em>ctx</em>)
+```
+
+# DESCRIPTION
+
+The `parseopt`() function parses command-line arguments. Its arguments `argc`
+and `argv` are the argument count and array as passed to the `main`() function
+on program invocation.
+
+The `options` argument is a pointer to the first element of an array defining
+all possible options. See [[Options]] below for more.
+
+The `first` argument is optional, and therefore can be NULL. If specified, the
+value it points to will be set to the index of the first-matching option. This
+can notably be useful in the case of abbreviated long options, when more than
+one option did match.
+
+The `flags` argument allows enabling certain options, see [[FLAGS]] below.
+
+The last argument `ctx` is a semi-opaque structure, that should be initialized
+to all zeroes, defined as such :
+
+    struct parseopt {
+        u16 cur;
+        u16 off;
+        const char *arg;
+    };
+
+When `parseopt`() returns a positive value, i.e. an option was successfully
+found, its member `arg` points to the option's argument if any, else it is
+NULL. It is similar to the global `optarg` from [getopt](3).
+
+When `parseopt`() returns -1, its member `cur` is the index in `argv` of the
+first non-option element. In that way, it is similar to the global `optind` from
+[getopt](3).
+
+## Parsing
+
+Parsing starts at element 1, as expected from a typical `argv`.
+
+An element of `argv` starting with '-' is treated as an option element. If
+followed by a second dash, then a long option name is expected to follow, else a
+short option character.
+
+An element of "--" has special meaning, indicating the end of options and
+stopping parsing, i.e. `parseopt`() will return -1 when encountered.
+
+When an option has been identified, `parseopt`() will return its `id` if it is
+non-zero, else its `shortopt` (see [[Options]] below).
+
+When the first non-option element is encountered, or an element "--", parsing
+stops and -1 is returned.
+
+On error, another negative value is returned, see [[ERRORS]] below for more.
+
+## Options
+
+Long options do not need to be specified in full, and an abbreviation will
+be recognized as long as there's no other match possible. (This behavior can be
+disabled, see [[FLAGS]] below.)
+
+When an argument is required, it can be specified within the same element,
+following a '\=', or as the next element. Optional arguments can only be
+specified after a '\=' within the same element.
+
+An element for short options can specify more than one option in a row, so long
+as they don't accept an argument. When an option accepts an argument (whether
+optional or not), what follows next within the element will be treated as the
+option's argument.
+
+`options` must be a pointer to the first element of an array of *struct option*
+declared as such :
+
+    struct option {
+        const char  shortopt;
+        const char *longopt;
+        u8 arg      : 2;
+        u8 flags    : 6;
+        int id;
+    };
+
+The meanings of different members are :
+
+: `shortopt`
+:: The character of the short option
+
+: `longopt`
+:: The name of the long option
+
+: `arg`
+:: One of the constants *ARG_NONE*, *ARG_REQ* or *ARG_OPT* to indicate whether
+:: the option takes no argument, requires an argument, or accepts an optional
+:: argument.
+
+: `flags`
+:: Not used, leave at 0.
+
+: `id`
+:: An integer value unique to this option, to recognize when it is parsed. It
+:: should be a positive non-zero value. If zero, `shortopt` will be returned
+:: instead.
+
+The last element of the array must be set to *OPTION_DONE* (all members set to 0
+except `flags` set to 1) to indicate the end.
+
+# FLAGS
+
+It is possible to define some options via the `flags` argument, whose value is
+constructed as a bitwise-inclusive OR of flags from the following list :
+
+: *PARSEOPT_STRICT*
+:: Long options cannot be abbreviated, and must be an exact match.
+
+: *PARSEOPT_IS_LONG*
+:: /Intended for internal use./ Assumes all elements of `argv` are option
+:: elements, composed of long option names directly (i.e. without "--" prefix).
+
+
+# RETURN VALUE
+
+If an option was successfully found, `parseopt`() returns the option `id` if
+non-zero, else its `shortopt`. When all command-line options have been parsed -1
+is returned.
+
+If an error occurs, a negative value (other than -1) is returned, depending on
+the error.
+
+! NOTE:
+! Unlike [getopt](3), `parseopt`() will not print any warning/error messages.
+! The indication of the error is sent through its return value, and it is up to
+! the caller to inform the user of what failed.
+
+# ERRORS
+
+The `parseopt`() function may fail and return :
+
+: *PARSEOPT_ERR_NONAME*
+:: Option name was missing, i.e. the element was "-" only
+
+: *PARSEOPT_ERR_UNKNOWN*
+:: Unknown option
+
+: *PARSEOPT_ERR_ARGREQ*
+:: An option requiring an argument was found, but no argument was specified.
diff --git a/doc/parseopt.h.0.md b/doc/parseopt.h.0.md
new file mode 100644
index 0000000..df8bc0f
--- /dev/null
+++ b/doc/parseopt.h.0.md
@@ -0,0 +1,49 @@
+% limb manual
+% parseopt.h(0)
+
+# NAME
+
+parseopt.h - parse command-line options
+
+# SYNOPSIS
+
+    #include <limb/parseopt.h>
+
+# DESCRIPTION
+
+This header defines functions to parse command-line options.
+
+## Constants
+
+The following constants are defined :
+
+: *ARG_NONE*, *ARG_REQ*, *ARG_OPT*
+:: To define if an option has no argument, requires one, or may have one.
+
+: *OPTION_DONE*
+:: To be used as last element in the array of *struct option*, indicating the
+:: end of said array.
+
+: *PARSEOPT_IS_LONG*, *PARSEOPT_STRICT*
+:: Flags that can be passed to [parseopt](3).
+
+: *PARSEOPT_DONE*, *PARSEOPT_ERR_NONAME*, *PARSEOPT_ERR_UNKNOWN*,
+: *PARSEOPT_ERR_ARGREQ*
+:: Possible return values for [parseopt](3)
+
+## Structures
+
+The following structures are defined :
+
+: *struct option*
+:: To define an option when calling [parseopt](3).
+
+: *struct parseopt*
+:: A semi-opaque structure to be passed to [parseopt](3)
+
+## Functions
+
+The following functions are defined :
+
+: [parseopt](3)
+:: To parse command-line options
diff --git a/include/limb/parseopt.h b/include/limb/parseopt.h
new file mode 100644
index 0000000..ed26e87
--- /dev/null
+++ b/include/limb/parseopt.h
@@ -0,0 +1,51 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef LIMB_PARSEOPT_H
+#define LIMB_PARSEOPT_H
+
+#include "limb/int.h"
+
+enum { /* values for the arg member of struct option */
+    ARG_NONE = 0,   /* no argument */
+    ARG_REQ,        /* argument required */
+    ARG_OPT         /* argument optional */
+};
+
+/* last element in struct option[] to indicate the end */
+#define OPTION_DONE             { .flags = 1 }
+
+struct option { /* one entry of the options array given to parseopt() */
+    const char  shortopt; /* short option character; returned by parseopt() when id is 0 */
+    const char *longopt; /* long option name; parseopt() stops scanning the array at a NULL longopt */
+    u8 arg      : 2; /* ARG_* */
+    u8 flags    : 6; /* OPT_* -- loadopt only */
+    int id; /* value returned by parseopt() for this option when non-zero */
+};
+
+enum { /* bits for the flags argument of parseopt() */
+    /* private: every argv element is taken as a long option name, without "--" prefix */
+    PARSEOPT_IS_LONG    = 1 << 0,
+    /* public: long options must match exactly, by length and content */
+    PARSEOPT_STRICT     = 1 << 1,
+};
+
+enum { /* negative return values of parseopt() */
+    PARSEOPT_DONE           = -1, /* success, all done */
+    PARSEOPT_ERR_NONAME     = -2, /* option name missing (i.e. "-") */
+    PARSEOPT_ERR_UNKNOWN    = -3, /* unknown option */
+    PARSEOPT_ERR_ARGREQ     = -4, /* argument required */
+};
+
+struct parseopt { /* parsing position kept between calls to parseopt() */
+    /* private */
+    u16 cur;    /* public when done : index of first argument in argv */
+    u16 off;    /* offset inside argv[cur] at which the next call resumes */
+    /* public (read-only) */
+    const char *arg; /* argument of the option just returned, NULL if none */
+};
+
+extern int parseopt(int *first, int argc, const char **argv, const struct option *options,
+                    unsigned int flags, struct parseopt *ctx);
+
+#endif /* LIMB_PARSEOPT_H */
diff --git a/meta/libs/limb b/meta/libs/limb
index 6929e1a..130ac73 100644
--- a/meta/libs/limb
+++ b/meta/libs/limb
@@ -46,6 +46,8 @@ obj/err_putmsg.o
 obj/dbg_putmsg.o
 obj/out_putmsgdie.o
 obj/err_putmsgdie.o
+# parseopt.h
+obj/parseopt.o
 # find msb
 obj/msb64.o
 # {,un}pack u64
diff --git a/src/parseopt.c b/src/parseopt.c
new file mode 100644
index 0000000..08fbb5c
--- /dev/null
+++ b/src/parseopt.c
@@ -0,0 +1,120 @@
+/* This file is part of limb                           https://lila.oss/limb
+ * Copyright (C) 2023 Olivier Brunel                          jjk@jjacky.com */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <stddef.h> /* offsetof() */
+#include "limb/bytestr.h"
+#include "limb/parseopt.h"
+
+int
+parseopt(int *first, int argc, const char **argv, const struct option *options,
+         unsigned int flags, struct parseopt *ctx) /* returns the option's id (or shortopt if id is 0), else a negative PARSEOPT_* value */
+{ /* parse the next option out of argv, keeping the position in ctx between calls */
+    const char *arg;        /* current position inside argv[ctx->cur] */
+    int is_long, arg_long;  /* arg_long: long option without inline value, a required argument comes from the next element */
+    int o;                  /* index in options[] of the matched option */
+
+    if (!ctx->cur) /* cur still 0: begin parsing at argv[1] */
+        ctx->cur = 1;
+
+again:
+    if (ctx->cur == argc)
+        return PARSEOPT_DONE; /* no element left */
+
+    arg = argv[ctx->cur] + ctx->off; /* a non-zero off resumes inside a run of short options */
+
+    if (flags & PARSEOPT_IS_LONG) {
+        is_long = arg_long = 1; /* no dash handling: the element itself is the long option name */
+        if (!*arg)
+            return PARSEOPT_ERR_NONAME; /* empty element */
+    } else {
+        if (ctx->off == 0) {
+            if (*arg != '-')
+                /* not an option, so no more options */
+                return PARSEOPT_DONE;
+            /* move on to the option */
+            ++ctx->off;
+            ++arg;
+        }
+
+        if (!*arg) {
+            if (ctx->off == 1)
+                return PARSEOPT_ERR_NONAME; /* element was "-" only */
+            /* end of a run of short options: restart on the next element */
+            ++ctx->cur;
+            ctx->off = 0;
+            goto again;
+        }
+
+        is_long = arg_long = (ctx->off == 1 && *arg == '-'); /* a second dash right after the first */
+        if (is_long) {
+            ++ctx->off; /* skip the second dash */
+            ++arg;
+        }
+    }
+
+    if (is_long) {
+        size_t l = strlen(arg);
+        size_t end = byte_in(arg, l, " =\t", 3); /* presumably the offset of the first ' ', '=' or tab, else l -- TODO confirm in bytestr.h */
+
+        if (!l) {
+            /* marker "--" for end of options */
+            ++ctx->cur; /* leave cur on the element following "--" */
+            return PARSEOPT_DONE;
+        }
+
+        if (flags & PARSEOPT_STRICT) {
+            for (o = 0; options[o].longopt; ++o) /* scan ends at the first NULL longopt */
+                if (end == strlen(options[o].longopt)
+                        && !strncmp(options[o].longopt, arg, end))
+                    break;
+            if (!options[o].longopt)
+                o = -1; /* no exact match */
+        } else {
+            o = byte_get_match_full(first, arg, end, options,
+                                    offsetof(struct option, longopt), sizeof(*options)); /* presumably also accepts unambiguous abbreviations -- TODO confirm */
+        }
+
+        if (o < 0)
+            return PARSEOPT_ERR_UNKNOWN;
+
+        /* --option-name=value : don't look for optarg on the next arg */
+        if (end < l) {
+            arg_long = 0;
+            arg += end; /* arg now sits on the separator, so the value starts at arg[1] */
+        }
+    } else {
+        for (o = 0; options[o].longopt; ++o) /* NOTE(review): also ends at any entry whose longopt is NULL, e.g. a short-only option */
+            if (*arg == options[o].shortopt)
+                break;
+        if (!options[o].longopt)
+            return PARSEOPT_ERR_UNKNOWN;
+    }
+
+    if (first) *first = o;
+
+    ctx->arg = NULL;
+    if (options[o].arg == ARG_REQ) {
+        if (arg_long) {
+            ++ctx->cur; /* the required argument is the next element */
+            if (ctx->cur == argc)
+                return PARSEOPT_ERR_ARGREQ;
+            ctx->arg = argv[ctx->cur];
+        } else {
+            if (!arg[1]) /* nothing after the option character / separator */
+                return PARSEOPT_ERR_ARGREQ;
+            ctx->arg = arg + 1;
+        }
+    } else if (options[o].arg == ARG_OPT) {
+        if (!arg_long && arg[1]) /* an optional argument is only taken from the same element */
+            ctx->arg = arg + 1;
+    }
+
+    if (!is_long && !ctx->arg) {
+        ++ctx->off; /* short option without argument: next call continues in this element */
+    } else {
+        ++ctx->cur; /* element consumed: next call starts on the following one */
+        ctx->off = 0;
+    }
+
+    return (options[o].id) ? options[o].id : options[o].shortopt;
+}