aboutsummaryrefslogtreecommitdiff
path: root/lib/mlibc/options/posix/musl-generic-regex/regexec.c
diff options
context:
space:
mode:
authorIan Moffett <ian@osmora.org>2024-03-07 17:28:00 -0500
committerIan Moffett <ian@osmora.org>2024-03-07 17:28:32 -0500
commitbd5969fc876a10b18613302db7087ef3c40f18e1 (patch)
tree7c2b8619afe902abf99570df2873fbdf40a4d1a1 /lib/mlibc/options/posix/musl-generic-regex/regexec.c
parenta95b38b1b92b172e6cc4e8e56a88a30cc65907b0 (diff)
lib: Add mlibc
Signed-off-by: Ian Moffett <ian@osmora.org>
Diffstat (limited to 'lib/mlibc/options/posix/musl-generic-regex/regexec.c')
-rw-r--r--lib/mlibc/options/posix/musl-generic-regex/regexec.c1028
1 files changed, 1028 insertions, 0 deletions
diff --git a/lib/mlibc/options/posix/musl-generic-regex/regexec.c b/lib/mlibc/options/posix/musl-generic-regex/regexec.c
new file mode 100644
index 0000000..1a169ab
--- /dev/null
+++ b/lib/mlibc/options/posix/musl-generic-regex/regexec.c
@@ -0,0 +1,1028 @@
+/*
+ regexec.c - TRE POSIX compatible matching functions (and more).
+
+ Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include <regex.h>
+
+#include "tre.h"
+
+#include <assert.h>
+
+static void
+tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
+ const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo);
+
+/***********************************************************************
+ from tre-match-utils.h
+***********************************************************************/
+
+#define GET_NEXT_WCHAR() do { \
+ prev_c = next_c; pos += pos_add_next; \
+ if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \
+ if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; } \
+ else pos_add_next++; \
+ } \
+ str_byte += pos_add_next; \
+ } while (0)
+
+#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))
+
+#define CHECK_ASSERTIONS(assertions) \
+ (((assertions & ASSERT_AT_BOL) \
+ && (pos > 0 || reg_notbol) \
+ && (prev_c != L'\n' || !reg_newline)) \
+ || ((assertions & ASSERT_AT_EOL) \
+ && (next_c != L'\0' || reg_noteol) \
+ && (next_c != L'\n' || !reg_newline)) \
+ || ((assertions & ASSERT_AT_BOW) \
+ && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_EOW) \
+ && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_WB) \
+ && (pos != 0 && next_c != L'\0' \
+ && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_WB_NEG) \
+ && (pos == 0 || next_c == L'\0' \
+ || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
+
+#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
+ (((trans_i->assertions & ASSERT_CHAR_CLASS) \
+ && !(tnfa->cflags & REG_ICASE) \
+ && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \
+ || ((trans_i->assertions & ASSERT_CHAR_CLASS) \
+ && (tnfa->cflags & REG_ICASE) \
+ && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \
+ && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \
+ || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \
+ && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
+ tnfa->cflags & REG_ICASE)))
+
+
+
+
+/* Returns 1 if `t1' wins `t2', 0 otherwise. */
+static int
+tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
+ regoff_t *t1, regoff_t *t2)
+{
+ int i;
+ for (i = 0; i < num_tags; i++)
+ {
+ if (tag_directions[i] == TRE_TAG_MINIMIZE)
+ {
+ if (t1[i] < t2[i])
+ return 1;
+ if (t1[i] > t2[i])
+ return 0;
+ }
+ else
+ {
+ if (t1[i] > t2[i])
+ return 1;
+ if (t1[i] < t2[i])
+ return 0;
+ }
+ }
+ /* assert(0);*/
+ return 0;
+}
+
+static int
+tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
+{
+ while (*classes != (tre_ctype_t)0)
+ if ((!icase && tre_isctype(wc, *classes))
+ || (icase && (tre_isctype(tre_toupper(wc), *classes)
+ || tre_isctype(tre_tolower(wc), *classes))))
+ return 1; /* Match. */
+ else
+ classes++;
+ return 0; /* No match. */
+}
+
+
+/***********************************************************************
+ from tre-match-parallel.c
+***********************************************************************/
+
+/*
+ This algorithm searches for matches basically by reading characters
+ in the searched string one by one, starting at the beginning. All
+ matching paths in the TNFA are traversed in parallel. When two or
+ more paths reach the same state, exactly one is chosen according to
+ tag ordering rules; if returning submatches is not required it does
+ not matter which path is chosen.
+
+ The worst case time required for finding the leftmost and longest
+ match, or determining that there is no match, is always linearly
+ dependent on the length of the text being searched.
+
+ This algorithm cannot handle TNFAs with back referencing nodes.
+ See `tre-match-backtrack.c'.
+*/
+
+typedef struct {
+ tre_tnfa_transition_t *state;
+ regoff_t *tags;
+} tre_tnfa_reach_t;
+
+typedef struct {
+ regoff_t pos;
+ regoff_t **tags;
+} tre_reach_pos_t;
+
+
+static reg_errcode_t
+tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
+ regoff_t *match_tags, int eflags,
+ regoff_t *match_end_ofs)
+{
+ /* State variables required by GET_NEXT_WCHAR. */
+ tre_char_t prev_c = 0, next_c = 0;
+ const char *str_byte = string;
+ regoff_t pos = -1;
+ regoff_t pos_add_next = 1;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+ int reg_notbol = eflags & REG_NOTBOL;
+ int reg_noteol = eflags & REG_NOTEOL;
+ int reg_newline = tnfa->cflags & REG_NEWLINE;
+ reg_errcode_t ret;
+
+ char *buf;
+ tre_tnfa_transition_t *trans_i;
+ tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
+ tre_reach_pos_t *reach_pos;
+ int *tag_i;
+ int num_tags, i;
+
+ regoff_t match_eo = -1; /* end offset of match (-1 if no match found yet) */
+ int new_match = 0;
+ regoff_t *tmp_tags = NULL;
+ regoff_t *tmp_iptr;
+
+#ifdef TRE_MBSTATE
+ memset(&mbstate, '\0', sizeof(mbstate));
+#endif /* TRE_MBSTATE */
+
+ if (!match_tags)
+ num_tags = 0;
+ else
+ num_tags = tnfa->num_tags;
+
+ /* Allocate memory for temporary data required for matching. This needs to
+ be done for every matching operation to be thread safe. This allocates
+ everything in a single large block with calloc(). */
+ {
+ size_t tbytes, rbytes, pbytes, xbytes, total_bytes;
+ char *tmp_buf;
+
+ /* Ensure that tbytes and xbytes*num_states cannot overflow, and that
+ * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */
+ if (num_tags > SIZE_MAX/(8 * sizeof(regoff_t) * tnfa->num_states))
+ return REG_ESPACE;
+
+ /* Likewise check rbytes. */
+ if (tnfa->num_states+1 > SIZE_MAX/(8 * sizeof(*reach_next)))
+ return REG_ESPACE;
+
+ /* Likewise check pbytes. */
+ if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_pos)))
+ return REG_ESPACE;
+
+ /* Compute the length of the block we need. */
+ tbytes = sizeof(*tmp_tags) * num_tags;
+ rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
+ pbytes = sizeof(*reach_pos) * tnfa->num_states;
+ xbytes = sizeof(regoff_t) * num_tags;
+ total_bytes =
+ (sizeof(long) - 1) * 4 /* for alignment paddings */
+ + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
+
+ /* Allocate the memory. */
+ buf = calloc(total_bytes, 1);
+ if (buf == NULL)
+ return REG_ESPACE;
+
+ /* Get the various pointers within tmp_buf (properly aligned). */
+ tmp_tags = (void *)buf;
+ tmp_buf = buf + tbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ reach_next = (void *)tmp_buf;
+ tmp_buf += rbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ reach = (void *)tmp_buf;
+ tmp_buf += rbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ reach_pos = (void *)tmp_buf;
+ tmp_buf += pbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ for (i = 0; i < tnfa->num_states; i++)
+ {
+ reach[i].tags = (void *)tmp_buf;
+ tmp_buf += xbytes;
+ reach_next[i].tags = (void *)tmp_buf;
+ tmp_buf += xbytes;
+ }
+ }
+
+ for (i = 0; i < tnfa->num_states; i++)
+ reach_pos[i].pos = -1;
+
+ GET_NEXT_WCHAR();
+ pos = 0;
+
+ reach_next_i = reach_next;
+ while (1)
+ {
+ /* If no match found yet, add the initial states to `reach_next'. */
+ if (match_eo < 0)
+ {
+ trans_i = tnfa->initial;
+ while (trans_i->state != NULL)
+ {
+ if (reach_pos[trans_i->state_id].pos < pos)
+ {
+ if (trans_i->assertions
+ && CHECK_ASSERTIONS(trans_i->assertions))
+ {
+ trans_i++;
+ continue;
+ }
+
+ reach_next_i->state = trans_i->state;
+ for (i = 0; i < num_tags; i++)
+ reach_next_i->tags[i] = -1;
+ tag_i = trans_i->tags;
+ if (tag_i)
+ while (*tag_i >= 0)
+ {
+ if (*tag_i < num_tags)
+ reach_next_i->tags[*tag_i] = pos;
+ tag_i++;
+ }
+ if (reach_next_i->state == tnfa->final)
+ {
+ match_eo = pos;
+ new_match = 1;
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = reach_next_i->tags[i];
+ }
+ reach_pos[trans_i->state_id].pos = pos;
+ reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
+ reach_next_i++;
+ }
+ trans_i++;
+ }
+ reach_next_i->state = NULL;
+ }
+ else
+ {
+ if (num_tags == 0 || reach_next_i == reach_next)
+ /* We have found a match. */
+ break;
+ }
+
+ /* Check for end of string. */
+ if (!next_c) break;
+
+ GET_NEXT_WCHAR();
+
+ /* Swap `reach' and `reach_next'. */
+ reach_i = reach;
+ reach = reach_next;
+ reach_next = reach_i;
+
+ /* For each state in `reach', weed out states that don't fulfill the
+ minimal matching conditions. */
+ if (tnfa->num_minimals && new_match)
+ {
+ new_match = 0;
+ reach_next_i = reach_next;
+ for (reach_i = reach; reach_i->state; reach_i++)
+ {
+ int skip = 0;
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
+ {
+ int end = tnfa->minimal_tags[i];
+ int start = tnfa->minimal_tags[i + 1];
+ if (end >= num_tags)
+ {
+ skip = 1;
+ break;
+ }
+ else if (reach_i->tags[start] == match_tags[start]
+ && reach_i->tags[end] < match_tags[end])
+ {
+ skip = 1;
+ break;
+ }
+ }
+ if (!skip)
+ {
+ reach_next_i->state = reach_i->state;
+ tmp_iptr = reach_next_i->tags;
+ reach_next_i->tags = reach_i->tags;
+ reach_i->tags = tmp_iptr;
+ reach_next_i++;
+ }
+ }
+ reach_next_i->state = NULL;
+
+ /* Swap `reach' and `reach_next'. */
+ reach_i = reach;
+ reach = reach_next;
+ reach_next = reach_i;
+ }
+
+ /* For each state in `reach' see if there is a transition leaving with
+ the current input symbol to a state not yet in `reach_next', and
+ add the destination states to `reach_next'. */
+ reach_next_i = reach_next;
+ for (reach_i = reach; reach_i->state; reach_i++)
+ {
+ for (trans_i = reach_i->state; trans_i->state; trans_i++)
+ {
+ /* Does this transition match the input symbol? */
+ if (trans_i->code_min <= (tre_cint_t)prev_c &&
+ trans_i->code_max >= (tre_cint_t)prev_c)
+ {
+ if (trans_i->assertions
+ && (CHECK_ASSERTIONS(trans_i->assertions)
+ || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
+ {
+ continue;
+ }
+
+ /* Compute the tags after this transition. */
+ for (i = 0; i < num_tags; i++)
+ tmp_tags[i] = reach_i->tags[i];
+ tag_i = trans_i->tags;
+ if (tag_i != NULL)
+ while (*tag_i >= 0)
+ {
+ if (*tag_i < num_tags)
+ tmp_tags[*tag_i] = pos;
+ tag_i++;
+ }
+
+ if (reach_pos[trans_i->state_id].pos < pos)
+ {
+ /* Found an unvisited node. */
+ reach_next_i->state = trans_i->state;
+ tmp_iptr = reach_next_i->tags;
+ reach_next_i->tags = tmp_tags;
+ tmp_tags = tmp_iptr;
+ reach_pos[trans_i->state_id].pos = pos;
+ reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
+
+ if (reach_next_i->state == tnfa->final
+ && (match_eo == -1
+ || (num_tags > 0
+ && reach_next_i->tags[0] <= match_tags[0])))
+ {
+ match_eo = pos;
+ new_match = 1;
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = reach_next_i->tags[i];
+ }
+ reach_next_i++;
+
+ }
+ else
+ {
+ assert(reach_pos[trans_i->state_id].pos == pos);
+ /* Another path has also reached this state. We choose
+ the winner by examining the tag values for both
+ paths. */
+ if (tre_tag_order(num_tags, tnfa->tag_directions,
+ tmp_tags,
+ *reach_pos[trans_i->state_id].tags))
+ {
+ /* The new path wins. */
+ tmp_iptr = *reach_pos[trans_i->state_id].tags;
+ *reach_pos[trans_i->state_id].tags = tmp_tags;
+ if (trans_i->state == tnfa->final)
+ {
+ match_eo = pos;
+ new_match = 1;
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = tmp_tags[i];
+ }
+ tmp_tags = tmp_iptr;
+ }
+ }
+ }
+ }
+ }
+ reach_next_i->state = NULL;
+ }
+
+ *match_end_ofs = match_eo;
+ ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
+error_exit:
+ xfree(buf);
+ return ret;
+}
+
+
+
+/***********************************************************************
+ from tre-match-backtrack.c
+***********************************************************************/
+
+/*
+ This matcher is for regexps that use back referencing. Regexp matching
+ with back referencing is an NP-complete problem on the number of back
+ references. The easiest way to match them is to use a backtracking
+ routine which basically goes through all possible paths in the TNFA
+ and chooses the one which results in the best (leftmost and longest)
+ match. This can be spectacularly expensive and may run out of stack
+ space, but there really is no better known generic algorithm. Quoting
+ Henry Spencer from comp.compilers:
+ <URL: http://compilers.iecc.com/comparch/article/93-03-102>
+
+ POSIX.2 REs require longest match, which is really exciting to
+ implement since the obsolete ("basic") variant also includes
+ \<digit>. I haven't found a better way of tackling this than doing
+ a preliminary match using a DFA (or simulation) on a modified RE
+ that just replicates subREs for \<digit>, and then doing a
+ backtracking match to determine whether the subRE matches were
+ right. This can be rather slow, but I console myself with the
+ thought that people who use \<digit> deserve very slow execution.
+ (Pun unintentional but very appropriate.)
+
+*/
+
+typedef struct {
+ regoff_t pos;
+ const char *str_byte;
+ tre_tnfa_transition_t *state;
+ int state_id;
+ int next_c;
+ regoff_t *tags;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+} tre_backtrack_item_t;
+
+typedef struct tre_backtrack_struct {
+ tre_backtrack_item_t item;
+ struct tre_backtrack_struct *prev;
+ struct tre_backtrack_struct *next;
+} *tre_backtrack_t;
+
+#ifdef TRE_MBSTATE
+#define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate)
+#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
+#else /* !TRE_MBSTATE */
+#define BT_STACK_MBSTATE_IN
+#define BT_STACK_MBSTATE_OUT
+#endif /* !TRE_MBSTATE */
+
+#define tre_bt_mem_new tre_mem_new
+#define tre_bt_mem_alloc tre_mem_alloc
+#define tre_bt_mem_destroy tre_mem_destroy
+
+
+#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
+ do \
+ { \
+ int i; \
+ if (!stack->next) \
+ { \
+ tre_backtrack_t s; \
+ s = tre_bt_mem_alloc(mem, sizeof(*s)); \
+ if (!s) \
+ { \
+ tre_bt_mem_destroy(mem); \
+ if (tags) \
+ xfree(tags); \
+ if (pmatch) \
+ xfree(pmatch); \
+ if (states_seen) \
+ xfree(states_seen); \
+ return REG_ESPACE; \
+ } \
+ s->prev = stack; \
+ s->next = NULL; \
+ s->item.tags = tre_bt_mem_alloc(mem, \
+ sizeof(*tags) * tnfa->num_tags); \
+ if (!s->item.tags) \
+ { \
+ tre_bt_mem_destroy(mem); \
+ if (tags) \
+ xfree(tags); \
+ if (pmatch) \
+ xfree(pmatch); \
+ if (states_seen) \
+ xfree(states_seen); \
+ return REG_ESPACE; \
+ } \
+ stack->next = s; \
+ stack = s; \
+ } \
+ else \
+ stack = stack->next; \
+ stack->item.pos = (_pos); \
+ stack->item.str_byte = (_str_byte); \
+ stack->item.state = (_state); \
+ stack->item.state_id = (_state_id); \
+ stack->item.next_c = (_next_c); \
+ for (i = 0; i < tnfa->num_tags; i++) \
+ stack->item.tags[i] = (_tags)[i]; \
+ BT_STACK_MBSTATE_IN; \
+ } \
+ while (0)
+
+#define BT_STACK_POP() \
+ do \
+ { \
+ int i; \
+ assert(stack->prev); \
+ pos = stack->item.pos; \
+ str_byte = stack->item.str_byte; \
+ state = stack->item.state; \
+ next_c = stack->item.next_c; \
+ for (i = 0; i < tnfa->num_tags; i++) \
+ tags[i] = stack->item.tags[i]; \
+ BT_STACK_MBSTATE_OUT; \
+ stack = stack->prev; \
+ } \
+ while (0)
+
+#undef MIN
+#define MIN(a, b) ((a) <= (b) ? (a) : (b))
+
+static reg_errcode_t
+tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
+ regoff_t *match_tags, int eflags, regoff_t *match_end_ofs)
+{
+ /* State variables required by GET_NEXT_WCHAR. */
+ tre_char_t prev_c = 0, next_c = 0;
+ const char *str_byte = string;
+ regoff_t pos = 0;
+ regoff_t pos_add_next = 1;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+ int reg_notbol = eflags & REG_NOTBOL;
+ int reg_noteol = eflags & REG_NOTEOL;
+ int reg_newline = tnfa->cflags & REG_NEWLINE;
+
+ /* These are used to remember the necessary values of the above
+ variables to return to the position where the current search
+ started from. */
+ int next_c_start;
+ const char *str_byte_start;
+ regoff_t pos_start = -1;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate_start;
+#endif /* TRE_MBSTATE */
+
+ /* End offset of best match so far, or -1 if no match found yet. */
+ regoff_t match_eo = -1;
+ /* Tag arrays. */
+ int *next_tags;
+ regoff_t *tags = NULL;
+ /* Current TNFA state. */
+ tre_tnfa_transition_t *state;
+ int *states_seen = NULL;
+
+ /* Memory allocator to for allocating the backtracking stack. */
+ tre_mem_t mem = tre_bt_mem_new();
+
+ /* The backtracking stack. */
+ tre_backtrack_t stack;
+
+ tre_tnfa_transition_t *trans_i;
+ regmatch_t *pmatch = NULL;
+ int ret;
+
+#ifdef TRE_MBSTATE
+ memset(&mbstate, '\0', sizeof(mbstate));
+#endif /* TRE_MBSTATE */
+
+ if (!mem)
+ return REG_ESPACE;
+ stack = tre_bt_mem_alloc(mem, sizeof(*stack));
+ if (!stack)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ stack->prev = NULL;
+ stack->next = NULL;
+
+ if (tnfa->num_tags)
+ {
+ tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
+ if (!tags)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ }
+ if (tnfa->num_submatches)
+ {
+ pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
+ if (!pmatch)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ }
+ if (tnfa->num_states)
+ {
+ states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
+ if (!states_seen)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ }
+
+ retry:
+ {
+ int i;
+ for (i = 0; i < tnfa->num_tags; i++)
+ {
+ tags[i] = -1;
+ if (match_tags)
+ match_tags[i] = -1;
+ }
+ for (i = 0; i < tnfa->num_states; i++)
+ states_seen[i] = 0;
+ }
+
+ state = NULL;
+ pos = pos_start;
+ GET_NEXT_WCHAR();
+ pos_start = pos;
+ next_c_start = next_c;
+ str_byte_start = str_byte;
+#ifdef TRE_MBSTATE
+ mbstate_start = mbstate;
+#endif /* TRE_MBSTATE */
+
+ /* Handle initial states. */
+ next_tags = NULL;
+ for (trans_i = tnfa->initial; trans_i->state; trans_i++)
+ {
+ if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
+ {
+ continue;
+ }
+ if (state == NULL)
+ {
+ /* Start from this state. */
+ state = trans_i->state;
+ next_tags = trans_i->tags;
+ }
+ else
+ {
+ /* Backtrack to this state. */
+ BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
+ trans_i->state_id, next_c, tags, mbstate);
+ {
+ int *tmp = trans_i->tags;
+ if (tmp)
+ while (*tmp >= 0)
+ stack->item.tags[*tmp++] = pos;
+ }
+ }
+ }
+
+ if (next_tags)
+ for (; *next_tags >= 0; next_tags++)
+ tags[*next_tags] = pos;
+
+
+ if (state == NULL)
+ goto backtrack;
+
+ while (1)
+ {
+ tre_tnfa_transition_t *next_state;
+ int empty_br_match;
+
+ if (state == tnfa->final)
+ {
+ if (match_eo < pos
+ || (match_eo == pos
+ && match_tags
+ && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
+ tags, match_tags)))
+ {
+ int i;
+ /* This match wins the previous match. */
+ match_eo = pos;
+ if (match_tags)
+ for (i = 0; i < tnfa->num_tags; i++)
+ match_tags[i] = tags[i];
+ }
+ /* Our TNFAs never have transitions leaving from the final state,
+ so we jump right to backtracking. */
+ goto backtrack;
+ }
+
+ /* Go to the next character in the input string. */
+ empty_br_match = 0;
+ trans_i = state;
+ if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
+ {
+ /* This is a back reference state. All transitions leaving from
+ this state have the same back reference "assertion". Instead
+ of reading the next character, we match the back reference. */
+ regoff_t so, eo;
+ int bt = trans_i->u.backref;
+ regoff_t bt_len;
+ int result;
+
+ /* Get the substring we need to match against. Remember to
+ turn off REG_NOSUB temporarily. */
+ tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB,
+ tnfa, tags, pos);
+ so = pmatch[bt].rm_so;
+ eo = pmatch[bt].rm_eo;
+ bt_len = eo - so;
+
+ result = strncmp((const char*)string + so, str_byte - 1,
+ (size_t)bt_len);
+
+ if (result == 0)
+ {
+ /* Back reference matched. Check for infinite loop. */
+ if (bt_len == 0)
+ empty_br_match = 1;
+ if (empty_br_match && states_seen[trans_i->state_id])
+ {
+ goto backtrack;
+ }
+
+ states_seen[trans_i->state_id] = empty_br_match;
+
+ /* Advance in input string and resync `prev_c', `next_c'
+ and pos. */
+ str_byte += bt_len - 1;
+ pos += bt_len - 1;
+ GET_NEXT_WCHAR();
+ }
+ else
+ {
+ goto backtrack;
+ }
+ }
+ else
+ {
+ /* Check for end of string. */
+ if (next_c == L'\0')
+ goto backtrack;
+
+ /* Read the next character. */
+ GET_NEXT_WCHAR();
+ }
+
+ next_state = NULL;
+ for (trans_i = state; trans_i->state; trans_i++)
+ {
+ if (trans_i->code_min <= (tre_cint_t)prev_c
+ && trans_i->code_max >= (tre_cint_t)prev_c)
+ {
+ if (trans_i->assertions
+ && (CHECK_ASSERTIONS(trans_i->assertions)
+ || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
+ {
+ continue;
+ }
+
+ if (next_state == NULL)
+ {
+ /* First matching transition. */
+ next_state = trans_i->state;
+ next_tags = trans_i->tags;
+ }
+ else
+ {
+ /* Second matching transition. We may need to backtrack here
+ to take this transition instead of the first one, so we
+ push this transition in the backtracking stack so we can
+ jump back here if needed. */
+ BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
+ trans_i->state_id, next_c, tags, mbstate);
+ {
+ int *tmp;
+ for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
+ stack->item.tags[*tmp] = pos;
+ }
+#if 0 /* XXX - it's important not to look at all transitions here to keep
+ the stack small! */
+ break;
+#endif
+ }
+ }
+ }
+
+ if (next_state != NULL)
+ {
+ /* Matching transitions were found. Take the first one. */
+ state = next_state;
+
+ /* Update the tag values. */
+ if (next_tags)
+ while (*next_tags >= 0)
+ tags[*next_tags++] = pos;
+ }
+ else
+ {
+ backtrack:
+ /* A matching transition was not found. Try to backtrack. */
+ if (stack->prev)
+ {
+ if (stack->item.state->assertions & ASSERT_BACKREF)
+ {
+ states_seen[stack->item.state_id] = 0;
+ }
+
+ BT_STACK_POP();
+ }
+ else if (match_eo < 0)
+ {
+ /* Try starting from a later position in the input string. */
+ /* Check for end of string. */
+ if (next_c == L'\0')
+ {
+ break;
+ }
+ next_c = next_c_start;
+#ifdef TRE_MBSTATE
+ mbstate = mbstate_start;
+#endif /* TRE_MBSTATE */
+ str_byte = str_byte_start;
+ goto retry;
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+
+ ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
+ *match_end_ofs = match_eo;
+
+ error_exit:
+ tre_bt_mem_destroy(mem);
+#ifndef TRE_USE_ALLOCA
+ if (tags)
+ xfree(tags);
+ if (pmatch)
+ xfree(pmatch);
+ if (states_seen)
+ xfree(states_seen);
+#endif /* !TRE_USE_ALLOCA */
+
+ return ret;
+}
+
+/***********************************************************************
+ from regexec.c
+***********************************************************************/
+
+/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
+ endpoint values. */
+static void
+tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
+ const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo)
+{
+ tre_submatch_data_t *submatch_data;
+ unsigned int i, j;
+ int *parents;
+
+ i = 0;
+ if (match_eo >= 0 && !(cflags & REG_NOSUB))
+ {
+ /* Construct submatch offsets from the tags. */
+ submatch_data = tnfa->submatch_data;
+ while (i < tnfa->num_submatches && i < nmatch)
+ {
+ if (submatch_data[i].so_tag == tnfa->end_tag)
+ pmatch[i].rm_so = match_eo;
+ else
+ pmatch[i].rm_so = tags[submatch_data[i].so_tag];
+
+ if (submatch_data[i].eo_tag == tnfa->end_tag)
+ pmatch[i].rm_eo = match_eo;
+ else
+ pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
+
+ /* If either of the endpoints were not used, this submatch
+ was not part of the match. */
+ if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
+ pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+
+ i++;
+ }
+ /* Reset all submatches that are not within all of their parent
+ submatches. */
+ i = 0;
+ while (i < tnfa->num_submatches && i < nmatch)
+ {
+ if (pmatch[i].rm_eo == -1)
+ assert(pmatch[i].rm_so == -1);
+ assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
+
+ parents = submatch_data[i].parents;
+ if (parents != NULL)
+ for (j = 0; parents[j] >= 0; j++)
+ {
+ if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
+ || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
+ pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+ }
+ i++;
+ }
+ }
+
+ while (i < nmatch)
+ {
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ i++;
+ }
+}
+
+
+/*
+ Wrapper functions for POSIX compatible regexp matching.
+*/
+
+int
+regexec(const regex_t *restrict preg, const char *restrict string,
+ size_t nmatch, regmatch_t pmatch[restrict], int eflags)
+{
+ tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ reg_errcode_t status;
+ regoff_t *tags = NULL, eo;
+ if (tnfa->cflags & REG_NOSUB) nmatch = 0;
+ if (tnfa->num_tags > 0 && nmatch > 0)
+ {
+ tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
+ if (tags == NULL)
+ return REG_ESPACE;
+ }
+
+ /* Dispatch to the appropriate matcher. */
+ if (tnfa->have_backrefs)
+ {
+ /* The regex has back references, use the backtracking matcher. */
+ status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo);
+ }
+ else
+ {
+ /* Exact matching, no back references, use the parallel matcher. */
+ status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
+ }
+
+ if (status == REG_OK)
+ /* A match was found, so fill the submatch registers. */
+ tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
+ if (tags)
+ xfree(tags);
+ return status;
+} \ No newline at end of file