diff options
Diffstat (limited to 'lib/mlibc/options/posix/musl-generic-regex/regexec.c')
-rw-r--r-- | lib/mlibc/options/posix/musl-generic-regex/regexec.c | 1028 |
1 files changed, 0 insertions, 1028 deletions
diff --git a/lib/mlibc/options/posix/musl-generic-regex/regexec.c b/lib/mlibc/options/posix/musl-generic-regex/regexec.c deleted file mode 100644 index 1a169ab..0000000 --- a/lib/mlibc/options/posix/musl-generic-regex/regexec.c +++ /dev/null @@ -1,1028 +0,0 @@ -/* - regexec.c - TRE POSIX compatible matching functions (and more). - - Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include <stdlib.h> -#include <string.h> -#include <wchar.h> -#include <wctype.h> -#include <limits.h> -#include <stdint.h> - -#include <regex.h> - -#include "tre.h" - -#include <assert.h> - -static void -tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, - const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo); - -/*********************************************************************** - from tre-match-utils.h -***********************************************************************/ - -#define GET_NEXT_WCHAR() do { \ - prev_c = next_c; pos += pos_add_next; \ - if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \ - if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; } \ - else pos_add_next++; \ - } \ - str_byte += pos_add_next; \ - } while (0) - -#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c)) - -#define CHECK_ASSERTIONS(assertions) \ - (((assertions & ASSERT_AT_BOL) \ - && (pos > 0 || reg_notbol) \ - && (prev_c != L'\n' || !reg_newline)) \ - || ((assertions & ASSERT_AT_EOL) \ - && (next_c != L'\0' || reg_noteol) \ - && (next_c != L'\n' || !reg_newline)) \ - || ((assertions & ASSERT_AT_BOW) \ - && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \ - || ((assertions & ASSERT_AT_EOW) \ - && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \ - || ((assertions & ASSERT_AT_WB) \ - && (pos != 0 && next_c != L'\0' \ - && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \ - || ((assertions & ASSERT_AT_WB_NEG) \ - && (pos == 0 || next_c == L'\0' \ - || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c)))) - -#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \ - (((trans_i->assertions & ASSERT_CHAR_CLASS) \ - && !(tnfa->cflags & REG_ICASE) \ - && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \ - || ((trans_i->assertions & ASSERT_CHAR_CLASS) \ - && (tnfa->cflags & REG_ICASE) \ - && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \ - && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \ - || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \ - && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\ - tnfa->cflags & REG_ICASE))) - - - - -/* Returns 1 if `t1' wins `t2', 0 otherwise. */ -static int -tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions, - regoff_t *t1, regoff_t *t2) -{ - int i; - for (i = 0; i < num_tags; i++) - { - if (tag_directions[i] == TRE_TAG_MINIMIZE) - { - if (t1[i] < t2[i]) - return 1; - if (t1[i] > t2[i]) - return 0; - } - else - { - if (t1[i] > t2[i]) - return 1; - if (t1[i] < t2[i]) - return 0; - } - } - /* assert(0);*/ - return 0; -} - -static int -tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase) -{ - while (*classes != (tre_ctype_t)0) - if ((!icase && tre_isctype(wc, *classes)) - || (icase && (tre_isctype(tre_toupper(wc), *classes) - || tre_isctype(tre_tolower(wc), *classes)))) - return 1; /* Match. */ - else - classes++; - return 0; /* No match. */ -} - - -/*********************************************************************** - from tre-match-parallel.c -***********************************************************************/ - -/* - This algorithm searches for matches basically by reading characters - in the searched string one by one, starting at the beginning. All - matching paths in the TNFA are traversed in parallel. When two or - more paths reach the same state, exactly one is chosen according to - tag ordering rules; if returning submatches is not required it does - not matter which path is chosen. - - The worst case time required for finding the leftmost and longest - match, or determining that there is no match, is always linearly - dependent on the length of the text being searched. - - This algorithm cannot handle TNFAs with back referencing nodes. - See `tre-match-backtrack.c'. -*/ - -typedef struct { - tre_tnfa_transition_t *state; - regoff_t *tags; -} tre_tnfa_reach_t; - -typedef struct { - regoff_t pos; - regoff_t **tags; -} tre_reach_pos_t; - - -static reg_errcode_t -tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, - regoff_t *match_tags, int eflags, - regoff_t *match_end_ofs) -{ - /* State variables required by GET_NEXT_WCHAR. */ - tre_char_t prev_c = 0, next_c = 0; - const char *str_byte = string; - regoff_t pos = -1; - regoff_t pos_add_next = 1; -#ifdef TRE_MBSTATE - mbstate_t mbstate; -#endif /* TRE_MBSTATE */ - int reg_notbol = eflags & REG_NOTBOL; - int reg_noteol = eflags & REG_NOTEOL; - int reg_newline = tnfa->cflags & REG_NEWLINE; - reg_errcode_t ret; - - char *buf; - tre_tnfa_transition_t *trans_i; - tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i; - tre_reach_pos_t *reach_pos; - int *tag_i; - int num_tags, i; - - regoff_t match_eo = -1; /* end offset of match (-1 if no match found yet) */ - int new_match = 0; - regoff_t *tmp_tags = NULL; - regoff_t *tmp_iptr; - -#ifdef TRE_MBSTATE - memset(&mbstate, '\0', sizeof(mbstate)); -#endif /* TRE_MBSTATE */ - - if (!match_tags) - num_tags = 0; - else - num_tags = tnfa->num_tags; - - /* Allocate memory for temporary data required for matching. This needs to - be done for every matching operation to be thread safe. This allocates - everything in a single large block with calloc(). */ - { - size_t tbytes, rbytes, pbytes, xbytes, total_bytes; - char *tmp_buf; - - /* Ensure that tbytes and xbytes*num_states cannot overflow, and that - * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */ - if (num_tags > SIZE_MAX/(8 * sizeof(regoff_t) * tnfa->num_states)) - return REG_ESPACE; - - /* Likewise check rbytes. */ - if (tnfa->num_states+1 > SIZE_MAX/(8 * sizeof(*reach_next))) - return REG_ESPACE; - - /* Likewise check pbytes. */ - if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_pos))) - return REG_ESPACE; - - /* Compute the length of the block we need. */ - tbytes = sizeof(*tmp_tags) * num_tags; - rbytes = sizeof(*reach_next) * (tnfa->num_states + 1); - pbytes = sizeof(*reach_pos) * tnfa->num_states; - xbytes = sizeof(regoff_t) * num_tags; - total_bytes = - (sizeof(long) - 1) * 4 /* for alignment paddings */ - + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes; - - /* Allocate the memory. */ - buf = calloc(total_bytes, 1); - if (buf == NULL) - return REG_ESPACE; - - /* Get the various pointers within tmp_buf (properly aligned). */ - tmp_tags = (void *)buf; - tmp_buf = buf + tbytes; - tmp_buf += ALIGN(tmp_buf, long); - reach_next = (void *)tmp_buf; - tmp_buf += rbytes; - tmp_buf += ALIGN(tmp_buf, long); - reach = (void *)tmp_buf; - tmp_buf += rbytes; - tmp_buf += ALIGN(tmp_buf, long); - reach_pos = (void *)tmp_buf; - tmp_buf += pbytes; - tmp_buf += ALIGN(tmp_buf, long); - for (i = 0; i < tnfa->num_states; i++) - { - reach[i].tags = (void *)tmp_buf; - tmp_buf += xbytes; - reach_next[i].tags = (void *)tmp_buf; - tmp_buf += xbytes; - } - } - - for (i = 0; i < tnfa->num_states; i++) - reach_pos[i].pos = -1; - - GET_NEXT_WCHAR(); - pos = 0; - - reach_next_i = reach_next; - while (1) - { - /* If no match found yet, add the initial states to `reach_next'. */ - if (match_eo < 0) - { - trans_i = tnfa->initial; - while (trans_i->state != NULL) - { - if (reach_pos[trans_i->state_id].pos < pos) - { - if (trans_i->assertions - && CHECK_ASSERTIONS(trans_i->assertions)) - { - trans_i++; - continue; - } - - reach_next_i->state = trans_i->state; - for (i = 0; i < num_tags; i++) - reach_next_i->tags[i] = -1; - tag_i = trans_i->tags; - if (tag_i) - while (*tag_i >= 0) - { - if (*tag_i < num_tags) - reach_next_i->tags[*tag_i] = pos; - tag_i++; - } - if (reach_next_i->state == tnfa->final) - { - match_eo = pos; - new_match = 1; - for (i = 0; i < num_tags; i++) - match_tags[i] = reach_next_i->tags[i]; - } - reach_pos[trans_i->state_id].pos = pos; - reach_pos[trans_i->state_id].tags = &reach_next_i->tags; - reach_next_i++; - } - trans_i++; - } - reach_next_i->state = NULL; - } - else - { - if (num_tags == 0 || reach_next_i == reach_next) - /* We have found a match. */ - break; - } - - /* Check for end of string. */ - if (!next_c) break; - - GET_NEXT_WCHAR(); - - /* Swap `reach' and `reach_next'. */ - reach_i = reach; - reach = reach_next; - reach_next = reach_i; - - /* For each state in `reach', weed out states that don't fulfill the - minimal matching conditions. */ - if (tnfa->num_minimals && new_match) - { - new_match = 0; - reach_next_i = reach_next; - for (reach_i = reach; reach_i->state; reach_i++) - { - int skip = 0; - for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) - { - int end = tnfa->minimal_tags[i]; - int start = tnfa->minimal_tags[i + 1]; - if (end >= num_tags) - { - skip = 1; - break; - } - else if (reach_i->tags[start] == match_tags[start] - && reach_i->tags[end] < match_tags[end]) - { - skip = 1; - break; - } - } - if (!skip) - { - reach_next_i->state = reach_i->state; - tmp_iptr = reach_next_i->tags; - reach_next_i->tags = reach_i->tags; - reach_i->tags = tmp_iptr; - reach_next_i++; - } - } - reach_next_i->state = NULL; - - /* Swap `reach' and `reach_next'. */ - reach_i = reach; - reach = reach_next; - reach_next = reach_i; - } - - /* For each state in `reach' see if there is a transition leaving with - the current input symbol to a state not yet in `reach_next', and - add the destination states to `reach_next'. */ - reach_next_i = reach_next; - for (reach_i = reach; reach_i->state; reach_i++) - { - for (trans_i = reach_i->state; trans_i->state; trans_i++) - { - /* Does this transition match the input symbol? */ - if (trans_i->code_min <= (tre_cint_t)prev_c && - trans_i->code_max >= (tre_cint_t)prev_c) - { - if (trans_i->assertions - && (CHECK_ASSERTIONS(trans_i->assertions) - || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) - { - continue; - } - - /* Compute the tags after this transition. */ - for (i = 0; i < num_tags; i++) - tmp_tags[i] = reach_i->tags[i]; - tag_i = trans_i->tags; - if (tag_i != NULL) - while (*tag_i >= 0) - { - if (*tag_i < num_tags) - tmp_tags[*tag_i] = pos; - tag_i++; - } - - if (reach_pos[trans_i->state_id].pos < pos) - { - /* Found an unvisited node. */ - reach_next_i->state = trans_i->state; - tmp_iptr = reach_next_i->tags; - reach_next_i->tags = tmp_tags; - tmp_tags = tmp_iptr; - reach_pos[trans_i->state_id].pos = pos; - reach_pos[trans_i->state_id].tags = &reach_next_i->tags; - - if (reach_next_i->state == tnfa->final - && (match_eo == -1 - || (num_tags > 0 - && reach_next_i->tags[0] <= match_tags[0]))) - { - match_eo = pos; - new_match = 1; - for (i = 0; i < num_tags; i++) - match_tags[i] = reach_next_i->tags[i]; - } - reach_next_i++; - - } - else - { - assert(reach_pos[trans_i->state_id].pos == pos); - /* Another path has also reached this state. We choose - the winner by examining the tag values for both - paths. */ - if (tre_tag_order(num_tags, tnfa->tag_directions, - tmp_tags, - *reach_pos[trans_i->state_id].tags)) - { - /* The new path wins. */ - tmp_iptr = *reach_pos[trans_i->state_id].tags; - *reach_pos[trans_i->state_id].tags = tmp_tags; - if (trans_i->state == tnfa->final) - { - match_eo = pos; - new_match = 1; - for (i = 0; i < num_tags; i++) - match_tags[i] = tmp_tags[i]; - } - tmp_tags = tmp_iptr; - } - } - } - } - } - reach_next_i->state = NULL; - } - - *match_end_ofs = match_eo; - ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; -error_exit: - xfree(buf); - return ret; -} - - - -/*********************************************************************** - from tre-match-backtrack.c -***********************************************************************/ - -/* - This matcher is for regexps that use back referencing. Regexp matching - with back referencing is an NP-complete problem on the number of back - references. The easiest way to match them is to use a backtracking - routine which basically goes through all possible paths in the TNFA - and chooses the one which results in the best (leftmost and longest) - match. This can be spectacularly expensive and may run out of stack - space, but there really is no better known generic algorithm. Quoting - Henry Spencer from comp.compilers: - <URL: http://compilers.iecc.com/comparch/article/93-03-102> - - POSIX.2 REs require longest match, which is really exciting to - implement since the obsolete ("basic") variant also includes - \<digit>. I haven't found a better way of tackling this than doing - a preliminary match using a DFA (or simulation) on a modified RE - that just replicates subREs for \<digit>, and then doing a - backtracking match to determine whether the subRE matches were - right. This can be rather slow, but I console myself with the - thought that people who use \<digit> deserve very slow execution. - (Pun unintentional but very appropriate.) - -*/ - -typedef struct { - regoff_t pos; - const char *str_byte; - tre_tnfa_transition_t *state; - int state_id; - int next_c; - regoff_t *tags; -#ifdef TRE_MBSTATE - mbstate_t mbstate; -#endif /* TRE_MBSTATE */ -} tre_backtrack_item_t; - -typedef struct tre_backtrack_struct { - tre_backtrack_item_t item; - struct tre_backtrack_struct *prev; - struct tre_backtrack_struct *next; -} *tre_backtrack_t; - -#ifdef TRE_MBSTATE -#define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate) -#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate -#else /* !TRE_MBSTATE */ -#define BT_STACK_MBSTATE_IN -#define BT_STACK_MBSTATE_OUT -#endif /* !TRE_MBSTATE */ - -#define tre_bt_mem_new tre_mem_new -#define tre_bt_mem_alloc tre_mem_alloc -#define tre_bt_mem_destroy tre_mem_destroy - - -#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \ - do \ - { \ - int i; \ - if (!stack->next) \ - { \ - tre_backtrack_t s; \ - s = tre_bt_mem_alloc(mem, sizeof(*s)); \ - if (!s) \ - { \ - tre_bt_mem_destroy(mem); \ - if (tags) \ - xfree(tags); \ - if (pmatch) \ - xfree(pmatch); \ - if (states_seen) \ - xfree(states_seen); \ - return REG_ESPACE; \ - } \ - s->prev = stack; \ - s->next = NULL; \ - s->item.tags = tre_bt_mem_alloc(mem, \ - sizeof(*tags) * tnfa->num_tags); \ - if (!s->item.tags) \ - { \ - tre_bt_mem_destroy(mem); \ - if (tags) \ - xfree(tags); \ - if (pmatch) \ - xfree(pmatch); \ - if (states_seen) \ - xfree(states_seen); \ - return REG_ESPACE; \ - } \ - stack->next = s; \ - stack = s; \ - } \ - else \ - stack = stack->next; \ - stack->item.pos = (_pos); \ - stack->item.str_byte = (_str_byte); \ - stack->item.state = (_state); \ - stack->item.state_id = (_state_id); \ - stack->item.next_c = (_next_c); \ - for (i = 0; i < tnfa->num_tags; i++) \ - stack->item.tags[i] = (_tags)[i]; \ - BT_STACK_MBSTATE_IN; \ - } \ - while (0) - -#define BT_STACK_POP() \ - do \ - { \ - int i; \ - assert(stack->prev); \ - pos = stack->item.pos; \ - str_byte = stack->item.str_byte; \ - state = stack->item.state; \ - next_c = stack->item.next_c; \ - for (i = 0; i < tnfa->num_tags; i++) \ - tags[i] = stack->item.tags[i]; \ - BT_STACK_MBSTATE_OUT; \ - stack = stack->prev; \ - } \ - while (0) - -#undef MIN -#define MIN(a, b) ((a) <= (b) ? (a) : (b)) - -static reg_errcode_t -tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, - regoff_t *match_tags, int eflags, regoff_t *match_end_ofs) -{ - /* State variables required by GET_NEXT_WCHAR. */ - tre_char_t prev_c = 0, next_c = 0; - const char *str_byte = string; - regoff_t pos = 0; - regoff_t pos_add_next = 1; -#ifdef TRE_MBSTATE - mbstate_t mbstate; -#endif /* TRE_MBSTATE */ - int reg_notbol = eflags & REG_NOTBOL; - int reg_noteol = eflags & REG_NOTEOL; - int reg_newline = tnfa->cflags & REG_NEWLINE; - - /* These are used to remember the necessary values of the above - variables to return to the position where the current search - started from. */ - int next_c_start; - const char *str_byte_start; - regoff_t pos_start = -1; -#ifdef TRE_MBSTATE - mbstate_t mbstate_start; -#endif /* TRE_MBSTATE */ - - /* End offset of best match so far, or -1 if no match found yet. */ - regoff_t match_eo = -1; - /* Tag arrays. */ - int *next_tags; - regoff_t *tags = NULL; - /* Current TNFA state. */ - tre_tnfa_transition_t *state; - int *states_seen = NULL; - - /* Memory allocator to for allocating the backtracking stack. */ - tre_mem_t mem = tre_bt_mem_new(); - - /* The backtracking stack. */ - tre_backtrack_t stack; - - tre_tnfa_transition_t *trans_i; - regmatch_t *pmatch = NULL; - int ret; - -#ifdef TRE_MBSTATE - memset(&mbstate, '\0', sizeof(mbstate)); -#endif /* TRE_MBSTATE */ - - if (!mem) - return REG_ESPACE; - stack = tre_bt_mem_alloc(mem, sizeof(*stack)); - if (!stack) - { - ret = REG_ESPACE; - goto error_exit; - } - stack->prev = NULL; - stack->next = NULL; - - if (tnfa->num_tags) - { - tags = xmalloc(sizeof(*tags) * tnfa->num_tags); - if (!tags) - { - ret = REG_ESPACE; - goto error_exit; - } - } - if (tnfa->num_submatches) - { - pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches); - if (!pmatch) - { - ret = REG_ESPACE; - goto error_exit; - } - } - if (tnfa->num_states) - { - states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states); - if (!states_seen) - { - ret = REG_ESPACE; - goto error_exit; - } - } - - retry: - { - int i; - for (i = 0; i < tnfa->num_tags; i++) - { - tags[i] = -1; - if (match_tags) - match_tags[i] = -1; - } - for (i = 0; i < tnfa->num_states; i++) - states_seen[i] = 0; - } - - state = NULL; - pos = pos_start; - GET_NEXT_WCHAR(); - pos_start = pos; - next_c_start = next_c; - str_byte_start = str_byte; -#ifdef TRE_MBSTATE - mbstate_start = mbstate; -#endif /* TRE_MBSTATE */ - - /* Handle initial states. */ - next_tags = NULL; - for (trans_i = tnfa->initial; trans_i->state; trans_i++) - { - if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) - { - continue; - } - if (state == NULL) - { - /* Start from this state. */ - state = trans_i->state; - next_tags = trans_i->tags; - } - else - { - /* Backtrack to this state. */ - BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, - trans_i->state_id, next_c, tags, mbstate); - { - int *tmp = trans_i->tags; - if (tmp) - while (*tmp >= 0) - stack->item.tags[*tmp++] = pos; - } - } - } - - if (next_tags) - for (; *next_tags >= 0; next_tags++) - tags[*next_tags] = pos; - - - if (state == NULL) - goto backtrack; - - while (1) - { - tre_tnfa_transition_t *next_state; - int empty_br_match; - - if (state == tnfa->final) - { - if (match_eo < pos - || (match_eo == pos - && match_tags - && tre_tag_order(tnfa->num_tags, tnfa->tag_directions, - tags, match_tags))) - { - int i; - /* This match wins the previous match. */ - match_eo = pos; - if (match_tags) - for (i = 0; i < tnfa->num_tags; i++) - match_tags[i] = tags[i]; - } - /* Our TNFAs never have transitions leaving from the final state, - so we jump right to backtracking. */ - goto backtrack; - } - - /* Go to the next character in the input string. */ - empty_br_match = 0; - trans_i = state; - if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) - { - /* This is a back reference state. All transitions leaving from - this state have the same back reference "assertion". Instead - of reading the next character, we match the back reference. */ - regoff_t so, eo; - int bt = trans_i->u.backref; - regoff_t bt_len; - int result; - - /* Get the substring we need to match against. Remember to - turn off REG_NOSUB temporarily. */ - tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, - tnfa, tags, pos); - so = pmatch[bt].rm_so; - eo = pmatch[bt].rm_eo; - bt_len = eo - so; - - result = strncmp((const char*)string + so, str_byte - 1, - (size_t)bt_len); - - if (result == 0) - { - /* Back reference matched. Check for infinite loop. */ - if (bt_len == 0) - empty_br_match = 1; - if (empty_br_match && states_seen[trans_i->state_id]) - { - goto backtrack; - } - - states_seen[trans_i->state_id] = empty_br_match; - - /* Advance in input string and resync `prev_c', `next_c' - and pos. */ - str_byte += bt_len - 1; - pos += bt_len - 1; - GET_NEXT_WCHAR(); - } - else - { - goto backtrack; - } - } - else - { - /* Check for end of string. */ - if (next_c == L'\0') - goto backtrack; - - /* Read the next character. */ - GET_NEXT_WCHAR(); - } - - next_state = NULL; - for (trans_i = state; trans_i->state; trans_i++) - { - if (trans_i->code_min <= (tre_cint_t)prev_c - && trans_i->code_max >= (tre_cint_t)prev_c) - { - if (trans_i->assertions - && (CHECK_ASSERTIONS(trans_i->assertions) - || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) - { - continue; - } - - if (next_state == NULL) - { - /* First matching transition. */ - next_state = trans_i->state; - next_tags = trans_i->tags; - } - else - { - /* Second matching transition. We may need to backtrack here - to take this transition instead of the first one, so we - push this transition in the backtracking stack so we can - jump back here if needed. */ - BT_STACK_PUSH(pos, str_byte, 0, trans_i->state, - trans_i->state_id, next_c, tags, mbstate); - { - int *tmp; - for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) - stack->item.tags[*tmp] = pos; - } -#if 0 /* XXX - it's important not to look at all transitions here to keep - the stack small! */ - break; -#endif - } - } - } - - if (next_state != NULL) - { - /* Matching transitions were found. Take the first one. */ - state = next_state; - - /* Update the tag values. */ - if (next_tags) - while (*next_tags >= 0) - tags[*next_tags++] = pos; - } - else - { - backtrack: - /* A matching transition was not found. Try to backtrack. */ - if (stack->prev) - { - if (stack->item.state->assertions & ASSERT_BACKREF) - { - states_seen[stack->item.state_id] = 0; - } - - BT_STACK_POP(); - } - else if (match_eo < 0) - { - /* Try starting from a later position in the input string. */ - /* Check for end of string. */ - if (next_c == L'\0') - { - break; - } - next_c = next_c_start; -#ifdef TRE_MBSTATE - mbstate = mbstate_start; -#endif /* TRE_MBSTATE */ - str_byte = str_byte_start; - goto retry; - } - else - { - break; - } - } - } - - ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; - *match_end_ofs = match_eo; - - error_exit: - tre_bt_mem_destroy(mem); -#ifndef TRE_USE_ALLOCA - if (tags) - xfree(tags); - if (pmatch) - xfree(pmatch); - if (states_seen) - xfree(states_seen); -#endif /* !TRE_USE_ALLOCA */ - - return ret; -} - -/*********************************************************************** - from regexec.c -***********************************************************************/ - -/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match - endpoint values. */ -static void -tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, - const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo) -{ - tre_submatch_data_t *submatch_data; - unsigned int i, j; - int *parents; - - i = 0; - if (match_eo >= 0 && !(cflags & REG_NOSUB)) - { - /* Construct submatch offsets from the tags. */ - submatch_data = tnfa->submatch_data; - while (i < tnfa->num_submatches && i < nmatch) - { - if (submatch_data[i].so_tag == tnfa->end_tag) - pmatch[i].rm_so = match_eo; - else - pmatch[i].rm_so = tags[submatch_data[i].so_tag]; - - if (submatch_data[i].eo_tag == tnfa->end_tag) - pmatch[i].rm_eo = match_eo; - else - pmatch[i].rm_eo = tags[submatch_data[i].eo_tag]; - - /* If either of the endpoints were not used, this submatch - was not part of the match. */ - if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) - pmatch[i].rm_so = pmatch[i].rm_eo = -1; - - i++; - } - /* Reset all submatches that are not within all of their parent - submatches. */ - i = 0; - while (i < tnfa->num_submatches && i < nmatch) - { - if (pmatch[i].rm_eo == -1) - assert(pmatch[i].rm_so == -1); - assert(pmatch[i].rm_so <= pmatch[i].rm_eo); - - parents = submatch_data[i].parents; - if (parents != NULL) - for (j = 0; parents[j] >= 0; j++) - { - if (pmatch[i].rm_so < pmatch[parents[j]].rm_so - || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo) - pmatch[i].rm_so = pmatch[i].rm_eo = -1; - } - i++; - } - } - - while (i < nmatch) - { - pmatch[i].rm_so = -1; - pmatch[i].rm_eo = -1; - i++; - } -} - - -/* - Wrapper functions for POSIX compatible regexp matching. -*/ - -int -regexec(const regex_t *restrict preg, const char *restrict string, - size_t nmatch, regmatch_t pmatch[restrict], int eflags) -{ - tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; - reg_errcode_t status; - regoff_t *tags = NULL, eo; - if (tnfa->cflags & REG_NOSUB) nmatch = 0; - if (tnfa->num_tags > 0 && nmatch > 0) - { - tags = xmalloc(sizeof(*tags) * tnfa->num_tags); - if (tags == NULL) - return REG_ESPACE; - } - - /* Dispatch to the appropriate matcher. */ - if (tnfa->have_backrefs) - { - /* The regex has back references, use the backtracking matcher. */ - status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo); - } - else - { - /* Exact matching, no back references, use the parallel matcher. */ - status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo); - } - - if (status == REG_OK) - /* A match was found, so fill the submatch registers. */ - tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); - if (tags) - xfree(tags); - return status; -}
\ No newline at end of file |