diff --git a/src/openalpr/postprocess/regexrule.cpp b/src/openalpr/postprocess/regexrule.cpp index 6bcc23d..57bd619 100644 --- a/src/openalpr/postprocess/regexrule.cpp +++ b/src/openalpr/postprocess/regexrule.cpp @@ -29,6 +29,7 @@ namespace alpr { RegexRule::RegexRule(string region, string pattern) + //: re2_regex("") { this->original = pattern; this->region = region; @@ -79,11 +80,11 @@ namespace alpr } else if (utf_character == "@") { - regexval << "\\" << "p" << "{Alpha}"; + regexval << "\\pL"; /* '@' = any Unicode letter */ } else if (utf_character == "#") { - regexval << "\\" << "p" << "{Digit}"; + regexval << "\\p{Nd}"; /* '#' = decimal digit (Nd); \pN would also admit Nl/No numerals */ } else if ((utf_character == "*") || (utf_character == "+")) { @@ -98,20 +99,12 @@ namespace alpr } this->regex = regexval.str(); - // Onigurama is not thread safe when compiling regex. Using a mutex to ensure that - // we don't crash - regexrule_mutex_m.lock(); - UChar* cstr_pattern = (UChar* )this->regex.c_str(); - OnigErrorInfo einfo; - int r = onig_new(&onig_regex, cstr_pattern, cstr_pattern + strlen((char* )cstr_pattern), - ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo); - - regexrule_mutex_m.unlock(); + re2_regex = new re2::RE2(this->regex); - if (r != ONIG_NORMAL) { - //char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - //onig_error_code_to_str(s, r, &einfo); + + + if (!re2_regex->ok()) { cerr << "Unable to load regex: " << pattern << endl; } else @@ -123,8 +116,7 @@ namespace alpr RegexRule::~RegexRule() { - onig_free(onig_regex); - onig_end(); + delete re2_regex; } bool RegexRule::match(string text) @@ -143,17 +135,9 @@ namespace alpr if (text_char_length != numchars) return false; - OnigRegion *region = onig_region_new(); - unsigned char *start, *end; - UChar* cstr_text = (UChar* )text.c_str(); - end = cstr_text + strlen((char* )cstr_text); - start = cstr_text; - - int match = onig_match(onig_regex, cstr_text, end, start, region, ONIG_OPTION_NONE); - - onig_region_free(region, 1); - - return match == text.length(); + bool match = re2::RE2::FullMatch(text, 
*re2_regex); + + return match; } string RegexRule::filterSkips(string text) diff --git a/src/openalpr/postprocess/regexrule.h b/src/openalpr/postprocess/regexrule.h index 8532e06..d071f61 100644 --- a/src/openalpr/postprocess/regexrule.h +++ b/src/openalpr/postprocess/regexrule.h @@ -24,7 +24,7 @@ #include #include #include -#include "support/regex/oniguruma.h" +#include "support/re2.h" #include "support/utf8.h" #include "support/tinythread.h" @@ -43,7 +43,7 @@ namespace alpr bool valid; int numchars; - regex_t* onig_regex; + re2::RE2* re2_regex; std::string original; std::string regex; std::string region; diff --git a/src/openalpr/support/CMakeLists.txt b/src/openalpr/support/CMakeLists.txt index 54b0a36..3e742c1 100644 --- a/src/openalpr/support/CMakeLists.txt +++ b/src/openalpr/support/CMakeLists.txt @@ -10,25 +10,39 @@ set(support_source_files ) set(regex_source_files -regex/regsyntax.c -regex/regposerr.c -regex/regcomp.c -regex/reggnu.c -regex/regerror.c -regex/regext.c -regex/regversion.c -regex/regparse.c -regex/regenc.c -regex/st.c -regex/regposix.c -regex/regexec.c -regex/regtrav.c -regex/ascii.c -regex/unicode.c -regex/utf8.c + + +re2/bitstate.cc +re2/compile.cc +re2/dfa.cc +re2/filtered_re2.cc +re2/mimics_pcre.cc +re2/nfa.cc +re2/onepass.cc +re2/parse.cc +re2/perl_groups.cc +re2/prefilter.cc +re2/prefilter_tree.cc +re2/prog.cc +re2/re2.cc +re2/regexp.cc +re2/set.cc +re2/simplify.cc +re2/stringpiece.cc +re2/tostring.cc +re2/unicode_casefold.cc +re2/unicode_groups.cc +re2/util/hash.cc +re2/util/stringprintf.cc +re2/util/rune.cc +re2/util/strutil.cc +re2/util/valgrind.cc + ) +include_directories(.) + add_library(support STATIC ${support_source_files} ${regex_source_files} diff --git a/src/openalpr/support/re2.h b/src/openalpr/support/re2.h new file mode 100644 index 0000000..bad75bb --- /dev/null +++ b/src/openalpr/support/re2.h @@ -0,0 +1,883 @@ +// Copyright 2003-2009 The RE2 Authors. All Rights Reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef RE2_RE2_H +#define RE2_RE2_H + +// C++ interface to the re2 regular-expression library. +// RE2 supports Perl-style regular expressions (with extensions like +// \d, \w, \s, ...). +// +// ----------------------------------------------------------------------- +// REGEXP SYNTAX: +// +// This module uses the re2 library and hence supports +// its syntax for regular expressions, which is similar to Perl's with +// some of the more complicated things thrown away. In particular, +// backreferences and generalized assertions are not available, nor is \Z. +// +// See https://github.com/google/re2/wiki/Syntax for the syntax +// supported by RE2, and a comparison with PCRE and PERL regexps. +// +// For those not familiar with Perl's regular expressions, +// here are some examples of the most commonly used extensions: +// +// "hello (\\w+) world" -- \w matches a "word" character +// "version (\\d+)" -- \d matches a digit +// "hello\\s+world" -- \s matches any whitespace character +// "\\b(\\w+)\\b" -- \b matches non-empty string at word boundary +// "(?i)hello" -- (?i) turns on case-insensitive matching +// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible +// +// ----------------------------------------------------------------------- +// MATCHING INTERFACE: +// +// The "FullMatch" operation checks that supplied text matches a +// supplied pattern exactly. +// +// Example: successful match +// CHECK(RE2::FullMatch("hello", "h.*o")); +// +// Example: unsuccessful match (requires full match): +// CHECK(!RE2::FullMatch("hello", "e")); +// +// ----------------------------------------------------------------------- +// UTF-8 AND THE MATCHING INTERFACE: +// +// By default, the pattern and input text are interpreted as UTF-8. +// The RE2::Latin1 option causes them to be interpreted as Latin-1. 
+// +// Example: +// CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern))); +// CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1))); +// +// ----------------------------------------------------------------------- +// MATCHING WITH SUB-STRING EXTRACTION: +// +// You can supply extra pointer arguments to extract matched subpieces. +// +// Example: extracts "ruby" into "s" and 1234 into "i" +// int i; +// string s; +// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); +// +// Example: fails because string cannot be stored in integer +// CHECK(!RE2::FullMatch("ruby", "(.*)", &i)); +// +// Example: fails because there aren't enough sub-patterns: +// CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s)); +// +// Example: does not try to extract any extra sub-patterns +// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s)); +// +// Example: does not try to extract into NULL +// CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i)); +// +// Example: integer overflow causes failure +// CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i)); +// +// NOTE(rsc): Asking for substrings slows successful matches quite a bit. +// This may get a little faster in the future, but right now is slower +// than PCRE. On the other hand, failed matches run *very* fast (faster +// than PCRE), as do matches without substring extraction. +// +// ----------------------------------------------------------------------- +// PARTIAL MATCHES +// +// You can use the "PartialMatch" operation when you want the pattern +// to match any substring of the text. 
+// +// Example: simple search for a string: +// CHECK(RE2::PartialMatch("hello", "ell")); +// +// Example: find first number in a string +// int number; +// CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number)); +// CHECK_EQ(number, 100); +// +// ----------------------------------------------------------------------- +// PRE-COMPILED REGULAR EXPRESSIONS +// +// RE2 makes it easy to use any string as a regular expression, without +// requiring a separate compilation step. +// +// If speed is of the essence, you can create a pre-compiled "RE2" +// object from the pattern and use it multiple times. If you do so, +// you can typically parse text faster than with sscanf. +// +// Example: precompile pattern for faster matching: +// RE2 pattern("h.*o"); +// while (ReadLine(&str)) { +// if (RE2::FullMatch(str, pattern)) ...; +// } +// +// ----------------------------------------------------------------------- +// SCANNING TEXT INCREMENTALLY +// +// The "Consume" operation may be useful if you want to repeatedly +// match regular expressions at the front of a string and skip over +// them as they match. This requires use of the "StringPiece" type, +// which represents a sub-range of a real string. +// +// Example: read lines of the form "var = value" from a string. +// string contents = ...; // Fill string somehow +// StringPiece input(contents); // Wrap a StringPiece around it +// +// string var; +// int value; +// while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) { +// ...; +// } +// +// Each successful call to "Consume" will set "var/value", and also +// advance "input" so it points past the matched text. Note that if the +// regular expression matches an empty string, input will advance +// by 0 bytes. If the regular expression being used might match +// an empty string, the loop body must check for this case and either +// advance the string or break out of the loop. 
+// +// The "FindAndConsume" operation is similar to "Consume" but does not +// anchor your match at the beginning of the string. For example, you +// could extract all words from a string by repeatedly calling +// RE2::FindAndConsume(&input, "(\\w+)", &word) +// +// ----------------------------------------------------------------------- +// USING VARIABLE NUMBER OF ARGUMENTS +// +// The above operations require you to know the number of arguments +// when you write the code. This is not always possible or easy (for +// example, the regular expression may be calculated at run time). +// You can use the "N" version of the operations when the number of +// match arguments are determined at run time. +// +// Example: +// const RE2::Arg* args[10]; +// int n; +// // ... populate args with pointers to RE2::Arg values ... +// // ... set n to the number of RE2::Arg objects ... +// bool match = RE2::FullMatchN(input, pattern, args, n); +// +// The last statement is equivalent to +// +// bool match = RE2::FullMatch(input, pattern, +// *args[0], *args[1], ..., *args[n - 1]); +// +// ----------------------------------------------------------------------- +// PARSING HEX/OCTAL/C-RADIX NUMBERS +// +// By default, if you pass a pointer to a numeric value, the +// corresponding text is interpreted as a base-10 number. You can +// instead wrap the pointer with a call to one of the operators Hex(), +// Octal(), or CRadix() to interpret the text in another base. The +// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) +// prefixes, but defaults to base-10. +// +// Example: +// int a, b, c, d; +// CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)", +// RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d)); +// will leave 64 in a, b, c, and d. 
+ +#include <stdint.h> +#include <map> +#include <string> +#include "re2/stringpiece.h" +#include "re2/variadic_function.h" + +#ifndef RE2_HAVE_LONGLONG +#define RE2_HAVE_LONGLONG 1 +#endif + +namespace re2 { + +using std::string; +using std::map; +class Mutex; +class Prog; +class Regexp; + +// The following enum should be used only as a constructor argument to indicate +// that the variable has static storage class, and that the constructor should +// do nothing to its state. It indicates to the reader that it is legal to +// declare a static instance of the class, provided the constructor is given +// the LINKER_INITIALIZED argument. Normally, it is unsafe to declare a +// static variable that has a constructor or a destructor because invocation +// order is undefined. However, IF the type can be initialized by filling with +// zeroes (which the loader does for static variables), AND the type's +// destructor does nothing to the storage, then a constructor for static +// initialization can be declared as +// explicit MyClass(LinkerInitialized x) {} +// and invoked as +// static MyClass my_variable_name(LINKER_INITIALIZED); +enum LinkerInitialized { LINKER_INITIALIZED }; + +// Interface for regular expression matching. Also corresponds to a +// pre-compiled regular expression. An "RE2" object is safe for +// concurrent use by multiple threads. +class RE2 { + public: + // We convert user-passed pointers into special Arg objects + class Arg; + class Options; + + // Defined in set.h. + class Set; + + enum ErrorCode { + NoError = 0, + + // Unexpected error + ErrorInternal, + + // Parse errors + ErrorBadEscape, // bad escape sequence + ErrorBadCharClass, // bad character class + ErrorBadCharRange, // bad character class range + ErrorMissingBracket, // missing closing ] + ErrorMissingParen, // missing closing ) + ErrorTrailingBackslash, // trailing \ at end of regexp + ErrorRepeatArgument, // repeat argument missing, e.g. 
"*" + ErrorRepeatSize, // bad repetition argument + ErrorRepeatOp, // bad repetition operator + ErrorBadPerlOp, // bad perl operator + ErrorBadUTF8, // invalid UTF-8 in regexp + ErrorBadNamedCapture, // bad named capture group + ErrorPatternTooLarge // pattern too large (compile failed) + }; + + // Predefined common options. + // If you need more complicated things, instantiate + // an Options class, possibly passing one of these to + // the Options constructor, change the settings, and pass that + // Options class to the RE2 constructor. + enum CannedOptions { + DefaultOptions = 0, + Latin1, // treat input as Latin-1 (default UTF-8) + POSIX, // POSIX syntax, leftmost-longest match + Quiet // do not log about regexp parse errors + }; + + // Need to have the const char* and const string& forms for implicit + // conversions when passing string literals to FullMatch and PartialMatch. + // Otherwise the StringPiece form would be sufficient. +#ifndef SWIG + RE2(const char* pattern); + RE2(const string& pattern); +#endif + RE2(const StringPiece& pattern); + RE2(const StringPiece& pattern, const Options& option); + ~RE2(); + + // Returns whether RE2 was created properly. + bool ok() const { return error_code() == NoError; } + + // The string specification for this RE2. E.g. + // RE2 re("ab*c?d+"); + // re.pattern(); // "ab*c?d+" + const string& pattern() const { return pattern_; } + + // If RE2 could not be created properly, returns an error string. + // Else returns the empty string. + const string& error() const { return *error_; } + + // If RE2 could not be created properly, returns an error code. + // Else returns RE2::NoError (== 0). + ErrorCode error_code() const { return error_code_; } + + // If RE2 could not be created properly, returns the offending + // portion of the regexp. + const string& error_arg() const { return error_arg_; } + + // Returns the program size, a very approximate measure of a regexp's "cost". 
+ // Larger numbers are more expensive than smaller numbers. + int ProgramSize() const; + + // EXPERIMENTAL! SUBJECT TO CHANGE! + // Outputs the program fanout as a histogram bucketed by powers of 2. + // Returns the number of the largest non-empty bucket. + int ProgramFanout(map<int, int>* histogram) const; + + // Returns the underlying Regexp; not for general use. + // Returns entire_regexp_ so that callers don't need + // to know about prefix_ and prefix_foldcase_. + re2::Regexp* Regexp() const { return entire_regexp_; } + + /***** The useful part: the matching interface *****/ + + // Matches "text" against "pattern". If pointer arguments are + // supplied, copies matched sub-patterns into them. + // + // You can pass in a "const char*" or a "string" for "text". + // You can pass in a "const char*" or a "string" or a "RE2" for "pattern". + // + // The provided pointer arguments can be pointers to any scalar numeric + // type, or one of: + // string (matched piece is copied to string) + // StringPiece (StringPiece is mutated to point to matched piece) + // T (where "bool T::ParseFrom(const char*, int)" exists) + // (void*)NULL (the corresponding matched sub-pattern is not copied) + // + // Returns true iff all of the following conditions are satisfied: + // a. "text" matches "pattern" exactly + // b. The number of matched sub-patterns is >= number of supplied pointers + // c. The "i"th argument has a suitable type for holding the + // string captured as the "i"th sub-pattern. If you pass in + // NULL for the "i"th argument, or pass fewer arguments than + // number of sub-patterns, "i"th captured sub-pattern is + // ignored. + // + // CAVEAT: An optional sub-pattern that does not exist in the + // matched string is assigned the empty string. 
Therefore, the + // following will return false (because the empty string is not a + // valid number): + // int number; + // RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number); + static bool FullMatchN(const StringPiece& text, const RE2& re, + const Arg* const args[], int argc); + static const VariadicFunction2< + bool, const StringPiece&, const RE2&, Arg, RE2::FullMatchN> FullMatch; + + // Exactly like FullMatch(), except that "pattern" is allowed to match + // a substring of "text". + static bool PartialMatchN(const StringPiece& text, const RE2& re, // 3..16 args + const Arg* const args[], int argc); + static const VariadicFunction2< + bool, const StringPiece&, const RE2&, Arg, RE2::PartialMatchN> PartialMatch; + + // Like FullMatch() and PartialMatch(), except that pattern has to + // match a prefix of "text", and "input" is advanced past the matched + // text. Note: "input" is modified iff this routine returns true. + static bool ConsumeN(StringPiece* input, const RE2& pattern, // 3..16 args + const Arg* const args[], int argc); + static const VariadicFunction2< + bool, StringPiece*, const RE2&, Arg, RE2::ConsumeN> Consume; + + // Like Consume(..), but does not anchor the match at the beginning of the + // string. That is, "pattern" need not start its match at the beginning of + // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next + // word in "s" and stores it in "word". + static bool FindAndConsumeN(StringPiece* input, const RE2& pattern, + const Arg* const args[], int argc); + static const VariadicFunction2< + bool, StringPiece*, const RE2&, Arg, RE2::FindAndConsumeN> FindAndConsume; + + // Replace the first match of "pattern" in "str" with "rewrite". + // Within "rewrite", backslash-escaped digits (\1 to \9) can be + // used to insert text matching corresponding parenthesized group + // from the pattern. \0 in "rewrite" refers to the entire matching + // text. 
E.g., + // + // string s = "yabba dabba doo"; + // CHECK(RE2::Replace(&s, "b+", "d")); + // + // will leave "s" containing "yada dabba doo" + // + // Returns true if the pattern matches and a replacement occurs, + // false otherwise. + static bool Replace(string *str, + const RE2& pattern, + const StringPiece& rewrite); + + // Like Replace(), except replaces successive non-overlapping occurrences + // of the pattern in the string with the rewrite. E.g. + // + // string s = "yabba dabba doo"; + // CHECK(RE2::GlobalReplace(&s, "b+", "d")); + // + // will leave "s" containing "yada dada doo" + // Replacements are not subject to re-matching. + // + // Because GlobalReplace only replaces non-overlapping matches, + // replacing "ana" within "banana" makes only one replacement, not two. + // + // Returns the number of replacements made. + static int GlobalReplace(string *str, + const RE2& pattern, + const StringPiece& rewrite); + + // Like Replace, except that if the pattern matches, "rewrite" + // is copied into "out" with substitutions. The non-matching + // portions of "text" are ignored. + // + // Returns true iff a match occurred and the extraction happened + // successfully; if no match occurs, the string is left unaffected. + // + // REQUIRES: "text" must not alias any part of "*out". + static bool Extract(const StringPiece &text, + const RE2& pattern, + const StringPiece &rewrite, + string *out); + + // Escapes all potentially meaningful regexp characters in + // 'unquoted'. The returned string, used as a regular expression, + // will exactly match the original string. For example, + // 1.5-2.0? + // may become: + // 1\.5\-2\.0\? + static string QuoteMeta(const StringPiece& unquoted); + + // Computes range for any strings matching regexp. The min and max can in + // some cases be arbitrarily precise, so the caller gets to specify the + // maximum desired length of string returned. 
+ // + // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any + // string s that is an anchored match for this regexp satisfies + // min <= s && s <= max. + // + // Note that PossibleMatchRange() will only consider the first copy of an + // infinitely repeated element (i.e., any regexp element followed by a '*' or + // '+' operator). Regexps with "{N}" constructions are not affected, as those + // do not compile down to infinite repetitions. + // + // Returns true on success, false on error. + bool PossibleMatchRange(string* min, string* max, int maxlen) const; + + // Generic matching interface + + // Type of match. + enum Anchor { + UNANCHORED, // No anchoring + ANCHOR_START, // Anchor at start only + ANCHOR_BOTH // Anchor at start and end + }; + + // Return the number of capturing subpatterns, or -1 if the + // regexp wasn't valid on construction. The overall match ($0) + // does not count: if the regexp is "(a)(b)", returns 2. + int NumberOfCapturingGroups() const; + + + // Return a map from names to capturing indices. + // The map records the index of the leftmost group + // with the given name. + // Only valid until the re is deleted. + const map<string, int>& NamedCapturingGroups() const; + + // Return a map from capturing indices to names. + // The map has no entries for unnamed groups. + // Only valid until the re is deleted. + const map<int, string>& CapturingGroupNames() const; + + // General matching routine. + // Match against text starting at offset startpos + // and stopping the search at offset endpos. + // Returns true if match found, false if not. + // On a successful match, fills in match[] (up to nmatch entries) + // with information about submatches. + // I.e. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, + // setting match[0] = "barbaz", match[1] = NULL, match[2] = "bar", + // match[3] = NULL, ..., up to match[nmatch-1] = NULL. 
+ // + // Don't ask for more match information than you will use: + // runs much faster with nmatch == 1 than nmatch > 1, and + // runs even faster if nmatch == 0. + // Doesn't make sense to use nmatch > 1 + NumberOfCapturingGroups(), + // but will be handled correctly. + // + // Passing text == StringPiece(NULL, 0) will be handled like any other + // empty string, but note that on return, it will not be possible to tell + // whether submatch i matched the empty string or did not match: + // either way, match[i] == NULL. + bool Match(const StringPiece& text, + int startpos, + int endpos, + Anchor anchor, + StringPiece *match, + int nmatch) const; + + // Check that the given rewrite string is suitable for use with this + // regular expression. It checks that: + // * The regular expression has enough parenthesized subexpressions + // to satisfy all of the \N tokens in rewrite + // * The rewrite string doesn't have any syntax errors. E.g., + // '\' followed by anything other than a digit or '\'. + // A true return value guarantees that Replace() and Extract() won't + // fail because of a bad rewrite string. + bool CheckRewriteString(const StringPiece& rewrite, string* error) const; + + // Returns the maximum submatch needed for the rewrite to be done by + // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2. + static int MaxSubmatch(const StringPiece& rewrite); + + // Append the "rewrite" string, with backslash substitutions from "vec", + // to string "out". + // Returns true on success. This method can fail because of a malformed + // rewrite string. CheckRewriteString guarantees that the rewrite will + // be successful. 
+ bool Rewrite(string *out, + const StringPiece &rewrite, + const StringPiece* vec, + int veclen) const; + + // Constructor options + class Options { + public: + // The options are (defaults in parentheses): + // + // utf8 (true) text and pattern are UTF-8; otherwise Latin-1 + // posix_syntax (false) restrict regexps to POSIX egrep syntax + // longest_match (false) search for longest match, not first match + // log_errors (true) log syntax and execution errors to ERROR + // max_mem (see below) approx. max memory footprint of RE2 + // literal (false) interpret string as literal, not regexp + // never_nl (false) never match \n, even if it is in regexp + // dot_nl (false) dot matches everything including new line + // never_capture (false) parse all parens as non-capturing + // case_sensitive (true) match is case-sensitive (regexp can override + // with (?i) unless in posix_syntax mode) + // + // The following options are only consulted when posix_syntax == true. + // (When posix_syntax == false these features are always enabled and + // cannot be turned off.) + // perl_classes (false) allow Perl's \d \s \w \D \S \W + // word_boundary (false) allow Perl's \b \B (word boundary and not) + // one_line (false) ^ and $ only match beginning and end of text + // + // The max_mem option controls how much memory can be used + // to hold the compiled form of the regexp (the Prog) and + // its cached DFA graphs. Code Search placed limits on the number + // of Prog instructions and DFA states: 10,000 for both. + // In RE2, those limits would translate to about 240 KB per Prog + // and perhaps 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 does a + // better job of keeping them small than Code Search did). + // Each RE2 has two Progs (one forward, one reverse), and each Prog + // can have two DFAs (one first match, one longest match). 
+ // That makes 4 DFAs: + // + // forward, first-match - used for UNANCHORED or ANCHOR_START searches + // if opt.longest_match() == false + // forward, longest-match - used for all ANCHOR_BOTH searches, + // and the other two kinds if + // opt.longest_match() == true + // reverse, first-match - never used + // reverse, longest-match - used as second phase for unanchored searches + // + // The RE2 memory budget is statically divided between the two + // Progs and then the DFAs: two thirds to the forward Prog + // and one third to the reverse Prog. The forward Prog gives half + // of what it has left over to each of its DFAs. The reverse Prog + // gives it all to its longest-match DFA. + // + // Once a DFA fills its budget, it flushes its cache and starts over. + // If this happens too often, RE2 falls back on the NFA implementation. + + // For now, make the default budget something close to Code Search. + static const int kDefaultMaxMem = 8<<20; + + enum Encoding { + EncodingUTF8 = 1, + EncodingLatin1 + }; + + Options() : + encoding_(EncodingUTF8), + posix_syntax_(false), + longest_match_(false), + log_errors_(true), + max_mem_(kDefaultMaxMem), + literal_(false), + never_nl_(false), + dot_nl_(false), + never_capture_(false), + case_sensitive_(true), + perl_classes_(false), + word_boundary_(false), + one_line_(false) { + } + + /*implicit*/ Options(CannedOptions); + + Encoding encoding() const { return encoding_; } + void set_encoding(Encoding encoding) { encoding_ = encoding; } + + // Legacy interface to encoding. + // TODO(rsc): Remove once clients have been converted. 
+ bool utf8() const { return encoding_ == EncodingUTF8; } + void set_utf8(bool b) { + if (b) { + encoding_ = EncodingUTF8; + } else { + encoding_ = EncodingLatin1; + } + } + + bool posix_syntax() const { return posix_syntax_; } + void set_posix_syntax(bool b) { posix_syntax_ = b; } + + bool longest_match() const { return longest_match_; } + void set_longest_match(bool b) { longest_match_ = b; } + + bool log_errors() const { return log_errors_; } + void set_log_errors(bool b) { log_errors_ = b; } + + int64_t max_mem() const { return max_mem_; } + void set_max_mem(int64_t m) { max_mem_ = m; } + + bool literal() const { return literal_; } + void set_literal(bool b) { literal_ = b; } + + bool never_nl() const { return never_nl_; } + void set_never_nl(bool b) { never_nl_ = b; } + + bool dot_nl() const { return dot_nl_; } + void set_dot_nl(bool b) { dot_nl_ = b; } + + bool never_capture() const { return never_capture_; } + void set_never_capture(bool b) { never_capture_ = b; } + + bool case_sensitive() const { return case_sensitive_; } + void set_case_sensitive(bool b) { case_sensitive_ = b; } + + bool perl_classes() const { return perl_classes_; } + void set_perl_classes(bool b) { perl_classes_ = b; } + + bool word_boundary() const { return word_boundary_; } + void set_word_boundary(bool b) { word_boundary_ = b; } + + bool one_line() const { return one_line_; } + void set_one_line(bool b) { one_line_ = b; } + + void Copy(const Options& src) { + encoding_ = src.encoding_; + posix_syntax_ = src.posix_syntax_; + longest_match_ = src.longest_match_; + log_errors_ = src.log_errors_; + max_mem_ = src.max_mem_; + literal_ = src.literal_; + never_nl_ = src.never_nl_; + dot_nl_ = src.dot_nl_; + never_capture_ = src.never_capture_; + case_sensitive_ = src.case_sensitive_; + perl_classes_ = src.perl_classes_; + word_boundary_ = src.word_boundary_; + one_line_ = src.one_line_; + } + + int ParseFlags() const; + + private: + Encoding encoding_; + bool posix_syntax_; + bool 
longest_match_; + bool log_errors_; + int64_t max_mem_; + bool literal_; + bool never_nl_; + bool dot_nl_; + bool never_capture_; + bool case_sensitive_; + bool perl_classes_; + bool word_boundary_; + bool one_line_; + + //DISALLOW_COPY_AND_ASSIGN(Options); + Options(const Options&); + void operator=(const Options&); + }; + + // Returns the options set in the constructor. + const Options& options() const { return options_; } + + // Argument converters; see below. + static inline Arg CRadix(short* x); + static inline Arg CRadix(unsigned short* x); + static inline Arg CRadix(int* x); + static inline Arg CRadix(unsigned int* x); + static inline Arg CRadix(long* x); + static inline Arg CRadix(unsigned long* x); + #if RE2_HAVE_LONGLONG + static inline Arg CRadix(long long* x); + static inline Arg CRadix(unsigned long long* x); + #endif + + static inline Arg Hex(short* x); + static inline Arg Hex(unsigned short* x); + static inline Arg Hex(int* x); + static inline Arg Hex(unsigned int* x); + static inline Arg Hex(long* x); + static inline Arg Hex(unsigned long* x); + #if RE2_HAVE_LONGLONG + static inline Arg Hex(long long* x); + static inline Arg Hex(unsigned long long* x); + #endif + + static inline Arg Octal(short* x); + static inline Arg Octal(unsigned short* x); + static inline Arg Octal(int* x); + static inline Arg Octal(unsigned int* x); + static inline Arg Octal(long* x); + static inline Arg Octal(unsigned long* x); + #if RE2_HAVE_LONGLONG + static inline Arg Octal(long long* x); + static inline Arg Octal(unsigned long long* x); + #endif + + private: + void Init(const StringPiece& pattern, const Options& options); + + bool DoMatch(const StringPiece& text, + Anchor anchor, + int* consumed, + const Arg* const args[], + int n) const; + + re2::Prog* ReverseProg() const; + + mutable Mutex* mutex_; + string pattern_; // string regular expression + Options options_; // option flags + string prefix_; // required prefix (before regexp_) + bool prefix_foldcase_; // prefix 
is ASCII case-insensitive + re2::Regexp* entire_regexp_; // parsed regular expression + re2::Regexp* suffix_regexp_; // parsed regular expression, prefix removed + re2::Prog* prog_; // compiled program for regexp + mutable re2::Prog* rprog_; // reverse program for regexp + bool is_one_pass_; // can use prog_->SearchOnePass? + mutable const string* error_; // Error indicator + // (or points to empty string) + mutable ErrorCode error_code_; // Error code + mutable string error_arg_; // Fragment of regexp showing error + mutable int num_captures_; // Number of capturing groups + + // Map from capture names to indices + mutable const map<string, int>* named_groups_; + + // Map from capture indices to names + mutable const map<int, string>* group_names_; + + //DISALLOW_COPY_AND_ASSIGN(RE2); + RE2(const RE2&); + void operator=(const RE2&); +}; + +/***** Implementation details *****/ + +// Hex/Octal/Binary? + +// Special class for parsing into objects that define a ParseFrom() method +template <class T> +class _RE2_MatchObject { + public: + static inline bool Parse(const char* str, int n, void* dest) { + if (dest == NULL) return true; + T* object = reinterpret_cast<T*>(dest); + return object->ParseFrom(str, n); + } +}; + +class RE2::Arg { + public: + // Empty constructor so we can declare arrays of RE2::Arg + Arg(); + + // Constructor specially designed for NULL arguments + Arg(void*); + + typedef bool (*Parser)(const char* str, int n, void* dest); + +// Type-specific parsers +#define MAKE_PARSER(type,name) \ + Arg(type* p) : arg_(p), parser_(name) { } \ + Arg(type* p, Parser parser) : arg_(p), parser_(parser) { } \ + + + MAKE_PARSER(char, parse_char); + MAKE_PARSER(signed char, parse_char); + MAKE_PARSER(unsigned char, parse_uchar); + MAKE_PARSER(short, parse_short); + MAKE_PARSER(unsigned short, parse_ushort); + MAKE_PARSER(int, parse_int); + MAKE_PARSER(unsigned int, parse_uint); + MAKE_PARSER(long, parse_long); + MAKE_PARSER(unsigned long, parse_ulong); + #if 
RE2_HAVE_LONGLONG + MAKE_PARSER(long long, 
parse_longlong); + MAKE_PARSER(unsigned long long, parse_ulonglong); + #endif + MAKE_PARSER(float, parse_float); + MAKE_PARSER(double, parse_double); + MAKE_PARSER(string, parse_string); + MAKE_PARSER(StringPiece, parse_stringpiece); + +#undef MAKE_PARSER + + // Generic constructor templates + template <class T> Arg(T* p) + : arg_(p), parser_(_RE2_MatchObject<T>::Parse) { } + template <class T> Arg(T* p, Parser parser) + : arg_(p), parser_(parser) { } + + // Parse the data + bool Parse(const char* str, int n) const; + + private: + void* arg_; + Parser parser_; + + static bool parse_null (const char* str, int n, void* dest); + static bool parse_char (const char* str, int n, void* dest); + static bool parse_uchar (const char* str, int n, void* dest); + static bool parse_float (const char* str, int n, void* dest); + static bool parse_double (const char* str, int n, void* dest); + static bool parse_string (const char* str, int n, void* dest); + static bool parse_stringpiece (const char* str, int n, void* dest); + +#define DECLARE_INTEGER_PARSER(name) \ + private: \ + static bool parse_ ## name(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _radix( \ + const char* str, int n, void* dest, int radix); \ + public: \ + static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _cradix(const char* str, int n, void* dest) + + DECLARE_INTEGER_PARSER(short); + DECLARE_INTEGER_PARSER(ushort); + DECLARE_INTEGER_PARSER(int); + DECLARE_INTEGER_PARSER(uint); + DECLARE_INTEGER_PARSER(long); + DECLARE_INTEGER_PARSER(ulong); + #if RE2_HAVE_LONGLONG + DECLARE_INTEGER_PARSER(longlong); + DECLARE_INTEGER_PARSER(ulonglong); + #endif + +#undef DECLARE_INTEGER_PARSER +}; + +inline RE2::Arg::Arg() : arg_(NULL), parser_(parse_null) { } +inline RE2::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } + +inline bool RE2::Arg::Parse(const char* str, int n) const { + 
return (*parser_)(str, n, arg_); +} + +// This part of the parser, appropriate only for ints, deals with bases +#define MAKE_INTEGER_PARSER(type, name) \ + inline RE2::Arg RE2::Hex(type* ptr) { \ + return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _hex); } \ + inline RE2::Arg RE2::Octal(type* ptr) { \ + return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _octal); } \ + inline RE2::Arg RE2::CRadix(type* ptr) { \ + return RE2::Arg(ptr, RE2::Arg::parse_ ## name ## _cradix); } + +MAKE_INTEGER_PARSER(short, short) +MAKE_INTEGER_PARSER(unsigned short, ushort) +MAKE_INTEGER_PARSER(int, int) +MAKE_INTEGER_PARSER(unsigned int, uint) +MAKE_INTEGER_PARSER(long, long) +MAKE_INTEGER_PARSER(unsigned long, ulong) +#if RE2_HAVE_LONGLONG +MAKE_INTEGER_PARSER(long long, longlong) +MAKE_INTEGER_PARSER(unsigned long long, ulonglong) +#endif + +#undef MAKE_INTEGER_PARSER + +} // namespace re2 + +using re2::RE2; + +#endif /* RE2_RE2_H */ diff --git a/src/openalpr/support/re2/bitstate.cc b/src/openalpr/support/re2/bitstate.cc new file mode 100644 index 0000000..8ced6ea --- /dev/null +++ b/src/openalpr/support/re2/bitstate.cc @@ -0,0 +1,380 @@ +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tested by search_test.cc, exhaustive_test.cc, tester.cc + +// Prog::SearchBitState is a regular expression search with submatch +// tracking for small regular expressions and texts. Like +// testing/backtrack.cc, it allocates a bit vector with (length of +// text) * (length of prog) bits, to make sure it never explores the +// same (character position, instruction) state multiple times. This +// limits the search to run in time linear in the length of the text. +// +// Unlike testing/backtrack.cc, SearchBitState is not recursive +// on the text. +// +// SearchBitState is a fast replacement for the NFA code on small +// regexps and texts when SearchOnePass cannot be used.
+ +#include "re2/prog.h" +#include "re2/regexp.h" + +namespace re2 { + +struct Job { + int id; + int arg; + const char* p; +}; + +class BitState { + public: + explicit BitState(Prog* prog); + ~BitState(); + + // The usual Search prototype. + // Can only call Search once per BitState. + bool Search(const StringPiece& text, const StringPiece& context, + bool anchored, bool longest, + StringPiece* submatch, int nsubmatch); + + private: + inline bool ShouldVisit(int id, const char* p); + void Push(int id, const char* p, int arg); + bool GrowStack(); + bool TrySearch(int id, const char* p); + + // Search parameters + Prog* prog_; // program being run + StringPiece text_; // text being searched + StringPiece context_; // greater context of text being searched + bool anchored_; // whether search is anchored at text.begin() + bool longest_; // whether search wants leftmost-longest match + bool endmatch_; // whether match must end at text.end() + StringPiece *submatch_; // submatches to fill in + int nsubmatch_; // # of submatches to fill in + + // Search state + const char** cap_; // capture registers + int ncap_; + + static const int VisitedBits = 32; + uint32 *visited_; // bitmap: (Inst*, char*) pairs already backtracked + int nvisited_; // # of words in bitmap + + Job *job_; // stack of text positions to explore + int njob_; + int maxjob_; +}; + +BitState::BitState(Prog* prog) + : prog_(prog), + anchored_(false), + longest_(false), + endmatch_(false), + submatch_(NULL), + nsubmatch_(0), + cap_(NULL), + ncap_(0), + visited_(NULL), + nvisited_(0), + job_(NULL), + njob_(0), + maxjob_(0) { +} + +BitState::~BitState() { + delete[] visited_; + delete[] job_; + delete[] cap_; +} + +// Should the search visit the pair ip, p? +// If so, remember that it was visited so that the next time, +// we don't repeat the visit. 
+bool BitState::ShouldVisit(int id, const char* p) { + uint n = id * (text_.size() + 1) + (p - text_.begin()); + if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1)))) + return false; + visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1)); + return true; +} + +// Grow the stack. +bool BitState::GrowStack() { + // VLOG(0) << "Reallocate."; + maxjob_ *= 2; + Job* newjob = new Job[maxjob_]; + memmove(newjob, job_, njob_*sizeof job_[0]); + delete[] job_; + job_ = newjob; + if (njob_ >= maxjob_) { + LOG(DFATAL) << "Job stack overflow."; + return false; + } + return true; +} + +// Push the triple (id, p, arg) onto the stack, growing it if necessary. +void BitState::Push(int id, const char* p, int arg) { + if (njob_ >= maxjob_) { + if (!GrowStack()) + return; + } + int op = prog_->inst(id)->opcode(); + if (op == kInstFail) + return; + + // Only check ShouldVisit when arg == 0. + // When arg > 0, we are continuing a previous visit. + if (arg == 0 && !ShouldVisit(id, p)) + return; + + Job* j = &job_[njob_++]; + j->id = id; + j->p = p; + j->arg = arg; +} + +// Try a search from instruction id0 in state p0. +// Return whether it succeeded. +bool BitState::TrySearch(int id0, const char* p0) { + bool matched = false; + const char* end = text_.end(); + njob_ = 0; + Push(id0, p0, 0); + while (njob_ > 0) { + // Pop job off stack. + --njob_; + int id = job_[njob_].id; + const char* p = job_[njob_].p; + int arg = job_[njob_].arg; + + // Optimization: rather than push and pop, + // code that is going to Push and continue + // the loop simply updates ip, p, and arg + // and jumps to CheckAndLoop. We have to + // do the ShouldVisit check that Push + // would have, but we avoid the stack + // manipulation. + if (0) { + CheckAndLoop: + if (!ShouldVisit(id, p)) + continue; + } + + // Visit ip, p. 
+ // VLOG(0) << "Job: " << ip->id() << " " + // << (p - text_.begin()) << " " << arg; + Prog::Inst* ip = prog_->inst(id); + switch (ip->opcode()) { + case kInstFail: + return false; + + default: + LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg; + return false; + + case kInstAlt: + // Cannot just + // Push(ip->out1(), p, 0); + // Push(ip->out(), p, 0); + // If, during the processing of ip->out(), we encounter + // ip->out1() via another path, we want to process it then. + // Pushing it here will inhibit that. Instead, re-push + // ip with arg==1 as a reminder to push ip->out1() later. + switch (arg) { + case 0: + Push(id, p, 1); // come back when we're done + id = ip->out(); + goto CheckAndLoop; + + case 1: + // Finished ip->out(); try ip->out1(). + arg = 0; + id = ip->out1(); + goto CheckAndLoop; + } + LOG(DFATAL) << "Bad arg in kInstCapture: " << arg; + continue; + + case kInstAltMatch: + // One opcode is byte range; the other leads to match. + if (ip->greedy(prog_)) { + // out1 is the match + Push(ip->out1(), p, 0); + id = ip->out1(); + p = end; + goto CheckAndLoop; + } + // out is the match - non-greedy + Push(ip->out(), end, 0); + id = ip->out(); + goto CheckAndLoop; + + case kInstByteRange: { + int c = -1; + if (p < end) + c = *p & 0xFF; + if (ip->Matches(c)) { + id = ip->out(); + p++; + goto CheckAndLoop; + } + continue; + } + + case kInstCapture: + switch (arg) { + case 0: + if (0 <= ip->cap() && ip->cap() < ncap_) { + // Capture p to register, but save old value. + Push(id, cap_[ip->cap()], 1); // come back when we're done + cap_[ip->cap()] = p; + } + // Continue on. + id = ip->out(); + goto CheckAndLoop; + case 1: + // Finished ip->out(); restore the old value. 
+ cap_[ip->cap()] = p; + continue; + } + LOG(DFATAL) << "Bad arg in kInstCapture: " << arg; + continue; + + case kInstEmptyWidth: + if (ip->empty() & ~Prog::EmptyFlags(context_, p)) + continue; + id = ip->out(); + goto CheckAndLoop; + + case kInstNop: + id = ip->out(); + goto CheckAndLoop; + + case kInstMatch: { + if (endmatch_ && p != text_.end()) + continue; + + // VLOG(0) << "Found match."; + // We found a match. If the caller doesn't care + // where the match is, no point going further. + if (nsubmatch_ == 0) + return true; + + // Record best match so far. + // Only need to check end point, because this entire + // call is only considering one start position. + matched = true; + cap_[1] = p; + if (submatch_[0].data() == NULL || + (longest_ && p > submatch_[0].end())) { + for (int i = 0; i < nsubmatch_; i++) + submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]); + } + + // If going for first match, we're done. + if (!longest_) + return true; + + // If we used the entire text, no longer match is possible. + if (p == text_.end()) + return true; + + // Otherwise, continue on in hope of a longer match. + continue; + } + } + } + return matched; +} + +// Search text (within context) for prog_. +bool BitState::Search(const StringPiece& text, const StringPiece& context, + bool anchored, bool longest, + StringPiece* submatch, int nsubmatch) { + // Search parameters. + text_ = text; + context_ = context; + if (context_.begin() == NULL) + context_ = text; + if (prog_->anchor_start() && context_.begin() != text.begin()) + return false; + if (prog_->anchor_end() && context_.end() != text.end()) + return false; + anchored_ = anchored || prog_->anchor_start(); + longest_ = longest || prog_->anchor_end(); + endmatch_ = prog_->anchor_end(); + submatch_ = submatch; + nsubmatch_ = nsubmatch; + for (int i = 0; i < nsubmatch_; i++) + submatch_[i] = NULL; + + // Allocate scratch space. 
+ nvisited_ = (prog_->size() * (text.size()+1) + VisitedBits-1) / VisitedBits; + visited_ = new uint32[nvisited_]; + memset(visited_, 0, nvisited_*sizeof visited_[0]); + // VLOG(0) << "nvisited_ = " << nvisited_; + + ncap_ = 2*nsubmatch; + if (ncap_ < 2) + ncap_ = 2; + cap_ = new const char*[ncap_]; + memset(cap_, 0, ncap_*sizeof cap_[0]); + + maxjob_ = 256; + job_ = new Job[maxjob_]; + + // Anchored search must start at text.begin(). + if (anchored_) { + cap_[0] = text.begin(); + return TrySearch(prog_->start(), text.begin()); + } + + // Unanchored search, starting from each possible text position. + // Notice that we have to try the empty string at the end of + // the text, so the loop condition is p <= text.end(), not p < text.end(). + // This looks like it's quadratic in the size of the text, + // but we are not clearing visited_ between calls to TrySearch, + // so no work is duplicated and it ends up still being linear. + for (const char* p = text.begin(); p <= text.end(); p++) { + cap_[0] = p; + if (TrySearch(prog_->start(), p)) // Match must be leftmost; done. + return true; + } + return false; +} + +// Bit-state search. +bool Prog::SearchBitState(const StringPiece& text, + const StringPiece& context, + Anchor anchor, + MatchKind kind, + StringPiece* match, + int nmatch) { + // If full match, we ask for an anchored longest match + // and then check that match[0] == text. + // So make sure match[0] exists. + StringPiece sp0; + if (kind == kFullMatch) { + anchor = kAnchored; + if (nmatch < 1) { + match = &sp0; + nmatch = 1; + } + } + + // Run the search. 
+ BitState b(this); + bool anchored = anchor == kAnchored; + bool longest = kind != kFirstMatch; + if (!b.Search(text, context, anchored, longest, match, nmatch)) + return false; + if (kind == kFullMatch && match[0].end() != text.end()) + return false; + return true; +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/compile.cc b/src/openalpr/support/re2/compile.cc new file mode 100644 index 0000000..53cee91 --- /dev/null +++ b/src/openalpr/support/re2/compile.cc @@ -0,0 +1,1144 @@ +// Copyright 2007 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Compile regular expression to Prog. +// +// Prog and Inst are defined in prog.h. +// This file's external interface is just Regexp::CompileToProg. +// The Compiler class defined in this file is private. + +#include "re2/prog.h" +#include "re2.h" +#include "re2/regexp.h" +#include "re2/walker-inl.h" + +namespace re2 { + +// List of pointers to Inst* that need to be filled in (patched). +// Because the Inst* haven't been filled in yet, +// we can use the Inst* word to hold the list's "next" pointer. +// It's kind of sleazy, but it works well in practice. +// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration. +// +// Because the out and out1 fields in Inst are no longer pointers, +// we can't use pointers directly here either. Instead, p refers +// to inst_[p>>1].out (p&1 == 0) or inst_[p>>1].out1 (p&1 == 1). +// p == 0 represents the NULL list. This is okay because instruction #0 +// is always the fail instruction, which never appears on a list. + +struct PatchList { + uint32 p; + + // Returns patch list containing just p. + static PatchList Mk(uint32 p); + + // Patches all the entries on l to have value v. + // Caller must not ever use patch list again. + static void Patch(Prog::Inst *inst0, PatchList l, uint32 v); + + // Deref returns the next pointer pointed at by p. 
+ static PatchList Deref(Prog::Inst *inst0, PatchList l); + + // Appends two patch lists and returns result. + static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2); +}; + +static PatchList nullPatchList = { 0 }; + +// Returns patch list containing just p. +PatchList PatchList::Mk(uint32 p) { + PatchList l; + l.p = p; + return l; +} + +// Returns the next pointer pointed at by l. +PatchList PatchList::Deref(Prog::Inst* inst0, PatchList l) { + Prog::Inst* ip = &inst0[l.p>>1]; + if (l.p&1) + l.p = ip->out1(); + else + l.p = ip->out(); + return l; +} + +// Patches all the entries on l to have value v. +void PatchList::Patch(Prog::Inst *inst0, PatchList l, uint32 val) { + while (l.p != 0) { + Prog::Inst* ip = &inst0[l.p>>1]; + if (l.p&1) { + l.p = ip->out1(); + ip->out1_ = val; + } else { + l.p = ip->out(); + ip->set_out(val); + } + } +} + +// Appends two patch lists and returns result. +PatchList PatchList::Append(Prog::Inst* inst0, PatchList l1, PatchList l2) { + if (l1.p == 0) + return l2; + if (l2.p == 0) + return l1; + + PatchList l = l1; + for (;;) { + PatchList next = PatchList::Deref(inst0, l); + if (next.p == 0) + break; + l = next; + } + + Prog::Inst* ip = &inst0[l.p>>1]; + if (l.p&1) + ip->out1_ = l2.p; + else + ip->set_out(l2.p); + + return l1; +} + +// Compiled program fragment. +struct Frag { + uint32 begin; + PatchList end; + + Frag() : begin(0) { end.p = 0; } // needed so Frag can go in vector + Frag(uint32 begin, PatchList end) : begin(begin), end(end) {} +}; + +// Input encodings. +enum Encoding { + kEncodingUTF8 = 1, // UTF-8 (0-10FFFF) + kEncodingLatin1, // Latin1 (0-FF) +}; + +class Compiler : public Regexp::Walker { + public: + explicit Compiler(); + ~Compiler(); + + // Compiles Regexp to a new Prog. + // Caller is responsible for deleting Prog when finished with it. + // If reversed is true, compiles for walking over the input + // string backward (reverses all concatenations). 
+ static Prog *Compile(Regexp* re, bool reversed, int64 max_mem); + + // Compiles alternation of all the re to a new Prog. + // Each re has a match with an id equal to its index in the vector. + static Prog* CompileSet(const RE2::Options& options, RE2::Anchor anchor, + Regexp* re); + + // Interface for Regexp::Walker, which helps traverse the Regexp. + // The walk is purely post-recursive: given the machines for the + // children, PostVisit combines them to create the machine for + // the current node. The child_args are Frags. + // The Compiler traverses the Regexp parse tree, visiting + // each node in depth-first order. It invokes PreVisit before + // visiting the node's children and PostVisit after visiting + // the children. + Frag PreVisit(Regexp* re, Frag parent_arg, bool* stop); + Frag PostVisit(Regexp* re, Frag parent_arg, Frag pre_arg, Frag* child_args, + int nchild_args); + Frag ShortVisit(Regexp* re, Frag parent_arg); + Frag Copy(Frag arg); + + // Given fragment a, returns a+ or a+?; a* or a*?; a? or a?? + Frag Plus(Frag a, bool nongreedy); + Frag Star(Frag a, bool nongreedy); + Frag Quest(Frag a, bool nongreedy); + + // Given fragment a, returns (a) capturing as \n. + Frag Capture(Frag a, int n); + + // Given fragments a and b, returns ab; a|b + Frag Cat(Frag a, Frag b); + Frag Alt(Frag a, Frag b); + + // Returns a fragment that can't match anything. + Frag NoMatch(); + + // Returns a fragment that matches the empty string. + Frag Match(int32 id); + + // Returns a no-op fragment. + Frag Nop(); + + // Returns a fragment matching the byte range lo-hi. + Frag ByteRange(int lo, int hi, bool foldcase); + + // Returns a fragment matching an empty-width special op. + Frag EmptyWidth(EmptyOp op); + + // Adds n instructions to the program. + // Returns the index of the first one. + // Returns -1 if no more instructions are available. + int AllocInst(int n); + + // Deletes unused instructions. + void Trim(); + + // Rune range compiler. 
+ + // Begins a new alternation. + void BeginRange(); + + // Adds a fragment matching the rune range lo-hi. + void AddRuneRange(Rune lo, Rune hi, bool foldcase); + void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase); + void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase); + void Add_80_10ffff(); + + // New suffix that matches the byte range lo-hi, then goes to next. + int RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next); + int UncachedRuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, int next); + + // Adds a suffix to alternation. + void AddSuffix(int id); + + // Returns the alternation of all the added suffixes. + Frag EndRange(); + + // Single rune. + Frag Literal(Rune r, bool foldcase); + + void Setup(Regexp::ParseFlags, int64, RE2::Anchor); + Prog* Finish(); + + // Returns .* where dot = any byte + Frag DotStar(); + + private: + Prog* prog_; // Program being built. + bool failed_; // Did we give up compiling? + Encoding encoding_; // Input encoding + bool reversed_; // Should program run backward over text? + + int max_inst_; // Maximum number of instructions. + + Prog::Inst* inst_; // Pointer to first instruction. + int inst_len_; // Number of instructions used. + int inst_cap_; // Number of instructions allocated. + + int64 max_mem_; // Total memory budget. 
+ + map rune_cache_; + Frag rune_range_; + + RE2::Anchor anchor_; // anchor mode for RE2::Set + + DISALLOW_COPY_AND_ASSIGN(Compiler); +}; + +Compiler::Compiler() { + prog_ = new Prog(); + failed_ = false; + encoding_ = kEncodingUTF8; + reversed_ = false; + inst_ = NULL; + inst_len_ = 0; + inst_cap_ = 0; + max_inst_ = 1; // make AllocInst for fail instruction okay + max_mem_ = 0; + int fail = AllocInst(1); + inst_[fail].InitFail(); + max_inst_ = 0; // Caller must change +} + +Compiler::~Compiler() { + delete prog_; + delete[] inst_; +} + +int Compiler::AllocInst(int n) { + if (failed_ || inst_len_ + n > max_inst_) { + failed_ = true; + return -1; + } + + if (inst_len_ + n > inst_cap_) { + if (inst_cap_ == 0) + inst_cap_ = 8; + while (inst_len_ + n > inst_cap_) + inst_cap_ *= 2; + Prog::Inst* ip = new Prog::Inst[inst_cap_]; + memmove(ip, inst_, inst_len_ * sizeof ip[0]); + memset(ip + inst_len_, 0, (inst_cap_ - inst_len_) * sizeof ip[0]); + delete[] inst_; + inst_ = ip; + } + int id = inst_len_; + inst_len_ += n; + return id; +} + +void Compiler::Trim() { + if (inst_len_ < inst_cap_) { + Prog::Inst* ip = new Prog::Inst[inst_len_]; + memmove(ip, inst_, inst_len_ * sizeof ip[0]); + delete[] inst_; + inst_ = ip; + inst_cap_ = inst_len_; + } +} + +// These routines are somewhat hard to visualize in text -- +// see http://swtch.com/~rsc/regexp/regexp1.html for +// pictures explaining what is going on here. + +// Returns an unmatchable fragment. +Frag Compiler::NoMatch() { + return Frag(0, nullPatchList); +} + +// Is a an unmatchable fragment? +static bool IsNoMatch(Frag a) { + return a.begin == 0; +} + +// Given fragments a and b, returns fragment for ab. +Frag Compiler::Cat(Frag a, Frag b) { + if (IsNoMatch(a) || IsNoMatch(b)) + return NoMatch(); + + // Elide no-op. 
+ Prog::Inst* begin = &inst_[a.begin]; + if (begin->opcode() == kInstNop && + a.end.p == (a.begin << 1) && + begin->out() == 0) { + PatchList::Patch(inst_, a.end, b.begin); // in case refs to a somewhere + return b; + } + + // To run backward over string, reverse all concatenations. + if (reversed_) { + PatchList::Patch(inst_, b.end, a.begin); + return Frag(b.begin, a.end); + } + + PatchList::Patch(inst_, a.end, b.begin); + return Frag(a.begin, b.end); +} + +// Given fragments for a and b, returns fragment for a|b. +Frag Compiler::Alt(Frag a, Frag b) { + // Special case for convenience in loops. + if (IsNoMatch(a)) + return b; + if (IsNoMatch(b)) + return a; + + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + + inst_[id].InitAlt(a.begin, b.begin); + return Frag(id, PatchList::Append(inst_, a.end, b.end)); +} + +// When capturing submatches in like-Perl mode, a kOpAlt Inst +// treats out_ as the first choice, out1_ as the second. +// +// For *, +, and ?, if out_ causes another repetition, +// then the operator is greedy. If out1_ is the repetition +// (and out_ moves forward), then the operator is non-greedy. + +// Given a fragment a, returns a fragment for a* or a*? (if nongreedy) +Frag Compiler::Star(Frag a, bool nongreedy) { + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + inst_[id].InitAlt(0, 0); + PatchList::Patch(inst_, a.end, id); + if (nongreedy) { + inst_[id].out1_ = a.begin; + return Frag(id, PatchList::Mk(id << 1)); + } else { + inst_[id].set_out(a.begin); + return Frag(id, PatchList::Mk((id << 1) | 1)); + } +} + +// Given a fragment for a, returns a fragment for a+ or a+? (if nongreedy) +Frag Compiler::Plus(Frag a, bool nongreedy) { + // a+ is just a* with a different entry point. + Frag f = Star(a, nongreedy); + return Frag(a.begin, f.end); +} + +// Given a fragment for a, returns a fragment for a? or a?? 
(if nongreedy) +Frag Compiler::Quest(Frag a, bool nongreedy) { + if (IsNoMatch(a)) + return Nop(); + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + PatchList pl; + if (nongreedy) { + inst_[id].InitAlt(0, a.begin); + pl = PatchList::Mk(id << 1); + } else { + inst_[id].InitAlt(a.begin, 0); + pl = PatchList::Mk((id << 1) | 1); + } + return Frag(id, PatchList::Append(inst_, pl, a.end)); +} + +// Returns a fragment for the byte range lo-hi. +Frag Compiler::ByteRange(int lo, int hi, bool foldcase) { + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + inst_[id].InitByteRange(lo, hi, foldcase, 0); + prog_->byte_inst_count_++; + prog_->MarkByteRange(lo, hi); + if (foldcase && lo <= 'z' && hi >= 'a') { + if (lo < 'a') + lo = 'a'; + if (hi > 'z') + hi = 'z'; + if (lo <= hi) + prog_->MarkByteRange(lo + 'A' - 'a', hi + 'A' - 'a'); + } + return Frag(id, PatchList::Mk(id << 1)); +} + +// Returns a no-op fragment. Sometimes unavoidable. +Frag Compiler::Nop() { + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + inst_[id].InitNop(0); + return Frag(id, PatchList::Mk(id << 1)); +} + +// Returns a fragment that signals a match. +Frag Compiler::Match(int32 match_id) { + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + inst_[id].InitMatch(match_id); + return Frag(id, nullPatchList); +} + +// Returns a fragment matching a particular empty-width op (like ^ or $) +Frag Compiler::EmptyWidth(EmptyOp empty) { + int id = AllocInst(1); + if (id < 0) + return NoMatch(); + inst_[id].InitEmptyWidth(empty, 0); + if (empty & (kEmptyBeginLine|kEmptyEndLine)) + prog_->MarkByteRange('\n', '\n'); + if (empty & (kEmptyWordBoundary|kEmptyNonWordBoundary)) { + int j; + for (int i = 0; i < 256; i = j) { + for (j = i+1; j < 256 && Prog::IsWordChar(i) == Prog::IsWordChar(j); j++) + ; + prog_->MarkByteRange(i, j-1); + } + } + return Frag(id, PatchList::Mk(id << 1)); +} + +// Given a fragment a, returns a fragment with capturing parens around a. 
+Frag Compiler::Capture(Frag a, int n) { + if (IsNoMatch(a)) + return NoMatch(); + int id = AllocInst(2); + if (id < 0) + return NoMatch(); + inst_[id].InitCapture(2*n, a.begin); + inst_[id+1].InitCapture(2*n+1, 0); + PatchList::Patch(inst_, a.end, id+1); + + return Frag(id, PatchList::Mk((id+1) << 1)); +} + +// A Rune is a name for a Unicode code point. +// Returns maximum rune encoded by UTF-8 sequence of length len. +static int MaxRune(int len) { + int b; // number of Rune bits in len-byte UTF-8 sequence (len < UTFmax) + if (len == 1) + b = 7; + else + b = 8-(len+1) + 6*(len-1); + return (1<::iterator it = rune_cache_.find(key); + if (it != rune_cache_.end()) + return it->second; + int id = UncachedRuneByteSuffix(lo, hi, foldcase, next); + rune_cache_[key] = id; + return id; +} + +void Compiler::AddSuffix(int id) { + if (rune_range_.begin == 0) { + rune_range_.begin = id; + return; + } + + int alt = AllocInst(1); + if (alt < 0) { + rune_range_.begin = 0; + return; + } + inst_[alt].InitAlt(rune_range_.begin, id); + rune_range_.begin = alt; +} + +Frag Compiler::EndRange() { + return rune_range_; +} + +// Converts rune range lo-hi into a fragment that recognizes +// the bytes that would make up those runes in the current +// encoding (Latin 1 or UTF-8). +// This lets the machine work byte-by-byte even when +// using multibyte encodings. + +void Compiler::AddRuneRange(Rune lo, Rune hi, bool foldcase) { + switch (encoding_) { + default: + case kEncodingUTF8: + AddRuneRangeUTF8(lo, hi, foldcase); + break; + case kEncodingLatin1: + AddRuneRangeLatin1(lo, hi, foldcase); + break; + } +} + +void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) { + // Latin1 is easy: runes *are* bytes. + if (lo > hi || lo > 0xFF) + return; + if (hi > 0xFF) + hi = 0xFF; + AddSuffix(RuneByteSuffix(lo, hi, foldcase, 0)); +} + +// Table describing how to make a UTF-8 matching machine +// for the rune range 80-10FFFF (Runeself-Runemax). 
+// This range happens frequently enough (for example /./ and /[^a-z]/) +// and the rune_cache_ map is slow enough that this is worth +// special handling. Makes compilation of a small expression +// with a dot in it about 10% faster. +// The * in the comments below mark whole sequences. +static struct ByteRangeProg { + int next; + int lo; + int hi; +} prog_80_10ffff[] = { + // Two-byte + { -1, 0x80, 0xBF, }, // 0: 80-BF + { 0, 0xC2, 0xDF, }, // 1: C2-DF 80-BF* + + // Three-byte + { 0, 0xA0, 0xBF, }, // 2: A0-BF 80-BF + { 2, 0xE0, 0xE0, }, // 3: E0 A0-BF 80-BF* + { 0, 0x80, 0xBF, }, // 4: 80-BF 80-BF + { 4, 0xE1, 0xEF, }, // 5: E1-EF 80-BF 80-BF* + + // Four-byte + { 4, 0x90, 0xBF, }, // 6: 90-BF 80-BF 80-BF + { 6, 0xF0, 0xF0, }, // 7: F0 90-BF 80-BF 80-BF* + { 4, 0x80, 0xBF, }, // 8: 80-BF 80-BF 80-BF + { 8, 0xF1, 0xF3, }, // 9: F1-F3 80-BF 80-BF 80-BF* + { 4, 0x80, 0x8F, }, // 10: 80-8F 80-BF 80-BF + { 10, 0xF4, 0xF4, }, // 11: F4 80-8F 80-BF 80-BF* +}; + +void Compiler::Add_80_10ffff() { + int inst[arraysize(prog_80_10ffff)] = { 0 }; // does not need to be initialized; silences gcc warning + for (int i = 0; i < arraysize(prog_80_10ffff); i++) { + const ByteRangeProg& p = prog_80_10ffff[i]; + int next = 0; + if (p.next >= 0) + next = inst[p.next]; + inst[i] = UncachedRuneByteSuffix(p.lo, p.hi, false, next); + if ((p.lo & 0xC0) != 0x80) + AddSuffix(inst[i]); + } +} + +void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) { + if (lo > hi) + return; + + // Pick off 80-10FFFF as a common special case + // that can bypass the slow rune_cache_. + if (lo == 0x80 && hi == 0x10ffff && !reversed_) { + Add_80_10ffff(); + return; + } + + // Split range into same-length sized ranges. + for (int i = 1; i < UTFmax; i++) { + Rune max = MaxRune(i); + if (lo <= max && max < hi) { + AddRuneRangeUTF8(lo, max, foldcase); + AddRuneRangeUTF8(max+1, hi, foldcase); + return; + } + } + + // ASCII range is always a special case. 
+ if (hi < Runeself) { + AddSuffix(RuneByteSuffix(lo, hi, foldcase, 0)); + return; + } + + // Split range into sections that agree on leading bytes. + for (int i = 1; i < UTFmax; i++) { + uint m = (1<<(6*i)) - 1; // last i bytes of a UTF-8 sequence + if ((lo & ~m) != (hi & ~m)) { + if ((lo & m) != 0) { + AddRuneRangeUTF8(lo, lo|m, foldcase); + AddRuneRangeUTF8((lo|m)+1, hi, foldcase); + return; + } + if ((hi & m) != m) { + AddRuneRangeUTF8(lo, (hi&~m)-1, foldcase); + AddRuneRangeUTF8(hi&~m, hi, foldcase); + return; + } + } + } + + // Finally. Generate byte matching equivalent for lo-hi. + uint8 ulo[UTFmax], uhi[UTFmax]; + int n = runetochar(reinterpret_cast(ulo), &lo); + int m = runetochar(reinterpret_cast(uhi), &hi); + (void)m; // USED(m) + DCHECK_EQ(n, m); + + int id = 0; + if (reversed_) { + for (int i = 0; i < n; i++) + id = RuneByteSuffix(ulo[i], uhi[i], false, id); + } else { + for (int i = n-1; i >= 0; i--) + id = RuneByteSuffix(ulo[i], uhi[i], false, id); + } + AddSuffix(id); +} + +// Should not be called. +Frag Compiler::Copy(Frag arg) { + // We're using WalkExponential; there should be no copying. + LOG(DFATAL) << "Compiler::Copy called!"; + failed_ = true; + return NoMatch(); +} + +// Visits a node quickly; called once WalkExponential has +// decided to cut this walk short. +Frag Compiler::ShortVisit(Regexp* re, Frag) { + failed_ = true; + return NoMatch(); +} + +// Called before traversing a node's children during the walk. +Frag Compiler::PreVisit(Regexp* re, Frag, bool* stop) { + // Cut off walk if we've already failed. + if (failed_) + *stop = true; + + return Frag(); // not used by caller +} + +Frag Compiler::Literal(Rune r, bool foldcase) { + switch (encoding_) { + default: + return Frag(); + + case kEncodingLatin1: + return ByteRange(r, r, foldcase); + + case kEncodingUTF8: { + if (r < Runeself) // Make common case fast. 
+ return ByteRange(r, r, foldcase); + uint8 buf[UTFmax]; + int n = runetochar(reinterpret_cast(buf), &r); + Frag f = ByteRange((uint8)buf[0], buf[0], false); + for (int i = 1; i < n; i++) + f = Cat(f, ByteRange((uint8)buf[i], buf[i], false)); + return f; + } + } +} + +// Called after traversing the node's children during the walk. +// Given their frags, build and return the frag for this re. +Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags, + int nchild_frags) { + // If a child failed, don't bother going forward, especially + // since the child_frags might contain Frags with NULLs in them. + if (failed_) + return NoMatch(); + + // Given the child fragments, return the fragment for this node. + switch (re->op()) { + case kRegexpRepeat: + // Should not see; code at bottom of function will print error + break; + + case kRegexpNoMatch: + return NoMatch(); + + case kRegexpEmptyMatch: + return Nop(); + + case kRegexpHaveMatch: { + Frag f = Match(re->match_id()); + // Remember unanchored match to end of string. + if (anchor_ != RE2::ANCHOR_BOTH) + f = Cat(DotStar(), Cat(EmptyWidth(kEmptyEndText), f)); + return f; + } + + case kRegexpConcat: { + Frag f = child_frags[0]; + for (int i = 1; i < nchild_frags; i++) + f = Cat(f, child_frags[i]); + return f; + } + + case kRegexpAlternate: { + Frag f = child_frags[0]; + for (int i = 1; i < nchild_frags; i++) + f = Alt(f, child_frags[i]); + return f; + } + + case kRegexpStar: + return Star(child_frags[0], re->parse_flags()&Regexp::NonGreedy); + + case kRegexpPlus: + return Plus(child_frags[0], re->parse_flags()&Regexp::NonGreedy); + + case kRegexpQuest: + return Quest(child_frags[0], re->parse_flags()&Regexp::NonGreedy); + + case kRegexpLiteral: + return Literal(re->rune(), re->parse_flags()&Regexp::FoldCase); + + case kRegexpLiteralString: { + // Concatenation of literals. 
+ if (re->nrunes() == 0) + return Nop(); + Frag f; + for (int i = 0; i < re->nrunes(); i++) { + Frag f1 = Literal(re->runes()[i], re->parse_flags()&Regexp::FoldCase); + if (i == 0) + f = f1; + else + f = Cat(f, f1); + } + return f; + } + + case kRegexpAnyChar: + BeginRange(); + AddRuneRange(0, Runemax, false); + return EndRange(); + + case kRegexpAnyByte: + return ByteRange(0x00, 0xFF, false); + + case kRegexpCharClass: { + CharClass* cc = re->cc(); + if (cc->empty()) { + // This can't happen. + LOG(DFATAL) << "No ranges in char class"; + failed_ = true; + return NoMatch(); + } + + // ASCII case-folding optimization: if the char class + // behaves the same on A-Z as it does on a-z, + // discard any ranges wholly contained in A-Z + // and mark the other ranges as foldascii. + // This reduces the size of a program for + // (?i)abc from 3 insts per letter to 1 per letter. + bool foldascii = cc->FoldsASCII(); + + // Character class is just a big OR of the different + // character ranges in the class. + BeginRange(); + for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i) { + // ASCII case-folding optimization (see above). + if (foldascii && 'A' <= i->lo && i->hi <= 'Z') + continue; + + // If this range contains all of A-Za-z or none of it, + // the fold flag is unnecessary; don't bother. + bool fold = foldascii; + if ((i->lo <= 'A' && 'z' <= i->hi) || i->hi < 'A' || 'z' < i->lo || + ('Z' < i->lo && i->hi < 'a')) + fold = false; + + AddRuneRange(i->lo, i->hi, fold); + } + return EndRange(); + } + + case kRegexpCapture: + // If this is a non-capturing parenthesis -- (?:foo) -- + // just use the inner expression. + if (re->cap() < 0) + return child_frags[0]; + return Capture(child_frags[0], re->cap()); + + case kRegexpBeginLine: + return EmptyWidth(reversed_ ? kEmptyEndLine : kEmptyBeginLine); + + case kRegexpEndLine: + return EmptyWidth(reversed_ ? kEmptyBeginLine : kEmptyEndLine); + + case kRegexpBeginText: + return EmptyWidth(reversed_ ? 
kEmptyEndText : kEmptyBeginText); + + case kRegexpEndText: + return EmptyWidth(reversed_ ? kEmptyBeginText : kEmptyEndText); + + case kRegexpWordBoundary: + return EmptyWidth(kEmptyWordBoundary); + + case kRegexpNoWordBoundary: + return EmptyWidth(kEmptyNonWordBoundary); + } + LOG(DFATAL) << "Missing case in Compiler: " << re->op(); + failed_ = true; + return NoMatch(); +} + +// Is this regexp required to start at the beginning of the text? +// Only approximate; can return false for complicated regexps like (\Aa|\Ab), +// but handles (\A(a|b)). Could use the Walker to write a more exact one. +static bool IsAnchorStart(Regexp** pre, int depth) { + Regexp* re = *pre; + Regexp* sub; + // The depth limit makes sure that we don't overflow + // the stack on a deeply nested regexp. As the comment + // above says, IsAnchorStart is conservative, so returning + // a false negative is okay. The exact limit is somewhat arbitrary. + if (re == NULL || depth >= 4) + return false; + switch (re->op()) { + default: + break; + case kRegexpConcat: + if (re->nsub() > 0) { + sub = re->sub()[0]->Incref(); + if (IsAnchorStart(&sub, depth+1)) { + Regexp** subcopy = new Regexp*[re->nsub()]; + subcopy[0] = sub; // already have reference + for (int i = 1; i < re->nsub(); i++) + subcopy[i] = re->sub()[i]->Incref(); + *pre = Regexp::Concat(subcopy, re->nsub(), re->parse_flags()); + delete[] subcopy; + re->Decref(); + return true; + } + sub->Decref(); + } + break; + case kRegexpCapture: + sub = re->sub()[0]->Incref(); + if (IsAnchorStart(&sub, depth+1)) { + *pre = Regexp::Capture(sub, re->parse_flags(), re->cap()); + re->Decref(); + return true; + } + sub->Decref(); + break; + case kRegexpBeginText: + *pre = Regexp::LiteralString(NULL, 0, re->parse_flags()); + re->Decref(); + return true; + } + return false; +} + +// Is this regexp required to start at the end of the text? +// Only approximate; can return false for complicated regexps like (a\z|b\z), +// but handles ((a|b)\z). 
Could use the Walker to write a more exact one. +static bool IsAnchorEnd(Regexp** pre, int depth) { + Regexp* re = *pre; + Regexp* sub; + // The depth limit makes sure that we don't overflow + // the stack on a deeply nested regexp. As the comment + // above says, IsAnchorEnd is conservative, so returning + // a false negative is okay. The exact limit is somewhat arbitrary. + if (re == NULL || depth >= 4) + return false; + switch (re->op()) { + default: + break; + case kRegexpConcat: + if (re->nsub() > 0) { + sub = re->sub()[re->nsub() - 1]->Incref(); + if (IsAnchorEnd(&sub, depth+1)) { + Regexp** subcopy = new Regexp*[re->nsub()]; + subcopy[re->nsub() - 1] = sub; // already have reference + for (int i = 0; i < re->nsub() - 1; i++) + subcopy[i] = re->sub()[i]->Incref(); + *pre = Regexp::Concat(subcopy, re->nsub(), re->parse_flags()); + delete[] subcopy; + re->Decref(); + return true; + } + sub->Decref(); + } + break; + case kRegexpCapture: + sub = re->sub()[0]->Incref(); + if (IsAnchorEnd(&sub, depth+1)) { + *pre = Regexp::Capture(sub, re->parse_flags(), re->cap()); + re->Decref(); + return true; + } + sub->Decref(); + break; + case kRegexpEndText: + *pre = Regexp::LiteralString(NULL, 0, re->parse_flags()); + re->Decref(); + return true; + } + return false; +} + +void Compiler::Setup(Regexp::ParseFlags flags, int64 max_mem, + RE2::Anchor anchor) { + prog_->set_flags(flags); + + if (flags & Regexp::Latin1) + encoding_ = kEncodingLatin1; + max_mem_ = max_mem; + if (max_mem <= 0) { + max_inst_ = 100000; // more than enough + } else if (max_mem <= static_cast(sizeof(Prog))) { + // No room for anything. + max_inst_ = 0; + } else { + int64 m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst); + // Limit instruction count so that inst->id() fits nicely in an int. + // SparseArray also assumes that the indices (inst->id()) are ints. + // The call to WalkExponential uses 2*max_inst_ below, + // and other places in the code use 2 or 3 * prog->size(). 
+ // Limiting to 2^24 should avoid overflow in those places. + // (The point of allowing more than 32 bits of memory is to + // have plenty of room for the DFA states, not to use it up + // on the program.) + if (m >= 1<<24) + m = 1<<24; + + // Inst imposes its own limit (currently bigger than 2^24 but be safe). + if (m > Prog::Inst::kMaxInst) + m = Prog::Inst::kMaxInst; + + max_inst_ = m; + } + + anchor_ = anchor; +} + +// Compiles re, returning program. +// Caller is responsible for deleting prog_. +// If reversed is true, compiles a program that expects +// to run over the input string backward (reverses all concatenations). +// The reversed flag is also recorded in the returned program. +Prog* Compiler::Compile(Regexp* re, bool reversed, int64 max_mem) { + Compiler c; + + c.Setup(re->parse_flags(), max_mem, RE2::ANCHOR_BOTH /* unused */); + c.reversed_ = reversed; + + // Simplify to remove things like counted repetitions + // and character classes like \d. + Regexp* sre = re->Simplify(); + if (sre == NULL) + return NULL; + + // Record whether prog is anchored, removing the anchors. + // (They get in the way of other optimizations.) + bool is_anchor_start = IsAnchorStart(&sre, 0); + bool is_anchor_end = IsAnchorEnd(&sre, 0); + + // Generate fragment for entire regexp. + Frag f = c.WalkExponential(sre, Frag(), 2*c.max_inst_); + sre->Decref(); + if (c.failed_) + return NULL; + + // Success! Finish by putting Match node at end, and record start. + // Turn off c.reversed_ (if it is set) to force the remaining concatenations + // to behave normally. + c.reversed_ = false; + Frag all = c.Cat(f, c.Match(0)); + c.prog_->set_start(all.begin); + + if (reversed) { + c.prog_->set_anchor_start(is_anchor_end); + c.prog_->set_anchor_end(is_anchor_start); + } else { + c.prog_->set_anchor_start(is_anchor_start); + c.prog_->set_anchor_end(is_anchor_end); + } + + // Also create unanchored version, which starts with a .*? loop. 
+ if (c.prog_->anchor_start()) { + c.prog_->set_start_unanchored(c.prog_->start()); + } else { + Frag unanchored = c.Cat(c.DotStar(), all); + c.prog_->set_start_unanchored(unanchored.begin); + } + + c.prog_->set_reversed(reversed); + + // Hand ownership of prog_ to caller. + return c.Finish(); +} + +Prog* Compiler::Finish() { + if (failed_) + return NULL; + + if (prog_->start() == 0 && prog_->start_unanchored() == 0) { + // No possible matches; keep Fail instruction only. + inst_len_ = 1; + } + + // Trim instruction to minimum array and transfer to Prog. + Trim(); + prog_->inst_ = inst_; + prog_->size_ = inst_len_; + inst_ = NULL; + + // Compute byte map. + prog_->ComputeByteMap(); + + prog_->Optimize(); + + // Record remaining memory for DFA. + if (max_mem_ <= 0) { + prog_->set_dfa_mem(1<<20); + } else { + int64 m = max_mem_ - sizeof(Prog) - inst_len_*sizeof(Prog::Inst); + if (m < 0) + m = 0; + prog_->set_dfa_mem(m); + } + + Prog* p = prog_; + prog_ = NULL; + return p; +} + +// Converts Regexp to Prog. +Prog* Regexp::CompileToProg(int64 max_mem) { + return Compiler::Compile(this, false, max_mem); +} + +Prog* Regexp::CompileToReverseProg(int64 max_mem) { + return Compiler::Compile(this, true, max_mem); +} + +Frag Compiler::DotStar() { + return Star(ByteRange(0x00, 0xff, false), true); +} + +// Compiles RE set to Prog. +Prog* Compiler::CompileSet(const RE2::Options& options, RE2::Anchor anchor, + Regexp* re) { + Compiler c; + + Regexp::ParseFlags pf = static_cast(options.ParseFlags()); + c.Setup(pf, options.max_mem(), anchor); + + // Compile alternation of fragments. + Frag all = c.WalkExponential(re, Frag(), 2*c.max_inst_); + re->Decref(); + if (c.failed_) + return NULL; + + if (anchor == RE2::UNANCHORED) { + // The trailing .* was added while handling kRegexpHaveMatch. + // We just have to add the leading one. 
+ all = c.Cat(c.DotStar(), all); + } + + c.prog_->set_start(all.begin); + c.prog_->set_start_unanchored(all.begin); + c.prog_->set_anchor_start(true); + c.prog_->set_anchor_end(true); + + Prog* prog = c.Finish(); + if (prog == NULL) + return NULL; + + // Make sure DFA has enough memory to operate, + // since we're not going to fall back to the NFA. + bool failed; + StringPiece sp = "hello, world"; + prog->SearchDFA(sp, sp, Prog::kAnchored, Prog::kManyMatch, + NULL, &failed, NULL); + if (failed) { + delete prog; + return NULL; + } + + return prog; +} + +Prog* Prog::CompileSet(const RE2::Options& options, RE2::Anchor anchor, + Regexp* re) { + return Compiler::CompileSet(options, anchor, re); +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/dfa.cc b/src/openalpr/support/re2/dfa.cc new file mode 100644 index 0000000..d1e31c3 --- /dev/null +++ b/src/openalpr/support/re2/dfa.cc @@ -0,0 +1,2112 @@ +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// A DFA (deterministic finite automaton)-based regular expression search. +// +// The DFA search has two main parts: the construction of the automaton, +// which is represented by a graph of State structures, and the execution +// of the automaton over a given input string. +// +// The basic idea is that the State graph is constructed so that the +// execution can simply start with a state s, and then for each byte c in +// the input string, execute "s = s->next[c]", checking at each point whether +// the current s represents a matching state. +// +// The simple explanation just given does convey the essence of this code, +// but it omits the details of how the State graph gets constructed as well +// as some performance-driven optimizations to the execution of the automaton. +// All these details are explained in the comments for the code following +// the definition of class DFA. 
+// +// See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent. + +#include "re2/prog.h" +#include "re2/stringpiece.h" +#include "util/atomicops.h" +#include "util/flags.h" +#include "util/sparse_set.h" + +DEFINE_bool(re2_dfa_bail_when_slow, true, + "Whether the RE2 DFA should bail out early " + "if the NFA would be faster (for testing)."); + +namespace re2 { + +#if !defined(__linux__) /* only Linux seems to have memrchr */ +static void* memrchr(const void* s, int c, size_t n) { + const unsigned char* p = (const unsigned char*)s; + for (p += n; n > 0; n--) + if (*--p == c) + return (void*)p; + + return NULL; +} +#endif + +// Changing this to true compiles in prints that trace execution of the DFA. +// Generates a lot of output -- only useful for debugging. +static const bool DebugDFA = false; + +// A DFA implementation of a regular expression program. +// Since this is entirely a forward declaration mandated by C++, +// some of the comments here are better understood after reading +// the comments in the sections that follow the DFA definition. +class DFA { + public: + DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem); + ~DFA(); + bool ok() const { return !init_failed_; } + Prog::MatchKind kind() { return kind_; } + + // Searches for the regular expression in text, which is considered + // as a subsection of context for the purposes of interpreting flags + // like ^ and $ and \A and \z. + // Returns whether a match was found. + // If a match is found, sets *ep to the end point of the best match in text. + // If "anchored", the match must begin at the start of text. + // If "want_earliest_match", the match that ends first is used, not + // necessarily the best one. + // If "run_forward" is true, the DFA runs from text.begin() to text.end(). + // If it is false, the DFA runs from text.end() to text.begin(), + // returning the leftmost end of the match instead of the rightmost one. 
+ // If the DFA cannot complete the search (for example, if it is out of + // memory), it sets *failed and returns false. + bool Search(const StringPiece& text, const StringPiece& context, + bool anchored, bool want_earliest_match, bool run_forward, + bool* failed, const char** ep, vector* matches); + + // Builds out all states for the entire DFA. FOR TESTING ONLY + // Returns number of states. + int BuildAllStates(); + + // Computes min and max for matching strings. Won't return strings + // bigger than maxlen. + bool PossibleMatchRange(string* min, string* max, int maxlen); + + // These data structures are logically private, but C++ makes it too + // difficult to mark them as such. + class Workq; + class RWLocker; + class StateSaver; + + // A single DFA state. The DFA is represented as a graph of these + // States, linked by the next_ pointers. If in state s and reading + // byte c, the next state should be s->next_[c]. + struct State { + inline bool IsMatch() const { return flag_ & kFlagMatch; } + void SaveMatch(vector* v); + + int* inst_; // Instruction pointers in the state. + int ninst_; // # of inst_ pointers. + uint flag_; // Empty string bitfield flags in effect on the way + // into this state, along with kFlagMatch if this + // is a matching state. + State** next_; // Outgoing arrows from State, + // one per input byte class + }; + + enum { + kByteEndText = 256, // imaginary byte at end of text + + kFlagEmptyMask = 0xFFF, // State.flag_: bits holding kEmptyXXX flags + kFlagMatch = 0x1000, // State.flag_: this is a matching state + kFlagLastWord = 0x2000, // State.flag_: last byte was a word char + kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left + }; + +#ifndef STL_MSVC + // STL function structures for use with unordered_set. 
+ struct StateEqual { + bool operator()(const State* a, const State* b) const { + if (a == b) + return true; + if (a == NULL || b == NULL) + return false; + if (a->ninst_ != b->ninst_) + return false; + if (a->flag_ != b->flag_) + return false; + for (int i = 0; i < a->ninst_; i++) + if (a->inst_[i] != b->inst_[i]) + return false; + return true; // they're equal + } + }; +#endif // STL_MSVC + struct StateHash { + size_t operator()(const State* a) const { + if (a == NULL) + return 0; + const char* s = reinterpret_cast(a->inst_); + int len = a->ninst_ * sizeof a->inst_[0]; + if (sizeof(size_t) == sizeof(uint32)) + return Hash32StringWithSeed(s, len, a->flag_); + else + return Hash64StringWithSeed(s, len, a->flag_); + } +#ifdef STL_MSVC + // Less than operator. + bool operator()(const State* a, const State* b) const { + if (a == b) + return false; + if (a == NULL || b == NULL) + return a == NULL; + if (a->ninst_ != b->ninst_) + return a->ninst_ < b->ninst_; + if (a->flag_ != b->flag_) + return a->flag_ < b->flag_; + for (int i = 0; i < a->ninst_; ++i) + if (a->inst_[i] != b->inst_[i]) + return a->inst_[i] < b->inst_[i]; + return false; // they're equal + } + // The two public members are required by msvc. 4 and 8 are default values. + // Reference: http://msdn.microsoft.com/en-us/library/1s1byw77.aspx + static const size_t bucket_size = 4; + static const size_t min_buckets = 8; +#endif // STL_MSVC + }; + +#ifdef STL_MSVC + typedef unordered_set StateSet; +#else // !STL_MSVC + typedef unordered_set StateSet; +#endif // STL_MSVC + + + private: + // Special "firstbyte" values for a state. (Values >= 0 denote actual bytes.) + enum { + kFbUnknown = -1, // No analysis has been performed. + kFbMany = -2, // Many bytes will lead out of this state. + kFbNone = -3, // No bytes lead out of this state. + }; + + enum { + // Indices into start_ for unanchored searches. + // Add kStartAnchored for anchored searches. 
+ kStartBeginText = 0, // text at beginning of context + kStartBeginLine = 2, // text at beginning of line + kStartAfterWordChar = 4, // text follows a word character + kStartAfterNonWordChar = 6, // text follows non-word character + kMaxStart = 8, + + kStartAnchored = 1, + }; + + // Resets the DFA State cache, flushing all saved State* information. + // Releases and reacquires cache_mutex_ via cache_lock, so any + // State* existing before the call are not valid after the call. + // Use a StateSaver to preserve important states across the call. + // cache_mutex_.r <= L < mutex_ + // After: cache_mutex_.w <= L < mutex_ + void ResetCache(RWLocker* cache_lock); + + // Looks up and returns the State corresponding to a Workq. + // L >= mutex_ + State* WorkqToCachedState(Workq* q, uint flag); + + // Looks up and returns a State matching the inst, ninst, and flag. + // L >= mutex_ + State* CachedState(int* inst, int ninst, uint flag); + + // Clear the cache entirely. + // Must hold cache_mutex_.w or be in destructor. + void ClearCache(); + + // Converts a State into a Workq: the opposite of WorkqToCachedState. + // L >= mutex_ + static void StateToWorkq(State* s, Workq* q); + + // Runs a State on a given byte, returning the next state. + State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_ + State* RunStateOnByte(State*, int); // L >= mutex_ + + // Runs a Workq on a given byte followed by a set of empty-string flags, + // producing a new Workq in nq. If a match instruction is encountered, + // sets *ismatch to true. + // L >= mutex_ + void RunWorkqOnByte(Workq* q, Workq* nq, + int c, uint flag, bool* ismatch, + Prog::MatchKind kind); + + // Runs a Workq on a set of empty-string flags, producing a new Workq in nq. + // L >= mutex_ + void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint flag); + + // Adds the instruction id to the Workq, following empty arrows + // according to flag. 
+ // L >= mutex_ + void AddToQueue(Workq* q, int id, uint flag); + + // For debugging, returns a text representation of State. + static string DumpState(State* state); + + // For debugging, returns a text representation of a Workq. + static string DumpWorkq(Workq* q); + + // Search parameters + struct SearchParams { + SearchParams(const StringPiece& text, const StringPiece& context, + RWLocker* cache_lock) + : text(text), context(context), + anchored(false), + want_earliest_match(false), + run_forward(false), + start(NULL), + firstbyte(kFbUnknown), + cache_lock(cache_lock), + failed(false), + ep(NULL), + matches(NULL) { } + + StringPiece text; + StringPiece context; + bool anchored; + bool want_earliest_match; + bool run_forward; + State* start; + int firstbyte; + RWLocker *cache_lock; + bool failed; // "out" parameter: whether search gave up + const char* ep; // "out" parameter: end pointer for match + vector* matches; + + private: + DISALLOW_COPY_AND_ASSIGN(SearchParams); + }; + + // Before each search, the parameters to Search are analyzed by + // AnalyzeSearch to determine the state in which to start and the + // "firstbyte" for that state, if any. + struct StartInfo { + StartInfo() : start(NULL), firstbyte(kFbUnknown) { } + State* start; + volatile int firstbyte; + }; + + // Fills in params->start and params->firstbyte using + // the other search parameters. Returns true on success, + // false on failure. + // cache_mutex_.r <= L < mutex_ + bool AnalyzeSearch(SearchParams* params); + bool AnalyzeSearchHelper(SearchParams* params, StartInfo* info, uint flags); + + // The generic search loop, inlined to create specialized versions. + // cache_mutex_.r <= L < mutex_ + // Might unlock and relock cache_mutex_ via params->cache_lock. + inline bool InlinedSearchLoop(SearchParams* params, + bool have_firstbyte, + bool want_earliest_match, + bool run_forward); + + // The specialized versions of InlinedSearchLoop. 
The three letters + // at the ends of the name denote the true/false values used as the + // last three parameters of InlinedSearchLoop. + // cache_mutex_.r <= L < mutex_ + // Might unlock and relock cache_mutex_ via params->cache_lock. + bool SearchFFF(SearchParams* params); + bool SearchFFT(SearchParams* params); + bool SearchFTF(SearchParams* params); + bool SearchFTT(SearchParams* params); + bool SearchTFF(SearchParams* params); + bool SearchTFT(SearchParams* params); + bool SearchTTF(SearchParams* params); + bool SearchTTT(SearchParams* params); + + // The main search loop: calls an appropriate specialized version of + // InlinedSearchLoop. + // cache_mutex_.r <= L < mutex_ + // Might unlock and relock cache_mutex_ via params->cache_lock. + bool FastSearchLoop(SearchParams* params); + + // For debugging, a slow search loop that calls InlinedSearchLoop + // directly -- because the booleans passed are not constants, the + // loop is not specialized like the SearchFFF etc. versions, so it + // runs much more slowly. Useful only for debugging. + // cache_mutex_.r <= L < mutex_ + // Might unlock and relock cache_mutex_ via params->cache_lock. + bool SlowSearchLoop(SearchParams* params); + + // Looks up bytes in bytemap_ but handles case c == kByteEndText too. + int ByteMap(int c) { + if (c == kByteEndText) + return prog_->bytemap_range(); + return prog_->bytemap()[c]; + } + + // Constant after initialization. + Prog* prog_; // The regular expression program to run. + Prog::MatchKind kind_; // The kind of DFA. + bool init_failed_; // initialization failed (out of memory) + + Mutex mutex_; // mutex_ >= cache_mutex_.r + + // Scratch areas, protected by mutex_. + Workq* q0_; // Two pre-allocated work queues. + Workq* q1_; + int* astack_; // Pre-allocated stack for AddToQueue + int nastack_; + + // State* cache. Many threads use and add to the cache simultaneously, + // holding cache_mutex_ for reading and mutex_ (above) when adding. 
+ // If the cache fills and needs to be discarded, the discarding is done + // while holding cache_mutex_ for writing, to avoid interrupting other + // readers. Any State* pointers are only valid while cache_mutex_ + // is held. + Mutex cache_mutex_; + int64 mem_budget_; // Total memory budget for all States. + int64 state_budget_; // Amount of memory remaining for new States. + StateSet state_cache_; // All States computed so far. + StartInfo start_[kMaxStart]; + bool cache_warned_; // have printed to LOG(INFO) about the cache +}; + +// Shorthand for casting to uint8*. +static inline const uint8* BytePtr(const void* v) { + return reinterpret_cast(v); +} + +// Work queues + +// Marks separate thread groups of different priority +// in the work queue when in leftmost-longest matching mode. +#define Mark (-1) + +// Internally, the DFA uses a sparse array of +// program instruction pointers as a work queue. +// In leftmost longest mode, marks separate sections +// of workq that started executing at different +// locations in the string (earlier locations first). +class DFA::Workq : public SparseSet { + public: + // Constructor: n is number of normal slots, maxmark number of mark slots. 
+ Workq(int n, int maxmark) : + SparseSet(n+maxmark), + n_(n), + maxmark_(maxmark), + nextmark_(n), + last_was_mark_(true) { + } + + bool is_mark(int i) { return i >= n_; } + + int maxmark() { return maxmark_; } + + void clear() { + SparseSet::clear(); + nextmark_ = n_; + } + + void mark() { + if (last_was_mark_) + return; + last_was_mark_ = false; + SparseSet::insert_new(nextmark_++); + } + + int size() { + return n_ + maxmark_; + } + + void insert(int id) { + if (contains(id)) + return; + insert_new(id); + } + + void insert_new(int id) { + last_was_mark_ = false; + SparseSet::insert_new(id); + } + + private: + int n_; // size excluding marks + int maxmark_; // maximum number of marks + int nextmark_; // id of next mark + bool last_was_mark_; // last inserted was mark + DISALLOW_COPY_AND_ASSIGN(Workq); +}; + +DFA::DFA(Prog* prog, Prog::MatchKind kind, int64 max_mem) + : prog_(prog), + kind_(kind), + init_failed_(false), + q0_(NULL), + q1_(NULL), + astack_(NULL), + mem_budget_(max_mem), + cache_warned_(false) { + if (DebugDFA) + fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str()); + int nmark = 0; + if (kind_ == Prog::kLongestMatch) + nmark = prog->size(); + nastack_ = 2 * prog->size() + nmark; + + // Account for space needed for DFA, q0, q1, astack. + mem_budget_ -= sizeof(DFA); + mem_budget_ -= (prog_->size() + nmark) * + (sizeof(int)+sizeof(int)) * 2; // q0, q1 + mem_budget_ -= nastack_ * sizeof(int); // astack + if (mem_budget_ < 0) { + LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld", + prog_->size(), max_mem); + init_failed_ = true; + return; + } + + state_budget_ = mem_budget_; + + // Make sure there is a reasonable amount of working room left. + // At minimum, the search requires room for two states in order + // to limp along, restarting frequently. We'll get better performance + // if there is room for a larger number of states, say 20. 
+ int64 one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(int) + + (prog_->bytemap_range()+1)*sizeof(State*); + if (state_budget_ < 20*one_state) { + LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld", + prog_->size(), max_mem); + init_failed_ = true; + return; + } + + q0_ = new Workq(prog->size(), nmark); + q1_ = new Workq(prog->size(), nmark); + astack_ = new int[nastack_]; +} + +DFA::~DFA() { + delete q0_; + delete q1_; + delete[] astack_; + ClearCache(); +} + +// In the DFA state graph, s->next[c] == NULL means that the +// state has not yet been computed and needs to be. We need +// a different special value to signal that s->next[c] is a +// state that can never lead to a match (and thus the search +// can be called off). Hence DeadState. +#define DeadState reinterpret_cast(1) + +// Signals that the rest of the string matches no matter what it is. +#define FullMatchState reinterpret_cast(2) + +#define SpecialStateMax FullMatchState + +// Debugging printouts + +// For debugging, returns a string representation of the work queue. +string DFA::DumpWorkq(Workq* q) { + string s; + const char* sep = ""; + for (DFA::Workq::iterator it = q->begin(); it != q->end(); ++it) { + if (q->is_mark(*it)) { + StringAppendF(&s, "|"); + sep = ""; + } else { + StringAppendF(&s, "%s%d", sep, *it); + sep = ","; + } + } + return s; +} + +// For debugging, returns a string representation of the state. 
+string DFA::DumpState(State* state) { + if (state == NULL) + return "_"; + if (state == DeadState) + return "X"; + if (state == FullMatchState) + return "*"; + string s; + const char* sep = ""; + StringAppendF(&s, "(%p)", state); + for (int i = 0; i < state->ninst_; i++) { + if (state->inst_[i] == Mark) { + StringAppendF(&s, "|"); + sep = ""; + } else { + StringAppendF(&s, "%s%d", sep, state->inst_[i]); + sep = ","; + } + } + StringAppendF(&s, " flag=%#x", state->flag_); + return s; +} + +////////////////////////////////////////////////////////////////////// +// +// DFA state graph construction. +// +// The DFA state graph is a heavily-linked collection of State* structures. +// The state_cache_ is a set of all the State structures ever allocated, +// so that if the same state is reached by two different paths, +// the same State structure can be used. This reduces allocation +// requirements and also avoids duplication of effort across the two +// identical states. +// +// A State is defined by an ordered list of instruction ids and a flag word. +// +// The choice of an ordered list of instructions differs from a typical +// textbook DFA implementation, which would use an unordered set. +// Textbook descriptions, however, only care about whether +// the DFA matches, not where it matches in the text. To decide where the +// DFA matches, we need to mimic the behavior of the dominant backtracking +// implementations like PCRE, which try one possible regular expression +// execution, then another, then another, stopping when one of them succeeds. +// The DFA execution tries these many executions in parallel, representing +// each by an instruction id. These pointers are ordered in the State.inst_ +// list in the same order that the executions would happen in a backtracking +// search: if a match is found during execution of inst_[2], inst_[i] for i>=3 +// can be discarded. 
+// +// Textbooks also typically do not consider context-aware empty string operators +// like ^ or $. These are handled by the flag word, which specifies the set +// of empty-string operators that should be matched when executing at the +// current text position. These flag bits are defined in prog.h. +// The flag word also contains two DFA-specific bits: kFlagMatch if the state +// is a matching state (one that reached a kInstMatch in the program) +// and kFlagLastWord if the last processed byte was a word character, for the +// implementation of \B and \b. +// +// The flag word also contains, shifted up 16 bits, the bits looked for by +// any kInstEmptyWidth instructions in the state. These provide a useful +// summary indicating when new flags might be useful. +// +// The permanent representation of a State's instruction ids is just an array, +// but while a state is being analyzed, these instruction ids are represented +// as a Workq, which is an array that allows iteration in insertion order. + +// NOTE(rsc): The choice of State construction determines whether the DFA +// mimics backtracking implementations (so-called leftmost first matching) or +// traditional DFA implementations (so-called leftmost longest matching as +// prescribed by POSIX). This implementation chooses to mimic the +// backtracking implementations, because we want to replace PCRE. To get +// POSIX behavior, the states would need to be considered not as a simple +// ordered list of instruction ids, but as a list of unordered sets of instruction +// ids. A match by a state in one set would inhibit the running of sets +// farther down the list but not other instruction ids in the same set. Each +// set would correspond to matches beginning at a given point in the string. +// This is implemented by separating different sets with Mark pointers. + +// Looks in the State cache for a State matching q, flag. +// If one is found, returns it. 
If one is not found, allocates one, +// inserts it in the cache, and returns it. +DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) { + if (DEBUG_MODE) + mutex_.AssertHeld(); + + // Construct array of instruction ids for the new state. + // Only ByteRange, EmptyWidth, and Match instructions are useful to keep: + // those are the only operators with any effect in + // RunWorkqOnEmptyString or RunWorkqOnByte. + int* inst = new int[q->size()]; + int n = 0; + uint needflags = 0; // flags needed by kInstEmptyWidth instructions + bool sawmatch = false; // whether queue contains guaranteed kInstMatch + bool sawmark = false; // whether queue contains a Mark + if (DebugDFA) + fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag); + for (Workq::iterator it = q->begin(); it != q->end(); ++it) { + int id = *it; + if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id))) + break; + if (q->is_mark(id)) { + if (n > 0 && inst[n-1] != Mark) { + sawmark = true; + inst[n++] = Mark; + } + continue; + } + Prog::Inst* ip = prog_->inst(id); + switch (ip->opcode()) { + case kInstAltMatch: + // This state will continue to a match no matter what + // the rest of the input is. If it is the highest priority match + // being considered, return the special FullMatchState + // to indicate that it's all matches from here out. + if (kind_ != Prog::kManyMatch && + (kind_ != Prog::kFirstMatch || + (it == q->begin() && ip->greedy(prog_))) && + (kind_ != Prog::kLongestMatch || !sawmark) && + (flag & kFlagMatch)) { + delete[] inst; + if (DebugDFA) + fprintf(stderr, " -> FullMatchState\n"); + return FullMatchState; + } + // Fall through. + case kInstByteRange: // These are useful. 
+ case kInstEmptyWidth: + case kInstMatch: + case kInstAlt: // Not useful, but necessary [*] + inst[n++] = *it; + if (ip->opcode() == kInstEmptyWidth) + needflags |= ip->empty(); + if (ip->opcode() == kInstMatch && !prog_->anchor_end()) + sawmatch = true; + break; + + default: // The rest are not. + break; + } + + // [*] kInstAlt would seem useless to record in a state, since + // we've already followed both its arrows and saved all the + // interesting states we can reach from there. The problem + // is that one of the empty-width instructions might lead + // back to the same kInstAlt (if an empty-width operator is starred), + // producing a different evaluation order depending on whether + // we keep the kInstAlt to begin with. Sigh. + // A specific case that this affects is /(^|a)+/ matching "a". + // If we don't save the kInstAlt, we will match the whole "a" (0,1) + // but in fact the correct leftmost-first match is the leading "" (0,0). + } + DCHECK_LE(n, q->size()); + if (n > 0 && inst[n-1] == Mark) + n--; + + // If there are no empty-width instructions waiting to execute, + // then the extra flag bits will not be used, so there is no + // point in saving them. (Discarding them reduces the number + // of distinct states.) + if (needflags == 0) + flag &= kFlagMatch; + + // NOTE(rsc): The code above cannot do flag &= needflags, + // because if the right flags were present to pass the current + // kInstEmptyWidth instructions, new kInstEmptyWidth instructions + // might be reached that in turn need different flags. + // The only sure thing is that if there are no kInstEmptyWidth + // instructions at all, no flags will be needed. + // We could do the extra work to figure out the full set of + // possibly needed flags by exploring past the kInstEmptyWidth + // instructions, but the check above -- are any flags needed + // at all? -- handles the most common case. 
More fine-grained + // analysis can only be justified by measurements showing that + // too many redundant states are being allocated. + + // If there are no Insts in the list, it's a dead state, + // which is useful to signal with a special pointer so that + // the execution loop can stop early. This is only okay + // if the state is *not* a matching state. + if (n == 0 && flag == 0) { + delete[] inst; + if (DebugDFA) + fprintf(stderr, " -> DeadState\n"); + return DeadState; + } + + // If we're in longest match mode, the state is a sequence of + // unordered state sets separated by Marks. Sort each set + // to canonicalize, to reduce the number of distinct sets stored. + if (kind_ == Prog::kLongestMatch) { + int* ip = inst; + int* ep = ip + n; + while (ip < ep) { + int* markp = ip; + while (markp < ep && *markp != Mark) + markp++; + sort(ip, markp); + if (markp < ep) + markp++; + ip = markp; + } + } + + // Save the needed empty-width flags in the top bits for use later. + flag |= needflags << kFlagNeedShift; + + State* state = CachedState(inst, n, flag); + delete[] inst; + return state; +} + +// Looks in the State cache for a State matching inst, ninst, flag. +// If one is found, returns it. If one is not found, allocates one, +// inserts it in the cache, and returns it. +DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) { + if (DEBUG_MODE) + mutex_.AssertHeld(); + + // Look in the cache for a pre-existing state. + State state = { inst, ninst, flag, NULL }; + StateSet::iterator it = state_cache_.find(&state); + if (it != state_cache_.end()) { + if (DebugDFA) + fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str()); + return *it; + } + + // Must have enough memory for new state. + // In addition to what we're going to allocate, + // the state cache hash table seems to incur about 32 bytes per + // State*, empirically. 
+ const int kStateCacheOverhead = 32; + int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot + int mem = sizeof(State) + nnext*sizeof(State*) + ninst*sizeof(int); + if (mem_budget_ < mem + kStateCacheOverhead) { + mem_budget_ = -1; + return NULL; + } + mem_budget_ -= mem + kStateCacheOverhead; + + // Allocate new state, along with room for next and inst. + char* space = new char[mem]; + State* s = reinterpret_cast(space); + s->next_ = reinterpret_cast(s + 1); + s->inst_ = reinterpret_cast(s->next_ + nnext); + memset(s->next_, 0, nnext*sizeof s->next_[0]); + memmove(s->inst_, inst, ninst*sizeof s->inst_[0]); + s->ninst_ = ninst; + s->flag_ = flag; + if (DebugDFA) + fprintf(stderr, " -> %s\n", DumpState(s).c_str()); + + // Put state in cache and return it. + state_cache_.insert(s); + return s; +} + +// Clear the cache. Must hold cache_mutex_.w or be in destructor. +void DFA::ClearCache() { + // In case state_cache_ doesn't support deleting entries + // during iteration, copy into a vector and then delete. + vector v; + v.reserve(state_cache_.size()); + for (StateSet::iterator it = state_cache_.begin(); + it != state_cache_.end(); ++it) + v.push_back(*it); + state_cache_.clear(); + for (size_t i = 0; i < v.size(); i++) + delete[] reinterpret_cast(v[i]); +} + +// Copies insts in state s to the work queue q. +void DFA::StateToWorkq(State* s, Workq* q) { + q->clear(); + for (int i = 0; i < s->ninst_; i++) { + if (s->inst_[i] == Mark) + q->mark(); + else + q->insert_new(s->inst_[i]); + } +} + +// Adds ip to the work queue, following empty arrows according to flag +// and expanding kInstAlt instructions (two-target gotos). +void DFA::AddToQueue(Workq* q, int id, uint flag) { + + // Use astack_ to hold our stack of states yet to process. 
+ // It is sized to have room for nastack_ == 2*prog->size() + nmark + // instructions, which is enough: each instruction can be + // processed by the switch below only once, and the processing + // pushes at most two instructions plus maybe a mark. + // (If we're using marks, nmark == prog->size(); otherwise nmark == 0.) + int* stk = astack_; + int nstk = 0; + + stk[nstk++] = id; + while (nstk > 0) { + DCHECK_LE(nstk, nastack_); + id = stk[--nstk]; + + if (id == Mark) { + q->mark(); + continue; + } + + if (id == 0) + continue; + + // If ip is already on the queue, nothing to do. + // Otherwise add it. We don't actually keep all the ones + // that get added -- for example, kInstAlt is ignored + // when on a work queue -- but adding all ip's here + // increases the likelihood of q->contains(id), + // reducing the amount of duplicated work. + if (q->contains(id)) + continue; + q->insert_new(id); + + // Process instruction. + Prog::Inst* ip = prog_->inst(id); + switch (ip->opcode()) { + case kInstFail: // can't happen: discarded above + break; + + case kInstByteRange: // just save these on the queue + case kInstMatch: + break; + + case kInstCapture: // DFA treats captures as no-ops. + case kInstNop: + stk[nstk++] = ip->out(); + break; + + case kInstAlt: // two choices: expand both, in order + case kInstAltMatch: + // Want to visit out then out1, so push on stack in reverse order. + // This instruction is the [00-FF]* loop at the beginning of + // a leftmost-longest unanchored search, separate out from out1 + // with a Mark, so that out1's threads (which will start farther + // to the right in the string being searched) are lower priority + // than the current ones. + stk[nstk++] = ip->out1(); + if (q->maxmark() > 0 && + id == prog_->start_unanchored() && id != prog_->start()) + stk[nstk++] = Mark; + stk[nstk++] = ip->out(); + break; + + case kInstEmptyWidth: + // Continue on if we have all the right flag bits. 
+ if (ip->empty() & ~flag) + break; + stk[nstk++] = ip->out(); + break; + } + } +} + +// Running of work queues. In the work queue, order matters: +// the queue is sorted in priority order. If instruction i comes before j, +// then the instructions that i produces during the run must come before +// the ones that j produces. In order to keep this invariant, all the +// work queue runners have to take an old queue to process and then +// also a new queue to fill in. It's not acceptable to add to the end of +// an existing queue, because new instructions will not end up in the +// correct position. + +// Runs the work queue, processing the empty strings indicated by flag. +// For example, flag == kEmptyBeginLine|kEmptyEndLine means to match +// both ^ and $. It is important that callers pass all flags at once: +// processing both ^ and $ is not the same as first processing only ^ +// and then processing only $. Doing the two-step sequence won't match +// ^$^$^$ but processing ^ and $ simultaneously will (and is the behavior +// exhibited by existing implementations). +void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint flag) { + newq->clear(); + for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { + if (oldq->is_mark(*i)) + AddToQueue(newq, Mark, flag); + else + AddToQueue(newq, *i, flag); + } +} + +// Runs the work queue, processing the single byte c followed by any empty +// strings indicated by flag. For example, c == 'a' and flag == kEmptyEndLine, +// means to match c$. Sets the bool *ismatch to true if the end of the +// regular expression program has been reached (the regexp has matched). 
// RunWorkqOnByte: advances every instruction in oldq over the byte c,
// writing the resulting instructions into newq (which is cleared first).
//   c       - input byte, or kByteEndText for the fake end-of-text byte.
//   flag    - empty-width flags in effect after c; passed to AddToQueue.
//   ismatch - in/out; set to true when a kInstMatch is accepted.  It is
//             never reset here, so the caller must initialize it.
//   kind    - match kind; kFirstMatch stops at the first match found.
void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
                         int c, uint flag, bool* ismatch,
                         Prog::MatchKind kind) {
  if (DEBUG_MODE)
    mutex_.AssertHeld();

  newq->clear();
  for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) {
    if (oldq->is_mark(*i)) {
      // A Mark ends a group of instructions: once a match has been seen,
      // the groups after the Mark are not processed.
      if (*ismatch)
        return;
      newq->mark();
      continue;
    }
    int id = *i;
    Prog::Inst* ip = prog_->inst(id);
    switch (ip->opcode()) {
      case kInstFail:        // never succeeds
      case kInstCapture:     // already followed
      case kInstNop:         // already followed
      case kInstAlt:         // already followed
      case kInstAltMatch:    // already followed
      case kInstEmptyWidth:  // already followed
        break;

      case kInstByteRange:   // can follow if c is in range
        if (ip->Matches(c))
          AddToQueue(newq, ip->out(), flag);
        break;

      case kInstMatch:
        // With an end-anchored program, a match only counts at end of text.
        if (prog_->anchor_end() && c != kByteEndText)
          break;
        *ismatch = true;
        if (kind == Prog::kFirstMatch) {
          // Can stop processing work queue since we found a match.
          return;
        }
        break;
    }
  }

  if (DebugDFA)
    fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n", DumpWorkq(oldq).c_str(),
            c, flag, DumpWorkq(newq).c_str(), *ismatch);
}

// Processes input byte c in state, returning new state.
// Caller does not hold mutex.
// Thin locking wrapper: takes mutex_ and delegates to RunStateOnByte.
DFA::State* DFA::RunStateOnByteUnlocked(State* state, int c) {
  // Keep only one RunStateOnByte going
  // even if the DFA is being run by multiple threads.
  MutexLock l(&mutex_);
  return RunStateOnByte(state, c);
}

// Processes input byte c in state, returning new state.
// The result is also stored in state->next_[ByteMap(c)] so that later
// lookups for the same transition can skip this computation.
// Returns NULL for an invalid special state (after LOG(DFATAL)) or when
// WorkqToCachedState could not produce a State.
DFA::State* DFA::RunStateOnByte(State* state, int c) {
  if (DEBUG_MODE)
    mutex_.AssertHeld();
  if (state <= SpecialStateMax) {
    if (state == FullMatchState) {
      // It is convenient for routines like PossibleMatchRange
      // if we implement RunStateOnByte for FullMatchState:
      // once you get into this state you never get out,
      // so it's pretty easy.
      return FullMatchState;
    }
    if (state == DeadState) {
      LOG(DFATAL) << "DeadState in RunStateOnByte";
      return NULL;
    }
    if (state == NULL) {
      LOG(DFATAL) << "NULL state in RunStateOnByte";
      return NULL;
    }
    LOG(DFATAL) << "Unexpected special state in RunStateOnByte";
    return NULL;
  }

  // If someone else already computed this, return it.
  State* ns;
  ATOMIC_LOAD_CONSUME(ns, &state->next_[ByteMap(c)]);
  if (ns != NULL)
    return ns;

  // Convert state into Workq.
  StateToWorkq(state, q0_);

  // Flags marking the kinds of empty-width things (^ $ etc)
  // around this byte.  Before the byte we have the flags recorded
  // in the State structure itself.  After the byte we have
  // nothing yet (but that will change: read on).
  uint needflag = state->flag_ >> kFlagNeedShift;
  uint beforeflag = state->flag_ & kFlagEmptyMask;
  uint oldbeforeflag = beforeflag;
  uint afterflag = 0;

  if (c == '\n') {
    // Insert implicit $ and ^ around \n
    beforeflag |= kEmptyEndLine;
    afterflag |= kEmptyBeginLine;
  }

  if (c == kByteEndText) {
    // Insert implicit $ and \z before the fake "end text" byte.
    beforeflag |= kEmptyEndLine | kEmptyEndText;
  }

  // The state flag kFlagLastWord says whether the last
  // byte processed was a word character.  Use that info to
  // insert empty-width (non-)word boundaries.
  bool islastword = state->flag_ & kFlagLastWord;
  bool isword = (c != kByteEndText && Prog::IsWordChar(c));
  if (isword == islastword)
    beforeflag |= kEmptyNonWordBoundary;
  else
    beforeflag |= kEmptyWordBoundary;

  // Okay, finally ready to run.
  // Only useful to rerun on empty string if there are new, useful flags.
  if (beforeflag & ~oldbeforeflag & needflag) {
    RunWorkqOnEmptyString(q0_, q1_, beforeflag);
    swap(q0_, q1_);
  }
  bool ismatch = false;
  RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch, kind_);

  // Most of the time, we build the state from the output of
  // RunWorkqOnByte, so swap q0_ and q1_ here.  However, so that
  // RE2::Set can tell exactly which match instructions
  // contributed to the match, don't swap if c is kByteEndText.
  // The resulting state wouldn't be correct for further processing
  // of the string, but we're at the end of the text so that's okay.
  // Leaving q0_ alone preserves the match instructions that led to
  // the current setting of ismatch.
  if (c != kByteEndText || kind_ != Prog::kManyMatch)
    swap(q0_, q1_);

  // Save afterflag along with ismatch and isword in new state.
  uint flag = afterflag;
  if (ismatch)
    flag |= kFlagMatch;
  if (isword)
    flag |= kFlagLastWord;

  ns = WorkqToCachedState(q0_, flag);

  // Flush ns before linking to it.
  // Write barrier before updating state->next_ so that the
  // main search loop can proceed without any locking, for speed.
  // (Otherwise it would need one mutex operation per input byte.)
  ATOMIC_STORE_RELEASE(&state->next_[ByteMap(c)], ns);
  return ns;
}


//////////////////////////////////////////////////////////////////////
// DFA cache reset.

// Reader-writer lock helper.
//
// The DFA uses a reader-writer mutex to protect the state graph itself.
// Traversing the state graph requires holding the mutex for reading,
// and discarding the state graph and starting over requires holding the
// lock for writing.  If a search needs to expand the graph but is out
// of memory, it will need to drop its read lock and then acquire the
// write lock.  Since it cannot then atomically downgrade from write lock
// to read lock, it runs the rest of the search holding the write lock.
// (This probably helps avoid repeated contention, but really the decision
// is forced by the Mutex interface.)  It's a bit complicated to keep
// track of whether the lock is held for reading or writing and thread
// that through the search, so instead we encapsulate it in the RWLocker
// and pass that around.
+ +class DFA::RWLocker { + public: + explicit RWLocker(Mutex* mu); + ~RWLocker(); + + // If the lock is only held for reading right now, + // drop the read lock and re-acquire for writing. + // Subsequent calls to LockForWriting are no-ops. + // Notice that the lock is *released* temporarily. + void LockForWriting(); + + // Returns whether the lock is already held for writing. + bool IsLockedForWriting() { + return writing_; + } + + private: + Mutex* mu_; + bool writing_; + + DISALLOW_COPY_AND_ASSIGN(RWLocker); +}; + +DFA::RWLocker::RWLocker(Mutex* mu) + : mu_(mu), writing_(false) { + + mu_->ReaderLock(); +} + +// This function is marked as NO_THREAD_SAFETY_ANALYSIS because the annotations +// does not support lock upgrade. +void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS { + if (!writing_) { + mu_->ReaderUnlock(); + mu_->Lock(); + writing_ = true; + } +} + +DFA::RWLocker::~RWLocker() { + if (writing_) + mu_->WriterUnlock(); + else + mu_->ReaderUnlock(); +} + + +// When the DFA's State cache fills, we discard all the states in the +// cache and start over. Many threads can be using and adding to the +// cache at the same time, so we synchronize using the cache_mutex_ +// to keep from stepping on other threads. Specifically, all the +// threads using the current cache hold cache_mutex_ for reading. +// When a thread decides to flush the cache, it drops cache_mutex_ +// and then re-acquires it for writing. That ensures there are no +// other threads accessing the cache anymore. The rest of the search +// runs holding cache_mutex_ for writing, avoiding any contention +// with or cache pollution caused by other threads. + +void DFA::ResetCache(RWLocker* cache_lock) { + // Re-acquire the cache_mutex_ for writing (exclusive use). + bool was_writing = cache_lock->IsLockedForWriting(); + cache_lock->LockForWriting(); + + // If we already held cache_mutex_ for writing, it means + // this invocation of Search() has already reset the + // cache once already. 
That's a pretty clear indication + // that the cache is too small. Warn about that, once. + // TODO(rsc): Only warn if state_cache_.size() < some threshold. + if (was_writing && !cache_warned_) { + LOG(INFO) << "DFA memory cache could be too small: " + << "only room for " << state_cache_.size() << " states."; + cache_warned_ = true; + } + + // Clear the cache, reset the memory budget. + for (int i = 0; i < kMaxStart; i++) { + start_[i].start = NULL; + start_[i].firstbyte = kFbUnknown; + } + ClearCache(); + mem_budget_ = state_budget_; +} + +// Typically, a couple States do need to be preserved across a cache +// reset, like the State at the current point in the search. +// The StateSaver class helps keep States across cache resets. +// It makes a copy of the state's guts outside the cache (before the reset) +// and then can be asked, after the reset, to recreate the State +// in the new cache. For example, in a DFA method ("this" is a DFA): +// +// StateSaver saver(this, s); +// ResetCache(cache_lock); +// s = saver.Restore(); +// +// The saver should always have room in the cache to re-create the state, +// because resetting the cache locks out all other threads, and the cache +// is known to have room for at least a couple states (otherwise the DFA +// constructor fails). + +class DFA::StateSaver { + public: + explicit StateSaver(DFA* dfa, State* state); + ~StateSaver(); + + // Recreates and returns a state equivalent to the + // original state passed to the constructor. + // Returns NULL if the cache has filled, but + // since the DFA guarantees to have room in the cache + // for a couple states, should never return NULL + // if used right after ResetCache. 
+ State* Restore(); + + private: + DFA* dfa_; // the DFA to use + int* inst_; // saved info from State + int ninst_; + uint flag_; + bool is_special_; // whether original state was special + State* special_; // if is_special_, the original state + + DISALLOW_COPY_AND_ASSIGN(StateSaver); +}; + +DFA::StateSaver::StateSaver(DFA* dfa, State* state) { + dfa_ = dfa; + if (state <= SpecialStateMax) { + inst_ = NULL; + ninst_ = 0; + flag_ = 0; + is_special_ = true; + special_ = state; + return; + } + is_special_ = false; + special_ = NULL; + flag_ = state->flag_; + ninst_ = state->ninst_; + inst_ = new int[ninst_]; + memmove(inst_, state->inst_, ninst_*sizeof inst_[0]); +} + +DFA::StateSaver::~StateSaver() { + if (!is_special_) + delete[] inst_; +} + +DFA::State* DFA::StateSaver::Restore() { + if (is_special_) + return special_; + MutexLock l(&dfa_->mutex_); + State* s = dfa_->CachedState(inst_, ninst_, flag_); + if (s == NULL) + LOG(DFATAL) << "StateSaver failed to restore state."; + return s; +} + + +////////////////////////////////////////////////////////////////////// +// +// DFA execution. +// +// The basic search loop is easy: start in a state s and then for each +// byte c in the input, s = s->next[c]. +// +// This simple description omits a few efficiency-driven complications. +// +// First, the State graph is constructed incrementally: it is possible +// that s->next[c] is null, indicating that that state has not been +// fully explored. In this case, RunStateOnByte must be invoked to +// determine the next state, which is cached in s->next[c] to save +// future effort. An alternative reason for s->next[c] to be null is +// that the DFA has reached a so-called "dead state", in which any match +// is no longer possible. In this case RunStateOnByte will return NULL +// and the processing of the string can stop early. +// +// Second, a 256-element pointer array for s->next_ makes each State +// quite large (2kB on 64-bit machines). 
// Instead, dfa->bytemap_[]
// maps from bytes to "byte classes" and then next_ only needs to have
// as many pointers as there are byte classes.  A byte class is simply a
// range of bytes that the regexp never distinguishes between.
// A regexp looking for a[abc] would have four byte ranges -- 0 to 'a'-1,
// 'a', 'b' to 'c', and 'c'+1 to 0xFF.  The bytemap slows us a little bit
// but in exchange we typically cut the size of a State (and thus our
// memory footprint) by about 5-10x.  The comments still refer to
// s->next[c] for simplicity, but code should refer to s->next_[bytemap_[c]].
//
// Third, it is common for a DFA for an unanchored match to begin in a
// state in which only one particular byte value can take the DFA to a
// different state.  That is, s->next[c] != s for only one c.  In this
// situation, the DFA can do better than executing the simple loop.
// Instead, it can call memchr to search very quickly for the byte c.
// Whether the start state has this property is determined during a
// pre-compilation pass, and if so, the byte b is passed to the search
// loop as the "firstbyte" argument, along with a boolean "have_firstbyte".
//
// Fourth, the desired behavior is to search for the leftmost-best match
// (approximately, the same one that Perl would find), which is not
// necessarily the match ending earliest in the string.  Each time a
// match is found, it must be noted, but the DFA must continue on in
// hope of finding a higher-priority match.  In some cases, the caller only
// cares whether there is any match at all, not which one is found.
// The "want_earliest_match" flag causes the search to stop at the first
// match found.
//
// Fifth, one algorithm that uses the DFA needs it to run over the
// input string backward, beginning at the end and ending at the beginning.
// Passing false for the "run_forward" flag causes the DFA to run backward.
+// +// The checks for these last three cases, which in a naive implementation +// would be performed once per input byte, slow the general loop enough +// to merit specialized versions of the search loop for each of the +// eight possible settings of the three booleans. Rather than write +// eight different functions, we write one general implementation and then +// inline it to create the specialized ones. +// +// Note that matches are delayed by one byte, to make it easier to +// accomodate match conditions depending on the next input byte (like $ and \b). +// When s->next[c]->IsMatch(), it means that there is a match ending just +// *before* byte c. + +// The generic search loop. Searches text for a match, returning +// the pointer to the end of the chosen match, or NULL if no match. +// The bools are equal to the same-named variables in params, but +// making them function arguments lets the inliner specialize +// this function to each combination (see two paragraphs above). +inline bool DFA::InlinedSearchLoop(SearchParams* params, + bool have_firstbyte, + bool want_earliest_match, + bool run_forward) { + State* start = params->start; + const uint8* bp = BytePtr(params->text.begin()); // start of text + const uint8* p = bp; // text scanning point + const uint8* ep = BytePtr(params->text.end()); // end of text + const uint8* resetp = NULL; // p at last cache reset + if (!run_forward) + swap(p, ep); + + const uint8* bytemap = prog_->bytemap(); + const uint8* lastmatch = NULL; // most recent matching position in text + bool matched = false; + State* s = start; + + if (s->IsMatch()) { + matched = true; + lastmatch = p; + if (want_earliest_match) { + params->ep = reinterpret_cast(lastmatch); + return true; + } + } + + while (p != ep) { + if (DebugDFA) + fprintf(stderr, "@%d: %s\n", static_cast(p - bp), + DumpState(s).c_str()); + if (have_firstbyte && s == start) { + // In start state, only way out is to find firstbyte, + // so use optimized assembly in memchr to 
skip ahead. + // If firstbyte isn't found, we can skip to the end + // of the string. + if (run_forward) { + if ((p = BytePtr(memchr(p, params->firstbyte, ep - p))) == NULL) { + p = ep; + break; + } + } else { + if ((p = BytePtr(memrchr(ep, params->firstbyte, p - ep))) == NULL) { + p = ep; + break; + } + p++; + } + } + + int c; + if (run_forward) + c = *p++; + else + c = *--p; + + // Note that multiple threads might be consulting + // s->next_[bytemap[c]] simultaneously. + // RunStateOnByte takes care of the appropriate locking, + // including a memory barrier so that the unlocked access + // (sometimes known as "double-checked locking") is safe. + // The alternative would be either one DFA per thread + // or one mutex operation per input byte. + // + // ns == DeadState means the state is known to be dead + // (no more matches are possible). + // ns == NULL means the state has not yet been computed + // (need to call RunStateOnByteUnlocked). + // RunStateOnByte returns ns == NULL if it is out of memory. + // ns == FullMatchState means the rest of the string matches. + // + // Okay to use bytemap[] not ByteMap() here, because + // c is known to be an actual byte and not kByteEndText. + + State* ns; + ATOMIC_LOAD_CONSUME(ns, &s->next_[bytemap[c]]); + if (ns == NULL) { + ns = RunStateOnByteUnlocked(s, c); + if (ns == NULL) { + // After we reset the cache, we hold cache_mutex exclusively, + // so if resetp != NULL, it means we filled the DFA state + // cache with this search alone (without any other threads). + // Benchmarks show that doing a state computation on every + // byte runs at about 0.2 MB/s, while the NFA (nfa.cc) can do the + // same at about 2 MB/s. Unless we're processing an average + // of 10 bytes per state computation, fail so that RE2 can + // fall back to the NFA. 
+ if (FLAGS_re2_dfa_bail_when_slow && resetp != NULL && + static_cast(p - resetp) < 10*state_cache_.size()) { + params->failed = true; + return false; + } + resetp = p; + + // Prepare to save start and s across the reset. + StateSaver save_start(this, start); + StateSaver save_s(this, s); + + // Discard all the States in the cache. + ResetCache(params->cache_lock); + + // Restore start and s so we can continue. + if ((start = save_start.Restore()) == NULL || + (s = save_s.Restore()) == NULL) { + // Restore already did LOG(DFATAL). + params->failed = true; + return false; + } + ns = RunStateOnByteUnlocked(s, c); + if (ns == NULL) { + LOG(DFATAL) << "RunStateOnByteUnlocked failed after ResetCache"; + params->failed = true; + return false; + } + } + } + if (ns <= SpecialStateMax) { + if (ns == DeadState) { + params->ep = reinterpret_cast(lastmatch); + return matched; + } + // FullMatchState + params->ep = reinterpret_cast(ep); + return true; + } + s = ns; + + if (s->IsMatch()) { + matched = true; + // The DFA notices the match one byte late, + // so adjust p before using it in the match. + if (run_forward) + lastmatch = p - 1; + else + lastmatch = p + 1; + if (DebugDFA) + fprintf(stderr, "match @%d! [%s]\n", + static_cast(lastmatch - bp), + DumpState(s).c_str()); + + if (want_earliest_match) { + params->ep = reinterpret_cast(lastmatch); + return true; + } + } + } + + // Process one more byte to see if it triggers a match. + // (Remember, matches are delayed one byte.) 
+ int lastbyte; + if (run_forward) { + if (params->text.end() == params->context.end()) + lastbyte = kByteEndText; + else + lastbyte = params->text.end()[0] & 0xFF; + } else { + if (params->text.begin() == params->context.begin()) + lastbyte = kByteEndText; + else + lastbyte = params->text.begin()[-1] & 0xFF; + } + + State* ns; + ATOMIC_LOAD_CONSUME(ns, &s->next_[ByteMap(lastbyte)]); + if (ns == NULL) { + ns = RunStateOnByteUnlocked(s, lastbyte); + if (ns == NULL) { + StateSaver save_s(this, s); + ResetCache(params->cache_lock); + if ((s = save_s.Restore()) == NULL) { + params->failed = true; + return false; + } + ns = RunStateOnByteUnlocked(s, lastbyte); + if (ns == NULL) { + LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset"; + params->failed = true; + return false; + } + } + } + s = ns; + if (DebugDFA) + fprintf(stderr, "@_: %s\n", DumpState(s).c_str()); + if (s == FullMatchState) { + params->ep = reinterpret_cast(ep); + return true; + } + if (s > SpecialStateMax && s->IsMatch()) { + matched = true; + lastmatch = p; + if (params->matches && kind_ == Prog::kManyMatch) { + vector* v = params->matches; + v->clear(); + for (int i = 0; i < s->ninst_; i++) { + Prog::Inst* ip = prog_->inst(s->inst_[i]); + if (ip->opcode() == kInstMatch) + v->push_back(ip->match_id()); + } + } + if (DebugDFA) + fprintf(stderr, "match @%d! [%s]\n", static_cast(lastmatch - bp), + DumpState(s).c_str()); + } + params->ep = reinterpret_cast(lastmatch); + return matched; +} + +// Inline specializations of the general loop. 
+bool DFA::SearchFFF(SearchParams* params) { + return InlinedSearchLoop(params, 0, 0, 0); +} +bool DFA::SearchFFT(SearchParams* params) { + return InlinedSearchLoop(params, 0, 0, 1); +} +bool DFA::SearchFTF(SearchParams* params) { + return InlinedSearchLoop(params, 0, 1, 0); +} +bool DFA::SearchFTT(SearchParams* params) { + return InlinedSearchLoop(params, 0, 1, 1); +} +bool DFA::SearchTFF(SearchParams* params) { + return InlinedSearchLoop(params, 1, 0, 0); +} +bool DFA::SearchTFT(SearchParams* params) { + return InlinedSearchLoop(params, 1, 0, 1); +} +bool DFA::SearchTTF(SearchParams* params) { + return InlinedSearchLoop(params, 1, 1, 0); +} +bool DFA::SearchTTT(SearchParams* params) { + return InlinedSearchLoop(params, 1, 1, 1); +} + +// For debugging, calls the general code directly. +bool DFA::SlowSearchLoop(SearchParams* params) { + return InlinedSearchLoop(params, + params->firstbyte >= 0, + params->want_earliest_match, + params->run_forward); +} + +// For performance, calls the appropriate specialized version +// of InlinedSearchLoop. +bool DFA::FastSearchLoop(SearchParams* params) { + // Because the methods are private, the Searches array + // cannot be declared at top level. + static bool (DFA::*Searches[])(SearchParams*) = { + &DFA::SearchFFF, + &DFA::SearchFFT, + &DFA::SearchFTF, + &DFA::SearchFTT, + &DFA::SearchTFF, + &DFA::SearchTFT, + &DFA::SearchTTF, + &DFA::SearchTTT, + }; + + bool have_firstbyte = (params->firstbyte >= 0); + int index = 4 * have_firstbyte + + 2 * params->want_earliest_match + + 1 * params->run_forward; + return (this->*Searches[index])(params); +} + + +// The discussion of DFA execution above ignored the question of how +// to determine the initial state for the search loop. There are two +// factors that influence the choice of start state. +// +// The first factor is whether the search is anchored or not. 
// The regexp program (Prog*) itself has
// two different entry points: one for anchored searches and one for
// unanchored searches.  (The unanchored version starts with a leading ".*?"
// and then jumps to the anchored one.)
//
// The second factor is where text appears in the larger context, which
// determines which empty-string operators can be matched at the beginning
// of execution.  If text is at the very beginning of context, \A and ^ match.
// Otherwise if text is at the beginning of a line, then ^ matches.
// Otherwise it matters whether the character before text is a word character
// or a non-word character.
//
// The two cases (unanchored vs not) and four cases (empty-string flags)
// combine to make the eight cases recorded in the DFA's cached StartInfos.
// In the code below they are the entries of start_[], indexed by one of
// kStartBeginText, kStartBeginLine, kStartAfterWordChar or
// kStartAfterNonWordChar, optionally OR'ed with kStartAnchored.
// The start state for each is filled in the first time it
// is used for an actual search.

// Examines text, context, and anchored to determine the right start
// state for the DFA search loop.  Fills in params and returns true on success.
// Returns false on failure.
//
// On success params->start and params->firstbyte are set from the chosen
// StartInfo.  If text does not lie inside context, params->start is set to
// DeadState and the function still returns true.  On failure params->failed
// is set to true.
bool DFA::AnalyzeSearch(SearchParams* params) {
  const StringPiece& text = params->text;
  const StringPiece& context = params->context;

  // Sanity check: make sure that text lies within context.
  if (text.begin() < context.begin() || text.end() > context.end()) {
    LOG(DFATAL) << "Text is not inside context.";
    params->start = DeadState;
    return true;
  }

  // Determine correct search type.
  // A forward search inspects the byte just before text; a backward search
  // inspects the byte just after text.  Both map onto the same four
  // kStart* cases.
  int start;
  uint flags;
  if (params->run_forward) {
    if (text.begin() == context.begin()) {
      start = kStartBeginText;
      flags = kEmptyBeginText|kEmptyBeginLine;
    } else if (text.begin()[-1] == '\n') {
      start = kStartBeginLine;
      flags = kEmptyBeginLine;
    } else if (Prog::IsWordChar(text.begin()[-1] & 0xFF)) {
      start = kStartAfterWordChar;
      flags = kFlagLastWord;
    } else {
      start = kStartAfterNonWordChar;
      flags = 0;
    }
  } else {
    if (text.end() == context.end()) {
      start = kStartBeginText;
      flags = kEmptyBeginText|kEmptyBeginLine;
    } else if (text.end()[0] == '\n') {
      start = kStartBeginLine;
      flags = kEmptyBeginLine;
    } else if (Prog::IsWordChar(text.end()[0] & 0xFF)) {
      start = kStartAfterWordChar;
      flags = kFlagLastWord;
    } else {
      start = kStartAfterNonWordChar;
      flags = 0;
    }
  }
  // A program that is itself anchored at the start uses the anchored slot
  // even when the caller did not ask for an anchored search.
  if (params->anchored || prog_->anchor_start())
    start |= kStartAnchored;
  StartInfo* info = &start_[start];

  // Try once without cache_lock for writing.
  // Try again after resetting the cache
  // (ResetCache will relock cache_lock for writing).
  if (!AnalyzeSearchHelper(params, info, flags)) {
    ResetCache(params->cache_lock);
    if (!AnalyzeSearchHelper(params, info, flags)) {
      LOG(DFATAL) << "Failed to analyze start state.";
      params->failed = true;
      return false;
    }
  }

  if (DebugDFA) {
    int fb;
    ATOMIC_LOAD_RELAXED(fb, &info->firstbyte);
    fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s firstbyte=%d\n",
            params->anchored, params->run_forward, flags,
            DumpState(info->start).c_str(), fb);
  }

  params->start = info->start;
  ATOMIC_LOAD_ACQUIRE(params->firstbyte, &info->firstbyte);

  return true;
}

// Fills in info if needed.  Returns true on success, false on failure.
//
// info->firstbyte doubles as the "already filled in" marker: any value other
// than kFbUnknown means info->start has been computed.  It ends up as
// kFbNone, kFbMany, or the single byte value (0..255) that leaves the
// start state.  Returns false when WorkqToCachedState or RunStateOnByte
// yields NULL.
bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
                              uint flags) {
  // Quick check.
  // Done without holding mutex_; pairs with the release stores below.
  int fb;
  ATOMIC_LOAD_ACQUIRE(fb, &info->firstbyte);
  if (fb != kFbUnknown)
    return true;

  // Check again now that mutex_ is held, before doing the work.
  MutexLock l(&mutex_);
  if (info->firstbyte != kFbUnknown)
    return true;

  q0_->clear();
  AddToQueue(q0_,
             params->anchored ? prog_->start() : prog_->start_unanchored(),
             flags);
  info->start = WorkqToCachedState(q0_, flags);
  if (info->start == NULL)
    return false;

  if (info->start == DeadState) {
    // Synchronize with "quick check" above.
    ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone);
    return true;
  }

  if (info->start == FullMatchState) {
    // Synchronize with "quick check" above.
    ATOMIC_STORE_RELEASE(&info->firstbyte, kFbNone);  // will be ignored
    return true;
  }

  // Compute info->firstbyte by running state on all
  // possible byte values, looking for a single one that
  // leads to a different state.
  int firstbyte = kFbNone;
  for (int i = 0; i < 256; i++) {
    State* s = RunStateOnByte(info->start, i);
    if (s == NULL) {
      // Synchronize with "quick check" above.
      // NOTE(review): this publishes a possibly partial firstbyte before
      // reporting failure; the caller then calls ResetCache and retries.
      // Presumably ResetCache restores kFbUnknown -- confirm.
      ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte);
      return false;
    }
    if (s == info->start)
      continue;
    // Goes to new state...
    if (firstbyte == kFbNone) {
      firstbyte = i;        // ... first one
    } else {
      firstbyte = kFbMany;  // ... too many
      break;
    }
  }
  // Synchronize with "quick check" above.
  ATOMIC_STORE_RELEASE(&info->firstbyte, firstbyte);
  return true;
}

// The actual DFA search: calls AnalyzeSearch and then FastSearchLoop.
+bool DFA::Search(const StringPiece& text, + const StringPiece& context, + bool anchored, + bool want_earliest_match, + bool run_forward, + bool* failed, + const char** epp, + vector* matches) { + *epp = NULL; + if (!ok()) { + *failed = true; + return false; + } + *failed = false; + + if (DebugDFA) { + fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str()); + fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n", + text.as_string().c_str(), anchored, want_earliest_match, + run_forward, kind_); + } + + RWLocker l(&cache_mutex_); + SearchParams params(text, context, &l); + params.anchored = anchored; + params.want_earliest_match = want_earliest_match; + params.run_forward = run_forward; + params.matches = matches; + + if (!AnalyzeSearch(¶ms)) { + *failed = true; + return false; + } + if (params.start == DeadState) + return false; + if (params.start == FullMatchState) { + if (run_forward == want_earliest_match) + *epp = text.begin(); + else + *epp = text.end(); + return true; + } + if (DebugDFA) + fprintf(stderr, "start %s\n", DumpState(params.start).c_str()); + bool ret = FastSearchLoop(¶ms); + if (params.failed) { + *failed = true; + return false; + } + *epp = params.ep; + return ret; +} + +// Deletes dfa. +// +// This is a separate function so that +// prog.h can be used without moving the definition of +// class DFA out of this file. If you set +// prog->dfa_ = dfa; +// then you also have to set +// prog->delete_dfa_ = DeleteDFA; +// so that ~Prog can delete the dfa. +static void DeleteDFA(DFA* dfa) { + delete dfa; +} + +DFA* Prog::GetDFA(MatchKind kind) { + DFA*volatile* pdfa; + if (kind == kFirstMatch || kind == kManyMatch) { + pdfa = &dfa_first_; + } else { + kind = kLongestMatch; + pdfa = &dfa_longest_; + } + + // Quick check. 
+ DFA *dfa; + ATOMIC_LOAD_ACQUIRE(dfa, pdfa); + if (dfa != NULL) + return dfa; + + MutexLock l(&dfa_mutex_); + dfa = *pdfa; + if (dfa != NULL) + return dfa; + + // For a forward DFA, half the memory goes to each DFA. + // For a reverse DFA, all the memory goes to the + // "longest match" DFA, because RE2 never does reverse + // "first match" searches. + int64 m = dfa_mem_/2; + if (reversed_) { + if (kind == kLongestMatch || kind == kManyMatch) + m = dfa_mem_; + else + m = 0; + } + dfa = new DFA(this, kind, m); + delete_dfa_ = DeleteDFA; + + // Synchronize with "quick check" above. + ATOMIC_STORE_RELEASE(pdfa, dfa); + + return dfa; +} + + +// Executes the regexp program to search in text, +// which itself is inside the larger context. (As a convenience, +// passing a NULL context is equivalent to passing text.) +// Returns true if a match is found, false if not. +// If a match is found, fills in match0->end() to point at the end of the match +// and sets match0->begin() to text.begin(), since the DFA can't track +// where the match actually began. +// +// This is the only external interface (class DFA only exists in this file). +// +bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context, + Anchor anchor, MatchKind kind, + StringPiece* match0, bool* failed, vector* matches) { + *failed = false; + + StringPiece context = const_context; + if (context.begin() == NULL) + context = text; + bool carat = anchor_start(); + bool dollar = anchor_end(); + if (reversed_) { + bool t = carat; + carat = dollar; + dollar = t; + } + if (carat && context.begin() != text.begin()) + return false; + if (dollar && context.end() != text.end()) + return false; + + // Handle full match by running an anchored longest match + // and then checking if it covers all of text. 
+ bool anchored = anchor == kAnchored || anchor_start() || kind == kFullMatch; + bool endmatch = false; + if (kind == kManyMatch) { + endmatch = true; + } else if (kind == kFullMatch || anchor_end()) { + endmatch = true; + kind = kLongestMatch; + } + + // If the caller doesn't care where the match is (just whether one exists), + // then we can stop at the very first match we find, the so-called + // "shortest match". + bool want_shortest_match = false; + if (match0 == NULL && !endmatch) { + want_shortest_match = true; + kind = kLongestMatch; + } + + DFA* dfa = GetDFA(kind); + const char* ep; + bool matched = dfa->Search(text, context, anchored, + want_shortest_match, !reversed_, + failed, &ep, matches); + if (*failed) + return false; + if (!matched) + return false; + if (endmatch && ep != (reversed_ ? text.begin() : text.end())) + return false; + + // If caller cares, record the boundary of the match. + // We only know where it ends, so use the boundary of text + // as the beginning. + if (match0) { + if (reversed_) + *match0 = StringPiece(ep, text.end() - ep); + else + *match0 = StringPiece(text.begin(), ep - text.begin()); + } + return true; +} + +// Build out all states in DFA. Returns number of states. +int DFA::BuildAllStates() { + if (!ok()) + return 0; + + // Pick out start state for unanchored search + // at beginning of text. + RWLocker l(&cache_mutex_); + SearchParams params(NULL, NULL, &l); + params.anchored = false; + if (!AnalyzeSearch(¶ms) || params.start <= SpecialStateMax) + return 0; + + // Add start state to work queue. + StateSet queued; + vector q; + queued.insert(params.start); + q.push_back(params.start); + + // Flood to expand every state. 
+ for (size_t i = 0; i < q.size(); i++) { + State* s = q[i]; + for (int c = 0; c < 257; c++) { + State* ns = RunStateOnByteUnlocked(s, c); + if (ns > SpecialStateMax && queued.find(ns) == queued.end()) { + queued.insert(ns); + q.push_back(ns); + } + } + } + + return q.size(); +} + +// Build out all states in DFA for kind. Returns number of states. +int Prog::BuildEntireDFA(MatchKind kind) { + //LOG(ERROR) << "BuildEntireDFA is only for testing."; + return GetDFA(kind)->BuildAllStates(); +} + +// Computes min and max for matching string. +// Won't return strings bigger than maxlen. +bool DFA::PossibleMatchRange(string* min, string* max, int maxlen) { + if (!ok()) + return false; + + // NOTE: if future users of PossibleMatchRange want more precision when + // presented with infinitely repeated elements, consider making this a + // parameter to PossibleMatchRange. + static int kMaxEltRepetitions = 0; + + // Keep track of the number of times we've visited states previously. We only + // revisit a given state if it's part of a repeated group, so if the value + // portion of the map tuple exceeds kMaxEltRepetitions we bail out and set + // |*max| to |PrefixSuccessor(*max)|. + // + // Also note that previously_visited_states[UnseenStatePtr] will, in the STL + // tradition, implicitly insert a '0' value at first use. We take advantage + // of that property below. + map previously_visited_states; + + // Pick out start state for anchored search at beginning of text. + RWLocker l(&cache_mutex_); + SearchParams params(NULL, NULL, &l); + params.anchored = true; + if (!AnalyzeSearch(¶ms)) + return false; + if (params.start == DeadState) { // No matching strings + *min = ""; + *max = ""; + return true; + } + if (params.start == FullMatchState) // Every string matches: no max + return false; + + // The DFA is essentially a big graph rooted at params.start, + // and paths in the graph correspond to accepted strings. 
+ // Each node in the graph has potentially 256+1 arrows + // coming out, one for each byte plus the magic end of + // text character kByteEndText. + + // To find the smallest possible prefix of an accepted + // string, we just walk the graph preferring to follow + // arrows with the lowest bytes possible. To find the + // largest possible prefix, we follow the largest bytes + // possible. + + // The test for whether there is an arrow from s on byte j is + // ns = RunStateOnByteUnlocked(s, j); + // if (ns == NULL) + // return false; + // if (ns != DeadState && ns->ninst > 0) + // The RunStateOnByteUnlocked call asks the DFA to build out the graph. + // It returns NULL only if the DFA has run out of memory, + // in which case we can't be sure of anything. + // The second check sees whether there was graph built + // and whether it is interesting graph. Nodes might have + // ns->ninst == 0 if they exist only to represent the fact + // that a match was found on the previous byte. + + // Build minimum prefix. + State* s = params.start; + min->clear(); + MutexLock lock(&mutex_); + for (int i = 0; i < maxlen; i++) { + if (previously_visited_states[s] > kMaxEltRepetitions) { + VLOG(2) << "Hit kMaxEltRepetitions=" << kMaxEltRepetitions + << " for state s=" << s << " and min=" << CEscape(*min); + break; + } + previously_visited_states[s]++; + + // Stop if min is a match. + State* ns = RunStateOnByte(s, kByteEndText); + if (ns == NULL) // DFA out of memory + return false; + if (ns != DeadState && (ns == FullMatchState || ns->IsMatch())) + break; + + // Try to extend the string with low bytes. + bool extended = false; + for (int j = 0; j < 256; j++) { + ns = RunStateOnByte(s, j); + if (ns == NULL) // DFA out of memory + return false; + if (ns == FullMatchState || + (ns > SpecialStateMax && ns->ninst_ > 0)) { + extended = true; + min->append(1, j); + s = ns; + break; + } + } + if (!extended) + break; + } + + // Build maximum prefix. 
+ previously_visited_states.clear(); + s = params.start; + max->clear(); + for (int i = 0; i < maxlen; i++) { + if (previously_visited_states[s] > kMaxEltRepetitions) { + VLOG(2) << "Hit kMaxEltRepetitions=" << kMaxEltRepetitions + << " for state s=" << s << " and max=" << CEscape(*max); + break; + } + previously_visited_states[s] += 1; + + // Try to extend the string with high bytes. + bool extended = false; + for (int j = 255; j >= 0; j--) { + State* ns = RunStateOnByte(s, j); + if (ns == NULL) + return false; + if (ns == FullMatchState || + (ns > SpecialStateMax && ns->ninst_ > 0)) { + extended = true; + max->append(1, j); + s = ns; + break; + } + } + if (!extended) { + // Done, no need for PrefixSuccessor. + return true; + } + } + + // Stopped while still adding to *max - round aaaaaaaaaa... to aaaa...b + *max = PrefixSuccessor(*max); + + // If there are no bytes left, we have no way to say "there is no maximum + // string". We could make the interface more complicated and be able to + // return "there is no maximum but here is a minimum", but that seems like + // overkill -- the most common no-max case is all possible strings, so not + // telling the caller that the empty string is the minimum match isn't a + // great loss. + if (max->empty()) + return false; + + return true; +} + +// PossibleMatchRange for a Prog. +bool Prog::PossibleMatchRange(string* min, string* max, int maxlen) { + DFA* dfa = NULL; + { + MutexLock l(&dfa_mutex_); + // Have to use dfa_longest_ to get all strings for full matches. + // For example, (a|aa) never matches aa in first-match mode. 
+ dfa = dfa_longest_; + if (dfa == NULL) { + dfa = new DFA(this, Prog::kLongestMatch, dfa_mem_/2); + ATOMIC_STORE_RELEASE(&dfa_longest_, dfa); + delete_dfa_ = DeleteDFA; + } + } + return dfa->PossibleMatchRange(min, max, maxlen); +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/filtered_re2.cc b/src/openalpr/support/re2/filtered_re2.cc new file mode 100644 index 0000000..d6a1a4e --- /dev/null +++ b/src/openalpr/support/re2/filtered_re2.cc @@ -0,0 +1,107 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include "util/util.h" +#include "re2/filtered_re2.h" +#include "re2/prefilter.h" +#include "re2/prefilter_tree.h" + +namespace re2 { + +FilteredRE2::FilteredRE2() + : compiled_(false), + prefilter_tree_(new PrefilterTree()) { +} + +FilteredRE2::~FilteredRE2() { + for (size_t i = 0; i < re2_vec_.size(); i++) + delete re2_vec_[i]; + delete prefilter_tree_; +} + +RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, + const RE2::Options& options, int* id) { + RE2* re = new RE2(pattern, options); + RE2::ErrorCode code = re->error_code(); + + if (!re->ok()) { + if (options.log_errors()) { + LOG(ERROR) << "Couldn't compile regular expression, skipping: " + << re << " due to error " << re->error(); + } + delete re; + } else { + *id = re2_vec_.size(); + re2_vec_.push_back(re); + } + + return code; +} + +void FilteredRE2::Compile(vector* atoms) { + if (compiled_ || re2_vec_.size() == 0) { + LOG(INFO) << "C: " << compiled_ << " S:" << re2_vec_.size(); + return; + } + + for (size_t i = 0; i < re2_vec_.size(); i++) { + Prefilter* prefilter = Prefilter::FromRE2(re2_vec_[i]); + prefilter_tree_->Add(prefilter); + } + atoms->clear(); + prefilter_tree_->Compile(atoms); + compiled_ = true; +} + +int FilteredRE2::SlowFirstMatch(const StringPiece& text) const { + for (size_t i = 0; i < re2_vec_.size(); i++) + if (RE2::PartialMatch(text, 
*re2_vec_[i])) + return i; + return -1; +} + +int FilteredRE2::FirstMatch(const StringPiece& text, + const vector& atoms) const { + if (!compiled_) { + LOG(DFATAL) << "FirstMatch called before Compile"; + return -1; + } + vector regexps; + prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); + for (size_t i = 0; i < regexps.size(); i++) + if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) + return regexps[i]; + return -1; +} + +bool FilteredRE2::AllMatches( + const StringPiece& text, + const vector& atoms, + vector* matching_regexps) const { + matching_regexps->clear(); + vector regexps; + prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); + for (size_t i = 0; i < regexps.size(); i++) + if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) + matching_regexps->push_back(regexps[i]); + return !matching_regexps->empty(); +} + +void FilteredRE2::AllPotentials( + const vector& atoms, + vector* potential_regexps) const { + prefilter_tree_->RegexpsGivenStrings(atoms, potential_regexps); +} + +void FilteredRE2::RegexpsGivenStrings(const vector& matched_atoms, + vector* passed_regexps) { + prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps); +} + +void FilteredRE2::PrintPrefilter(int regexpid) { + prefilter_tree_->PrintPrefilter(regexpid); +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/filtered_re2.h b/src/openalpr/support/re2/filtered_re2.h new file mode 100644 index 0000000..8766930 --- /dev/null +++ b/src/openalpr/support/re2/filtered_re2.h @@ -0,0 +1,109 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps. +// It provides a prefilter mechanism that helps in cutting down the +// number of regexps that need to be actually searched. +// +// By design, it does not include a string matching engine. 
This is to +// allow the user of the class to use their favorite string match +// engine. The overall flow is: Add all the regexps using Add, then +// Compile the FilteredRE2. The compile returns strings that need to +// be matched. Note that all returned strings are lowercase. For +// applying regexps to a search text, the caller does the string +// matching using the strings returned. When doing the string match, +// note that the caller has to do that on lower cased version of the +// search text. Then call FirstMatch or AllMatches with a vector of +// indices of strings that were found in the text to get the actual +// regexp matches. + +#ifndef RE2_FILTERED_RE2_H_ +#define RE2_FILTERED_RE2_H_ + +#include +#include "re2.h" + +namespace re2 { +using std::vector; + +class PrefilterTree; + +class FilteredRE2 { + public: + FilteredRE2(); + ~FilteredRE2(); + + // Uses RE2 constructor to create a RE2 object (re). Returns + // re->error_code(). If error_code is other than NoError, then re is + // deleted and not added to re2_vec_. + RE2::ErrorCode Add(const StringPiece& pattern, + const RE2::Options& options, + int *id); + + // Prepares the regexps added by Add for filtering. Returns a set + // of strings that the caller should check for in candidate texts. + // The returned strings are lowercased. When doing string matching, + // the search text should be lowercased first to find matching + // strings from the set of strings returned by Compile. Call after + // all Add calls are done. + void Compile(vector* strings_to_match); + + // Returns the index of the first matching regexp. + // Returns -1 on no match. Can be called prior to Compile. + // Does not do any filtering: simply tries to Match the + // regexps in a loop. + int SlowFirstMatch(const StringPiece& text) const; + + // Returns the index of the first matching regexp. + // Returns -1 on no match. Compile has to be called before + // calling this. 
+ int FirstMatch(const StringPiece& text, + const vector& atoms) const; + + // Returns the indices of all matching regexps, after first clearing + // matched_regexps. + bool AllMatches(const StringPiece& text, + const vector& atoms, + vector* matching_regexps) const; + + // Returns the indices of all potentially matching regexps after first + // clearing potential_regexps. + // A regexp is potentially matching if it passes the filter. + // If a regexp passes the filter it may still not match. + // A regexp that does not pass the filter is guaranteed to not match. + void AllPotentials(const vector& atoms, + vector* potential_regexps) const; + + // The number of regexps added. + int NumRegexps() const { return re2_vec_.size(); } + + private: + + // Get the individual RE2 objects. Useful for testing. + RE2* GetRE2(int regexpid) const { return re2_vec_[regexpid]; } + + // Print prefilter. + void PrintPrefilter(int regexpid); + + // Useful for testing and debugging. + void RegexpsGivenStrings(const vector& matched_atoms, + vector* passed_regexps); + + // All the regexps in the FilteredRE2. + vector re2_vec_; + + // Has the FilteredRE2 been compiled using Compile() + bool compiled_; + + // An AND-OR tree of string atoms used for filtering regexps. + PrefilterTree* prefilter_tree_; + + //DISALLOW_COPY_AND_ASSIGN(FilteredRE2); + FilteredRE2(const FilteredRE2&); + void operator=(const FilteredRE2&); +}; + +} // namespace re2 + +#endif // RE2_FILTERED_RE2_H_ diff --git a/src/openalpr/support/re2/mimics_pcre.cc b/src/openalpr/support/re2/mimics_pcre.cc new file mode 100644 index 0000000..0a55004 --- /dev/null +++ b/src/openalpr/support/re2/mimics_pcre.cc @@ -0,0 +1,185 @@ +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Determine whether this library should match PCRE exactly +// for a particular Regexp. 
(If so, the testing framework can +// check that it does.) +// +// This library matches PCRE except in these cases: +// * the regexp contains a repetition of an empty string, +// like (a*)* or (a*)+. In this case, PCRE will treat +// the repetition sequence as ending with an empty string, +// while this library does not. +// * Perl and PCRE differ on whether \v matches \n. +// For historical reasons, this library implements the Perl behavior. +// * Perl and PCRE allow $ in one-line mode to match either the very +// end of the text or just before a \n at the end of the text. +// This library requires it to match only the end of the text. +// * Similarly, Perl and PCRE do not allow ^ in multi-line mode to +// match the end of the text if the last character is a \n. +// This library does allow it. +// +// Regexp::MimicsPCRE checks for any of these conditions. + +#include "util/util.h" +#include "re2/regexp.h" +#include "re2/walker-inl.h" + +namespace re2 { + +// Returns whether re might match an empty string. +static bool CanBeEmptyString(Regexp *re); + +// Walker class to compute whether library handles a regexp +// exactly as PCRE would. See comment at top for conditions. + +class PCREWalker : public Regexp::Walker { + public: + PCREWalker() {} + bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, bool* child_args, + int nchild_args); + + bool ShortVisit(Regexp* re, bool a) { + // Should never be called: we use Walk not WalkExponential. + LOG(DFATAL) << "EmptyStringWalker::ShortVisit called"; + return a; + } +}; + +// Called after visiting each of re's children and accumulating +// the return values in child_args. So child_args contains whether +// this library mimics PCRE for those subexpressions. +bool PCREWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg, + bool* child_args, int nchild_args) { + // If children failed, so do we. 
+ for (int i = 0; i < nchild_args; i++) + if (!child_args[i]) + return false; + + // Otherwise look for other reasons to fail. + switch (re->op()) { + // Look for repeated empty string. + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: + if (CanBeEmptyString(re->sub()[0])) + return false; + break; + case kRegexpRepeat: + if (re->max() == -1 && CanBeEmptyString(re->sub()[0])) + return false; + break; + + // Look for \v + case kRegexpLiteral: + if (re->rune() == '\v') + return false; + break; + + // Look for $ in single-line mode. + case kRegexpEndText: + case kRegexpEmptyMatch: + if (re->parse_flags() & Regexp::WasDollar) + return false; + break; + + // Look for ^ in multi-line mode. + case kRegexpBeginLine: + // No condition: in single-line mode ^ becomes kRegexpBeginText. + return false; + + default: + break; + } + + // Not proven guilty. + return true; +} + +// Returns whether this regexp's behavior will mimic PCRE's exactly. +bool Regexp::MimicsPCRE() { + PCREWalker w; + return w.Walk(this, true); +} + + +// Walker class to compute whether a Regexp can match an empty string. +// It is okay to overestimate. For example, \b\B cannot match an empty +// string, because \b and \B are mutually exclusive, but this isn't +// that smart and will say it can. Spurious empty strings +// will reduce the number of regexps we sanity check against PCRE, +// but they won't break anything. + +class EmptyStringWalker : public Regexp::Walker { + public: + EmptyStringWalker() { } + bool PostVisit(Regexp* re, bool parent_arg, bool pre_arg, + bool* child_args, int nchild_args); + + bool ShortVisit(Regexp* re, bool a) { + // Should never be called: we use Walk not WalkExponential. + LOG(DFATAL) << "EmptyStringWalker::ShortVisit called"; + return a; + } + + private: + DISALLOW_COPY_AND_ASSIGN(EmptyStringWalker); +}; + +// Called after visiting re's children. 
child_args contains the return +// value from each of the children's PostVisits (i.e., whether each child +// can match an empty string). Returns whether this clause can match an +// empty string. +bool EmptyStringWalker::PostVisit(Regexp* re, bool parent_arg, bool pre_arg, + bool* child_args, int nchild_args) { + switch (re->op()) { + case kRegexpNoMatch: // never empty + case kRegexpLiteral: + case kRegexpAnyChar: + case kRegexpAnyByte: + case kRegexpCharClass: + case kRegexpLiteralString: + return false; + + case kRegexpEmptyMatch: // always empty + case kRegexpBeginLine: // always empty, when they match + case kRegexpEndLine: + case kRegexpNoWordBoundary: + case kRegexpWordBoundary: + case kRegexpBeginText: + case kRegexpEndText: + case kRegexpStar: // can always be empty + case kRegexpQuest: + case kRegexpHaveMatch: + return true; + + case kRegexpConcat: // can be empty if all children can + for (int i = 0; i < nchild_args; i++) + if (!child_args[i]) + return false; + return true; + + case kRegexpAlternate: // can be empty if any child can + for (int i = 0; i < nchild_args; i++) + if (child_args[i]) + return true; + return false; + + case kRegexpPlus: // can be empty if the child can + case kRegexpCapture: + return child_args[0]; + + case kRegexpRepeat: // can be empty if child can or is x{0} + return child_args[0] || re->min() == 0; + } + return false; +} + +// Returns whether re can match an empty string. +static bool CanBeEmptyString(Regexp* re) { + EmptyStringWalker w; + return w.Walk(re, true); +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/nfa.cc b/src/openalpr/support/re2/nfa.cc new file mode 100644 index 0000000..57a18fe --- /dev/null +++ b/src/openalpr/support/re2/nfa.cc @@ -0,0 +1,757 @@ +// Copyright 2006-2007 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tested by search_test.cc. +// +// Prog::SearchNFA, an NFA search. 
+// This is an actual NFA like the theorists talk about, +// not the pseudo-NFA found in backtracking regexp implementations. +// +// IMPLEMENTATION +// +// This algorithm is a variant of one that appeared in Rob Pike's sam editor, +// which is a variant of the one described in Thompson's 1968 CACM paper. +// See http://swtch.com/~rsc/regexp/ for various history. The main feature +// over the DFA implementation is that it tracks submatch boundaries. +// +// When the choice of submatch boundaries is ambiguous, this particular +// implementation makes the same choices that traditional backtracking +// implementations (in particular, Perl and PCRE) do. +// Note that unlike in Perl and PCRE, this algorithm *cannot* take exponential +// time in the length of the input. +// +// Like Thompson's original machine and like the DFA implementation, this +// implementation notices a match only once it is one byte past it. + +#include "re2/prog.h" +#include "re2/regexp.h" +#include "util/sparse_array.h" +#include "util/sparse_set.h" + +namespace re2 { + +class NFA { + public: + NFA(Prog* prog); + ~NFA(); + + // Searches for a matching string. + // * If anchored is true, only considers matches starting at offset. + // Otherwise finds lefmost match at or after offset. + // * If longest is true, returns the longest match starting + // at the chosen start point. Otherwise returns the so-called + // left-biased match, the one traditional backtracking engines + // (like Perl and PCRE) find. + // Records submatch boundaries in submatch[1..nsubmatch-1]. + // Submatch[0] is the entire match. When there is a choice in + // which text matches each subexpression, the submatch boundaries + // are chosen to match what a backtracking implementation would choose. 
+ bool Search(const StringPiece& text, const StringPiece& context, + bool anchored, bool longest, + StringPiece* submatch, int nsubmatch); + + static const int Debug = 0; + + private: + struct Thread { + union { + int id; + Thread* next; // when on free list + }; + const char** capture; + }; + + // State for explicit stack in AddToThreadq. + struct AddState { + int id; // Inst to process + int j; + const char* cap_j; // if j>=0, set capture[j] = cap_j before processing ip + + AddState() + : id(0), j(-1), cap_j(NULL) {} + explicit AddState(int id) + : id(id), j(-1), cap_j(NULL) {} + AddState(int id, const char* cap_j, int j) + : id(id), j(j), cap_j(cap_j) {} + }; + + // Threadq is a list of threads. The list is sorted by the order + // in which Perl would explore that particular state -- the earlier + // choices appear earlier in the list. + typedef SparseArray Threadq; + + inline Thread* AllocThread(); + inline void FreeThread(Thread*); + + // Add id (or its children, following unlabeled arrows) + // to the workqueue q with associated capture info. + void AddToThreadq(Threadq* q, int id, int flag, + const char* p, const char** capture); + + // Run runq on byte c, appending new states to nextq. + // Updates matched_ and match_ as new, better matches are found. + // p is position of the next byte (the one after c) + // in the input string, used when processing capturing parens. + // flag is the bitwise or of Bol, Eol, etc., specifying whether + // ^, $ and \b match the current input point (after c). + inline int Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p); + + // Returns text version of capture information, for debugging. + string FormatCapture(const char** capture); + + inline void CopyCapture(const char** dst, const char** src); + + // Computes whether all matches must begin with the same first + // byte, and if so, returns that byte. If not, returns -1. 
+ int ComputeFirstByte(); + + Prog* prog_; // underlying program + int start_; // start instruction in program + int ncapture_; // number of submatches to track + bool longest_; // whether searching for longest match + bool endmatch_; // whether match must end at text.end() + const char* btext_; // beginning of text being matched (for FormatSubmatch) + const char* etext_; // end of text being matched (for endmatch_) + Threadq q0_, q1_; // pre-allocated for Search. + const char** match_; // best match so far + bool matched_; // any match so far? + AddState* astack_; // pre-allocated for AddToThreadq + int nastack_; + int first_byte_; // required first byte for match, or -1 if none + + Thread* free_threads_; // free list + + DISALLOW_COPY_AND_ASSIGN(NFA); +}; + +NFA::NFA(Prog* prog) { + prog_ = prog; + start_ = prog->start(); + ncapture_ = 0; + longest_ = false; + endmatch_ = false; + btext_ = NULL; + etext_ = NULL; + q0_.resize(prog_->size()); + q1_.resize(prog_->size()); + nastack_ = 2*prog_->size(); + astack_ = new AddState[nastack_]; + match_ = NULL; + matched_ = false; + free_threads_ = NULL; + first_byte_ = ComputeFirstByte(); +} + +NFA::~NFA() { + delete[] match_; + delete[] astack_; + Thread* next; + for (Thread* t = free_threads_; t; t = next) { + next = t->next; + delete[] t->capture; + delete t; + } +} + +void NFA::FreeThread(Thread *t) { + if (t == NULL) + return; + t->next = free_threads_; + free_threads_ = t; +} + +NFA::Thread* NFA::AllocThread() { + Thread* t = free_threads_; + if (t == NULL) { + t = new Thread; + t->capture = new const char*[ncapture_]; + return t; + } + free_threads_ = t->next; + return t; +} + +void NFA::CopyCapture(const char** dst, const char** src) { + for (int i = 0; i < ncapture_; i+=2) { + dst[i] = src[i]; + dst[i+1] = src[i+1]; + } +} + +// Follows all empty arrows from id0 and enqueues all the states reached. +// The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match. 
+// The pointer p is the current input position, and capture is the
+// current set of match boundaries.
+void NFA::AddToThreadq(Threadq* q, int id0, int flag,
+                       const char* p, const char** capture) {
+  if (id0 == 0)
+    return;
+
+  // Astack_ is pre-allocated to avoid resize operations.
+  // It has room for 2*prog_->size() entries, which is enough:
+  // Each inst in prog can be processed at most once,
+  // pushing at most two entries on stk.
+
+  int nstk = 0;
+  AddState* stk = astack_;
+  stk[nstk++] = AddState(id0);
+
+  while (nstk > 0) {
+    DCHECK_LE(nstk, nastack_);
+    const AddState& a = stk[--nstk];
+    if (a.j >= 0)
+      capture[a.j] = a.cap_j;
+
+    int id = a.id;
+    if (id == 0)
+      continue;
+    if (q->has_index(id)) {
+      if (Debug)
+        fprintf(stderr, " [%d%s]\n", id, FormatCapture(capture).c_str());
+      continue;
+    }
+
+    // Create entry in q no matter what. We might fill it in below,
+    // or we might not. Even if not, it is necessary to have it,
+    // so that we don't revisit id while draining the stack.
+    q->set_new(id, NULL);
+
+    Thread** tp = &q->find(id)->second;
+    int j;
+    Thread* t;
+    Prog::Inst* ip = prog_->inst(id);
+    switch (ip->opcode()) {
+      default:
+        LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq";
+        break;
+
+      case kInstFail:
+        break;
+
+      case kInstAltMatch:
+        // Save state; will pick up at next byte.
+        t = AllocThread();
+        t->id = id;
+        CopyCapture(t->capture, capture);
+        *tp = t;
+        // fall through
+
+      case kInstAlt:
+        // Explore alternatives.
+        stk[nstk++] = AddState(ip->out1());
+        stk[nstk++] = AddState(ip->out());
+        break;
+
+      case kInstNop:
+        // Continue on.
+        stk[nstk++] = AddState(ip->out());
+        break;
+
+      case kInstCapture:
+        if ((j=ip->cap()) < ncapture_) {
+          // Push a dummy whose only job is to restore capture[j]
+          // once we finish exploring this possibility.
+          stk[nstk++] = AddState(0, capture[j], j);
+
+          // Record capture.
+          capture[j] = p;
+        }
+        stk[nstk++] = AddState(ip->out());
+        break;
+
+      case kInstMatch:
+      case kInstByteRange:
+        // Save state; will pick up at next byte.
+        t = AllocThread();
+        t->id = id;
+        CopyCapture(t->capture, capture);
+        *tp = t;
+        if (Debug)
+          fprintf(stderr, " + %d%s [%p]\n", id, FormatCapture(t->capture).c_str(), t);
+        break;
+
+      case kInstEmptyWidth:
+        // Continue on if we have all the right flag bits.
+        if (ip->empty() & ~flag)
+          break;
+        stk[nstk++] = AddState(ip->out());
+        break;
+    }
+  }
+}
+
+// Run runq on byte c, appending new states to nextq.
+// Updates matched_ and match_ as new, better matches are found.
+// p is position of the byte c in the input string,
+// used when processing capturing parens.
+// flag is the bitwise or of Bol, Eol, etc., specifying whether
+// ^, $ and \b match the current input point (after c).
+// Frees all the threads on runq.
+// If there is a shortcut to the end, returns that shortcut.
+int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) {
+  nextq->clear();
+
+  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
+    Thread* t = i->second;
+    if (t == NULL)
+      continue;
+
+    if (longest_) {
+      // Can skip any threads started after our current best match.
+      if (matched_ && match_[0] < t->capture[0]) {
+        FreeThread(t);
+        continue;
+      }
+    }
+
+    int id = t->id;
+    Prog::Inst* ip = prog_->inst(id);
+
+    switch (ip->opcode()) {
+      default:
+        // Should only see the values handled below.
+        LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step";
+        break;
+
+      case kInstByteRange:
+        if (ip->Matches(c))
+          AddToThreadq(nextq, ip->out(), flag, p+1, t->capture);
+        break;
+
+      case kInstAltMatch:
+        if (i != runq->begin())
+          break;
+        // The match is ours if we want it.
+        if (ip->greedy(prog_) || longest_) {
+          CopyCapture((const char**)match_, t->capture);
+          FreeThread(t);
+          for (++i; i != runq->end(); ++i)
+            FreeThread(i->second);
+          runq->clear();
+          matched_ = true;
+          if (ip->greedy(prog_))
+            return ip->out1();
+          return ip->out();
+        }
+        break;
+
+      case kInstMatch:
+        if (endmatch_ && p != etext_)
+          break;
+
+        const char* old = t->capture[1];  // previous end pointer
+        t->capture[1] = p;
+        if (longest_) {
+          // Leftmost-longest mode: save this match only if
+          // it is either farther to the left or at the same
+          // point but longer than an existing match.
+          if (!matched_ || t->capture[0] < match_[0] ||
+              (t->capture[0] == match_[0] && t->capture[1] > match_[1]))
+            CopyCapture((const char**)match_, t->capture);
+        } else {
+          // Leftmost-biased mode: this match is by definition
+          // better than what we've already found (see next line).
+          CopyCapture((const char**)match_, t->capture);
+
+          // Cut off the threads that can only find matches
+          // worse than the one we just found: don't run the
+          // rest of the current Threadq.
+          t->capture[1] = old;
+          FreeThread(t);
+          for (++i; i != runq->end(); ++i)
+            FreeThread(i->second);
+          runq->clear();
+          matched_ = true;
+          return 0;
+        }
+        t->capture[1] = old;
+        matched_ = true;
+        break;
+    }
+    FreeThread(t);
+  }
+  runq->clear();
+  return 0;
+}
+
+string NFA::FormatCapture(const char** capture) {
+  string s;
+
+  for (int i = 0; i < ncapture_; i+=2) {
+    if (capture[i] == NULL)
+      StringAppendF(&s, "(?,?)");
+    else if (capture[i+1] == NULL)
+      StringAppendF(&s, "(%d,?)", (int)(capture[i] - btext_));
+    else
+      StringAppendF(&s, "(%d,%d)",
+                    (int)(capture[i] - btext_),
+                    (int)(capture[i+1] - btext_));
+  }
+  return s;
+}
+
+// Returns whether haystack contains needle's memory.
+static bool StringPieceContains(const StringPiece haystack, const StringPiece needle) {
+  return haystack.begin() <= needle.begin() &&
+         haystack.end() >= needle.end();
+}
+
+bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
+                 bool anchored, bool longest,
+                 StringPiece* submatch, int nsubmatch) {
+  if (start_ == 0)
+    return false;
+
+  StringPiece context = const_context;
+  if (context.begin() == NULL)
+    context = text;
+
+  if (!StringPieceContains(context, text)) {
+    LOG(FATAL) << "Bad args: context does not contain text "
+               << reinterpret_cast<const void*>(context.begin())
+               << "+" << context.size() << " "
+               << reinterpret_cast<const void*>(text.begin())
+               << "+" << text.size();
+    return false;
+  }
+
+  if (prog_->anchor_start() && context.begin() != text.begin())
+    return false;
+  if (prog_->anchor_end() && context.end() != text.end())
+    return false;
+  anchored |= prog_->anchor_start();
+  if (prog_->anchor_end()) {
+    longest = true;
+    endmatch_ = true;
+    etext_ = text.end();
+  }
+
+  if (nsubmatch < 0) {
+    LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch;
+    return false;
+  }
+
+  // Save search parameters.
+  ncapture_ = 2*nsubmatch;
+  longest_ = longest;
+
+  if (nsubmatch == 0) {
+    // We need to maintain match[0], both to distinguish the
+    // longest match (if longest is true) and also to tell
+    // whether we've seen any matches at all.
+    ncapture_ = 2;
+  }
+
+  match_ = new const char*[ncapture_];
+  matched_ = false;
+  memset(match_, 0, ncapture_*sizeof match_[0]);
+
+  // For debugging prints.
+  btext_ = context.begin();
+
+  if (Debug) {
+    fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
+            text.as_string().c_str(), context.as_string().c_str(), anchored,
+            longest);
+  }
+
+  // Set up search.
+  Threadq* runq = &q0_;
+  Threadq* nextq = &q1_;
+  runq->clear();
+  nextq->clear();
+  memset(&match_[0], 0, ncapture_*sizeof match_[0]);
+  const char* bp = context.begin();
+  int c = -1;
+  int wasword = 0;
+
+  if (text.begin() > context.begin()) {
+    c = text.begin()[-1] & 0xFF;
+    wasword = Prog::IsWordChar(c);
+  }
+
+  // Loop over the text, stepping the machine.
+  for (const char* p = text.begin();; p++) {
+    // Check for empty-width specials.
+    int flag = 0;
+
+    // ^ and \A
+    if (p == context.begin())
+      flag |= kEmptyBeginText | kEmptyBeginLine;
+    else if (p <= context.end() && p[-1] == '\n')
+      flag |= kEmptyBeginLine;
+
+    // $ and \z
+    if (p == context.end())
+      flag |= kEmptyEndText | kEmptyEndLine;
+    else if (p < context.end() && p[0] == '\n')
+      flag |= kEmptyEndLine;
+
+    // \b and \B
+    int isword = 0;
+    if (p < context.end())
+      isword = Prog::IsWordChar(p[0] & 0xFF);
+
+    if (isword != wasword)
+      flag |= kEmptyWordBoundary;
+    else
+      flag |= kEmptyNonWordBoundary;
+
+    if (Debug) {
+      fprintf(stderr, "%c[%#x/%d/%d]:", p > text.end() ? '$' : p == bp ? '^' : c, flag, isword, wasword);
+      for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
+        Thread* t = i->second;
+        if (t == NULL)
+          continue;
+        fprintf(stderr, " %d%s", t->id,
+                FormatCapture((const char**)t->capture).c_str());
+      }
+      fprintf(stderr, "\n");
+    }
+
+    // Process previous character (waited until now to avoid
+    // repeating the flag computation above).
+    // This is a no-op the first time around the loop, because
+    // runq is empty.
+    int id = Step(runq, nextq, c, flag, p-1);
+    DCHECK_EQ(runq->size(), 0);
+    swap(nextq, runq);
+    nextq->clear();
+    if (id != 0) {
+      // We're done: full match ahead.
+      p = text.end();
+      for (;;) {
+        Prog::Inst* ip = prog_->inst(id);
+        switch (ip->opcode()) {
+          default:
+            LOG(DFATAL) << "Unexpected opcode in short circuit: " << ip->opcode();
+            break;
+
+          case kInstCapture:
+            if (ip->cap() < ncapture_)
+              match_[ip->cap()] = p;
+            id = ip->out();
+            continue;
+
+          case kInstNop:
+            id = ip->out();
+            continue;
+
+          case kInstMatch:
+            match_[1] = p;
+            matched_ = true;
+            break;
+
+          case kInstEmptyWidth:
+            if (ip->empty() & ~(kEmptyEndLine|kEmptyEndText)) {
+              LOG(DFATAL) << "Unexpected empty-width in short circuit: " << ip->empty();
+              break;
+            }
+            id = ip->out();
+            continue;
+        }
+        break;
+      }
+      break;
+    }
+
+    if (p > text.end())
+      break;
+
+    // Start a new thread if there have not been any matches.
+    // (No point in starting a new thread if there have been
+    // matches, since it would be to the right of the match
+    // we already found.)
+    if (!matched_ && (!anchored || p == text.begin())) {
+      // If there's a required first byte for an unanchored search
+      // and we're not in the middle of any possible matches,
+      // use memchr to search for the byte quickly.
+      if (!anchored && first_byte_ >= 0 && runq->size() == 0 &&
+          p < text.end() && (p[0] & 0xFF) != first_byte_) {
+        p = reinterpret_cast<const char*>(memchr(p, first_byte_,
+                                                 text.end() - p));
+        if (p == NULL) {
+          p = text.end();
+          isword = 0;
+        } else {
+          isword = Prog::IsWordChar(p[0] & 0xFF);
+        }
+        flag = Prog::EmptyFlags(context, p);
+      }
+
+      // Steal match storage (cleared but unused as of yet)
+      // temporarily to hold match boundaries for new thread.
+      match_[0] = p;
+      AddToThreadq(runq, start_, flag, p, match_);
+      match_[0] = NULL;
+    }
+
+    // If all the threads have died, stop early.
+    if (runq->size() == 0) {
+      if (Debug)
+        fprintf(stderr, "dead\n");
+      break;
+    }
+
+    if (p == text.end())
+      c = 0;
+    else
+      c = *p & 0xFF;
+    wasword = isword;
+
+    // Will run step(runq, nextq, c, ...) on next iteration. See above.
+  }
+
+  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
+    FreeThread(i->second);
+
+  if (matched_) {
+    for (int i = 0; i < nsubmatch; i++)
+      submatch[i].set(match_[2*i], match_[2*i+1] - match_[2*i]);
+    if (Debug)
+      fprintf(stderr, "match (%d,%d)\n",
+              static_cast<int>(match_[0] - btext_),
+              static_cast<int>(match_[1] - btext_));
+    return true;
+  }
+  VLOG(1) << "No matches found";
+  return false;
+}
+
+// Computes whether all successful matches have a common first byte,
+// and if so, returns that byte. If not, returns -1.
+int NFA::ComputeFirstByte() {
+  if (start_ == 0)
+    return -1;
+
+  int b = -1;  // first byte, not yet computed
+
+  typedef SparseSet Workq;
+  Workq q(prog_->size());
+  q.insert(start_);
+  for (Workq::iterator it = q.begin(); it != q.end(); ++it) {
+    int id = *it;
+    Prog::Inst* ip = prog_->inst(id);
+    switch (ip->opcode()) {
+      default:
+        LOG(DFATAL) << "unhandled " << ip->opcode() << " in ComputeFirstByte";
+        break;
+
+      case kInstMatch:
+        // The empty string matches: no first byte.
+        return -1;
+
+      case kInstByteRange:
+        // Must match only a single byte
+        if (ip->lo() != ip->hi())
+          return -1;
+        if (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')
+          return -1;
+        // If we haven't seen any bytes yet, record it;
+        // otherwise must match the one we saw before.
+        if (b == -1)
+          b = ip->lo();
+        else if (b != ip->lo())
+          return -1;
+        break;
+
+      case kInstNop:
+      case kInstCapture:
+      case kInstEmptyWidth:
+        // Continue on.
+        // Ignore ip->empty() flags for kInstEmptyWidth
+        // in order to be as conservative as possible
+        // (assume all possible empty-width flags are true).
+        if (ip->out())
+          q.insert(ip->out());
+        break;
+
+      case kInstAlt:
+      case kInstAltMatch:
+        // Explore alternatives.
+        if (ip->out())
+          q.insert(ip->out());
+        if (ip->out1())
+          q.insert(ip->out1());
+        break;
+
+      case kInstFail:
+        break;
+    }
+  }
+  return b;
+}
+
+bool
+Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
+                Anchor anchor, MatchKind kind,
+                StringPiece* match, int nmatch) {
+  if (NFA::Debug)
+    Dump();
+
+  NFA nfa(this);
+  StringPiece sp;
+  if (kind == kFullMatch) {
+    anchor = kAnchored;
+    if (nmatch == 0) {
+      match = &sp;
+      nmatch = 1;
+    }
+  }
+  if (!nfa.Search(text, context, anchor == kAnchored, kind != kFirstMatch, match, nmatch))
+    return false;
+  if (kind == kFullMatch && match[0].end() != text.end())
+    return false;
+  return true;
+}
+
+// For each instruction i in the program reachable from the start, compute the
+// number of instructions reachable from i by following only empty transitions
+// and record that count as fanout[i].
+//
+// fanout holds the results and is also the work queue for the outer iteration.
+// reachable holds the reached nodes for the inner iteration.
+void Prog::Fanout(SparseArray<int>* fanout) {
+  DCHECK_EQ(fanout->max_size(), size());
+  SparseSet reachable(size());
+  fanout->clear();
+  fanout->set_new(start(), 0);
+  for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) {
+    int* count = &i->second;
+    reachable.clear();
+    reachable.insert(i->index());
+    for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) {
+      Prog::Inst* ip = inst(*j);
+      switch (ip->opcode()) {
+        default:
+          LOG(DFATAL) << "unhandled " << ip->opcode() << " in Prog::Fanout()";
+          break;
+
+        case kInstByteRange:
+          (*count)++;
+          if (!fanout->has_index(ip->out())) {
+            fanout->set_new(ip->out(), 0);
+          }
+          break;
+
+        case kInstAlt:
+        case kInstAltMatch:
+          reachable.insert(ip->out1());
+          // fall through
+
+        case kInstCapture:
+        case kInstEmptyWidth:
+        case kInstNop:
+          reachable.insert(ip->out());
+          break;
+
+        case kInstMatch:
+        case kInstFail:
+          break;
+      }
+    }
+  }
+}
+
+} // namespace re2
diff --git a/src/openalpr/support/re2/onepass.cc b/src/openalpr/support/re2/onepass.cc
new file mode 100644
index 0000000..2404617
--- /dev/null
+++ b/src/openalpr/support/re2/onepass.cc
@@ -0,0 +1,610 @@
+// Copyright 2008 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Tested by search_test.cc.
+//
+// Prog::SearchOnePass is an efficient implementation of
+// regular expression search with submatch tracking for
+// what I call "one-pass regular expressions". (An alternate
+// name might be "backtracking-free regular expressions".)
+//
+// One-pass regular expressions have the property that
+// at each input byte during an anchored match, there may be
+// multiple alternatives but only one can proceed for any
+// given input byte.
+//
+// For example, the regexp /x*yx*/ is one-pass: you read
+// x's until a y, then you read the y, then you keep reading x's.
+// At no point do you have to guess what to do or back up
+// and try a different guess.
+//
+// On the other hand, /x*x/ is not one-pass: when you're
+// looking at an input "x", it's not clear whether you should
+// use it to extend the x* or as the final x.
+//
+// More examples: /([^ ]*) (.*)/ is one-pass; /(.*) (.*)/ is not.
+// /(\d+)-(\d+)/ is one-pass; /(\d+).(\d+)/ is not.
+//
+// A simple intuition for identifying one-pass regular expressions
+// is that it's always immediately obvious when a repetition ends.
+// It must also be immediately obvious which branch of an | to take:
+//
+// /x(y|z)/ is one-pass, but /(xy|xz)/ is not.
+//
+// The NFA-based search in nfa.cc does some bookkeeping to
+// avoid the need for backtracking and its associated exponential blowup.
+// But if we have a one-pass regular expression, there is no
+// possibility of backtracking, so there is no need for the
+// extra bookkeeping. Hence, this code.
+//
+// On a one-pass regular expression, the NFA code in nfa.cc
+// runs at about 1/20 of the backtracking-based PCRE speed.
+// In contrast, the code in this file runs at about the same
+// speed as PCRE.
+//
+// One-pass regular expressions get used a lot when RE is
+// used for parsing simple strings, so it pays off to
+// notice them and handle them efficiently.
+//
+// See also Anne Brüggemann-Klein and Derick Wood,
+// "One-unambiguous regular languages", Information and Computation 142(2).
+
+#include <string.h>
+#include <map>
+#include "util/util.h"
+#include "util/sparse_set.h"
+#include "re2/prog.h"
+#include "re2/stringpiece.h"
+
+namespace re2 {
+
+static const int Debug = 0;
+
+// The key insight behind this implementation is that the
+// non-determinism in an NFA for a one-pass regular expression
+// is contained. To explain what that means, first a
+// refresher about what regular expression programs look like
+// and how the usual NFA execution runs.
+// +// In a regular expression program, only the kInstByteRange +// instruction processes an input byte c and moves on to the +// next byte in the string (it does so if c is in the given range). +// The kInstByteRange instructions correspond to literal characters +// and character classes in the regular expression. +// +// The kInstAlt instructions are used as wiring to connect the +// kInstByteRange instructions together in interesting ways when +// implementing | + and *. +// The kInstAlt instruction forks execution, like a goto that +// jumps to ip->out() and ip->out1() in parallel. Each of the +// resulting computation paths is called a thread. +// +// The other instructions -- kInstEmptyWidth, kInstMatch, kInstCapture -- +// are interesting in their own right but like kInstAlt they don't +// advance the input pointer. Only kInstByteRange does. +// +// The automaton execution in nfa.cc runs all the possible +// threads of execution in lock-step over the input. To process +// a particular byte, each thread gets run until it either dies +// or finds a kInstByteRange instruction matching the byte. +// If the latter happens, the thread stops just past the +// kInstByteRange instruction (at ip->out()) and waits for +// the other threads to finish processing the input byte. +// Then, once all the threads have processed that input byte, +// the whole process repeats. The kInstAlt state instruction +// might create new threads during input processing, but no +// matter what, all the threads stop after a kInstByteRange +// and wait for the other threads to "catch up". +// Running in lock step like this ensures that the NFA reads +// the input string only once. +// +// Each thread maintains its own set of capture registers +// (the string positions at which it executed the kInstCapture +// instructions corresponding to capturing parentheses in the +// regular expression). 
Repeated copying of the capture registers
+// is the main performance bottleneck in the NFA implementation.
+//
+// A regular expression program is "one-pass" if, no matter what
+// the input string, there is only one thread that makes it
+// past a kInstByteRange instruction at each input byte. This means
+// that there is in some sense only one active thread throughout
+// the execution. Other threads might be created during the
+// processing of an input byte, but they are ephemeral: only one
+// thread is left to start processing the next input byte.
+// This is what I meant above when I said the non-determinism
+// was "contained".
+//
+// To execute a one-pass regular expression program, we can build
+// a DFA (no non-determinism) that has at most as many states as
+// the NFA (compare this to the possibly exponential number of states
+// in the general case). Each state records, for each possible
+// input byte, the next state along with the conditions required
+// before entering that state -- empty-width flags that must be true
+// and capture operations that must be performed. It also records
+// the set of conditions required to finish a match at that
+// point in the input rather than process the next byte.
+
+// A state in the one-pass NFA - just an array of actions indexed
+// by the bytemap_[] of the next input byte. (The bytemap
+// maps next input bytes into equivalence classes, to reduce
+// the memory footprint.)
+struct OneState {
+  uint32 matchcond; // conditions to match right now.
+  uint32 action[1]; // first of bytemap_range_ entries (see statesize in IsOnePass).
+};
+
+// The uint32 conditions in the action are a combination of
+// condition and capture bits and the next state. The bottom 16 bits
+// are the condition and capture bits, and the top 16 are the index of
+// the next state.
+//
+// Bits 0-5 are the empty-width flags from prog.h.
+// Bit 6 is kMatchWins, which means the match takes
+// priority over moving to next in a first-match search.
+// The remaining bits mark capture registers that should
+// be set to the current input position. The capture bits
+// start at index 2, since the search loop can take care of
+// cap[0], cap[1] (the overall match position).
+// That means we can handle up to 5 capturing parens: $1 through $4, plus $0.
+// No input position can satisfy both kEmptyWordBoundary
+// and kEmptyNonWordBoundary, so we can use that as a sentinel
+// instead of needing an extra bit.
+
+static const int kIndexShift = 16; // number of bits below index
+static const int kEmptyShift = 6; // number of empty flags in prog.h
+static const int kRealCapShift = kEmptyShift + 1;
+static const int kRealMaxCap = (kIndexShift - kRealCapShift) / 2 * 2;
+
+// Parameters used to skip over cap[0], cap[1].
+static const int kCapShift = kRealCapShift - 2;
+static const int kMaxCap = kRealMaxCap + 2;
+
+static const uint32 kMatchWins = 1 << kEmptyShift;
+static const uint32 kCapMask = ((1 << kRealMaxCap) - 1) << kRealCapShift;
+
+static const uint32 kImpossible = kEmptyWordBoundary | kEmptyNonWordBoundary;
+
+// Check, at compile time, that prog.h agrees with math above.
+// This function is never called.
+void OnePass_Checks() {
+  COMPILE_ASSERT((1<<kEmptyShift)-1 == kEmptyAllFlags,
+                 kEmptyShift_disagrees_with_kEmptyAllFlags);
+  // kMaxCap counts pointers, kMaxOnePassCapture counts pairs.
+  COMPILE_ASSERT(kMaxCap == Prog::kMaxOnePassCapture*2,
+                 kMaxCap_disagrees_with_kMaxOnePassCapture);
+}
+
+static bool Satisfy(uint32 cond, const StringPiece& context, const char* p) {
+  uint32 satisfied = Prog::EmptyFlags(context, p);
+  if (cond & kEmptyAllFlags & ~satisfied)
+    return false;
+  return true;
+}
+
+// Apply the capture bits in cond, saving p to the appropriate
+// locations in cap[].
+static void ApplyCaptures(uint32 cond, const char* p,
+                          const char** cap, int ncap) {
+  for (int i = 2; i < ncap; i++)
+    if (cond & (1 << kCapShift << i))
+      cap[i] = p;
+}
+
+// Compute a node pointer.
+// Basically (OneState*)(nodes + statesize*nodeindex)
+// but the version with the C++ casts overflows 80 characters (and is ugly).
+static inline OneState* IndexToNode(volatile uint8* nodes, int statesize,
+                                    int nodeindex) {
+  return reinterpret_cast<OneState*>(
+    const_cast<uint8*>(nodes + statesize*nodeindex));
+}
+
+bool Prog::SearchOnePass(const StringPiece& text,
+                         const StringPiece& const_context,
+                         Anchor anchor, MatchKind kind,
+                         StringPiece* match, int nmatch) {
+  if (anchor != kAnchored && kind != kFullMatch) {
+    LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches.";
+    return false;
+  }
+
+  // Make sure we have at least cap[1],
+  // because we use it to tell if we matched.
+  int ncap = 2*nmatch;
+  if (ncap < 2)
+    ncap = 2;
+
+  const char* cap[kMaxCap];
+  for (int i = 0; i < ncap; i++)
+    cap[i] = NULL;
+
+  const char* matchcap[kMaxCap];
+  for (int i = 0; i < ncap; i++)
+    matchcap[i] = NULL;
+
+  StringPiece context = const_context;
+  if (context.begin() == NULL)
+    context = text;
+  if (anchor_start() && context.begin() != text.begin())
+    return false;
+  if (anchor_end() && context.end() != text.end())
+    return false;
+  if (anchor_end())
+    kind = kFullMatch;
+
+  // state, nodes and statesize are marked volatile to
+  // keep the compiler from re-ordering the
+  // memory accesses walking over the NFA.
+  // This is worth about 5%.
+  volatile OneState* state = onepass_start_;
+  volatile uint8* nodes = onepass_nodes_;
+  volatile uint32 statesize = onepass_statesize_;
+  uint8* bytemap = bytemap_;
+  const char* bp = text.begin();
+  const char* ep = text.end();
+  const char* p;
+  bool matched = false;
+  matchcap[0] = bp;
+  cap[0] = bp;
+  uint32 nextmatchcond = state->matchcond;
+  for (p = bp; p < ep; p++) {
+    int c = bytemap[*p & 0xFF];
+    uint32 matchcond = nextmatchcond;
+    uint32 cond = state->action[c];
+
+    // Determine whether we can reach the next state encoded in cond.
+    // If so, advance state and nextmatchcond.
+    if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) {
+      uint32 nextindex = cond >> kIndexShift;
+      state = IndexToNode(nodes, statesize, nextindex);
+      nextmatchcond = state->matchcond;
+    } else {
+      state = NULL;
+      nextmatchcond = kImpossible;
+    }
+
+    // This code section is carefully tuned.
+    // The goto sequence is about 10% faster than the
+    // obvious rewrite as a large if statement in the
+    // ASCIIMatchRE2 and DotMatchRE2 benchmarks.
+
+    // Saving the match capture registers is expensive.
+    // Is this intermediate match worth thinking about?
+
+    // Not if we want a full match.
+    if (kind == kFullMatch)
+      goto skipmatch;
+
+    // Not if it's impossible.
+    if (matchcond == kImpossible)
+      goto skipmatch;
+
+    // Not if the possible match is beaten by the certain
+    // match at the next byte. When this test is useless
+    // (e.g., HTTPPartialMatchRE2) it slows the loop by
+    // about 10%, but when it avoids work (e.g., DotMatchRE2),
+    // it cuts the loop execution by about 45%.
+    if ((cond & kMatchWins) == 0 && (nextmatchcond & kEmptyAllFlags) == 0)
+      goto skipmatch;
+
+    // Finally, the match conditions must be satisfied.
+    if ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p)) {
+      for (int i = 2; i < 2*nmatch; i++)
+        matchcap[i] = cap[i];
+      if (nmatch > 1 && (matchcond & kCapMask))
+        ApplyCaptures(matchcond, p, matchcap, ncap);
+      matchcap[1] = p;
+      matched = true;
+
+      // If we're in longest match mode, we have to keep
+      // going and see if we find a longer match.
+      // In first match mode, we can stop if the match
+      // takes priority over the next state for this input byte.
+      // That bit is per-input byte and thus in cond, not matchcond.
+      if (kind == kFirstMatch && (cond & kMatchWins))
+        goto done;
+    }
+
+  skipmatch:
+    if (state == NULL)
+      goto done;
+    if ((cond & kCapMask) && nmatch > 1)
+      ApplyCaptures(cond, p, cap, ncap);
+  }
+
+  // Look for match at end of input.
+  {
+    uint32 matchcond = state->matchcond;
+    if (matchcond != kImpossible &&
+        ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) {
+      if (nmatch > 1 && (matchcond & kCapMask))
+        ApplyCaptures(matchcond, p, cap, ncap);
+      for (int i = 2; i < ncap; i++)
+        matchcap[i] = cap[i];
+      matchcap[1] = p;
+      matched = true;
+    }
+  }
+
+done:
+  if (!matched)
+    return false;
+  for (int i = 0; i < nmatch; i++)
+    match[i].set(matchcap[2*i], matchcap[2*i+1] - matchcap[2*i]);
+  return true;
+}
+
+
+// Analysis to determine whether a given regexp program is one-pass.
+
+// If id is not on q, adds id to the work queue and returns true.
+// If id is already on the work queue, does nothing and returns false.
+// If id is 0, does nothing and returns true (pretends to add it).
+typedef SparseSet Instq;
+static bool AddQ(Instq *q, int id) {
+  if (id == 0)
+    return true;
+  if (q->contains(id))
+    return false;
+  q->insert(id);
+  return true;
+}
+
+struct InstCond {
+  int id;
+  uint32 cond;
+};
+
+// Returns whether this is a one-pass program; that is,
+// returns whether it is safe to use SearchOnePass on this program.
+// These conditions must be true for any instruction ip:
+//
+//   (1) for any other Inst nip, there is at most one input-free
+//       path from ip to nip.
+//   (2) there is at most one kInstByteRange instruction reachable from
+//       ip that matches any particular byte c.
+//   (3) there is at most one input-free path from ip to a kInstMatch
+//       instruction.
+//
+// This is actually just a conservative approximation: it might
+// return false when the answer is true, when kInstEmptyWidth
+// instructions are involved.
+// Constructs and saves corresponding one-pass NFA on success.
+bool Prog::IsOnePass() {
+  if (did_onepass_)
+    return onepass_start_ != NULL;
+  did_onepass_ = true;
+
+  if (start() == 0) // no match
+    return false;
+
+  // Steal memory for the one-pass NFA from the overall DFA budget.
+  // Willing to use at most 1/4 of the DFA budget (heuristic).
+  // Limit max node count to 65000 as a conservative estimate to
+  // avoid overflowing 16-bit node index in encoding.
+  int maxnodes = 2 + byte_inst_count_;
+  int statesize = sizeof(OneState) + (bytemap_range_-1)*sizeof(uint32);
+  if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes)
+    return false;
+
+  // Flood the graph starting at the start state, and check
+  // that in each reachable state, each possible byte leads
+  // to a unique next state.
+  int size = this->size();
+  InstCond *stack = new InstCond[size];
+
+  int* nodebyid = new int[size]; // indexed by instruction id
+  memset(nodebyid, 0xFF, size*sizeof nodebyid[0]);
+
+  uint8* nodes = new uint8[maxnodes*statesize];
+  uint8* nodep = nodes;
+
+  Instq tovisit(size), workq(size);
+  AddQ(&tovisit, start());
+  nodebyid[start()] = 0;
+  nodep += statesize;
+  int nalloc = 1;
+  for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {
+    int id = *it;
+    int nodeindex = nodebyid[id];
+    OneState* node = IndexToNode(nodes, statesize, nodeindex);
+
+    // Flood graph using manual stack, filling in actions as found.
+    // Default is none.
+    for (int b = 0; b < bytemap_range_; b++)
+      node->action[b] = kImpossible;
+    node->matchcond = kImpossible;
+
+    workq.clear();
+    bool matched = false;
+    int nstack = 0;
+    stack[nstack].id = id;
+    stack[nstack++].cond = 0;
+    while (nstack > 0) {
+      int id = stack[--nstack].id;
+      Prog::Inst* ip = inst(id);
+      uint32 cond = stack[nstack].cond;
+      switch (ip->opcode()) {
+        case kInstAltMatch:
+          // TODO(rsc): Ignoring kInstAltMatch optimization.
+          // Should implement it in this engine, but it's subtle.
+          // Fall through.
+        case kInstAlt:
+          // If already on work queue, (1) is violated: bail out.
+          if (!AddQ(&workq, ip->out()) || !AddQ(&workq, ip->out1()))
+            goto fail;
+          stack[nstack].id = ip->out1();
+          stack[nstack++].cond = cond;
+          stack[nstack].id = ip->out();
+          stack[nstack++].cond = cond;
+          break;
+
+        case kInstByteRange: {
+          int nextindex = nodebyid[ip->out()];
+          if (nextindex == -1) {
+            if (nalloc >= maxnodes) {
+              if (Debug)
+                LOG(ERROR)
+                  << StringPrintf("Not OnePass: hit node limit %d > %d",
+                                  nalloc, maxnodes);
+              goto fail;
+            }
+            nextindex = nalloc;
+            nodep += statesize;
+            nodebyid[ip->out()] = nextindex;
+            nalloc++;
+            AddQ(&tovisit, ip->out());
+          }
+          if (matched)
+            cond |= kMatchWins;
+          for (int c = ip->lo(); c <= ip->hi(); c++) {
+            int b = bytemap_[c];
+            c = unbytemap_[b]; // last c in byte class
+            uint32 act = node->action[b];
+            uint32 newact = (nextindex << kIndexShift) | cond;
+            if ((act & kImpossible) == kImpossible) {
+              node->action[b] = newact;
+            } else if (act != newact) {
+              if (Debug) {
+                LOG(ERROR)
+                  << StringPrintf("Not OnePass: conflict on byte "
+                                  "%#x at state %d",
+                                  c, *it);
+              }
+              goto fail;
+            }
+          }
+          if (ip->foldcase()) {
+            Rune lo = max<Rune>(ip->lo(), 'a') + 'A' - 'a';
+            Rune hi = min<Rune>(ip->hi(), 'z') + 'A' - 'a';
+            for (int c = lo; c <= hi; c++) {
+              int b = bytemap_[c];
+              c = unbytemap_[b]; // last c in class
+              uint32 act = node->action[b];
+              uint32 newact = (nextindex << kIndexShift) | cond;
+              if ((act & kImpossible) == kImpossible) {
+                node->action[b] = newact;
+              } else if (act != newact) {
+                if (Debug) {
+                  LOG(ERROR)
+                    << StringPrintf("Not OnePass: conflict on byte "
+                                    "%#x at state %d",
+                                    c, *it);
+                }
+                goto fail;
+              }
+            }
+          }
+          break;
+        }
+
+        case kInstCapture:
+          if (ip->cap() < kMaxCap)
+            cond |= (1 << kCapShift) << ip->cap();
+          goto QueueEmpty;
+
+        case kInstEmptyWidth:
+          cond |= ip->empty();
+          goto QueueEmpty;
+
+        case kInstNop:
+        QueueEmpty:
+          // kInstCapture and kInstNop always proceed to ip->out().
+          // kInstEmptyWidth only sometimes proceeds to ip->out(),
+          // but as a conservative approximation we assume it always does.
+          // We could be a little more precise by looking at what c
+          // is, but that seems like overkill.
+
+          // If already on work queue, (1) is violated: bail out.
+          if (!AddQ(&workq, ip->out())) {
+            if (Debug) {
+              LOG(ERROR) << StringPrintf("Not OnePass: multiple paths"
+                                         " %d -> %d\n",
+                                         *it, ip->out());
+            }
+            goto fail;
+          }
+          stack[nstack].id = ip->out();
+          stack[nstack++].cond = cond;
+          break;
+
+        case kInstMatch:
+          if (matched) {
+            // (3) is violated
+            if (Debug) {
+              LOG(ERROR) << StringPrintf("Not OnePass: multiple matches"
+                                         " from %d\n", *it);
+            }
+            goto fail;
+          }
+          matched = true;
+          node->matchcond = cond;
+          break;
+
+        case kInstFail:
+          break;
+      }
+    }
+  }
+
+  if (Debug) { // For debugging, dump one-pass NFA to LOG(ERROR).
+    string dump = "prog dump:\n" + Dump() + "node dump\n";
+    map<int, int> idmap;
+    for (int i = 0; i < size; i++)
+      if (nodebyid[i] != -1)
+        idmap[nodebyid[i]] = i;
+
+    StringAppendF(&dump, "byte ranges:\n");
+    int i = 0;
+    for (int b = 0; b < bytemap_range_; b++) {
+      int lo = i;
+      while (bytemap_[i] == b)
+        i++;
+      StringAppendF(&dump, "\t%d: %#x-%#x\n", b, lo, i - 1);
+    }
+
+    for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {
+      int id = *it;
+      int nodeindex = nodebyid[id];
+      if (nodeindex == -1)
+        continue;
+      OneState* node = IndexToNode(nodes, statesize, nodeindex);
+      string s;
+      StringAppendF(&dump, "node %d id=%d: matchcond=%#x\n",
+                    nodeindex, id, node->matchcond);
+      for (int i = 0; i < bytemap_range_; i++) {
+        if ((node->action[i] & kImpossible) == kImpossible)
+          continue;
+        StringAppendF(&dump, " %d cond %#x -> %d id=%d\n",
+                      i, node->action[i] & 0xFFFF,
+                      node->action[i] >> kIndexShift,
+                      idmap[node->action[i] >> kIndexShift]);
+      }
+    }
+    LOG(ERROR) << dump;
+  }
+
+  // Overallocated earlier; cut down to actual size.
+  nodep = new uint8[nalloc*statesize];
+  memmove(nodep, nodes, nalloc*statesize);
+  delete[] nodes;
+  nodes = nodep;
+
+  onepass_start_ = IndexToNode(nodes, statesize, nodebyid[start()]);
+  onepass_nodes_ = nodes;
+  onepass_statesize_ = statesize;
+  dfa_mem_ -= nalloc*statesize;
+
+  delete[] stack;
+  delete[] nodebyid;
+  return true;
+
+fail:
+  delete[] stack;
+  delete[] nodebyid;
+  delete[] nodes;
+  return false;
+}
+
+} // namespace re2
diff --git a/src/openalpr/support/re2/parse.cc b/src/openalpr/support/re2/parse.cc
new file mode 100644
index 0000000..e6b27d2
--- /dev/null
+++ b/src/openalpr/support/re2/parse.cc
@@ -0,0 +1,2284 @@
+// Copyright 2006 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Regular expression parser.
+
+// The parser is a simple precedence-based parser with a
+// manual stack. The parsing work is done by the methods
+// of the ParseState class. The Regexp::Parse function is
+// essentially just a lexer that calls the ParseState method
+// for each token.
+
+// The parser recognizes POSIX extended regular expressions
+// excluding backreferences, collating elements, and collating
+// classes. It also allows the empty string as a regular expression
+// and recognizes the Perl escape sequences \d, \s, \w, \D, \S, and \W.
+// See regexp.h for rationale.
+
+#include "util/util.h"
+#include "re2/regexp.h"
+#include "re2/stringpiece.h"
+#include "re2/unicode_casefold.h"
+#include "re2/unicode_groups.h"
+#include "re2/walker-inl.h"
+
+namespace re2 {
+
+// Regular expression parse state.
+// The list of parsed regexps so far is maintained as a vector of
+// Regexp pointers called the stack. Left parenthesis and vertical
+// bar markers are also placed on the stack, as Regexps with
+// non-standard opcodes.
+// Scanning a left parenthesis causes the parser to push a left parenthesis
+// marker on the stack.
+// Scanning a vertical bar causes the parser to pop the stack until it finds a +// vertical bar or left parenthesis marker (not popping the marker), +// concatenate all the popped results, and push them back on +// the stack (DoConcatenation). +// Scanning a right parenthesis causes the parser to act as though it +// has seen a vertical bar, which then leaves the top of the stack in the +// form LeftParen regexp VerticalBar regexp VerticalBar ... regexp VerticalBar. +// The parser pops all this off the stack and creates an alternation of the +// regexps (DoAlternation). + +class Regexp::ParseState { + public: + ParseState(ParseFlags flags, const StringPiece& whole_regexp, + RegexpStatus* status); + ~ParseState(); + + ParseFlags flags() { return flags_; } + int rune_max() { return rune_max_; } + + // Parse methods. All public methods return a bool saying + // whether parsing should continue. If a method returns + // false, it has set fields in *status_, and the parser + // should return NULL. + + // Pushes the given regular expression onto the stack. + // Could check for too much memory used here. + bool PushRegexp(Regexp* re); + + // Pushes the literal rune r onto the stack. + bool PushLiteral(Rune r); + + // Pushes a regexp with the given op (and no args) onto the stack. + bool PushSimpleOp(RegexpOp op); + + // Pushes a ^ onto the stack. + bool PushCarat(); + + // Pushes a \b (word == true) or \B (word == false) onto the stack. + bool PushWordBoundary(bool word); + + // Pushes a $ onto the stack. + bool PushDollar(); + + // Pushes a . onto the stack + bool PushDot(); + + // Pushes a repeat operator regexp onto the stack. + // A valid argument for the operator must already be on the stack. + // s is the name of the operator, for use in error messages. + bool PushRepeatOp(RegexpOp op, const StringPiece& s, bool nongreedy); + + // Pushes a repetition regexp onto the stack. + // A valid argument for the operator must already be on the stack. 
+ bool PushRepetition(int min, int max, const StringPiece& s, bool nongreedy); + + // Checks whether a particular regexp op is a marker. + bool IsMarker(RegexpOp op); + + // Processes a left parenthesis in the input. + // Pushes a marker onto the stack. + bool DoLeftParen(const StringPiece& name); + bool DoLeftParenNoCapture(); + + // Processes a vertical bar in the input. + bool DoVerticalBar(); + + // Processes a right parenthesis in the input. + bool DoRightParen(); + + // Processes the end of input, returning the final regexp. + Regexp* DoFinish(); + + // Finishes the regexp if necessary, preparing it for use + // in a more complicated expression. + // If it is a CharClassBuilder, converts into a CharClass. + Regexp* FinishRegexp(Regexp*); + + // These routines don't manipulate the parse stack + // directly, but they do need to look at flags_. + // ParseCharClass also manipulates the internals of Regexp + // while creating *out_re. + + // Parse a character class into *out_re. + // Removes parsed text from s. + bool ParseCharClass(StringPiece* s, Regexp** out_re, + RegexpStatus* status); + + // Parse a character class character into *rp. + // Removes parsed text from s. + bool ParseCCCharacter(StringPiece* s, Rune *rp, + const StringPiece& whole_class, + RegexpStatus* status); + + // Parse a character class range into rr. + // Removes parsed text from s. + bool ParseCCRange(StringPiece* s, RuneRange* rr, + const StringPiece& whole_class, + RegexpStatus* status); + + // Parse a Perl flag set or non-capturing group from s. + bool ParsePerlFlags(StringPiece* s); + + + // Finishes the current concatenation, + // collapsing it into a single regexp on the stack. + void DoConcatenation(); + + // Finishes the current alternation, + // collapsing it to a single regexp on the stack. + void DoAlternation(); + + // Generalized DoAlternation/DoConcatenation. + void DoCollapse(RegexpOp op); + + // Maybe concatenate Literals into LiteralString. 
+ bool MaybeConcatString(int r, ParseFlags flags); + +private: + ParseFlags flags_; + StringPiece whole_regexp_; + RegexpStatus* status_; + Regexp* stacktop_; + int ncap_; // number of capturing parens seen + int rune_max_; // maximum char value for this encoding + + DISALLOW_COPY_AND_ASSIGN(ParseState); +}; + +// Pseudo-operators - only on parse stack. +const RegexpOp kLeftParen = static_cast(kMaxRegexpOp+1); +const RegexpOp kVerticalBar = static_cast(kMaxRegexpOp+2); + +Regexp::ParseState::ParseState(ParseFlags flags, + const StringPiece& whole_regexp, + RegexpStatus* status) + : flags_(flags), whole_regexp_(whole_regexp), + status_(status), stacktop_(NULL), ncap_(0) { + if (flags_ & Latin1) + rune_max_ = 0xFF; + else + rune_max_ = Runemax; +} + +// Cleans up by freeing all the regexps on the stack. +Regexp::ParseState::~ParseState() { + Regexp* next; + for (Regexp* re = stacktop_; re != NULL; re = next) { + next = re->down_; + re->down_ = NULL; + if (re->op() == kLeftParen) + delete re->name_; + re->Decref(); + } +} + +// Finishes the regexp if necessary, preparing it for use in +// a more complex expression. +// If it is a CharClassBuilder, converts into a CharClass. +Regexp* Regexp::ParseState::FinishRegexp(Regexp* re) { + if (re == NULL) + return NULL; + re->down_ = NULL; + + if (re->op_ == kRegexpCharClass && re->ccb_ != NULL) { + CharClassBuilder* ccb = re->ccb_; + re->ccb_ = NULL; + re->cc_ = ccb->GetCharClass(); + delete ccb; + } + + return re; +} + +// Pushes the given regular expression onto the stack. +// Could check for too much memory used here. +bool Regexp::ParseState::PushRegexp(Regexp* re) { + MaybeConcatString(-1, NoParseFlags); + + // Special case: a character class of one character is just + // a literal. This is a common idiom for escaping + // single characters (e.g., [.] instead of \.), and some + // analysis does better with fewer character classes. + // Similarly, [Aa] can be rewritten as a literal A with ASCII case folding. 
+ if (re->op_ == kRegexpCharClass) { + re->ccb_->RemoveAbove(rune_max_); + if (re->ccb_->size() == 1) { + Rune r = re->ccb_->begin()->lo; + re->Decref(); + re = new Regexp(kRegexpLiteral, flags_); + re->rune_ = r; + } else if (re->ccb_->size() == 2) { + Rune r = re->ccb_->begin()->lo; + if ('A' <= r && r <= 'Z' && re->ccb_->Contains(r + 'a' - 'A')) { + re->Decref(); + re = new Regexp(kRegexpLiteral, flags_ | FoldCase); + re->rune_ = r + 'a' - 'A'; + } + } + } + + if (!IsMarker(re->op())) + re->simple_ = re->ComputeSimple(); + re->down_ = stacktop_; + stacktop_ = re; + return true; +} + +// Searches the case folding tables and returns the CaseFold* that contains r. +// If there isn't one, returns the CaseFold* with smallest f->lo bigger than r. +// If there isn't one, returns NULL. +const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) { + const CaseFold* ef = f + n; + + // Binary search for entry containing r. + while (n > 0) { + int m = n/2; + if (f[m].lo <= r && r <= f[m].hi) + return &f[m]; + if (r < f[m].lo) { + n = m; + } else { + f += m+1; + n -= m+1; + } + } + + // There is no entry that contains r, but f points + // where it would have been. Unless f points at + // the end of the array, it points at the next entry + // after r. + if (f < ef) + return f; + + // No entry contains r; no entry contains runes > r. + return NULL; +} + +// Returns the result of applying the fold f to the rune r. 
+Rune ApplyFold(const CaseFold *f, Rune r) { + switch (f->delta) { + default: + return r + f->delta; + + case EvenOddSkip: // even <-> odd but only applies to every other + if ((r - f->lo) % 2) + return r; + // fall through + case EvenOdd: // even <-> odd + if (r%2 == 0) + return r + 1; + return r - 1; + + case OddEvenSkip: // odd <-> even but only applies to every other + if ((r - f->lo) % 2) + return r; + // fall through + case OddEven: // odd <-> even + if (r%2 == 1) + return r + 1; + return r - 1; + } +} + +// Returns the next Rune in r's folding cycle (see unicode_casefold.h). +// Examples: +// CycleFoldRune('A') = 'a' +// CycleFoldRune('a') = 'A' +// +// CycleFoldRune('K') = 'k' +// CycleFoldRune('k') = 0x212A (Kelvin) +// CycleFoldRune(0x212A) = 'K' +// +// CycleFoldRune('?') = '?' +Rune CycleFoldRune(Rune r) { + const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, r); + if (f == NULL || r < f->lo) + return r; + return ApplyFold(f, r); +} + +// Add lo-hi to the class, along with their fold-equivalent characters. +// If lo-hi is already in the class, assume that the fold-equivalent +// chars are there too, so there's no work to do. +static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) { + // AddFoldedRange calls itself recursively for each rune in the fold cycle. + // Most folding cycles are small: there aren't any bigger than four in the + // current Unicode tables. make_unicode_casefold.py checks that + // the cycles are not too long, and we double-check here using depth. + if (depth > 10) { + LOG(DFATAL) << "AddFoldedRange recurses too much."; + return; + } + + if (!cc->AddRange(lo, hi)) // lo-hi was already there? 
we're done + return; + + while (lo <= hi) { + const CaseFold* f = LookupCaseFold(unicode_casefold, num_unicode_casefold, lo); + if (f == NULL) // lo has no fold, nor does anything above lo + break; + if (lo < f->lo) { // lo has no fold; next rune with a fold is f->lo + lo = f->lo; + continue; + } + + // Add in the result of folding the range lo - f->hi + // and that range's fold, recursively. + Rune lo1 = lo; + Rune hi1 = min(hi, f->hi); + switch (f->delta) { + default: + lo1 += f->delta; + hi1 += f->delta; + break; + case EvenOdd: + if (lo1%2 == 1) + lo1--; + if (hi1%2 == 0) + hi1++; + break; + case OddEven: + if (lo1%2 == 0) + lo1--; + if (hi1%2 == 1) + hi1++; + break; + } + AddFoldedRange(cc, lo1, hi1, depth+1); + + // Pick up where this fold left off. + lo = f->hi + 1; + } +} + +// Pushes the literal rune r onto the stack. +bool Regexp::ParseState::PushLiteral(Rune r) { + // Do case folding if needed. + if ((flags_ & FoldCase) && CycleFoldRune(r) != r) { + Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); + re->ccb_ = new CharClassBuilder; + Rune r1 = r; + do { + if (!(flags_ & NeverNL) || r != '\n') { + re->ccb_->AddRange(r, r); + } + r = CycleFoldRune(r); + } while (r != r1); + return PushRegexp(re); + } + + // Exclude newline if applicable. + if ((flags_ & NeverNL) && r == '\n') + return PushRegexp(new Regexp(kRegexpNoMatch, flags_)); + + // No fancy stuff worked. Ordinary literal. + if (MaybeConcatString(r, flags_)) + return true; + + Regexp* re = new Regexp(kRegexpLiteral, flags_); + re->rune_ = r; + return PushRegexp(re); +} + +// Pushes a ^ onto the stack. +bool Regexp::ParseState::PushCarat() { + if (flags_ & OneLine) { + return PushSimpleOp(kRegexpBeginText); + } + return PushSimpleOp(kRegexpBeginLine); +} + +// Pushes a \b or \B onto the stack. +bool Regexp::ParseState::PushWordBoundary(bool word) { + if (word) + return PushSimpleOp(kRegexpWordBoundary); + return PushSimpleOp(kRegexpNoWordBoundary); +} + +// Pushes a $ onto the stack. 
+bool Regexp::ParseState::PushDollar() { + if (flags_ & OneLine) { + // Clumsy marker so that MimicsPCRE() can tell whether + // this kRegexpEndText was a $ and not a \z. + Regexp::ParseFlags oflags = flags_; + flags_ = flags_ | WasDollar; + bool ret = PushSimpleOp(kRegexpEndText); + flags_ = oflags; + return ret; + } + return PushSimpleOp(kRegexpEndLine); +} + +// Pushes a . onto the stack. +bool Regexp::ParseState::PushDot() { + if ((flags_ & DotNL) && !(flags_ & NeverNL)) + return PushSimpleOp(kRegexpAnyChar); + // Rewrite . into [^\n] + Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); + re->ccb_ = new CharClassBuilder; + re->ccb_->AddRange(0, '\n' - 1); + re->ccb_->AddRange('\n' + 1, rune_max_); + return PushRegexp(re); +} + +// Pushes a regexp with the given op (and no args) onto the stack. +bool Regexp::ParseState::PushSimpleOp(RegexpOp op) { + Regexp* re = new Regexp(op, flags_); + return PushRegexp(re); +} + +// Pushes a repeat operator regexp onto the stack. +// A valid argument for the operator must already be on the stack. +// The char c is the name of the operator, for use in error messages. +bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s, + bool nongreedy) { + if (stacktop_ == NULL || IsMarker(stacktop_->op())) { + status_->set_code(kRegexpRepeatArgument); + status_->set_error_arg(s); + return false; + } + Regexp::ParseFlags fl = flags_; + if (nongreedy) + fl = fl ^ NonGreedy; + Regexp* re = new Regexp(op, fl); + re->AllocSub(1); + re->down_ = stacktop_->down_; + re->sub()[0] = FinishRegexp(stacktop_); + re->simple_ = re->ComputeSimple(); + stacktop_ = re; + return true; +} + +// RepetitionWalker reports whether the repetition regexp is valid. +// Valid means that the combination of the top-level repetition +// and any inner repetitions does not exceed n copies of the +// innermost thing. 
+// This rewalks the regexp tree and is called for every repetition, +// so we have to worry about inducing quadratic behavior in the parser. +// We avoid this by only using RepetitionWalker when min or max >= 2. +// In that case the depth of any >= 2 nesting can only get to 9 without +// triggering a parse error, so each subtree can only be rewalked 9 times. +class RepetitionWalker : public Regexp::Walker { + public: + RepetitionWalker() {} + virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); + virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, + int* child_args, int nchild_args); + virtual int ShortVisit(Regexp* re, int parent_arg); + + private: + DISALLOW_COPY_AND_ASSIGN(RepetitionWalker); +}; + +int RepetitionWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { + int arg = parent_arg; + if (re->op() == kRegexpRepeat) { + int m = re->max(); + if (m < 0) { + m = re->min(); + } + if (m > 0) { + arg /= m; + } + } + return arg; +} + +int RepetitionWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, + int* child_args, int nchild_args) { + int arg = pre_arg; + for (int i = 0; i < nchild_args; i++) { + if (child_args[i] < arg) { + arg = child_args[i]; + } + } + return arg; +} + +int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) { + // This should never be called, since we use Walk and not + // WalkExponential. + LOG(DFATAL) << "RepetitionWalker::ShortVisit called"; + return 0; +} + +// Pushes a repetition regexp onto the stack. +// A valid argument for the operator must already be on the stack. 
+bool Regexp::ParseState::PushRepetition(int min, int max, + const StringPiece& s, + bool nongreedy) { + if ((max != -1 && max < min) || min > 1000 || max > 1000) { + status_->set_code(kRegexpRepeatSize); + status_->set_error_arg(s); + return false; + } + if (stacktop_ == NULL || IsMarker(stacktop_->op())) { + status_->set_code(kRegexpRepeatArgument); + status_->set_error_arg(s); + return false; + } + Regexp::ParseFlags fl = flags_; + if (nongreedy) + fl = fl ^ NonGreedy; + Regexp* re = new Regexp(kRegexpRepeat, fl); + re->min_ = min; + re->max_ = max; + re->AllocSub(1); + re->down_ = stacktop_->down_; + re->sub()[0] = FinishRegexp(stacktop_); + re->simple_ = re->ComputeSimple(); + stacktop_ = re; + if (min >= 2 || max >= 2) { + RepetitionWalker w; + if (w.Walk(stacktop_, 1000) == 0) { + status_->set_code(kRegexpRepeatSize); + status_->set_error_arg(s); + return false; + } + } + return true; +} + +// Checks whether a particular regexp op is a marker. +bool Regexp::ParseState::IsMarker(RegexpOp op) { + return op >= kLeftParen; +} + +// Processes a left parenthesis in the input. +// Pushes a marker onto the stack. +bool Regexp::ParseState::DoLeftParen(const StringPiece& name) { + Regexp* re = new Regexp(kLeftParen, flags_); + re->cap_ = ++ncap_; + if (name.data() != NULL) + re->name_ = new string(name.as_string()); + return PushRegexp(re); +} + +// Pushes a non-capturing marker onto the stack. +bool Regexp::ParseState::DoLeftParenNoCapture() { + Regexp* re = new Regexp(kLeftParen, flags_); + re->cap_ = -1; + return PushRegexp(re); +} + +// Adds r to cc, along with r's upper case if foldascii is set. +static void AddLiteral(CharClassBuilder* cc, Rune r, bool foldascii) { + cc->AddRange(r, r); + if (foldascii && 'a' <= r && r <= 'z') + cc->AddRange(r + 'A' - 'a', r + 'A' - 'a'); +} + +// Processes a vertical bar in the input. 
// Returns true after rearranging the stack in place, or the result of
// pushing a new kVerticalBar marker.
bool Regexp::ParseState::DoVerticalBar() {
  MaybeConcatString(-1, NoParseFlags);
  DoConcatenation();

  // Below the vertical bar is a list to alternate.
  // Above the vertical bar is a list to concatenate.
  // We just did the concatenation, so either swap
  // the result below the vertical bar or push a new
  // vertical bar on the stack.
  Regexp* r1;  // the concatenation just finished (stack top)
  Regexp* r2;  // the entry directly below it
  if ((r1 = stacktop_) != NULL &&
      (r2 = stacktop_->down_) != NULL &&
      r2->op() == kVerticalBar) {
    // If above and below vertical bar are literal or char class,
    // can merge into a single char class.
    Regexp* r3;  // the entry below the vertical bar
    if ((r1->op() == kRegexpLiteral ||
         r1->op() == kRegexpCharClass ||
         r1->op() == kRegexpAnyChar) &&
        (r3 = r2->down_) != NULL) {
      Rune rune;
      // The cases cascade on purpose: a literal r3 is first turned into a
      // class, then r1 is merged into that class, then r1 is popped.
      switch (r3->op()) {
        case kRegexpLiteral:  // convert to char class
          rune = r3->rune_;
          r3->op_ = kRegexpCharClass;
          r3->cc_ = NULL;
          r3->ccb_ = new CharClassBuilder;
          AddLiteral(r3->ccb_, rune, r3->parse_flags_ & Regexp::FoldCase);
          // fall through
        case kRegexpCharClass:
          if (r1->op() == kRegexpLiteral)
            AddLiteral(r3->ccb_, r1->rune_,
                       r1->parse_flags_ & Regexp::FoldCase);
          else if (r1->op() == kRegexpCharClass)
            r3->ccb_->AddCharClass(r1->ccb_);
          // Once the merged class covers everything, drop the builder
          // and represent it as kRegexpAnyChar.
          if (r1->op() == kRegexpAnyChar || r3->ccb_->full()) {
            delete r3->ccb_;
            r3->ccb_ = NULL;
            r3->op_ = kRegexpAnyChar;
          }
          // fall through
        case kRegexpAnyChar:
          // pop r1
          stacktop_ = r2;
          r1->Decref();
          return true;
        default:
          break;
      }
    }

    // Swap r1 below vertical bar (r2).
    r1->down_ = r2->down_;
    r2->down_ = r1;
    stacktop_ = r2;
    return true;
  }
  return PushSimpleOp(kVerticalBar);
}

// Processes a right parenthesis in the input.
// Returns false with kRegexpMissingParen set in *status_ when no matching
// left-paren marker sits directly below the collapsed alternation.
bool Regexp::ParseState::DoRightParen() {
  // Finish the current concatenation and alternation.
  DoAlternation();

  // The stack should be: LeftParen regexp
  // Remove the LeftParen, leaving the regexp,
  // parenthesized.
  Regexp* r1;
  Regexp* r2;
  if ((r1 = stacktop_) == NULL ||
      (r2 = r1->down_) == NULL ||
      r2->op() != kLeftParen) {
    status_->set_code(kRegexpMissingParen);
    status_->set_error_arg(whole_regexp_);
    return false;
  }

  // Pop off r1, r2.  Will Decref or reuse below.
  stacktop_ = r2->down_;

  // Restore flags from when paren opened.
  Regexp* re = r2;
  flags_ = re->parse_flags();

  // Rewrite LeftParen as capture if needed.
  if (re->cap_ > 0) {
    re->op_ = kRegexpCapture;
    // re->cap_ is already set
    re->AllocSub(1);
    re->sub()[0] = FinishRegexp(r1);
    re->simple_ = re->ComputeSimple();
  } else {
    // Non-capturing group: discard the marker and keep the inner regexp.
    re->Decref();
    re = r1;
  }
  return PushRegexp(re);
}

// Processes the end of input, returning the final regexp.
// Returns NULL with kRegexpMissingParen set in *status_ when more than one
// entry is left on the stack after collapsing the alternation.
Regexp* Regexp::ParseState::DoFinish() {
  DoAlternation();
  Regexp* re = stacktop_;
  if (re != NULL && re->down_ != NULL) {
    status_->set_code(kRegexpMissingParen);
    status_->set_error_arg(whole_regexp_);
    return NULL;
  }
  // Detach the result so the destructor does not release it.
  stacktop_ = NULL;
  return FinishRegexp(re);
}

// Returns the leading regexp that re starts with.
// The returned Regexp* points into a piece of re,
// so it must not be used after the caller calls re->Decref().
// Returns NULL when re (or the first element of a concatenation of at
// least two parts) is an empty match.
Regexp* Regexp::LeadingRegexp(Regexp* re) {
  if (re->op() == kRegexpEmptyMatch)
    return NULL;
  if (re->op() == kRegexpConcat && re->nsub() >= 2) {
    Regexp** sub = re->sub();
    if (sub[0]->op() == kRegexpEmptyMatch)
      return NULL;
    return sub[0];
  }
  return re;
}

// Removes LeadingRegexp(re) from re and returns what's left.
// Consumes the reference to re and may edit it in place.
// If caller wants to hold on to LeadingRegexp(re),
// must have already Incref'ed it.
+Regexp* Regexp::RemoveLeadingRegexp(Regexp* re) { + if (re->op() == kRegexpEmptyMatch) + return re; + if (re->op() == kRegexpConcat && re->nsub() >= 2) { + Regexp** sub = re->sub(); + if (sub[0]->op() == kRegexpEmptyMatch) + return re; + sub[0]->Decref(); + sub[0] = NULL; + if (re->nsub() == 2) { + // Collapse concatenation to single regexp. + Regexp* nre = sub[1]; + sub[1] = NULL; + re->Decref(); + return nre; + } + // 3 or more -> 2 or more. + re->nsub_--; + memmove(sub, sub + 1, re->nsub_ * sizeof sub[0]); + return re; + } + Regexp::ParseFlags pf = re->parse_flags(); + re->Decref(); + return new Regexp(kRegexpEmptyMatch, pf); +} + +// Returns the leading string that re starts with. +// The returned Rune* points into a piece of re, +// so it must not be used after the caller calls re->Decref(). +Rune* Regexp::LeadingString(Regexp* re, int *nrune, + Regexp::ParseFlags *flags) { + while (re->op() == kRegexpConcat && re->nsub() > 0) + re = re->sub()[0]; + + *flags = static_cast(re->parse_flags_ & Regexp::FoldCase); + + if (re->op() == kRegexpLiteral) { + *nrune = 1; + return &re->rune_; + } + + if (re->op() == kRegexpLiteralString) { + *nrune = re->nrunes_; + return re->runes_; + } + + *nrune = 0; + return NULL; +} + +// Removes the first n leading runes from the beginning of re. +// Edits re in place. +void Regexp::RemoveLeadingString(Regexp* re, int n) { + // Chase down concats to find first string. + // For regexps generated by parser, nested concats are + // flattened except when doing so would overflow the 16-bit + // limit on the size of a concatenation, so we should never + // see more than two here. + Regexp* stk[4]; + int d = 0; + while (re->op() == kRegexpConcat) { + if (d < arraysize(stk)) + stk[d++] = re; + re = re->sub()[0]; + } + + // Remove leading string from re. 
+ if (re->op() == kRegexpLiteral) { + re->rune_ = 0; + re->op_ = kRegexpEmptyMatch; + } else if (re->op() == kRegexpLiteralString) { + if (n >= re->nrunes_) { + delete[] re->runes_; + re->runes_ = NULL; + re->nrunes_ = 0; + re->op_ = kRegexpEmptyMatch; + } else if (n == re->nrunes_ - 1) { + Rune rune = re->runes_[re->nrunes_ - 1]; + delete[] re->runes_; + re->runes_ = NULL; + re->nrunes_ = 0; + re->rune_ = rune; + re->op_ = kRegexpLiteral; + } else { + re->nrunes_ -= n; + memmove(re->runes_, re->runes_ + n, re->nrunes_ * sizeof re->runes_[0]); + } + } + + // If re is now empty, concatenations might simplify too. + while (d-- > 0) { + re = stk[d]; + Regexp** sub = re->sub(); + if (sub[0]->op() == kRegexpEmptyMatch) { + sub[0]->Decref(); + sub[0] = NULL; + // Delete first element of concat. + switch (re->nsub()) { + case 0: + case 1: + // Impossible. + LOG(DFATAL) << "Concat of " << re->nsub(); + re->submany_ = NULL; + re->op_ = kRegexpEmptyMatch; + break; + + case 2: { + // Replace re with sub[1]. + Regexp* old = sub[1]; + sub[1] = NULL; + re->Swap(old); + old->Decref(); + break; + } + + default: + // Slide down. + re->nsub_--; + memmove(sub, sub + 1, re->nsub_ * sizeof sub[0]); + break; + } + } + } +} + +// Factors common prefixes from alternation. +// For example, +// ABC|ABD|AEF|BCX|BCY +// simplifies to +// A(B(C|D)|EF)|BC(X|Y) +// which the normal parse state routines will further simplify to +// A(B[CD]|EF)|BC[XY] +// +// Rewrites sub to contain simplified list to alternate and returns +// the new length of sub. Adjusts reference counts accordingly +// (incoming sub[i] decremented, outgoing sub[i] incremented). + +// It's too much of a pain to write this code with an explicit stack, +// so instead we let the caller specify a maximum depth and +// don't simplify beyond that. 
// There are around 15 words of local
// variables and parameters in the frame, so allowing 8 levels
// on a 64-bit machine is still less than a kilobyte of stack and
// probably enough benefit for practical uses.
const int kFactorAlternationMaxDepth = 8;

// Entry point: factors the n alternatives in sub with the default
// recursion limit and returns the new number of alternatives.
int Regexp::FactorAlternation(
    Regexp** sub, int n,
    Regexp::ParseFlags altflags) {
  return FactorAlternationRecursive(sub, n, altflags,
                                    kFactorAlternationMaxDepth);
}

// Rewrites sub[0:n] in place in four rounds and returns the new length.
// When maxdepth is exhausted the list is returned untouched.
int Regexp::FactorAlternationRecursive(
    Regexp** sub, int n,
    Regexp::ParseFlags altflags,
    int maxdepth) {

  if (maxdepth <= 0)
    return n;

  // Round 1: Factor out common literal prefixes.
  Rune *rune = NULL;
  int nrune = 0;
  Regexp::ParseFlags runeflags = Regexp::NoParseFlags;
  int start = 0;
  int out = 0;
  // The loop runs one step past the end (i == n) so that the final run
  // is flushed by the same code as every other run.
  for (int i = 0; i <= n; i++) {
    // Invariant: what was in sub[0:start] has been Decref'ed
    // and that space has been reused for sub[0:out] (out <= start).
    //
    // Invariant: sub[start:i] consists of regexps that all begin
    // with the string rune[0:nrune].

    Rune* rune_i = NULL;
    int nrune_i = 0;
    Regexp::ParseFlags runeflags_i = Regexp::NoParseFlags;
    if (i < n) {
      rune_i = LeadingString(sub[i], &nrune_i, &runeflags_i);
      if (runeflags_i == runeflags) {
        int same = 0;
        while (same < nrune && same < nrune_i && rune[same] == rune_i[same])
          same++;
        if (same > 0) {
          // Matches at least one rune in current range.  Keep going around.
          nrune = same;
          continue;
        }
      }
    }

    // Found end of a run with common leading literal string:
    // sub[start:i] all begin with rune[0:nrune] but sub[i]
    // does not even begin with rune[0].
    //
    // Factor out common string and append factored expression to sub[0:out].
    if (i == start) {
      // Nothing to do - first iteration.
    } else if (i == start+1) {
      // Just one: don't bother factoring.
      sub[out++] = sub[start];
    } else {
      // Construct factored form: prefix(suffix1|suffix2|...)
      Regexp* x[2];  // x[0] = prefix, x[1] = suffix1|suffix2|...
      // The prefix is built before the strings it points into are edited.
      x[0] = LiteralString(rune, nrune, runeflags);
      for (int j = start; j < i; j++)
        RemoveLeadingString(sub[j], nrune);
      int nn = FactorAlternationRecursive(sub + start, i - start, altflags,
                                          maxdepth - 1);
      x[1] = AlternateNoFactor(sub + start, nn, altflags);
      sub[out++] = Concat(x, 2, altflags);
    }

    // Prepare for next round (if there is one).
    if (i < n) {
      start = i;
      rune = rune_i;
      nrune = nrune_i;
      runeflags = runeflags_i;
    }
  }
  n = out;

  // Round 2: Factor out common complex prefixes,
  // just the first piece of each concatenation,
  // whatever it is.  This is good enough a lot of the time.
  start = 0;
  out = 0;
  Regexp* first = NULL;
  for (int i = 0; i <= n; i++) {
    // Invariant: what was in sub[0:start] has been Decref'ed
    // and that space has been reused for sub[0:out] (out <= start).
    //
    // Invariant: sub[start:i] consists of regexps that all begin with first.

    Regexp* first_i = NULL;
    if (i < n) {
      first_i = LeadingRegexp(sub[i]);
      if (first != NULL && Regexp::Equal(first, first_i)) {
        continue;
      }
    }

    // Found end of a run with common leading regexp:
    // sub[start:i] all begin with first but sub[i] does not.
    //
    // Factor out common regexp and append factored expression to sub[0:out].
    if (i == start) {
      // Nothing to do - first iteration.
    } else if (i == start+1) {
      // Just one: don't bother factoring.
      sub[out++] = sub[start];
    } else {
      // Construct factored form: prefix(suffix1|suffix2|...)
      Regexp* x[2];  // x[0] = prefix, x[1] = suffix1|suffix2|...
      // Take a reference on the prefix before it is removed from sub[j].
      x[0] = first->Incref();
      for (int j = start; j < i; j++)
        sub[j] = RemoveLeadingRegexp(sub[j]);
      int nn = FactorAlternationRecursive(sub + start, i - start, altflags,
                                          maxdepth - 1);
      x[1] = AlternateNoFactor(sub + start, nn, altflags);
      sub[out++] = Concat(x, 2, altflags);
    }

    // Prepare for next round (if there is one).
    if (i < n) {
      start = i;
      first = first_i;
    }
  }
  n = out;

  // Round 3: Collapse runs of single literals into character classes.
  start = 0;
  out = 0;
  for (int i = 0; i <= n; i++) {
    // Invariant: what was in sub[0:start] has been Decref'ed
    // and that space has been reused for sub[0:out] (out <= start).
    //
    // Invariant: sub[start:i] consists of regexps that are either
    // literal runes or character classes.

    if (i < n &&
        (sub[i]->op() == kRegexpLiteral ||
         sub[i]->op() == kRegexpCharClass))
      continue;

    // sub[i] is not a char or char class;
    // emit char class for sub[start:i]...
    if (i == start) {
      // Nothing to do.
    } else if (i == start+1) {
      sub[out++] = sub[start];
    } else {
      // Make new char class.
      CharClassBuilder ccb;
      for (int j = start; j < i; j++) {
        Regexp* re = sub[j];
        if (re->op() == kRegexpCharClass) {
          CharClass* cc = re->cc();
          for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
            ccb.AddRange(it->lo, it->hi);
        } else if (re->op() == kRegexpLiteral) {
          ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
        } else {
          LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " "
                      << re->ToString();
        }
        re->Decref();
      }
      sub[out++] = NewCharClass(ccb.GetCharClass(), altflags);
    }

    // ... and then emit sub[i].
    if (i < n)
      sub[out++] = sub[i];
    start = i+1;
  }
  n = out;

  // Round 4: Collapse runs of empty matches into single empty match.
  start = 0;
  out = 0;
  for (int i = 0; i < n; i++) {
    // Drop every empty match that is directly followed by another one,
    // so only the last of each run survives.
    if (i + 1 < n &&
        sub[i]->op() == kRegexpEmptyMatch &&
        sub[i+1]->op() == kRegexpEmptyMatch) {
      sub[i]->Decref();
      continue;
    }
    sub[out++] = sub[i];
  }
  n = out;

  return n;
}

// Collapse the regexps on top of the stack, down to the
// first marker, into a new op node (op == kRegexpAlternate
// or op == kRegexpConcat).
void Regexp::ParseState::DoCollapse(RegexpOp op) {
  // Scan backward to marker, counting children of composite.
  // A child that already has the same op contributes its own children.
  int n = 0;
  Regexp* next = NULL;
  Regexp* sub;
  for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) {
    next = sub->down_;
    if (sub->op_ == op)
      n += sub->nsub_;
    else
      n++;
  }

  // If there's just one child, leave it alone.
  // (Concat of one thing is that one thing; alternate of one thing is same.)
  if (stacktop_ != NULL && stacktop_->down_ == next)
    return;

  // Construct op (alternation or concatenation), flattening op of op.
  // The stack is walked top-down, so subs is filled from the back to
  // restore source order.
  Regexp** subs = new Regexp*[n];
  next = NULL;
  int i = n;
  for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) {
    next = sub->down_;
    if (sub->op_ == op) {
      Regexp** sub_subs = sub->sub();
      for (int k = sub->nsub_ - 1; k >= 0; k--)
        subs[--i] = sub_subs[k]->Incref();
      sub->Decref();
    } else {
      subs[--i] = FinishRegexp(sub);
    }
  }

  Regexp* re = ConcatOrAlternate(op, subs, n, flags_, true);
  delete[] subs;
  re->simple_ = re->ComputeSimple();
  // next is the marker (or NULL) at which the scan stopped.
  re->down_ = next;
  stacktop_ = re;
}

// Finishes the current concatenation,
// collapsing it into a single regexp on the stack.
void Regexp::ParseState::DoConcatenation() {
  Regexp* r1 = stacktop_;
  if (r1 == NULL || IsMarker(r1->op())) {
    // empty concatenation is special case
    Regexp* re = new Regexp(kRegexpEmptyMatch, flags_);
    PushRegexp(re);
  }
  DoCollapse(kRegexpConcat);
}

// Finishes the current alternation,
// collapsing it to a single regexp on the stack.
void Regexp::ParseState::DoAlternation() {
  DoVerticalBar();
  // Now stack top is kVerticalBar.
  // Pop and release that marker before collapsing what lies below it.
  Regexp* r1 = stacktop_;
  stacktop_ = r1->down_;
  r1->Decref();
  DoCollapse(kRegexpAlternate);
}

// Incremental conversion of concatenated literals into strings.
// If top two elements on stack are both literal or string,
// collapse into single string.
// Don't walk down the stack -- the parser calls this frequently
// enough that below the bottom two is known to be collapsed.
+// Only called when another regexp is about to be pushed +// on the stack, so that the topmost literal is not being considered. +// (Otherwise ab* would turn into (ab)*.) +// If r >= 0, consider pushing a literal r on the stack. +// Return whether that happened. +bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) { + Regexp* re1; + Regexp* re2; + if ((re1 = stacktop_) == NULL || (re2 = re1->down_) == NULL) + return false; + + if (re1->op_ != kRegexpLiteral && re1->op_ != kRegexpLiteralString) + return false; + if (re2->op_ != kRegexpLiteral && re2->op_ != kRegexpLiteralString) + return false; + if ((re1->parse_flags_ & FoldCase) != (re2->parse_flags_ & FoldCase)) + return false; + + if (re2->op_ == kRegexpLiteral) { + // convert into string + Rune rune = re2->rune_; + re2->op_ = kRegexpLiteralString; + re2->nrunes_ = 0; + re2->runes_ = NULL; + re2->AddRuneToString(rune); + } + + // push re1 into re2. + if (re1->op_ == kRegexpLiteral) { + re2->AddRuneToString(re1->rune_); + } else { + for (int i = 0; i < re1->nrunes_; i++) + re2->AddRuneToString(re1->runes_[i]); + re1->nrunes_ = 0; + delete[] re1->runes_; + re1->runes_ = NULL; + } + + // reuse re1 if possible + if (r >= 0) { + re1->op_ = kRegexpLiteral; + re1->rune_ = r; + re1->parse_flags_ = flags; + return true; + } + + stacktop_ = re2; + re1->Decref(); + return false; +} + +// Lexing routines. + +// Parses a decimal integer, storing it in *n. +// Sets *s to span the remainder of the string. +// Sets *out_re to the regexp for the class. +static bool ParseInteger(StringPiece* s, int* np) { + if (s->size() == 0 || !isdigit((*s)[0] & 0xFF)) + return false; + // Disallow leading zeros. + if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF)) + return false; + int n = 0; + int c; + while (s->size() > 0 && isdigit(c = (*s)[0] & 0xFF)) { + // Avoid overflow. 
+ if (n >= 100000000) + return false; + n = n*10 + c - '0'; + s->remove_prefix(1); // digit + } + *np = n; + return true; +} + +// Parses a repetition suffix like {1,2} or {2} or {2,}. +// Sets *s to span the remainder of the string on success. +// Sets *lo and *hi to the given range. +// In the case of {2,}, the high number is unbounded; +// sets *hi to -1 to signify this. +// {,2} is NOT a valid suffix. +// The Maybe in the name signifies that the regexp parse +// doesn't fail even if ParseRepetition does, so the StringPiece +// s must NOT be edited unless MaybeParseRepetition returns true. +static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) { + StringPiece s = *sp; + if (s.size() == 0 || s[0] != '{') + return false; + s.remove_prefix(1); // '{' + if (!ParseInteger(&s, lo)) + return false; + if (s.size() == 0) + return false; + if (s[0] == ',') { + s.remove_prefix(1); // ',' + if (s.size() == 0) + return false; + if (s[0] == '}') { + // {2,} means at least 2 + *hi = -1; + } else { + // {2,4} means 2, 3, or 4. + if (!ParseInteger(&s, hi)) + return false; + } + } else { + // {2} means exactly two + *hi = *lo; + } + if (s.size() == 0 || s[0] != '}') + return false; + s.remove_prefix(1); // '}' + *sp = s; + return true; +} + +// Removes the next Rune from the StringPiece and stores it in *r. +// Returns number of bytes removed from sp. +// Behaves as though there is a terminating NUL at the end of sp. +// Argument order is backwards from usual Google style +// but consistent with chartorune. +static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) { + int n; + if (fullrune(sp->data(), sp->size())) { + n = chartorune(r, sp->data()); + // Some copies of chartorune have a bug that accepts + // encodings of values in (10FFFF, 1FFFFF] as valid. + // Those values break the character class algorithm, + // which assumes Runemax is the largest rune. 
+ if (*r > Runemax) { + n = 1; + *r = Runeerror; + } + if (!(n == 1 && *r == Runeerror)) { // no decoding error + sp->remove_prefix(n); + return n; + } + } + + status->set_code(kRegexpBadUTF8); + status->set_error_arg(NULL); + return -1; +} + +// Return whether name is valid UTF-8. +// If not, set status to kRegexpBadUTF8. +static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) { + StringPiece t = s; + Rune r; + while (t.size() > 0) { + if (StringPieceToRune(&r, &t, status) < 0) + return false; + } + return true; +} + +// Is c a hex digit? +static int IsHex(int c) { + return ('0' <= c && c <= '9') || + ('A' <= c && c <= 'F') || + ('a' <= c && c <= 'f'); +} + +// Convert hex digit to value. +static int UnHex(int c) { + if ('0' <= c && c <= '9') + return c - '0'; + if ('A' <= c && c <= 'F') + return c - 'A' + 10; + if ('a' <= c && c <= 'f') + return c - 'a' + 10; + LOG(DFATAL) << "Bad hex digit " << c; + return 0; +} + +// Parse an escape sequence (e.g., \n, \{). +// Sets *s to span the remainder of the string. +// Sets *rp to the named character. +static bool ParseEscape(StringPiece* s, Rune* rp, + RegexpStatus* status, int rune_max) { + const char* begin = s->begin(); + if (s->size() < 1 || (*s)[0] != '\\') { + // Should not happen - caller always checks. + status->set_code(kRegexpInternalError); + status->set_error_arg(NULL); + return false; + } + if (s->size() < 2) { + status->set_code(kRegexpTrailingBackslash); + status->set_error_arg(NULL); + return false; + } + Rune c, c1; + s->remove_prefix(1); // backslash + if (StringPieceToRune(&c, s, status) < 0) + return false; + int code; + switch (c) { + default: + if (c < Runeself && !isalpha(c) && !isdigit(c)) { + // Escaped non-word characters are always themselves. + // PCRE is not quite so rigorous: it accepts things like + // \q, but we don't. We once rejected \_, but too many + // programs and people insist on using it, so allow \_. 
+ *rp = c; + return true; + } + goto BadEscape; + + // Octal escapes. + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + // Single non-zero octal digit is a backreference; not supported. + if (s->size() == 0 || (*s)[0] < '0' || (*s)[0] > '7') + goto BadEscape; + // fall through + case '0': + // consume up to three octal digits; already have one. + code = c - '0'; + if (s->size() > 0 && '0' <= (c = (*s)[0]) && c <= '7') { + code = code * 8 + c - '0'; + s->remove_prefix(1); // digit + if (s->size() > 0) { + c = (*s)[0]; + if ('0' <= c && c <= '7') { + code = code * 8 + c - '0'; + s->remove_prefix(1); // digit + } + } + } + if (code > rune_max) + goto BadEscape; + *rp = code; + return true; + + // Hexadecimal escapes + case 'x': + if (s->size() == 0) + goto BadEscape; + if (StringPieceToRune(&c, s, status) < 0) + return false; + if (c == '{') { + // Any number of digits in braces. + // Update n as we consume the string, so that + // the whole thing gets shown in the error message. + // Perl accepts any text at all; it ignores all text + // after the first non-hex digit. We require only hex digits, + // and at least one. + if (StringPieceToRune(&c, s, status) < 0) + return false; + int nhex = 0; + code = 0; + while (IsHex(c)) { + nhex++; + code = code * 16 + UnHex(c); + if (code > rune_max) + goto BadEscape; + if (s->size() == 0) + goto BadEscape; + if (StringPieceToRune(&c, s, status) < 0) + return false; + } + if (c != '}' || nhex == 0) + goto BadEscape; + *rp = code; + return true; + } + // Easy case: two hex digits. + if (s->size() == 0) + goto BadEscape; + if (StringPieceToRune(&c1, s, status) < 0) + return false; + if (!IsHex(c) || !IsHex(c1)) + goto BadEscape; + *rp = UnHex(c) * 16 + UnHex(c1); + return true; + + // C escapes. + case 'n': + *rp = '\n'; + return true; + case 'r': + *rp = '\r'; + return true; + case 't': + *rp = '\t'; + return true; + + // Less common C escapes. 
+ case 'a': + *rp = '\a'; + return true; + case 'f': + *rp = '\f'; + return true; + case 'v': + *rp = '\v'; + return true; + + // This code is disabled to avoid misparsing + // the Perl word-boundary \b as a backspace + // when in POSIX regexp mode. Surprisingly, + // in Perl, \b means word-boundary but [\b] + // means backspace. We don't support that: + // if you want a backspace embed a literal + // backspace character or use \x08. + // + // case 'b': + // *rp = '\b'; + // return true; + } + + LOG(DFATAL) << "Not reached in ParseEscape."; + +BadEscape: + // Unrecognized escape sequence. + status->set_code(kRegexpBadEscape); + status->set_error_arg(StringPiece(begin, s->data() - begin)); + return false; +} + +// Add a range to the character class, but exclude newline if asked. +// Also handle case folding. +void CharClassBuilder::AddRangeFlags( + Rune lo, Rune hi, Regexp::ParseFlags parse_flags) { + + // Take out \n if the flags say so. + bool cutnl = !(parse_flags & Regexp::ClassNL) || + (parse_flags & Regexp::NeverNL); + if (cutnl && lo <= '\n' && '\n' <= hi) { + if (lo < '\n') + AddRangeFlags(lo, '\n' - 1, parse_flags); + if (hi > '\n') + AddRangeFlags('\n' + 1, hi, parse_flags); + return; + } + + // If folding case, add fold-equivalent characters too. + if (parse_flags & Regexp::FoldCase) + AddFoldedRange(this, lo, hi, 0); + else + AddRange(lo, hi); +} + +// Look for a group with the given name. +static const UGroup* LookupGroup(const StringPiece& name, + const UGroup *groups, int ngroups) { + // Simple name lookup. 
+ for (int i = 0; i < ngroups; i++) + if (StringPiece(groups[i].name) == name) + return &groups[i]; + return NULL; +} + +// Fake UGroup containing all Runes +static URange16 any16[] = { { 0, 65535 } }; +static URange32 any32[] = { { 65536, Runemax } }; +static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 }; + +// Look for a POSIX group with the given name (e.g., "[:^alpha:]") +static const UGroup* LookupPosixGroup(const StringPiece& name) { + return LookupGroup(name, posix_groups, num_posix_groups); +} + +static const UGroup* LookupPerlGroup(const StringPiece& name) { + return LookupGroup(name, perl_groups, num_perl_groups); +} + +// Look for a Unicode group with the given name (e.g., "Han") +static const UGroup* LookupUnicodeGroup(const StringPiece& name) { + // Special case: "Any" means any. + if (name == StringPiece("Any")) + return &anygroup; + return LookupGroup(name, unicode_groups, num_unicode_groups); +} + +// Add a UGroup or its negation to the character class. +static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign, + Regexp::ParseFlags parse_flags) { + if (sign == +1) { + for (int i = 0; i < g->nr16; i++) { + cc->AddRangeFlags(g->r16[i].lo, g->r16[i].hi, parse_flags); + } + for (int i = 0; i < g->nr32; i++) { + cc->AddRangeFlags(g->r32[i].lo, g->r32[i].hi, parse_flags); + } + } else { + if (parse_flags & Regexp::FoldCase) { + // Normally adding a case-folded group means + // adding all the extra fold-equivalent runes too. + // But if we're adding the negation of the group, + // we have to exclude all the runes that are fold-equivalent + // to what's already missing. Too hard, so do in two steps. + CharClassBuilder ccb1; + AddUGroup(&ccb1, g, +1, parse_flags); + // If the flags say to take out \n, put it in, so that negating will take it out. + // Normally AddRangeFlags does this, but we're bypassing AddRangeFlags. 
+ bool cutnl = !(parse_flags & Regexp::ClassNL) || + (parse_flags & Regexp::NeverNL); + if (cutnl) { + ccb1.AddRange('\n', '\n'); + } + ccb1.Negate(); + cc->AddCharClass(&ccb1); + return; + } + int next = 0; + for (int i = 0; i < g->nr16; i++) { + if (next < g->r16[i].lo) + cc->AddRangeFlags(next, g->r16[i].lo - 1, parse_flags); + next = g->r16[i].hi + 1; + } + for (int i = 0; i < g->nr32; i++) { + if (next < g->r32[i].lo) + cc->AddRangeFlags(next, g->r32[i].lo - 1, parse_flags); + next = g->r32[i].hi + 1; + } + if (next <= Runemax) + cc->AddRangeFlags(next, Runemax, parse_flags); + } +} + +// Maybe parse a Perl character class escape sequence. +// Only recognizes the Perl character classes (\d \s \w \D \S \W), +// not the Perl empty-string classes (\b \B \A \Z \z). +// On success, sets *s to span the remainder of the string +// and returns the corresponding UGroup. +// The StringPiece must *NOT* be edited unless the call succeeds. +const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_flags) { + if (!(parse_flags & Regexp::PerlClasses)) + return NULL; + if (s->size() < 2 || (*s)[0] != '\\') + return NULL; + // Could use StringPieceToRune, but there aren't + // any non-ASCII Perl group names. + StringPiece name(s->begin(), 2); + const UGroup *g = LookupPerlGroup(name); + if (g == NULL) + return NULL; + s->remove_prefix(name.size()); + return g; +} + +enum ParseStatus { + kParseOk, // Did some parsing. + kParseError, // Found an error. + kParseNothing, // Decided not to parse. +}; + +// Maybe parses a Unicode character group like \p{Han} or \P{Han} +// (the latter is a negated group). +ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags, + CharClassBuilder *cc, + RegexpStatus* status) { + // Decide whether to parse. 
+ if (!(parse_flags & Regexp::UnicodeGroups)) + return kParseNothing; + if (s->size() < 2 || (*s)[0] != '\\') + return kParseNothing; + Rune c = (*s)[1]; + if (c != 'p' && c != 'P') + return kParseNothing; + + // Committed to parse. Results: + int sign = +1; // -1 = negated char class + if (c == 'P') + sign = -1; + StringPiece seq = *s; // \p{Han} or \pL + StringPiece name; // Han or L + s->remove_prefix(2); // '\\', 'p' + + if (!StringPieceToRune(&c, s, status)) + return kParseError; + if (c != '{') { + // Name is the bit of string we just skipped over for c. + const char* p = seq.begin() + 2; + name = StringPiece(p, s->begin() - p); + } else { + // Name is in braces. Look for closing } + size_t end = s->find('}', 0); + if (end == s->npos) { + if (!IsValidUTF8(seq, status)) + return kParseError; + status->set_code(kRegexpBadCharRange); + status->set_error_arg(seq); + return kParseError; + } + name = StringPiece(s->begin(), end); // without '}' + s->remove_prefix(end + 1); // with '}' + if (!IsValidUTF8(name, status)) + return kParseError; + } + + // Chop seq where s now begins. + seq = StringPiece(seq.begin(), s->begin() - seq.begin()); + + // Look up group + if (name.size() > 0 && name[0] == '^') { + sign = -sign; + name.remove_prefix(1); // '^' + } + const UGroup *g = LookupUnicodeGroup(name); + if (g == NULL) { + status->set_code(kRegexpBadCharRange); + status->set_error_arg(seq); + return kParseError; + } + + AddUGroup(cc, g, sign, parse_flags); + return kParseOk; +} + +// Parses a character class name like [:alnum:]. +// Sets *s to span the remainder of the string. +// Adds the ranges corresponding to the class to ranges. +static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags, + CharClassBuilder *cc, + RegexpStatus* status) { + // Check begins with [: + const char* p = s->data(); + const char* ep = s->data() + s->size(); + if (ep - p < 2 || p[0] != '[' || p[1] != ':') + return kParseNothing; + + // Look for closing :]. 
+ const char* q; + for (q = p+2; q <= ep-2 && (*q != ':' || *(q+1) != ']'); q++) + ; + + // If no closing :], then ignore. + if (q > ep-2) + return kParseNothing; + + // Got it. Check that it's valid. + q += 2; + StringPiece name(p, q-p); + + const UGroup *g = LookupPosixGroup(name); + if (g == NULL) { + status->set_code(kRegexpBadCharRange); + status->set_error_arg(name); + return kParseError; + } + + s->remove_prefix(name.size()); + AddUGroup(cc, g, g->sign, parse_flags); + return kParseOk; +} + +// Parses a character inside a character class. +// There are fewer special characters here than in the rest of the regexp. +// Sets *s to span the remainder of the string. +// Sets *rp to the character. +bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp, + const StringPiece& whole_class, + RegexpStatus* status) { + if (s->size() == 0) { + status->set_code(kRegexpMissingBracket); + status->set_error_arg(whole_class); + return false; + } + + // Allow regular escape sequences even though + // many need not be escaped in this context. + if (s->size() >= 1 && (*s)[0] == '\\') + return ParseEscape(s, rp, status, rune_max_); + + // Otherwise take the next rune. + return StringPieceToRune(rp, s, status) >= 0; +} + +// Parses a character class character, or, if the character +// is followed by a hyphen, parses a character class range. +// For single characters, rr->lo == rr->hi. +// Sets *s to span the remainder of the string. +// Sets *rp to the character. +bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr, + const StringPiece& whole_class, + RegexpStatus* status) { + StringPiece os = *s; + if (!ParseCCCharacter(s, &rr->lo, whole_class, status)) + return false; + // [a-] means (a|-), so check for final ]. 
+ if (s->size() >= 2 && (*s)[0] == '-' && (*s)[1] != ']') { + s->remove_prefix(1); // '-' + if (!ParseCCCharacter(s, &rr->hi, whole_class, status)) + return false; + if (rr->hi < rr->lo) { + status->set_code(kRegexpBadCharRange); + status->set_error_arg(StringPiece(os.data(), s->data() - os.data())); + return false; + } + } else { + rr->hi = rr->lo; + } + return true; +} + +// Parses a possibly-negated character class expression like [^abx-z[:digit:]]. +// Sets *s to span the remainder of the string. +// Sets *out_re to the regexp for the class. +bool Regexp::ParseState::ParseCharClass(StringPiece* s, + Regexp** out_re, + RegexpStatus* status) { + StringPiece whole_class = *s; + if (s->size() == 0 || (*s)[0] != '[') { + // Caller checked this. + status->set_code(kRegexpInternalError); + status->set_error_arg(NULL); + return false; + } + bool negated = false; + Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase); + re->ccb_ = new CharClassBuilder; + s->remove_prefix(1); // '[' + if (s->size() > 0 && (*s)[0] == '^') { + s->remove_prefix(1); // '^' + negated = true; + if (!(flags_ & ClassNL) || (flags_ & NeverNL)) { + // If NL can't match implicitly, then pretend + // negated classes include a leading \n. + re->ccb_->AddRange('\n', '\n'); + } + } + bool first = true; // ] is okay as first char in class + while (s->size() > 0 && ((*s)[0] != ']' || first)) { + // - is only okay unescaped as first or last in class. + // Except that Perl allows - anywhere. + if ((*s)[0] == '-' && !first && !(flags_&PerlX) && + (s->size() == 1 || (*s)[1] != ']')) { + StringPiece t = *s; + t.remove_prefix(1); // '-' + Rune r; + int n = StringPieceToRune(&r, &t, status); + if (n < 0) { + re->Decref(); + return false; + } + status->set_code(kRegexpBadCharRange); + status->set_error_arg(StringPiece(s->data(), 1+n)); + re->Decref(); + return false; + } + first = false; + + // Look for [:alnum:] etc. 
+ if (s->size() > 2 && (*s)[0] == '[' && (*s)[1] == ':') { + switch (ParseCCName(s, flags_, re->ccb_, status)) { + case kParseOk: + continue; + case kParseError: + re->Decref(); + return false; + case kParseNothing: + break; + } + } + + // Look for Unicode character group like \p{Han} + if (s->size() > 2 && + (*s)[0] == '\\' && + ((*s)[1] == 'p' || (*s)[1] == 'P')) { + switch (ParseUnicodeGroup(s, flags_, re->ccb_, status)) { + case kParseOk: + continue; + case kParseError: + re->Decref(); + return false; + case kParseNothing: + break; + } + } + + // Look for Perl character class symbols (extension). + const UGroup *g = MaybeParsePerlCCEscape(s, flags_); + if (g != NULL) { + AddUGroup(re->ccb_, g, g->sign, flags_); + continue; + } + + // Otherwise assume single character or simple range. + RuneRange rr; + if (!ParseCCRange(s, &rr, whole_class, status)) { + re->Decref(); + return false; + } + // AddRangeFlags is usually called in response to a class like + // \p{Foo} or [[:foo:]]; for those, it filters \n out unless + // Regexp::ClassNL is set. In an explicit range or singleton + // like we just parsed, we do not filter \n out, so set ClassNL + // in the flags. + re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL); + } + if (s->size() == 0) { + status->set_code(kRegexpMissingBracket); + status->set_error_arg(whole_class); + re->Decref(); + return false; + } + s->remove_prefix(1); // ']' + + if (negated) + re->ccb_->Negate(); + + *out_re = re; + return true; +} + +// Is this a valid capture name? [A-Za-z0-9_]+ +// PCRE limits names to 32 bytes. +// Python rejects names starting with digits. +// We don't enforce either of those. 
+static bool IsValidCaptureName(const StringPiece& name) { + if (name.size() == 0) + return false; + for (int i = 0; i < name.size(); i++) { + int c = name[i]; + if (('0' <= c && c <= '9') || + ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || + c == '_') + continue; + return false; + } + return true; +} + +// Parses a Perl flag setting or non-capturing group or both, +// like (?i) or (?: or (?i:. Removes from s, updates parse state. +// The caller must check that s begins with "(?". +// Returns true on success. If the Perl flag is not +// well-formed or not supported, sets status_ and returns false. +bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) { + StringPiece t = *s; + + // Caller is supposed to check this. + if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') { + LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags"; + status_->set_code(kRegexpInternalError); + return false; + } + + t.remove_prefix(2); // "(?" + + // Check for named captures, first introduced in Python's regexp library. + // As usual, there are three slightly different syntaxes: + // + // (?Pexpr) the original, introduced by Python + // (?expr) the .NET alteration, adopted by Perl 5.10 + // (?'name'expr) another .NET alteration, adopted by Perl 5.10 + // + // Perl 5.10 gave in and implemented the Python version too, + // but they claim that the last two are the preferred forms. + // PCRE and languages based on it (specifically, PHP and Ruby) + // support all three as well. EcmaScript 4 uses only the Python form. + // + // In both the open source world (via Code Search) and the + // Google source tree, (?Pname) is the dominant form, + // so that's the one we implement. One is enough. + if (t.size() > 2 && t[0] == 'P' && t[1] == '<') { + // Pull out name. 
+ size_t end = t.find('>', 2); + if (end == t.npos) { + if (!IsValidUTF8(*s, status_)) + return false; + status_->set_code(kRegexpBadNamedCapture); + status_->set_error_arg(*s); + return false; + } + + // t is "P...", t[end] == '>' + StringPiece capture(t.begin()-2, end+3); // "(?P" + StringPiece name(t.begin()+2, end-2); // "name" + if (!IsValidUTF8(name, status_)) + return false; + if (!IsValidCaptureName(name)) { + status_->set_code(kRegexpBadNamedCapture); + status_->set_error_arg(capture); + return false; + } + + if (!DoLeftParen(name)) { + // DoLeftParen's failure set status_. + return false; + } + + s->remove_prefix(capture.end() - s->begin()); + return true; + } + + bool negated = false; + bool sawflags = false; + int nflags = flags_; + Rune c; + for (bool done = false; !done; ) { + if (t.size() == 0) + goto BadPerlOp; + if (StringPieceToRune(&c, &t, status_) < 0) + return false; + switch (c) { + default: + goto BadPerlOp; + + // Parse flags. + case 'i': + sawflags = true; + if (negated) + nflags &= ~FoldCase; + else + nflags |= FoldCase; + break; + + case 'm': // opposite of our OneLine + sawflags = true; + if (negated) + nflags |= OneLine; + else + nflags &= ~OneLine; + break; + + case 's': + sawflags = true; + if (negated) + nflags &= ~DotNL; + else + nflags |= DotNL; + break; + + case 'U': + sawflags = true; + if (negated) + nflags &= ~NonGreedy; + else + nflags |= NonGreedy; + break; + + // Negation + case '-': + if (negated) + goto BadPerlOp; + negated = true; + sawflags = false; + break; + + // Open new group. + case ':': + if (!DoLeftParenNoCapture()) { + // DoLeftParenNoCapture's failure set status_. + return false; + } + done = true; + break; + + // Finish flags. 
+ case ')': + done = true; + break; + } + } + + if (negated && !sawflags) + goto BadPerlOp; + + flags_ = static_cast(nflags); + *s = t; + return true; + +BadPerlOp: + status_->set_code(kRegexpBadPerlOp); + status_->set_error_arg(StringPiece(s->begin(), t.begin() - s->begin())); + return false; +} + +// Converts latin1 (assumed to be encoded as Latin1 bytes) +// into UTF8 encoding in string. +// Can't use EncodingUtils::EncodeLatin1AsUTF8 because it is +// deprecated and because it rejects code points 0x80-0x9F. +void ConvertLatin1ToUTF8(const StringPiece& latin1, string* utf) { + char buf[UTFmax]; + + utf->clear(); + for (int i = 0; i < latin1.size(); i++) { + Rune r = latin1[i] & 0xFF; + int n = runetochar(buf, &r); + utf->append(buf, n); + } +} + +// Parses the regular expression given by s, +// returning the corresponding Regexp tree. +// The caller must Decref the return value when done with it. +// Returns NULL on error. +Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags, + RegexpStatus* status) { + // Make status non-NULL (easier on everyone else). + RegexpStatus xstatus; + if (status == NULL) + status = &xstatus; + + ParseState ps(global_flags, s, status); + StringPiece t = s; + + // Convert regexp to UTF-8 (easier on the rest of the parser). + if (global_flags & Latin1) { + string* tmp = new string; + ConvertLatin1ToUTF8(t, tmp); + status->set_tmp(tmp); + t = *tmp; + } + + if (global_flags & Literal) { + // Special parse loop for literal string. + while (t.size() > 0) { + Rune r; + if (StringPieceToRune(&r, &t, status) < 0) + return NULL; + if (!ps.PushLiteral(r)) + return NULL; + } + return ps.DoFinish(); + } + + StringPiece lastunary = NULL; + while (t.size() > 0) { + StringPiece isunary = NULL; + switch (t[0]) { + default: { + Rune r; + if (StringPieceToRune(&r, &t, status) < 0) + return NULL; + if (!ps.PushLiteral(r)) + return NULL; + break; + } + + case '(': + // "(?" introduces Perl escape. 
+ if ((ps.flags() & PerlX) && (t.size() >= 2 && t[1] == '?')) { + // Flag changes and non-capturing groups. + if (!ps.ParsePerlFlags(&t)) + return NULL; + break; + } + if (ps.flags() & NeverCapture) { + if (!ps.DoLeftParenNoCapture()) + return NULL; + } else { + if (!ps.DoLeftParen(NULL)) + return NULL; + } + t.remove_prefix(1); // '(' + break; + + case '|': + if (!ps.DoVerticalBar()) + return NULL; + t.remove_prefix(1); // '|' + break; + + case ')': + if (!ps.DoRightParen()) + return NULL; + t.remove_prefix(1); // ')' + break; + + case '^': // Beginning of line. + if (!ps.PushCarat()) + return NULL; + t.remove_prefix(1); // '^' + break; + + case '$': // End of line. + if (!ps.PushDollar()) + return NULL; + t.remove_prefix(1); // '$' + break; + + case '.': // Any character (possibly except newline). + if (!ps.PushDot()) + return NULL; + t.remove_prefix(1); // '.' + break; + + case '[': { // Character class. + Regexp* re; + if (!ps.ParseCharClass(&t, &re, status)) + return NULL; + if (!ps.PushRegexp(re)) + return NULL; + break; + } + + case '*': { // Zero or more. + RegexpOp op; + op = kRegexpStar; + goto Rep; + case '+': // One or more. + op = kRegexpPlus; + goto Rep; + case '?': // Zero or one. + op = kRegexpQuest; + goto Rep; + Rep: + StringPiece opstr = t; + bool nongreedy = false; + t.remove_prefix(1); // '*' or '+' or '?' + if (ps.flags() & PerlX) { + if (t.size() > 0 && t[0] == '?') { + nongreedy = true; + t.remove_prefix(1); // '?' + } + if (lastunary.size() > 0) { + // In Perl it is not allowed to stack repetition operators: + // a** is a syntax error, not a double-star. + // (and a++ means something else entirely, which we don't support!) 
+ status->set_code(kRegexpRepeatOp); + status->set_error_arg(StringPiece(lastunary.begin(), + t.begin() - lastunary.begin())); + return NULL; + } + } + opstr.set(opstr.data(), t.data() - opstr.data()); + if (!ps.PushRepeatOp(op, opstr, nongreedy)) + return NULL; + isunary = opstr; + break; + } + + case '{': { // Counted repetition. + int lo, hi; + StringPiece opstr = t; + if (!MaybeParseRepetition(&t, &lo, &hi)) { + // Treat like a literal. + if (!ps.PushLiteral('{')) + return NULL; + t.remove_prefix(1); // '{' + break; + } + bool nongreedy = false; + if (ps.flags() & PerlX) { + if (t.size() > 0 && t[0] == '?') { + nongreedy = true; + t.remove_prefix(1); // '?' + } + if (lastunary.size() > 0) { + // Not allowed to stack repetition operators. + status->set_code(kRegexpRepeatOp); + status->set_error_arg(StringPiece(lastunary.begin(), + t.begin() - lastunary.begin())); + return NULL; + } + } + opstr.set(opstr.data(), t.data() - opstr.data()); + if (!ps.PushRepetition(lo, hi, opstr, nongreedy)) + return NULL; + isunary = opstr; + break; + } + + case '\\': { // Escaped character or Perl sequence. + // \b and \B: word boundary or not + if ((ps.flags() & Regexp::PerlB) && + t.size() >= 2 && (t[1] == 'b' || t[1] == 'B')) { + if (!ps.PushWordBoundary(t[1] == 'b')) + return NULL; + t.remove_prefix(2); // '\\', 'b' + break; + } + + if ((ps.flags() & Regexp::PerlX) && t.size() >= 2) { + if (t[1] == 'A') { + if (!ps.PushSimpleOp(kRegexpBeginText)) + return NULL; + t.remove_prefix(2); // '\\', 'A' + break; + } + if (t[1] == 'z') { + if (!ps.PushSimpleOp(kRegexpEndText)) + return NULL; + t.remove_prefix(2); // '\\', 'z' + break; + } + // Do not recognize \Z, because this library can't + // implement the exact Perl/PCRE semantics. + // (This library treats "(?-m)$" as \z, even though + // in Perl and PCRE it is equivalent to \Z.) 
+ + if (t[1] == 'C') { // \C: any byte [sic] + if (!ps.PushSimpleOp(kRegexpAnyByte)) + return NULL; + t.remove_prefix(2); // '\\', 'C' + break; + } + + if (t[1] == 'Q') { // \Q ... \E: the ... is always literals + t.remove_prefix(2); // '\\', 'Q' + while (t.size() > 0) { + if (t.size() >= 2 && t[0] == '\\' && t[1] == 'E') { + t.remove_prefix(2); // '\\', 'E' + break; + } + Rune r; + if (StringPieceToRune(&r, &t, status) < 0) + return NULL; + if (!ps.PushLiteral(r)) + return NULL; + } + break; + } + } + + if (t.size() >= 2 && (t[1] == 'p' || t[1] == 'P')) { + Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase); + re->ccb_ = new CharClassBuilder; + switch (ParseUnicodeGroup(&t, ps.flags(), re->ccb_, status)) { + case kParseOk: + if (!ps.PushRegexp(re)) + return NULL; + goto Break2; + case kParseError: + re->Decref(); + return NULL; + case kParseNothing: + re->Decref(); + break; + } + } + + const UGroup *g = MaybeParsePerlCCEscape(&t, ps.flags()); + if (g != NULL) { + Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase); + re->ccb_ = new CharClassBuilder; + AddUGroup(re->ccb_, g, g->sign, ps.flags()); + if (!ps.PushRegexp(re)) + return NULL; + break; + } + + Rune r; + if (!ParseEscape(&t, &r, status, ps.rune_max())) + return NULL; + if (!ps.PushLiteral(r)) + return NULL; + break; + } + } + Break2: + lastunary = isunary; + } + return ps.DoFinish(); +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/perl_groups.cc b/src/openalpr/support/re2/perl_groups.cc new file mode 100644 index 0000000..422b388 --- /dev/null +++ b/src/openalpr/support/re2/perl_groups.cc @@ -0,0 +1,119 @@ +// GENERATED BY make_perl_groups.pl; DO NOT EDIT. 
+// make_perl_groups.pl >perl_groups.cc + +#include "re2/unicode_groups.h" + +namespace re2 { + +static const URange16 code1[] = { /* \d */ + { 0x30, 0x39 }, +}; +static const URange16 code2[] = { /* \s */ + { 0x9, 0xa }, + { 0xc, 0xd }, + { 0x20, 0x20 }, +}; +static const URange16 code3[] = { /* \w */ + { 0x30, 0x39 }, + { 0x41, 0x5a }, + { 0x5f, 0x5f }, + { 0x61, 0x7a }, +}; +const UGroup perl_groups[] = { + { "\\d", +1, code1, 1 }, + { "\\D", -1, code1, 1 }, + { "\\s", +1, code2, 3 }, + { "\\S", -1, code2, 3 }, + { "\\w", +1, code3, 4 }, + { "\\W", -1, code3, 4 }, +}; +const int num_perl_groups = 6; +static const URange16 code4[] = { /* [:alnum:] */ + { 0x30, 0x39 }, + { 0x41, 0x5a }, + { 0x61, 0x7a }, +}; +static const URange16 code5[] = { /* [:alpha:] */ + { 0x41, 0x5a }, + { 0x61, 0x7a }, +}; +static const URange16 code6[] = { /* [:ascii:] */ + { 0x0, 0x7f }, +}; +static const URange16 code7[] = { /* [:blank:] */ + { 0x9, 0x9 }, + { 0x20, 0x20 }, +}; +static const URange16 code8[] = { /* [:cntrl:] */ + { 0x0, 0x1f }, + { 0x7f, 0x7f }, +}; +static const URange16 code9[] = { /* [:digit:] */ + { 0x30, 0x39 }, +}; +static const URange16 code10[] = { /* [:graph:] */ + { 0x21, 0x7e }, +}; +static const URange16 code11[] = { /* [:lower:] */ + { 0x61, 0x7a }, +}; +static const URange16 code12[] = { /* [:print:] */ + { 0x20, 0x7e }, +}; +static const URange16 code13[] = { /* [:punct:] */ + { 0x21, 0x2f }, + { 0x3a, 0x40 }, + { 0x5b, 0x60 }, + { 0x7b, 0x7e }, +}; +static const URange16 code14[] = { /* [:space:] */ + { 0x9, 0xd }, + { 0x20, 0x20 }, +}; +static const URange16 code15[] = { /* [:upper:] */ + { 0x41, 0x5a }, +}; +static const URange16 code16[] = { /* [:word:] */ + { 0x30, 0x39 }, + { 0x41, 0x5a }, + { 0x5f, 0x5f }, + { 0x61, 0x7a }, +}; +static const URange16 code17[] = { /* [:xdigit:] */ + { 0x30, 0x39 }, + { 0x41, 0x46 }, + { 0x61, 0x66 }, +}; +const UGroup posix_groups[] = { + { "[:alnum:]", +1, code4, 3 }, + { "[:^alnum:]", -1, code4, 3 }, + { 
"[:alpha:]", +1, code5, 2 }, + { "[:^alpha:]", -1, code5, 2 }, + { "[:ascii:]", +1, code6, 1 }, + { "[:^ascii:]", -1, code6, 1 }, + { "[:blank:]", +1, code7, 2 }, + { "[:^blank:]", -1, code7, 2 }, + { "[:cntrl:]", +1, code8, 2 }, + { "[:^cntrl:]", -1, code8, 2 }, + { "[:digit:]", +1, code9, 1 }, + { "[:^digit:]", -1, code9, 1 }, + { "[:graph:]", +1, code10, 1 }, + { "[:^graph:]", -1, code10, 1 }, + { "[:lower:]", +1, code11, 1 }, + { "[:^lower:]", -1, code11, 1 }, + { "[:print:]", +1, code12, 1 }, + { "[:^print:]", -1, code12, 1 }, + { "[:punct:]", +1, code13, 4 }, + { "[:^punct:]", -1, code13, 4 }, + { "[:space:]", +1, code14, 2 }, + { "[:^space:]", -1, code14, 2 }, + { "[:upper:]", +1, code15, 1 }, + { "[:^upper:]", -1, code15, 1 }, + { "[:word:]", +1, code16, 4 }, + { "[:^word:]", -1, code16, 4 }, + { "[:xdigit:]", +1, code17, 3 }, + { "[:^xdigit:]", -1, code17, 3 }, +}; +const int num_posix_groups = 28; + +} // namespace re2 diff --git a/src/openalpr/support/re2/prefilter.cc b/src/openalpr/support/re2/prefilter.cc new file mode 100644 index 0000000..153318f --- /dev/null +++ b/src/openalpr/support/re2/prefilter.cc @@ -0,0 +1,709 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "util/util.h" +#include "re2/prefilter.h" +#include "re2.h" +#include "re2/unicode_casefold.h" +#include "re2/walker-inl.h" + +namespace re2 { + +static const int Trace = false; + +typedef set::iterator SSIter; +typedef set::const_iterator ConstSSIter; + +GLOBAL_MUTEX(alloc_id_mutex); +static int alloc_id = 100000; // Used for debugging. +// Initializes a Prefilter, allocating subs_ as necessary. 
+Prefilter::Prefilter(Op op) { + op_ = op; + subs_ = NULL; + if (op_ == AND || op_ == OR) + subs_ = new vector; + + GLOBAL_MUTEX_LOCK(alloc_id_mutex); + alloc_id_ = alloc_id++; + GLOBAL_MUTEX_UNLOCK(alloc_id_mutex); + VLOG(10) << "alloc_id: " << alloc_id_; +} + +// Destroys a Prefilter. +Prefilter::~Prefilter() { + VLOG(10) << "Deleted: " << alloc_id_; + if (subs_) { + for (size_t i = 0; i < subs_->size(); i++) + delete (*subs_)[i]; + delete subs_; + subs_ = NULL; + } +} + +// Simplify if the node is an empty Or or And. +Prefilter* Prefilter::Simplify() { + if (op_ != AND && op_ != OR) { + return this; + } + + // Nothing left in the AND/OR. + if (subs_->size() == 0) { + if (op_ == AND) + op_ = ALL; // AND of nothing is true + else + op_ = NONE; // OR of nothing is false + + return this; + } + + // Just one subnode: throw away wrapper. + if (subs_->size() == 1) { + Prefilter* a = (*subs_)[0]; + subs_->clear(); + delete this; + return a->Simplify(); + } + + return this; +} + +// Combines two Prefilters together to create an "op" (AND or OR). +// The passed Prefilters will be part of the returned Prefilter or deleted. +// Does lots of work to avoid creating unnecessarily complicated structures. +Prefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) { + // If a, b can be rewritten as op, do so. + a = a->Simplify(); + b = b->Simplify(); + + // Canonicalize: a->op <= b->op. + if (a->op() > b->op()) { + Prefilter* t = a; + a = b; + b = t; + } + + // Trivial cases. + // ALL AND b = b + // NONE OR b = b + // ALL OR b = ALL + // NONE AND b = NONE + // Don't need to look at b, because of canonicalization above. + // ALL and NONE are smallest opcodes. + if (a->op() == ALL || a->op() == NONE) { + if ((a->op() == ALL && op == AND) || + (a->op() == NONE && op == OR)) { + delete a; + return b; + } else { + delete b; + return a; + } + } + + // If a and b match op, merge their contents. 
+ if (a->op() == op && b->op() == op) { + for (size_t i = 0; i < b->subs()->size(); i++) { + Prefilter* bb = (*b->subs())[i]; + a->subs()->push_back(bb); + } + b->subs()->clear(); + delete b; + return a; + } + + // If a already has the same op as the op that is under construction + // add in b (similarly if b already has the same op, add in a). + if (b->op() == op) { + Prefilter* t = a; + a = b; + b = t; + } + if (a->op() == op) { + a->subs()->push_back(b); + return a; + } + + // Otherwise just return the op. + Prefilter* c = new Prefilter(op); + c->subs()->push_back(a); + c->subs()->push_back(b); + return c; +} + +Prefilter* Prefilter::And(Prefilter* a, Prefilter* b) { + return AndOr(AND, a, b); +} + +Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) { + return AndOr(OR, a, b); +} + +static void SimplifyStringSet(set *ss) { + // Now make sure that the strings aren't redundant. For example, if + // we know "ab" is a required string, then it doesn't help at all to + // know that "abc" is also a required string, so delete "abc". This + // is because, when we are performing a string search to filter + // regexps, matching ab will already allow this regexp to be a + // candidate for match, so further matching abc is redundant. + + for (SSIter i = ss->begin(); i != ss->end(); ++i) { + SSIter j = i; + ++j; + while (j != ss->end()) { + // Increment j early so that we can erase the element it points to. 
+ SSIter old_j = j; + ++j; + if (old_j->find(*i) != string::npos) + ss->erase(old_j); + } + } +} + +Prefilter* Prefilter::OrStrings(set* ss) { + SimplifyStringSet(ss); + Prefilter* or_prefilter = NULL; + if (!ss->empty()) { + or_prefilter = new Prefilter(NONE); + for (SSIter i = ss->begin(); i != ss->end(); ++i) + or_prefilter = Or(or_prefilter, FromString(*i)); + } + return or_prefilter; +} + +static Rune ToLowerRune(Rune r) { + if (r < Runeself) { + if ('A' <= r && r <= 'Z') + r += 'a' - 'A'; + return r; + } + + const CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r); + if (f == NULL || r < f->lo) + return r; + return ApplyFold(f, r); +} + +static Rune ToLowerRuneLatin1(Rune r) { + if ('A' <= r && r <= 'Z') + r += 'a' - 'A'; + return r; +} + +Prefilter* Prefilter::FromString(const string& str) { + Prefilter* m = new Prefilter(Prefilter::ATOM); + m->atom_ = str; + return m; +} + +// Information about a regexp used during computation of Prefilter. +// Can be thought of as information about the set of strings matching +// the given regular expression. +class Prefilter::Info { + public: + Info(); + ~Info(); + + // More constructors. They delete their Info* arguments. + static Info* Alt(Info* a, Info* b); + static Info* Concat(Info* a, Info* b); + static Info* And(Info* a, Info* b); + static Info* Star(Info* a); + static Info* Plus(Info* a); + static Info* Quest(Info* a); + static Info* EmptyString(); + static Info* NoMatch(); + static Info* AnyChar(); + static Info* CClass(CharClass* cc, bool latin1); + static Info* Literal(Rune r); + static Info* LiteralLatin1(Rune r); + static Info* AnyMatch(); + + // Format Info as a string. + string ToString(); + + // Caller takes ownership of the Prefilter. + Prefilter* TakeMatch(); + + set& exact() { return exact_; } + + bool is_exact() const { return is_exact_; } + + class Walker; + + private: + set exact_; + + // When is_exact_ is true, the strings that match + // are placed in exact_. 
When it is no longer an exact + // set of strings that match this RE, then is_exact_ + // is false and the match_ contains the required match + // criteria. + bool is_exact_; + + // Accumulated Prefilter query that any + // match for this regexp is guaranteed to match. + Prefilter* match_; +}; + + +Prefilter::Info::Info() + : is_exact_(false), + match_(NULL) { +} + +Prefilter::Info::~Info() { + delete match_; +} + +Prefilter* Prefilter::Info::TakeMatch() { + if (is_exact_) { + match_ = Prefilter::OrStrings(&exact_); + is_exact_ = false; + } + Prefilter* m = match_; + match_ = NULL; + return m; +} + +// Format a Info in string form. +string Prefilter::Info::ToString() { + if (is_exact_) { + int n = 0; + string s; + for (set::iterator i = exact_.begin(); i != exact_.end(); ++i) { + if (n++ > 0) + s += ","; + s += *i; + } + return s; + } + + if (match_) + return match_->DebugString(); + + return ""; +} + +// Add the strings from src to dst. +static void CopyIn(const set& src, set* dst) { + for (ConstSSIter i = src.begin(); i != src.end(); ++i) + dst->insert(*i); +} + +// Add the cross-product of a and b to dst. +// (For each string i in a and j in b, add i+j.) +static void CrossProduct(const set& a, + const set& b, + set* dst) { + for (ConstSSIter i = a.begin(); i != a.end(); ++i) + for (ConstSSIter j = b.begin(); j != b.end(); ++j) + dst->insert(*i + *j); +} + +// Concats a and b. Requires that both are exact sets. +// Forms an exact set that is a crossproduct of a and b. +Prefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) { + if (a == NULL) + return b; + DCHECK(a->is_exact_); + DCHECK(b && b->is_exact_); + Info *ab = new Info(); + + CrossProduct(a->exact_, b->exact_, &ab->exact_); + ab->is_exact_ = true; + + delete a; + delete b; + return ab; +} + +// Constructs an inexact Info for ab given a and b. +// Used only when a or b is not exact or when the +// exact cross product is likely to be too big. 
+Prefilter::Info* Prefilter::Info::And(Info* a, Info* b) { + if (a == NULL) + return b; + if (b == NULL) + return a; + + Info *ab = new Info(); + + ab->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch()); + ab->is_exact_ = false; + delete a; + delete b; + return ab; +} + +// Constructs Info for a|b given a and b. +Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) { + Info *ab = new Info(); + + if (a->is_exact_ && b->is_exact_) { + CopyIn(a->exact_, &ab->exact_); + CopyIn(b->exact_, &ab->exact_); + ab->is_exact_ = true; + } else { + // Either a or b has is_exact_ = false. If the other + // one has is_exact_ = true, we move it to match_ and + // then create a OR of a,b. The resulting Info has + // is_exact_ = false. + ab->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch()); + ab->is_exact_ = false; + } + + delete a; + delete b; + return ab; +} + +// Constructs Info for a? given a. +Prefilter::Info* Prefilter::Info::Quest(Info *a) { + Info *ab = new Info(); + + ab->is_exact_ = false; + ab->match_ = new Prefilter(ALL); + delete a; + return ab; +} + +// Constructs Info for a* given a. +// Same as a? -- not much to do. +Prefilter::Info* Prefilter::Info::Star(Info *a) { + return Quest(a); +} + +// Constructs Info for a+ given a. If a was exact set, it isn't +// anymore. +Prefilter::Info* Prefilter::Info::Plus(Info *a) { + Info *ab = new Info(); + + ab->match_ = a->TakeMatch(); + ab->is_exact_ = false; + + delete a; + return ab; +} + +static string RuneToString(Rune r) { + char buf[UTFmax]; + int n = runetochar(buf, &r); + return string(buf, n); +} + +static string RuneToStringLatin1(Rune r) { + char c = r & 0xff; + return string(&c, 1); +} + +// Constructs Info for literal rune. +Prefilter::Info* Prefilter::Info::Literal(Rune r) { + Info* info = new Info(); + info->exact_.insert(RuneToString(ToLowerRune(r))); + info->is_exact_ = true; + return info; +} + +// Constructs Info for literal rune for Latin1 encoded string. 
+Prefilter::Info* Prefilter::Info::LiteralLatin1(Rune r) { + Info* info = new Info(); + info->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r))); + info->is_exact_ = true; + return info; +} + +// Constructs Info for dot (any character). +Prefilter::Info* Prefilter::Info::AnyChar() { + Prefilter::Info* info = new Prefilter::Info(); + info->match_ = new Prefilter(ALL); + return info; +} + +// Constructs Prefilter::Info for no possible match. +Prefilter::Info* Prefilter::Info::NoMatch() { + Prefilter::Info* info = new Prefilter::Info(); + info->match_ = new Prefilter(NONE); + return info; +} + +// Constructs Prefilter::Info for any possible match. +// This Prefilter::Info is valid for any regular expression, +// since it makes no assertions whatsoever about the +// strings being matched. +Prefilter::Info* Prefilter::Info::AnyMatch() { + Prefilter::Info *info = new Prefilter::Info(); + info->match_ = new Prefilter(ALL); + return info; +} + +// Constructs Prefilter::Info for just the empty string. +Prefilter::Info* Prefilter::Info::EmptyString() { + Prefilter::Info* info = new Prefilter::Info(); + info->is_exact_ = true; + info->exact_.insert(""); + return info; +} + +// Constructs Prefilter::Info for a character class. +typedef CharClass::iterator CCIter; +Prefilter::Info* Prefilter::Info::CClass(CharClass *cc, + bool latin1) { + if (Trace) { + VLOG(0) << "CharClassInfo:"; + for (CCIter i = cc->begin(); i != cc->end(); ++i) + VLOG(0) << " " << i->lo << "-" << i->hi; + } + + // If the class is too large, it's okay to overestimate. 
+ if (cc->size() > 10) + return AnyChar(); + + Prefilter::Info *a = new Prefilter::Info(); + for (CCIter i = cc->begin(); i != cc->end(); ++i) + for (Rune r = i->lo; r <= i->hi; r++) { + if (latin1) { + a->exact_.insert(RuneToStringLatin1(ToLowerRuneLatin1(r))); + } else { + a->exact_.insert(RuneToString(ToLowerRune(r))); + } + } + + + a->is_exact_ = true; + + if (Trace) { + VLOG(0) << " = " << a->ToString(); + } + + return a; +} + +class Prefilter::Info::Walker : public Regexp::Walker { + public: + Walker(bool latin1) : latin1_(latin1) {} + + virtual Info* PostVisit( + Regexp* re, Info* parent_arg, + Info* pre_arg, + Info** child_args, int nchild_args); + + virtual Info* ShortVisit( + Regexp* re, + Info* parent_arg); + + bool latin1() { return latin1_; } + private: + bool latin1_; + DISALLOW_COPY_AND_ASSIGN(Walker); +}; + +Prefilter::Info* Prefilter::BuildInfo(Regexp* re) { + if (Trace) { + LOG(INFO) << "BuildPrefilter::Info: " << re->ToString(); + } + + bool latin1 = re->parse_flags() & Regexp::Latin1; + Prefilter::Info::Walker w(latin1); + Prefilter::Info* info = w.WalkExponential(re, NULL, 100000); + + if (w.stopped_early()) { + delete info; + return NULL; + } + + return info; +} + +Prefilter::Info* Prefilter::Info::Walker::ShortVisit( + Regexp* re, Prefilter::Info* parent_arg) { + return AnyMatch(); +} + +// Constructs the Prefilter::Info for the given regular expression. +// Assumes re is simplified. 
+Prefilter::Info* Prefilter::Info::Walker::PostVisit( + Regexp* re, Prefilter::Info* parent_arg, + Prefilter::Info* pre_arg, Prefilter::Info** child_args, + int nchild_args) { + Prefilter::Info *info; + switch (re->op()) { + default: + case kRegexpRepeat: + LOG(DFATAL) << "Bad regexp op " << re->op(); + info = EmptyString(); + break; + + case kRegexpNoMatch: + info = NoMatch(); + break; + + // These ops match the empty string: + case kRegexpEmptyMatch: // anywhere + case kRegexpBeginLine: // at beginning of line + case kRegexpEndLine: // at end of line + case kRegexpBeginText: // at beginning of text + case kRegexpEndText: // at end of text + case kRegexpWordBoundary: // at word boundary + case kRegexpNoWordBoundary: // not at word boundary + info = EmptyString(); + break; + + case kRegexpLiteral: + if (latin1()) { + info = LiteralLatin1(re->rune()); + } + else { + info = Literal(re->rune()); + } + break; + + case kRegexpLiteralString: + if (re->nrunes() == 0) { + info = NoMatch(); + break; + } + if (latin1()) { + info = LiteralLatin1(re->runes()[0]); + for (int i = 1; i < re->nrunes(); i++) { + info = Concat(info, LiteralLatin1(re->runes()[i])); + } + } else { + info = Literal(re->runes()[0]); + for (int i = 1; i < re->nrunes(); i++) { + info = Concat(info, Literal(re->runes()[i])); + } + } + break; + + case kRegexpConcat: { + // Accumulate in info. + // Exact is concat of recent contiguous exact nodes. + info = NULL; + Info* exact = NULL; + for (int i = 0; i < nchild_args; i++) { + Info* ci = child_args[i]; // child info + if (!ci->is_exact() || + (exact && ci->exact().size() * exact->exact().size() > 16)) { + // Exact run is over. + info = And(info, exact); + exact = NULL; + // Add this child's info. + info = And(info, ci); + } else { + // Append to exact run. 
+ exact = Concat(exact, ci); + } + } + info = And(info, exact); + } + break; + + case kRegexpAlternate: + info = child_args[0]; + for (int i = 1; i < nchild_args; i++) + info = Alt(info, child_args[i]); + VLOG(10) << "Alt: " << info->ToString(); + break; + + case kRegexpStar: + info = Star(child_args[0]); + break; + + case kRegexpQuest: + info = Quest(child_args[0]); + break; + + case kRegexpPlus: + info = Plus(child_args[0]); + break; + + case kRegexpAnyChar: + // Claim nothing, except that it's not empty. + info = AnyChar(); + break; + + case kRegexpCharClass: + info = CClass(re->cc(), latin1()); + break; + + case kRegexpCapture: + // These don't affect the set of matching strings. + info = child_args[0]; + break; + } + + if (Trace) { + VLOG(0) << "BuildInfo " << re->ToString() + << ": " << (info ? info->ToString() : ""); + } + + return info; +} + + +Prefilter* Prefilter::FromRegexp(Regexp* re) { + if (re == NULL) + return NULL; + + Regexp* simple = re->Simplify(); + Prefilter::Info *info = BuildInfo(simple); + + simple->Decref(); + if (info == NULL) + return NULL; + + Prefilter* m = info->TakeMatch(); + + delete info; + return m; +} + +string Prefilter::DebugString() const { + switch (op_) { + default: + LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_; + return StringPrintf("op%d", op_); + case NONE: + return "*no-matches*"; + case ATOM: + return atom_; + case ALL: + return ""; + case AND: { + string s = ""; + for (size_t i = 0; i < subs_->size(); i++) { + if (i > 0) + s += " "; + Prefilter* sub = (*subs_)[i]; + s += sub ? sub->DebugString() : ""; + } + return s; + } + case OR: { + string s = "("; + for (size_t i = 0; i < subs_->size(); i++) { + if (i > 0) + s += "|"; + Prefilter* sub = (*subs_)[i]; + s += sub ? 
sub->DebugString() : ""; + } + s += ")"; + return s; + } + } +} + +Prefilter* Prefilter::FromRE2(const RE2* re2) { + if (re2 == NULL) + return NULL; + + Regexp* regexp = re2->Regexp(); + if (regexp == NULL) + return NULL; + + return FromRegexp(regexp); +} + + +} // namespace re2 diff --git a/src/openalpr/support/re2/prefilter.h b/src/openalpr/support/re2/prefilter.h new file mode 100644 index 0000000..2bc1676 --- /dev/null +++ b/src/openalpr/support/re2/prefilter.h @@ -0,0 +1,105 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Prefilter is the class used to extract string guards from regexps. +// Rather than using Prefilter class directly, use FilteredRE2. +// See filtered_re2.h + +#ifndef RE2_PREFILTER_H_ +#define RE2_PREFILTER_H_ + +#include "util/util.h" + +namespace re2 { + +class RE2; + +class Regexp; + +class Prefilter { + // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h + public: + enum Op { + ALL = 0, // Everything matches + NONE, // Nothing matches + ATOM, // The string atom() must match + AND, // All in subs() must match + OR, // One of subs() must match + }; + + explicit Prefilter(Op op); + ~Prefilter(); + + Op op() { return op_; } + const string& atom() const { return atom_; } + void set_unique_id(int id) { unique_id_ = id; } + int unique_id() const { return unique_id_; } + + // The children of the Prefilter node. + vector* subs() { + CHECK(op_ == AND || op_ == OR); + return subs_; + } + + // Set the children vector. Prefilter takes ownership of subs and + // subs_ will be deleted when Prefilter is deleted. + void set_subs(vector* subs) { subs_ = subs; } + + // Given a RE2, return a Prefilter. The caller takes ownership of + // the Prefilter and should deallocate it. Returns NULL if Prefilter + // cannot be formed. 
+ static Prefilter* FromRE2(const RE2* re2); + + // Returns a readable debug string of the prefilter. + string DebugString() const; + + private: + class Info; + + // Combines two prefilters together to create an AND. The passed + // Prefilters will be part of the returned Prefilter or deleted. + static Prefilter* And(Prefilter* a, Prefilter* b); + + // Combines two prefilters together to create an OR. The passed + // Prefilters will be part of the returned Prefilter or deleted. + static Prefilter* Or(Prefilter* a, Prefilter* b); + + // Generalized And/Or + static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b); + + static Prefilter* FromRegexp(Regexp* a); + + static Prefilter* FromString(const string& str); + + static Prefilter* OrStrings(set* ss); + + static Info* BuildInfo(Regexp* re); + + Prefilter* Simplify(); + + // Kind of Prefilter. + Op op_; + + // Sub-matches for AND or OR Prefilter. + vector* subs_; + + // Actual string to match in leaf node. + string atom_; + + // If different prefilters have the same string atom, or if they are + // structurally the same (e.g., OR of same atom strings) they are + // considered the same unique nodes. This is the id for each unique + // node. This field is populated with a unique id for every node, + // and -1 for duplicate nodes. + int unique_id_; + + // Used for debugging, helps in tracking memory leaks. + int alloc_id_; + + DISALLOW_COPY_AND_ASSIGN(Prefilter); +}; + +} // namespace re2 + +#endif // RE2_PREFILTER_H_ diff --git a/src/openalpr/support/re2/prefilter_tree.cc b/src/openalpr/support/re2/prefilter_tree.cc new file mode 100644 index 0000000..01e8cf9 --- /dev/null +++ b/src/openalpr/support/re2/prefilter_tree.cc @@ -0,0 +1,401 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "util/util.h" +#include "util/flags.h" +#include "re2/prefilter.h" +#include "re2/prefilter_tree.h" +#include "re2.h" + +DEFINE_int32(filtered_re2_min_atom_len, + 3, + "Strings less than this length are not stored as atoms"); + +namespace re2 { + +PrefilterTree::PrefilterTree() + : compiled_(false) { +} + +PrefilterTree::~PrefilterTree() { + for (size_t i = 0; i < prefilter_vec_.size(); i++) + delete prefilter_vec_[i]; + + for (size_t i = 0; i < entries_.size(); i++) + delete entries_[i].parents; +} + +// Functions used for adding and Compiling prefilters to the +// PrefilterTree. +static bool KeepPart(Prefilter* prefilter, int level) { + if (prefilter == NULL) + return false; + + switch (prefilter->op()) { + default: + LOG(DFATAL) << "Unexpected op in KeepPart: " + << prefilter->op(); + return false; + + case Prefilter::ALL: + return false; + + case Prefilter::ATOM: + return prefilter->atom().size() >= + static_cast(FLAGS_filtered_re2_min_atom_len); + + case Prefilter::AND: { + int j = 0; + vector* subs = prefilter->subs(); + for (size_t i = 0; i < subs->size(); i++) + if (KeepPart((*subs)[i], level + 1)) + (*subs)[j++] = (*subs)[i]; + else + delete (*subs)[i]; + + subs->resize(j); + return j > 0; + } + + case Prefilter::OR: + for (size_t i = 0; i < prefilter->subs()->size(); i++) + if (!KeepPart((*prefilter->subs())[i], level + 1)) + return false; + return true; + } +} + +void PrefilterTree::Add(Prefilter *f) { + if (compiled_) { + LOG(DFATAL) << "Add after Compile."; + return; + } + if (f != NULL && !KeepPart(f, 0)) { + delete f; + f = NULL; + } + + prefilter_vec_.push_back(f); +} + +void PrefilterTree::Compile(vector* atom_vec) { + if (compiled_) { + LOG(DFATAL) << "Compile after Compile."; + return; + } + + // We do this check to support some legacy uses of + // PrefilterTree that call Compile before adding any regexps, + // and expect Compile not to have effect. 
+ if (prefilter_vec_.empty()) + return; + + compiled_ = true; + + AssignUniqueIds(atom_vec); + + // Identify nodes that are too common among prefilters and are + // triggering too many parents. Then get rid of them if possible. + // Note that getting rid of a prefilter node simply means they are + // no longer necessary for their parent to trigger; that is, we do + // not miss out on any regexps triggering by getting rid of a + // prefilter node. + for (size_t i = 0; i < entries_.size(); i++) { + StdIntMap* parents = entries_[i].parents; + if (parents->size() > 8) { + // This one triggers too many things. If all the parents are AND + // nodes and have other things guarding them, then get rid of + // this trigger. TODO(vsri): Adjust the threshold appropriately, + // make it a function of total number of nodes? + bool have_other_guard = true; + for (StdIntMap::iterator it = parents->begin(); + it != parents->end(); ++it) { + have_other_guard = have_other_guard && + (entries_[it->first].propagate_up_at_count > 1); + } + + if (have_other_guard) { + for (StdIntMap::iterator it = parents->begin(); + it != parents->end(); ++it) + entries_[it->first].propagate_up_at_count -= 1; + + parents->clear(); // Forget the parents + } + } + } + + PrintDebugInfo(); +} + +Prefilter* PrefilterTree::CanonicalNode(Prefilter* node) { + string node_string = NodeString(node); + map::iterator iter = node_map_.find(node_string); + if (iter == node_map_.end()) + return NULL; + return (*iter).second; +} + +static string Itoa(int n) { + char buf[100]; + snprintf(buf, sizeof buf, "%d", n); + return string(buf); +} + +string PrefilterTree::NodeString(Prefilter* node) const { + // Adding the operation disambiguates AND/OR/atom nodes. 
+ string s = Itoa(node->op()) + ":"; + if (node->op() == Prefilter::ATOM) { + s += node->atom(); + } else { + for (size_t i = 0; i < node->subs()->size(); i++) { + if (i > 0) + s += ','; + s += Itoa((*node->subs())[i]->unique_id()); + } + } + return s; +} + +void PrefilterTree::AssignUniqueIds(vector* atom_vec) { + atom_vec->clear(); + + // Build vector of all filter nodes, sorted topologically + // from top to bottom in v. + vector v; + + // Add the top level nodes of each regexp prefilter. + for (size_t i = 0; i < prefilter_vec_.size(); i++) { + Prefilter* f = prefilter_vec_[i]; + if (f == NULL) + unfiltered_.push_back(i); + + // We push NULL also on to v, so that we maintain the + // mapping of index==regexpid for level=0 prefilter nodes. + v.push_back(f); + } + + // Now add all the descendant nodes. + for (size_t i = 0; i < v.size(); i++) { + Prefilter* f = v[i]; + if (f == NULL) + continue; + if (f->op() == Prefilter::AND || f->op() == Prefilter::OR) { + const vector& subs = *f->subs(); + for (size_t j = 0; j < subs.size(); j++) + v.push_back(subs[j]); + } + } + + // Identify unique nodes. + int unique_id = 0; + for (int i = v.size() - 1; i >= 0; i--) { + Prefilter *node = v[i]; + if (node == NULL) + continue; + node->set_unique_id(-1); + Prefilter* canonical = CanonicalNode(node); + if (canonical == NULL) { + // Any further nodes that have the same node string + // will find this node as the canonical node. + node_map_[NodeString(node)] = node; + if (node->op() == Prefilter::ATOM) { + atom_vec->push_back(node->atom()); + atom_index_to_id_.push_back(unique_id); + } + node->set_unique_id(unique_id++); + } else { + node->set_unique_id(canonical->unique_id()); + } + } + entries_.resize(node_map_.size()); + + // Create parent StdIntMap for the entries. 
+ for (int i = v.size() - 1; i >= 0; i--) { + Prefilter* prefilter = v[i]; + if (prefilter == NULL) + continue; + + if (CanonicalNode(prefilter) != prefilter) + continue; + + Entry* entry = &entries_[prefilter->unique_id()]; + entry->parents = new StdIntMap(); + } + + // Fill the entries. + for (int i = v.size() - 1; i >= 0; i--) { + Prefilter* prefilter = v[i]; + if (prefilter == NULL) + continue; + + if (CanonicalNode(prefilter) != prefilter) + continue; + + Entry* entry = &entries_[prefilter->unique_id()]; + + switch (prefilter->op()) { + default: + case Prefilter::ALL: + LOG(DFATAL) << "Unexpected op: " << prefilter->op(); + return; + + case Prefilter::ATOM: + entry->propagate_up_at_count = 1; + break; + + case Prefilter::OR: + case Prefilter::AND: { + set uniq_child; + for (size_t j = 0; j < prefilter->subs()->size(); j++) { + Prefilter* child = (*prefilter->subs())[j]; + Prefilter* canonical = CanonicalNode(child); + if (canonical == NULL) { + LOG(DFATAL) << "Null canonical node"; + return; + } + int child_id = canonical->unique_id(); + uniq_child.insert(child_id); + // To the child, we want to add to parent indices. + Entry* child_entry = &entries_[child_id]; + if (child_entry->parents->find(prefilter->unique_id()) == + child_entry->parents->end()) { + (*child_entry->parents)[prefilter->unique_id()] = 1; + } + } + entry->propagate_up_at_count = + prefilter->op() == Prefilter::AND ? uniq_child.size() : 1; + + break; + } + } + } + + // For top level nodes, populate regexp id. + for (size_t i = 0; i < prefilter_vec_.size(); i++) { + if (prefilter_vec_[i] == NULL) + continue; + int id = CanonicalNode(prefilter_vec_[i])->unique_id(); + DCHECK_LE(0, id); + Entry* entry = &entries_[id]; + entry->regexps.push_back(i); + } +} + +// Functions for triggering during search. 
+void PrefilterTree::RegexpsGivenStrings( + const vector& matched_atoms, + vector* regexps) const { + regexps->clear(); + if (!compiled_) { + LOG(WARNING) << "Compile() not called"; + for (size_t i = 0; i < prefilter_vec_.size(); ++i) + regexps->push_back(i); + } else { + if (!prefilter_vec_.empty()) { + IntMap regexps_map(prefilter_vec_.size()); + vector matched_atom_ids; + for (size_t j = 0; j < matched_atoms.size(); j++) { + matched_atom_ids.push_back(atom_index_to_id_[matched_atoms[j]]); + VLOG(10) << "Atom id:" << atom_index_to_id_[matched_atoms[j]]; + } + PropagateMatch(matched_atom_ids, ®exps_map); + for (IntMap::iterator it = regexps_map.begin(); + it != regexps_map.end(); + ++it) + regexps->push_back(it->index()); + + regexps->insert(regexps->end(), unfiltered_.begin(), unfiltered_.end()); + } + } + sort(regexps->begin(), regexps->end()); +} + +void PrefilterTree::PropagateMatch(const vector& atom_ids, + IntMap* regexps) const { + IntMap count(entries_.size()); + IntMap work(entries_.size()); + for (size_t i = 0; i < atom_ids.size(); i++) + work.set(atom_ids[i], 1); + for (IntMap::iterator it = work.begin(); it != work.end(); ++it) { + const Entry& entry = entries_[it->index()]; + VLOG(10) << "Processing: " << it->index(); + // Record regexps triggered. + for (size_t i = 0; i < entry.regexps.size(); i++) { + VLOG(10) << "Regexp triggered: " << entry.regexps[i]; + regexps->set(entry.regexps[i], 1); + } + int c; + // Pass trigger up to parents. + for (StdIntMap::iterator it = entry.parents->begin(); + it != entry.parents->end(); + ++it) { + int j = it->first; + const Entry& parent = entries_[j]; + VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count; + // Delay until all the children have succeeded. 
+ if (parent.propagate_up_at_count > 1) { + if (count.has_index(j)) { + c = count.get_existing(j) + 1; + count.set_existing(j, c); + } else { + c = 1; + count.set_new(j, c); + } + if (c < parent.propagate_up_at_count) + continue; + } + VLOG(10) << "Triggering: " << j; + // Trigger the parent. + work.set(j, 1); + } + } +} + +// Debugging help. +void PrefilterTree::PrintPrefilter(int regexpid) { + LOG(INFO) << DebugNodeString(prefilter_vec_[regexpid]); +} + +void PrefilterTree::PrintDebugInfo() { + VLOG(10) << "#Unique Atoms: " << atom_index_to_id_.size(); + VLOG(10) << "#Unique Nodes: " << entries_.size(); + + for (size_t i = 0; i < entries_.size(); ++i) { + StdIntMap* parents = entries_[i].parents; + const vector& regexps = entries_[i].regexps; + VLOG(10) << "EntryId: " << i + << " N: " << parents->size() << " R: " << regexps.size(); + for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) + VLOG(10) << it->first; + } + VLOG(10) << "Map:"; + for (map::const_iterator iter = node_map_.begin(); + iter != node_map_.end(); ++iter) + VLOG(10) << "NodeId: " << (*iter).second->unique_id() + << " Str: " << (*iter).first; +} + +string PrefilterTree::DebugNodeString(Prefilter* node) const { + string node_string = ""; + + if (node->op() == Prefilter::ATOM) { + DCHECK(!node->atom().empty()); + node_string += node->atom(); + } else { + // Adding the operation disambiguates AND and OR nodes. + node_string += node->op() == Prefilter::AND ? 
"AND" : "OR"; + node_string += "("; + for (size_t i = 0; i < node->subs()->size(); i++) { + if (i > 0) + node_string += ','; + node_string += Itoa((*node->subs())[i]->unique_id()); + node_string += ":"; + node_string += DebugNodeString((*node->subs())[i]); + } + node_string += ")"; + } + return node_string; +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/prefilter_tree.h b/src/openalpr/support/re2/prefilter_tree.h new file mode 100644 index 0000000..abea55d --- /dev/null +++ b/src/openalpr/support/re2/prefilter_tree.h @@ -0,0 +1,131 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The PrefilterTree class is used to form an AND-OR tree of strings +// that would trigger each regexp. The 'prefilter' of each regexp is +// added tp PrefilterTree, and then PrefilterTree is used to find all +// the unique strings across the prefilters. During search, by using +// matches from a string matching engine, PrefilterTree deduces the +// set of regexps that are to be triggered. The 'string matching +// engine' itself is outside of this class, and the caller can use any +// favorite engine. PrefilterTree provides a set of strings (called +// atoms) that the user of this class should use to do the string +// matching. +// +#ifndef RE2_PREFILTER_TREE_H_ +#define RE2_PREFILTER_TREE_H_ + +#include "util/util.h" +#include "util/sparse_array.h" + +namespace re2 { + +typedef SparseArray IntMap; +typedef map StdIntMap; + +class Prefilter; + +class PrefilterTree { + public: + PrefilterTree(); + ~PrefilterTree(); + + // Adds the prefilter for the next regexp. Note that we assume that + // Add called sequentially for all regexps. All Add calls + // must precede Compile. + void Add(Prefilter* prefilter); + + // The Compile returns a vector of string in atom_vec. + // Call this after all the prefilters are added through Add. 
+ // No calls to Add after Compile are allowed. + // The caller should use the returned set of strings to do string matching. + // Each time a string matches, the corresponding index then has to be + // and passed to RegexpsGivenStrings below. + void Compile(vector* atom_vec); + + // Given the indices of the atoms that matched, returns the indexes + // of regexps that should be searched. The matched_atoms should + // contain all the ids of string atoms that were found to match the + // content. The caller can use any string match engine to perform + // this function. This function is thread safe. + void RegexpsGivenStrings(const vector& matched_atoms, + vector* regexps) const; + + // Print debug prefilter. Also prints unique ids associated with + // nodes of the prefilter of the regexp. + void PrintPrefilter(int regexpid); + + + // Each unique node has a corresponding Entry that helps in + // passing the matching trigger information along the tree. + struct Entry { + public: + // How many children should match before this node triggers the + // parent. For an atom and an OR node, this is 1 and for an AND + // node, it is the number of unique children. + int propagate_up_at_count; + + // When this node is ready to trigger the parent, what are the indices + // of the parent nodes to trigger. The reason there may be more than + // one is because of sharing. For example (abc | def) and (xyz | def) + // are two different nodes, but they share the atom 'def'. So when + // 'def' matches, it triggers two parents, corresponding to the two + // different OR nodes. + StdIntMap* parents; + + // When this node is ready to trigger the parent, what are the + // regexps that are triggered. + vector regexps; + }; + + private: + // This function assigns unique ids to various parts of the + // prefilter, by looking at if these nodes are already in the + // PrefilterTree. + void AssignUniqueIds(vector* atom_vec); + + // Given the matching atoms, find the regexps to be triggered. 
+ void PropagateMatch(const vector& atom_ids, + IntMap* regexps) const; + + // Returns the prefilter node that has the same NodeString as this + // node. For the canonical node, returns node. + Prefilter* CanonicalNode(Prefilter* node); + + // A string that uniquely identifies the node. Assumes that the + // children of node has already been assigned unique ids. + string NodeString(Prefilter* node) const; + + // Recursively constructs a readable prefilter string. + string DebugNodeString(Prefilter* node) const; + + // Used for debugging. + void PrintDebugInfo(); + + // These are all the nodes formed by Compile. Essentially, there is + // one node for each unique atom and each unique AND/OR node. + vector entries_; + + // Map node string to canonical Prefilter node. + map node_map_; + + // indices of regexps that always pass through the filter (since we + // found no required literals in these regexps). + vector unfiltered_; + + // vector of Prefilter for all regexps. + vector prefilter_vec_; + + // Atom index in returned strings to entry id mapping. + vector atom_index_to_id_; + + // Has the prefilter tree been compiled. + bool compiled_; + + DISALLOW_COPY_AND_ASSIGN(PrefilterTree); +}; + +} // namespace + +#endif // RE2_PREFILTER_TREE_H_ diff --git a/src/openalpr/support/re2/prog.cc b/src/openalpr/support/re2/prog.cc new file mode 100644 index 0000000..f326ffd --- /dev/null +++ b/src/openalpr/support/re2/prog.cc @@ -0,0 +1,343 @@ +// Copyright 2007 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Compiled regular expression representation. 
+// Tested by compile_test.cc
+
+#include "util/util.h"
+#include "util/sparse_set.h"
+#include "re2/prog.h"
+#include "re2/stringpiece.h"
+
+namespace re2 {
+
+// Constructors per Inst opcode.  Each one DCHECKs that out_opcode_ is still 0.
+
+void Prog::Inst::InitAlt(uint32 out, uint32 out1) {  // two successors
+  DCHECK_EQ(out_opcode_, 0);
+  set_out_opcode(out, kInstAlt);
+  out1_ = out1;
+}
+
+void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32 out) {
+  DCHECK_EQ(out_opcode_, 0);
+  set_out_opcode(out, kInstByteRange);
+  lo_ = lo & 0xFF;  // bounds are stored as single bytes
+  hi_ = hi & 0xFF;
+  foldcase_ = foldcase;
+}
+
+void Prog::Inst::InitCapture(int cap, uint32 out) {  // cap: capture index
+  DCHECK_EQ(out_opcode_, 0);
+  set_out_opcode(out, kInstCapture);
+  cap_ = cap;
+}
+
+void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32 out) {
+  DCHECK_EQ(out_opcode_, 0);
+  set_out_opcode(out, kInstEmptyWidth);
+  empty_ = empty;
+}
+
+void Prog::Inst::InitMatch(int32 id) {  // no successor, only a match id
+  DCHECK_EQ(out_opcode_, 0);
+  set_opcode(kInstMatch);
+  match_id_ = id;
+}
+
+void Prog::Inst::InitNop(uint32 out) {  // NOTE(review): out is not stored here; presumably patched in by the caller - confirm
+  DCHECK_EQ(out_opcode_, 0);
+  set_opcode(kInstNop);
+}
+
+void Prog::Inst::InitFail() {  // no successor, no arguments
+  DCHECK_EQ(out_opcode_, 0);
+  set_opcode(kInstFail);
+}
+
+string Prog::Inst::Dump() {  // one-line description of this instruction
+  switch (opcode()) {
+    default:
+      return StringPrintf("opcode %d", static_cast<int>(opcode()));
+
+    case kInstAlt:
+      return StringPrintf("alt -> %d | %d", out(), out1_);
+
+    case kInstAltMatch:
+      return StringPrintf("altmatch -> %d | %d", out(), out1_);
+
+    case kInstByteRange:
+      return StringPrintf("byte%s [%02x-%02x] -> %d",
+                          foldcase_ ? "/i" : "",
+                          lo_, hi_, out());
+
+    case kInstCapture:
+      return StringPrintf("capture %d -> %d", cap_, out());
+
+    case kInstEmptyWidth:
+      return StringPrintf("emptywidth %#x -> %d",
+                          static_cast<int>(empty_), out());
+
+    case kInstMatch:
+      return StringPrintf("match! %d", match_id());
+
+    case kInstNop:
+      return StringPrintf("nop -> %d", out());
+
+    case kInstFail:
+      return StringPrintf("fail");
+  }
+}
+
+Prog::Prog()  // starts empty: flags false, counters 0, pointers NULL
+  : anchor_start_(false),
+    anchor_end_(false),
+    reversed_(false),
+    did_onepass_(false),
+    start_(0),
+    start_unanchored_(0),
+    size_(0),
+    byte_inst_count_(0),
+    bytemap_range_(0),
+    flags_(0),
+    onepass_statesize_(0),
+    inst_(NULL),
+    dfa_first_(NULL),
+    dfa_longest_(NULL),
+    dfa_mem_(0),
+    delete_dfa_(NULL),
+    unbytemap_(NULL),
+    onepass_nodes_(NULL),
+    onepass_start_(NULL) {
+}
+
+Prog::~Prog() {
+  if (delete_dfa_) {  // DFAs are released only through this callback
+    if (dfa_first_)
+      delete_dfa_(dfa_first_);
+    if (dfa_longest_)
+      delete_dfa_(dfa_longest_);
+  }
+  delete[] onepass_nodes_;
+  delete[] inst_;
+  delete[] unbytemap_;
+}
+
+typedef SparseSet Workq;  // work queue of instruction ids
+
+static inline void AddToQueue(Workq* q, int id) {  // id 0 is never queued
+  if (id != 0)
+    q->insert(id);
+}
+
+static string ProgToString(Prog* prog, Workq* q) {  // dumps the instructions in q
+  string s;
+
+  for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
+    int id = *i;
+    Prog::Inst* ip = prog->inst(id);
+    StringAppendF(&s, "%d. %s\n", id, ip->Dump().c_str());
+    AddToQueue(q, ip->out());  // successors are queued while q is being walked
+    if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch)
+      AddToQueue(q, ip->out1());
+  }
+  return s;
+}
+
+string Prog::Dump() {  // dumps the program starting at start_
+  string map;
+  if (false) {  // Debugging
+    int lo = 0;
+    StringAppendF(&map, "byte map:\n");
+    for (int i = 0; i < bytemap_range_; i++) {
+      StringAppendF(&map, "\t%d. [%02x-%02x]\n", i, lo, unbytemap_[i]);
+      lo = unbytemap_[i] + 1;
+    }
+    StringAppendF(&map, "\n");
+  }
+
+  Workq q(size_);
+  AddToQueue(&q, start_);
+  return map + ProgToString(this, &q);
+}
+
+string Prog::DumpUnanchored() {  // dumps the program starting at start_unanchored_
+  Workq q(size_);
+  AddToQueue(&q, start_unanchored_);
+  return ProgToString(this, &q);
+}
+
+static bool IsMatch(Prog*, Prog::Inst*);
+
+// Peep-hole optimizer.
+void Prog::Optimize() {
+  Workq q(size_);
+
+  // Eliminate nops. Most are taken out during compilation
+  // but a few are hard to avoid.
+  q.clear();
+  AddToQueue(&q, start_);
+  for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
+    int id = *i;
+
+    Inst* ip = inst(id);
+    int j = ip->out();
+    Inst* jp;
+    while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
+      j = jp->out();
+    }
+    ip->set_out(j);
+    AddToQueue(&q, ip->out());
+
+    if (ip->opcode() == kInstAlt) {
+      j = ip->out1();
+      while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
+        j = jp->out();
+      }
+      ip->out1_ = j;
+      AddToQueue(&q, ip->out1());
+    }
+  }
+
+  // Insert kInstAltMatch instructions
+  // Look for
+  //   ip: Alt -> j | k
+  //    j: ByteRange [00-FF] -> ip
+  //    k: Match
+  // or the reverse (the above is the greedy one).
+  // Rewrite Alt to AltMatch.
+  q.clear();
+  AddToQueue(&q, start_);
+  for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
+    int id = *i;
+    Inst* ip = inst(id);
+    AddToQueue(&q, ip->out());
+    if (ip->opcode() == kInstAlt)
+      AddToQueue(&q, ip->out1());
+
+    if (ip->opcode() == kInstAlt) {
+      Inst* j = inst(ip->out());
+      Inst* k = inst(ip->out1());
+      if (j->opcode() == kInstByteRange && j->out() == id &&
+          j->lo() == 0x00 && j->hi() == 0xFF &&
+          IsMatch(this, k)) {
+        ip->set_opcode(kInstAltMatch);
+        continue;
+      }
+      if (IsMatch(this, j) &&
+          k->opcode() == kInstByteRange && k->out() == id &&
+          k->lo() == 0x00 && k->hi() == 0xFF) {
+        ip->set_opcode(kInstAltMatch);
+      }
+    }
+  }
+}
+
+// Is ip a guaranteed match at end of text, perhaps after some capturing?
+static bool IsMatch(Prog* prog, Prog::Inst* ip) {  // skips Capture/Nop until a decisive opcode
+  for (;;) {
+    switch (ip->opcode()) {
+      default:
+        LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
+        return false;
+
+      case kInstAlt:
+      case kInstAltMatch:
+      case kInstByteRange:
+      case kInstFail:
+      case kInstEmptyWidth:
+        return false;
+
+      case kInstCapture:
+      case kInstNop:
+        ip = prog->inst(ip->out());
+        break;
+
+      case kInstMatch:
+        return true;
+    }
+  }
+}
+
+uint32 Prog::EmptyFlags(const StringPiece& text, const char* p) {  // kEmpty* flags holding at p
+  int flags = 0;
+
+  // ^ and \A
+  if (p == text.begin())
+    flags |= kEmptyBeginText | kEmptyBeginLine;
+  else if (p[-1] == '\n')
+    flags |= kEmptyBeginLine;
+
+  // $ and \z
+  if (p == text.end())
+    flags |= kEmptyEndText | kEmptyEndLine;
+  else if (p < text.end() && p[0] == '\n')
+    flags |= kEmptyEndLine;
+
+  // \b and \B
+  if (p == text.begin() && p == text.end()) {
+    // no word boundary here
+  } else if (p == text.begin()) {
+    if (IsWordChar(p[0]))
+      flags |= kEmptyWordBoundary;
+  } else if (p == text.end()) {
+    if (IsWordChar(p[-1]))
+      flags |= kEmptyWordBoundary;
+  } else {
+    if (IsWordChar(p[-1]) != IsWordChar(p[0]))
+      flags |= kEmptyWordBoundary;
+  }
+  if (!(flags & kEmptyWordBoundary))  // exactly one of \b and \B is always set
+    flags |= kEmptyNonWordBoundary;
+
+  return flags;
+}
+
+void Prog::MarkByteRange(int lo, int hi) {  // marks lo-1 and hi in byterange_
+  DCHECK_GE(lo, 0);
+  DCHECK_GE(hi, 0);
+  DCHECK_LE(lo, 255);
+  DCHECK_LE(hi, 255);
+  DCHECK_LE(lo, hi);
+  if (0 < lo && lo <= 255)
+    byterange_.Set(lo - 1);
+  if (0 <= hi && hi <= 255)
+    byterange_.Set(hi);
+}
+
+void Prog::ComputeByteMap() {
+  // Fill in bytemap with byte classes for prog_.
+  // Ranges of bytes that are treated as indistinguishable
+  // by the regexp program are mapped to a single byte class.
+  // The vector prog_->byterange() marks the end of each
+  // such range.
+  const Bitmap<256>& v = byterange();
+
+  COMPILE_ASSERT(8*sizeof(v.Word(0)) == 32, wordsize);
+  uint8 n = 0;
+  uint32 bits = 0;
+  for (int i = 0; i < 256; i++) {
+    if ((i&31) == 0)  // load the next 32-bit word of the bitmap
+      bits = v.Word(i >> 5);
+    bytemap_[i] = n;
+    n += bits & 1;  // a set bit ends the current class
+    bits >>= 1;
+  }
+  bytemap_range_ = bytemap_[255] + 1;  // number of byte classes
+  unbytemap_ = new uint8[bytemap_range_];
+  for (int i = 0; i < 256; i++)
+    unbytemap_[bytemap_[i]] = i;  // ends up holding the last byte of each class
+
+  if (0) {  // For debugging: use trivial byte map.  NOTE(review): unbytemap_ was sized above and may hold fewer than 256 entries.
+    for (int i = 0; i < 256; i++) {
+      bytemap_[i] = i;
+      unbytemap_[i] = i;
+    }
+    bytemap_range_ = 256;
+    LOG(INFO) << "Using trivial bytemap.";
+  }
+}
+
+}  // namespace re2
+
diff --git a/src/openalpr/support/re2/prog.h b/src/openalpr/support/re2/prog.h
new file mode 100644
index 0000000..c67b83c
--- /dev/null
+++ b/src/openalpr/support/re2/prog.h
@@ -0,0 +1,381 @@
+// Copyright 2007 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Compiled representation of regular expressions.
+// See regexp.h for the Regexp class, which represents a regular
+// expression symbolically.
+
+#ifndef RE2_PROG_H__
+#define RE2_PROG_H__
+
+#include "re2/util/util.h"
+#include "re2/util/sparse_array.h"
+#include "re2.h"
+
+namespace re2 {
+
+// Simple fixed-size bitmap.
+template +class Bitmap { + public: + Bitmap() { Reset(); } + int Size() { return Bits; } + + void Reset() { + for (int i = 0; i < Words; i++) + w_[i] = 0; + } + bool Get(int k) const { + return w_[k >> WordLog] & (1<<(k & 31)); + } + void Set(int k) { + w_[k >> WordLog] |= 1<<(k & 31); + } + void Clear(int k) { + w_[k >> WordLog] &= ~(1<<(k & 31)); + } + uint32 Word(int i) const { + return w_[i]; + } + + private: + static const int WordLog = 5; + static const int Words = (Bits+31)/32; + uint32 w_[Words]; + DISALLOW_COPY_AND_ASSIGN(Bitmap); +}; + + +// Opcodes for Inst +enum InstOp { + kInstAlt = 0, // choose between out_ and out1_ + kInstAltMatch, // Alt: out_ is [00-FF] and back, out1_ is match; or vice versa. + kInstByteRange, // next (possible case-folded) byte must be in [lo_, hi_] + kInstCapture, // capturing parenthesis number cap_ + kInstEmptyWidth, // empty-width special (^ $ ...); bit(s) set in empty_ + kInstMatch, // found a match! + kInstNop, // no-op; occasionally unavoidable + kInstFail, // never match; occasionally unavoidable +}; + +// Bit flags for empty-width specials +enum EmptyOp { + kEmptyBeginLine = 1<<0, // ^ - beginning of line + kEmptyEndLine = 1<<1, // $ - end of line + kEmptyBeginText = 1<<2, // \A - beginning of text + kEmptyEndText = 1<<3, // \z - end of text + kEmptyWordBoundary = 1<<4, // \b - word boundary + kEmptyNonWordBoundary = 1<<5, // \B - not \b + kEmptyAllFlags = (1<<6)-1, +}; + +class Regexp; + +class DFA; +struct OneState; + +// Compiled form of regexp program. +class Prog { + public: + Prog(); + ~Prog(); + + // Single instruction in regexp program. 
+ class Inst { + public: + Inst() : out_opcode_(0), out1_(0) { } + + // Constructors per opcode + void InitAlt(uint32 out, uint32 out1); + void InitByteRange(int lo, int hi, int foldcase, uint32 out); + void InitCapture(int cap, uint32 out); + void InitEmptyWidth(EmptyOp empty, uint32 out); + void InitMatch(int id); + void InitNop(uint32 out); + void InitFail(); + + // Getters + int id(Prog* p) { return this - p->inst_; } + InstOp opcode() { return static_cast(out_opcode_&7); } + int out() { return out_opcode_>>3; } + int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; } + int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; } + int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; } + int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; } + int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return foldcase_; } + int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; } + EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; } + bool greedy(Prog *p) { + DCHECK_EQ(opcode(), kInstAltMatch); + return p->inst(out())->opcode() == kInstByteRange; + } + + // Does this inst (an kInstByteRange) match c? + inline bool Matches(int c) { + DCHECK_EQ(opcode(), kInstByteRange); + if (foldcase_ && 'A' <= c && c <= 'Z') + c += 'a' - 'A'; + return lo_ <= c && c <= hi_; + } + + // Returns string representation for debugging. + string Dump(); + + // Maximum instruction id. + // (Must fit in out_opcode_, and PatchList steals another bit.) 
+ static const int kMaxInst = (1<<28) - 1; + + private: + void set_opcode(InstOp opcode) { + out_opcode_ = (out()<<3) | opcode; + } + + void set_out(int out) { + out_opcode_ = (out<<3) | opcode(); + } + + void set_out_opcode(int out, InstOp opcode) { + out_opcode_ = (out<<3) | opcode; + } + + uint32 out_opcode_; // 29 bits of out, 3 (low) bits opcode + union { // additional instruction arguments: + uint32 out1_; // opcode == kInstAlt + // alternate next instruction + + int32 cap_; // opcode == kInstCapture + // Index of capture register (holds text + // position recorded by capturing parentheses). + // For \n (the submatch for the nth parentheses), + // the left parenthesis captures into register 2*n + // and the right one captures into register 2*n+1. + + int32 match_id_; // opcode == kInstMatch + // Match ID to identify this match (for re2::Set). + + struct { // opcode == kInstByteRange + uint8 lo_; // byte range is lo_-hi_ inclusive + uint8 hi_; // + uint8 foldcase_; // convert A-Z to a-z before checking range. + }; + + EmptyOp empty_; // opcode == kInstEmptyWidth + // empty_ is bitwise OR of kEmpty* flags above. + }; + + friend class Compiler; + friend struct PatchList; + friend class Prog; + + DISALLOW_COPY_AND_ASSIGN(Inst); + }; + + // Whether to anchor the search. + enum Anchor { + kUnanchored, // match anywhere + kAnchored, // match only starting at beginning of text + }; + + // Kind of match to look for (for anchor != kFullMatch) + // + // kLongestMatch mode finds the overall longest + // match but still makes its submatch choices the way + // Perl would, not in the way prescribed by POSIX. + // The POSIX rules are much more expensive to implement, + // and no one has needed them. + // + // kFullMatch is not strictly necessary -- we could use + // kLongestMatch and then check the length of the match -- but + // the matching code can run faster if it knows to consider only + // full matches. 
+ enum MatchKind { + kFirstMatch, // like Perl, PCRE + kLongestMatch, // like egrep or POSIX + kFullMatch, // match only entire text; implies anchor==kAnchored + kManyMatch // for SearchDFA, records set of matches + }; + + Inst *inst(int id) { return &inst_[id]; } + int start() { return start_; } + int start_unanchored() { return start_unanchored_; } + void set_start(int start) { start_ = start; } + void set_start_unanchored(int start) { start_unanchored_ = start; } + int64 size() { return size_; } + bool reversed() { return reversed_; } + void set_reversed(bool reversed) { reversed_ = reversed; } + int64 byte_inst_count() { return byte_inst_count_; } + const Bitmap<256>& byterange() { return byterange_; } + void set_dfa_mem(int64 dfa_mem) { dfa_mem_ = dfa_mem; } + int64 dfa_mem() { return dfa_mem_; } + int flags() { return flags_; } + void set_flags(int flags) { flags_ = flags; } + bool anchor_start() { return anchor_start_; } + void set_anchor_start(bool b) { anchor_start_ = b; } + bool anchor_end() { return anchor_end_; } + void set_anchor_end(bool b) { anchor_end_ = b; } + int bytemap_range() { return bytemap_range_; } + const uint8* bytemap() { return bytemap_; } + + // Returns string representation of program for debugging. + string Dump(); + string DumpUnanchored(); + + // Record that at some point in the prog, the bytes in the range + // lo-hi (inclusive) are treated as different from bytes outside the range. + // Tracking this lets the DFA collapse commonly-treated byte ranges + // when recording state pointers, greatly reducing its memory footprint. + void MarkByteRange(int lo, int hi); + + // Returns the set of kEmpty flags that are in effect at + // position p within context. + static uint32 EmptyFlags(const StringPiece& context, const char* p); + + // Returns whether byte c is a word character: ASCII only. + // Used by the implementation of \b and \B. 
+ // This is not right for Unicode, but: + // - it's hard to get right in a byte-at-a-time matching world + // (the DFA has only one-byte lookahead). + // - even if the lookahead were possible, the Progs would be huge. + // This crude approximation is the same one PCRE uses. + static bool IsWordChar(uint8 c) { + return ('A' <= c && c <= 'Z') || + ('a' <= c && c <= 'z') || + ('0' <= c && c <= '9') || + c == '_'; + } + + // Execution engines. They all search for the regexp (run the prog) + // in text, which is in the larger context (used for ^ $ \b etc). + // Anchor and kind control the kind of search. + // Returns true if match found, false if not. + // If match found, fills match[0..nmatch-1] with submatch info. + // match[0] is overall match, match[1] is first set of parens, etc. + // If a particular submatch is not matched during the regexp match, + // it is set to NULL. + // + // Matching text == StringPiece(NULL, 0) is treated as any other empty + // string, but note that on return, it will not be possible to distinguish + // submatches that matched that empty string from submatches that didn't + // match anything. Either way, match[i] == NULL. + + // Search using NFA: can find submatches but kind of slow. + bool SearchNFA(const StringPiece& text, const StringPiece& context, + Anchor anchor, MatchKind kind, + StringPiece* match, int nmatch); + + // Search using DFA: much faster than NFA but only finds + // end of match and can use a lot more memory. + // Returns whether a match was found. + // If the DFA runs out of memory, sets *failed to true and returns false. + // If matches != NULL and kind == kManyMatch and there is a match, + // SearchDFA fills matches with the match IDs of the final matching state. + bool SearchDFA(const StringPiece& text, const StringPiece& context, + Anchor anchor, MatchKind kind, + StringPiece* match0, bool* failed, + vector* matches); + + // Build the entire DFA for the given match kind. FOR TESTING ONLY. 
+ // Usually the DFA is built out incrementally, as needed, which + // avoids lots of unnecessary work. This function is useful only + // for testing purposes. Returns number of states. + int BuildEntireDFA(MatchKind kind); + + // Compute byte map. + void ComputeByteMap(); + + // Run peep-hole optimizer on program. + void Optimize(); + + // One-pass NFA: only correct if IsOnePass() is true, + // but much faster than NFA (competitive with PCRE) + // for those expressions. + bool IsOnePass(); + bool SearchOnePass(const StringPiece& text, const StringPiece& context, + Anchor anchor, MatchKind kind, + StringPiece* match, int nmatch); + + // Bit-state backtracking. Fast on small cases but uses memory + // proportional to the product of the program size and the text size. + bool SearchBitState(const StringPiece& text, const StringPiece& context, + Anchor anchor, MatchKind kind, + StringPiece* match, int nmatch); + + static const int kMaxOnePassCapture = 5; // $0 through $4 + + // Backtracking search: the gold standard against which the other + // implementations are checked. FOR TESTING ONLY. + // It allocates a ton of memory to avoid running forever. + // It is also recursive, so can't use in production (will overflow stacks). + // The name "Unsafe" here is supposed to be a flag that + // you should not be using this function. + bool UnsafeSearchBacktrack(const StringPiece& text, + const StringPiece& context, + Anchor anchor, MatchKind kind, + StringPiece* match, int nmatch); + + // Computes range for any strings matching regexp. The min and max can in + // some cases be arbitrarily precise, so the caller gets to specify the + // maximum desired length of string returned. + // + // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any + // string s that is an anchored match for this regexp satisfies + // min <= s && s <= max. 
+ // + // Note that PossibleMatchRange() will only consider the first copy of an + // infinitely repeated element (i.e., any regexp element followed by a '*' or + // '+' operator). Regexps with "{N}" constructions are not affected, as those + // do not compile down to infinite repetitions. + // + // Returns true on success, false on error. + bool PossibleMatchRange(string* min, string* max, int maxlen); + + // EXPERIMENTAL! SUBJECT TO CHANGE! + // Outputs the program fanout into the given sparse array. + void Fanout(SparseArray* fanout); + + // Compiles a collection of regexps to Prog. Each regexp will have + // its own Match instruction recording the index in the vector. + static Prog* CompileSet(const RE2::Options& options, RE2::Anchor anchor, + Regexp* re); + + private: + friend class Compiler; + + DFA* GetDFA(MatchKind kind); + + bool anchor_start_; // regexp has explicit start anchor + bool anchor_end_; // regexp has explicit end anchor + bool reversed_; // whether program runs backward over input + bool did_onepass_; // has IsOnePass been called? + + int start_; // entry point for program + int start_unanchored_; // unanchored entry point for program + int size_; // number of instructions + int byte_inst_count_; // number of kInstByteRange instructions + int bytemap_range_; // bytemap_[x] < bytemap_range_ + int flags_; // regexp parse flags + int onepass_statesize_; // byte size of each OneState* node + + Inst* inst_; // pointer to instruction array + + Mutex dfa_mutex_; // Protects dfa_first_, dfa_longest_ + DFA* volatile dfa_first_; // DFA cached for kFirstMatch + DFA* volatile dfa_longest_; // DFA cached for kLongestMatch and kFullMatch + int64 dfa_mem_; // Maximum memory for DFAs. + void (*delete_dfa_)(DFA* dfa); + + Bitmap<256> byterange_; // byterange.Get(x) true if x ends a + // commonly-treated byte range. 
+ uint8 bytemap_[256]; // map from input bytes to byte classes + uint8 *unbytemap_; // bytemap_[unbytemap_[x]] == x + + uint8* onepass_nodes_; // data for OnePass nodes + OneState* onepass_start_; // start node for OnePass program + + DISALLOW_COPY_AND_ASSIGN(Prog); +}; + +} // namespace re2 + +#endif // RE2_PROG_H__ diff --git a/src/openalpr/support/re2/re2.cc b/src/openalpr/support/re2/re2.cc new file mode 100644 index 0000000..aea5f6e --- /dev/null +++ b/src/openalpr/support/re2/re2.cc @@ -0,0 +1,1246 @@ +// Copyright 2003-2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Regular expression interface RE2. +// +// Originally the PCRE C++ wrapper, but adapted to use +// the new automata-based regular expression engines. + +#include "re2.h" + +#include +#include +#ifndef WIN32 +#include +#endif +#include +#include "util/atomicops.h" +#include "util/util.h" +#include "util/flags.h" +#include "util/sparse_array.h" +#include "re2/prog.h" +#include "re2/regexp.h" + +DEFINE_bool(trace_re2, false, "trace RE2 execution"); + +namespace re2 { + +// Maximum number of args we can set +static const int kMaxArgs = 16; +static const int kVecSize = 1+kMaxArgs; + +const VariadicFunction2 RE2::FullMatch = {}; +const VariadicFunction2 RE2::PartialMatch = {}; +const VariadicFunction2 RE2::Consume = {}; +const VariadicFunction2 RE2::FindAndConsume = {}; + +// This will trigger LNK2005 error in MSVC. +#ifndef COMPILER_MSVC +const int RE2::Options::kDefaultMaxMem; // initialized in re2.h +#endif // COMPILER_MSVC + +RE2::Options::Options(RE2::CannedOptions opt) + : encoding_(opt == RE2::Latin1 ? 
EncodingLatin1 : EncodingUTF8), + posix_syntax_(opt == RE2::POSIX), + longest_match_(opt == RE2::POSIX), + log_errors_(opt != RE2::Quiet), + max_mem_(kDefaultMaxMem), + literal_(false), + never_nl_(false), + dot_nl_(false), + never_capture_(false), + case_sensitive_(true), + perl_classes_(false), + word_boundary_(false), + one_line_(false) { +} + +// static empty things for use as const references. +// To avoid global constructors, initialized on demand. +GLOBAL_MUTEX(empty_mutex); +static const string *empty_string; +static const map *empty_named_groups; +static const map *empty_group_names; + +static void InitEmpty() { + GLOBAL_MUTEX_LOCK(empty_mutex); + if (empty_string == NULL) { + empty_string = new string; + empty_named_groups = new map; + empty_group_names = new map; + } + GLOBAL_MUTEX_UNLOCK(empty_mutex); +} + +// Converts from Regexp error code to RE2 error code. +// Maybe some day they will diverge. In any event, this +// hides the existence of Regexp from RE2 users. +static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) { + switch (code) { + case re2::kRegexpSuccess: + return RE2::NoError; + case re2::kRegexpInternalError: + return RE2::ErrorInternal; + case re2::kRegexpBadEscape: + return RE2::ErrorBadEscape; + case re2::kRegexpBadCharClass: + return RE2::ErrorBadCharClass; + case re2::kRegexpBadCharRange: + return RE2::ErrorBadCharRange; + case re2::kRegexpMissingBracket: + return RE2::ErrorMissingBracket; + case re2::kRegexpMissingParen: + return RE2::ErrorMissingParen; + case re2::kRegexpTrailingBackslash: + return RE2::ErrorTrailingBackslash; + case re2::kRegexpRepeatArgument: + return RE2::ErrorRepeatArgument; + case re2::kRegexpRepeatSize: + return RE2::ErrorRepeatSize; + case re2::kRegexpRepeatOp: + return RE2::ErrorRepeatOp; + case re2::kRegexpBadPerlOp: + return RE2::ErrorBadPerlOp; + case re2::kRegexpBadUTF8: + return RE2::ErrorBadUTF8; + case re2::kRegexpBadNamedCapture: + return RE2::ErrorBadNamedCapture; + } + return 
RE2::ErrorInternal; +} + +static string trunc(const StringPiece& pattern) { + if (pattern.size() < 100) + return pattern.as_string(); + return pattern.substr(0, 100).as_string() + "..."; +} + + +RE2::RE2(const char* pattern) { + Init(pattern, DefaultOptions); +} + +RE2::RE2(const string& pattern) { + Init(pattern, DefaultOptions); +} + +RE2::RE2(const StringPiece& pattern) { + Init(pattern, DefaultOptions); +} + +RE2::RE2(const StringPiece& pattern, const Options& options) { + Init(pattern, options); +} + +int RE2::Options::ParseFlags() const { + int flags = Regexp::ClassNL; + switch (encoding()) { + default: + if (log_errors()) + LOG(ERROR) << "Unknown encoding " << encoding(); + break; + case RE2::Options::EncodingUTF8: + break; + case RE2::Options::EncodingLatin1: + flags |= Regexp::Latin1; + break; + } + + if (!posix_syntax()) + flags |= Regexp::LikePerl; + + if (literal()) + flags |= Regexp::Literal; + + if (never_nl()) + flags |= Regexp::NeverNL; + + if (dot_nl()) + flags |= Regexp::DotNL; + + if (never_capture()) + flags |= Regexp::NeverCapture; + + if (!case_sensitive()) + flags |= Regexp::FoldCase; + + if (perl_classes()) + flags |= Regexp::PerlClasses; + + if (word_boundary()) + flags |= Regexp::PerlB; + + if (one_line()) + flags |= Regexp::OneLine; + + return flags; +} + +void RE2::Init(const StringPiece& pattern, const Options& options) { + mutex_ = new Mutex; + pattern_ = pattern.as_string(); + options_.Copy(options); + InitEmpty(); + error_ = empty_string; + error_code_ = NoError; + suffix_regexp_ = NULL; + entire_regexp_ = NULL; + prog_ = NULL; + rprog_ = NULL; + named_groups_ = NULL; + group_names_ = NULL; + num_captures_ = -1; + + RegexpStatus status; + entire_regexp_ = Regexp::Parse( + pattern_, + static_cast(options_.ParseFlags()), + &status); + if (entire_regexp_ == NULL) { + if (error_ == empty_string) + error_ = new string(status.Text()); + if (options_.log_errors()) { + LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': " + << 
status.Text(); + } + error_arg_ = status.error_arg().as_string(); + error_code_ = RegexpErrorToRE2(status.code()); + return; + } + + prefix_.clear(); + prefix_foldcase_ = false; + re2::Regexp* suffix; + if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix)) + suffix_regexp_ = suffix; + else + suffix_regexp_ = entire_regexp_->Incref(); + + // Two thirds of the memory goes to the forward Prog, + // one third to the reverse prog, because the forward + // Prog has two DFAs but the reverse prog has one. + prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3); + if (prog_ == NULL) { + if (options_.log_errors()) + LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'"; + error_ = new string("pattern too large - compile failed"); + error_code_ = RE2::ErrorPatternTooLarge; + return; + } + + // Could delay this until the first match call that + // cares about submatch information, but the one-pass + // machine's memory gets cut from the DFA memory budget, + // and that is harder to do if the DFA has already + // been built. + is_one_pass_ = prog_->IsOnePass(); +} + +// Returns rprog_, computing it if needed. 
+re2::Prog* RE2::ReverseProg() const {  // built on first use, under mutex_
+  MutexLock l(mutex_);
+  if (rprog_ == NULL && error_ == empty_string) {  // not built yet and no earlier error
+    rprog_ = suffix_regexp_->CompileToReverseProg(options_.max_mem()/3);
+    if (rprog_ == NULL) {
+      if (options_.log_errors())
+        LOG(ERROR) << "Error reverse compiling '" << trunc(pattern_) << "'";
+      error_ = new string("pattern too large - reverse compile failed");
+      error_code_ = RE2::ErrorPatternTooLarge;
+      return NULL;
+    }
+  }
+  return rprog_;
+}
+
+RE2::~RE2() {  // the shared empty_* sentinels are compared against, never deleted
+  if (suffix_regexp_)
+    suffix_regexp_->Decref();
+  if (entire_regexp_)
+    entire_regexp_->Decref();
+  delete mutex_;
+  delete prog_;
+  delete rprog_;
+  if (error_ != empty_string)
+    delete error_;
+  if (named_groups_ != NULL && named_groups_ != empty_named_groups)
+    delete named_groups_;
+  if (group_names_ != NULL && group_names_ != empty_group_names)
+    delete group_names_;
+}
+
+int RE2::ProgramSize() const {  // -1 when there is no compiled program
+  if (prog_ == NULL)
+    return -1;
+  return prog_->size();
+}
+
+int RE2::ProgramFanout(map<int, int>* histogram) const {  // returns the largest bucket, or -1 without a program
+  if (prog_ == NULL)
+    return -1;
+  SparseArray<int> fanout(prog_->size());
+  prog_->Fanout(&fanout);
+  histogram->clear();
+  for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
+    // TODO(junyer): Optimise this?
+    int bucket = 0;  // becomes the smallest b with (1 << b) >= i->second
+    while (1 << bucket < i->second) {
+      bucket++;
+    }
+    (*histogram)[bucket]++;
+  }
+  return histogram->rbegin()->first;
+}
+
+// Returns named_groups_, computing it if needed.
+const map<string, int>& RE2::NamedCapturingGroups() const {
+  MutexLock l(mutex_);
+  if (!ok())
+    return *empty_named_groups;
+  if (named_groups_ == NULL) {
+    named_groups_ = suffix_regexp_->NamedCaptures();
+    if (named_groups_ == NULL)  // cache the shared empty map instead of NULL
+      named_groups_ = empty_named_groups;
+  }
+  return *named_groups_;
+}
+
+// Returns group_names_, computing it if needed.
+const map& RE2::CapturingGroupNames() const { + MutexLock l(mutex_); + if (!ok()) + return *empty_group_names; + if (group_names_ == NULL) { + group_names_ = suffix_regexp_->CaptureNames(); + if (group_names_ == NULL) + group_names_ = empty_group_names; + } + return *group_names_; +} + +/***** Convenience interfaces *****/ + +bool RE2::FullMatchN(const StringPiece& text, const RE2& re, + const Arg* const args[], int n) { + return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n); +} + +bool RE2::PartialMatchN(const StringPiece& text, const RE2& re, + const Arg* const args[], int n) { + return re.DoMatch(text, UNANCHORED, NULL, args, n); +} + +bool RE2::ConsumeN(StringPiece* input, const RE2& re, + const Arg* const args[], int n) { + int consumed; + if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) { + input->remove_prefix(consumed); + return true; + } else { + return false; + } +} + +bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re, + const Arg* const args[], int n) { + int consumed; + if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) { + input->remove_prefix(consumed); + return true; + } else { + return false; + } +} + +// Returns the maximum submatch needed for the rewrite to be done by Replace(). +// E.g. if rewrite == "foo \\2,\\1", returns 2. +int RE2::MaxSubmatch(const StringPiece& rewrite) { + int max = 0; + for (const char *s = rewrite.data(), *end = s + rewrite.size(); + s < end; s++) { + if (*s == '\\') { + s++; + int c = (s < end) ? 
*s : -1; + if (isdigit(c)) { + int n = (c - '0'); + if (n > max) + max = n; + } + } + } + return max; +} + +bool RE2::Replace(string *str, + const RE2& re, + const StringPiece& rewrite) { + StringPiece vec[kVecSize]; + int nvec = 1 + MaxSubmatch(rewrite); + if (nvec > arraysize(vec)) + return false; + if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec)) + return false; + + string s; + if (!re.Rewrite(&s, rewrite, vec, nvec)) + return false; + + assert(vec[0].begin() >= str->data()); + assert(vec[0].end() <= str->data()+str->size()); + str->replace(vec[0].data() - str->data(), vec[0].size(), s); + return true; +} + +int RE2::GlobalReplace(string *str, + const RE2& re, + const StringPiece& rewrite) { + StringPiece vec[kVecSize]; + int nvec = 1 + MaxSubmatch(rewrite); + if (nvec > arraysize(vec)) + return false; + + const char* p = str->data(); + const char* ep = p + str->size(); + const char* lastend = NULL; + string out; + int count = 0; + while (p <= ep) { + if (!re.Match(*str, p - str->data(), str->size(), UNANCHORED, vec, nvec)) + break; + if (p < vec[0].begin()) + out.append(p, vec[0].begin() - p); + if (vec[0].begin() == lastend && vec[0].size() == 0) { + // Disallow empty match at end of last match: skip ahead. 
+ if (p < ep) + out.append(p, 1); + p++; + continue; + } + re.Rewrite(&out, rewrite, vec, nvec); + p = vec[0].end(); + lastend = p; + count++; + } + + if (count == 0) + return 0; + + if (p < ep) + out.append(p, ep - p); + swap(out, *str); + return count; +} + +bool RE2::Extract(const StringPiece &text, + const RE2& re, + const StringPiece &rewrite, + string *out) { + StringPiece vec[kVecSize]; + int nvec = 1 + MaxSubmatch(rewrite); + if (nvec > arraysize(vec)) + return false; + + if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec)) + return false; + + out->clear(); + return re.Rewrite(out, rewrite, vec, nvec); +} + +string RE2::QuoteMeta(const StringPiece& unquoted) { + string result; + result.reserve(unquoted.size() << 1); + + // Escape any ascii character not in [A-Za-z_0-9]. + // + // Note that it's legal to escape a character even if it has no + // special meaning in a regular expression -- so this function does + // that. (This also makes it identical to the perl function of the + // same name except for the null-character special case; + // see `perldoc -f quotemeta`.) + for (int ii = 0; ii < unquoted.length(); ++ii) { + // Note that using 'isalnum' here raises the benchmark time from + // 32ns to 58ns: + if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && + (unquoted[ii] < 'A' || unquoted[ii] > 'Z') && + (unquoted[ii] < '0' || unquoted[ii] > '9') && + unquoted[ii] != '_' && + // If this is the part of a UTF8 or Latin1 character, we need + // to copy this byte without escaping. Experimentally this is + // what works correctly with the regexp library. + !(unquoted[ii] & 128)) { + if (unquoted[ii] == '\0') { // Special handling for null chars. + // Note that this special handling is not strictly required for RE2, + // but this quoting is required for other regexp libraries such as + // PCRE. + // Can't use "\\0" since the next character might be a digit. 
+ result += "\\x00"; + continue; + } + result += '\\'; + } + result += unquoted[ii]; + } + + return result; +} + +bool RE2::PossibleMatchRange(string* min, string* max, int maxlen) const { + if (prog_ == NULL) + return false; + + int n = prefix_.size(); + if (n > maxlen) + n = maxlen; + + // Determine initial min max from prefix_ literal. + string pmin, pmax; + pmin = prefix_.substr(0, n); + pmax = prefix_.substr(0, n); + if (prefix_foldcase_) { + // prefix is ASCII lowercase; change pmin to uppercase. + for (int i = 0; i < n; i++) { + if ('a' <= pmin[i] && pmin[i] <= 'z') + pmin[i] += 'A' - 'a'; + } + } + + // Add to prefix min max using PossibleMatchRange on regexp. + string dmin, dmax; + maxlen -= n; + if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) { + pmin += dmin; + pmax += dmax; + } else if (pmax.size() > 0) { + // prog_->PossibleMatchRange has failed us, + // but we still have useful information from prefix_. + // Round up pmax to allow any possible suffix. + pmax = PrefixSuccessor(pmax); + } else { + // Nothing useful. + *min = ""; + *max = ""; + return false; + } + + *min = pmin; + *max = pmax; + return true; +} + +// Avoid possible locale nonsense in standard strcasecmp. +// The string a is known to be all lowercase. 
+static int ascii_strcasecmp(const char* a, const char* b, int len) { + const char *ae = a + len; + + for (; a < ae; a++, b++) { + uint8 x = *a; + uint8 y = *b; + if ('A' <= y && y <= 'Z') + y += 'a' - 'A'; + if (x != y) + return x - y; + } + return 0; +} + + +/***** Actual matching and rewriting code *****/ + +bool RE2::Match(const StringPiece& text, + int startpos, + int endpos, + Anchor re_anchor, + StringPiece* submatch, + int nsubmatch) const { + if (!ok() || suffix_regexp_ == NULL) { + if (options_.log_errors()) + LOG(ERROR) << "Invalid RE2: " << *error_; + return false; + } + + if (startpos < 0 || startpos > endpos || endpos > text.size()) { + if (options_.log_errors()) + LOG(ERROR) << "RE2: invalid startpos, endpos pair. [" + << "startpos: " << startpos << ", " + << "endpos: " << endpos << ", " + << "text size: " << text.size() << "]"; + return false; + } + + StringPiece subtext = text; + subtext.remove_prefix(startpos); + subtext.remove_suffix(text.size() - endpos); + + // Use DFAs to find exact location of match, filter out non-matches. + + // Don't ask for the location if we won't use it. + // SearchDFA can do extra optimizations in that case. + StringPiece match; + StringPiece* matchp = &match; + if (nsubmatch == 0) + matchp = NULL; + + int ncap = 1 + NumberOfCapturingGroups(); + if (ncap > nsubmatch) + ncap = nsubmatch; + + // If the regexp is anchored explicitly, must not be in middle of text. + if (prog_->anchor_start() && startpos != 0) + return false; + + // If the regexp is anchored explicitly, update re_anchor + // so that we can potentially fall into a faster case below. + if (prog_->anchor_start() && prog_->anchor_end()) + re_anchor = ANCHOR_BOTH; + else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH) + re_anchor = ANCHOR_START; + + // Check for the required prefix, if any. 
+ int prefixlen = 0; + if (!prefix_.empty()) { + if (startpos != 0) + return false; + prefixlen = prefix_.size(); + if (prefixlen > subtext.size()) + return false; + if (prefix_foldcase_) { + if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0) + return false; + } else { + if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0) + return false; + } + subtext.remove_prefix(prefixlen); + // If there is a required prefix, the anchor must be at least ANCHOR_START. + if (re_anchor != ANCHOR_BOTH) + re_anchor = ANCHOR_START; + } + + Prog::Anchor anchor = Prog::kUnanchored; + Prog::MatchKind kind = Prog::kFirstMatch; + if (options_.longest_match()) + kind = Prog::kLongestMatch; + bool skipped_test = false; + + bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture); + + // SearchBitState allocates a bit vector of size prog_->size() * text.size(). + // It also allocates a stack of 3-word structures which could potentially + // grow as large as prog_->size() * text.size() but in practice is much + // smaller. + // Conditions for using SearchBitState: + const int MaxBitStateProg = 500; // prog_->size() <= Max. + const int MaxBitStateVector = 256*1024; // bit vector size <= Max (bits) + bool can_bit_state = prog_->size() <= MaxBitStateProg; + int bit_state_text_max = MaxBitStateVector / prog_->size(); + + bool dfa_failed = false; + switch (re_anchor) { + default: + case UNANCHORED: { + if (!prog_->SearchDFA(subtext, text, anchor, kind, + matchp, &dfa_failed, NULL)) { + if (dfa_failed) { + // Fall back to NFA below. 
+ skipped_test = true; + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " DFA failed."; + break; + } + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " used DFA - no match."; + return false; + } + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " used DFA - match"; + if (matchp == NULL) // Matched. Don't care where + return true; + // SearchDFA set match[0].end() but didn't know where the + // match started. Run the regexp backward from match[0].end() + // to find the longest possible match -- that's where it started. + Prog* prog = ReverseProg(); + if (prog == NULL) + return false; + if (!prog->SearchDFA(match, text, Prog::kAnchored, + Prog::kLongestMatch, &match, &dfa_failed, NULL)) { + if (dfa_failed) { + // Fall back to NFA below. + skipped_test = true; + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " reverse DFA failed."; + break; + } + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " DFA inconsistency."; + if (options_.log_errors()) + LOG(ERROR) << "DFA inconsistency"; + return false; + } + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " used reverse DFA."; + break; + } + + case ANCHOR_BOTH: + case ANCHOR_START: + if (re_anchor == ANCHOR_BOTH) + kind = Prog::kFullMatch; + anchor = Prog::kAnchored; + + // If only a small amount of text and need submatch + // information anyway and we're going to use OnePass or BitState + // to get it, we might as well not even bother with the DFA: + // OnePass or BitState will be fast enough. + // On tiny texts, OnePass outruns even the DFA, and + // it doesn't have the shared state and occasional mutex that + // the DFA does. 
+ if (can_one_pass && text.size() <= 4096 && + (ncap > 1 || text.size() <= 8)) { + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " skipping DFA for OnePass."; + skipped_test = true; + break; + } + if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) { + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " skipping DFA for BitState."; + skipped_test = true; + break; + } + if (!prog_->SearchDFA(subtext, text, anchor, kind, + &match, &dfa_failed, NULL)) { + if (dfa_failed) { + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " DFA failed."; + skipped_test = true; + break; + } + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " used DFA - no match."; + return false; + } + break; + } + + if (!skipped_test && ncap <= 1) { + // We know exactly where it matches. That's enough. + if (ncap == 1) + submatch[0] = match; + } else { + StringPiece subtext1; + if (skipped_test) { + // DFA ran out of memory or was skipped: + // need to search in entire original text. + subtext1 = subtext; + } else { + // DFA found the exact match location: + // let NFA run an anchored, full match search + // to find submatch locations. 
+ subtext1 = match; + anchor = Prog::kAnchored; + kind = Prog::kFullMatch; + } + + if (can_one_pass && anchor != Prog::kUnanchored) { + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " using OnePass."; + if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) { + if (!skipped_test && options_.log_errors()) + LOG(ERROR) << "SearchOnePass inconsistency"; + return false; + } + } else if (can_bit_state && subtext1.size() <= bit_state_text_max) { + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " using BitState."; + if (!prog_->SearchBitState(subtext1, text, anchor, + kind, submatch, ncap)) { + if (!skipped_test && options_.log_errors()) + LOG(ERROR) << "SearchBitState inconsistency"; + return false; + } + } else { + if (FLAGS_trace_re2) + LOG(INFO) << "Match " << trunc(pattern_) + << " [" << CEscape(subtext) << "]" + << " using NFA."; + if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) { + if (!skipped_test && options_.log_errors()) + LOG(ERROR) << "SearchNFA inconsistency"; + return false; + } + } + } + + // Adjust overall match for required prefix that we stripped off. + if (prefixlen > 0 && nsubmatch > 0) + submatch[0] = StringPiece(submatch[0].begin() - prefixlen, + submatch[0].size() + prefixlen); + + // Zero submatches that don't exist in the regexp. + for (int i = ncap; i < nsubmatch; i++) + submatch[i] = NULL; + return true; +} + +// Internal matcher - like Match() but takes Args not StringPieces. +bool RE2::DoMatch(const StringPiece& text, + Anchor anchor, + int* consumed, + const Arg* const* args, + int n) const { + if (!ok()) { + if (options_.log_errors()) + LOG(ERROR) << "Invalid RE2: " << *error_; + return false; + } + + // Count number of capture groups needed. 
+ int nvec; + if (n == 0 && consumed == NULL) + nvec = 0; + else + nvec = n+1; + + StringPiece* vec; + StringPiece stkvec[kVecSize]; + StringPiece* heapvec = NULL; + + if (nvec <= arraysize(stkvec)) { + vec = stkvec; + } else { + vec = new StringPiece[nvec]; + heapvec = vec; + } + + if (!Match(text, 0, text.size(), anchor, vec, nvec)) { + delete[] heapvec; + return false; + } + + if(consumed != NULL) + *consumed = vec[0].end() - text.begin(); + + if (n == 0 || args == NULL) { + // We are not interested in results + delete[] heapvec; + return true; + } + + int ncap = NumberOfCapturingGroups(); + if (ncap < n) { + // RE has fewer capturing groups than number of arg pointers passed in + VLOG(1) << "Asked for " << n << " but only have " << ncap; + delete[] heapvec; + return false; + } + + // If we got here, we must have matched the whole pattern. + for (int i = 0; i < n; i++) { + const StringPiece& s = vec[i+1]; + if (!args[i]->Parse(s.data(), s.size())) { + // TODO: Should we indicate what the error was? + VLOG(1) << "Parse error on #" << i << " " << s << " " + << (void*)s.data() << "/" << s.size(); + delete[] heapvec; + return false; + } + } + + delete[] heapvec; + return true; +} + +// Append the "rewrite" string, with backslash subsitutions from "vec", +// to string "out". +bool RE2::Rewrite(string *out, const StringPiece &rewrite, + const StringPiece *vec, int veclen) const { + for (const char *s = rewrite.data(), *end = s + rewrite.size(); + s < end; s++) { + int c = *s; + if (c == '\\') { + s++; + c = (s < end) ? 
*s : -1; + if (isdigit(c)) { + int n = (c - '0'); + if (n >= veclen) { + if (options_.log_errors()) { + LOG(ERROR) << "requested group " << n + << " in regexp " << rewrite.data(); + } + return false; + } + StringPiece snip = vec[n]; + if (snip.size() > 0) + out->append(snip.data(), snip.size()); + } else if (c == '\\') { + out->push_back('\\'); + } else { + if (options_.log_errors()) + LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data(); + return false; + } + } else { + out->push_back(c); + } + } + return true; +} + +// Return the number of capturing subpatterns, or -1 if the +// regexp wasn't valid on construction. +int RE2::NumberOfCapturingGroups() const { + if (suffix_regexp_ == NULL) + return -1; + int n; + ATOMIC_LOAD_RELAXED(n, &num_captures_); + if (n == -1) { + n = suffix_regexp_->NumCaptures(); + ATOMIC_STORE_RELAXED(&num_captures_, n); + } + return n; +} + +// Checks that the rewrite string is well-formed with respect to this +// regular expression. +bool RE2::CheckRewriteString(const StringPiece& rewrite, string* error) const { + int max_token = -1; + for (const char *s = rewrite.data(), *end = s + rewrite.size(); + s < end; s++) { + int c = *s; + if (c != '\\') { + continue; + } + if (++s == end) { + *error = "Rewrite schema error: '\\' not allowed at end."; + return false; + } + c = *s; + if (c == '\\') { + continue; + } + if (!isdigit(c)) { + *error = "Rewrite schema error: " + "'\\' must be followed by a digit or '\\'."; + return false; + } + int n = (c - '0'); + if (max_token < n) { + max_token = n; + } + } + + if (max_token > NumberOfCapturingGroups()) { + SStringPrintf(error, "Rewrite schema requests %d matches, " + "but the regexp only has %d parenthesized subexpressions.", + max_token, NumberOfCapturingGroups()); + return false; + } + return true; +} + +/***** Parsers for various types *****/ + +bool RE2::Arg::parse_null(const char* str, int n, void* dest) { + // We fail if somebody asked us to store into a non-NULL void* pointer + 
return (dest == NULL); +} + +bool RE2::Arg::parse_string(const char* str, int n, void* dest) { + if (dest == NULL) return true; + reinterpret_cast(dest)->assign(str, n); + return true; +} + +bool RE2::Arg::parse_stringpiece(const char* str, int n, void* dest) { + if (dest == NULL) return true; + reinterpret_cast(dest)->set(str, n); + return true; +} + +bool RE2::Arg::parse_char(const char* str, int n, void* dest) { + if (n != 1) return false; + if (dest == NULL) return true; + *(reinterpret_cast(dest)) = str[0]; + return true; +} + +bool RE2::Arg::parse_uchar(const char* str, int n, void* dest) { + if (n != 1) return false; + if (dest == NULL) return true; + *(reinterpret_cast(dest)) = str[0]; + return true; +} + +// Largest number spec that we are willing to parse +static const int kMaxNumberLength = 32; + +// REQUIRES "buf" must have length at least nbuf. +// Copies "str" into "buf" and null-terminates. +// Overwrites *np with the new length. +static const char* TerminateNumber(char* buf, int nbuf, const char* str, int* np, + bool accept_spaces) { + int n = *np; + if (n <= 0) return ""; + if (n > 0 && isspace(*str)) { + // We are less forgiving than the strtoxxx() routines and do not + // allow leading spaces. We do allow leading spaces for floats. + if (!accept_spaces) { + return ""; + } + while (n > 0 && isspace(*str)) { + n--; + str++; + } + } + + // Although buf has a fixed maximum size, we can still handle + // arbitrarily large integers correctly by omitting leading zeros. + // (Numbers that are still too long will be out of range.) + // Before deciding whether str is too long, + // remove leading zeros with s/000+/00/. + // Leaving the leading two zeros in place means that + // we don't change 0000x123 (invalid) into 0x123 (valid). + // Skip over leading - before replacing. 
+ bool neg = false; + if (n >= 1 && str[0] == '-') { + neg = true; + n--; + str++; + } + + if (n >= 3 && str[0] == '0' && str[1] == '0') { + while (n >= 3 && str[2] == '0') { + n--; + str++; + } + } + + if (neg) { // make room in buf for - + n++; + str--; + } + + if (n > nbuf-1) return ""; + + memmove(buf, str, n); + if (neg) { + buf[0] = '-'; + } + buf[n] = '\0'; + *np = n; + return buf; +} + +bool RE2::Arg::parse_long_radix(const char* str, + int n, + void* dest, + int radix) { + if (n == 0) return false; + char buf[kMaxNumberLength+1]; + str = TerminateNumber(buf, sizeof buf, str, &n, false); + char* end; + errno = 0; + long r = strtol(str, &end, radix); + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; + *(reinterpret_cast(dest)) = r; + return true; +} + +bool RE2::Arg::parse_ulong_radix(const char* str, + int n, + void* dest, + int radix) { + if (n == 0) return false; + char buf[kMaxNumberLength+1]; + str = TerminateNumber(buf, sizeof buf, str, &n, false); + if (str[0] == '-') { + // strtoul() will silently accept negative numbers and parse + // them. This module is more strict and treats them as errors. 
+ return false; + } + + char* end; + errno = 0; + unsigned long r = strtoul(str, &end, radix); + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; + *(reinterpret_cast(dest)) = r; + return true; +} + +bool RE2::Arg::parse_short_radix(const char* str, + int n, + void* dest, + int radix) { + long r; + if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse + if ((short)r != r) return false; // Out of range + if (dest == NULL) return true; + *(reinterpret_cast(dest)) = r; + return true; +} + +bool RE2::Arg::parse_ushort_radix(const char* str, + int n, + void* dest, + int radix) { + unsigned long r; + if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse + if ((ushort)r != r) return false; // Out of range + if (dest == NULL) return true; + *(reinterpret_cast(dest)) = r; + return true; +} + +bool RE2::Arg::parse_int_radix(const char* str, + int n, + void* dest, + int radix) { + long r; + if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse + if ((int)r != r) return false; // Out of range + if (dest == NULL) return true; + *(reinterpret_cast(dest)) = r; + return true; +} + +bool RE2::Arg::parse_uint_radix(const char* str, + int n, + void* dest, + int radix) { + unsigned long r; + if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse + if ((uint)r != r) return false; // Out of range + if (dest == NULL) return true; + *(reinterpret_cast(dest)) = r; + return true; +} + +#if RE2_HAVE_LONGLONG +bool RE2::Arg::parse_longlong_radix(const char* str, + int n, + void* dest, + int radix) { + if (n == 0) return false; + char buf[kMaxNumberLength+1]; + str = TerminateNumber(buf, sizeof buf, str, &n, false); + char* end; + errno = 0; + int64 r = strtoll(str, &end, radix); + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; + *(reinterpret_cast(dest)) = r; + return true; +} + +bool 
RE2::Arg::parse_ulonglong_radix(const char* str, + int n, + void* dest, + int radix) { + if (n == 0) return false; + char buf[kMaxNumberLength+1]; + str = TerminateNumber(buf, sizeof buf, str, &n, false); + if (str[0] == '-') { + // strtoull() will silently accept negative numbers and parse + // them. This module is more strict and treats them as errors. + return false; + } + char* end; + errno = 0; + uint64 r = strtoull(str, &end, radix); + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; + *(reinterpret_cast(dest)) = r; + return true; +} +#endif + +static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) { + if (n == 0) return false; + static const int kMaxLength = 200; + char buf[kMaxLength+1]; + str = TerminateNumber(buf, sizeof buf, str, &n, true); + char* end; + errno = 0; + double r; + if (isfloat) { + r = strtof(str, &end); + } else { + r = strtod(str, &end); + } + if (end != str + n) return false; // Leftover junk + if (errno) return false; + if (dest == NULL) return true; + if (isfloat) { + *(reinterpret_cast(dest)) = r; + } else { + *(reinterpret_cast(dest)) = r; + } + return true; +} + +bool RE2::Arg::parse_double(const char* str, int n, void* dest) { + return parse_double_float(str, n, false, dest); +} + +bool RE2::Arg::parse_float(const char* str, int n, void* dest) { + return parse_double_float(str, n, true, dest); +} + + +#define DEFINE_INTEGER_PARSERS(name) \ + bool RE2::Arg::parse_##name(const char* str, int n, void* dest) { \ + return parse_##name##_radix(str, n, dest, 10); \ + } \ + bool RE2::Arg::parse_##name##_hex(const char* str, int n, void* dest) { \ + return parse_##name##_radix(str, n, dest, 16); \ + } \ + bool RE2::Arg::parse_##name##_octal(const char* str, int n, void* dest) { \ + return parse_##name##_radix(str, n, dest, 8); \ + } \ + bool RE2::Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \ + return parse_##name##_radix(str, n, 
dest, 0); \ + } + +DEFINE_INTEGER_PARSERS(short); +DEFINE_INTEGER_PARSERS(ushort); +DEFINE_INTEGER_PARSERS(int); +DEFINE_INTEGER_PARSERS(uint); +DEFINE_INTEGER_PARSERS(long); +DEFINE_INTEGER_PARSERS(ulong); +DEFINE_INTEGER_PARSERS(longlong); +DEFINE_INTEGER_PARSERS(ulonglong); + +#undef DEFINE_INTEGER_PARSERS + +} // namespace re2 diff --git a/src/openalpr/support/re2/regexp.cc b/src/openalpr/support/re2/regexp.cc new file mode 100644 index 0000000..3667fda --- /dev/null +++ b/src/openalpr/support/re2/regexp.cc @@ -0,0 +1,937 @@ +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Regular expression representation. +// Tested by parse_test.cc + +#include "util/util.h" +#include "re2/regexp.h" +#include "re2/stringpiece.h" +#include "re2/walker-inl.h" + +namespace re2 { + +// Constructor. Allocates vectors as appropriate for operator. +Regexp::Regexp(RegexpOp op, ParseFlags parse_flags) + : op_(op), + simple_(false), + parse_flags_(static_cast(parse_flags)), + ref_(1), + nsub_(0), + down_(NULL) { + subone_ = NULL; + memset(the_union_, 0, sizeof the_union_); +} + +// Destructor. Assumes already cleaned up children. +// Private: use Decref() instead of delete to destroy Regexps. +// Can't call Decref on the sub-Regexps here because +// that could cause arbitrarily deep recursion, so +// required Decref() to have handled them for us. +Regexp::~Regexp() { + if (nsub_ > 0) + LOG(DFATAL) << "Regexp not destroyed."; + + switch (op_) { + default: + break; + case kRegexpCapture: + delete name_; + break; + case kRegexpLiteralString: + delete[] runes_; + break; + case kRegexpCharClass: + if (cc_) + cc_->Delete(); + delete ccb_; + break; + } +} + +// If it's possible to destroy this regexp without recurring, +// do so and return true. Else return false. 
+bool Regexp::QuickDestroy() { + if (nsub_ == 0) { + delete this; + return true; + } + return false; +} + +static map *ref_map; +GLOBAL_MUTEX(ref_mutex); + +int Regexp::Ref() { + if (ref_ < kMaxRef) + return ref_; + + GLOBAL_MUTEX_LOCK(ref_mutex); + int r = 0; + if (ref_map != NULL) { + r = (*ref_map)[this]; + } + GLOBAL_MUTEX_UNLOCK(ref_mutex); + return r; +} + +// Increments reference count, returns object as convenience. +Regexp* Regexp::Incref() { + if (ref_ >= kMaxRef-1) { + // Store ref count in overflow map. + GLOBAL_MUTEX_LOCK(ref_mutex); + if (ref_map == NULL) { + ref_map = new map; + } + if (ref_ == kMaxRef) { + // already overflowed + (*ref_map)[this]++; + } else { + // overflowing now + (*ref_map)[this] = kMaxRef; + ref_ = kMaxRef; + } + GLOBAL_MUTEX_UNLOCK(ref_mutex); + return this; + } + + ref_++; + return this; +} + +// Decrements reference count and deletes this object if count reaches 0. +void Regexp::Decref() { + if (ref_ == kMaxRef) { + // Ref count is stored in overflow map. + GLOBAL_MUTEX_LOCK(ref_mutex); + int r = (*ref_map)[this] - 1; + if (r < kMaxRef) { + ref_ = r; + ref_map->erase(this); + } else { + (*ref_map)[this] = r; + } + GLOBAL_MUTEX_UNLOCK(ref_mutex); + return; + } + ref_--; + if (ref_ == 0) + Destroy(); +} + +// Deletes this object; ref count has count reached 0. +void Regexp::Destroy() { + if (QuickDestroy()) + return; + + // Handle recursive Destroy with explicit stack + // to avoid arbitrarily deep recursion on process stack [sigh]. 
+  down_ = NULL;
+  Regexp* stack = this;
+  while (stack != NULL) {
+    Regexp* re = stack;
+    stack = re->down_;
+    if (re->ref_ != 0)
+      LOG(DFATAL) << "Bad reference count " << re->ref_;
+    if (re->nsub_ > 0) {
+      Regexp** subs = re->sub();
+      for (int i = 0; i < re->nsub_; i++) {
+        Regexp* sub = subs[i];
+        if (sub == NULL)
+          continue;
+        if (sub->ref_ == kMaxRef)
+          sub->Decref();
+        else
+          --sub->ref_;
+        if (sub->ref_ == 0 && !sub->QuickDestroy()) {
+          sub->down_ = stack;
+          stack = sub;
+        }
+      }
+      if (re->nsub_ > 1)
+        delete[] subs;
+      re->nsub_ = 0;
+    }
+    delete re;
+  }
+}
+
+void Regexp::AddRuneToString(Rune r) {  // appends r to runes_, growing the array as needed
+  DCHECK(op_ == kRegexpLiteralString);
+  if (nrunes_ == 0) {
+    // start with 8
+    runes_ = new Rune[8];
+  } else if (nrunes_ >= 8 && (nrunes_ & (nrunes_ - 1)) == 0) {
+    // double on powers of two
+    Rune *old = runes_;
+    runes_ = new Rune[nrunes_ * 2];
+    for (int i = 0; i < nrunes_; i++)
+      runes_[i] = old[i];
+    delete[] old;
+  }
+
+  runes_[nrunes_++] = r;
+}
+
+Regexp* Regexp::HaveMatch(int match_id, ParseFlags flags) {
+  Regexp* re = new Regexp(kRegexpHaveMatch, flags);
+  re->match_id_ = match_id;
+  return re;
+}
+
+Regexp* Regexp::Plus(Regexp* sub, ParseFlags flags) {
+  if (sub->op() == kRegexpPlus && sub->parse_flags() == flags)
+    return sub;  // sub is already a + with the same flags: reuse it
+  Regexp* re = new Regexp(kRegexpPlus, flags);
+  re->AllocSub(1);
+  re->sub()[0] = sub;
+  return re;
+}
+
+Regexp* Regexp::Star(Regexp* sub, ParseFlags flags) {
+  if (sub->op() == kRegexpStar && sub->parse_flags() == flags)
+    return sub;  // sub is already a * with the same flags: reuse it
+  Regexp* re = new Regexp(kRegexpStar, flags);
+  re->AllocSub(1);
+  re->sub()[0] = sub;
+  return re;
+}
+
+Regexp* Regexp::Quest(Regexp* sub, ParseFlags flags) {
+  if (sub->op() == kRegexpQuest && sub->parse_flags() == flags)
+    return sub;  // sub is already a ? with the same flags: reuse it
+  Regexp* re = new Regexp(kRegexpQuest, flags);
+  re->AllocSub(1);
+  re->sub()[0] = sub;
+  return re;
+}
+
+Regexp* Regexp::ConcatOrAlternate(RegexpOp op, Regexp** sub, int nsub,
+                                  ParseFlags flags, bool can_factor) {
+  if (nsub == 1)
+    return sub[0];
+
+  if (nsub == 0) {
+    if (op == kRegexpAlternate)
+      return new Regexp(kRegexpNoMatch, flags);
+    else
+      return new Regexp(kRegexpEmptyMatch, flags);
+  }
+
+  Regexp** subcopy = NULL;
+  if (op == kRegexpAlternate && can_factor) {
+    // Going to edit sub; make a copy so we don't step on caller.
+    subcopy = new Regexp*[nsub];
+    memmove(subcopy, sub, nsub * sizeof sub[0]);
+    sub = subcopy;
+    nsub = FactorAlternation(sub, nsub, flags);
+    if (nsub == 1) {
+      Regexp* re = sub[0];
+      delete[] subcopy;
+      return re;
+    }
+  }
+
+  if (nsub > kMaxNsub) {
+    // Too many subexpressions to fit in a single Regexp.
+    // Make a two-level tree. Two levels gets us to 65535^2.
+    int nbigsub = (nsub+kMaxNsub-1)/kMaxNsub;
+    Regexp* re = new Regexp(op, flags);
+    re->AllocSub(nbigsub);
+    Regexp** subs = re->sub();
+    for (int i = 0; i < nbigsub - 1; i++)
+      subs[i] = ConcatOrAlternate(op, sub+i*kMaxNsub, kMaxNsub, flags, false);
+    subs[nbigsub - 1] = ConcatOrAlternate(op, sub+(nbigsub-1)*kMaxNsub,
+                                          nsub - (nbigsub-1)*kMaxNsub, flags,
+                                          false);
+    delete[] subcopy;
+    return re;
+  }
+
+  Regexp* re = new Regexp(op, flags);
+  re->AllocSub(nsub);
+  Regexp** subs = re->sub();
+  for (int i = 0; i < nsub; i++)
+    subs[i] = sub[i];
+
+  delete[] subcopy;  // subcopy is NULL unless factoring made a copy above
+  return re;
+}
+
+Regexp* Regexp::Concat(Regexp** sub, int nsub, ParseFlags flags) {
+  return ConcatOrAlternate(kRegexpConcat, sub, nsub, flags, false);
+}
+
+Regexp* Regexp::Alternate(Regexp** sub, int nsub, ParseFlags flags) {
+  return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, true);
+}
+
+Regexp* Regexp::AlternateNoFactor(Regexp** sub, int nsub, ParseFlags flags) {
+  return ConcatOrAlternate(kRegexpAlternate, sub, nsub, flags, false);
+}
+
+Regexp* Regexp::Capture(Regexp* sub, ParseFlags flags, int cap) {
+  Regexp* re = new Regexp(kRegexpCapture, flags);
+  re->AllocSub(1);
+  re->sub()[0] = sub;
+  re->cap_ = cap;
+  return re;
+}
+
+Regexp* Regexp::Repeat(Regexp* sub, ParseFlags flags, int min, int max) {
+  Regexp* re = new Regexp(kRegexpRepeat, flags);
+  re->AllocSub(1);
+  re->sub()[0] = sub;
+  re->min_ = min;
+  re->max_ = max;
+  return re;
+}
+
+Regexp* Regexp::NewLiteral(Rune rune, ParseFlags flags) {
+  Regexp* re = new Regexp(kRegexpLiteral, flags);
+  re->rune_ = rune;
+  return re;
+}
+
+Regexp* Regexp::LiteralString(Rune* runes, int nrunes, ParseFlags flags) {
+  if (nrunes <= 0)
+    return new Regexp(kRegexpEmptyMatch, flags);
+  if (nrunes == 1)
+    return NewLiteral(runes[0], flags);
+  Regexp* re = new Regexp(kRegexpLiteralString, flags);
+  for (int i = 0; i < nrunes; i++)
+    re->AddRuneToString(runes[i]);
+  return re;
+}
+
+Regexp* Regexp::NewCharClass(CharClass* cc, ParseFlags flags) {
+  Regexp* re = new Regexp(kRegexpCharClass, flags);
+  re->cc_ = cc;
+  return re;
+}
+
+// Swaps this and that in place.
+void Regexp::Swap(Regexp* that) {
+  // Can use memmove because Regexp is just a struct (no vtable).
+  char tmp[sizeof *this];
+  memmove(tmp, this, sizeof tmp);
+  memmove(this, that, sizeof tmp);
+  memmove(that, tmp, sizeof tmp);
+}
+
+// Tests equality of all top-level structure but not subregexps.
+static bool TopEqual(Regexp* a, Regexp* b) {
+  if (a->op() != b->op())
+    return false;
+
+  switch (a->op()) {
+    case kRegexpNoMatch:
+    case kRegexpEmptyMatch:
+    case kRegexpAnyChar:
+    case kRegexpAnyByte:
+    case kRegexpBeginLine:
+    case kRegexpEndLine:
+    case kRegexpWordBoundary:
+    case kRegexpNoWordBoundary:
+    case kRegexpBeginText:
+      return true;
+
+    case kRegexpEndText:
+      // The parse flags remember whether it's \z or (?-m:$),
+      // which matters when testing against PCRE.
+      return ((a->parse_flags() ^ b->parse_flags()) & Regexp::WasDollar) == 0;
+
+    case kRegexpLiteral:
+      return a->rune() == b->rune() &&
+             ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0;
+
+    case kRegexpLiteralString:
+      return a->nrunes() == b->nrunes() &&
+             ((a->parse_flags() ^ b->parse_flags()) & Regexp::FoldCase) == 0 &&
+             memcmp(a->runes(), b->runes(),
+                    a->nrunes() * sizeof a->runes()[0]) == 0;
+
+    case kRegexpAlternate:
+    case kRegexpConcat:
+      return a->nsub() == b->nsub();
+
+    case kRegexpStar:
+    case kRegexpPlus:
+    case kRegexpQuest:
+      return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0;
+
+    case kRegexpRepeat:
+      return ((a->parse_flags() ^ b->parse_flags()) & Regexp::NonGreedy) == 0 &&
+             a->min() == b->min() &&
+             a->max() == b->max();
+
+    case kRegexpCapture:
+      return a->cap() == b->cap() && a->name() == b->name();
+
+    case kRegexpHaveMatch:
+      return a->match_id() == b->match_id();
+
+    case kRegexpCharClass: {
+      CharClass* acc = a->cc();
+      CharClass* bcc = b->cc();
+      return acc->size() == bcc->size() &&
+             acc->end() - acc->begin() == bcc->end() - bcc->begin() &&
+             memcmp(acc->begin(), bcc->begin(),
+                    (acc->end() - acc->begin()) * sizeof acc->begin()[0]) == 0;
+    }
+  }
+
+  LOG(DFATAL) << "Unexpected op in Regexp::Equal: " << a->op();
+  return 0;
+}
+
+bool Regexp::Equal(Regexp* a, Regexp* b) {
+  if (a == NULL || b == NULL)
+    return a == b;
+
+  if (!TopEqual(a, b))
+    return false;
+
+  // Fast path:
+  // return without allocating vector if there are no subregexps.
+  switch (a->op()) {
+    case kRegexpAlternate:
+    case kRegexpConcat:
+    case kRegexpStar:
+    case kRegexpPlus:
+    case kRegexpQuest:
+    case kRegexpRepeat:
+    case kRegexpCapture:
+      break;
+
+    default:
+      return true;
+  }
+
+  // Committed to doing real work.
+  // The stack (vector) has pairs of regexps waiting to
+  // be compared. The regexps are only equal if
+  // all the pairs end up being equal.
+  vector<Regexp*> stk;
+
+  for (;;) {
+    // Invariant: TopEqual(a, b) == true.
+    Regexp* a2;
+    Regexp* b2;
+    switch (a->op()) {
+      default:
+        break;
+      case kRegexpAlternate:
+      case kRegexpConcat:
+        for (int i = 0; i < a->nsub(); i++) {
+          a2 = a->sub()[i];
+          b2 = b->sub()[i];
+          if (!TopEqual(a2, b2))
+            return false;
+          stk.push_back(a2);
+          stk.push_back(b2);
+        }
+        break;
+
+      case kRegexpStar:
+      case kRegexpPlus:
+      case kRegexpQuest:
+      case kRegexpRepeat:
+      case kRegexpCapture:
+        a2 = a->sub()[0];
+        b2 = b->sub()[0];
+        if (!TopEqual(a2, b2))
+          return false;
+        // Really:
+        //   stk.push_back(a2);
+        //   stk.push_back(b2);
+        //   break;
+        // but faster to assign directly and loop.
+        a = a2;
+        b = b2;
+        continue;
+    }
+
+    int n = stk.size();
+    if (n == 0)
+      break;
+
+    a = stk[n-2];  // pop the most recently pushed (a, b) pair
+    b = stk[n-1];
+    stk.resize(n-2);
+  }
+
+  return true;
+}
+
+// Keep in sync with enum RegexpStatusCode in regexp.h
+static const char *kErrorStrings[] = {
+  "no error",
+  "unexpected error",
+  "invalid escape sequence",
+  "invalid character class",
+  "invalid character class range",
+  "missing ]",
+  "missing )",
+  "trailing \\",
+  "no argument for repetition operator",
+  "invalid repetition size",
+  "bad repetition operator",
+  "invalid perl operator",
+  "invalid UTF-8",
+  "invalid named capture group",
+};
+
+string RegexpStatus::CodeText(enum RegexpStatusCode code) {
+  if (code < 0 || code >= arraysize(kErrorStrings))
+    code = kRegexpInternalError;  // out-of-range codes report "unexpected error"
+  return kErrorStrings[code];
+}
+
+string RegexpStatus::Text() const {
+  if (error_arg_.empty())
+    return CodeText(code_);
+  string s;
+  s.append(CodeText(code_));
+  s.append(": ");
+  s.append(error_arg_.data(), error_arg_.size());
+  return s;
+}
+
+void RegexpStatus::Copy(const RegexpStatus& status) {
+  code_ = status.code_;
+  error_arg_ = status.error_arg_;
+}
+
+typedef int Ignored;  // Walker<void> doesn't exist
+
+// Walker subclass to count capturing parens in regexp.
+class NumCapturesWalker : public Regexp::Walker<Ignored> {
+ public:
+  NumCapturesWalker() : ncapture_(0) {}
+  int ncapture() { return ncapture_; }
+
+  virtual Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
+    if (re->op() == kRegexpCapture)
+      ncapture_++;
+    return ignored;
+  }
+  virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
+    // Should never be called: we use Walk not WalkExponential.
+    LOG(DFATAL) << "NumCapturesWalker::ShortVisit called";
+    return ignored;
+  }
+
+ private:
+  int ncapture_;  // number of kRegexpCapture nodes seen so far
+  DISALLOW_COPY_AND_ASSIGN(NumCapturesWalker);
+};
+
+int Regexp::NumCaptures() {
+  NumCapturesWalker w;
+  w.Walk(this, 0);
+  return w.ncapture();
+}
+
+// Walker class to build map of named capture groups and their indices.
+class NamedCapturesWalker : public Regexp::Walker<Ignored> {
+ public:
+  NamedCapturesWalker() : map_(NULL) {}
+  ~NamedCapturesWalker() { delete map_; }
+
+  map<string, int>* TakeMap() {  // hands the map to the caller; walker keeps NULL
+    map<string, int>* m = map_;
+    map_ = NULL;
+    return m;
+  }
+
+  Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
+    if (re->op() == kRegexpCapture && re->name() != NULL) {
+      // Allocate map once we find a name.
+      if (map_ == NULL)
+        map_ = new map<string, int>;
+
+      // Record first occurrence of each name.
+      // (The rule is that if you have the same name
+      // multiple times, only the leftmost one counts.)
+      if (map_->find(*re->name()) == map_->end())
+        (*map_)[*re->name()] = re->cap();
+    }
+    return ignored;
+  }
+
+  virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
+    // Should never be called: we use Walk not WalkExponential.
+    LOG(DFATAL) << "NamedCapturesWalker::ShortVisit called";
+    return ignored;
+  }
+
+ private:
+  map<string, int>* map_;  // capture name -> group index; NULL until a name is found
+  DISALLOW_COPY_AND_ASSIGN(NamedCapturesWalker);
+};
+
+map<string, int>* Regexp::NamedCaptures() {
+  NamedCapturesWalker w;
+  w.Walk(this, 0);
+  return w.TakeMap();
+}
+
+// Walker class to build map from capture group indices to their names.
+class CaptureNamesWalker : public Regexp::Walker<Ignored> {
+ public:
+  CaptureNamesWalker() : map_(NULL) {}
+  ~CaptureNamesWalker() { delete map_; }
+
+  map<int, string>* TakeMap() {  // hands the map to the caller; walker keeps NULL
+    map<int, string>* m = map_;
+    map_ = NULL;
+    return m;
+  }
+
+  Ignored PreVisit(Regexp* re, Ignored ignored, bool* stop) {
+    if (re->op() == kRegexpCapture && re->name() != NULL) {
+      // Allocate map once we find a name.
+      if (map_ == NULL)
+        map_ = new map<int, string>;
+
+      (*map_)[re->cap()] = *re->name();
+    }
+    return ignored;
+  }
+
+  virtual Ignored ShortVisit(Regexp* re, Ignored ignored) {
+    // Should never be called: we use Walk not WalkExponential.
+    LOG(DFATAL) << "CaptureNamesWalker::ShortVisit called";
+    return ignored;
+  }
+
+ private:
+  map<int, string>* map_;  // group index -> capture name; NULL until a name is found
+  DISALLOW_COPY_AND_ASSIGN(CaptureNamesWalker);
+};
+
+map<int, string>* Regexp::CaptureNames() {
+  CaptureNamesWalker w;
+  w.Walk(this, 0);
+  return w.TakeMap();
+}
+
+// Determines whether regexp matches must be anchored
+// with a fixed string prefix. If so, returns the prefix and
+// the regexp that remains after the prefix. The prefix might
+// be ASCII case-insensitive.
+bool Regexp::RequiredPrefix(string *prefix, bool *foldcase, Regexp** suffix) {
+  // No need for a walker: the regexp must be of the form
+  // 1. some number of ^ anchors
+  // 2. a literal char or string
+  // 3. the rest
+  prefix->clear();
+  *foldcase = false;
+  *suffix = NULL;
+  if (op_ != kRegexpConcat)
+    return false;
+
+  // Some number of anchors, then a literal or concatenation.
+  int i = 0;
+  Regexp** sub = this->sub();
+  while (i < nsub_ && sub[i]->op_ == kRegexpBeginText)
+    i++;
+  if (i == 0 || i >= nsub_)
+    return false;
+
+  Regexp* re = sub[i];
+  switch (re->op_) {
+    default:
+      return false;
+
+    case kRegexpLiteralString:
+      // Convert to string in proper encoding.
+      if (re->parse_flags() & Latin1) {
+        prefix->resize(re->nrunes_);
+        for (int j = 0; j < re->nrunes_; j++)
+          (*prefix)[j] = re->runes_[j];
+      } else {
+        // Convert to UTF-8 in place.
+        // Assume worst-case space and then trim.
+        prefix->resize(re->nrunes_ * UTFmax);
+        char *p = &(*prefix)[0];
+        for (int j = 0; j < re->nrunes_; j++) {
+          Rune r = re->runes_[j];
+          if (r < Runeself)
+            *p++ = r;
+          else
+            p += runetochar(p, &r);
+        }
+        prefix->resize(p - &(*prefix)[0]);
+      }
+      break;
+
+    case kRegexpLiteral:
+      if ((re->parse_flags() & Latin1) || re->rune_ < Runeself) {
+        prefix->append(1, re->rune_);
+      } else {
+        char buf[UTFmax];
+        prefix->append(buf, runetochar(buf, &re->rune_));
+      }
+      break;
+  }
+  *foldcase = (sub[i]->parse_flags() & FoldCase);
+  i++;
+
+  // The rest.
+  if (i < nsub_) {
+    for (int j = i; j < nsub_; j++)
+      sub[j]->Incref();
+    re = Concat(sub + i, nsub_ - i, parse_flags());
+  } else {
+    re = new Regexp(kRegexpEmptyMatch, parse_flags());
+  }
+  *suffix = re;
+  return true;
+}
+
+// Character class builder is a balanced binary tree (STL set)
+// containing non-overlapping, non-abutting RuneRanges.
+// The less-than operator used in the tree treats two
+// ranges as equal if they overlap at all, so that
+// lookups for a particular Rune are possible.
+
+CharClassBuilder::CharClassBuilder() {
+  nrunes_ = 0;
+  upper_ = 0;
+  lower_ = 0;
+}
+
+// Add lo-hi to the class; return whether class got bigger.
+bool CharClassBuilder::AddRange(Rune lo, Rune hi) {
+  if (hi < lo)
+    return false;
+
+  if (lo <= 'z' && hi >= 'A') {
+    // Overlaps some alpha, maybe not all.
+    // Update bitmaps telling which ASCII letters are in the set.
+    Rune lo1 = max<Rune>(lo, 'A');  // explicit <Rune>: arguments are Rune and char
+    Rune hi1 = min<Rune>(hi, 'Z');
+    if (lo1 <= hi1)
+      upper_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'A');
+
+    lo1 = max<Rune>(lo, 'a');
+    hi1 = min<Rune>(hi, 'z');
+    if (lo1 <= hi1)
+      lower_ |= ((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - 'a');
+  }
+
+  {  // Check whether lo, hi is already in the class.
+    iterator it = ranges_.find(RuneRange(lo, lo));
+    if (it != end() && it->lo <= lo && hi <= it->hi)
+      return false;
+  }
+
+  // Look for a range abutting lo on the left.
+  // If it exists, take it out and increase our range.
+  if (lo > 0) {
+    iterator it = ranges_.find(RuneRange(lo-1, lo-1));
+    if (it != end()) {
+      lo = it->lo;
+      if (it->hi > hi)
+        hi = it->hi;
+      nrunes_ -= it->hi - it->lo + 1;
+      ranges_.erase(it);
+    }
+  }
+
+  // Look for a range abutting hi on the right.
+  // If it exists, take it out and increase our range.
+  if (hi < Runemax) {
+    iterator it = ranges_.find(RuneRange(hi+1, hi+1));
+    if (it != end()) {
+      hi = it->hi;
+      nrunes_ -= it->hi - it->lo + 1;
+      ranges_.erase(it);
+    }
+  }
+
+  // Look for ranges between lo and hi. Take them out.
+  // This is only safe because the set has no overlapping ranges.
+  // We've already removed any ranges abutting lo and hi, so
+  // any that overlap [lo, hi] must be contained within it.
+  for (;;) {
+    iterator it = ranges_.find(RuneRange(lo, hi));
+    if (it == end())
+      break;
+    nrunes_ -= it->hi - it->lo + 1;
+    ranges_.erase(it);
+  }
+
+  // Finally, add [lo, hi].
+  nrunes_ += hi - lo + 1;
+  ranges_.insert(RuneRange(lo, hi));
+  return true;
+}
+
+void CharClassBuilder::AddCharClass(CharClassBuilder *cc) {
+  for (iterator it = cc->begin(); it != cc->end(); ++it)
+    AddRange(it->lo, it->hi);
+}
+
+bool CharClassBuilder::Contains(Rune r) {
+  return ranges_.find(RuneRange(r, r)) != end();
+}
+
+// Does the character class behave the same on A-Z as on a-z?
+bool CharClassBuilder::FoldsASCII() {
+  return ((upper_ ^ lower_) & AlphaMask) == 0;
+}
+
+CharClassBuilder* CharClassBuilder::Copy() {  // caller receives a newly allocated builder
+  CharClassBuilder* cc = new CharClassBuilder;
+  for (iterator it = begin(); it != end(); ++it)
+    cc->ranges_.insert(RuneRange(it->lo, it->hi));
+  cc->upper_ = upper_;
+  cc->lower_ = lower_;
+  cc->nrunes_ = nrunes_;
+  return cc;
+}
+
+
+
+void CharClassBuilder::RemoveAbove(Rune r) {  // drops every rune greater than r
+  if (r >= Runemax)
+    return;
+
+  if (r < 'z') {
+    if (r < 'a')
+      lower_ = 0;
+    else
+      lower_ &= AlphaMask >> ('z' - r);
+  }
+
+  if (r < 'Z') {
+    if (r < 'A')
+      upper_ = 0;
+    else
+      upper_ &= AlphaMask >> ('Z' - r);
+  }
+
+  for (;;) {
+
+    iterator it = ranges_.find(RuneRange(r + 1, Runemax));
+    if (it == end())
+      break;
+    RuneRange rr = *it;
+    ranges_.erase(it);
+    nrunes_ -= rr.hi - rr.lo + 1;
+    if (rr.lo <= r) {  // range straddles r: keep the part up to r
+      rr.hi = r;
+      ranges_.insert(rr);
+      nrunes_ += rr.hi - rr.lo + 1;
+    }
+  }
+}
+
+void CharClassBuilder::Negate() {
+  // Build up negation and then copy in.
+  // Could edit ranges in place, but C++ won't let me.
+  vector<RuneRange> v;
+  v.reserve(ranges_.size() + 1);
+
+  // In negation, first range begins at 0, unless
+  // the current class begins at 0.
+  iterator it = begin();
+  if (it == end()) {
+    v.push_back(RuneRange(0, Runemax));
+  } else {
+    int nextlo = 0;
+    if (it->lo == 0) {
+      nextlo = it->hi + 1;
+      ++it;
+    }
+    for (; it != end(); ++it) {
+      v.push_back(RuneRange(nextlo, it->lo - 1));
+      nextlo = it->hi + 1;
+    }
+    if (nextlo <= Runemax)
+      v.push_back(RuneRange(nextlo, Runemax));
+  }
+
+  ranges_.clear();
+  for (size_t i = 0; i < v.size(); i++)
+    ranges_.insert(v[i]);
+
+  upper_ = AlphaMask & ~upper_;
+  lower_ = AlphaMask & ~lower_;
+  nrunes_ = Runemax+1 - nrunes_;
+}
+
+// Character class is a sorted list of ranges.
+// The ranges are allocated in the same block as the header,
+// necessitating a special allocator and Delete method.
+
+CharClass* CharClass::New(int maxranges) {  // header and ranges share one uint8[] block
+  CharClass* cc;
+  uint8* data = new uint8[sizeof *cc + maxranges*sizeof cc->ranges_[0]];
+  cc = reinterpret_cast<CharClass*>(data);
+  cc->ranges_ = reinterpret_cast<RuneRange*>(data + sizeof *cc);
+  cc->nranges_ = 0;
+  cc->folds_ascii_ = false;
+  cc->nrunes_ = 0;
+  return cc;
+}
+
+void CharClass::Delete() {  // releases the uint8[] block; mirrors the new[] in New()
+  uint8 *data = reinterpret_cast<uint8*>(this);
+  delete[] data;
+}
+
+CharClass* CharClass::Negate() {  // returns a new class; this one is not modified
+  CharClass* cc = CharClass::New(nranges_+1);
+  cc->folds_ascii_ = folds_ascii_;
+  cc->nrunes_ = Runemax + 1 - nrunes_;
+  int n = 0;
+  int nextlo = 0;
+  for (CharClass::iterator it = begin(); it != end(); ++it) {
+    if (it->lo == nextlo) {
+      nextlo = it->hi + 1;
+    } else {
+      cc->ranges_[n++] = RuneRange(nextlo, it->lo - 1);
+      nextlo = it->hi + 1;
+    }
+  }
+  if (nextlo <= Runemax)
+    cc->ranges_[n++] = RuneRange(nextlo, Runemax);
+  cc->nranges_ = n;
+  return cc;
+}
+
+bool CharClass::Contains(Rune r) {  // binary search over ranges_
+  RuneRange* rr = ranges_;
+  int n = nranges_;
+  while (n > 0) {
+    int m = n/2;
+    if (rr[m].hi < r) {
+      rr += m+1;
+      n -= m+1;
+    } else if (r < rr[m].lo) {
+      n = m;
+    } else {  // rr[m].lo <= r && r <= rr[m].hi
+      return true;
+    }
+  }
+  return false;
+}
+
+CharClass* CharClassBuilder::GetCharClass() {
+  CharClass* cc = CharClass::New(ranges_.size());
+  size_t n = 0;
+  for (iterator it = begin(); it != end(); ++it)
+    cc->ranges_[n++] = *it;
+  cc->nranges_ = n;
+  DCHECK_LE(static_cast<size_t>(n), ranges_.size());
+  cc->nrunes_ = nrunes_;
+  cc->folds_ascii_ = FoldsASCII();
+  return cc;
+}
+
+}  // namespace re2
diff --git a/src/openalpr/support/re2/regexp.h b/src/openalpr/support/re2/regexp.h
new file mode 100644
index 0000000..60eb993
--- /dev/null
+++ b/src/openalpr/support/re2/regexp.h
@@ -0,0 +1,633 @@
+// Copyright 2006 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// --- SPONSORED LINK --------------------------------------------------
+// If you want to use this library for regular expression matching,
+// you should use re2/re2.h, which provides a class RE2 that
+// mimics the PCRE interface provided by PCRE's C++ wrappers.
+// This header describes the low-level interface used to implement RE2
+// and may change in backwards-incompatible ways from time to time.
+// In contrast, RE2's interface will not.
+// ---------------------------------------------------------------------
+
+// Regular expression library: parsing, execution, and manipulation
+// of regular expressions.
+//
+// Any operation that traverses the Regexp structures should be written
+// using Regexp::Walker (see walker-inl.h), not recursively, because deeply nested
+// regular expressions such as x++++++++++++++++++++... might cause recursive
+// traversals to overflow the stack.
+//
+// It is the caller's responsibility to provide appropriate mutual exclusion
+// around manipulation of the regexps. RE2 does this.
+//
+// PARSING
+//
+// Regexp::Parse parses regular expressions encoded in UTF-8.
+// The default syntax is POSIX extended regular expressions,
+// with the following changes:
+//
+// 1. Backreferences (optional in POSIX EREs) are not supported.
+// (Supporting them precludes the use of DFA-based
+// matching engines.)
+//
+// 2. Collating elements and collation classes are not supported.
+// (No one has needed or wanted them.)
+//
+// The exact syntax accepted can be modified by passing flags to
+// Regexp::Parse. In particular, many of the basic Perl additions
+// are available. The flags are documented below (search for LikePerl).
+//
+// If parsed with the flag Regexp::Latin1, both the regular expression
+// and the input to the matching routines are assumed to be encoded in
+// Latin-1, not UTF-8.
+//
+// EXECUTION
+//
+// Once Regexp has parsed a regular expression, it provides methods
+// to search text using that regular expression. These methods are
+// implemented via calling out to other regular expression libraries.
+// (Let's call them the sublibraries.)
+//
+// To call a sublibrary, Regexp does not simply prepare a
+// string version of the regular expression and hand it to the
+// sublibrary. Instead, Regexp prepares, from its own parsed form, the
+// corresponding internal representation used by the sublibrary.
+// This has the drawback of needing to know the internal representation
+// used by the sublibrary, but it has two important benefits:
+//
+// 1. The syntax and meaning of regular expressions is guaranteed
+// to be that used by Regexp's parser, not the syntax expected
+// by the sublibrary. Regexp might accept a restricted or
+// expanded syntax for regular expressions as compared with
+// the sublibrary. As long as Regexp can translate from its
+// internal form into the sublibrary's, clients need not know
+// exactly which sublibrary they are using.
+//
+// 2. The sublibrary parsers are bypassed. For whatever reason,
+// sublibrary regular expression parsers often have security
+// problems. For example, plan9grep's regular expression parser
+// has a buffer overflow in its handling of large character
+// classes, and PCRE's parser has had buffer overflow problems
+// in the past. Security-team requires sandboxing of sublibrary
+// regular expression parsers. Avoiding the sublibrary parsers
+// avoids the sandbox.
+//
+// The execution methods we use now are provided by the compiled form,
+// Prog, described in prog.h
+//
+// MANIPULATION
+//
+// Unlike other regular expression libraries, Regexp makes its parsed
+// form accessible to clients, so that client code can analyze the
+// parsed regular expressions.
+
+#ifndef RE2_REGEXP_H__
+#define RE2_REGEXP_H__
+
+#include "util/util.h"
+#include "re2/stringpiece.h"
+
+namespace re2 {
+
+// Keep in sync with string list kOpcodeNames[] in testing/dump.cc
+enum RegexpOp {
+  // Matches no strings.
+  kRegexpNoMatch = 1,
+
+  // Matches empty string.
+  kRegexpEmptyMatch,
+
+  // Matches rune_.
+  kRegexpLiteral,
+
+  // Matches runes_.
+  kRegexpLiteralString,
+
+  // Matches concatenation of sub_[0..nsub-1].
+  kRegexpConcat,
+  // Matches union of sub_[0..nsub-1].
+  kRegexpAlternate,
+
+  // Matches sub_[0] zero or more times.
+  kRegexpStar,
+  // Matches sub_[0] one or more times.
+  kRegexpPlus,
+  // Matches sub_[0] zero or one times.
+  kRegexpQuest,
+
+  // Matches sub_[0] at least min_ times, at most max_ times.
+  // max_ == -1 means no upper limit.
+  kRegexpRepeat,
+
+  // Parenthesized (capturing) subexpression. Index is cap_.
+  // Optionally, capturing name is name_.
+  kRegexpCapture,
+
+  // Matches any character.
+  kRegexpAnyChar,
+
+  // Matches any byte [sic].
+  kRegexpAnyByte,
+
+  // Matches empty string at beginning of line.
+  kRegexpBeginLine,
+  // Matches empty string at end of line.
+  kRegexpEndLine,
+
+  // Matches word boundary "\b".
+  kRegexpWordBoundary,
+  // Matches not-a-word boundary "\B".
+  kRegexpNoWordBoundary,
+
+  // Matches empty string at beginning of text.
+  kRegexpBeginText,
+  // Matches empty string at end of text.
+  kRegexpEndText,
+
+  // Matches character class given by cc_.
+  kRegexpCharClass,
+
+  // Forces match of entire expression right now,
+  // with match ID match_id_ (used by RE2::Set).
+  kRegexpHaveMatch,
+
+  kMaxRegexpOp = kRegexpHaveMatch,  // alias for the last opcode listed above
+};
+
+// Keep in sync with string list kErrorStrings[] in regexp.cc
+enum RegexpStatusCode {
+  // No error
+  kRegexpSuccess = 0,
+
+  // Unexpected error
+  kRegexpInternalError,
+
+  // Parse errors
+  kRegexpBadEscape,  // bad escape sequence
+  kRegexpBadCharClass,  // bad character class
+  kRegexpBadCharRange,  // bad character class range
+  kRegexpMissingBracket,  // missing closing ]
+  kRegexpMissingParen,  // missing closing )
+  kRegexpTrailingBackslash,  // at end of regexp
+  kRegexpRepeatArgument,  // repeat argument missing, e.g. "*"
+  kRegexpRepeatSize,  // bad repetition argument
+  kRegexpRepeatOp,  // bad repetition operator
+  kRegexpBadPerlOp,  // bad perl operator
+  kRegexpBadUTF8,  // invalid UTF-8 in regexp
+  kRegexpBadNamedCapture,  // bad named capture
+};
+
+// Error status for certain operations.
+class RegexpStatus {
+ public:
+  RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {}
+  ~RegexpStatus() { delete tmp_; }
+
+  void set_code(enum RegexpStatusCode code) { code_ = code; }
+  void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }
+  void set_tmp(string* tmp) { delete tmp_; tmp_ = tmp; }  // deletes any previous tmp_ first
+  enum RegexpStatusCode code() const { return code_; }
+  const StringPiece& error_arg() const { return error_arg_; }
+  bool ok() const { return code() == kRegexpSuccess; }
+
+  // Copies state from status.
+  void Copy(const RegexpStatus& status);
+
+  // Returns text equivalent of code, e.g.:
+  // "Bad character class"
+  static string CodeText(enum RegexpStatusCode code);
+
+  // Returns text describing error, e.g.:
+  // "Bad character class: [z-a]"
+  string Text() const;
+
+ private:
+  enum RegexpStatusCode code_;  // Kind of error
+  StringPiece error_arg_;  // Piece of regexp containing syntax error.
+  string* tmp_;  // Temporary storage, possibly where error_arg_ is.
+
+  DISALLOW_COPY_AND_ASSIGN(RegexpStatus);
+};
+
+// Walker to implement Simplify.
+class SimplifyWalker;
+
+// Compiled form; see prog.h
+class Prog;
+
+struct RuneRange {  // range of runes from lo to hi
+  RuneRange() : lo(0), hi(0) { }
+  RuneRange(int l, int h) : lo(l), hi(h) { }
+  Rune lo;
+  Rune hi;
+};
+
+// Less-than on RuneRanges treats a == b if they overlap at all.
+// This lets us look in a set to find the range covering a particular Rune.
+struct RuneRangeLess {
+  bool operator()(const RuneRange& a, const RuneRange& b) const {
+    return a.hi < b.lo;
+  }
+};
+
+class CharClassBuilder;
+
+class CharClass {
+ public:
+  void Delete();
+
+  typedef RuneRange* iterator;
+  iterator begin() { return ranges_; }
+  iterator end() { return ranges_ + nranges_; }
+
+  int size() { return nrunes_; }
+  bool empty() { return nrunes_ == 0; }
+  bool full() { return nrunes_ == Runemax+1; }
+  bool FoldsASCII() { return folds_ascii_; }
+
+  bool Contains(Rune r);
+  CharClass* Negate();
+
+ private:
+  CharClass();  // not implemented
+  ~CharClass();  // not implemented
+  static CharClass* New(int maxranges);
+
+  friend class CharClassBuilder;
+
+  bool folds_ascii_;
+  int nrunes_;
+  RuneRange *ranges_;
+  int nranges_;
+  DISALLOW_COPY_AND_ASSIGN(CharClass);
+};
+
+class Regexp {
+ public:
+
+  // Flags for parsing. Can be ORed together.
+  enum ParseFlags {
+    NoParseFlags = 0,
+    FoldCase = 1<<0,        // Fold case during matching (case-insensitive).
+    Literal = 1<<1,         // Treat s as literal string instead of a regexp.
+    ClassNL = 1<<2,         // Allow char classes like [^a-z] and \D and \s
+                            // and [[:space:]] to match newline.
+    DotNL = 1<<3,           // Allow . to match newline.
+    MatchNL = ClassNL | DotNL,
+    OneLine = 1<<4,         // Treat ^ and $ as only matching at beginning and
+                            // end of text, not around embedded newlines.
+                            // (Perl's default)
+    Latin1 = 1<<5,          // Regexp and text are in Latin1, not UTF-8.
+    NonGreedy = 1<<6,       // Repetition operators are non-greedy by default.
+    PerlClasses = 1<<7,     // Allow Perl character classes like \d.
+    PerlB = 1<<8,           // Allow Perl's \b and \B.
+    PerlX = 1<<9,           // Perl extensions:
+                            // non-capturing parens - (?: )
+                            // non-greedy operators - *? +? ?? {}?
+                            // flag edits - (?i) (?-i) (?i: )
+                            // i - FoldCase
+                            // m - !OneLine
+                            // s - DotNL
+                            // U - NonGreedy
+                            // line ends: \A \z
+                            // \Q and \E to disable/enable metacharacters
+                            // (?P<name>expr) for named captures
+                            // \C to match any single byte
+    UnicodeGroups = 1<<10,  // Allow \p{Han} for Unicode Han group
+                            // and \P{Han} for its negation.
+    NeverNL = 1<<11,        // Never match NL, even if the regexp mentions
+                            // it explicitly.
+    NeverCapture = 1<<12,   // Parse all parens as non-capturing.
+
+    // As close to Perl as we can get.
+    LikePerl = ClassNL | OneLine | PerlClasses | PerlB | PerlX |
+               UnicodeGroups,
+
+    // Internal use only.
+    WasDollar = 1<<15,      // on kRegexpEndText: was $ in regexp text
+  };
+
+  // Get. No set, Regexps are logically immutable once created.
+  RegexpOp op() { return static_cast<RegexpOp>(op_); }
+  int nsub() { return nsub_; }
+  bool simple() { return simple_; }
+  enum ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
+  int Ref();  // For testing.
+
+  Regexp** sub() {
+    if(nsub_ <= 1)
+      return &subone_;
+    else
+      return submany_;
+  }
+
+  int min() { DCHECK_EQ(op_, kRegexpRepeat); return min_; }
+  int max() { DCHECK_EQ(op_, kRegexpRepeat); return max_; }
+  Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; }
+  CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; }
+  int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; }
+  const string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
+  Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; }
+  int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; }
+  int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; }
+
+  // Increments reference count, returns object as convenience.
+  Regexp* Incref();
+
+  // Decrements reference count and deletes this object if count reaches 0.
+  void Decref();
+
+  // Parses string s to produce regular expression, returned.
+  // Caller must release return value with re->Decref().
+  // On failure, sets *status (if status != NULL) and returns NULL.
+  static Regexp* Parse(const StringPiece& s, ParseFlags flags,
+                       RegexpStatus* status);
+
+  // Returns a _new_ simplified version of the current regexp.
+  // Does not edit the current regexp.
+  // Caller must release return value with re->Decref().
+  // Simplified means that counted repetition has been rewritten
+  // into simpler terms and all Perl/POSIX features have been
+  // removed. The result will capture exactly the same
+  // subexpressions the original did, unless formatted with ToString.
+  Regexp* Simplify();
+  friend class SimplifyWalker;
+
+  // Parses the regexp src and then simplifies it and sets *dst to the
+  // string representation of the simplified form. Returns true on success.
+  // Returns false and sets *status (if status != NULL) on parse error.
+  static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags,
+                             string* dst,
+                             RegexpStatus* status);
+
+  // Returns the number of capturing groups in the regexp.
+  int NumCaptures();
+  friend class NumCapturesWalker;
+
+  // Returns a map from names to capturing group indices,
+  // or NULL if the regexp contains no named capture groups.
+  // The caller is responsible for deleting the map.
+  map<string, int>* NamedCaptures();
+
+  // Returns a map from capturing group indices to capturing group
+  // names or NULL if the regexp contains no named capture groups. The
+  // caller is responsible for deleting the map.
+  map<int, string>* CaptureNames();
+
+  // Returns a string representation of the current regexp,
+  // using as few parentheses as possible.
+  string ToString();
+
+  // Convenience functions. They consume the passed reference,
+  // so in many cases you should use, e.g., Plus(re->Incref(), flags).
+  // They do not consume allocated arrays like subs or runes.
+  static Regexp* Plus(Regexp* sub, ParseFlags flags);
+  static Regexp* Star(Regexp* sub, ParseFlags flags);
+  static Regexp* Quest(Regexp* sub, ParseFlags flags);
+  static Regexp* Concat(Regexp** subs, int nsubs, ParseFlags flags);
+  static Regexp* Alternate(Regexp** subs, int nsubs, ParseFlags flags);
+  static Regexp* Capture(Regexp* sub, ParseFlags flags, int cap);
+  static Regexp* Repeat(Regexp* sub, ParseFlags flags, int min, int max);
+  static Regexp* NewLiteral(Rune rune, ParseFlags flags);
+  static Regexp* NewCharClass(CharClass* cc, ParseFlags flags);
+  static Regexp* LiteralString(Rune* runes, int nrunes, ParseFlags flags);
+  static Regexp* HaveMatch(int match_id, ParseFlags flags);
+
+  // Like Alternate but does not factor out common prefixes.
+  static Regexp* AlternateNoFactor(Regexp** subs, int nsubs, ParseFlags flags);
+
+  // Debugging function. Returns string format for regexp
+  // that makes structure clear. Does NOT use regexp syntax.
+  string Dump();
+
+  // Helper traversal class, defined fully in walker-inl.h.
+  template<typename T> class Walker;
+
+  // Compile to Prog. See prog.h
+  // Reverse prog expects to be run over text backward.
+  // Construction and execution of prog will
+  // stay within approximately max_mem bytes of memory.
+  // If max_mem <= 0, a reasonable default is used.
+  Prog* CompileToProg(int64 max_mem);
+  Prog* CompileToReverseProg(int64 max_mem);
+
+  // Whether to expect this library to find exactly the same answer as PCRE
+  // when running this regexp. Most regexps do mimic PCRE exactly, but a few
+  // obscure cases behave differently. Technically this is more a property
+  // of the Prog than the Regexp, but the computation is much easier to do
+  // on the Regexp. See mimics_pcre.cc for the exact conditions.
+  bool MimicsPCRE();
+
+  // Benchmarking function.
+  void NullWalk();
+
+  // Whether every match of this regexp must be anchored and
+  // begin with a non-empty fixed string (perhaps after ASCII
+  // case-folding). If so, returns the prefix and the sub-regexp that
+  // follows it.
+  bool RequiredPrefix(string* prefix, bool *foldcase, Regexp** suffix);
+
+ private:
+  // Constructor allocates vectors as appropriate for operator.
+  explicit Regexp(RegexpOp op, ParseFlags parse_flags);
+
+  // Use Decref() instead of delete to release Regexps.
+  // This is private to catch deletes at compile time.
+  ~Regexp();
+  void Destroy();
+  bool QuickDestroy();
+
+  // Helpers for Parse. Listed here so they can edit Regexps.
+  class ParseState;
+  friend class ParseState;
+  friend bool ParseCharClass(StringPiece* s, Regexp** out_re,
+                             RegexpStatus* status);
+
+  // Helper for testing [sic].
+  friend bool RegexpEqualTestingOnly(Regexp*, Regexp*);
+
+  // Computes whether Regexp is already simple.
+  bool ComputeSimple();
+
+  // Constructor that generates a concatenation or alternation,
+  // enforcing the limit on the number of subexpressions for
+  // a particular Regexp.
+  static Regexp* ConcatOrAlternate(RegexpOp op, Regexp** subs, int nsubs,
+                                   ParseFlags flags, bool can_factor);
+
+  // Returns the leading string that re starts with.
+  // The returned Rune* points into a piece of re,
+  // so it must not be used after the caller calls re->Decref().
+  static Rune* LeadingString(Regexp* re, int* nrune, ParseFlags* flags);
+
+  // Removes the first n leading runes from the beginning of re.
+  // Edits re in place.
+  static void RemoveLeadingString(Regexp* re, int n);
+
+  // Returns the leading regexp in re's top-level concatenation.
+  // The returned Regexp* points at re or a sub-expression of re,
+  // so it must not be used after the caller calls re->Decref().
+  static Regexp* LeadingRegexp(Regexp* re);
+
+  // Removes LeadingRegexp(re) from re and returns the remainder.
+  // Might edit re in place.
+  static Regexp* RemoveLeadingRegexp(Regexp* re);
+
+  // Simplifies an alternation of literal strings by factoring out
+  // common prefixes.
+  static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags);
+  static int FactorAlternationRecursive(Regexp** sub, int nsub,
+                                        ParseFlags flags, int maxdepth);
+
+  // Is a == b? Only efficient on regexps that have not been through
+  // Simplify yet - the expansion of a kRegexpRepeat will make this
+  // take a long time. Do not call on such regexps, hence private.
+  static bool Equal(Regexp* a, Regexp* b);
+
+  // Allocate space for n sub-regexps.
+  void AllocSub(int n) {
+    if (n < 0 || static_cast<uint16>(n) != n)
+      LOG(FATAL) << "Cannot AllocSub " << n;
+    if (n > 1)
+      submany_ = new Regexp*[n];
+    nsub_ = n;
+  }
+
+  // Add Rune to LiteralString
+  void AddRuneToString(Rune r);
+
+  // Swaps this with that, in place.
+  void Swap(Regexp *that);
+
+  // Operator. See description of operators above.
+  // uint8 instead of RegexpOp to control space usage.
+  uint8 op_;
+
+  // Is this regexp structure already simple
+  // (has it been returned by Simplify)?
+  // uint8 instead of bool to control space usage.
+  uint8 simple_;
+
+  // Flags saved from parsing and used during execution.
+  // (Only FoldCase is used.)
+  // uint16 instead of ParseFlags to control space usage.
+  uint16 parse_flags_;
+
+  // Reference count. Exists so that SimplifyRegexp can build
+  // regexp structures that are dags rather than trees to avoid
+  // exponential blowup in space requirements.
+  // uint16 to control space usage.
+  // The standard regexp routines will never generate a
+  // ref greater than the maximum repeat count (100),
+  // but even so, Incref and Decref consult an overflow map
+  // when ref_ reaches kMaxRef.
+  uint16 ref_;
+  static const uint16 kMaxRef = 0xffff;
+
+  // Subexpressions.
+  // uint16 to control space usage.
+  // Concat and Alternate handle larger numbers of subexpressions
+  // by building concatenation or alternation trees.
+  // Other routines should call Concat or Alternate instead of
+  // filling in sub() by hand.
+  uint16 nsub_;
+  static const uint16 kMaxNsub = 0xffff;
+  union {
+    Regexp** submany_;  // if nsub_ > 1
+    Regexp* subone_;  // if nsub_ == 1
+  };
+
+  // Extra space for parse and teardown stacks.
+  Regexp* down_;
+
+  // Arguments to operator. See description of operators above.
+  union {
+    struct {  // Repeat
+      int max_;
+      int min_;
+    };
+    struct {  // Capture
+      int cap_;
+      string* name_;
+    };
+    struct {  // LiteralString
+      int nrunes_;
+      Rune* runes_;
+    };
+    struct {  // CharClass
+      // These two could be in separate union members,
+      // but it wouldn't save any space (there are other two-word structs)
+      // and keeping them separate avoids confusion during parsing.
+      CharClass* cc_;
+      CharClassBuilder* ccb_;
+    };
+    Rune rune_;  // Literal
+    int match_id_;  // HaveMatch
+    void *the_union_[2];  // as big as any other element, for memset
+  };
+
+  DISALLOW_COPY_AND_ASSIGN(Regexp);
+};
+
+// Character class set: contains non-overlapping, non-abutting RuneRanges.
+typedef set<RuneRange, RuneRangeLess> RuneRangeSet;
+
+class CharClassBuilder {
+ public:
+  CharClassBuilder();
+
+  typedef RuneRangeSet::iterator iterator;
+  iterator begin() { return ranges_.begin(); }
+  iterator end() { return ranges_.end(); }
+
+  int size() { return nrunes_; }
+  bool empty() { return nrunes_ == 0; }
+  bool full() { return nrunes_ == Runemax+1; }
+
+  bool Contains(Rune r);
+  bool FoldsASCII();
+  bool AddRange(Rune lo, Rune hi);  // returns whether class changed
+  CharClassBuilder* Copy();
+  void AddCharClass(CharClassBuilder* cc);
+  void Negate();
+  void RemoveAbove(Rune r);
+  CharClass* GetCharClass();
+  void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags);
+
+ private:
+  static const uint32 AlphaMask = (1<<26) - 1;
+  uint32 upper_;  // bitmap of A-Z
+  uint32 lower_;  // bitmap of a-z
+  int nrunes_;
+  RuneRangeSet ranges_;
+  DISALLOW_COPY_AND_ASSIGN(CharClassBuilder);
+};
+
+// Tell g++ that bitwise ops on ParseFlags produce ParseFlags.
+inline Regexp::ParseFlags operator|(Regexp::ParseFlags a, Regexp::ParseFlags b) +{ + return static_cast(static_cast(a) | static_cast(b)); +} + +inline Regexp::ParseFlags operator^(Regexp::ParseFlags a, Regexp::ParseFlags b) +{ + return static_cast(static_cast(a) ^ static_cast(b)); +} + +inline Regexp::ParseFlags operator&(Regexp::ParseFlags a, Regexp::ParseFlags b) +{ + return static_cast(static_cast(a) & static_cast(b)); +} + +inline Regexp::ParseFlags operator~(Regexp::ParseFlags a) +{ + return static_cast(~static_cast(a)); +} + + + +} // namespace re2 + +#endif // RE2_REGEXP_H__ diff --git a/src/openalpr/support/re2/set.cc b/src/openalpr/support/re2/set.cc new file mode 100644 index 0000000..057a029 --- /dev/null +++ b/src/openalpr/support/re2/set.cc @@ -0,0 +1,113 @@ +// Copyright 2010 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "re2/set.h" + +#include "util/util.h" +#include "re2/stringpiece.h" +#include "re2/prog.h" +#include "re2.h" +#include "re2/regexp.h" + +using namespace re2; + +RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) { + options_.Copy(options); + anchor_ = anchor; + prog_ = NULL; + compiled_ = false; +} + +RE2::Set::~Set() { + for (size_t i = 0; i < re_.size(); i++) + re_[i]->Decref(); + delete prog_; +} + +int RE2::Set::Add(const StringPiece& pattern, string* error) { + if (compiled_) { + LOG(DFATAL) << "RE2::Set::Add after Compile"; + return -1; + } + + Regexp::ParseFlags pf = static_cast( + options_.ParseFlags()); + + RegexpStatus status; + re2::Regexp* re = Regexp::Parse(pattern, pf, &status); + if (re == NULL) { + if (error != NULL) + *error = status.Text(); + if (options_.log_errors()) + LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text(); + return -1; + } + + // Concatenate with match index and push on vector. 
+ int n = re_.size(); + re2::Regexp* m = re2::Regexp::HaveMatch(n, pf); + if (re->op() == kRegexpConcat) { + int nsub = re->nsub(); + re2::Regexp** sub = new re2::Regexp*[nsub + 1]; + for (int i = 0; i < nsub; i++) + sub[i] = re->sub()[i]->Incref(); + sub[nsub] = m; + re->Decref(); + re = re2::Regexp::Concat(sub, nsub + 1, pf); + delete[] sub; + } else { + re2::Regexp* sub[2]; + sub[0] = re; + sub[1] = m; + re = re2::Regexp::Concat(sub, 2, pf); + } + re_.push_back(re); + return n; +} + +bool RE2::Set::Compile() { + if (compiled_) { + LOG(DFATAL) << "RE2::Set::Compile multiple times"; + return false; + } + compiled_ = true; + + Regexp::ParseFlags pf = static_cast( + options_.ParseFlags()); + re2::Regexp* re = re2::Regexp::Alternate(const_cast(&re_[0]), + re_.size(), pf); + re_.clear(); + re2::Regexp* sre = re->Simplify(); + re->Decref(); + re = sre; + if (re == NULL) { + if (options_.log_errors()) + LOG(ERROR) << "Error simplifying during Compile."; + return false; + } + + prog_ = Prog::CompileSet(options_, anchor_, re); + return prog_ != NULL; +} + +bool RE2::Set::Match(const StringPiece& text, vector* v) const { + if (!compiled_) { + LOG(DFATAL) << "RE2::Set::Match without Compile"; + return false; + } + v->clear(); + bool failed; + bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, + Prog::kManyMatch, NULL, &failed, v); + if (failed) + LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space"; + + if (ret == false) + return false; + if (v->size() == 0) { + LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set"; + return false; + } + return true; +} diff --git a/src/openalpr/support/re2/set.h b/src/openalpr/support/re2/set.h new file mode 100644 index 0000000..1e2647e --- /dev/null +++ b/src/openalpr/support/re2/set.h @@ -0,0 +1,55 @@ +// Copyright 2010 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef RE2_SET_H +#define RE2_SET_H + +#include +#include + +#include "re2.h" + +namespace re2 { +using std::vector; + +// An RE2::Set represents a collection of regexps that can +// be searched for simultaneously. +class RE2::Set { + public: + Set(const RE2::Options& options, RE2::Anchor anchor); + ~Set(); + + // Add adds regexp pattern to the set, interpreted using the RE2 options. + // (The RE2 constructor's default options parameter is RE2::UTF8.) + // Add returns the regexp index that will be used to identify + // it in the result of Match, or -1 if the regexp cannot be parsed. + // Indices are assigned in sequential order starting from 0. + // Error returns do not increment the index. + // If an error occurs and error != NULL, *error will hold an error message. + int Add(const StringPiece& pattern, string* error); + + // Compile prepares the Set for matching. + // Add must not be called again after Compile. + // Compile must be called before FullMatch or PartialMatch. + // Compile may return false if it runs out of memory. + bool Compile(); + + // Match returns true if text matches any of the regexps in the set. + // If so, it fills v with the indices of the matching regexps. + bool Match(const StringPiece& text, vector* v) const; + + private: + RE2::Options options_; + RE2::Anchor anchor_; + vector re_; + re2::Prog* prog_; + bool compiled_; + //DISALLOW_COPY_AND_ASSIGN(Set); + Set(const Set&); + void operator=(const Set&); +}; + +} // namespace re2 + +#endif // RE2_SET_H diff --git a/src/openalpr/support/re2/simplify.cc b/src/openalpr/support/re2/simplify.cc new file mode 100644 index 0000000..9c0021e --- /dev/null +++ b/src/openalpr/support/re2/simplify.cc @@ -0,0 +1,392 @@ +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Rewrite POSIX and other features in re +// to use simple extended regular expression features. 
+// Also sort and simplify character classes. + +#include "util/util.h" +#include "re2/regexp.h" +#include "re2/walker-inl.h" + +namespace re2 { + +// Parses the regexp src and then simplifies it and sets *dst to the +// string representation of the simplified form. Returns true on success. +// Returns false and sets *error (if error != NULL) on error. +bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags, + string* dst, + RegexpStatus* status) { + Regexp* re = Parse(src, flags, status); + if (re == NULL) + return false; + Regexp* sre = re->Simplify(); + re->Decref(); + if (sre == NULL) { + // Should not happen, since Simplify never fails. + LOG(ERROR) << "Simplify failed on " << src; + if (status) { + status->set_code(kRegexpInternalError); + status->set_error_arg(src); + } + return false; + } + *dst = sre->ToString(); + sre->Decref(); + return true; +} + +// Assuming the simple_ flags on the children are accurate, +// is this Regexp* simple? +bool Regexp::ComputeSimple() { + Regexp** subs; + switch (op_) { + case kRegexpNoMatch: + case kRegexpEmptyMatch: + case kRegexpLiteral: + case kRegexpLiteralString: + case kRegexpBeginLine: + case kRegexpEndLine: + case kRegexpBeginText: + case kRegexpWordBoundary: + case kRegexpNoWordBoundary: + case kRegexpEndText: + case kRegexpAnyChar: + case kRegexpAnyByte: + case kRegexpHaveMatch: + return true; + case kRegexpConcat: + case kRegexpAlternate: + // These are simple as long as the subpieces are simple. + subs = sub(); + for (int i = 0; i < nsub_; i++) + if (!subs[i]->simple_) + return false; + return true; + case kRegexpCharClass: + // Simple as long as the char class is not empty, not full. 
+ if (ccb_ != NULL) + return !ccb_->empty() && !ccb_->full(); + return !cc_->empty() && !cc_->full(); + case kRegexpCapture: + subs = sub(); + return subs[0]->simple_; + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: + subs = sub(); + if (!subs[0]->simple_) + return false; + switch (subs[0]->op_) { + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: + case kRegexpEmptyMatch: + case kRegexpNoMatch: + return false; + default: + break; + } + return true; + case kRegexpRepeat: + return false; + } + LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_; + return false; +} + +// Walker subclass used by Simplify. +// The simplify walk is purely post-recursive: given the simplified children, +// PostVisit creates the simplified result. +// The child_args are simplified Regexp*s. +class SimplifyWalker : public Regexp::Walker { + public: + SimplifyWalker() {} + virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop); + virtual Regexp* PostVisit(Regexp* re, + Regexp* parent_arg, + Regexp* pre_arg, + Regexp** child_args, int nchild_args); + virtual Regexp* Copy(Regexp* re); + virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg); + + private: + // These functions are declared inside SimplifyWalker so that + // they can edit the private fields of the Regexps they construct. + + // Creates a concatenation of two Regexp, consuming refs to re1 and re2. + // Caller must Decref return value when done with it. + static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags); + + // Simplifies the expression re{min,max} in terms of *, +, and ?. + // Returns a new regexp. Does not edit re. Does not consume reference to re. + // Caller must Decref return value when done with it. + static Regexp* SimplifyRepeat(Regexp* re, int min, int max, + Regexp::ParseFlags parse_flags); + + // Simplifies a character class by expanding any named classes + // into rune ranges. Does not edit re. Does not consume ref to re. 
+ // Caller must Decref return value when done with it. + static Regexp* SimplifyCharClass(Regexp* re); + + DISALLOW_COPY_AND_ASSIGN(SimplifyWalker); +}; + +// Simplifies a regular expression, returning a new regexp. +// The new regexp uses traditional Unix egrep features only, +// plus the Perl (?:) non-capturing parentheses. +// Otherwise, no POSIX or Perl additions. The new regexp +// captures exactly the same subexpressions (with the same indices) +// as the original. +// Does not edit current object. +// Caller must Decref() return value when done with it. + +Regexp* Regexp::Simplify() { + if (simple_) + return Incref(); + SimplifyWalker w; + return w.Walk(this, NULL); +} + +#define Simplify DontCallSimplify // Avoid accidental recursion + +Regexp* SimplifyWalker::Copy(Regexp* re) { + return re->Incref(); +} + +Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) { + // This should never be called, since we use Walk and not + // WalkExponential. + LOG(DFATAL) << "SimplifyWalker::ShortVisit called"; + return re->Incref(); +} + +Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) { + if (re->simple_) { + *stop = true; + return re->Incref(); + } + return NULL; +} + +Regexp* SimplifyWalker::PostVisit(Regexp* re, + Regexp* parent_arg, + Regexp* pre_arg, + Regexp** child_args, + int nchild_args) { + switch (re->op()) { + case kRegexpNoMatch: + case kRegexpEmptyMatch: + case kRegexpLiteral: + case kRegexpLiteralString: + case kRegexpBeginLine: + case kRegexpEndLine: + case kRegexpBeginText: + case kRegexpWordBoundary: + case kRegexpNoWordBoundary: + case kRegexpEndText: + case kRegexpAnyChar: + case kRegexpAnyByte: + case kRegexpHaveMatch: + // All these are always simple. + re->simple_ = true; + return re->Incref(); + + case kRegexpConcat: + case kRegexpAlternate: { + // These are simple as long as the subpieces are simple. + // Two passes to avoid allocation in the common case. 
+ bool changed = false; + Regexp** subs = re->sub(); + for (int i = 0; i < re->nsub_; i++) { + Regexp* sub = subs[i]; + Regexp* newsub = child_args[i]; + if (newsub != sub) { + changed = true; + break; + } + } + if (!changed) { + for (int i = 0; i < re->nsub_; i++) { + Regexp* newsub = child_args[i]; + newsub->Decref(); + } + re->simple_ = true; + return re->Incref(); + } + Regexp* nre = new Regexp(re->op(), re->parse_flags()); + nre->AllocSub(re->nsub_); + Regexp** nre_subs = nre->sub(); + for (int i = 0; i nsub_; i++) + nre_subs[i] = child_args[i]; + nre->simple_ = true; + return nre; + } + + case kRegexpCapture: { + Regexp* newsub = child_args[0]; + if (newsub == re->sub()[0]) { + newsub->Decref(); + re->simple_ = true; + return re->Incref(); + } + Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags()); + nre->AllocSub(1); + nre->sub()[0] = newsub; + nre->cap_ = re->cap_; + nre->simple_ = true; + return nre; + } + + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: { + Regexp* newsub = child_args[0]; + // Special case: repeat the empty string as much as + // you want, but it's still the empty string. + if (newsub->op() == kRegexpEmptyMatch) + return newsub; + + // These are simple as long as the subpiece is simple. + if (newsub == re->sub()[0]) { + newsub->Decref(); + re->simple_ = true; + return re->Incref(); + } + + // These are also idempotent if flags are constant. + if (re->op() == newsub->op() && + re->parse_flags() == newsub->parse_flags()) + return newsub; + + Regexp* nre = new Regexp(re->op(), re->parse_flags()); + nre->AllocSub(1); + nre->sub()[0] = newsub; + nre->simple_ = true; + return nre; + } + + case kRegexpRepeat: { + Regexp* newsub = child_args[0]; + // Special case: repeat the empty string as much as + // you want, but it's still the empty string. 
+ if (newsub->op() == kRegexpEmptyMatch) + return newsub; + + Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_, + re->parse_flags()); + newsub->Decref(); + nre->simple_ = true; + return nre; + } + + case kRegexpCharClass: { + Regexp* nre = SimplifyCharClass(re); + nre->simple_ = true; + return nre; + } + } + + LOG(ERROR) << "Simplify case not handled: " << re->op(); + return re->Incref(); +} + +// Creates a concatenation of two Regexp, consuming refs to re1 and re2. +// Returns a new Regexp, handing the ref to the caller. +Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2, + Regexp::ParseFlags parse_flags) { + Regexp* re = new Regexp(kRegexpConcat, parse_flags); + re->AllocSub(2); + Regexp** subs = re->sub(); + subs[0] = re1; + subs[1] = re2; + return re; +} + +// Simplifies the expression re{min,max} in terms of *, +, and ?. +// Returns a new regexp. Does not edit re. Does not consume reference to re. +// Caller must Decref return value when done with it. +// The result will *not* necessarily have the right capturing parens +// if you call ToString() and re-parse it: (x){2} becomes (x)(x), +// but in the Regexp* representation, both (x) are marked as $1. +Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max, + Regexp::ParseFlags f) { + // x{n,} means at least n matches of x. + if (max == -1) { + // Special case: x{0,} is x* + if (min == 0) + return Regexp::Star(re->Incref(), f); + + // Special case: x{1,} is x+ + if (min == 1) + return Regexp::Plus(re->Incref(), f); + + // General case: x{4,} is xxxx+ + Regexp* nre = new Regexp(kRegexpConcat, f); + nre->AllocSub(min); + Regexp** nre_subs = nre->sub(); + for (int i = 0; i < min-1; i++) + nre_subs[i] = re->Incref(); + nre_subs[min-1] = Regexp::Plus(re->Incref(), f); + return nre; + } + + // Special case: (x){0} matches only empty string. + if (min == 0 && max == 0) + return new Regexp(kRegexpEmptyMatch, f); + + // Special case: x{1} is just x. 
+ if (min == 1 && max == 1) + return re->Incref(); + + // General case: x{n,m} means n copies of x and m copies of x?. + // The machine will do less work if we nest the final m copies, + // so that x{2,5} = xx(x(x(x)?)?)? + + // Build leading prefix: xx. Capturing only on the last one. + Regexp* nre = NULL; + if (min > 0) { + nre = new Regexp(kRegexpConcat, f); + nre->AllocSub(min); + Regexp** nre_subs = nre->sub(); + for (int i = 0; i < min; i++) + nre_subs[i] = re->Incref(); + } + + // Build and attach suffix: (x(x(x)?)?)? + if (max > min) { + Regexp* suf = Regexp::Quest(re->Incref(), f); + for (int i = min+1; i < max; i++) + suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f); + if (nre == NULL) + nre = suf; + else + nre = Concat2(nre, suf, f); + } + + if (nre == NULL) { + // Some degenerate case, like min > max, or min < max < 0. + // This shouldn't happen, because the parser rejects such regexps. + LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max; + return new Regexp(kRegexpNoMatch, f); + } + + return nre; +} + +// Simplifies a character class. +// Caller must Decref return value when done with it. +Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) { + CharClass* cc = re->cc(); + + // Special cases + if (cc->empty()) + return new Regexp(kRegexpNoMatch, re->parse_flags()); + if (cc->full()) + return new Regexp(kRegexpAnyChar, re->parse_flags()); + + return re->Incref(); +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/stringpiece.cc b/src/openalpr/support/re2/stringpiece.cc new file mode 100644 index 0000000..f9e2294 --- /dev/null +++ b/src/openalpr/support/re2/stringpiece.cc @@ -0,0 +1,91 @@ +// Copyright 2004 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "re2/stringpiece.h" +#include "util/util.h" + +using re2::StringPiece; + +std::ostream& operator<<(std::ostream& o, const StringPiece& piece) { + o.write(piece.data(), piece.size()); + return o; +} + +bool StringPiece::_equal(const StringPiece& x, const StringPiece& y) { + int len = x.size(); + if (len != y.size()) { + return false; + } + const char* p = x.data(); + const char* p2 = y.data(); + // Test last byte in case strings share large common prefix + if ((len > 0) && (p[len-1] != p2[len-1])) return false; + const char* p_limit = p + len; + for (; p < p_limit; p++, p2++) { + if (*p != *p2) + return false; + } + return true; +} + +void StringPiece::CopyToString(string* target) const { + target->assign(ptr_, length_); +} + +int StringPiece::copy(char* buf, size_type n, size_type pos) const { + int ret = min(length_ - pos, n); + memcpy(buf, ptr_ + pos, ret); + return ret; +} + +bool StringPiece::contains(StringPiece s) const { + return (size_t)find(s, 0) != npos; +} + +int StringPiece::find(const StringPiece& s, size_type pos) const { + if (length_ < 0 || pos > static_cast(length_)) + return npos; + + const char* result = std::search(ptr_ + pos, ptr_ + length_, + s.ptr_, s.ptr_ + s.length_); + const size_type xpos = result - ptr_; + return xpos + s.length_ <= static_cast(length_) ? xpos : npos; +} + +int StringPiece::find(char c, size_type pos) const { + if (length_ <= 0 || pos >= static_cast(length_)) { + return npos; + } + const char* result = std::find(ptr_ + pos, ptr_ + length_, c); + return result != ptr_ + length_ ? result - ptr_ : npos; +} + +int StringPiece::rfind(const StringPiece& s, size_type pos) const { + if (length_ < s.length_) return npos; + const size_t ulen = length_; + if (s.length_ == 0) return min(ulen, pos); + + const char* last = ptr_ + min(ulen - s.length_, pos) + s.length_; + const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_); + return result != last ? 
result - ptr_ : npos; +} + +int StringPiece::rfind(char c, size_type pos) const { + if (length_ <= 0) return npos; + for (int i = min(pos, static_cast(length_ - 1)); + i >= 0; --i) { + if (ptr_[i] == c) { + return i; + } + } + return npos; +} + +StringPiece StringPiece::substr(size_type pos, size_type n) const { + if (pos > static_cast(length_)) pos = static_cast(length_); + if (n > length_ - pos) n = length_ - pos; + return StringPiece(ptr_ + pos, n); +} + +const StringPiece::size_type StringPiece::npos = size_type(-1); diff --git a/src/openalpr/support/re2/stringpiece.h b/src/openalpr/support/re2/stringpiece.h new file mode 100644 index 0000000..bc8bf40 --- /dev/null +++ b/src/openalpr/support/re2/stringpiece.h @@ -0,0 +1,185 @@ +// Copyright 2001-2010 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// A string-like object that points to a sized piece of memory. +// +// Functions or methods may use const StringPiece& parameters to accept either +// a "const char*" or a "string" value that will be implicitly converted to +// a StringPiece. The implicit conversion means that it is often appropriate +// to include this .h file in other files rather than forward-declaring +// StringPiece as would be appropriate for most other Google classes. +// +// Systematic usage of StringPiece is encouraged as it will reduce unnecessary +// conversions from "const char*" to "string" and back again. +// +// +// Arghh! I wish C++ literals were "string". + +#ifndef STRINGS_STRINGPIECE_H__ +#define STRINGS_STRINGPIECE_H__ + +#include +#include +#include +#include +#include + +namespace re2 { + +class StringPiece { + private: + const char* ptr_; + int length_; + + public: + // We provide non-explicit singleton constructors so users can pass + // in a "const char*" or a "string" wherever a "StringPiece" is + // expected. 
+ StringPiece() : ptr_(NULL), length_(0) { } + StringPiece(const char* str) + : ptr_(str), length_((str == NULL) ? 0 : static_cast(strlen(str))) { } + StringPiece(const std::string& str) + : ptr_(str.data()), length_(static_cast(str.size())) { } + StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { } + + // data() may return a pointer to a buffer with embedded NULs, and the + // returned buffer may or may not be null terminated. Therefore it is + // typically a mistake to pass data() to a routine that expects a NUL + // terminated string. + const char* data() const { return ptr_; } + int size() const { return length_; } + int length() const { return length_; } + bool empty() const { return length_ == 0; } + + void clear() { ptr_ = NULL; length_ = 0; } + void set(const char* data, int len) { ptr_ = data; length_ = len; } + void set(const char* str) { + ptr_ = str; + if (str != NULL) + length_ = static_cast(strlen(str)); + else + length_ = 0; + } + void set(const void* data, int len) { + ptr_ = reinterpret_cast(data); + length_ = len; + } + + char operator[](int i) const { return ptr_[i]; } + + void remove_prefix(int n) { + ptr_ += n; + length_ -= n; + } + + void remove_suffix(int n) { + length_ -= n; + } + + int compare(const StringPiece& x) const { + int r = memcmp(ptr_, x.ptr_, std::min(length_, x.length_)); + if (r == 0) { + if (length_ < x.length_) r = -1; + else if (length_ > x.length_) r = +1; + } + return r; + } + + std::string as_string() const { + return std::string(data(), size()); + } + // We also define ToString() here, since many other string-like + // interfaces name the routine that converts to a C++ string + // "ToString", and it's confusing to have the method that does that + // for a StringPiece be called "as_string()". We also leave the + // "as_string()" method defined here for existing code. 
+ std::string ToString() const { + return std::string(data(), size()); + } + + void CopyToString(std::string* target) const; + void AppendToString(std::string* target) const; + + // Does "this" start with "x" + bool starts_with(const StringPiece& x) const { + return ((length_ >= x.length_) && + (memcmp(ptr_, x.ptr_, x.length_) == 0)); + } + + // Does "this" end with "x" + bool ends_with(const StringPiece& x) const { + return ((length_ >= x.length_) && + (memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0)); + } + + // standard STL container boilerplate + typedef char value_type; + typedef const char* pointer; + typedef const char& reference; + typedef const char& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + static const size_type npos; + typedef const char* const_iterator; + typedef const char* iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + iterator begin() const { return ptr_; } + iterator end() const { return ptr_ + length_; } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(ptr_ + length_); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(ptr_); + } + // STLS says return size_type, but Google says return int + int max_size() const { return length_; } + int capacity() const { return length_; } + + int copy(char* buf, size_type n, size_type pos = 0) const; + + bool contains(StringPiece s) const; + + int find(const StringPiece& s, size_type pos = 0) const; + int find(char c, size_type pos = 0) const; + int rfind(const StringPiece& s, size_type pos = npos) const; + int rfind(char c, size_type pos = npos) const; + + StringPiece substr(size_type pos, size_type n = npos) const; + + static bool _equal(const StringPiece&, const StringPiece&); +}; + +inline bool operator==(const StringPiece& x, const StringPiece& y) { + return StringPiece::_equal(x, y); +} + +inline bool operator!=(const StringPiece& x, 
const StringPiece& y) { + return !(x == y); +} + +inline bool operator<(const StringPiece& x, const StringPiece& y) { + const int r = memcmp(x.data(), y.data(), + std::min(x.size(), y.size())); + return ((r < 0) || ((r == 0) && (x.size() < y.size()))); +} + +inline bool operator>(const StringPiece& x, const StringPiece& y) { + return y < x; +} + +inline bool operator<=(const StringPiece& x, const StringPiece& y) { + return !(x > y); +} + +inline bool operator>=(const StringPiece& x, const StringPiece& y) { + return !(x < y); +} + +} // namespace re2 + +// allow StringPiece to be logged +extern std::ostream& operator<<(std::ostream& o, const re2::StringPiece& piece); + +#endif // STRINGS_STRINGPIECE_H__ diff --git a/src/openalpr/support/re2/tostring.cc b/src/openalpr/support/re2/tostring.cc new file mode 100644 index 0000000..56ca7df --- /dev/null +++ b/src/openalpr/support/re2/tostring.cc @@ -0,0 +1,341 @@ +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Format a regular expression structure as a string. +// Tested by parse_test.cc + +#include "util/util.h" +#include "re2/regexp.h" +#include "re2/walker-inl.h" + +namespace re2 { + +enum { + PrecAtom, + PrecUnary, + PrecConcat, + PrecAlternate, + PrecEmpty, + PrecParen, + PrecToplevel, +}; + +// Helper function. See description below. +static void AppendCCRange(string* t, Rune lo, Rune hi); + +// Walker to generate string in s_. +// The arg pointers are actually integers giving the +// context precedence. +// The child_args are always NULL. 
+class ToStringWalker : public Regexp::Walker { + public: + explicit ToStringWalker(string* t) : t_(t) {} + + virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); + virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, + int* child_args, int nchild_args); + virtual int ShortVisit(Regexp* re, int parent_arg) { + return 0; + } + + private: + string* t_; // The string the walker appends to. + + DISALLOW_COPY_AND_ASSIGN(ToStringWalker); +}; + +string Regexp::ToString() { + string t; + ToStringWalker w(&t); + w.WalkExponential(this, PrecToplevel, 100000); + if (w.stopped_early()) + t += " [truncated]"; + return t; +} + +#define ToString DontCallToString // Avoid accidental recursion. + +// Visits re before children are processed. +// Appends ( if needed and passes new precedence to children. +int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) { + int prec = parent_arg; + int nprec = PrecAtom; + + switch (re->op()) { + case kRegexpNoMatch: + case kRegexpEmptyMatch: + case kRegexpLiteral: + case kRegexpAnyChar: + case kRegexpAnyByte: + case kRegexpBeginLine: + case kRegexpEndLine: + case kRegexpBeginText: + case kRegexpEndText: + case kRegexpWordBoundary: + case kRegexpNoWordBoundary: + case kRegexpCharClass: + case kRegexpHaveMatch: + nprec = PrecAtom; + break; + + case kRegexpConcat: + case kRegexpLiteralString: + if (prec < PrecConcat) + t_->append("(?:"); + nprec = PrecConcat; + break; + + case kRegexpAlternate: + if (prec < PrecAlternate) + t_->append("(?:"); + nprec = PrecAlternate; + break; + + case kRegexpCapture: + t_->append("("); + if (re->name()) { + t_->append("?P<"); + t_->append(*re->name()); + t_->append(">"); + } + nprec = PrecParen; + break; + + case kRegexpStar: + case kRegexpPlus: + case kRegexpQuest: + case kRegexpRepeat: + if (prec < PrecUnary) + t_->append("(?:"); + // The subprecedence here is PrecAtom instead of PrecUnary + // because PCRE treats two unary ops in a row as a parse error. 
+ nprec = PrecAtom; + break; + } + + return nprec; +} + +static void AppendLiteral(string *t, Rune r, bool foldcase) { + if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { + t->append(1, '\\'); + t->append(1, r); + } else if (foldcase && 'a' <= r && r <= 'z') { + if ('a' <= r && r <= 'z') + r += 'A' - 'a'; + t->append(1, '['); + t->append(1, r); + t->append(1, r + 'a' - 'A'); + t->append(1, ']'); + } else { + AppendCCRange(t, r, r); + } +} + +// Visits re after children are processed. +// For childless regexps, all the work is done here. +// For regexps with children, append any unary suffixes or ). +int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, + int* child_args, int nchild_args) { + int prec = parent_arg; + switch (re->op()) { + case kRegexpNoMatch: + // There's no simple symbol for "no match", but + // [^0-Runemax] excludes everything. + t_->append("[^\\x00-\\x{10ffff}]"); + break; + + case kRegexpEmptyMatch: + // Append (?:) to make empty string visible, + // unless this is already being parenthesized. + if (prec < PrecEmpty) + t_->append("(?:)"); + break; + + case kRegexpLiteral: + AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase); + break; + + case kRegexpLiteralString: + for (int i = 0; i < re->nrunes(); i++) + AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase); + if (prec < PrecConcat) + t_->append(")"); + break; + + case kRegexpConcat: + if (prec < PrecConcat) + t_->append(")"); + break; + + case kRegexpAlternate: + // Clumsy but workable: the children all appended | + // at the end of their strings, so just remove the last one. 
+ if ((*t_)[t_->size()-1] == '|') + t_->erase(t_->size()-1); + else + LOG(DFATAL) << "Bad final char: " << t_; + if (prec < PrecAlternate) + t_->append(")"); + break; + + case kRegexpStar: + t_->append("*"); + if (re->parse_flags() & Regexp::NonGreedy) + t_->append("?"); + if (prec < PrecUnary) + t_->append(")"); + break; + + case kRegexpPlus: + t_->append("+"); + if (re->parse_flags() & Regexp::NonGreedy) + t_->append("?"); + if (prec < PrecUnary) + t_->append(")"); + break; + + case kRegexpQuest: + t_->append("?"); + if (re->parse_flags() & Regexp::NonGreedy) + t_->append("?"); + if (prec < PrecUnary) + t_->append(")"); + break; + + case kRegexpRepeat: + if (re->max() == -1) + t_->append(StringPrintf("{%d,}", re->min())); + else if (re->min() == re->max()) + t_->append(StringPrintf("{%d}", re->min())); + else + t_->append(StringPrintf("{%d,%d}", re->min(), re->max())); + if (re->parse_flags() & Regexp::NonGreedy) + t_->append("?"); + if (prec < PrecUnary) + t_->append(")"); + break; + + case kRegexpAnyChar: + t_->append("."); + break; + + case kRegexpAnyByte: + t_->append("\\C"); + break; + + case kRegexpBeginLine: + t_->append("^"); + break; + + case kRegexpEndLine: + t_->append("$"); + break; + + case kRegexpBeginText: + t_->append("(?-m:^)"); + break; + + case kRegexpEndText: + if (re->parse_flags() & Regexp::WasDollar) + t_->append("(?-m:$)"); + else + t_->append("\\z"); + break; + + case kRegexpWordBoundary: + t_->append("\\b"); + break; + + case kRegexpNoWordBoundary: + t_->append("\\B"); + break; + + case kRegexpCharClass: { + if (re->cc()->size() == 0) { + t_->append("[^\\x00-\\x{10ffff}]"); + break; + } + t_->append("["); + // Heuristic: show class as negated if it contains the + // non-character 0xFFFE. 
+ CharClass* cc = re->cc(); + if (cc->Contains(0xFFFE)) { + cc = cc->Negate(); + t_->append("^"); + } + for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i) + AppendCCRange(t_, i->lo, i->hi); + if (cc != re->cc()) + cc->Delete(); + t_->append("]"); + break; + } + + case kRegexpCapture: + t_->append(")"); + break; + + case kRegexpHaveMatch: + // There's no syntax accepted by the parser to generate + // this node (it is generated by RE2::Set) so make something + // up that is readable but won't compile. + t_->append("(?HaveMatch:%d)", re->match_id()); + break; + } + + // If the parent is an alternation, append the | for it. + if (prec == PrecAlternate) + t_->append("|"); + + return 0; +} + +// Appends a rune for use in a character class to the string t. +static void AppendCCChar(string* t, Rune r) { + if (0x20 <= r && r <= 0x7E) { + if (strchr("[]^-\\", r)) + t->append("\\"); + t->append(1, r); + return; + } + switch (r) { + default: + break; + + case '\r': + t->append("\\r"); + return; + + case '\t': + t->append("\\t"); + return; + + case '\n': + t->append("\\n"); + return; + + case '\f': + t->append("\\f"); + return; + } + + if (r < 0x100) { + StringAppendF(t, "\\x%02x", static_cast(r)); + return; + } + StringAppendF(t, "\\x{%x}", static_cast(r)); +} + +static void AppendCCRange(string* t, Rune lo, Rune hi) { + if (lo > hi) + return; + AppendCCChar(t, lo); + if (lo < hi) { + t->append("-"); + AppendCCChar(t, hi); + } +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/unicode_casefold.cc b/src/openalpr/support/re2/unicode_casefold.cc new file mode 100644 index 0000000..2293cc7 --- /dev/null +++ b/src/openalpr/support/re2/unicode_casefold.cc @@ -0,0 +1,480 @@ + +// GENERATED BY make_unicode_casefold.py; DO NOT EDIT. 
+// make_unicode_casefold.py >unicode_casefold.cc + +#include "re2/unicode_casefold.h" + +namespace re2 { + + +// 1034 groups, 2089 pairs, 289 ranges +const CaseFold unicode_casefold[] = { + { 65, 90, 32 }, + { 97, 106, -32 }, + { 107, 107, 8383 }, + { 108, 114, -32 }, + { 115, 115, 268 }, + { 116, 122, -32 }, + { 181, 181, 743 }, + { 192, 214, 32 }, + { 216, 222, 32 }, + { 223, 223, 7615 }, + { 224, 228, -32 }, + { 229, 229, 8262 }, + { 230, 246, -32 }, + { 248, 254, -32 }, + { 255, 255, 121 }, + { 256, 303, EvenOdd }, + { 306, 311, EvenOdd }, + { 313, 328, OddEven }, + { 330, 375, EvenOdd }, + { 376, 376, -121 }, + { 377, 382, OddEven }, + { 383, 383, -300 }, + { 384, 384, 195 }, + { 385, 385, 210 }, + { 386, 389, EvenOdd }, + { 390, 390, 206 }, + { 391, 392, OddEven }, + { 393, 394, 205 }, + { 395, 396, OddEven }, + { 398, 398, 79 }, + { 399, 399, 202 }, + { 400, 400, 203 }, + { 401, 402, OddEven }, + { 403, 403, 205 }, + { 404, 404, 207 }, + { 405, 405, 97 }, + { 406, 406, 211 }, + { 407, 407, 209 }, + { 408, 409, EvenOdd }, + { 410, 410, 163 }, + { 412, 412, 211 }, + { 413, 413, 213 }, + { 414, 414, 130 }, + { 415, 415, 214 }, + { 416, 421, EvenOdd }, + { 422, 422, 218 }, + { 423, 424, OddEven }, + { 425, 425, 218 }, + { 428, 429, EvenOdd }, + { 430, 430, 218 }, + { 431, 432, OddEven }, + { 433, 434, 217 }, + { 435, 438, OddEven }, + { 439, 439, 219 }, + { 440, 441, EvenOdd }, + { 444, 445, EvenOdd }, + { 447, 447, 56 }, + { 452, 452, EvenOdd }, + { 453, 453, OddEven }, + { 454, 454, -2 }, + { 455, 455, OddEven }, + { 456, 456, EvenOdd }, + { 457, 457, -2 }, + { 458, 458, EvenOdd }, + { 459, 459, OddEven }, + { 460, 460, -2 }, + { 461, 476, OddEven }, + { 477, 477, -79 }, + { 478, 495, EvenOdd }, + { 497, 497, OddEven }, + { 498, 498, EvenOdd }, + { 499, 499, -2 }, + { 500, 501, EvenOdd }, + { 502, 502, -97 }, + { 503, 503, -56 }, + { 504, 543, EvenOdd }, + { 544, 544, -130 }, + { 546, 563, EvenOdd }, + { 570, 570, 10795 }, + { 571, 572, OddEven }, + { 573, 
573, -163 }, + { 574, 574, 10792 }, + { 575, 576, 10815 }, + { 577, 578, OddEven }, + { 579, 579, -195 }, + { 580, 580, 69 }, + { 581, 581, 71 }, + { 582, 591, EvenOdd }, + { 592, 592, 10783 }, + { 593, 593, 10780 }, + { 594, 594, 10782 }, + { 595, 595, -210 }, + { 596, 596, -206 }, + { 598, 599, -205 }, + { 601, 601, -202 }, + { 603, 603, -203 }, + { 608, 608, -205 }, + { 611, 611, -207 }, + { 613, 613, 42280 }, + { 614, 614, 42308 }, + { 616, 616, -209 }, + { 617, 617, -211 }, + { 619, 619, 10743 }, + { 623, 623, -211 }, + { 625, 625, 10749 }, + { 626, 626, -213 }, + { 629, 629, -214 }, + { 637, 637, 10727 }, + { 640, 640, -218 }, + { 643, 643, -218 }, + { 648, 648, -218 }, + { 649, 649, -69 }, + { 650, 651, -217 }, + { 652, 652, -71 }, + { 658, 658, -219 }, + { 837, 837, 84 }, + { 880, 883, EvenOdd }, + { 886, 887, EvenOdd }, + { 891, 893, 130 }, + { 902, 902, 38 }, + { 904, 906, 37 }, + { 908, 908, 64 }, + { 910, 911, 63 }, + { 913, 929, 32 }, + { 931, 931, 31 }, + { 932, 939, 32 }, + { 940, 940, -38 }, + { 941, 943, -37 }, + { 945, 945, -32 }, + { 946, 946, 30 }, + { 947, 948, -32 }, + { 949, 949, 64 }, + { 950, 951, -32 }, + { 952, 952, 25 }, + { 953, 953, 7173 }, + { 954, 954, 54 }, + { 955, 955, -32 }, + { 956, 956, -775 }, + { 957, 959, -32 }, + { 960, 960, 22 }, + { 961, 961, 48 }, + { 962, 962, EvenOdd }, + { 963, 965, -32 }, + { 966, 966, 15 }, + { 967, 968, -32 }, + { 969, 969, 7517 }, + { 970, 971, -32 }, + { 972, 972, -64 }, + { 973, 974, -63 }, + { 975, 975, 8 }, + { 976, 976, -62 }, + { 977, 977, 35 }, + { 981, 981, -47 }, + { 982, 982, -54 }, + { 983, 983, -8 }, + { 984, 1007, EvenOdd }, + { 1008, 1008, -86 }, + { 1009, 1009, -80 }, + { 1010, 1010, 7 }, + { 1012, 1012, -92 }, + { 1013, 1013, -96 }, + { 1015, 1016, OddEven }, + { 1017, 1017, -7 }, + { 1018, 1019, EvenOdd }, + { 1021, 1023, -130 }, + { 1024, 1039, 80 }, + { 1040, 1071, 32 }, + { 1072, 1103, -32 }, + { 1104, 1119, -80 }, + { 1120, 1153, EvenOdd }, + { 1162, 1215, EvenOdd }, + { 1216, 
1216, 15 }, + { 1217, 1230, OddEven }, + { 1231, 1231, -15 }, + { 1232, 1319, EvenOdd }, + { 1329, 1366, 48 }, + { 1377, 1414, -48 }, + { 4256, 4293, 7264 }, + { 4295, 4295, 7264 }, + { 4301, 4301, 7264 }, + { 7545, 7545, 35332 }, + { 7549, 7549, 3814 }, + { 7680, 7776, EvenOdd }, + { 7777, 7777, 58 }, + { 7778, 7829, EvenOdd }, + { 7835, 7835, -59 }, + { 7838, 7838, -7615 }, + { 7840, 7935, EvenOdd }, + { 7936, 7943, 8 }, + { 7944, 7951, -8 }, + { 7952, 7957, 8 }, + { 7960, 7965, -8 }, + { 7968, 7975, 8 }, + { 7976, 7983, -8 }, + { 7984, 7991, 8 }, + { 7992, 7999, -8 }, + { 8000, 8005, 8 }, + { 8008, 8013, -8 }, + { 8017, 8017, 8 }, + { 8019, 8019, 8 }, + { 8021, 8021, 8 }, + { 8023, 8023, 8 }, + { 8025, 8025, -8 }, + { 8027, 8027, -8 }, + { 8029, 8029, -8 }, + { 8031, 8031, -8 }, + { 8032, 8039, 8 }, + { 8040, 8047, -8 }, + { 8048, 8049, 74 }, + { 8050, 8053, 86 }, + { 8054, 8055, 100 }, + { 8056, 8057, 128 }, + { 8058, 8059, 112 }, + { 8060, 8061, 126 }, + { 8064, 8071, 8 }, + { 8072, 8079, -8 }, + { 8080, 8087, 8 }, + { 8088, 8095, -8 }, + { 8096, 8103, 8 }, + { 8104, 8111, -8 }, + { 8112, 8113, 8 }, + { 8115, 8115, 9 }, + { 8120, 8121, -8 }, + { 8122, 8123, -74 }, + { 8124, 8124, -9 }, + { 8126, 8126, -7289 }, + { 8131, 8131, 9 }, + { 8136, 8139, -86 }, + { 8140, 8140, -9 }, + { 8144, 8145, 8 }, + { 8152, 8153, -8 }, + { 8154, 8155, -100 }, + { 8160, 8161, 8 }, + { 8165, 8165, 7 }, + { 8168, 8169, -8 }, + { 8170, 8171, -112 }, + { 8172, 8172, -7 }, + { 8179, 8179, 9 }, + { 8184, 8185, -128 }, + { 8186, 8187, -126 }, + { 8188, 8188, -9 }, + { 8486, 8486, -7549 }, + { 8490, 8490, -8415 }, + { 8491, 8491, -8294 }, + { 8498, 8498, 28 }, + { 8526, 8526, -28 }, + { 8544, 8559, 16 }, + { 8560, 8575, -16 }, + { 8579, 8580, OddEven }, + { 9398, 9423, 26 }, + { 9424, 9449, -26 }, + { 11264, 11310, 48 }, + { 11312, 11358, -48 }, + { 11360, 11361, EvenOdd }, + { 11362, 11362, -10743 }, + { 11363, 11363, -3814 }, + { 11364, 11364, -10727 }, + { 11365, 11365, -10795 }, + { 
11366, 11366, -10792 }, + { 11367, 11372, OddEven }, + { 11373, 11373, -10780 }, + { 11374, 11374, -10749 }, + { 11375, 11375, -10783 }, + { 11376, 11376, -10782 }, + { 11378, 11379, EvenOdd }, + { 11381, 11382, OddEven }, + { 11390, 11391, -10815 }, + { 11392, 11491, EvenOdd }, + { 11499, 11502, OddEven }, + { 11506, 11507, EvenOdd }, + { 11520, 11557, -7264 }, + { 11559, 11559, -7264 }, + { 11565, 11565, -7264 }, + { 42560, 42605, EvenOdd }, + { 42624, 42647, EvenOdd }, + { 42786, 42799, EvenOdd }, + { 42802, 42863, EvenOdd }, + { 42873, 42876, OddEven }, + { 42877, 42877, -35332 }, + { 42878, 42887, EvenOdd }, + { 42891, 42892, OddEven }, + { 42893, 42893, -42280 }, + { 42896, 42899, EvenOdd }, + { 42912, 42921, EvenOdd }, + { 42922, 42922, -42308 }, + { 65313, 65338, 32 }, + { 65345, 65370, -32 }, + { 66560, 66599, 40 }, + { 66600, 66639, -40 }, +}; +const int num_unicode_casefold = 289; + +// 1034 groups, 1055 pairs, 167 ranges +const CaseFold unicode_tolower[] = { + { 65, 90, 32 }, + { 181, 181, 775 }, + { 192, 214, 32 }, + { 216, 222, 32 }, + { 256, 302, EvenOddSkip }, + { 306, 310, EvenOddSkip }, + { 313, 327, OddEvenSkip }, + { 330, 374, EvenOddSkip }, + { 376, 376, -121 }, + { 377, 381, OddEvenSkip }, + { 383, 383, -268 }, + { 385, 385, 210 }, + { 386, 388, EvenOddSkip }, + { 390, 390, 206 }, + { 391, 391, OddEven }, + { 393, 394, 205 }, + { 395, 395, OddEven }, + { 398, 398, 79 }, + { 399, 399, 202 }, + { 400, 400, 203 }, + { 401, 401, OddEven }, + { 403, 403, 205 }, + { 404, 404, 207 }, + { 406, 406, 211 }, + { 407, 407, 209 }, + { 408, 408, EvenOdd }, + { 412, 412, 211 }, + { 413, 413, 213 }, + { 415, 415, 214 }, + { 416, 420, EvenOddSkip }, + { 422, 422, 218 }, + { 423, 423, OddEven }, + { 425, 425, 218 }, + { 428, 428, EvenOdd }, + { 430, 430, 218 }, + { 431, 431, OddEven }, + { 433, 434, 217 }, + { 435, 437, OddEvenSkip }, + { 439, 439, 219 }, + { 440, 440, EvenOdd }, + { 444, 444, EvenOdd }, + { 452, 452, 2 }, + { 453, 453, OddEven }, + { 455, 455, 
2 }, + { 456, 456, EvenOdd }, + { 458, 458, 2 }, + { 459, 475, OddEvenSkip }, + { 478, 494, EvenOddSkip }, + { 497, 497, 2 }, + { 498, 500, EvenOddSkip }, + { 502, 502, -97 }, + { 503, 503, -56 }, + { 504, 542, EvenOddSkip }, + { 544, 544, -130 }, + { 546, 562, EvenOddSkip }, + { 570, 570, 10795 }, + { 571, 571, OddEven }, + { 573, 573, -163 }, + { 574, 574, 10792 }, + { 577, 577, OddEven }, + { 579, 579, -195 }, + { 580, 580, 69 }, + { 581, 581, 71 }, + { 582, 590, EvenOddSkip }, + { 837, 837, 116 }, + { 880, 882, EvenOddSkip }, + { 886, 886, EvenOdd }, + { 902, 902, 38 }, + { 904, 906, 37 }, + { 908, 908, 64 }, + { 910, 911, 63 }, + { 913, 929, 32 }, + { 931, 939, 32 }, + { 962, 962, EvenOdd }, + { 975, 975, 8 }, + { 976, 976, -30 }, + { 977, 977, -25 }, + { 981, 981, -15 }, + { 982, 982, -22 }, + { 984, 1006, EvenOddSkip }, + { 1008, 1008, -54 }, + { 1009, 1009, -48 }, + { 1012, 1012, -60 }, + { 1013, 1013, -64 }, + { 1015, 1015, OddEven }, + { 1017, 1017, -7 }, + { 1018, 1018, EvenOdd }, + { 1021, 1023, -130 }, + { 1024, 1039, 80 }, + { 1040, 1071, 32 }, + { 1120, 1152, EvenOddSkip }, + { 1162, 1214, EvenOddSkip }, + { 1216, 1216, 15 }, + { 1217, 1229, OddEvenSkip }, + { 1232, 1318, EvenOddSkip }, + { 1329, 1366, 48 }, + { 4256, 4293, 7264 }, + { 4295, 4295, 7264 }, + { 4301, 4301, 7264 }, + { 7680, 7828, EvenOddSkip }, + { 7835, 7835, -58 }, + { 7838, 7838, -7615 }, + { 7840, 7934, EvenOddSkip }, + { 7944, 7951, -8 }, + { 7960, 7965, -8 }, + { 7976, 7983, -8 }, + { 7992, 7999, -8 }, + { 8008, 8013, -8 }, + { 8025, 8025, -8 }, + { 8027, 8027, -8 }, + { 8029, 8029, -8 }, + { 8031, 8031, -8 }, + { 8040, 8047, -8 }, + { 8072, 8079, -8 }, + { 8088, 8095, -8 }, + { 8104, 8111, -8 }, + { 8120, 8121, -8 }, + { 8122, 8123, -74 }, + { 8124, 8124, -9 }, + { 8126, 8126, -7173 }, + { 8136, 8139, -86 }, + { 8140, 8140, -9 }, + { 8152, 8153, -8 }, + { 8154, 8155, -100 }, + { 8168, 8169, -8 }, + { 8170, 8171, -112 }, + { 8172, 8172, -7 }, + { 8184, 8185, -128 }, + { 8186, 
8187, -126 }, + { 8188, 8188, -9 }, + { 8486, 8486, -7517 }, + { 8490, 8490, -8383 }, + { 8491, 8491, -8262 }, + { 8498, 8498, 28 }, + { 8544, 8559, 16 }, + { 8579, 8579, OddEven }, + { 9398, 9423, 26 }, + { 11264, 11310, 48 }, + { 11360, 11360, EvenOdd }, + { 11362, 11362, -10743 }, + { 11363, 11363, -3814 }, + { 11364, 11364, -10727 }, + { 11367, 11371, OddEvenSkip }, + { 11373, 11373, -10780 }, + { 11374, 11374, -10749 }, + { 11375, 11375, -10783 }, + { 11376, 11376, -10782 }, + { 11378, 11378, EvenOdd }, + { 11381, 11381, OddEven }, + { 11390, 11391, -10815 }, + { 11392, 11490, EvenOddSkip }, + { 11499, 11501, OddEvenSkip }, + { 11506, 11506, EvenOdd }, + { 42560, 42604, EvenOddSkip }, + { 42624, 42646, EvenOddSkip }, + { 42786, 42798, EvenOddSkip }, + { 42802, 42862, EvenOddSkip }, + { 42873, 42875, OddEvenSkip }, + { 42877, 42877, -35332 }, + { 42878, 42886, EvenOddSkip }, + { 42891, 42891, OddEven }, + { 42893, 42893, -42280 }, + { 42896, 42898, EvenOddSkip }, + { 42912, 42920, EvenOddSkip }, + { 42922, 42922, -42308 }, + { 65313, 65338, 32 }, + { 66560, 66599, 40 }, +}; +const int num_unicode_tolower = 167; + + + +} // namespace re2 + + diff --git a/src/openalpr/support/re2/unicode_casefold.h b/src/openalpr/support/re2/unicode_casefold.h new file mode 100644 index 0000000..1671140 --- /dev/null +++ b/src/openalpr/support/re2/unicode_casefold.h @@ -0,0 +1,75 @@ +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Unicode case folding tables. + +// The Unicode case folding tables encode the mapping from one Unicode point +// to the next largest Unicode point with equivalent folding. The largest +// point wraps back to the first. For example, the tables map: +// +// 'A' -> 'a' +// 'a' -> 'A' +// +// 'K' -> 'k' +// 'k' -> 'K' (Kelvin symbol) +// 'K' -> 'K' +// +// Like everything Unicode, these tables are big. 
If we represent the table +// as a sorted list of uint32 pairs, it has 2049 entries and is 16 kB. +// Most table entries look like the ones around them: +// 'A' maps to 'A'+32, 'B' maps to 'B'+32, etc. +// Instead of listing all the pairs explicitly, we make a list of ranges +// and deltas, so that the table entries for 'A' through 'Z' can be represented +// as a single entry { 'A', 'Z', +32 }. +// +// In addition to blocks that map to each other (A-Z mapping to a-z) +// there are blocks of pairs that individually map to each other +// (for example, 0100<->0101, 0102<->0103, 0104<->0105, ...). +// For those, the special delta value EvenOdd marks even/odd pairs +// (if even, add 1; if odd, subtract 1), and OddEven marks odd/even pairs. +// +// In this form, the table has 274 entries, about 3kB. If we were to split +// the table into one for 16-bit codes and an overflow table for larger ones, +// we could get it down to about 1.5kB, but that's not worth the complexity. +// +// The grouped form also allows for efficient fold range calculations +// rather than looping one character at a time. + +#ifndef RE2_UNICODE_CASEFOLD_H__ +#define RE2_UNICODE_CASEFOLD_H__ + +#include "util/util.h" + +namespace re2 { + +enum { + EvenOdd = 1, + OddEven = -1, + EvenOddSkip = 1<<30, + OddEvenSkip, +}; + +struct CaseFold { + Rune lo; + Rune hi; + int32 delta; +}; + +extern const CaseFold unicode_casefold[]; +extern const int num_unicode_casefold; + +extern const CaseFold unicode_tolower[]; +extern const int num_unicode_tolower; + +// Returns the CaseFold* in the tables that contains rune. +// If rune is not in the tables, returns the first CaseFold* after rune. +// If rune is larger than any value in the tables, returns NULL. +extern const CaseFold* LookupCaseFold(const CaseFold*, int, Rune rune); + +// Returns the result of applying the fold f to the rune r. 
+extern Rune ApplyFold(const CaseFold *f, Rune r); + +} // namespace re2 + +#endif // RE2_UNICODE_CASEFOLD_H__ diff --git a/src/openalpr/support/re2/unicode_groups.cc b/src/openalpr/support/re2/unicode_groups.cc new file mode 100644 index 0000000..0df585e --- /dev/null +++ b/src/openalpr/support/re2/unicode_groups.cc @@ -0,0 +1,5078 @@ + +// GENERATED BY make_unicode_groups.py; DO NOT EDIT. +// make_unicode_groups.py >unicode_groups.cc + +#include "re2/unicode_groups.h" + +namespace re2 { + + +static const URange16 Ps_range16[] = { + { 40, 40 }, + { 91, 91 }, + { 123, 123 }, + { 3898, 3898 }, + { 3900, 3900 }, + { 5787, 5787 }, + { 8218, 8218 }, + { 8222, 8222 }, + { 8261, 8261 }, + { 8317, 8317 }, + { 8333, 8333 }, + { 8968, 8968 }, + { 8970, 8970 }, + { 9001, 9001 }, + { 10088, 10088 }, + { 10090, 10090 }, + { 10092, 10092 }, + { 10094, 10094 }, + { 10096, 10096 }, + { 10098, 10098 }, + { 10100, 10100 }, + { 10181, 10181 }, + { 10214, 10214 }, + { 10216, 10216 }, + { 10218, 10218 }, + { 10220, 10220 }, + { 10222, 10222 }, + { 10627, 10627 }, + { 10629, 10629 }, + { 10631, 10631 }, + { 10633, 10633 }, + { 10635, 10635 }, + { 10637, 10637 }, + { 10639, 10639 }, + { 10641, 10641 }, + { 10643, 10643 }, + { 10645, 10645 }, + { 10647, 10647 }, + { 10712, 10712 }, + { 10714, 10714 }, + { 10748, 10748 }, + { 11810, 11810 }, + { 11812, 11812 }, + { 11814, 11814 }, + { 11816, 11816 }, + { 12296, 12296 }, + { 12298, 12298 }, + { 12300, 12300 }, + { 12302, 12302 }, + { 12304, 12304 }, + { 12308, 12308 }, + { 12310, 12310 }, + { 12312, 12312 }, + { 12314, 12314 }, + { 12317, 12317 }, + { 64830, 64830 }, + { 65047, 65047 }, + { 65077, 65077 }, + { 65079, 65079 }, + { 65081, 65081 }, + { 65083, 65083 }, + { 65085, 65085 }, + { 65087, 65087 }, + { 65089, 65089 }, + { 65091, 65091 }, + { 65095, 65095 }, + { 65113, 65113 }, + { 65115, 65115 }, + { 65117, 65117 }, + { 65288, 65288 }, + { 65339, 65339 }, + { 65371, 65371 }, + { 65375, 65375 }, + { 65378, 65378 }, +}; +static const 
URange16 Nl_range16[] = { + { 5870, 5872 }, + { 8544, 8578 }, + { 8581, 8584 }, + { 12295, 12295 }, + { 12321, 12329 }, + { 12344, 12346 }, + { 42726, 42735 }, +}; +static const URange32 Nl_range32[] = { + { 65856, 65908 }, + { 66369, 66369 }, + { 66378, 66378 }, + { 66513, 66517 }, + { 74752, 74850 }, +}; +static const URange16 No_range16[] = { + { 178, 179 }, + { 185, 185 }, + { 188, 190 }, + { 2548, 2553 }, + { 2930, 2935 }, + { 3056, 3058 }, + { 3192, 3198 }, + { 3440, 3445 }, + { 3882, 3891 }, + { 4969, 4988 }, + { 6128, 6137 }, + { 6618, 6618 }, + { 8304, 8304 }, + { 8308, 8313 }, + { 8320, 8329 }, + { 8528, 8543 }, + { 8585, 8585 }, + { 9312, 9371 }, + { 9450, 9471 }, + { 10102, 10131 }, + { 11517, 11517 }, + { 12690, 12693 }, + { 12832, 12841 }, + { 12872, 12879 }, + { 12881, 12895 }, + { 12928, 12937 }, + { 12977, 12991 }, + { 43056, 43061 }, +}; +static const URange32 No_range32[] = { + { 65799, 65843 }, + { 65909, 65912 }, + { 65930, 65930 }, + { 66336, 66339 }, + { 67672, 67679 }, + { 67862, 67867 }, + { 68160, 68167 }, + { 68221, 68222 }, + { 68440, 68447 }, + { 68472, 68479 }, + { 69216, 69246 }, + { 69714, 69733 }, + { 119648, 119665 }, + { 127232, 127242 }, +}; +static const URange16 Lo_range16[] = { + { 170, 170 }, + { 186, 186 }, + { 443, 443 }, + { 448, 451 }, + { 660, 660 }, + { 1488, 1514 }, + { 1520, 1522 }, + { 1568, 1599 }, + { 1601, 1610 }, + { 1646, 1647 }, + { 1649, 1747 }, + { 1749, 1749 }, + { 1774, 1775 }, + { 1786, 1788 }, + { 1791, 1791 }, + { 1808, 1808 }, + { 1810, 1839 }, + { 1869, 1957 }, + { 1969, 1969 }, + { 1994, 2026 }, + { 2048, 2069 }, + { 2112, 2136 }, + { 2208, 2208 }, + { 2210, 2220 }, + { 2308, 2361 }, + { 2365, 2365 }, + { 2384, 2384 }, + { 2392, 2401 }, + { 2418, 2423 }, + { 2425, 2431 }, + { 2437, 2444 }, + { 2447, 2448 }, + { 2451, 2472 }, + { 2474, 2480 }, + { 2482, 2482 }, + { 2486, 2489 }, + { 2493, 2493 }, + { 2510, 2510 }, + { 2524, 2525 }, + { 2527, 2529 }, + { 2544, 2545 }, + { 2565, 2570 }, + { 2575, 2576 }, 
+ { 2579, 2600 }, + { 2602, 2608 }, + { 2610, 2611 }, + { 2613, 2614 }, + { 2616, 2617 }, + { 2649, 2652 }, + { 2654, 2654 }, + { 2674, 2676 }, + { 2693, 2701 }, + { 2703, 2705 }, + { 2707, 2728 }, + { 2730, 2736 }, + { 2738, 2739 }, + { 2741, 2745 }, + { 2749, 2749 }, + { 2768, 2768 }, + { 2784, 2785 }, + { 2821, 2828 }, + { 2831, 2832 }, + { 2835, 2856 }, + { 2858, 2864 }, + { 2866, 2867 }, + { 2869, 2873 }, + { 2877, 2877 }, + { 2908, 2909 }, + { 2911, 2913 }, + { 2929, 2929 }, + { 2947, 2947 }, + { 2949, 2954 }, + { 2958, 2960 }, + { 2962, 2965 }, + { 2969, 2970 }, + { 2972, 2972 }, + { 2974, 2975 }, + { 2979, 2980 }, + { 2984, 2986 }, + { 2990, 3001 }, + { 3024, 3024 }, + { 3077, 3084 }, + { 3086, 3088 }, + { 3090, 3112 }, + { 3114, 3123 }, + { 3125, 3129 }, + { 3133, 3133 }, + { 3160, 3161 }, + { 3168, 3169 }, + { 3205, 3212 }, + { 3214, 3216 }, + { 3218, 3240 }, + { 3242, 3251 }, + { 3253, 3257 }, + { 3261, 3261 }, + { 3294, 3294 }, + { 3296, 3297 }, + { 3313, 3314 }, + { 3333, 3340 }, + { 3342, 3344 }, + { 3346, 3386 }, + { 3389, 3389 }, + { 3406, 3406 }, + { 3424, 3425 }, + { 3450, 3455 }, + { 3461, 3478 }, + { 3482, 3505 }, + { 3507, 3515 }, + { 3517, 3517 }, + { 3520, 3526 }, + { 3585, 3632 }, + { 3634, 3635 }, + { 3648, 3653 }, + { 3713, 3714 }, + { 3716, 3716 }, + { 3719, 3720 }, + { 3722, 3722 }, + { 3725, 3725 }, + { 3732, 3735 }, + { 3737, 3743 }, + { 3745, 3747 }, + { 3749, 3749 }, + { 3751, 3751 }, + { 3754, 3755 }, + { 3757, 3760 }, + { 3762, 3763 }, + { 3773, 3773 }, + { 3776, 3780 }, + { 3804, 3807 }, + { 3840, 3840 }, + { 3904, 3911 }, + { 3913, 3948 }, + { 3976, 3980 }, + { 4096, 4138 }, + { 4159, 4159 }, + { 4176, 4181 }, + { 4186, 4189 }, + { 4193, 4193 }, + { 4197, 4198 }, + { 4206, 4208 }, + { 4213, 4225 }, + { 4238, 4238 }, + { 4304, 4346 }, + { 4349, 4680 }, + { 4682, 4685 }, + { 4688, 4694 }, + { 4696, 4696 }, + { 4698, 4701 }, + { 4704, 4744 }, + { 4746, 4749 }, + { 4752, 4784 }, + { 4786, 4789 }, + { 4792, 4798 }, + { 4800, 4800 }, + 
{ 4802, 4805 }, + { 4808, 4822 }, + { 4824, 4880 }, + { 4882, 4885 }, + { 4888, 4954 }, + { 4992, 5007 }, + { 5024, 5108 }, + { 5121, 5740 }, + { 5743, 5759 }, + { 5761, 5786 }, + { 5792, 5866 }, + { 5888, 5900 }, + { 5902, 5905 }, + { 5920, 5937 }, + { 5952, 5969 }, + { 5984, 5996 }, + { 5998, 6000 }, + { 6016, 6067 }, + { 6108, 6108 }, + { 6176, 6210 }, + { 6212, 6263 }, + { 6272, 6312 }, + { 6314, 6314 }, + { 6320, 6389 }, + { 6400, 6428 }, + { 6480, 6509 }, + { 6512, 6516 }, + { 6528, 6571 }, + { 6593, 6599 }, + { 6656, 6678 }, + { 6688, 6740 }, + { 6917, 6963 }, + { 6981, 6987 }, + { 7043, 7072 }, + { 7086, 7087 }, + { 7098, 7141 }, + { 7168, 7203 }, + { 7245, 7247 }, + { 7258, 7287 }, + { 7401, 7404 }, + { 7406, 7409 }, + { 7413, 7414 }, + { 8501, 8504 }, + { 11568, 11623 }, + { 11648, 11670 }, + { 11680, 11686 }, + { 11688, 11694 }, + { 11696, 11702 }, + { 11704, 11710 }, + { 11712, 11718 }, + { 11720, 11726 }, + { 11728, 11734 }, + { 11736, 11742 }, + { 12294, 12294 }, + { 12348, 12348 }, + { 12353, 12438 }, + { 12447, 12447 }, + { 12449, 12538 }, + { 12543, 12543 }, + { 12549, 12589 }, + { 12593, 12686 }, + { 12704, 12730 }, + { 12784, 12799 }, + { 13312, 19893 }, + { 19968, 40908 }, + { 40960, 40980 }, + { 40982, 42124 }, + { 42192, 42231 }, + { 42240, 42507 }, + { 42512, 42527 }, + { 42538, 42539 }, + { 42606, 42606 }, + { 42656, 42725 }, + { 43003, 43009 }, + { 43011, 43013 }, + { 43015, 43018 }, + { 43020, 43042 }, + { 43072, 43123 }, + { 43138, 43187 }, + { 43250, 43255 }, + { 43259, 43259 }, + { 43274, 43301 }, + { 43312, 43334 }, + { 43360, 43388 }, + { 43396, 43442 }, + { 43520, 43560 }, + { 43584, 43586 }, + { 43588, 43595 }, + { 43616, 43631 }, + { 43633, 43638 }, + { 43642, 43642 }, + { 43648, 43695 }, + { 43697, 43697 }, + { 43701, 43702 }, + { 43705, 43709 }, + { 43712, 43712 }, + { 43714, 43714 }, + { 43739, 43740 }, + { 43744, 43754 }, + { 43762, 43762 }, + { 43777, 43782 }, + { 43785, 43790 }, + { 43793, 43798 }, + { 43808, 43814 }, + { 
43816, 43822 }, + { 43968, 44002 }, + { 44032, 55203 }, + { 55216, 55238 }, + { 55243, 55291 }, + { 63744, 64109 }, + { 64112, 64217 }, + { 64285, 64285 }, + { 64287, 64296 }, + { 64298, 64310 }, + { 64312, 64316 }, + { 64318, 64318 }, + { 64320, 64321 }, + { 64323, 64324 }, + { 64326, 64433 }, + { 64467, 64829 }, + { 64848, 64911 }, + { 64914, 64967 }, + { 65008, 65019 }, + { 65136, 65140 }, + { 65142, 65276 }, + { 65382, 65391 }, + { 65393, 65437 }, + { 65440, 65470 }, + { 65474, 65479 }, + { 65482, 65487 }, + { 65490, 65495 }, + { 65498, 65500 }, +}; +static const URange32 Lo_range32[] = { + { 65536, 65547 }, + { 65549, 65574 }, + { 65576, 65594 }, + { 65596, 65597 }, + { 65599, 65613 }, + { 65616, 65629 }, + { 65664, 65786 }, + { 66176, 66204 }, + { 66208, 66256 }, + { 66304, 66334 }, + { 66352, 66368 }, + { 66370, 66377 }, + { 66432, 66461 }, + { 66464, 66499 }, + { 66504, 66511 }, + { 66640, 66717 }, + { 67584, 67589 }, + { 67592, 67592 }, + { 67594, 67637 }, + { 67639, 67640 }, + { 67644, 67644 }, + { 67647, 67669 }, + { 67840, 67861 }, + { 67872, 67897 }, + { 67968, 68023 }, + { 68030, 68031 }, + { 68096, 68096 }, + { 68112, 68115 }, + { 68117, 68119 }, + { 68121, 68147 }, + { 68192, 68220 }, + { 68352, 68405 }, + { 68416, 68437 }, + { 68448, 68466 }, + { 68608, 68680 }, + { 69635, 69687 }, + { 69763, 69807 }, + { 69840, 69864 }, + { 69891, 69926 }, + { 70019, 70066 }, + { 70081, 70084 }, + { 71296, 71338 }, + { 73728, 74606 }, + { 77824, 78894 }, + { 92160, 92728 }, + { 93952, 94020 }, + { 94032, 94032 }, + { 110592, 110593 }, + { 126464, 126467 }, + { 126469, 126495 }, + { 126497, 126498 }, + { 126500, 126500 }, + { 126503, 126503 }, + { 126505, 126514 }, + { 126516, 126519 }, + { 126521, 126521 }, + { 126523, 126523 }, + { 126530, 126530 }, + { 126535, 126535 }, + { 126537, 126537 }, + { 126539, 126539 }, + { 126541, 126543 }, + { 126545, 126546 }, + { 126548, 126548 }, + { 126551, 126551 }, + { 126553, 126553 }, + { 126555, 126555 }, + { 126557, 126557 
}, + { 126559, 126559 }, + { 126561, 126562 }, + { 126564, 126564 }, + { 126567, 126570 }, + { 126572, 126578 }, + { 126580, 126583 }, + { 126585, 126588 }, + { 126590, 126590 }, + { 126592, 126601 }, + { 126603, 126619 }, + { 126625, 126627 }, + { 126629, 126633 }, + { 126635, 126651 }, + { 131072, 173782 }, + { 173824, 177972 }, + { 177984, 178205 }, + { 194560, 195101 }, +}; +static const URange16 Ll_range16[] = { + { 97, 122 }, + { 181, 181 }, + { 223, 246 }, + { 248, 255 }, + { 257, 257 }, + { 259, 259 }, + { 261, 261 }, + { 263, 263 }, + { 265, 265 }, + { 267, 267 }, + { 269, 269 }, + { 271, 271 }, + { 273, 273 }, + { 275, 275 }, + { 277, 277 }, + { 279, 279 }, + { 281, 281 }, + { 283, 283 }, + { 285, 285 }, + { 287, 287 }, + { 289, 289 }, + { 291, 291 }, + { 293, 293 }, + { 295, 295 }, + { 297, 297 }, + { 299, 299 }, + { 301, 301 }, + { 303, 303 }, + { 305, 305 }, + { 307, 307 }, + { 309, 309 }, + { 311, 312 }, + { 314, 314 }, + { 316, 316 }, + { 318, 318 }, + { 320, 320 }, + { 322, 322 }, + { 324, 324 }, + { 326, 326 }, + { 328, 329 }, + { 331, 331 }, + { 333, 333 }, + { 335, 335 }, + { 337, 337 }, + { 339, 339 }, + { 341, 341 }, + { 343, 343 }, + { 345, 345 }, + { 347, 347 }, + { 349, 349 }, + { 351, 351 }, + { 353, 353 }, + { 355, 355 }, + { 357, 357 }, + { 359, 359 }, + { 361, 361 }, + { 363, 363 }, + { 365, 365 }, + { 367, 367 }, + { 369, 369 }, + { 371, 371 }, + { 373, 373 }, + { 375, 375 }, + { 378, 378 }, + { 380, 380 }, + { 382, 384 }, + { 387, 387 }, + { 389, 389 }, + { 392, 392 }, + { 396, 397 }, + { 402, 402 }, + { 405, 405 }, + { 409, 411 }, + { 414, 414 }, + { 417, 417 }, + { 419, 419 }, + { 421, 421 }, + { 424, 424 }, + { 426, 427 }, + { 429, 429 }, + { 432, 432 }, + { 436, 436 }, + { 438, 438 }, + { 441, 442 }, + { 445, 447 }, + { 454, 454 }, + { 457, 457 }, + { 460, 460 }, + { 462, 462 }, + { 464, 464 }, + { 466, 466 }, + { 468, 468 }, + { 470, 470 }, + { 472, 472 }, + { 474, 474 }, + { 476, 477 }, + { 479, 479 }, + { 481, 481 }, + { 483, 
483 }, + { 485, 485 }, + { 487, 487 }, + { 489, 489 }, + { 491, 491 }, + { 493, 493 }, + { 495, 496 }, + { 499, 499 }, + { 501, 501 }, + { 505, 505 }, + { 507, 507 }, + { 509, 509 }, + { 511, 511 }, + { 513, 513 }, + { 515, 515 }, + { 517, 517 }, + { 519, 519 }, + { 521, 521 }, + { 523, 523 }, + { 525, 525 }, + { 527, 527 }, + { 529, 529 }, + { 531, 531 }, + { 533, 533 }, + { 535, 535 }, + { 537, 537 }, + { 539, 539 }, + { 541, 541 }, + { 543, 543 }, + { 545, 545 }, + { 547, 547 }, + { 549, 549 }, + { 551, 551 }, + { 553, 553 }, + { 555, 555 }, + { 557, 557 }, + { 559, 559 }, + { 561, 561 }, + { 563, 569 }, + { 572, 572 }, + { 575, 576 }, + { 578, 578 }, + { 583, 583 }, + { 585, 585 }, + { 587, 587 }, + { 589, 589 }, + { 591, 659 }, + { 661, 687 }, + { 881, 881 }, + { 883, 883 }, + { 887, 887 }, + { 891, 893 }, + { 912, 912 }, + { 940, 974 }, + { 976, 977 }, + { 981, 983 }, + { 985, 985 }, + { 987, 987 }, + { 989, 989 }, + { 991, 991 }, + { 993, 993 }, + { 995, 995 }, + { 997, 997 }, + { 999, 999 }, + { 1001, 1001 }, + { 1003, 1003 }, + { 1005, 1005 }, + { 1007, 1011 }, + { 1013, 1013 }, + { 1016, 1016 }, + { 1019, 1020 }, + { 1072, 1119 }, + { 1121, 1121 }, + { 1123, 1123 }, + { 1125, 1125 }, + { 1127, 1127 }, + { 1129, 1129 }, + { 1131, 1131 }, + { 1133, 1133 }, + { 1135, 1135 }, + { 1137, 1137 }, + { 1139, 1139 }, + { 1141, 1141 }, + { 1143, 1143 }, + { 1145, 1145 }, + { 1147, 1147 }, + { 1149, 1149 }, + { 1151, 1151 }, + { 1153, 1153 }, + { 1163, 1163 }, + { 1165, 1165 }, + { 1167, 1167 }, + { 1169, 1169 }, + { 1171, 1171 }, + { 1173, 1173 }, + { 1175, 1175 }, + { 1177, 1177 }, + { 1179, 1179 }, + { 1181, 1181 }, + { 1183, 1183 }, + { 1185, 1185 }, + { 1187, 1187 }, + { 1189, 1189 }, + { 1191, 1191 }, + { 1193, 1193 }, + { 1195, 1195 }, + { 1197, 1197 }, + { 1199, 1199 }, + { 1201, 1201 }, + { 1203, 1203 }, + { 1205, 1205 }, + { 1207, 1207 }, + { 1209, 1209 }, + { 1211, 1211 }, + { 1213, 1213 }, + { 1215, 1215 }, + { 1218, 1218 }, + { 1220, 1220 }, + { 1222, 
1222 }, + { 1224, 1224 }, + { 1226, 1226 }, + { 1228, 1228 }, + { 1230, 1231 }, + { 1233, 1233 }, + { 1235, 1235 }, + { 1237, 1237 }, + { 1239, 1239 }, + { 1241, 1241 }, + { 1243, 1243 }, + { 1245, 1245 }, + { 1247, 1247 }, + { 1249, 1249 }, + { 1251, 1251 }, + { 1253, 1253 }, + { 1255, 1255 }, + { 1257, 1257 }, + { 1259, 1259 }, + { 1261, 1261 }, + { 1263, 1263 }, + { 1265, 1265 }, + { 1267, 1267 }, + { 1269, 1269 }, + { 1271, 1271 }, + { 1273, 1273 }, + { 1275, 1275 }, + { 1277, 1277 }, + { 1279, 1279 }, + { 1281, 1281 }, + { 1283, 1283 }, + { 1285, 1285 }, + { 1287, 1287 }, + { 1289, 1289 }, + { 1291, 1291 }, + { 1293, 1293 }, + { 1295, 1295 }, + { 1297, 1297 }, + { 1299, 1299 }, + { 1301, 1301 }, + { 1303, 1303 }, + { 1305, 1305 }, + { 1307, 1307 }, + { 1309, 1309 }, + { 1311, 1311 }, + { 1313, 1313 }, + { 1315, 1315 }, + { 1317, 1317 }, + { 1319, 1319 }, + { 1377, 1415 }, + { 7424, 7467 }, + { 7531, 7543 }, + { 7545, 7578 }, + { 7681, 7681 }, + { 7683, 7683 }, + { 7685, 7685 }, + { 7687, 7687 }, + { 7689, 7689 }, + { 7691, 7691 }, + { 7693, 7693 }, + { 7695, 7695 }, + { 7697, 7697 }, + { 7699, 7699 }, + { 7701, 7701 }, + { 7703, 7703 }, + { 7705, 7705 }, + { 7707, 7707 }, + { 7709, 7709 }, + { 7711, 7711 }, + { 7713, 7713 }, + { 7715, 7715 }, + { 7717, 7717 }, + { 7719, 7719 }, + { 7721, 7721 }, + { 7723, 7723 }, + { 7725, 7725 }, + { 7727, 7727 }, + { 7729, 7729 }, + { 7731, 7731 }, + { 7733, 7733 }, + { 7735, 7735 }, + { 7737, 7737 }, + { 7739, 7739 }, + { 7741, 7741 }, + { 7743, 7743 }, + { 7745, 7745 }, + { 7747, 7747 }, + { 7749, 7749 }, + { 7751, 7751 }, + { 7753, 7753 }, + { 7755, 7755 }, + { 7757, 7757 }, + { 7759, 7759 }, + { 7761, 7761 }, + { 7763, 7763 }, + { 7765, 7765 }, + { 7767, 7767 }, + { 7769, 7769 }, + { 7771, 7771 }, + { 7773, 7773 }, + { 7775, 7775 }, + { 7777, 7777 }, + { 7779, 7779 }, + { 7781, 7781 }, + { 7783, 7783 }, + { 7785, 7785 }, + { 7787, 7787 }, + { 7789, 7789 }, + { 7791, 7791 }, + { 7793, 7793 }, + { 7795, 7795 }, + { 7797, 
7797 }, + { 7799, 7799 }, + { 7801, 7801 }, + { 7803, 7803 }, + { 7805, 7805 }, + { 7807, 7807 }, + { 7809, 7809 }, + { 7811, 7811 }, + { 7813, 7813 }, + { 7815, 7815 }, + { 7817, 7817 }, + { 7819, 7819 }, + { 7821, 7821 }, + { 7823, 7823 }, + { 7825, 7825 }, + { 7827, 7827 }, + { 7829, 7837 }, + { 7839, 7839 }, + { 7841, 7841 }, + { 7843, 7843 }, + { 7845, 7845 }, + { 7847, 7847 }, + { 7849, 7849 }, + { 7851, 7851 }, + { 7853, 7853 }, + { 7855, 7855 }, + { 7857, 7857 }, + { 7859, 7859 }, + { 7861, 7861 }, + { 7863, 7863 }, + { 7865, 7865 }, + { 7867, 7867 }, + { 7869, 7869 }, + { 7871, 7871 }, + { 7873, 7873 }, + { 7875, 7875 }, + { 7877, 7877 }, + { 7879, 7879 }, + { 7881, 7881 }, + { 7883, 7883 }, + { 7885, 7885 }, + { 7887, 7887 }, + { 7889, 7889 }, + { 7891, 7891 }, + { 7893, 7893 }, + { 7895, 7895 }, + { 7897, 7897 }, + { 7899, 7899 }, + { 7901, 7901 }, + { 7903, 7903 }, + { 7905, 7905 }, + { 7907, 7907 }, + { 7909, 7909 }, + { 7911, 7911 }, + { 7913, 7913 }, + { 7915, 7915 }, + { 7917, 7917 }, + { 7919, 7919 }, + { 7921, 7921 }, + { 7923, 7923 }, + { 7925, 7925 }, + { 7927, 7927 }, + { 7929, 7929 }, + { 7931, 7931 }, + { 7933, 7933 }, + { 7935, 7943 }, + { 7952, 7957 }, + { 7968, 7975 }, + { 7984, 7991 }, + { 8000, 8005 }, + { 8016, 8023 }, + { 8032, 8039 }, + { 8048, 8061 }, + { 8064, 8071 }, + { 8080, 8087 }, + { 8096, 8103 }, + { 8112, 8116 }, + { 8118, 8119 }, + { 8126, 8126 }, + { 8130, 8132 }, + { 8134, 8135 }, + { 8144, 8147 }, + { 8150, 8151 }, + { 8160, 8167 }, + { 8178, 8180 }, + { 8182, 8183 }, + { 8458, 8458 }, + { 8462, 8463 }, + { 8467, 8467 }, + { 8495, 8495 }, + { 8500, 8500 }, + { 8505, 8505 }, + { 8508, 8509 }, + { 8518, 8521 }, + { 8526, 8526 }, + { 8580, 8580 }, + { 11312, 11358 }, + { 11361, 11361 }, + { 11365, 11366 }, + { 11368, 11368 }, + { 11370, 11370 }, + { 11372, 11372 }, + { 11377, 11377 }, + { 11379, 11380 }, + { 11382, 11387 }, + { 11393, 11393 }, + { 11395, 11395 }, + { 11397, 11397 }, + { 11399, 11399 }, + { 11401, 11401 }, + 
{ 11403, 11403 }, + { 11405, 11405 }, + { 11407, 11407 }, + { 11409, 11409 }, + { 11411, 11411 }, + { 11413, 11413 }, + { 11415, 11415 }, + { 11417, 11417 }, + { 11419, 11419 }, + { 11421, 11421 }, + { 11423, 11423 }, + { 11425, 11425 }, + { 11427, 11427 }, + { 11429, 11429 }, + { 11431, 11431 }, + { 11433, 11433 }, + { 11435, 11435 }, + { 11437, 11437 }, + { 11439, 11439 }, + { 11441, 11441 }, + { 11443, 11443 }, + { 11445, 11445 }, + { 11447, 11447 }, + { 11449, 11449 }, + { 11451, 11451 }, + { 11453, 11453 }, + { 11455, 11455 }, + { 11457, 11457 }, + { 11459, 11459 }, + { 11461, 11461 }, + { 11463, 11463 }, + { 11465, 11465 }, + { 11467, 11467 }, + { 11469, 11469 }, + { 11471, 11471 }, + { 11473, 11473 }, + { 11475, 11475 }, + { 11477, 11477 }, + { 11479, 11479 }, + { 11481, 11481 }, + { 11483, 11483 }, + { 11485, 11485 }, + { 11487, 11487 }, + { 11489, 11489 }, + { 11491, 11492 }, + { 11500, 11500 }, + { 11502, 11502 }, + { 11507, 11507 }, + { 11520, 11557 }, + { 11559, 11559 }, + { 11565, 11565 }, + { 42561, 42561 }, + { 42563, 42563 }, + { 42565, 42565 }, + { 42567, 42567 }, + { 42569, 42569 }, + { 42571, 42571 }, + { 42573, 42573 }, + { 42575, 42575 }, + { 42577, 42577 }, + { 42579, 42579 }, + { 42581, 42581 }, + { 42583, 42583 }, + { 42585, 42585 }, + { 42587, 42587 }, + { 42589, 42589 }, + { 42591, 42591 }, + { 42593, 42593 }, + { 42595, 42595 }, + { 42597, 42597 }, + { 42599, 42599 }, + { 42601, 42601 }, + { 42603, 42603 }, + { 42605, 42605 }, + { 42625, 42625 }, + { 42627, 42627 }, + { 42629, 42629 }, + { 42631, 42631 }, + { 42633, 42633 }, + { 42635, 42635 }, + { 42637, 42637 }, + { 42639, 42639 }, + { 42641, 42641 }, + { 42643, 42643 }, + { 42645, 42645 }, + { 42647, 42647 }, + { 42787, 42787 }, + { 42789, 42789 }, + { 42791, 42791 }, + { 42793, 42793 }, + { 42795, 42795 }, + { 42797, 42797 }, + { 42799, 42801 }, + { 42803, 42803 }, + { 42805, 42805 }, + { 42807, 42807 }, + { 42809, 42809 }, + { 42811, 42811 }, + { 42813, 42813 }, + { 42815, 42815 }, + 
{ 42817, 42817 }, + { 42819, 42819 }, + { 42821, 42821 }, + { 42823, 42823 }, + { 42825, 42825 }, + { 42827, 42827 }, + { 42829, 42829 }, + { 42831, 42831 }, + { 42833, 42833 }, + { 42835, 42835 }, + { 42837, 42837 }, + { 42839, 42839 }, + { 42841, 42841 }, + { 42843, 42843 }, + { 42845, 42845 }, + { 42847, 42847 }, + { 42849, 42849 }, + { 42851, 42851 }, + { 42853, 42853 }, + { 42855, 42855 }, + { 42857, 42857 }, + { 42859, 42859 }, + { 42861, 42861 }, + { 42863, 42863 }, + { 42865, 42872 }, + { 42874, 42874 }, + { 42876, 42876 }, + { 42879, 42879 }, + { 42881, 42881 }, + { 42883, 42883 }, + { 42885, 42885 }, + { 42887, 42887 }, + { 42892, 42892 }, + { 42894, 42894 }, + { 42897, 42897 }, + { 42899, 42899 }, + { 42913, 42913 }, + { 42915, 42915 }, + { 42917, 42917 }, + { 42919, 42919 }, + { 42921, 42921 }, + { 43002, 43002 }, + { 64256, 64262 }, + { 64275, 64279 }, + { 65345, 65370 }, +}; +static const URange32 Ll_range32[] = { + { 66600, 66639 }, + { 119834, 119859 }, + { 119886, 119892 }, + { 119894, 119911 }, + { 119938, 119963 }, + { 119990, 119993 }, + { 119995, 119995 }, + { 119997, 120003 }, + { 120005, 120015 }, + { 120042, 120067 }, + { 120094, 120119 }, + { 120146, 120171 }, + { 120198, 120223 }, + { 120250, 120275 }, + { 120302, 120327 }, + { 120354, 120379 }, + { 120406, 120431 }, + { 120458, 120485 }, + { 120514, 120538 }, + { 120540, 120545 }, + { 120572, 120596 }, + { 120598, 120603 }, + { 120630, 120654 }, + { 120656, 120661 }, + { 120688, 120712 }, + { 120714, 120719 }, + { 120746, 120770 }, + { 120772, 120777 }, + { 120779, 120779 }, +}; +static const URange16 Lm_range16[] = { + { 688, 705 }, + { 710, 721 }, + { 736, 740 }, + { 748, 748 }, + { 750, 750 }, + { 884, 884 }, + { 890, 890 }, + { 1369, 1369 }, + { 1600, 1600 }, + { 1765, 1766 }, + { 2036, 2037 }, + { 2042, 2042 }, + { 2074, 2074 }, + { 2084, 2084 }, + { 2088, 2088 }, + { 2417, 2417 }, + { 3654, 3654 }, + { 3782, 3782 }, + { 4348, 4348 }, + { 6103, 6103 }, + { 6211, 6211 }, + { 6823, 
6823 }, + { 7288, 7293 }, + { 7468, 7530 }, + { 7544, 7544 }, + { 7579, 7615 }, + { 8305, 8305 }, + { 8319, 8319 }, + { 8336, 8348 }, + { 11388, 11389 }, + { 11631, 11631 }, + { 11823, 11823 }, + { 12293, 12293 }, + { 12337, 12341 }, + { 12347, 12347 }, + { 12445, 12446 }, + { 12540, 12542 }, + { 40981, 40981 }, + { 42232, 42237 }, + { 42508, 42508 }, + { 42623, 42623 }, + { 42775, 42783 }, + { 42864, 42864 }, + { 42888, 42888 }, + { 43000, 43001 }, + { 43471, 43471 }, + { 43632, 43632 }, + { 43741, 43741 }, + { 43763, 43764 }, + { 65392, 65392 }, + { 65438, 65439 }, +}; +static const URange32 Lm_range32[] = { + { 94099, 94111 }, +}; +static const URange16 Nd_range16[] = { + { 48, 57 }, + { 1632, 1641 }, + { 1776, 1785 }, + { 1984, 1993 }, + { 2406, 2415 }, + { 2534, 2543 }, + { 2662, 2671 }, + { 2790, 2799 }, + { 2918, 2927 }, + { 3046, 3055 }, + { 3174, 3183 }, + { 3302, 3311 }, + { 3430, 3439 }, + { 3664, 3673 }, + { 3792, 3801 }, + { 3872, 3881 }, + { 4160, 4169 }, + { 4240, 4249 }, + { 6112, 6121 }, + { 6160, 6169 }, + { 6470, 6479 }, + { 6608, 6617 }, + { 6784, 6793 }, + { 6800, 6809 }, + { 6992, 7001 }, + { 7088, 7097 }, + { 7232, 7241 }, + { 7248, 7257 }, + { 42528, 42537 }, + { 43216, 43225 }, + { 43264, 43273 }, + { 43472, 43481 }, + { 43600, 43609 }, + { 44016, 44025 }, + { 65296, 65305 }, +}; +static const URange32 Nd_range32[] = { + { 66720, 66729 }, + { 69734, 69743 }, + { 69872, 69881 }, + { 69942, 69951 }, + { 70096, 70105 }, + { 71360, 71369 }, + { 120782, 120831 }, +}; +static const URange16 Pc_range16[] = { + { 95, 95 }, + { 8255, 8256 }, + { 8276, 8276 }, + { 65075, 65076 }, + { 65101, 65103 }, + { 65343, 65343 }, +}; +static const URange16 Lt_range16[] = { + { 453, 453 }, + { 456, 456 }, + { 459, 459 }, + { 498, 498 }, + { 8072, 8079 }, + { 8088, 8095 }, + { 8104, 8111 }, + { 8124, 8124 }, + { 8140, 8140 }, + { 8188, 8188 }, +}; +static const URange16 Lu_range16[] = { + { 65, 90 }, + { 192, 214 }, + { 216, 222 }, + { 256, 256 }, + { 258, 258 }, 
+ { 260, 260 }, + { 262, 262 }, + { 264, 264 }, + { 266, 266 }, + { 268, 268 }, + { 270, 270 }, + { 272, 272 }, + { 274, 274 }, + { 276, 276 }, + { 278, 278 }, + { 280, 280 }, + { 282, 282 }, + { 284, 284 }, + { 286, 286 }, + { 288, 288 }, + { 290, 290 }, + { 292, 292 }, + { 294, 294 }, + { 296, 296 }, + { 298, 298 }, + { 300, 300 }, + { 302, 302 }, + { 304, 304 }, + { 306, 306 }, + { 308, 308 }, + { 310, 310 }, + { 313, 313 }, + { 315, 315 }, + { 317, 317 }, + { 319, 319 }, + { 321, 321 }, + { 323, 323 }, + { 325, 325 }, + { 327, 327 }, + { 330, 330 }, + { 332, 332 }, + { 334, 334 }, + { 336, 336 }, + { 338, 338 }, + { 340, 340 }, + { 342, 342 }, + { 344, 344 }, + { 346, 346 }, + { 348, 348 }, + { 350, 350 }, + { 352, 352 }, + { 354, 354 }, + { 356, 356 }, + { 358, 358 }, + { 360, 360 }, + { 362, 362 }, + { 364, 364 }, + { 366, 366 }, + { 368, 368 }, + { 370, 370 }, + { 372, 372 }, + { 374, 374 }, + { 376, 377 }, + { 379, 379 }, + { 381, 381 }, + { 385, 386 }, + { 388, 388 }, + { 390, 391 }, + { 393, 395 }, + { 398, 401 }, + { 403, 404 }, + { 406, 408 }, + { 412, 413 }, + { 415, 416 }, + { 418, 418 }, + { 420, 420 }, + { 422, 423 }, + { 425, 425 }, + { 428, 428 }, + { 430, 431 }, + { 433, 435 }, + { 437, 437 }, + { 439, 440 }, + { 444, 444 }, + { 452, 452 }, + { 455, 455 }, + { 458, 458 }, + { 461, 461 }, + { 463, 463 }, + { 465, 465 }, + { 467, 467 }, + { 469, 469 }, + { 471, 471 }, + { 473, 473 }, + { 475, 475 }, + { 478, 478 }, + { 480, 480 }, + { 482, 482 }, + { 484, 484 }, + { 486, 486 }, + { 488, 488 }, + { 490, 490 }, + { 492, 492 }, + { 494, 494 }, + { 497, 497 }, + { 500, 500 }, + { 502, 504 }, + { 506, 506 }, + { 508, 508 }, + { 510, 510 }, + { 512, 512 }, + { 514, 514 }, + { 516, 516 }, + { 518, 518 }, + { 520, 520 }, + { 522, 522 }, + { 524, 524 }, + { 526, 526 }, + { 528, 528 }, + { 530, 530 }, + { 532, 532 }, + { 534, 534 }, + { 536, 536 }, + { 538, 538 }, + { 540, 540 }, + { 542, 542 }, + { 544, 544 }, + { 546, 546 }, + { 548, 548 }, + { 550, 550 }, 
+ { 552, 552 }, + { 554, 554 }, + { 556, 556 }, + { 558, 558 }, + { 560, 560 }, + { 562, 562 }, + { 570, 571 }, + { 573, 574 }, + { 577, 577 }, + { 579, 582 }, + { 584, 584 }, + { 586, 586 }, + { 588, 588 }, + { 590, 590 }, + { 880, 880 }, + { 882, 882 }, + { 886, 886 }, + { 902, 902 }, + { 904, 906 }, + { 908, 908 }, + { 910, 911 }, + { 913, 929 }, + { 931, 939 }, + { 975, 975 }, + { 978, 980 }, + { 984, 984 }, + { 986, 986 }, + { 988, 988 }, + { 990, 990 }, + { 992, 992 }, + { 994, 994 }, + { 996, 996 }, + { 998, 998 }, + { 1000, 1000 }, + { 1002, 1002 }, + { 1004, 1004 }, + { 1006, 1006 }, + { 1012, 1012 }, + { 1015, 1015 }, + { 1017, 1018 }, + { 1021, 1071 }, + { 1120, 1120 }, + { 1122, 1122 }, + { 1124, 1124 }, + { 1126, 1126 }, + { 1128, 1128 }, + { 1130, 1130 }, + { 1132, 1132 }, + { 1134, 1134 }, + { 1136, 1136 }, + { 1138, 1138 }, + { 1140, 1140 }, + { 1142, 1142 }, + { 1144, 1144 }, + { 1146, 1146 }, + { 1148, 1148 }, + { 1150, 1150 }, + { 1152, 1152 }, + { 1162, 1162 }, + { 1164, 1164 }, + { 1166, 1166 }, + { 1168, 1168 }, + { 1170, 1170 }, + { 1172, 1172 }, + { 1174, 1174 }, + { 1176, 1176 }, + { 1178, 1178 }, + { 1180, 1180 }, + { 1182, 1182 }, + { 1184, 1184 }, + { 1186, 1186 }, + { 1188, 1188 }, + { 1190, 1190 }, + { 1192, 1192 }, + { 1194, 1194 }, + { 1196, 1196 }, + { 1198, 1198 }, + { 1200, 1200 }, + { 1202, 1202 }, + { 1204, 1204 }, + { 1206, 1206 }, + { 1208, 1208 }, + { 1210, 1210 }, + { 1212, 1212 }, + { 1214, 1214 }, + { 1216, 1217 }, + { 1219, 1219 }, + { 1221, 1221 }, + { 1223, 1223 }, + { 1225, 1225 }, + { 1227, 1227 }, + { 1229, 1229 }, + { 1232, 1232 }, + { 1234, 1234 }, + { 1236, 1236 }, + { 1238, 1238 }, + { 1240, 1240 }, + { 1242, 1242 }, + { 1244, 1244 }, + { 1246, 1246 }, + { 1248, 1248 }, + { 1250, 1250 }, + { 1252, 1252 }, + { 1254, 1254 }, + { 1256, 1256 }, + { 1258, 1258 }, + { 1260, 1260 }, + { 1262, 1262 }, + { 1264, 1264 }, + { 1266, 1266 }, + { 1268, 1268 }, + { 1270, 1270 }, + { 1272, 1272 }, + { 1274, 1274 }, + { 1276, 
1276 }, + { 1278, 1278 }, + { 1280, 1280 }, + { 1282, 1282 }, + { 1284, 1284 }, + { 1286, 1286 }, + { 1288, 1288 }, + { 1290, 1290 }, + { 1292, 1292 }, + { 1294, 1294 }, + { 1296, 1296 }, + { 1298, 1298 }, + { 1300, 1300 }, + { 1302, 1302 }, + { 1304, 1304 }, + { 1306, 1306 }, + { 1308, 1308 }, + { 1310, 1310 }, + { 1312, 1312 }, + { 1314, 1314 }, + { 1316, 1316 }, + { 1318, 1318 }, + { 1329, 1366 }, + { 4256, 4293 }, + { 4295, 4295 }, + { 4301, 4301 }, + { 7680, 7680 }, + { 7682, 7682 }, + { 7684, 7684 }, + { 7686, 7686 }, + { 7688, 7688 }, + { 7690, 7690 }, + { 7692, 7692 }, + { 7694, 7694 }, + { 7696, 7696 }, + { 7698, 7698 }, + { 7700, 7700 }, + { 7702, 7702 }, + { 7704, 7704 }, + { 7706, 7706 }, + { 7708, 7708 }, + { 7710, 7710 }, + { 7712, 7712 }, + { 7714, 7714 }, + { 7716, 7716 }, + { 7718, 7718 }, + { 7720, 7720 }, + { 7722, 7722 }, + { 7724, 7724 }, + { 7726, 7726 }, + { 7728, 7728 }, + { 7730, 7730 }, + { 7732, 7732 }, + { 7734, 7734 }, + { 7736, 7736 }, + { 7738, 7738 }, + { 7740, 7740 }, + { 7742, 7742 }, + { 7744, 7744 }, + { 7746, 7746 }, + { 7748, 7748 }, + { 7750, 7750 }, + { 7752, 7752 }, + { 7754, 7754 }, + { 7756, 7756 }, + { 7758, 7758 }, + { 7760, 7760 }, + { 7762, 7762 }, + { 7764, 7764 }, + { 7766, 7766 }, + { 7768, 7768 }, + { 7770, 7770 }, + { 7772, 7772 }, + { 7774, 7774 }, + { 7776, 7776 }, + { 7778, 7778 }, + { 7780, 7780 }, + { 7782, 7782 }, + { 7784, 7784 }, + { 7786, 7786 }, + { 7788, 7788 }, + { 7790, 7790 }, + { 7792, 7792 }, + { 7794, 7794 }, + { 7796, 7796 }, + { 7798, 7798 }, + { 7800, 7800 }, + { 7802, 7802 }, + { 7804, 7804 }, + { 7806, 7806 }, + { 7808, 7808 }, + { 7810, 7810 }, + { 7812, 7812 }, + { 7814, 7814 }, + { 7816, 7816 }, + { 7818, 7818 }, + { 7820, 7820 }, + { 7822, 7822 }, + { 7824, 7824 }, + { 7826, 7826 }, + { 7828, 7828 }, + { 7838, 7838 }, + { 7840, 7840 }, + { 7842, 7842 }, + { 7844, 7844 }, + { 7846, 7846 }, + { 7848, 7848 }, + { 7850, 7850 }, + { 7852, 7852 }, + { 7854, 7854 }, + { 7856, 7856 }, + { 7858, 
7858 }, + { 7860, 7860 }, + { 7862, 7862 }, + { 7864, 7864 }, + { 7866, 7866 }, + { 7868, 7868 }, + { 7870, 7870 }, + { 7872, 7872 }, + { 7874, 7874 }, + { 7876, 7876 }, + { 7878, 7878 }, + { 7880, 7880 }, + { 7882, 7882 }, + { 7884, 7884 }, + { 7886, 7886 }, + { 7888, 7888 }, + { 7890, 7890 }, + { 7892, 7892 }, + { 7894, 7894 }, + { 7896, 7896 }, + { 7898, 7898 }, + { 7900, 7900 }, + { 7902, 7902 }, + { 7904, 7904 }, + { 7906, 7906 }, + { 7908, 7908 }, + { 7910, 7910 }, + { 7912, 7912 }, + { 7914, 7914 }, + { 7916, 7916 }, + { 7918, 7918 }, + { 7920, 7920 }, + { 7922, 7922 }, + { 7924, 7924 }, + { 7926, 7926 }, + { 7928, 7928 }, + { 7930, 7930 }, + { 7932, 7932 }, + { 7934, 7934 }, + { 7944, 7951 }, + { 7960, 7965 }, + { 7976, 7983 }, + { 7992, 7999 }, + { 8008, 8013 }, + { 8025, 8025 }, + { 8027, 8027 }, + { 8029, 8029 }, + { 8031, 8031 }, + { 8040, 8047 }, + { 8120, 8123 }, + { 8136, 8139 }, + { 8152, 8155 }, + { 8168, 8172 }, + { 8184, 8187 }, + { 8450, 8450 }, + { 8455, 8455 }, + { 8459, 8461 }, + { 8464, 8466 }, + { 8469, 8469 }, + { 8473, 8477 }, + { 8484, 8484 }, + { 8486, 8486 }, + { 8488, 8488 }, + { 8490, 8493 }, + { 8496, 8499 }, + { 8510, 8511 }, + { 8517, 8517 }, + { 8579, 8579 }, + { 11264, 11310 }, + { 11360, 11360 }, + { 11362, 11364 }, + { 11367, 11367 }, + { 11369, 11369 }, + { 11371, 11371 }, + { 11373, 11376 }, + { 11378, 11378 }, + { 11381, 11381 }, + { 11390, 11392 }, + { 11394, 11394 }, + { 11396, 11396 }, + { 11398, 11398 }, + { 11400, 11400 }, + { 11402, 11402 }, + { 11404, 11404 }, + { 11406, 11406 }, + { 11408, 11408 }, + { 11410, 11410 }, + { 11412, 11412 }, + { 11414, 11414 }, + { 11416, 11416 }, + { 11418, 11418 }, + { 11420, 11420 }, + { 11422, 11422 }, + { 11424, 11424 }, + { 11426, 11426 }, + { 11428, 11428 }, + { 11430, 11430 }, + { 11432, 11432 }, + { 11434, 11434 }, + { 11436, 11436 }, + { 11438, 11438 }, + { 11440, 11440 }, + { 11442, 11442 }, + { 11444, 11444 }, + { 11446, 11446 }, + { 11448, 11448 }, + { 11450, 11450 }, + { 
11452, 11452 }, + { 11454, 11454 }, + { 11456, 11456 }, + { 11458, 11458 }, + { 11460, 11460 }, + { 11462, 11462 }, + { 11464, 11464 }, + { 11466, 11466 }, + { 11468, 11468 }, + { 11470, 11470 }, + { 11472, 11472 }, + { 11474, 11474 }, + { 11476, 11476 }, + { 11478, 11478 }, + { 11480, 11480 }, + { 11482, 11482 }, + { 11484, 11484 }, + { 11486, 11486 }, + { 11488, 11488 }, + { 11490, 11490 }, + { 11499, 11499 }, + { 11501, 11501 }, + { 11506, 11506 }, + { 42560, 42560 }, + { 42562, 42562 }, + { 42564, 42564 }, + { 42566, 42566 }, + { 42568, 42568 }, + { 42570, 42570 }, + { 42572, 42572 }, + { 42574, 42574 }, + { 42576, 42576 }, + { 42578, 42578 }, + { 42580, 42580 }, + { 42582, 42582 }, + { 42584, 42584 }, + { 42586, 42586 }, + { 42588, 42588 }, + { 42590, 42590 }, + { 42592, 42592 }, + { 42594, 42594 }, + { 42596, 42596 }, + { 42598, 42598 }, + { 42600, 42600 }, + { 42602, 42602 }, + { 42604, 42604 }, + { 42624, 42624 }, + { 42626, 42626 }, + { 42628, 42628 }, + { 42630, 42630 }, + { 42632, 42632 }, + { 42634, 42634 }, + { 42636, 42636 }, + { 42638, 42638 }, + { 42640, 42640 }, + { 42642, 42642 }, + { 42644, 42644 }, + { 42646, 42646 }, + { 42786, 42786 }, + { 42788, 42788 }, + { 42790, 42790 }, + { 42792, 42792 }, + { 42794, 42794 }, + { 42796, 42796 }, + { 42798, 42798 }, + { 42802, 42802 }, + { 42804, 42804 }, + { 42806, 42806 }, + { 42808, 42808 }, + { 42810, 42810 }, + { 42812, 42812 }, + { 42814, 42814 }, + { 42816, 42816 }, + { 42818, 42818 }, + { 42820, 42820 }, + { 42822, 42822 }, + { 42824, 42824 }, + { 42826, 42826 }, + { 42828, 42828 }, + { 42830, 42830 }, + { 42832, 42832 }, + { 42834, 42834 }, + { 42836, 42836 }, + { 42838, 42838 }, + { 42840, 42840 }, + { 42842, 42842 }, + { 42844, 42844 }, + { 42846, 42846 }, + { 42848, 42848 }, + { 42850, 42850 }, + { 42852, 42852 }, + { 42854, 42854 }, + { 42856, 42856 }, + { 42858, 42858 }, + { 42860, 42860 }, + { 42862, 42862 }, + { 42873, 42873 }, + { 42875, 42875 }, + { 42877, 42878 }, + { 42880, 42880 }, + { 
42882, 42882 }, + { 42884, 42884 }, + { 42886, 42886 }, + { 42891, 42891 }, + { 42893, 42893 }, + { 42896, 42896 }, + { 42898, 42898 }, + { 42912, 42912 }, + { 42914, 42914 }, + { 42916, 42916 }, + { 42918, 42918 }, + { 42920, 42920 }, + { 42922, 42922 }, + { 65313, 65338 }, +}; +static const URange32 Lu_range32[] = { + { 66560, 66599 }, + { 119808, 119833 }, + { 119860, 119885 }, + { 119912, 119937 }, + { 119964, 119964 }, + { 119966, 119967 }, + { 119970, 119970 }, + { 119973, 119974 }, + { 119977, 119980 }, + { 119982, 119989 }, + { 120016, 120041 }, + { 120068, 120069 }, + { 120071, 120074 }, + { 120077, 120084 }, + { 120086, 120092 }, + { 120120, 120121 }, + { 120123, 120126 }, + { 120128, 120132 }, + { 120134, 120134 }, + { 120138, 120144 }, + { 120172, 120197 }, + { 120224, 120249 }, + { 120276, 120301 }, + { 120328, 120353 }, + { 120380, 120405 }, + { 120432, 120457 }, + { 120488, 120512 }, + { 120546, 120570 }, + { 120604, 120628 }, + { 120662, 120686 }, + { 120720, 120744 }, + { 120778, 120778 }, +}; +static const URange16 Pf_range16[] = { + { 187, 187 }, + { 8217, 8217 }, + { 8221, 8221 }, + { 8250, 8250 }, + { 11779, 11779 }, + { 11781, 11781 }, + { 11786, 11786 }, + { 11789, 11789 }, + { 11805, 11805 }, + { 11809, 11809 }, +}; +static const URange16 Pd_range16[] = { + { 45, 45 }, + { 1418, 1418 }, + { 1470, 1470 }, + { 5120, 5120 }, + { 6150, 6150 }, + { 8208, 8213 }, + { 11799, 11799 }, + { 11802, 11802 }, + { 11834, 11835 }, + { 12316, 12316 }, + { 12336, 12336 }, + { 12448, 12448 }, + { 65073, 65074 }, + { 65112, 65112 }, + { 65123, 65123 }, + { 65293, 65293 }, +}; +static const URange16 Pe_range16[] = { + { 41, 41 }, + { 93, 93 }, + { 125, 125 }, + { 3899, 3899 }, + { 3901, 3901 }, + { 5788, 5788 }, + { 8262, 8262 }, + { 8318, 8318 }, + { 8334, 8334 }, + { 8969, 8969 }, + { 8971, 8971 }, + { 9002, 9002 }, + { 10089, 10089 }, + { 10091, 10091 }, + { 10093, 10093 }, + { 10095, 10095 }, + { 10097, 10097 }, + { 10099, 10099 }, + { 10101, 10101 }, + { 
10182, 10182 }, + { 10215, 10215 }, + { 10217, 10217 }, + { 10219, 10219 }, + { 10221, 10221 }, + { 10223, 10223 }, + { 10628, 10628 }, + { 10630, 10630 }, + { 10632, 10632 }, + { 10634, 10634 }, + { 10636, 10636 }, + { 10638, 10638 }, + { 10640, 10640 }, + { 10642, 10642 }, + { 10644, 10644 }, + { 10646, 10646 }, + { 10648, 10648 }, + { 10713, 10713 }, + { 10715, 10715 }, + { 10749, 10749 }, + { 11811, 11811 }, + { 11813, 11813 }, + { 11815, 11815 }, + { 11817, 11817 }, + { 12297, 12297 }, + { 12299, 12299 }, + { 12301, 12301 }, + { 12303, 12303 }, + { 12305, 12305 }, + { 12309, 12309 }, + { 12311, 12311 }, + { 12313, 12313 }, + { 12315, 12315 }, + { 12318, 12319 }, + { 64831, 64831 }, + { 65048, 65048 }, + { 65078, 65078 }, + { 65080, 65080 }, + { 65082, 65082 }, + { 65084, 65084 }, + { 65086, 65086 }, + { 65088, 65088 }, + { 65090, 65090 }, + { 65092, 65092 }, + { 65096, 65096 }, + { 65114, 65114 }, + { 65116, 65116 }, + { 65118, 65118 }, + { 65289, 65289 }, + { 65341, 65341 }, + { 65373, 65373 }, + { 65376, 65376 }, + { 65379, 65379 }, +}; +static const URange16 Pi_range16[] = { + { 171, 171 }, + { 8216, 8216 }, + { 8219, 8220 }, + { 8223, 8223 }, + { 8249, 8249 }, + { 11778, 11778 }, + { 11780, 11780 }, + { 11785, 11785 }, + { 11788, 11788 }, + { 11804, 11804 }, + { 11808, 11808 }, +}; +static const URange16 Po_range16[] = { + { 33, 35 }, + { 37, 39 }, + { 42, 42 }, + { 44, 44 }, + { 46, 47 }, + { 58, 59 }, + { 63, 64 }, + { 92, 92 }, + { 161, 161 }, + { 167, 167 }, + { 182, 183 }, + { 191, 191 }, + { 894, 894 }, + { 903, 903 }, + { 1370, 1375 }, + { 1417, 1417 }, + { 1472, 1472 }, + { 1475, 1475 }, + { 1478, 1478 }, + { 1523, 1524 }, + { 1545, 1546 }, + { 1548, 1549 }, + { 1563, 1563 }, + { 1566, 1567 }, + { 1642, 1645 }, + { 1748, 1748 }, + { 1792, 1805 }, + { 2039, 2041 }, + { 2096, 2110 }, + { 2142, 2142 }, + { 2404, 2405 }, + { 2416, 2416 }, + { 2800, 2800 }, + { 3572, 3572 }, + { 3663, 3663 }, + { 3674, 3675 }, + { 3844, 3858 }, + { 3860, 3860 }, + { 
3973, 3973 }, + { 4048, 4052 }, + { 4057, 4058 }, + { 4170, 4175 }, + { 4347, 4347 }, + { 4960, 4968 }, + { 5741, 5742 }, + { 5867, 5869 }, + { 5941, 5942 }, + { 6100, 6102 }, + { 6104, 6106 }, + { 6144, 6149 }, + { 6151, 6154 }, + { 6468, 6469 }, + { 6686, 6687 }, + { 6816, 6822 }, + { 6824, 6829 }, + { 7002, 7008 }, + { 7164, 7167 }, + { 7227, 7231 }, + { 7294, 7295 }, + { 7360, 7367 }, + { 7379, 7379 }, + { 8214, 8215 }, + { 8224, 8231 }, + { 8240, 8248 }, + { 8251, 8254 }, + { 8257, 8259 }, + { 8263, 8273 }, + { 8275, 8275 }, + { 8277, 8286 }, + { 11513, 11516 }, + { 11518, 11519 }, + { 11632, 11632 }, + { 11776, 11777 }, + { 11782, 11784 }, + { 11787, 11787 }, + { 11790, 11798 }, + { 11800, 11801 }, + { 11803, 11803 }, + { 11806, 11807 }, + { 11818, 11822 }, + { 11824, 11833 }, + { 12289, 12291 }, + { 12349, 12349 }, + { 12539, 12539 }, + { 42238, 42239 }, + { 42509, 42511 }, + { 42611, 42611 }, + { 42622, 42622 }, + { 42738, 42743 }, + { 43124, 43127 }, + { 43214, 43215 }, + { 43256, 43258 }, + { 43310, 43311 }, + { 43359, 43359 }, + { 43457, 43469 }, + { 43486, 43487 }, + { 43612, 43615 }, + { 43742, 43743 }, + { 43760, 43761 }, + { 44011, 44011 }, + { 65040, 65046 }, + { 65049, 65049 }, + { 65072, 65072 }, + { 65093, 65094 }, + { 65097, 65100 }, + { 65104, 65106 }, + { 65108, 65111 }, + { 65119, 65121 }, + { 65128, 65128 }, + { 65130, 65131 }, + { 65281, 65283 }, + { 65285, 65287 }, + { 65290, 65290 }, + { 65292, 65292 }, + { 65294, 65295 }, + { 65306, 65307 }, + { 65311, 65312 }, + { 65340, 65340 }, + { 65377, 65377 }, + { 65380, 65381 }, +}; +static const URange32 Po_range32[] = { + { 65792, 65794 }, + { 66463, 66463 }, + { 66512, 66512 }, + { 67671, 67671 }, + { 67871, 67871 }, + { 67903, 67903 }, + { 68176, 68184 }, + { 68223, 68223 }, + { 68409, 68415 }, + { 69703, 69709 }, + { 69819, 69820 }, + { 69822, 69825 }, + { 69952, 69955 }, + { 70085, 70088 }, + { 74864, 74867 }, +}; +static const URange16 Me_range16[] = { + { 1160, 1161 }, + { 8413, 8416 }, + 
{ 8418, 8420 }, + { 42608, 42610 }, +}; +static const URange16 C_range16[] = { + { 0, 31 }, + { 127, 159 }, + { 173, 173 }, + { 1536, 1540 }, + { 1564, 1564 }, + { 1757, 1757 }, + { 1807, 1807 }, + { 6158, 6158 }, + { 8203, 8207 }, + { 8234, 8238 }, + { 8288, 8292 }, + { 8294, 8303 }, + { 55296, 63743 }, + { 65279, 65279 }, + { 65529, 65531 }, +}; +static const URange32 C_range32[] = { + { 69821, 69821 }, + { 119155, 119162 }, + { 917505, 917505 }, + { 917536, 917631 }, + { 983040, 1048573 }, + { 1048576, 1114109 }, +}; +static const URange16 Mc_range16[] = { + { 2307, 2307 }, + { 2363, 2363 }, + { 2366, 2368 }, + { 2377, 2380 }, + { 2382, 2383 }, + { 2434, 2435 }, + { 2494, 2496 }, + { 2503, 2504 }, + { 2507, 2508 }, + { 2519, 2519 }, + { 2563, 2563 }, + { 2622, 2624 }, + { 2691, 2691 }, + { 2750, 2752 }, + { 2761, 2761 }, + { 2763, 2764 }, + { 2818, 2819 }, + { 2878, 2878 }, + { 2880, 2880 }, + { 2887, 2888 }, + { 2891, 2892 }, + { 2903, 2903 }, + { 3006, 3007 }, + { 3009, 3010 }, + { 3014, 3016 }, + { 3018, 3020 }, + { 3031, 3031 }, + { 3073, 3075 }, + { 3137, 3140 }, + { 3202, 3203 }, + { 3262, 3262 }, + { 3264, 3268 }, + { 3271, 3272 }, + { 3274, 3275 }, + { 3285, 3286 }, + { 3330, 3331 }, + { 3390, 3392 }, + { 3398, 3400 }, + { 3402, 3404 }, + { 3415, 3415 }, + { 3458, 3459 }, + { 3535, 3537 }, + { 3544, 3551 }, + { 3570, 3571 }, + { 3902, 3903 }, + { 3967, 3967 }, + { 4139, 4140 }, + { 4145, 4145 }, + { 4152, 4152 }, + { 4155, 4156 }, + { 4182, 4183 }, + { 4194, 4196 }, + { 4199, 4205 }, + { 4227, 4228 }, + { 4231, 4236 }, + { 4239, 4239 }, + { 4250, 4252 }, + { 6070, 6070 }, + { 6078, 6085 }, + { 6087, 6088 }, + { 6435, 6438 }, + { 6441, 6443 }, + { 6448, 6449 }, + { 6451, 6456 }, + { 6576, 6592 }, + { 6600, 6601 }, + { 6681, 6682 }, + { 6741, 6741 }, + { 6743, 6743 }, + { 6753, 6753 }, + { 6755, 6756 }, + { 6765, 6770 }, + { 6916, 6916 }, + { 6965, 6965 }, + { 6971, 6971 }, + { 6973, 6977 }, + { 6979, 6980 }, + { 7042, 7042 }, + { 7073, 7073 }, + { 7078, 
7079 }, + { 7082, 7082 }, + { 7084, 7085 }, + { 7143, 7143 }, + { 7146, 7148 }, + { 7150, 7150 }, + { 7154, 7155 }, + { 7204, 7211 }, + { 7220, 7221 }, + { 7393, 7393 }, + { 7410, 7411 }, + { 12334, 12335 }, + { 43043, 43044 }, + { 43047, 43047 }, + { 43136, 43137 }, + { 43188, 43203 }, + { 43346, 43347 }, + { 43395, 43395 }, + { 43444, 43445 }, + { 43450, 43451 }, + { 43453, 43456 }, + { 43567, 43568 }, + { 43571, 43572 }, + { 43597, 43597 }, + { 43643, 43643 }, + { 43755, 43755 }, + { 43758, 43759 }, + { 43765, 43765 }, + { 44003, 44004 }, + { 44006, 44007 }, + { 44009, 44010 }, + { 44012, 44012 }, +}; +static const URange32 Mc_range32[] = { + { 69632, 69632 }, + { 69634, 69634 }, + { 69762, 69762 }, + { 69808, 69810 }, + { 69815, 69816 }, + { 69932, 69932 }, + { 70018, 70018 }, + { 70067, 70069 }, + { 70079, 70080 }, + { 71340, 71340 }, + { 71342, 71343 }, + { 71350, 71350 }, + { 94033, 94078 }, + { 119141, 119142 }, + { 119149, 119154 }, +}; +static const URange16 Mn_range16[] = { + { 768, 879 }, + { 1155, 1159 }, + { 1425, 1469 }, + { 1471, 1471 }, + { 1473, 1474 }, + { 1476, 1477 }, + { 1479, 1479 }, + { 1552, 1562 }, + { 1611, 1631 }, + { 1648, 1648 }, + { 1750, 1756 }, + { 1759, 1764 }, + { 1767, 1768 }, + { 1770, 1773 }, + { 1809, 1809 }, + { 1840, 1866 }, + { 1958, 1968 }, + { 2027, 2035 }, + { 2070, 2073 }, + { 2075, 2083 }, + { 2085, 2087 }, + { 2089, 2093 }, + { 2137, 2139 }, + { 2276, 2302 }, + { 2304, 2306 }, + { 2362, 2362 }, + { 2364, 2364 }, + { 2369, 2376 }, + { 2381, 2381 }, + { 2385, 2391 }, + { 2402, 2403 }, + { 2433, 2433 }, + { 2492, 2492 }, + { 2497, 2500 }, + { 2509, 2509 }, + { 2530, 2531 }, + { 2561, 2562 }, + { 2620, 2620 }, + { 2625, 2626 }, + { 2631, 2632 }, + { 2635, 2637 }, + { 2641, 2641 }, + { 2672, 2673 }, + { 2677, 2677 }, + { 2689, 2690 }, + { 2748, 2748 }, + { 2753, 2757 }, + { 2759, 2760 }, + { 2765, 2765 }, + { 2786, 2787 }, + { 2817, 2817 }, + { 2876, 2876 }, + { 2879, 2879 }, + { 2881, 2884 }, + { 2893, 2893 }, + { 2902, 
2902 }, + { 2914, 2915 }, + { 2946, 2946 }, + { 3008, 3008 }, + { 3021, 3021 }, + { 3134, 3136 }, + { 3142, 3144 }, + { 3146, 3149 }, + { 3157, 3158 }, + { 3170, 3171 }, + { 3260, 3260 }, + { 3263, 3263 }, + { 3270, 3270 }, + { 3276, 3277 }, + { 3298, 3299 }, + { 3393, 3396 }, + { 3405, 3405 }, + { 3426, 3427 }, + { 3530, 3530 }, + { 3538, 3540 }, + { 3542, 3542 }, + { 3633, 3633 }, + { 3636, 3642 }, + { 3655, 3662 }, + { 3761, 3761 }, + { 3764, 3769 }, + { 3771, 3772 }, + { 3784, 3789 }, + { 3864, 3865 }, + { 3893, 3893 }, + { 3895, 3895 }, + { 3897, 3897 }, + { 3953, 3966 }, + { 3968, 3972 }, + { 3974, 3975 }, + { 3981, 3991 }, + { 3993, 4028 }, + { 4038, 4038 }, + { 4141, 4144 }, + { 4146, 4151 }, + { 4153, 4154 }, + { 4157, 4158 }, + { 4184, 4185 }, + { 4190, 4192 }, + { 4209, 4212 }, + { 4226, 4226 }, + { 4229, 4230 }, + { 4237, 4237 }, + { 4253, 4253 }, + { 4957, 4959 }, + { 5906, 5908 }, + { 5938, 5940 }, + { 5970, 5971 }, + { 6002, 6003 }, + { 6068, 6069 }, + { 6071, 6077 }, + { 6086, 6086 }, + { 6089, 6099 }, + { 6109, 6109 }, + { 6155, 6157 }, + { 6313, 6313 }, + { 6432, 6434 }, + { 6439, 6440 }, + { 6450, 6450 }, + { 6457, 6459 }, + { 6679, 6680 }, + { 6683, 6683 }, + { 6742, 6742 }, + { 6744, 6750 }, + { 6752, 6752 }, + { 6754, 6754 }, + { 6757, 6764 }, + { 6771, 6780 }, + { 6783, 6783 }, + { 6912, 6915 }, + { 6964, 6964 }, + { 6966, 6970 }, + { 6972, 6972 }, + { 6978, 6978 }, + { 7019, 7027 }, + { 7040, 7041 }, + { 7074, 7077 }, + { 7080, 7081 }, + { 7083, 7083 }, + { 7142, 7142 }, + { 7144, 7145 }, + { 7149, 7149 }, + { 7151, 7153 }, + { 7212, 7219 }, + { 7222, 7223 }, + { 7376, 7378 }, + { 7380, 7392 }, + { 7394, 7400 }, + { 7405, 7405 }, + { 7412, 7412 }, + { 7616, 7654 }, + { 7676, 7679 }, + { 8400, 8412 }, + { 8417, 8417 }, + { 8421, 8432 }, + { 11503, 11505 }, + { 11647, 11647 }, + { 11744, 11775 }, + { 12330, 12333 }, + { 12441, 12442 }, + { 42607, 42607 }, + { 42612, 42621 }, + { 42655, 42655 }, + { 42736, 42737 }, + { 43010, 43010 }, + { 
43014, 43014 }, + { 43019, 43019 }, + { 43045, 43046 }, + { 43204, 43204 }, + { 43232, 43249 }, + { 43302, 43309 }, + { 43335, 43345 }, + { 43392, 43394 }, + { 43443, 43443 }, + { 43446, 43449 }, + { 43452, 43452 }, + { 43561, 43566 }, + { 43569, 43570 }, + { 43573, 43574 }, + { 43587, 43587 }, + { 43596, 43596 }, + { 43696, 43696 }, + { 43698, 43700 }, + { 43703, 43704 }, + { 43710, 43711 }, + { 43713, 43713 }, + { 43756, 43757 }, + { 43766, 43766 }, + { 44005, 44005 }, + { 44008, 44008 }, + { 44013, 44013 }, + { 64286, 64286 }, + { 65024, 65039 }, + { 65056, 65062 }, +}; +static const URange32 Mn_range32[] = { + { 66045, 66045 }, + { 68097, 68099 }, + { 68101, 68102 }, + { 68108, 68111 }, + { 68152, 68154 }, + { 68159, 68159 }, + { 69633, 69633 }, + { 69688, 69702 }, + { 69760, 69761 }, + { 69811, 69814 }, + { 69817, 69818 }, + { 69888, 69890 }, + { 69927, 69931 }, + { 69933, 69940 }, + { 70016, 70017 }, + { 70070, 70078 }, + { 71339, 71339 }, + { 71341, 71341 }, + { 71344, 71349 }, + { 71351, 71351 }, + { 94095, 94098 }, + { 119143, 119145 }, + { 119163, 119170 }, + { 119173, 119179 }, + { 119210, 119213 }, + { 119362, 119364 }, + { 917760, 917999 }, +}; +static const URange16 M_range16[] = { + { 768, 879 }, + { 1155, 1161 }, + { 1425, 1469 }, + { 1471, 1471 }, + { 1473, 1474 }, + { 1476, 1477 }, + { 1479, 1479 }, + { 1552, 1562 }, + { 1611, 1631 }, + { 1648, 1648 }, + { 1750, 1756 }, + { 1759, 1764 }, + { 1767, 1768 }, + { 1770, 1773 }, + { 1809, 1809 }, + { 1840, 1866 }, + { 1958, 1968 }, + { 2027, 2035 }, + { 2070, 2073 }, + { 2075, 2083 }, + { 2085, 2087 }, + { 2089, 2093 }, + { 2137, 2139 }, + { 2276, 2302 }, + { 2304, 2307 }, + { 2362, 2364 }, + { 2366, 2383 }, + { 2385, 2391 }, + { 2402, 2403 }, + { 2433, 2435 }, + { 2492, 2492 }, + { 2494, 2500 }, + { 2503, 2504 }, + { 2507, 2509 }, + { 2519, 2519 }, + { 2530, 2531 }, + { 2561, 2563 }, + { 2620, 2620 }, + { 2622, 2626 }, + { 2631, 2632 }, + { 2635, 2637 }, + { 2641, 2641 }, + { 2672, 2673 }, + { 2677, 
2677 }, + { 2689, 2691 }, + { 2748, 2748 }, + { 2750, 2757 }, + { 2759, 2761 }, + { 2763, 2765 }, + { 2786, 2787 }, + { 2817, 2819 }, + { 2876, 2876 }, + { 2878, 2884 }, + { 2887, 2888 }, + { 2891, 2893 }, + { 2902, 2903 }, + { 2914, 2915 }, + { 2946, 2946 }, + { 3006, 3010 }, + { 3014, 3016 }, + { 3018, 3021 }, + { 3031, 3031 }, + { 3073, 3075 }, + { 3134, 3140 }, + { 3142, 3144 }, + { 3146, 3149 }, + { 3157, 3158 }, + { 3170, 3171 }, + { 3202, 3203 }, + { 3260, 3260 }, + { 3262, 3268 }, + { 3270, 3272 }, + { 3274, 3277 }, + { 3285, 3286 }, + { 3298, 3299 }, + { 3330, 3331 }, + { 3390, 3396 }, + { 3398, 3400 }, + { 3402, 3405 }, + { 3415, 3415 }, + { 3426, 3427 }, + { 3458, 3459 }, + { 3530, 3530 }, + { 3535, 3540 }, + { 3542, 3542 }, + { 3544, 3551 }, + { 3570, 3571 }, + { 3633, 3633 }, + { 3636, 3642 }, + { 3655, 3662 }, + { 3761, 3761 }, + { 3764, 3769 }, + { 3771, 3772 }, + { 3784, 3789 }, + { 3864, 3865 }, + { 3893, 3893 }, + { 3895, 3895 }, + { 3897, 3897 }, + { 3902, 3903 }, + { 3953, 3972 }, + { 3974, 3975 }, + { 3981, 3991 }, + { 3993, 4028 }, + { 4038, 4038 }, + { 4139, 4158 }, + { 4182, 4185 }, + { 4190, 4192 }, + { 4194, 4196 }, + { 4199, 4205 }, + { 4209, 4212 }, + { 4226, 4237 }, + { 4239, 4239 }, + { 4250, 4253 }, + { 4957, 4959 }, + { 5906, 5908 }, + { 5938, 5940 }, + { 5970, 5971 }, + { 6002, 6003 }, + { 6068, 6099 }, + { 6109, 6109 }, + { 6155, 6157 }, + { 6313, 6313 }, + { 6432, 6443 }, + { 6448, 6459 }, + { 6576, 6592 }, + { 6600, 6601 }, + { 6679, 6683 }, + { 6741, 6750 }, + { 6752, 6780 }, + { 6783, 6783 }, + { 6912, 6916 }, + { 6964, 6980 }, + { 7019, 7027 }, + { 7040, 7042 }, + { 7073, 7085 }, + { 7142, 7155 }, + { 7204, 7223 }, + { 7376, 7378 }, + { 7380, 7400 }, + { 7405, 7405 }, + { 7410, 7412 }, + { 7616, 7654 }, + { 7676, 7679 }, + { 8400, 8432 }, + { 11503, 11505 }, + { 11647, 11647 }, + { 11744, 11775 }, + { 12330, 12335 }, + { 12441, 12442 }, + { 42607, 42610 }, + { 42612, 42621 }, + { 42655, 42655 }, + { 42736, 42737 }, + { 43010, 
43010 }, + { 43014, 43014 }, + { 43019, 43019 }, + { 43043, 43047 }, + { 43136, 43137 }, + { 43188, 43204 }, + { 43232, 43249 }, + { 43302, 43309 }, + { 43335, 43347 }, + { 43392, 43395 }, + { 43443, 43456 }, + { 43561, 43574 }, + { 43587, 43587 }, + { 43596, 43597 }, + { 43643, 43643 }, + { 43696, 43696 }, + { 43698, 43700 }, + { 43703, 43704 }, + { 43710, 43711 }, + { 43713, 43713 }, + { 43755, 43759 }, + { 43765, 43766 }, + { 44003, 44010 }, + { 44012, 44013 }, + { 64286, 64286 }, + { 65024, 65039 }, + { 65056, 65062 }, +}; +static const URange32 M_range32[] = { + { 66045, 66045 }, + { 68097, 68099 }, + { 68101, 68102 }, + { 68108, 68111 }, + { 68152, 68154 }, + { 68159, 68159 }, + { 69632, 69634 }, + { 69688, 69702 }, + { 69760, 69762 }, + { 69808, 69818 }, + { 69888, 69890 }, + { 69927, 69940 }, + { 70016, 70018 }, + { 70067, 70080 }, + { 71339, 71351 }, + { 94033, 94078 }, + { 94095, 94098 }, + { 119141, 119145 }, + { 119149, 119154 }, + { 119163, 119170 }, + { 119173, 119179 }, + { 119210, 119213 }, + { 119362, 119364 }, + { 917760, 917999 }, +}; +static const URange16 L_range16[] = { + { 65, 90 }, + { 97, 122 }, + { 170, 170 }, + { 181, 181 }, + { 186, 186 }, + { 192, 214 }, + { 216, 246 }, + { 248, 705 }, + { 710, 721 }, + { 736, 740 }, + { 748, 748 }, + { 750, 750 }, + { 880, 884 }, + { 886, 887 }, + { 890, 893 }, + { 902, 902 }, + { 904, 906 }, + { 908, 908 }, + { 910, 929 }, + { 931, 1013 }, + { 1015, 1153 }, + { 1162, 1319 }, + { 1329, 1366 }, + { 1369, 1369 }, + { 1377, 1415 }, + { 1488, 1514 }, + { 1520, 1522 }, + { 1568, 1610 }, + { 1646, 1647 }, + { 1649, 1747 }, + { 1749, 1749 }, + { 1765, 1766 }, + { 1774, 1775 }, + { 1786, 1788 }, + { 1791, 1791 }, + { 1808, 1808 }, + { 1810, 1839 }, + { 1869, 1957 }, + { 1969, 1969 }, + { 1994, 2026 }, + { 2036, 2037 }, + { 2042, 2042 }, + { 2048, 2069 }, + { 2074, 2074 }, + { 2084, 2084 }, + { 2088, 2088 }, + { 2112, 2136 }, + { 2208, 2208 }, + { 2210, 2220 }, + { 2308, 2361 }, + { 2365, 2365 }, + { 2384, 2384 
}, + { 2392, 2401 }, + { 2417, 2423 }, + { 2425, 2431 }, + { 2437, 2444 }, + { 2447, 2448 }, + { 2451, 2472 }, + { 2474, 2480 }, + { 2482, 2482 }, + { 2486, 2489 }, + { 2493, 2493 }, + { 2510, 2510 }, + { 2524, 2525 }, + { 2527, 2529 }, + { 2544, 2545 }, + { 2565, 2570 }, + { 2575, 2576 }, + { 2579, 2600 }, + { 2602, 2608 }, + { 2610, 2611 }, + { 2613, 2614 }, + { 2616, 2617 }, + { 2649, 2652 }, + { 2654, 2654 }, + { 2674, 2676 }, + { 2693, 2701 }, + { 2703, 2705 }, + { 2707, 2728 }, + { 2730, 2736 }, + { 2738, 2739 }, + { 2741, 2745 }, + { 2749, 2749 }, + { 2768, 2768 }, + { 2784, 2785 }, + { 2821, 2828 }, + { 2831, 2832 }, + { 2835, 2856 }, + { 2858, 2864 }, + { 2866, 2867 }, + { 2869, 2873 }, + { 2877, 2877 }, + { 2908, 2909 }, + { 2911, 2913 }, + { 2929, 2929 }, + { 2947, 2947 }, + { 2949, 2954 }, + { 2958, 2960 }, + { 2962, 2965 }, + { 2969, 2970 }, + { 2972, 2972 }, + { 2974, 2975 }, + { 2979, 2980 }, + { 2984, 2986 }, + { 2990, 3001 }, + { 3024, 3024 }, + { 3077, 3084 }, + { 3086, 3088 }, + { 3090, 3112 }, + { 3114, 3123 }, + { 3125, 3129 }, + { 3133, 3133 }, + { 3160, 3161 }, + { 3168, 3169 }, + { 3205, 3212 }, + { 3214, 3216 }, + { 3218, 3240 }, + { 3242, 3251 }, + { 3253, 3257 }, + { 3261, 3261 }, + { 3294, 3294 }, + { 3296, 3297 }, + { 3313, 3314 }, + { 3333, 3340 }, + { 3342, 3344 }, + { 3346, 3386 }, + { 3389, 3389 }, + { 3406, 3406 }, + { 3424, 3425 }, + { 3450, 3455 }, + { 3461, 3478 }, + { 3482, 3505 }, + { 3507, 3515 }, + { 3517, 3517 }, + { 3520, 3526 }, + { 3585, 3632 }, + { 3634, 3635 }, + { 3648, 3654 }, + { 3713, 3714 }, + { 3716, 3716 }, + { 3719, 3720 }, + { 3722, 3722 }, + { 3725, 3725 }, + { 3732, 3735 }, + { 3737, 3743 }, + { 3745, 3747 }, + { 3749, 3749 }, + { 3751, 3751 }, + { 3754, 3755 }, + { 3757, 3760 }, + { 3762, 3763 }, + { 3773, 3773 }, + { 3776, 3780 }, + { 3782, 3782 }, + { 3804, 3807 }, + { 3840, 3840 }, + { 3904, 3911 }, + { 3913, 3948 }, + { 3976, 3980 }, + { 4096, 4138 }, + { 4159, 4159 }, + { 4176, 4181 }, + { 4186, 4189 
}, + { 4193, 4193 }, + { 4197, 4198 }, + { 4206, 4208 }, + { 4213, 4225 }, + { 4238, 4238 }, + { 4256, 4293 }, + { 4295, 4295 }, + { 4301, 4301 }, + { 4304, 4346 }, + { 4348, 4680 }, + { 4682, 4685 }, + { 4688, 4694 }, + { 4696, 4696 }, + { 4698, 4701 }, + { 4704, 4744 }, + { 4746, 4749 }, + { 4752, 4784 }, + { 4786, 4789 }, + { 4792, 4798 }, + { 4800, 4800 }, + { 4802, 4805 }, + { 4808, 4822 }, + { 4824, 4880 }, + { 4882, 4885 }, + { 4888, 4954 }, + { 4992, 5007 }, + { 5024, 5108 }, + { 5121, 5740 }, + { 5743, 5759 }, + { 5761, 5786 }, + { 5792, 5866 }, + { 5888, 5900 }, + { 5902, 5905 }, + { 5920, 5937 }, + { 5952, 5969 }, + { 5984, 5996 }, + { 5998, 6000 }, + { 6016, 6067 }, + { 6103, 6103 }, + { 6108, 6108 }, + { 6176, 6263 }, + { 6272, 6312 }, + { 6314, 6314 }, + { 6320, 6389 }, + { 6400, 6428 }, + { 6480, 6509 }, + { 6512, 6516 }, + { 6528, 6571 }, + { 6593, 6599 }, + { 6656, 6678 }, + { 6688, 6740 }, + { 6823, 6823 }, + { 6917, 6963 }, + { 6981, 6987 }, + { 7043, 7072 }, + { 7086, 7087 }, + { 7098, 7141 }, + { 7168, 7203 }, + { 7245, 7247 }, + { 7258, 7293 }, + { 7401, 7404 }, + { 7406, 7409 }, + { 7413, 7414 }, + { 7424, 7615 }, + { 7680, 7957 }, + { 7960, 7965 }, + { 7968, 8005 }, + { 8008, 8013 }, + { 8016, 8023 }, + { 8025, 8025 }, + { 8027, 8027 }, + { 8029, 8029 }, + { 8031, 8061 }, + { 8064, 8116 }, + { 8118, 8124 }, + { 8126, 8126 }, + { 8130, 8132 }, + { 8134, 8140 }, + { 8144, 8147 }, + { 8150, 8155 }, + { 8160, 8172 }, + { 8178, 8180 }, + { 8182, 8188 }, + { 8305, 8305 }, + { 8319, 8319 }, + { 8336, 8348 }, + { 8450, 8450 }, + { 8455, 8455 }, + { 8458, 8467 }, + { 8469, 8469 }, + { 8473, 8477 }, + { 8484, 8484 }, + { 8486, 8486 }, + { 8488, 8488 }, + { 8490, 8493 }, + { 8495, 8505 }, + { 8508, 8511 }, + { 8517, 8521 }, + { 8526, 8526 }, + { 8579, 8580 }, + { 11264, 11310 }, + { 11312, 11358 }, + { 11360, 11492 }, + { 11499, 11502 }, + { 11506, 11507 }, + { 11520, 11557 }, + { 11559, 11559 }, + { 11565, 11565 }, + { 11568, 11623 }, + { 11631, 11631 
}, + { 11648, 11670 }, + { 11680, 11686 }, + { 11688, 11694 }, + { 11696, 11702 }, + { 11704, 11710 }, + { 11712, 11718 }, + { 11720, 11726 }, + { 11728, 11734 }, + { 11736, 11742 }, + { 11823, 11823 }, + { 12293, 12294 }, + { 12337, 12341 }, + { 12347, 12348 }, + { 12353, 12438 }, + { 12445, 12447 }, + { 12449, 12538 }, + { 12540, 12543 }, + { 12549, 12589 }, + { 12593, 12686 }, + { 12704, 12730 }, + { 12784, 12799 }, + { 13312, 19893 }, + { 19968, 40908 }, + { 40960, 42124 }, + { 42192, 42237 }, + { 42240, 42508 }, + { 42512, 42527 }, + { 42538, 42539 }, + { 42560, 42606 }, + { 42623, 42647 }, + { 42656, 42725 }, + { 42775, 42783 }, + { 42786, 42888 }, + { 42891, 42894 }, + { 42896, 42899 }, + { 42912, 42922 }, + { 43000, 43009 }, + { 43011, 43013 }, + { 43015, 43018 }, + { 43020, 43042 }, + { 43072, 43123 }, + { 43138, 43187 }, + { 43250, 43255 }, + { 43259, 43259 }, + { 43274, 43301 }, + { 43312, 43334 }, + { 43360, 43388 }, + { 43396, 43442 }, + { 43471, 43471 }, + { 43520, 43560 }, + { 43584, 43586 }, + { 43588, 43595 }, + { 43616, 43638 }, + { 43642, 43642 }, + { 43648, 43695 }, + { 43697, 43697 }, + { 43701, 43702 }, + { 43705, 43709 }, + { 43712, 43712 }, + { 43714, 43714 }, + { 43739, 43741 }, + { 43744, 43754 }, + { 43762, 43764 }, + { 43777, 43782 }, + { 43785, 43790 }, + { 43793, 43798 }, + { 43808, 43814 }, + { 43816, 43822 }, + { 43968, 44002 }, + { 44032, 55203 }, + { 55216, 55238 }, + { 55243, 55291 }, + { 63744, 64109 }, + { 64112, 64217 }, + { 64256, 64262 }, + { 64275, 64279 }, + { 64285, 64285 }, + { 64287, 64296 }, + { 64298, 64310 }, + { 64312, 64316 }, + { 64318, 64318 }, + { 64320, 64321 }, + { 64323, 64324 }, + { 64326, 64433 }, + { 64467, 64829 }, + { 64848, 64911 }, + { 64914, 64967 }, + { 65008, 65019 }, + { 65136, 65140 }, + { 65142, 65276 }, + { 65313, 65338 }, + { 65345, 65370 }, + { 65382, 65470 }, + { 65474, 65479 }, + { 65482, 65487 }, + { 65490, 65495 }, + { 65498, 65500 }, +}; +static const URange32 L_range32[] = { + { 65536, 
65547 }, + { 65549, 65574 }, + { 65576, 65594 }, + { 65596, 65597 }, + { 65599, 65613 }, + { 65616, 65629 }, + { 65664, 65786 }, + { 66176, 66204 }, + { 66208, 66256 }, + { 66304, 66334 }, + { 66352, 66368 }, + { 66370, 66377 }, + { 66432, 66461 }, + { 66464, 66499 }, + { 66504, 66511 }, + { 66560, 66717 }, + { 67584, 67589 }, + { 67592, 67592 }, + { 67594, 67637 }, + { 67639, 67640 }, + { 67644, 67644 }, + { 67647, 67669 }, + { 67840, 67861 }, + { 67872, 67897 }, + { 67968, 68023 }, + { 68030, 68031 }, + { 68096, 68096 }, + { 68112, 68115 }, + { 68117, 68119 }, + { 68121, 68147 }, + { 68192, 68220 }, + { 68352, 68405 }, + { 68416, 68437 }, + { 68448, 68466 }, + { 68608, 68680 }, + { 69635, 69687 }, + { 69763, 69807 }, + { 69840, 69864 }, + { 69891, 69926 }, + { 70019, 70066 }, + { 70081, 70084 }, + { 71296, 71338 }, + { 73728, 74606 }, + { 77824, 78894 }, + { 92160, 92728 }, + { 93952, 94020 }, + { 94032, 94032 }, + { 94099, 94111 }, + { 110592, 110593 }, + { 119808, 119892 }, + { 119894, 119964 }, + { 119966, 119967 }, + { 119970, 119970 }, + { 119973, 119974 }, + { 119977, 119980 }, + { 119982, 119993 }, + { 119995, 119995 }, + { 119997, 120003 }, + { 120005, 120069 }, + { 120071, 120074 }, + { 120077, 120084 }, + { 120086, 120092 }, + { 120094, 120121 }, + { 120123, 120126 }, + { 120128, 120132 }, + { 120134, 120134 }, + { 120138, 120144 }, + { 120146, 120485 }, + { 120488, 120512 }, + { 120514, 120538 }, + { 120540, 120570 }, + { 120572, 120596 }, + { 120598, 120628 }, + { 120630, 120654 }, + { 120656, 120686 }, + { 120688, 120712 }, + { 120714, 120744 }, + { 120746, 120770 }, + { 120772, 120779 }, + { 126464, 126467 }, + { 126469, 126495 }, + { 126497, 126498 }, + { 126500, 126500 }, + { 126503, 126503 }, + { 126505, 126514 }, + { 126516, 126519 }, + { 126521, 126521 }, + { 126523, 126523 }, + { 126530, 126530 }, + { 126535, 126535 }, + { 126537, 126537 }, + { 126539, 126539 }, + { 126541, 126543 }, + { 126545, 126546 }, + { 126548, 126548 }, + { 126551, 
126551 }, + { 126553, 126553 }, + { 126555, 126555 }, + { 126557, 126557 }, + { 126559, 126559 }, + { 126561, 126562 }, + { 126564, 126564 }, + { 126567, 126570 }, + { 126572, 126578 }, + { 126580, 126583 }, + { 126585, 126588 }, + { 126590, 126590 }, + { 126592, 126601 }, + { 126603, 126619 }, + { 126625, 126627 }, + { 126629, 126633 }, + { 126635, 126651 }, + { 131072, 173782 }, + { 173824, 177972 }, + { 177984, 178205 }, + { 194560, 195101 }, +}; +static const URange16 N_range16[] = { + { 48, 57 }, + { 178, 179 }, + { 185, 185 }, + { 188, 190 }, + { 1632, 1641 }, + { 1776, 1785 }, + { 1984, 1993 }, + { 2406, 2415 }, + { 2534, 2543 }, + { 2548, 2553 }, + { 2662, 2671 }, + { 2790, 2799 }, + { 2918, 2927 }, + { 2930, 2935 }, + { 3046, 3058 }, + { 3174, 3183 }, + { 3192, 3198 }, + { 3302, 3311 }, + { 3430, 3445 }, + { 3664, 3673 }, + { 3792, 3801 }, + { 3872, 3891 }, + { 4160, 4169 }, + { 4240, 4249 }, + { 4969, 4988 }, + { 5870, 5872 }, + { 6112, 6121 }, + { 6128, 6137 }, + { 6160, 6169 }, + { 6470, 6479 }, + { 6608, 6618 }, + { 6784, 6793 }, + { 6800, 6809 }, + { 6992, 7001 }, + { 7088, 7097 }, + { 7232, 7241 }, + { 7248, 7257 }, + { 8304, 8304 }, + { 8308, 8313 }, + { 8320, 8329 }, + { 8528, 8578 }, + { 8581, 8585 }, + { 9312, 9371 }, + { 9450, 9471 }, + { 10102, 10131 }, + { 11517, 11517 }, + { 12295, 12295 }, + { 12321, 12329 }, + { 12344, 12346 }, + { 12690, 12693 }, + { 12832, 12841 }, + { 12872, 12879 }, + { 12881, 12895 }, + { 12928, 12937 }, + { 12977, 12991 }, + { 42528, 42537 }, + { 42726, 42735 }, + { 43056, 43061 }, + { 43216, 43225 }, + { 43264, 43273 }, + { 43472, 43481 }, + { 43600, 43609 }, + { 44016, 44025 }, + { 65296, 65305 }, +}; +static const URange32 N_range32[] = { + { 65799, 65843 }, + { 65856, 65912 }, + { 65930, 65930 }, + { 66336, 66339 }, + { 66369, 66369 }, + { 66378, 66378 }, + { 66513, 66517 }, + { 66720, 66729 }, + { 67672, 67679 }, + { 67862, 67867 }, + { 68160, 68167 }, + { 68221, 68222 }, + { 68440, 68447 }, + { 68472, 68479 }, + 
{ 69216, 69246 }, + { 69714, 69743 }, + { 69872, 69881 }, + { 69942, 69951 }, + { 70096, 70105 }, + { 71360, 71369 }, + { 74752, 74850 }, + { 119648, 119665 }, + { 120782, 120831 }, + { 127232, 127242 }, +}; +static const URange16 Sk_range16[] = { + { 94, 94 }, + { 96, 96 }, + { 168, 168 }, + { 175, 175 }, + { 180, 180 }, + { 184, 184 }, + { 706, 709 }, + { 722, 735 }, + { 741, 747 }, + { 749, 749 }, + { 751, 767 }, + { 885, 885 }, + { 900, 901 }, + { 8125, 8125 }, + { 8127, 8129 }, + { 8141, 8143 }, + { 8157, 8159 }, + { 8173, 8175 }, + { 8189, 8190 }, + { 12443, 12444 }, + { 42752, 42774 }, + { 42784, 42785 }, + { 42889, 42890 }, + { 64434, 64449 }, + { 65342, 65342 }, + { 65344, 65344 }, + { 65507, 65507 }, +}; +static const URange16 P_range16[] = { + { 33, 35 }, + { 37, 42 }, + { 44, 47 }, + { 58, 59 }, + { 63, 64 }, + { 91, 93 }, + { 95, 95 }, + { 123, 123 }, + { 125, 125 }, + { 161, 161 }, + { 167, 167 }, + { 171, 171 }, + { 182, 183 }, + { 187, 187 }, + { 191, 191 }, + { 894, 894 }, + { 903, 903 }, + { 1370, 1375 }, + { 1417, 1418 }, + { 1470, 1470 }, + { 1472, 1472 }, + { 1475, 1475 }, + { 1478, 1478 }, + { 1523, 1524 }, + { 1545, 1546 }, + { 1548, 1549 }, + { 1563, 1563 }, + { 1566, 1567 }, + { 1642, 1645 }, + { 1748, 1748 }, + { 1792, 1805 }, + { 2039, 2041 }, + { 2096, 2110 }, + { 2142, 2142 }, + { 2404, 2405 }, + { 2416, 2416 }, + { 2800, 2800 }, + { 3572, 3572 }, + { 3663, 3663 }, + { 3674, 3675 }, + { 3844, 3858 }, + { 3860, 3860 }, + { 3898, 3901 }, + { 3973, 3973 }, + { 4048, 4052 }, + { 4057, 4058 }, + { 4170, 4175 }, + { 4347, 4347 }, + { 4960, 4968 }, + { 5120, 5120 }, + { 5741, 5742 }, + { 5787, 5788 }, + { 5867, 5869 }, + { 5941, 5942 }, + { 6100, 6102 }, + { 6104, 6106 }, + { 6144, 6154 }, + { 6468, 6469 }, + { 6686, 6687 }, + { 6816, 6822 }, + { 6824, 6829 }, + { 7002, 7008 }, + { 7164, 7167 }, + { 7227, 7231 }, + { 7294, 7295 }, + { 7360, 7367 }, + { 7379, 7379 }, + { 8208, 8231 }, + { 8240, 8259 }, + { 8261, 8273 }, + { 8275, 8286 }, + { 
8317, 8318 }, + { 8333, 8334 }, + { 8968, 8971 }, + { 9001, 9002 }, + { 10088, 10101 }, + { 10181, 10182 }, + { 10214, 10223 }, + { 10627, 10648 }, + { 10712, 10715 }, + { 10748, 10749 }, + { 11513, 11516 }, + { 11518, 11519 }, + { 11632, 11632 }, + { 11776, 11822 }, + { 11824, 11835 }, + { 12289, 12291 }, + { 12296, 12305 }, + { 12308, 12319 }, + { 12336, 12336 }, + { 12349, 12349 }, + { 12448, 12448 }, + { 12539, 12539 }, + { 42238, 42239 }, + { 42509, 42511 }, + { 42611, 42611 }, + { 42622, 42622 }, + { 42738, 42743 }, + { 43124, 43127 }, + { 43214, 43215 }, + { 43256, 43258 }, + { 43310, 43311 }, + { 43359, 43359 }, + { 43457, 43469 }, + { 43486, 43487 }, + { 43612, 43615 }, + { 43742, 43743 }, + { 43760, 43761 }, + { 44011, 44011 }, + { 64830, 64831 }, + { 65040, 65049 }, + { 65072, 65106 }, + { 65108, 65121 }, + { 65123, 65123 }, + { 65128, 65128 }, + { 65130, 65131 }, + { 65281, 65283 }, + { 65285, 65290 }, + { 65292, 65295 }, + { 65306, 65307 }, + { 65311, 65312 }, + { 65339, 65341 }, + { 65343, 65343 }, + { 65371, 65371 }, + { 65373, 65373 }, + { 65375, 65381 }, +}; +static const URange32 P_range32[] = { + { 65792, 65794 }, + { 66463, 66463 }, + { 66512, 66512 }, + { 67671, 67671 }, + { 67871, 67871 }, + { 67903, 67903 }, + { 68176, 68184 }, + { 68223, 68223 }, + { 68409, 68415 }, + { 69703, 69709 }, + { 69819, 69820 }, + { 69822, 69825 }, + { 69952, 69955 }, + { 70085, 70088 }, + { 74864, 74867 }, +}; +static const URange16 S_range16[] = { + { 36, 36 }, + { 43, 43 }, + { 60, 62 }, + { 94, 94 }, + { 96, 96 }, + { 124, 124 }, + { 126, 126 }, + { 162, 166 }, + { 168, 169 }, + { 172, 172 }, + { 174, 177 }, + { 180, 180 }, + { 184, 184 }, + { 215, 215 }, + { 247, 247 }, + { 706, 709 }, + { 722, 735 }, + { 741, 747 }, + { 749, 749 }, + { 751, 767 }, + { 885, 885 }, + { 900, 901 }, + { 1014, 1014 }, + { 1154, 1154 }, + { 1423, 1423 }, + { 1542, 1544 }, + { 1547, 1547 }, + { 1550, 1551 }, + { 1758, 1758 }, + { 1769, 1769 }, + { 1789, 1790 }, + { 2038, 2038 }, + { 
2546, 2547 }, + { 2554, 2555 }, + { 2801, 2801 }, + { 2928, 2928 }, + { 3059, 3066 }, + { 3199, 3199 }, + { 3449, 3449 }, + { 3647, 3647 }, + { 3841, 3843 }, + { 3859, 3859 }, + { 3861, 3863 }, + { 3866, 3871 }, + { 3892, 3892 }, + { 3894, 3894 }, + { 3896, 3896 }, + { 4030, 4037 }, + { 4039, 4044 }, + { 4046, 4047 }, + { 4053, 4056 }, + { 4254, 4255 }, + { 5008, 5017 }, + { 6107, 6107 }, + { 6464, 6464 }, + { 6622, 6655 }, + { 7009, 7018 }, + { 7028, 7036 }, + { 8125, 8125 }, + { 8127, 8129 }, + { 8141, 8143 }, + { 8157, 8159 }, + { 8173, 8175 }, + { 8189, 8190 }, + { 8260, 8260 }, + { 8274, 8274 }, + { 8314, 8316 }, + { 8330, 8332 }, + { 8352, 8378 }, + { 8448, 8449 }, + { 8451, 8454 }, + { 8456, 8457 }, + { 8468, 8468 }, + { 8470, 8472 }, + { 8478, 8483 }, + { 8485, 8485 }, + { 8487, 8487 }, + { 8489, 8489 }, + { 8494, 8494 }, + { 8506, 8507 }, + { 8512, 8516 }, + { 8522, 8525 }, + { 8527, 8527 }, + { 8592, 8967 }, + { 8972, 9000 }, + { 9003, 9203 }, + { 9216, 9254 }, + { 9280, 9290 }, + { 9372, 9449 }, + { 9472, 9983 }, + { 9985, 10087 }, + { 10132, 10180 }, + { 10183, 10213 }, + { 10224, 10626 }, + { 10649, 10711 }, + { 10716, 10747 }, + { 10750, 11084 }, + { 11088, 11097 }, + { 11493, 11498 }, + { 11904, 11929 }, + { 11931, 12019 }, + { 12032, 12245 }, + { 12272, 12283 }, + { 12292, 12292 }, + { 12306, 12307 }, + { 12320, 12320 }, + { 12342, 12343 }, + { 12350, 12351 }, + { 12443, 12444 }, + { 12688, 12689 }, + { 12694, 12703 }, + { 12736, 12771 }, + { 12800, 12830 }, + { 12842, 12871 }, + { 12880, 12880 }, + { 12896, 12927 }, + { 12938, 12976 }, + { 12992, 13054 }, + { 13056, 13311 }, + { 19904, 19967 }, + { 42128, 42182 }, + { 42752, 42774 }, + { 42784, 42785 }, + { 42889, 42890 }, + { 43048, 43051 }, + { 43062, 43065 }, + { 43639, 43641 }, + { 64297, 64297 }, + { 64434, 64449 }, + { 65020, 65021 }, + { 65122, 65122 }, + { 65124, 65126 }, + { 65129, 65129 }, + { 65284, 65284 }, + { 65291, 65291 }, + { 65308, 65310 }, + { 65342, 65342 }, + { 65344, 65344 }, 
+ { 65372, 65372 }, + { 65374, 65374 }, + { 65504, 65510 }, + { 65512, 65518 }, + { 65532, 65533 }, +}; +static const URange32 S_range32[] = { + { 65847, 65855 }, + { 65913, 65929 }, + { 65936, 65947 }, + { 66000, 66044 }, + { 118784, 119029 }, + { 119040, 119078 }, + { 119081, 119140 }, + { 119146, 119148 }, + { 119171, 119172 }, + { 119180, 119209 }, + { 119214, 119261 }, + { 119296, 119361 }, + { 119365, 119365 }, + { 119552, 119638 }, + { 120513, 120513 }, + { 120539, 120539 }, + { 120571, 120571 }, + { 120597, 120597 }, + { 120629, 120629 }, + { 120655, 120655 }, + { 120687, 120687 }, + { 120713, 120713 }, + { 120745, 120745 }, + { 120771, 120771 }, + { 126704, 126705 }, + { 126976, 127019 }, + { 127024, 127123 }, + { 127136, 127150 }, + { 127153, 127166 }, + { 127169, 127183 }, + { 127185, 127199 }, + { 127248, 127278 }, + { 127280, 127339 }, + { 127344, 127386 }, + { 127462, 127490 }, + { 127504, 127546 }, + { 127552, 127560 }, + { 127568, 127569 }, + { 127744, 127776 }, + { 127792, 127797 }, + { 127799, 127868 }, + { 127872, 127891 }, + { 127904, 127940 }, + { 127942, 127946 }, + { 127968, 127984 }, + { 128000, 128062 }, + { 128064, 128064 }, + { 128066, 128247 }, + { 128249, 128252 }, + { 128256, 128317 }, + { 128320, 128323 }, + { 128336, 128359 }, + { 128507, 128576 }, + { 128581, 128591 }, + { 128640, 128709 }, + { 128768, 128883 }, +}; +static const URange16 So_range16[] = { + { 166, 166 }, + { 169, 169 }, + { 174, 174 }, + { 176, 176 }, + { 1154, 1154 }, + { 1550, 1551 }, + { 1758, 1758 }, + { 1769, 1769 }, + { 1789, 1790 }, + { 2038, 2038 }, + { 2554, 2554 }, + { 2928, 2928 }, + { 3059, 3064 }, + { 3066, 3066 }, + { 3199, 3199 }, + { 3449, 3449 }, + { 3841, 3843 }, + { 3859, 3859 }, + { 3861, 3863 }, + { 3866, 3871 }, + { 3892, 3892 }, + { 3894, 3894 }, + { 3896, 3896 }, + { 4030, 4037 }, + { 4039, 4044 }, + { 4046, 4047 }, + { 4053, 4056 }, + { 4254, 4255 }, + { 5008, 5017 }, + { 6464, 6464 }, + { 6622, 6655 }, + { 7009, 7018 }, + { 7028, 7036 }, + 
{ 8448, 8449 }, + { 8451, 8454 }, + { 8456, 8457 }, + { 8468, 8468 }, + { 8470, 8471 }, + { 8478, 8483 }, + { 8485, 8485 }, + { 8487, 8487 }, + { 8489, 8489 }, + { 8494, 8494 }, + { 8506, 8507 }, + { 8522, 8522 }, + { 8524, 8525 }, + { 8527, 8527 }, + { 8597, 8601 }, + { 8604, 8607 }, + { 8609, 8610 }, + { 8612, 8613 }, + { 8615, 8621 }, + { 8623, 8653 }, + { 8656, 8657 }, + { 8659, 8659 }, + { 8661, 8691 }, + { 8960, 8967 }, + { 8972, 8991 }, + { 8994, 9000 }, + { 9003, 9083 }, + { 9085, 9114 }, + { 9140, 9179 }, + { 9186, 9203 }, + { 9216, 9254 }, + { 9280, 9290 }, + { 9372, 9449 }, + { 9472, 9654 }, + { 9656, 9664 }, + { 9666, 9719 }, + { 9728, 9838 }, + { 9840, 9983 }, + { 9985, 10087 }, + { 10132, 10175 }, + { 10240, 10495 }, + { 11008, 11055 }, + { 11077, 11078 }, + { 11088, 11097 }, + { 11493, 11498 }, + { 11904, 11929 }, + { 11931, 12019 }, + { 12032, 12245 }, + { 12272, 12283 }, + { 12292, 12292 }, + { 12306, 12307 }, + { 12320, 12320 }, + { 12342, 12343 }, + { 12350, 12351 }, + { 12688, 12689 }, + { 12694, 12703 }, + { 12736, 12771 }, + { 12800, 12830 }, + { 12842, 12871 }, + { 12880, 12880 }, + { 12896, 12927 }, + { 12938, 12976 }, + { 12992, 13054 }, + { 13056, 13311 }, + { 19904, 19967 }, + { 42128, 42182 }, + { 43048, 43051 }, + { 43062, 43063 }, + { 43065, 43065 }, + { 43639, 43641 }, + { 65021, 65021 }, + { 65508, 65508 }, + { 65512, 65512 }, + { 65517, 65518 }, + { 65532, 65533 }, +}; +static const URange32 So_range32[] = { + { 65847, 65855 }, + { 65913, 65929 }, + { 65936, 65947 }, + { 66000, 66044 }, + { 118784, 119029 }, + { 119040, 119078 }, + { 119081, 119140 }, + { 119146, 119148 }, + { 119171, 119172 }, + { 119180, 119209 }, + { 119214, 119261 }, + { 119296, 119361 }, + { 119365, 119365 }, + { 119552, 119638 }, + { 126976, 127019 }, + { 127024, 127123 }, + { 127136, 127150 }, + { 127153, 127166 }, + { 127169, 127183 }, + { 127185, 127199 }, + { 127248, 127278 }, + { 127280, 127339 }, + { 127344, 127386 }, + { 127462, 127490 }, + { 127504, 
127546 }, + { 127552, 127560 }, + { 127568, 127569 }, + { 127744, 127776 }, + { 127792, 127797 }, + { 127799, 127868 }, + { 127872, 127891 }, + { 127904, 127940 }, + { 127942, 127946 }, + { 127968, 127984 }, + { 128000, 128062 }, + { 128064, 128064 }, + { 128066, 128247 }, + { 128249, 128252 }, + { 128256, 128317 }, + { 128320, 128323 }, + { 128336, 128359 }, + { 128507, 128576 }, + { 128581, 128591 }, + { 128640, 128709 }, + { 128768, 128883 }, +}; +static const URange16 Sm_range16[] = { + { 43, 43 }, + { 60, 62 }, + { 124, 124 }, + { 126, 126 }, + { 172, 172 }, + { 177, 177 }, + { 215, 215 }, + { 247, 247 }, + { 1014, 1014 }, + { 1542, 1544 }, + { 8260, 8260 }, + { 8274, 8274 }, + { 8314, 8316 }, + { 8330, 8332 }, + { 8472, 8472 }, + { 8512, 8516 }, + { 8523, 8523 }, + { 8592, 8596 }, + { 8602, 8603 }, + { 8608, 8608 }, + { 8611, 8611 }, + { 8614, 8614 }, + { 8622, 8622 }, + { 8654, 8655 }, + { 8658, 8658 }, + { 8660, 8660 }, + { 8692, 8959 }, + { 8992, 8993 }, + { 9084, 9084 }, + { 9115, 9139 }, + { 9180, 9185 }, + { 9655, 9655 }, + { 9665, 9665 }, + { 9720, 9727 }, + { 9839, 9839 }, + { 10176, 10180 }, + { 10183, 10213 }, + { 10224, 10239 }, + { 10496, 10626 }, + { 10649, 10711 }, + { 10716, 10747 }, + { 10750, 11007 }, + { 11056, 11076 }, + { 11079, 11084 }, + { 64297, 64297 }, + { 65122, 65122 }, + { 65124, 65126 }, + { 65291, 65291 }, + { 65308, 65310 }, + { 65372, 65372 }, + { 65374, 65374 }, + { 65506, 65506 }, + { 65513, 65516 }, +}; +static const URange32 Sm_range32[] = { + { 120513, 120513 }, + { 120539, 120539 }, + { 120571, 120571 }, + { 120597, 120597 }, + { 120629, 120629 }, + { 120655, 120655 }, + { 120687, 120687 }, + { 120713, 120713 }, + { 120745, 120745 }, + { 120771, 120771 }, + { 126704, 126705 }, +}; +static const URange16 Sc_range16[] = { + { 36, 36 }, + { 162, 165 }, + { 1423, 1423 }, + { 1547, 1547 }, + { 2546, 2547 }, + { 2555, 2555 }, + { 2801, 2801 }, + { 3065, 3065 }, + { 3647, 3647 }, + { 6107, 6107 }, + { 8352, 8378 }, + { 43064, 
43064 }, + { 65020, 65020 }, + { 65129, 65129 }, + { 65284, 65284 }, + { 65504, 65505 }, + { 65509, 65510 }, +}; +static const URange16 Z_range16[] = { + { 32, 32 }, + { 160, 160 }, + { 5760, 5760 }, + { 8192, 8202 }, + { 8232, 8233 }, + { 8239, 8239 }, + { 8287, 8287 }, + { 12288, 12288 }, +}; +static const URange16 Zl_range16[] = { + { 8232, 8232 }, +}; +static const URange16 Co_range16[] = { + { 57344, 63743 }, +}; +static const URange32 Co_range32[] = { + { 983040, 1048573 }, + { 1048576, 1114109 }, +}; +static const URange16 Cc_range16[] = { + { 0, 31 }, + { 127, 159 }, +}; +static const URange16 Cf_range16[] = { + { 173, 173 }, + { 1536, 1540 }, + { 1564, 1564 }, + { 1757, 1757 }, + { 1807, 1807 }, + { 6158, 6158 }, + { 8203, 8207 }, + { 8234, 8238 }, + { 8288, 8292 }, + { 8294, 8303 }, + { 65279, 65279 }, + { 65529, 65531 }, +}; +static const URange32 Cf_range32[] = { + { 69821, 69821 }, + { 119155, 119162 }, + { 917505, 917505 }, + { 917536, 917631 }, +}; +static const URange16 Cs_range16[] = { + { 55296, 57343 }, +}; +static const URange16 Zp_range16[] = { + { 8233, 8233 }, +}; +static const URange16 Zs_range16[] = { + { 32, 32 }, + { 160, 160 }, + { 5760, 5760 }, + { 8192, 8202 }, + { 8239, 8239 }, + { 8287, 8287 }, + { 12288, 12288 }, +}; +static const URange16 Thaana_range16[] = { + { 1920, 1969 }, +}; +static const URange16 Telugu_range16[] = { + { 3073, 3075 }, + { 3077, 3084 }, + { 3086, 3088 }, + { 3090, 3112 }, + { 3114, 3123 }, + { 3125, 3129 }, + { 3133, 3140 }, + { 3142, 3144 }, + { 3146, 3149 }, + { 3157, 3158 }, + { 3160, 3161 }, + { 3168, 3171 }, + { 3174, 3183 }, + { 3192, 3199 }, +}; +static const URange16 Cyrillic_range16[] = { + { 1024, 1156 }, + { 1159, 1319 }, + { 7467, 7467 }, + { 7544, 7544 }, + { 11744, 11775 }, + { 42560, 42647 }, + { 42655, 42655 }, +}; +static const URange16 Hangul_range16[] = { + { 4352, 4607 }, + { 12334, 12335 }, + { 12593, 12686 }, + { 12800, 12830 }, + { 12896, 12926 }, + { 43360, 43388 }, + { 44032, 55203 }, 
+ { 55216, 55238 }, + { 55243, 55291 }, + { 65440, 65470 }, + { 65474, 65479 }, + { 65482, 65487 }, + { 65490, 65495 }, + { 65498, 65500 }, +}; +static const URange32 Old_South_Arabian_range32[] = { + { 68192, 68223 }, +}; +static const URange16 Ethiopic_range16[] = { + { 4608, 4680 }, + { 4682, 4685 }, + { 4688, 4694 }, + { 4696, 4696 }, + { 4698, 4701 }, + { 4704, 4744 }, + { 4746, 4749 }, + { 4752, 4784 }, + { 4786, 4789 }, + { 4792, 4798 }, + { 4800, 4800 }, + { 4802, 4805 }, + { 4808, 4822 }, + { 4824, 4880 }, + { 4882, 4885 }, + { 4888, 4954 }, + { 4957, 4988 }, + { 4992, 5017 }, + { 11648, 11670 }, + { 11680, 11686 }, + { 11688, 11694 }, + { 11696, 11702 }, + { 11704, 11710 }, + { 11712, 11718 }, + { 11720, 11726 }, + { 11728, 11734 }, + { 11736, 11742 }, + { 43777, 43782 }, + { 43785, 43790 }, + { 43793, 43798 }, + { 43808, 43814 }, + { 43816, 43822 }, +}; +static const URange16 Inherited_range16[] = { + { 768, 879 }, + { 1157, 1158 }, + { 1611, 1621 }, + { 1648, 1648 }, + { 2385, 2386 }, + { 7376, 7378 }, + { 7380, 7392 }, + { 7394, 7400 }, + { 7405, 7405 }, + { 7412, 7412 }, + { 7616, 7654 }, + { 7676, 7679 }, + { 8204, 8205 }, + { 8400, 8432 }, + { 12330, 12333 }, + { 12441, 12442 }, + { 65024, 65039 }, + { 65056, 65062 }, +}; +static const URange32 Inherited_range32[] = { + { 66045, 66045 }, + { 119143, 119145 }, + { 119163, 119170 }, + { 119173, 119179 }, + { 119210, 119213 }, + { 917760, 917999 }, +}; +static const URange32 Meroitic_Cursive_range32[] = { + { 68000, 68023 }, + { 68030, 68031 }, +}; +static const URange16 Han_range16[] = { + { 11904, 11929 }, + { 11931, 12019 }, + { 12032, 12245 }, + { 12293, 12293 }, + { 12295, 12295 }, + { 12321, 12329 }, + { 12344, 12347 }, + { 13312, 19893 }, + { 19968, 40908 }, + { 63744, 64109 }, + { 64112, 64217 }, +}; +static const URange32 Han_range32[] = { + { 131072, 173782 }, + { 173824, 177972 }, + { 177984, 178205 }, + { 194560, 195101 }, +}; +static const URange16 Armenian_range16[] = { + { 1329, 1366 }, 
+ { 1369, 1375 }, + { 1377, 1415 }, + { 1418, 1418 }, + { 1423, 1423 }, + { 64275, 64279 }, +}; +static const URange16 Tamil_range16[] = { + { 2946, 2947 }, + { 2949, 2954 }, + { 2958, 2960 }, + { 2962, 2965 }, + { 2969, 2970 }, + { 2972, 2972 }, + { 2974, 2975 }, + { 2979, 2980 }, + { 2984, 2986 }, + { 2990, 3001 }, + { 3006, 3010 }, + { 3014, 3016 }, + { 3018, 3021 }, + { 3024, 3024 }, + { 3031, 3031 }, + { 3046, 3066 }, +}; +static const URange16 Bopomofo_range16[] = { + { 746, 747 }, + { 12549, 12589 }, + { 12704, 12730 }, +}; +static const URange16 Sundanese_range16[] = { + { 7040, 7103 }, + { 7360, 7367 }, +}; +static const URange16 Tagalog_range16[] = { + { 5888, 5900 }, + { 5902, 5908 }, +}; +static const URange16 Malayalam_range16[] = { + { 3330, 3331 }, + { 3333, 3340 }, + { 3342, 3344 }, + { 3346, 3386 }, + { 3389, 3396 }, + { 3398, 3400 }, + { 3402, 3406 }, + { 3415, 3415 }, + { 3424, 3427 }, + { 3430, 3445 }, + { 3449, 3455 }, +}; +static const URange32 Carian_range32[] = { + { 66208, 66256 }, +}; +static const URange16 Hiragana_range16[] = { + { 12353, 12438 }, + { 12445, 12447 }, +}; +static const URange32 Hiragana_range32[] = { + { 110593, 110593 }, + { 127488, 127488 }, +}; +static const URange16 Tagbanwa_range16[] = { + { 5984, 5996 }, + { 5998, 6000 }, + { 6002, 6003 }, +}; +static const URange16 Meetei_Mayek_range16[] = { + { 43744, 43766 }, + { 43968, 44013 }, + { 44016, 44025 }, +}; +static const URange16 Tai_Le_range16[] = { + { 6480, 6509 }, + { 6512, 6516 }, +}; +static const URange16 Kayah_Li_range16[] = { + { 43264, 43311 }, +}; +static const URange16 Buginese_range16[] = { + { 6656, 6683 }, + { 6686, 6687 }, +}; +static const URange32 Kharoshthi_range32[] = { + { 68096, 68099 }, + { 68101, 68102 }, + { 68108, 68115 }, + { 68117, 68119 }, + { 68121, 68147 }, + { 68152, 68154 }, + { 68159, 68167 }, + { 68176, 68184 }, +}; +static const URange16 Tai_Tham_range16[] = { + { 6688, 6750 }, + { 6752, 6780 }, + { 6783, 6793 }, + { 6800, 6809 }, + 
{ 6816, 6829 }, +}; +static const URange32 Old_Italic_range32[] = { + { 66304, 66334 }, + { 66336, 66339 }, +}; +static const URange32 Old_Persian_range32[] = { + { 66464, 66499 }, + { 66504, 66517 }, +}; +static const URange16 Latin_range16[] = { + { 65, 90 }, + { 97, 122 }, + { 170, 170 }, + { 186, 186 }, + { 192, 214 }, + { 216, 246 }, + { 248, 696 }, + { 736, 740 }, + { 7424, 7461 }, + { 7468, 7516 }, + { 7522, 7525 }, + { 7531, 7543 }, + { 7545, 7614 }, + { 7680, 7935 }, + { 8305, 8305 }, + { 8319, 8319 }, + { 8336, 8348 }, + { 8490, 8491 }, + { 8498, 8498 }, + { 8526, 8526 }, + { 8544, 8584 }, + { 11360, 11391 }, + { 42786, 42887 }, + { 42891, 42894 }, + { 42896, 42899 }, + { 42912, 42922 }, + { 43000, 43007 }, + { 64256, 64262 }, + { 65313, 65338 }, + { 65345, 65370 }, +}; +static const URange16 Saurashtra_range16[] = { + { 43136, 43204 }, + { 43214, 43225 }, +}; +static const URange32 Shavian_range32[] = { + { 66640, 66687 }, +}; +static const URange16 Georgian_range16[] = { + { 4256, 4293 }, + { 4295, 4295 }, + { 4301, 4301 }, + { 4304, 4346 }, + { 4348, 4351 }, + { 11520, 11557 }, + { 11559, 11559 }, + { 11565, 11565 }, +}; +static const URange16 Batak_range16[] = { + { 7104, 7155 }, + { 7164, 7167 }, +}; +static const URange16 Devanagari_range16[] = { + { 2304, 2384 }, + { 2387, 2403 }, + { 2406, 2423 }, + { 2425, 2431 }, + { 43232, 43259 }, +}; +static const URange16 Thai_range16[] = { + { 3585, 3642 }, + { 3648, 3675 }, +}; +static const URange16 Tibetan_range16[] = { + { 3840, 3911 }, + { 3913, 3948 }, + { 3953, 3991 }, + { 3993, 4028 }, + { 4030, 4044 }, + { 4046, 4052 }, + { 4057, 4058 }, +}; +static const URange16 Tifinagh_range16[] = { + { 11568, 11623 }, + { 11631, 11632 }, + { 11647, 11647 }, +}; +static const URange32 Ugaritic_range32[] = { + { 66432, 66461 }, + { 66463, 66463 }, +}; +static const URange16 Braille_range16[] = { + { 10240, 10495 }, +}; +static const URange16 Greek_range16[] = { + { 880, 883 }, + { 885, 887 }, + { 890, 893 }, + { 
900, 900 }, + { 902, 902 }, + { 904, 906 }, + { 908, 908 }, + { 910, 929 }, + { 931, 993 }, + { 1008, 1023 }, + { 7462, 7466 }, + { 7517, 7521 }, + { 7526, 7530 }, + { 7615, 7615 }, + { 7936, 7957 }, + { 7960, 7965 }, + { 7968, 8005 }, + { 8008, 8013 }, + { 8016, 8023 }, + { 8025, 8025 }, + { 8027, 8027 }, + { 8029, 8029 }, + { 8031, 8061 }, + { 8064, 8116 }, + { 8118, 8132 }, + { 8134, 8147 }, + { 8150, 8155 }, + { 8157, 8175 }, + { 8178, 8180 }, + { 8182, 8190 }, + { 8486, 8486 }, +}; +static const URange32 Greek_range32[] = { + { 65856, 65930 }, + { 119296, 119365 }, +}; +static const URange32 Lycian_range32[] = { + { 66176, 66204 }, +}; +static const URange16 Tai_Viet_range16[] = { + { 43648, 43714 }, + { 43739, 43743 }, +}; +static const URange16 Vai_range16[] = { + { 42240, 42539 }, +}; +static const URange16 Ogham_range16[] = { + { 5760, 5788 }, +}; +static const URange32 Inscriptional_Parthian_range32[] = { + { 68416, 68437 }, + { 68440, 68447 }, +}; +static const URange16 Cham_range16[] = { + { 43520, 43574 }, + { 43584, 43597 }, + { 43600, 43609 }, + { 43612, 43615 }, +}; +static const URange16 Syriac_range16[] = { + { 1792, 1805 }, + { 1807, 1866 }, + { 1869, 1871 }, +}; +static const URange16 Runic_range16[] = { + { 5792, 5866 }, + { 5870, 5872 }, +}; +static const URange32 Gothic_range32[] = { + { 66352, 66378 }, +}; +static const URange16 Katakana_range16[] = { + { 12449, 12538 }, + { 12541, 12543 }, + { 12784, 12799 }, + { 13008, 13054 }, + { 13056, 13143 }, + { 65382, 65391 }, + { 65393, 65437 }, +}; +static const URange32 Katakana_range32[] = { + { 110592, 110592 }, +}; +static const URange32 Osmanya_range32[] = { + { 66688, 66717 }, + { 66720, 66729 }, +}; +static const URange16 New_Tai_Lue_range16[] = { + { 6528, 6571 }, + { 6576, 6601 }, + { 6608, 6618 }, + { 6622, 6623 }, +}; +static const URange16 Ol_Chiki_range16[] = { + { 7248, 7295 }, +}; +static const URange16 Limbu_range16[] = { + { 6400, 6428 }, + { 6432, 6443 }, + { 6448, 6459 }, + { 
6464, 6464 }, + { 6468, 6479 }, +}; +static const URange16 Cherokee_range16[] = { + { 5024, 5108 }, +}; +static const URange32 Miao_range32[] = { + { 93952, 94020 }, + { 94032, 94078 }, + { 94095, 94111 }, +}; +static const URange16 Oriya_range16[] = { + { 2817, 2819 }, + { 2821, 2828 }, + { 2831, 2832 }, + { 2835, 2856 }, + { 2858, 2864 }, + { 2866, 2867 }, + { 2869, 2873 }, + { 2876, 2884 }, + { 2887, 2888 }, + { 2891, 2893 }, + { 2902, 2903 }, + { 2908, 2909 }, + { 2911, 2915 }, + { 2918, 2935 }, +}; +static const URange32 Sharada_range32[] = { + { 70016, 70088 }, + { 70096, 70105 }, +}; +static const URange16 Gujarati_range16[] = { + { 2689, 2691 }, + { 2693, 2701 }, + { 2703, 2705 }, + { 2707, 2728 }, + { 2730, 2736 }, + { 2738, 2739 }, + { 2741, 2745 }, + { 2748, 2757 }, + { 2759, 2761 }, + { 2763, 2765 }, + { 2768, 2768 }, + { 2784, 2787 }, + { 2790, 2801 }, +}; +static const URange32 Inscriptional_Pahlavi_range32[] = { + { 68448, 68466 }, + { 68472, 68479 }, +}; +static const URange16 Khmer_range16[] = { + { 6016, 6109 }, + { 6112, 6121 }, + { 6128, 6137 }, + { 6624, 6655 }, +}; +static const URange32 Cuneiform_range32[] = { + { 73728, 74606 }, + { 74752, 74850 }, + { 74864, 74867 }, +}; +static const URange16 Mandaic_range16[] = { + { 2112, 2139 }, + { 2142, 2142 }, +}; +static const URange16 Syloti_Nagri_range16[] = { + { 43008, 43051 }, +}; +static const URange16 Nko_range16[] = { + { 1984, 2042 }, +}; +static const URange16 Canadian_Aboriginal_range16[] = { + { 5120, 5759 }, + { 6320, 6389 }, +}; +static const URange32 Meroitic_Hieroglyphs_range32[] = { + { 67968, 67999 }, +}; +static const URange32 Phoenician_range32[] = { + { 67840, 67867 }, + { 67871, 67871 }, +}; +static const URange16 Bengali_range16[] = { + { 2433, 2435 }, + { 2437, 2444 }, + { 2447, 2448 }, + { 2451, 2472 }, + { 2474, 2480 }, + { 2482, 2482 }, + { 2486, 2489 }, + { 2492, 2500 }, + { 2503, 2504 }, + { 2507, 2510 }, + { 2519, 2519 }, + { 2524, 2525 }, + { 2527, 2531 }, + { 2534, 
2555 }, +}; +static const URange32 Kaithi_range32[] = { + { 69760, 69825 }, +}; +static const URange16 Glagolitic_range16[] = { + { 11264, 11310 }, + { 11312, 11358 }, +}; +static const URange32 Imperial_Aramaic_range32[] = { + { 67648, 67669 }, + { 67671, 67679 }, +}; +static const URange32 Sora_Sompeng_range32[] = { + { 69840, 69864 }, + { 69872, 69881 }, +}; +static const URange16 Gurmukhi_range16[] = { + { 2561, 2563 }, + { 2565, 2570 }, + { 2575, 2576 }, + { 2579, 2600 }, + { 2602, 2608 }, + { 2610, 2611 }, + { 2613, 2614 }, + { 2616, 2617 }, + { 2620, 2620 }, + { 2622, 2626 }, + { 2631, 2632 }, + { 2635, 2637 }, + { 2641, 2641 }, + { 2649, 2652 }, + { 2654, 2654 }, + { 2662, 2677 }, +}; +static const URange16 Javanese_range16[] = { + { 43392, 43469 }, + { 43472, 43481 }, + { 43486, 43487 }, +}; +static const URange16 Phags_Pa_range16[] = { + { 43072, 43127 }, +}; +static const URange32 Cypriot_range32[] = { + { 67584, 67589 }, + { 67592, 67592 }, + { 67594, 67637 }, + { 67639, 67640 }, + { 67644, 67644 }, + { 67647, 67647 }, +}; +static const URange16 Kannada_range16[] = { + { 3202, 3203 }, + { 3205, 3212 }, + { 3214, 3216 }, + { 3218, 3240 }, + { 3242, 3251 }, + { 3253, 3257 }, + { 3260, 3268 }, + { 3270, 3272 }, + { 3274, 3277 }, + { 3285, 3286 }, + { 3294, 3294 }, + { 3296, 3299 }, + { 3302, 3311 }, + { 3313, 3314 }, +}; +static const URange16 Mongolian_range16[] = { + { 6144, 6145 }, + { 6148, 6148 }, + { 6150, 6158 }, + { 6160, 6169 }, + { 6176, 6263 }, + { 6272, 6314 }, +}; +static const URange16 Sinhala_range16[] = { + { 3458, 3459 }, + { 3461, 3478 }, + { 3482, 3505 }, + { 3507, 3515 }, + { 3517, 3517 }, + { 3520, 3526 }, + { 3530, 3530 }, + { 3535, 3540 }, + { 3542, 3542 }, + { 3544, 3551 }, + { 3570, 3572 }, +}; +static const URange32 Brahmi_range32[] = { + { 69632, 69709 }, + { 69714, 69743 }, +}; +static const URange32 Deseret_range32[] = { + { 66560, 66639 }, +}; +static const URange16 Rejang_range16[] = { + { 43312, 43347 }, + { 43359, 43359 }, 
+}; +static const URange16 Yi_range16[] = { + { 40960, 42124 }, + { 42128, 42182 }, +}; +static const URange16 Balinese_range16[] = { + { 6912, 6987 }, + { 6992, 7036 }, +}; +static const URange16 Lao_range16[] = { + { 3713, 3714 }, + { 3716, 3716 }, + { 3719, 3720 }, + { 3722, 3722 }, + { 3725, 3725 }, + { 3732, 3735 }, + { 3737, 3743 }, + { 3745, 3747 }, + { 3749, 3749 }, + { 3751, 3751 }, + { 3754, 3755 }, + { 3757, 3769 }, + { 3771, 3773 }, + { 3776, 3780 }, + { 3782, 3782 }, + { 3784, 3789 }, + { 3792, 3801 }, + { 3804, 3807 }, +}; +static const URange16 Hanunoo_range16[] = { + { 5920, 5940 }, +}; +static const URange32 Linear_B_range32[] = { + { 65536, 65547 }, + { 65549, 65574 }, + { 65576, 65594 }, + { 65596, 65597 }, + { 65599, 65613 }, + { 65616, 65629 }, + { 65664, 65786 }, +}; +static const URange32 Old_Turkic_range32[] = { + { 68608, 68680 }, +}; +static const URange16 Lepcha_range16[] = { + { 7168, 7223 }, + { 7227, 7241 }, + { 7245, 7247 }, +}; +static const URange32 Lydian_range32[] = { + { 67872, 67897 }, + { 67903, 67903 }, +}; +static const URange32 Egyptian_Hieroglyphs_range32[] = { + { 77824, 78894 }, +}; +static const URange16 Samaritan_range16[] = { + { 2048, 2093 }, + { 2096, 2110 }, +}; +static const URange16 Lisu_range16[] = { + { 42192, 42239 }, +}; +static const URange16 Buhid_range16[] = { + { 5952, 5971 }, +}; +static const URange16 Common_range16[] = { + { 0, 64 }, + { 91, 96 }, + { 123, 169 }, + { 171, 185 }, + { 187, 191 }, + { 215, 215 }, + { 247, 247 }, + { 697, 735 }, + { 741, 745 }, + { 748, 767 }, + { 884, 884 }, + { 894, 894 }, + { 901, 901 }, + { 903, 903 }, + { 1417, 1417 }, + { 1548, 1548 }, + { 1563, 1563 }, + { 1567, 1567 }, + { 1600, 1600 }, + { 1632, 1641 }, + { 1757, 1757 }, + { 2404, 2405 }, + { 3647, 3647 }, + { 4053, 4056 }, + { 4347, 4347 }, + { 5867, 5869 }, + { 5941, 5942 }, + { 6146, 6147 }, + { 6149, 6149 }, + { 7379, 7379 }, + { 7393, 7393 }, + { 7401, 7404 }, + { 7406, 7411 }, + { 7413, 7414 }, + { 8192, 8203 
}, + { 8206, 8292 }, + { 8294, 8304 }, + { 8308, 8318 }, + { 8320, 8334 }, + { 8352, 8378 }, + { 8448, 8485 }, + { 8487, 8489 }, + { 8492, 8497 }, + { 8499, 8525 }, + { 8527, 8543 }, + { 8585, 8585 }, + { 8592, 9203 }, + { 9216, 9254 }, + { 9280, 9290 }, + { 9312, 9983 }, + { 9985, 10239 }, + { 10496, 11084 }, + { 11088, 11097 }, + { 11776, 11835 }, + { 12272, 12283 }, + { 12288, 12292 }, + { 12294, 12294 }, + { 12296, 12320 }, + { 12336, 12343 }, + { 12348, 12351 }, + { 12443, 12444 }, + { 12448, 12448 }, + { 12539, 12540 }, + { 12688, 12703 }, + { 12736, 12771 }, + { 12832, 12895 }, + { 12927, 13007 }, + { 13144, 13311 }, + { 19904, 19967 }, + { 42752, 42785 }, + { 42888, 42890 }, + { 43056, 43065 }, + { 43471, 43471 }, + { 64830, 64831 }, + { 65021, 65021 }, + { 65040, 65049 }, + { 65072, 65106 }, + { 65108, 65126 }, + { 65128, 65131 }, + { 65279, 65279 }, + { 65281, 65312 }, + { 65339, 65344 }, + { 65371, 65381 }, + { 65392, 65392 }, + { 65438, 65439 }, + { 65504, 65510 }, + { 65512, 65518 }, + { 65529, 65533 }, +}; +static const URange32 Common_range32[] = { + { 65792, 65794 }, + { 65799, 65843 }, + { 65847, 65855 }, + { 65936, 65947 }, + { 66000, 66044 }, + { 118784, 119029 }, + { 119040, 119078 }, + { 119081, 119142 }, + { 119146, 119162 }, + { 119171, 119172 }, + { 119180, 119209 }, + { 119214, 119261 }, + { 119552, 119638 }, + { 119648, 119665 }, + { 119808, 119892 }, + { 119894, 119964 }, + { 119966, 119967 }, + { 119970, 119970 }, + { 119973, 119974 }, + { 119977, 119980 }, + { 119982, 119993 }, + { 119995, 119995 }, + { 119997, 120003 }, + { 120005, 120069 }, + { 120071, 120074 }, + { 120077, 120084 }, + { 120086, 120092 }, + { 120094, 120121 }, + { 120123, 120126 }, + { 120128, 120132 }, + { 120134, 120134 }, + { 120138, 120144 }, + { 120146, 120485 }, + { 120488, 120779 }, + { 120782, 120831 }, + { 126976, 127019 }, + { 127024, 127123 }, + { 127136, 127150 }, + { 127153, 127166 }, + { 127169, 127183 }, + { 127185, 127199 }, + { 127232, 127242 }, + { 
127248, 127278 }, + { 127280, 127339 }, + { 127344, 127386 }, + { 127462, 127487 }, + { 127489, 127490 }, + { 127504, 127546 }, + { 127552, 127560 }, + { 127568, 127569 }, + { 127744, 127776 }, + { 127792, 127797 }, + { 127799, 127868 }, + { 127872, 127891 }, + { 127904, 127940 }, + { 127942, 127946 }, + { 127968, 127984 }, + { 128000, 128062 }, + { 128064, 128064 }, + { 128066, 128247 }, + { 128249, 128252 }, + { 128256, 128317 }, + { 128320, 128323 }, + { 128336, 128359 }, + { 128507, 128576 }, + { 128581, 128591 }, + { 128640, 128709 }, + { 128768, 128883 }, + { 917505, 917505 }, + { 917536, 917631 }, +}; +static const URange16 Coptic_range16[] = { + { 994, 1007 }, + { 11392, 11507 }, + { 11513, 11519 }, +}; +static const URange32 Chakma_range32[] = { + { 69888, 69940 }, + { 69942, 69955 }, +}; +static const URange16 Arabic_range16[] = { + { 1536, 1540 }, + { 1542, 1547 }, + { 1549, 1562 }, + { 1564, 1564 }, + { 1566, 1566 }, + { 1568, 1599 }, + { 1601, 1610 }, + { 1622, 1631 }, + { 1642, 1647 }, + { 1649, 1756 }, + { 1758, 1791 }, + { 1872, 1919 }, + { 2208, 2208 }, + { 2210, 2220 }, + { 2276, 2302 }, + { 64336, 64449 }, + { 64467, 64829 }, + { 64848, 64911 }, + { 64914, 64967 }, + { 65008, 65020 }, + { 65136, 65140 }, + { 65142, 65276 }, +}; +static const URange32 Arabic_range32[] = { + { 69216, 69246 }, + { 126464, 126467 }, + { 126469, 126495 }, + { 126497, 126498 }, + { 126500, 126500 }, + { 126503, 126503 }, + { 126505, 126514 }, + { 126516, 126519 }, + { 126521, 126521 }, + { 126523, 126523 }, + { 126530, 126530 }, + { 126535, 126535 }, + { 126537, 126537 }, + { 126539, 126539 }, + { 126541, 126543 }, + { 126545, 126546 }, + { 126548, 126548 }, + { 126551, 126551 }, + { 126553, 126553 }, + { 126555, 126555 }, + { 126557, 126557 }, + { 126559, 126559 }, + { 126561, 126562 }, + { 126564, 126564 }, + { 126567, 126570 }, + { 126572, 126578 }, + { 126580, 126583 }, + { 126585, 126588 }, + { 126590, 126590 }, + { 126592, 126601 }, + { 126603, 126619 }, + { 
126625, 126627 }, + { 126629, 126633 }, + { 126635, 126651 }, + { 126704, 126705 }, +}; +static const URange16 Bamum_range16[] = { + { 42656, 42743 }, +}; +static const URange32 Bamum_range32[] = { + { 92160, 92728 }, +}; +static const URange16 Myanmar_range16[] = { + { 4096, 4255 }, + { 43616, 43643 }, +}; +static const URange32 Avestan_range32[] = { + { 68352, 68405 }, + { 68409, 68415 }, +}; +static const URange16 Hebrew_range16[] = { + { 1425, 1479 }, + { 1488, 1514 }, + { 1520, 1524 }, + { 64285, 64310 }, + { 64312, 64316 }, + { 64318, 64318 }, + { 64320, 64321 }, + { 64323, 64324 }, + { 64326, 64335 }, +}; +static const URange32 Takri_range32[] = { + { 71296, 71351 }, + { 71360, 71369 }, +}; +// 3867 16-bit ranges, 723 32-bit ranges +const UGroup unicode_groups[] = { + { "Arabic", +1, Arabic_range16, 22, Arabic_range32, 35 }, + { "Armenian", +1, Armenian_range16, 6, 0, 0 }, + { "Avestan", +1, 0, 0, Avestan_range32, 2 }, + { "Balinese", +1, Balinese_range16, 2, 0, 0 }, + { "Bamum", +1, Bamum_range16, 1, Bamum_range32, 1 }, + { "Batak", +1, Batak_range16, 2, 0, 0 }, + { "Bengali", +1, Bengali_range16, 14, 0, 0 }, + { "Bopomofo", +1, Bopomofo_range16, 3, 0, 0 }, + { "Brahmi", +1, 0, 0, Brahmi_range32, 2 }, + { "Braille", +1, Braille_range16, 1, 0, 0 }, + { "Buginese", +1, Buginese_range16, 2, 0, 0 }, + { "Buhid", +1, Buhid_range16, 1, 0, 0 }, + { "C", +1, C_range16, 15, C_range32, 6 }, + { "Canadian_Aboriginal", +1, Canadian_Aboriginal_range16, 2, 0, 0 }, + { "Carian", +1, 0, 0, Carian_range32, 1 }, + { "Cc", +1, Cc_range16, 2, 0, 0 }, + { "Cf", +1, Cf_range16, 12, Cf_range32, 4 }, + { "Chakma", +1, 0, 0, Chakma_range32, 2 }, + { "Cham", +1, Cham_range16, 4, 0, 0 }, + { "Cherokee", +1, Cherokee_range16, 1, 0, 0 }, + { "Co", +1, Co_range16, 1, Co_range32, 2 }, + { "Common", +1, Common_range16, 88, Common_range32, 70 }, + { "Coptic", +1, Coptic_range16, 3, 0, 0 }, + { "Cs", +1, Cs_range16, 1, 0, 0 }, + { "Cuneiform", +1, 0, 0, Cuneiform_range32, 3 }, + { 
"Cypriot", +1, 0, 0, Cypriot_range32, 6 }, + { "Cyrillic", +1, Cyrillic_range16, 7, 0, 0 }, + { "Deseret", +1, 0, 0, Deseret_range32, 1 }, + { "Devanagari", +1, Devanagari_range16, 5, 0, 0 }, + { "Egyptian_Hieroglyphs", +1, 0, 0, Egyptian_Hieroglyphs_range32, 1 }, + { "Ethiopic", +1, Ethiopic_range16, 32, 0, 0 }, + { "Georgian", +1, Georgian_range16, 8, 0, 0 }, + { "Glagolitic", +1, Glagolitic_range16, 2, 0, 0 }, + { "Gothic", +1, 0, 0, Gothic_range32, 1 }, + { "Greek", +1, Greek_range16, 31, Greek_range32, 2 }, + { "Gujarati", +1, Gujarati_range16, 13, 0, 0 }, + { "Gurmukhi", +1, Gurmukhi_range16, 16, 0, 0 }, + { "Han", +1, Han_range16, 11, Han_range32, 4 }, + { "Hangul", +1, Hangul_range16, 14, 0, 0 }, + { "Hanunoo", +1, Hanunoo_range16, 1, 0, 0 }, + { "Hebrew", +1, Hebrew_range16, 9, 0, 0 }, + { "Hiragana", +1, Hiragana_range16, 2, Hiragana_range32, 2 }, + { "Imperial_Aramaic", +1, 0, 0, Imperial_Aramaic_range32, 2 }, + { "Inherited", +1, Inherited_range16, 18, Inherited_range32, 6 }, + { "Inscriptional_Pahlavi", +1, 0, 0, Inscriptional_Pahlavi_range32, 2 }, + { "Inscriptional_Parthian", +1, 0, 0, Inscriptional_Parthian_range32, 2 }, + { "Javanese", +1, Javanese_range16, 3, 0, 0 }, + { "Kaithi", +1, 0, 0, Kaithi_range32, 1 }, + { "Kannada", +1, Kannada_range16, 14, 0, 0 }, + { "Katakana", +1, Katakana_range16, 7, Katakana_range32, 1 }, + { "Kayah_Li", +1, Kayah_Li_range16, 1, 0, 0 }, + { "Kharoshthi", +1, 0, 0, Kharoshthi_range32, 8 }, + { "Khmer", +1, Khmer_range16, 4, 0, 0 }, + { "L", +1, L_range16, 370, L_range32, 116 }, + { "Lao", +1, Lao_range16, 18, 0, 0 }, + { "Latin", +1, Latin_range16, 30, 0, 0 }, + { "Lepcha", +1, Lepcha_range16, 3, 0, 0 }, + { "Limbu", +1, Limbu_range16, 5, 0, 0 }, + { "Linear_B", +1, 0, 0, Linear_B_range32, 7 }, + { "Lisu", +1, Lisu_range16, 1, 0, 0 }, + { "Ll", +1, Ll_range16, 582, Ll_range32, 29 }, + { "Lm", +1, Lm_range16, 51, Lm_range32, 1 }, + { "Lo", +1, Lo_range16, 286, Lo_range32, 85 }, + { "Lt", +1, Lt_range16, 10, 0, 0 }, + 
{ "Lu", +1, Lu_range16, 576, Lu_range32, 32 }, + { "Lycian", +1, 0, 0, Lycian_range32, 1 }, + { "Lydian", +1, 0, 0, Lydian_range32, 2 }, + { "M", +1, M_range16, 180, M_range32, 24 }, + { "Malayalam", +1, Malayalam_range16, 11, 0, 0 }, + { "Mandaic", +1, Mandaic_range16, 2, 0, 0 }, + { "Mc", +1, Mc_range16, 111, Mc_range32, 15 }, + { "Me", +1, Me_range16, 4, 0, 0 }, + { "Meetei_Mayek", +1, Meetei_Mayek_range16, 3, 0, 0 }, + { "Meroitic_Cursive", +1, 0, 0, Meroitic_Cursive_range32, 2 }, + { "Meroitic_Hieroglyphs", +1, 0, 0, Meroitic_Hieroglyphs_range32, 1 }, + { "Miao", +1, 0, 0, Miao_range32, 3 }, + { "Mn", +1, Mn_range16, 194, Mn_range32, 27 }, + { "Mongolian", +1, Mongolian_range16, 6, 0, 0 }, + { "Myanmar", +1, Myanmar_range16, 2, 0, 0 }, + { "N", +1, N_range16, 64, N_range32, 24 }, + { "Nd", +1, Nd_range16, 35, Nd_range32, 7 }, + { "New_Tai_Lue", +1, New_Tai_Lue_range16, 4, 0, 0 }, + { "Nko", +1, Nko_range16, 1, 0, 0 }, + { "Nl", +1, Nl_range16, 7, Nl_range32, 5 }, + { "No", +1, No_range16, 28, No_range32, 14 }, + { "Ogham", +1, Ogham_range16, 1, 0, 0 }, + { "Ol_Chiki", +1, Ol_Chiki_range16, 1, 0, 0 }, + { "Old_Italic", +1, 0, 0, Old_Italic_range32, 2 }, + { "Old_Persian", +1, 0, 0, Old_Persian_range32, 2 }, + { "Old_South_Arabian", +1, 0, 0, Old_South_Arabian_range32, 1 }, + { "Old_Turkic", +1, 0, 0, Old_Turkic_range32, 1 }, + { "Oriya", +1, Oriya_range16, 14, 0, 0 }, + { "Osmanya", +1, 0, 0, Osmanya_range32, 2 }, + { "P", +1, P_range16, 126, P_range32, 15 }, + { "Pc", +1, Pc_range16, 6, 0, 0 }, + { "Pd", +1, Pd_range16, 16, 0, 0 }, + { "Pe", +1, Pe_range16, 72, 0, 0 }, + { "Pf", +1, Pf_range16, 10, 0, 0 }, + { "Phags_Pa", +1, Phags_Pa_range16, 1, 0, 0 }, + { "Phoenician", +1, 0, 0, Phoenician_range32, 2 }, + { "Pi", +1, Pi_range16, 11, 0, 0 }, + { "Po", +1, Po_range16, 120, Po_range32, 15 }, + { "Ps", +1, Ps_range16, 74, 0, 0 }, + { "Rejang", +1, Rejang_range16, 2, 0, 0 }, + { "Runic", +1, Runic_range16, 2, 0, 0 }, + { "S", +1, S_range16, 143, S_range32, 56 }, 
+ { "Samaritan", +1, Samaritan_range16, 2, 0, 0 }, + { "Saurashtra", +1, Saurashtra_range16, 2, 0, 0 }, + { "Sc", +1, Sc_range16, 17, 0, 0 }, + { "Sharada", +1, 0, 0, Sharada_range32, 2 }, + { "Shavian", +1, 0, 0, Shavian_range32, 1 }, + { "Sinhala", +1, Sinhala_range16, 11, 0, 0 }, + { "Sk", +1, Sk_range16, 27, 0, 0 }, + { "Sm", +1, Sm_range16, 53, Sm_range32, 11 }, + { "So", +1, So_range16, 108, So_range32, 45 }, + { "Sora_Sompeng", +1, 0, 0, Sora_Sompeng_range32, 2 }, + { "Sundanese", +1, Sundanese_range16, 2, 0, 0 }, + { "Syloti_Nagri", +1, Syloti_Nagri_range16, 1, 0, 0 }, + { "Syriac", +1, Syriac_range16, 3, 0, 0 }, + { "Tagalog", +1, Tagalog_range16, 2, 0, 0 }, + { "Tagbanwa", +1, Tagbanwa_range16, 3, 0, 0 }, + { "Tai_Le", +1, Tai_Le_range16, 2, 0, 0 }, + { "Tai_Tham", +1, Tai_Tham_range16, 5, 0, 0 }, + { "Tai_Viet", +1, Tai_Viet_range16, 2, 0, 0 }, + { "Takri", +1, 0, 0, Takri_range32, 2 }, + { "Tamil", +1, Tamil_range16, 16, 0, 0 }, + { "Telugu", +1, Telugu_range16, 14, 0, 0 }, + { "Thaana", +1, Thaana_range16, 1, 0, 0 }, + { "Thai", +1, Thai_range16, 2, 0, 0 }, + { "Tibetan", +1, Tibetan_range16, 7, 0, 0 }, + { "Tifinagh", +1, Tifinagh_range16, 3, 0, 0 }, + { "Ugaritic", +1, 0, 0, Ugaritic_range32, 2 }, + { "Vai", +1, Vai_range16, 1, 0, 0 }, + { "Yi", +1, Yi_range16, 2, 0, 0 }, + { "Z", +1, Z_range16, 8, 0, 0 }, + { "Zl", +1, Zl_range16, 1, 0, 0 }, + { "Zp", +1, Zp_range16, 1, 0, 0 }, + { "Zs", +1, Zs_range16, 7, 0, 0 }, +}; +const int num_unicode_groups = 138; + + +} // namespace re2 + + diff --git a/src/openalpr/support/re2/unicode_groups.h b/src/openalpr/support/re2/unicode_groups.h new file mode 100644 index 0000000..fc1c253 --- /dev/null +++ b/src/openalpr/support/re2/unicode_groups.h @@ -0,0 +1,64 @@ +// Copyright 2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Unicode character groups. 
+ +// The codes get split into ranges of 16-bit codes +// and ranges of 32-bit codes. It would be simpler +// to use only 32-bit ranges, but these tables are large +// enough to warrant extra care. +// +// Using just 32-bit ranges gives 27 kB of data. +// Adding 16-bit ranges gives 18 kB of data. +// Adding an extra table of 16-bit singletons would reduce +// to 16.5 kB of data but make the data harder to use; +// we don't bother. + +#ifndef RE2_UNICODE_GROUPS_H__ +#define RE2_UNICODE_GROUPS_H__ + +#include "util/util.h" + +namespace re2 { + +struct URange16 +{ + uint16 lo; + uint16 hi; +}; + +struct URange32 +{ + Rune lo; + Rune hi; +}; + +struct UGroup +{ + const char *name; + int sign; // +1 for [abc], -1 for [^abc] + const URange16 *r16; + int nr16; + const URange32 *r32; + int nr32; +}; + +// Named by property or script name (e.g., "Nd", "N", "Han"). +// Negated groups are not included. +extern const UGroup unicode_groups[]; +extern const int num_unicode_groups; + +// Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]"). +// Negated groups are included. +extern const UGroup posix_groups[]; +extern const int num_posix_groups; + +// Named by Perl name (e.g., "\\d", "\\D"). +// Negated groups are included. +extern const UGroup perl_groups[]; +extern const int num_perl_groups; + +} // namespace re2 + +#endif // RE2_UNICODE_GROUPS_H__ diff --git a/src/openalpr/support/re2/util/atomicops.h b/src/openalpr/support/re2/util/atomicops.h new file mode 100644 index 0000000..d69b075 --- /dev/null +++ b/src/openalpr/support/re2/util/atomicops.h @@ -0,0 +1,164 @@ +// Copyright 2006-2008 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef RE2_UTIL_ATOMICOPS_H__ +#define RE2_UTIL_ATOMICOPS_H__ + +// The memory ordering constraints resemble the ones in C11. +// RELAXED - no memory ordering, just an atomic operation. +// CONSUME - data-dependent ordering. 
+// ACQUIRE - prevents memory accesses from hoisting above the operation. +// RELEASE - prevents memory accesses from sinking below the operation. + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if !defined(OS_NACL) && (__has_builtin(__atomic_load_n) || (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__ >= 40801)) + +#define ATOMIC_LOAD_RELAXED(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_RELAXED); } while (0) +#define ATOMIC_LOAD_CONSUME(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_CONSUME); } while (0) +#define ATOMIC_LOAD_ACQUIRE(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_ACQUIRE); } while (0) +#define ATOMIC_STORE_RELAXED(p, v) __atomic_store_n((p), (v), __ATOMIC_RELAXED) +#define ATOMIC_STORE_RELEASE(p, v) __atomic_store_n((p), (v), __ATOMIC_RELEASE) + +#else // old compiler + +#define ATOMIC_LOAD_RELAXED(x, p) do { (x) = *(p); } while (0) +#define ATOMIC_LOAD_CONSUME(x, p) do { (x) = *(p); MaybeReadMemoryBarrier(); } while (0) +#define ATOMIC_LOAD_ACQUIRE(x, p) do { (x) = *(p); ReadMemoryBarrier(); } while (0) +#define ATOMIC_STORE_RELAXED(p, v) do { *(p) = (v); } while (0) +#define ATOMIC_STORE_RELEASE(p, v) do { WriteMemoryBarrier(); *(p) = (v); } while (0) + +// WriteMemoryBarrier(), ReadMemoryBarrier() and MaybeReadMemoryBarrier() +// are an implementation detail and must not be used in the rest of the code. + +#if defined(__i386__) + +static inline void WriteMemoryBarrier() { + int x; + __asm__ __volatile__("xchgl (%0),%0" // The lock prefix is implicit for xchg. + :: "r" (&x)); +} + +#elif defined(__x86_64__) + +// 64-bit implementations of memory barrier can be simpler, because +// "sfence" is guaranteed to exist. 
+static inline void WriteMemoryBarrier() { + __asm__ __volatile__("sfence" : : : "memory"); +} + +#elif defined(__ppc__) || defined(__powerpc64__) + +static inline void WriteMemoryBarrier() { + __asm__ __volatile__("eieio" : : : "memory"); +} + +#elif defined(__alpha__) + +static inline void WriteMemoryBarrier() { + __asm__ __volatile__("wmb" : : : "memory"); +} + +#elif defined(__aarch64__) + +static inline void WriteMemoryBarrier() { + __asm__ __volatile__("dmb st" : : : "memory"); +} + +#elif defined(__arm__) && defined(__linux__) + +// Linux on ARM puts a suitable memory barrier at a magic address for us to call. +static inline void WriteMemoryBarrier() { + ((void(*)(void))0xffff0fa0)(); +} + +#elif defined(__windows__) + +// Windows +inline void WriteMemoryBarrier() { + LONG x; + ::InterlockedExchange(&x, 0); +} + +#elif defined(OS_NACL) + +// Native Client +inline void WriteMemoryBarrier() { + __sync_synchronize(); +} + +#elif defined(__mips__) + +inline void WriteMemoryBarrier() { + __asm__ __volatile__("sync" : : : "memory"); +} + +#else + +#include "util/mutex.h" + +static inline void WriteMemoryBarrier() { + // Slight overkill, but good enough: + // any mutex implementation must have + // a read barrier after the lock operation and + // a write barrier before the unlock operation. + // + // It may be worthwhile to write architecture-specific + // barriers for the common platforms, as above, but + // this is a correct fallback. + re2::Mutex mu; + re2::MutexLock l(&mu); +} + +#endif + +// Alpha has very weak memory ordering. If relying on WriteBarriers, one must +// use read barriers for the readers too. +#if defined(__alpha__) + +static inline void MaybeReadMemoryBarrier() { + __asm__ __volatile__("mb" : : : "memory"); +} + +#else + +static inline void MaybeReadMemoryBarrier() {} + +#endif // __alpha__ + +// Read barrier for various targets. 
+ +#if defined(__aarch64__) + +static inline void ReadMemoryBarrier() { + __asm__ __volatile__("dmb ld" : : : "memory"); +} + +#elif defined(__alpha__) + +static inline void ReadMemoryBarrier() { + __asm__ __volatile__("mb" : : : "memory"); +} + +#elif defined(__mips__) + +inline void ReadMemoryBarrier() { + __asm__ __volatile__("sync" : : : "memory"); +} + +#else + +static inline void ReadMemoryBarrier() {} + +#endif + +#endif // old compiler + +#ifndef NO_THREAD_SAFETY_ANALYSIS +#define NO_THREAD_SAFETY_ANALYSIS +#endif + +#endif // RE2_UTIL_ATOMICOPS_H__ diff --git a/src/openalpr/support/re2/util/benchmark.h b/src/openalpr/support/re2/util/benchmark.h new file mode 100644 index 0000000..31bbd53 --- /dev/null +++ b/src/openalpr/support/re2/util/benchmark.h @@ -0,0 +1,41 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef RE2_UTIL_BENCHMARK_H__ +#define RE2_UTIL_BENCHMARK_H__ + +namespace testing { +struct Benchmark { + const char* name; + void (*fn)(int); + void (*fnr)(int, int); + int lo; + int hi; + int threadlo; + int threadhi; + + void Register(); + Benchmark(const char* name, void (*f)(int)) { Clear(name); fn = f; Register(); } + Benchmark(const char* name, void (*f)(int, int), int l, int h) { Clear(name); fnr = f; lo = l; hi = h; Register(); } + void Clear(const char* n) { name = n; fn = 0; fnr = 0; lo = 0; hi = 0; threadlo = 0; threadhi = 0; } + Benchmark* ThreadRange(int lo, int hi) { threadlo = lo; threadhi = hi; return this; } +}; +} // namespace testing + +void SetBenchmarkBytesProcessed(long long); +void StopBenchmarkTiming(); +void StartBenchmarkTiming(); +void BenchmarkMemoryUsage(); +void SetBenchmarkItemsProcessed(int); + +int NumCPUs(); + +#define BENCHMARK(f) \ + ::testing::Benchmark* _benchmark_##f = (new ::testing::Benchmark(#f, f)) + +#define BENCHMARK_RANGE(f, lo, hi) \ + ::testing::Benchmark* _benchmark_##f = \ + (new 
::testing::Benchmark(#f, f, lo, hi)) + +#endif // RE2_UTIL_BENCHMARK_H__ diff --git a/src/openalpr/support/re2/util/flags.h b/src/openalpr/support/re2/util/flags.h new file mode 100644 index 0000000..98d5c06 --- /dev/null +++ b/src/openalpr/support/re2/util/flags.h @@ -0,0 +1,27 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Simplified version of Google's command line flags. +// Does not support parsing the command line. +// If you want to do that, see +// https://gflags.github.io/gflags/ + +#ifndef RE2_UTIL_FLAGS_H__ +#define RE2_UTIL_FLAGS_H__ + +#define DEFINE_flag(type, name, deflt, desc) \ + namespace re2 { type FLAGS_##name = deflt; } + +#define DECLARE_flag(type, name) \ + namespace re2 { extern type FLAGS_##name; } + +#define DEFINE_bool(name, deflt, desc) DEFINE_flag(bool, name, deflt, desc) +#define DEFINE_int32(name, deflt, desc) DEFINE_flag(int32, name, deflt, desc) +#define DEFINE_string(name, deflt, desc) DEFINE_flag(string, name, deflt, desc) + +#define DECLARE_bool(name) DECLARE_flag(bool, name) +#define DECLARE_int32(name) DECLARE_flag(int32, name) +#define DECLARE_string(name) DECLARE_flag(string, name) + +#endif // RE2_UTIL_FLAGS_H__ diff --git a/src/openalpr/support/re2/util/hash.cc b/src/openalpr/support/re2/util/hash.cc new file mode 100644 index 0000000..8a49346 --- /dev/null +++ b/src/openalpr/support/re2/util/hash.cc @@ -0,0 +1,231 @@ +// Modified by Russ Cox to add "namespace re2". +// Also threw away all but hashword and hashword2. +// http://burtleburtle.net/bob/c/lookup3.c + +/* +------------------------------------------------------------------------------- +lookup3.c, by Bob Jenkins, May 2006, Public Domain. + +These are functions for producing 32-bit hashes for hash table lookup. +hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() +are externally useful functions. 
Routines to test the hash are included +if SELF_TEST is defined. You can use this free for any purpose. It's in +the public domain. It has no warranty. + +You probably want to use hashlittle(). hashlittle() and hashbig() +hash byte arrays. hashlittle() is is faster than hashbig() on +little-endian machines. Intel and AMD are little-endian machines. +On second thought, you probably want hashlittle2(), which is identical to +hashlittle() except it returns two 32-bit hashes for the price of one. +You could implement hashbig2() if you wanted but I haven't bothered here. + +If you want to find a hash of, say, exactly 7 integers, do + a = i1; b = i2; c = i3; + mix(a,b,c); + a += i4; b += i5; c += i6; + mix(a,b,c); + a += i7; + final(a,b,c); +then use c as the hash value. If you have a variable length array of +4-byte integers to hash, use hashword(). If you have a byte array (like +a character string), use hashlittle(). If you have several byte arrays, or +a mix of things, see the comments above hashlittle(). + +Why is this so big? I read 12 bytes at a time into 3 4-byte integers, +then mix those integers. This is fast (you can do a lot more thorough +mixing with 12*3 instructions on 3 integers than you can with 3 instructions +on 1 byte), but shoehorning those bytes into integers efficiently is messy. +------------------------------------------------------------------------------- +*/ + +#include "re2/util/util.h" + +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. 
+This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. 
+------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +namespace re2 { + +/* +-------------------------------------------------------------------- + This works on all machines. 
To be useful, it requires + -- that the key be an array of uint32_t's, and + -- that the length be the number of uint32_t's in the key + + The function hashword() is identical to hashlittle() on little-endian + machines, and identical to hashbig() on big-endian machines, + except that the length has to be measured in uint32_ts rather than in + bytes. hashlittle() is more complicated than hashword() only because + hashlittle() has to dance around fitting the key bytes into registers. +-------------------------------------------------------------------- +*/ +uint32 hashword( +const uint32 *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32 initval) /* the previous hash, or an arbitrary value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + return c; +} + + +/* +-------------------------------------------------------------------- +hashword2() -- same as hashword(), but take two seeds and return two +32-bit values. pc and pb must both be nonnull, and *pc and *pb must +both be initialized with seeds. If you pass in (*pb)==0, the output +(*pc) will be the same as the return value from hashword(). 
+-------------------------------------------------------------------- +*/ +void hashword2 ( +const uint32 *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32 *pc, /* IN: seed OUT: primary hash value */ +uint32 *pb) /* IN: more seed OUT: secondary hash value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc; + c += *pb; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + *pc=c; *pb=b; +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/util/logging.h b/src/openalpr/support/re2/util/logging.h new file mode 100644 index 0000000..5eaf1dc --- /dev/null +++ b/src/openalpr/support/re2/util/logging.h @@ -0,0 +1,86 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Simplified version of Google's logging. + +#ifndef RE2_UTIL_LOGGING_H__ +#define RE2_UTIL_LOGGING_H__ + +#include /* for fwrite */ +#include + +// Debug-only checking. 
+#define DCHECK(condition) assert(condition) +#define DCHECK_EQ(val1, val2) assert((val1) == (val2)) +#define DCHECK_NE(val1, val2) assert((val1) != (val2)) +#define DCHECK_LE(val1, val2) assert((val1) <= (val2)) +#define DCHECK_LT(val1, val2) assert((val1) < (val2)) +#define DCHECK_GE(val1, val2) assert((val1) >= (val2)) +#define DCHECK_GT(val1, val2) assert((val1) > (val2)) + +// Always-on checking +#define CHECK(x) if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x +#define CHECK_LT(x, y) CHECK((x) < (y)) +#define CHECK_GT(x, y) CHECK((x) > (y)) +#define CHECK_LE(x, y) CHECK((x) <= (y)) +#define CHECK_GE(x, y) CHECK((x) >= (y)) +#define CHECK_EQ(x, y) CHECK((x) == (y)) +#define CHECK_NE(x, y) CHECK((x) != (y)) + +#define LOG_INFO LogMessage(__FILE__, __LINE__) +#define LOG_ERROR LOG_INFO +#define LOG_WARNING LOG_INFO +#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__) +#define LOG_QFATAL LOG_FATAL + +#define VLOG(x) if((x)>0){}else LOG_INFO.stream() + +#ifdef NDEBUG +#define DEBUG_MODE 0 +#define LOG_DFATAL LOG_ERROR +#else +#define DEBUG_MODE 1 +#define LOG_DFATAL LOG_FATAL +#endif + +#define LOG(severity) LOG_ ## severity.stream() + +class LogMessage { + public: + LogMessage(const char* file, int line) : flushed_(false) { + stream() << file << ":" << line << ": "; + } + void Flush() { + stream() << "\n"; + string s = str_.str(); + size_t n = s.size(); + if (fwrite(s.data(), 1, n, stderr) < n) {} // shut up gcc + flushed_ = true; + } + ~LogMessage() { + if (!flushed_) { + Flush(); + } + } + ostream& stream() { return str_; } + + private: + bool flushed_; + std::ostringstream str_; + DISALLOW_COPY_AND_ASSIGN(LogMessage); +}; + +class LogMessageFatal : public LogMessage { + public: + LogMessageFatal(const char* file, int line) + : LogMessage(file, line) { } + ~LogMessageFatal() { + Flush(); + abort(); + } + private: + DISALLOW_COPY_AND_ASSIGN(LogMessageFatal); +}; + +#endif // RE2_UTIL_LOGGING_H__ diff --git 
a/src/openalpr/support/re2/util/mutex.h b/src/openalpr/support/re2/util/mutex.h new file mode 100644 index 0000000..19a49d5 --- /dev/null +++ b/src/openalpr/support/re2/util/mutex.h @@ -0,0 +1,213 @@ +// Copyright 2007 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + * A simple mutex wrapper, supporting locks and read-write locks. + * You should assume the locks are *not* re-entrant. + */ + +#ifndef RE2_UTIL_MUTEX_H_ +#define RE2_UTIL_MUTEX_H_ + +#include + +namespace re2 { + +#ifndef WIN32 + #define HAVE_PTHREAD 1 + #define HAVE_RWLOCK 1 +#endif + +#if defined(NO_THREADS) + typedef int MutexType; // to keep a lock-count +#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK) + // Needed for pthread_rwlock_*. If it causes problems, you could take it + // out, but then you'd have to unset HAVE_RWLOCK (at least on linux -- it + // *does* cause problems for FreeBSD, or MacOSX, but isn't needed + // for locking there.) +# ifdef __linux__ +# undef _XOPEN_SOURCE +# define _XOPEN_SOURCE 500 // may be needed to get the rwlock calls +# endif +# include + typedef pthread_rwlock_t MutexType; +#elif defined(HAVE_PTHREAD) +# include + typedef pthread_mutex_t MutexType; +#elif defined(_WIN32) +# define WIN32_LEAN_AND_MEAN // We only need minimal includes +# ifdef GMUTEX_TRYLOCK + // We need Windows NT or later for TryEnterCriticalSection(). If you + // don't need that functionality, you can remove these _WIN32_WINNT + // lines, and change TryLock() to assert(0) or something. +# ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0400 +# endif +# endif +# include + typedef CRITICAL_SECTION MutexType; +#else +# error Need to implement mutex.h for your architecture, or #define NO_THREADS +#endif + +class Mutex { + public: + // Create a Mutex that is not held by anybody. 
+ inline Mutex(); + + // Destructor + inline ~Mutex(); + + inline void Lock(); // Block if needed until free then acquire exclusively + inline void Unlock(); // Release a lock acquired via Lock() + inline bool TryLock(); // If free, Lock() and return true, else return false + // Note that on systems that don't support read-write locks, these may + // be implemented as synonyms to Lock() and Unlock(). So you can use + // these for efficiency, but don't use them anyplace where being able + // to do shared reads is necessary to avoid deadlock. + inline void ReaderLock(); // Block until free or shared then acquire a share + inline void ReaderUnlock(); // Release a read share of this Mutex + inline void WriterLock() { Lock(); } // Acquire an exclusive lock + inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock() + inline void AssertHeld() { } + + private: + MutexType mutex_; + + // Catch the error of writing Mutex when intending MutexLock. + Mutex(Mutex *ignored); + // Disallow "evil" constructors + Mutex(const Mutex&); + void operator=(const Mutex&); +}; + +// Now the implementation of Mutex for various systems +#if defined(NO_THREADS) + +// When we don't have threads, we can be either reading or writing, +// but not both. We can have lots of readers at once (in no-threads +// mode, that's most likely to happen in recursive function calls), +// but only one writer. We represent this by having mutex_ be -1 when +// writing and a number > 0 when reading (and 0 when no lock is held). +// +// In debug mode, we assert these invariants, while in non-debug mode +// we do nothing, for efficiency. That's why everything is in an +// assert. 
+#include + +Mutex::Mutex() : mutex_(0) { } +Mutex::~Mutex() { assert(mutex_ == 0); } +void Mutex::Lock() { assert(--mutex_ == -1); } +void Mutex::Unlock() { assert(mutex_++ == -1); } +bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; } +void Mutex::ReaderLock() { assert(++mutex_ > 0); } +void Mutex::ReaderUnlock() { assert(mutex_-- > 0); } + +#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK) + +#define SAFE_PTHREAD(fncall) do { if ((fncall) != 0) abort(); } while (0) + +Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); } +Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); } +void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); } +void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } +bool Mutex::TryLock() { return pthread_rwlock_trywrlock(&mutex_) == 0; } +void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); } +void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } + +#undef SAFE_PTHREAD + +#elif defined(HAVE_PTHREAD) + +#define SAFE_PTHREAD(fncall) do { if ((fncall) != 0) abort(); } while (0) + +Mutex::Mutex() { SAFE_PTHREAD(pthread_mutex_init(&mutex_, NULL)); } +Mutex::~Mutex() { SAFE_PTHREAD(pthread_mutex_destroy(&mutex_)); } +void Mutex::Lock() { SAFE_PTHREAD(pthread_mutex_lock(&mutex_)); } +void Mutex::Unlock() { SAFE_PTHREAD(pthread_mutex_unlock(&mutex_)); } +bool Mutex::TryLock() { return pthread_mutex_trylock(&mutex_) == 0; } +void Mutex::ReaderLock() { Lock(); } // we don't have read-write locks +void Mutex::ReaderUnlock() { Unlock(); } +#undef SAFE_PTHREAD + +#elif defined(_WIN32) + +Mutex::Mutex() { InitializeCriticalSection(&mutex_); } +Mutex::~Mutex() { DeleteCriticalSection(&mutex_); } +void Mutex::Lock() { EnterCriticalSection(&mutex_); } +void Mutex::Unlock() { LeaveCriticalSection(&mutex_); } +bool Mutex::TryLock() { return TryEnterCriticalSection(&mutex_) != 0; } +void Mutex::ReaderLock() { Lock(); } // we don't have 
read-write locks +void Mutex::ReaderUnlock() { Unlock(); } + +#endif + + +// -------------------------------------------------------------------------- +// Some helper classes + +// MutexLock(mu) acquires mu when constructed and releases it when destroyed. +class MutexLock { + public: + explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); } + ~MutexLock() { mu_->Unlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + MutexLock(const MutexLock&); + void operator=(const MutexLock&); +}; + +// ReaderMutexLock and WriterMutexLock do the same, for rwlocks +class ReaderMutexLock { + public: + explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); } + ~ReaderMutexLock() { mu_->ReaderUnlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + ReaderMutexLock(const ReaderMutexLock&); + void operator=(const ReaderMutexLock&); +}; + +class WriterMutexLock { + public: + explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); } + ~WriterMutexLock() { mu_->WriterUnlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + WriterMutexLock(const WriterMutexLock&); + void operator=(const WriterMutexLock&); +}; + +// Catch bug where variable name is omitted, e.g. MutexLock (&mu); +#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name) +#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name) +#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name) + +// Provide safe way to declare and use global, linker-initialized mutex. Sigh. 
+#ifdef HAVE_PTHREAD + +#define GLOBAL_MUTEX(name) \ + static pthread_mutex_t (name) = PTHREAD_MUTEX_INITIALIZER +#define GLOBAL_MUTEX_LOCK(name) \ + pthread_mutex_lock(&(name)) +#define GLOBAL_MUTEX_UNLOCK(name) \ + pthread_mutex_unlock(&(name)) + +#else + +#define GLOBAL_MUTEX(name) \ + static Mutex name +#define GLOBAL_MUTEX_LOCK(name) \ + name.Lock() +#define GLOBAL_MUTEX_UNLOCK(name) \ + name.Unlock() + +#endif + +} // namespace re2 + +#endif /* #define RE2_UTIL_MUTEX_H_ */ diff --git a/src/openalpr/support/re2/util/pcre.h b/src/openalpr/support/re2/util/pcre.h new file mode 100644 index 0000000..f8b4a1e --- /dev/null +++ b/src/openalpr/support/re2/util/pcre.h @@ -0,0 +1,679 @@ +// Copyright 2003-2010 Google Inc. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This is a variant of PCRE's pcrecpp.h, originally written at Google. +// The main changes are the addition of the HitLimit method and +// compilation as PCRE in namespace re2. + +// C++ interface to the pcre regular-expression library. PCRE supports +// Perl-style regular expressions (with extensions like \d, \w, \s, +// ...). +// +// ----------------------------------------------------------------------- +// REGEXP SYNTAX: +// +// This module uses the pcre library and hence supports its syntax +// for regular expressions: +// +// http://www.google.com/search?q=pcre +// +// The syntax is pretty similar to Perl's. For those not familiar +// with Perl's regular expressions, here are some examples of the most +// commonly used extensions: +// +// "hello (\\w+) world" -- \w matches a "word" character +// "version (\\d+)" -- \d matches a digit +// "hello\\s+world" -- \s matches any whitespace character +// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary +// "(?i)hello" -- (?i) turns on case-insensitive matching +// "/\\*(.*?)\\*/" -- .*? matches . minimum no. 
of times possible +// +// ----------------------------------------------------------------------- +// MATCHING INTERFACE: +// +// The "FullMatch" operation checks that supplied text matches a +// supplied pattern exactly. +// +// Example: successful match +// CHECK(PCRE::FullMatch("hello", "h.*o")); +// +// Example: unsuccessful match (requires full match): +// CHECK(!PCRE::FullMatch("hello", "e")); +// +// ----------------------------------------------------------------------- +// UTF-8 AND THE MATCHING INTERFACE: +// +// By default, pattern and text are plain text, one byte per character. +// The UTF8 flag, passed to the constructor, causes both pattern +// and string to be treated as UTF-8 text, still a byte stream but +// potentially multiple bytes per character. In practice, the text +// is likelier to be UTF-8 than the pattern, but the match returned +// may depend on the UTF8 flag, so always use it when matching +// UTF8 text. E.g., "." will match one byte normally but with UTF8 +// set may match up to three bytes of a multi-byte character. +// +// Example: +// PCRE re(utf8_pattern, PCRE::UTF8); +// CHECK(PCRE::FullMatch(utf8_string, re)); +// +// ----------------------------------------------------------------------- +// MATCHING WITH SUB-STRING EXTRACTION: +// +// You can supply extra pointer arguments to extract matched subpieces. 
+// +// Example: extracts "ruby" into "s" and 1234 into "i" +// int i; +// string s; +// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); +// +// Example: fails because string cannot be stored in integer +// CHECK(!PCRE::FullMatch("ruby", "(.*)", &i)); +// +// Example: fails because there aren't enough sub-patterns: +// CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s)); +// +// Example: does not try to extract any extra sub-patterns +// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s)); +// +// Example: does not try to extract into NULL +// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i)); +// +// Example: integer overflow causes failure +// CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i)); +// +// ----------------------------------------------------------------------- +// PARTIAL MATCHES +// +// You can use the "PartialMatch" operation when you want the pattern +// to match any substring of the text. +// +// Example: simple search for a string: +// CHECK(PCRE::PartialMatch("hello", "ell")); +// +// Example: find first number in a string +// int number; +// CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number)); +// CHECK_EQ(number, 100); +// +// ----------------------------------------------------------------------- +// PPCRE-COMPILED PCREGULAR EXPPCRESSIONS +// +// PCRE makes it easy to use any string as a regular expression, without +// requiring a separate compilation step. +// +// If speed is of the essence, you can create a pre-compiled "PCRE" +// object from the pattern and use it multiple times. If you do so, +// you can typically parse text faster than with sscanf. 
+// +// Example: precompile pattern for faster matching: +// PCRE pattern("h.*o"); +// while (ReadLine(&str)) { +// if (PCRE::FullMatch(str, pattern)) ...; +// } +// +// ----------------------------------------------------------------------- +// SCANNING TEXT INCPCREMENTALLY +// +// The "Consume" operation may be useful if you want to repeatedly +// match regular expressions at the front of a string and skip over +// them as they match. This requires use of the "StringPiece" type, +// which represents a sub-range of a real string. +// +// Example: read lines of the form "var = value" from a string. +// string contents = ...; // Fill string somehow +// StringPiece input(contents); // Wrap a StringPiece around it +// +// string var; +// int value; +// while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) { +// ...; +// } +// +// Each successful call to "Consume" will set "var/value", and also +// advance "input" so it points past the matched text. Note that if the +// regular expression matches an empty string, input will advance +// by 0 bytes. If the regular expression being used might match +// an empty string, the loop body must check for this case and either +// advance the string or break out of the loop. +// +// The "FindAndConsume" operation is similar to "Consume" but does not +// anchor your match at the beginning of the string. For example, you +// could extract all words from a string by repeatedly calling +// PCRE::FindAndConsume(&input, "(\\w+)", &word) +// +// ----------------------------------------------------------------------- +// PARSING HEX/OCTAL/C-RADIX NUMBERS +// +// By default, if you pass a pointer to a numeric value, the +// corresponding text is interpreted as a base-10 number. You can +// instead wrap the pointer with a call to one of the operators Hex(), +// Octal(), or CRadix() to interpret the text in another base. 
The +// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) +// prefixes, but defaults to base-10. +// +// Example: +// int a, b, c, d; +// CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)", +// Octal(&a), Hex(&b), CRadix(&c), CRadix(&d)); +// will leave 64 in a, b, c, and d. + +#include "util/util.h" +#include "re2/stringpiece.h" + +#ifdef USEPCRE +#include +namespace re2 { +const bool UsingPCRE = true; +} // namespace re2 +#else +namespace re2 { +const bool UsingPCRE = false; +struct pcre; +struct pcre_extra { int flags, match_limit, match_limit_recursion; }; +#define pcre_free(x) {} +#define PCRE_EXTRA_MATCH_LIMIT 0 +#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0 +#define PCRE_ANCHORED 0 +#define PCRE_NOTEMPTY 0 +#define PCRE_ERROR_NOMATCH 1 +#define PCRE_ERROR_MATCHLIMIT 2 +#define PCRE_ERROR_RECURSIONLIMIT 3 +#define PCRE_INFO_CAPTURECOUNT 0 +#define pcre_compile(a,b,c,d,e) ({ (void)(a); (void)(b); *(c)=""; *(d)=0; (void)(e); ((pcre*)0); }) +#define pcre_exec(a, b, c, d, e, f, g, h) ({ (void)(a); (void)(b); (void)(c); (void)(d); (void)(e); (void)(f); (void)(g); (void)(h); 0; }) +#define pcre_fullinfo(a, b, c, d) ({ (void)(a); (void)(b); (void)(c); *(d) = 0; 0; }) +} // namespace re2 +#endif + +namespace re2 { + +class PCRE_Options; + +// Interface for regular expression matching. Also corresponds to a +// pre-compiled regular expression. An "PCRE" object is safe for +// concurrent use by multiple threads. +class PCRE { + public: + // We convert user-passed pointers into special Arg objects + class Arg; + + // Marks end of arg list. + // ONLY USE IN OPTIONAL ARG DEFAULTS. + // DO NOT PASS EXPLICITLY. + static Arg no_more_args; + + // Options are same value as those in pcre. We provide them here + // to avoid users needing to include pcre.h and also to isolate + // users from pcre should we change the underlying library. 
+ // Only those needed by Google programs are exposed here to + // avoid collision with options employed internally by regexp.cc + // Note that some options have equivalents that can be specified in + // the regexp itself. For example, prefixing your regexp with + // "(?s)" has the same effect as the PCRE_DOTALL option. + enum Option { + None = 0x0000, + UTF8 = 0x0800, // == PCRE_UTF8 + EnabledCompileOptions = UTF8, + EnabledExecOptions = 0x0000, // TODO: use to replace anchor flag + }; + + // We provide implicit conversions from strings so that users can + // pass in a string or a "const char*" wherever an "PCRE" is expected. + PCRE(const char* pattern); + PCRE(const char* pattern, Option option); + PCRE(const string& pattern); + PCRE(const string& pattern, Option option); + PCRE(const char *pattern, const PCRE_Options& re_option); + PCRE(const string& pattern, const PCRE_Options& re_option); + + ~PCRE(); + + // The string specification for this PCRE. E.g. + // PCRE re("ab*c?d+"); + // re.pattern(); // "ab*c?d+" + const string& pattern() const { return pattern_; } + + // If PCRE could not be created properly, returns an error string. + // Else returns the empty string. + const string& error() const { return *error_; } + + // Whether the PCRE has hit a match limit during execution. + // Not thread safe. Intended only for testing. + // If hitting match limits is a problem, + // you should be using PCRE2 (re2/re2.h) + // instead of checking this flag. + bool HitLimit(); + void ClearHitLimit(); + + /***** The useful part: the matching interface *****/ + + // Matches "text" against "pattern". If pointer arguments are + // supplied, copies matched sub-patterns into them. + // + // You can pass in a "const char*" or a "string" for "text". + // You can pass in a "const char*" or a "string" or a "PCRE" for "pattern". 
+ // + // The provided pointer arguments can be pointers to any scalar numeric + // type, or one of: + // string (matched piece is copied to string) + // StringPiece (StringPiece is mutated to point to matched piece) + // T (where "bool T::ParseFrom(const char*, int)" exists) + // (void*)NULL (the corresponding matched sub-pattern is not copied) + // + // Returns true iff all of the following conditions are satisfied: + // a. "text" matches "pattern" exactly + // b. The number of matched sub-patterns is >= number of supplied pointers + // c. The "i"th argument has a suitable type for holding the + // string captured as the "i"th sub-pattern. If you pass in + // NULL for the "i"th argument, or pass fewer arguments than + // number of sub-patterns, "i"th captured sub-pattern is + // ignored. + // + // CAVEAT: An optional sub-pattern that does not exist in the + // matched string is assigned the empty string. Therefore, the + // following will return false (because the empty string is not a + // valid number): + // int number; + // PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number); + struct FullMatchFunctor { + bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args + const Arg& ptr1 = no_more_args, + const Arg& ptr2 = no_more_args, + const Arg& ptr3 = no_more_args, + const Arg& ptr4 = no_more_args, + const Arg& ptr5 = no_more_args, + const Arg& ptr6 = no_more_args, + const Arg& ptr7 = no_more_args, + const Arg& ptr8 = no_more_args, + const Arg& ptr9 = no_more_args, + const Arg& ptr10 = no_more_args, + const Arg& ptr11 = no_more_args, + const Arg& ptr12 = no_more_args, + const Arg& ptr13 = no_more_args, + const Arg& ptr14 = no_more_args, + const Arg& ptr15 = no_more_args, + const Arg& ptr16 = no_more_args) const; + }; + + static const FullMatchFunctor FullMatch; + + // Exactly like FullMatch(), except that "pattern" is allowed to match + // a substring of "text". 
+ struct PartialMatchFunctor { + bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args + const Arg& ptr1 = no_more_args, + const Arg& ptr2 = no_more_args, + const Arg& ptr3 = no_more_args, + const Arg& ptr4 = no_more_args, + const Arg& ptr5 = no_more_args, + const Arg& ptr6 = no_more_args, + const Arg& ptr7 = no_more_args, + const Arg& ptr8 = no_more_args, + const Arg& ptr9 = no_more_args, + const Arg& ptr10 = no_more_args, + const Arg& ptr11 = no_more_args, + const Arg& ptr12 = no_more_args, + const Arg& ptr13 = no_more_args, + const Arg& ptr14 = no_more_args, + const Arg& ptr15 = no_more_args, + const Arg& ptr16 = no_more_args) const; + }; + + static const PartialMatchFunctor PartialMatch; + + // Like FullMatch() and PartialMatch(), except that pattern has to + // match a prefix of "text", and "input" is advanced past the matched + // text. Note: "input" is modified iff this routine returns true. + struct ConsumeFunctor { + bool operator ()(StringPiece* input, const PCRE& pattern, // 3..16 args + const Arg& ptr1 = no_more_args, + const Arg& ptr2 = no_more_args, + const Arg& ptr3 = no_more_args, + const Arg& ptr4 = no_more_args, + const Arg& ptr5 = no_more_args, + const Arg& ptr6 = no_more_args, + const Arg& ptr7 = no_more_args, + const Arg& ptr8 = no_more_args, + const Arg& ptr9 = no_more_args, + const Arg& ptr10 = no_more_args, + const Arg& ptr11 = no_more_args, + const Arg& ptr12 = no_more_args, + const Arg& ptr13 = no_more_args, + const Arg& ptr14 = no_more_args, + const Arg& ptr15 = no_more_args, + const Arg& ptr16 = no_more_args) const; + }; + + static const ConsumeFunctor Consume; + + // Like Consume(..), but does not anchor the match at the beginning of the + // string. That is, "pattern" need not start its match at the beginning of + // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next + // word in "s" and stores it in "word". 
+ struct FindAndConsumeFunctor { + bool operator ()(StringPiece* input, const PCRE& pattern, + const Arg& ptr1 = no_more_args, + const Arg& ptr2 = no_more_args, + const Arg& ptr3 = no_more_args, + const Arg& ptr4 = no_more_args, + const Arg& ptr5 = no_more_args, + const Arg& ptr6 = no_more_args, + const Arg& ptr7 = no_more_args, + const Arg& ptr8 = no_more_args, + const Arg& ptr9 = no_more_args, + const Arg& ptr10 = no_more_args, + const Arg& ptr11 = no_more_args, + const Arg& ptr12 = no_more_args, + const Arg& ptr13 = no_more_args, + const Arg& ptr14 = no_more_args, + const Arg& ptr15 = no_more_args, + const Arg& ptr16 = no_more_args) const; + }; + + static const FindAndConsumeFunctor FindAndConsume; + + // Replace the first match of "pattern" in "str" with "rewrite". + // Within "rewrite", backslash-escaped digits (\1 to \9) can be + // used to insert text matching corresponding parenthesized group + // from the pattern. \0 in "rewrite" refers to the entire matching + // text. E.g., + // + // string s = "yabba dabba doo"; + // CHECK(PCRE::Replace(&s, "b+", "d")); + // + // will leave "s" containing "yada dabba doo" + // + // Returns true if the pattern matches and a replacement occurs, + // false otherwise. + static bool Replace(string *str, + const PCRE& pattern, + const StringPiece& rewrite); + + // Like Replace(), except replaces all occurrences of the pattern in + // the string with the rewrite. Replacements are not subject to + // re-matching. E.g., + // + // string s = "yabba dabba doo"; + // CHECK(PCRE::GlobalReplace(&s, "b+", "d")); + // + // will leave "s" containing "yada dada doo" + // + // Returns the number of replacements made. + static int GlobalReplace(string *str, + const PCRE& pattern, + const StringPiece& rewrite); + + // Like Replace, except that if the pattern matches, "rewrite" + // is copied into "out" with substitutions. The non-matching + // portions of "text" are ignored. 
+ // + // Returns true iff a match occurred and the extraction happened + // successfully; if no match occurs, the string is left unaffected. + static bool Extract(const StringPiece &text, + const PCRE& pattern, + const StringPiece &rewrite, + string *out); + + // Check that the given @p rewrite string is suitable for use with + // this PCRE. It checks that: + // * The PCRE has enough parenthesized subexpressions to satisfy all + // of the \N tokens in @p rewrite, and + // * The @p rewrite string doesn't have any syntax errors + // ('\' followed by anything besides [0-9] and '\'). + // Making this test will guarantee that "replace" and "extract" + // operations won't LOG(ERROR) or fail because of a bad rewrite + // string. + // @param rewrite The proposed rewrite string. + // @param error An error message is recorded here, iff we return false. + // Otherwise, it is unchanged. + // @return true, iff @p rewrite is suitable for use with the PCRE. + bool CheckRewriteString(const StringPiece& rewrite, string* error) const; + + // Returns a copy of 'unquoted' with all potentially meaningful + // regexp characters backslash-escaped. The returned string, used + // as a regular expression, will exactly match the original string. + // For example, + // 1.5-2.0? + // becomes: + // 1\.5\-2\.0\? + static string QuoteMeta(const StringPiece& unquoted); + + /***** Generic matching interface (not so nice to use) *****/ + + // Type of match (TODO: Should be restructured as an Option) + enum Anchor { + UNANCHORED, // No anchoring + ANCHOR_START, // Anchor at start only + ANCHOR_BOTH, // Anchor at start and end + }; + + // General matching routine. Stores the length of the match in + // "*consumed" if successful. + bool DoMatch(const StringPiece& text, + Anchor anchor, + int* consumed, + const Arg* const* args, int n) const; + + // Return the number of capturing subpatterns, or -1 if the + // regexp wasn't valid on construction. 
+ int NumberOfCapturingGroups() const; + + private: + void Init(const char* pattern, Option option, int match_limit, + int stack_limit, bool report_errors); + + // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with + // pairs of integers for the beginning and end positions of matched + // text. The first pair corresponds to the entire matched text; + // subsequent pairs correspond, in order, to parentheses-captured + // matches. Returns the number of pairs (one more than the number of + // the last subpattern with a match) if matching was successful + // and zero if the match failed. + // I.e. for PCRE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching + // against "foo", "bar", and "baz" respectively. + // When matching PCRE("(foo)|hello") against "hello", it will return 1. + // But the values for all subpattern are filled in into "vec". + int TryMatch(const StringPiece& text, + int startpos, + Anchor anchor, + bool empty_ok, + int *vec, + int vecsize) const; + + // Append the "rewrite" string, with backslash subsitutions from "text" + // and "vec", to string "out". + bool Rewrite(string *out, + const StringPiece &rewrite, + const StringPiece &text, + int *vec, + int veclen) const; + + // internal implementation for DoMatch + bool DoMatchImpl(const StringPiece& text, + Anchor anchor, + int* consumed, + const Arg* const args[], + int n, + int* vec, + int vecsize) const; + + // Compile the regexp for the specified anchoring mode + pcre* Compile(Anchor anchor); + + string pattern_; + Option options_; + pcre* re_full_; // For full matches + pcre* re_partial_; // For partial matches + const string* error_; // Error indicator (or empty string) + bool report_errors_; // Silences error logging if false + int match_limit_; // Limit on execution resources + int stack_limit_; // Limit on stack resources (bytes) + mutable int32_t hit_limit_; // Hit limit during execution (bool)? 
+ DISALLOW_COPY_AND_ASSIGN(PCRE); +}; + +// PCRE_Options allow you to set the PCRE::Options, plus any pcre +// "extra" options. The only extras are match_limit, which limits +// the CPU time of a match, and stack_limit, which limits the +// stack usage. Setting a limit to <= 0 lets PCRE pick a sensible default +// that should not cause too many problems in production code. +// If PCRE hits a limit during a match, it may return a false negative, +// but (hopefully) it won't crash. +// +// NOTE: If you are handling regular expressions specified by +// (external or internal) users, rather than hard-coded ones, +// you should be using PCRE2, which uses an alternate implementation +// that avoids these issues. See http://go/re2quick. +class PCRE_Options { + public: + // constructor + PCRE_Options() : option_(PCRE::None), match_limit_(0), stack_limit_(0), report_errors_(true) {} + // accessors + PCRE::Option option() const { return option_; } + void set_option(PCRE::Option option) { + option_ = option; + } + int match_limit() const { return match_limit_; } + void set_match_limit(int match_limit) { + match_limit_ = match_limit; + } + int stack_limit() const { return stack_limit_; } + void set_stack_limit(int stack_limit) { + stack_limit_ = stack_limit; + } + + // If the regular expression is malformed, an error message will be printed + // iff report_errors() is true. Default: true. + bool report_errors() const { return report_errors_; } + void set_report_errors(bool report_errors) { + report_errors_ = report_errors; + } + private: + PCRE::Option option_; + int match_limit_; + int stack_limit_; + bool report_errors_; +}; + + +/***** Implementation details *****/ + +// Hex/Octal/Binary? 
+ +// Special class for parsing into objects that define a ParseFrom() method +template +class _PCRE_MatchObject { + public: + static inline bool Parse(const char* str, int n, void* dest) { + if (dest == NULL) return true; + T* object = reinterpret_cast(dest); + return object->ParseFrom(str, n); + } +}; + +class PCRE::Arg { + public: + // Empty constructor so we can declare arrays of PCRE::Arg + Arg(); + + // Constructor specially designed for NULL arguments + Arg(void*); + + typedef bool (*Parser)(const char* str, int n, void* dest); + +// Type-specific parsers +#define MAKE_PARSER(type,name) \ + Arg(type* p) : arg_(p), parser_(name) { } \ + Arg(type* p, Parser parser) : arg_(p), parser_(parser) { } \ + + + MAKE_PARSER(char, parse_char); + MAKE_PARSER(unsigned char, parse_uchar); + MAKE_PARSER(short, parse_short); + MAKE_PARSER(unsigned short, parse_ushort); + MAKE_PARSER(int, parse_int); + MAKE_PARSER(unsigned int, parse_uint); + MAKE_PARSER(long, parse_long); + MAKE_PARSER(unsigned long, parse_ulong); + MAKE_PARSER(long long, parse_longlong); + MAKE_PARSER(unsigned long long, parse_ulonglong); + MAKE_PARSER(float, parse_float); + MAKE_PARSER(double, parse_double); + MAKE_PARSER(string, parse_string); + MAKE_PARSER(StringPiece, parse_stringpiece); + +#undef MAKE_PARSER + + // Generic constructor + template Arg(T*, Parser parser); + // Generic constructor template + template Arg(T* p) + : arg_(p), parser_(_PCRE_MatchObject::Parse) { + } + + // Parse the data + bool Parse(const char* str, int n) const; + + private: + void* arg_; + Parser parser_; + + static bool parse_null (const char* str, int n, void* dest); + static bool parse_char (const char* str, int n, void* dest); + static bool parse_uchar (const char* str, int n, void* dest); + static bool parse_float (const char* str, int n, void* dest); + static bool parse_double (const char* str, int n, void* dest); + static bool parse_string (const char* str, int n, void* dest); + static bool parse_stringpiece (const 
char* str, int n, void* dest); + +#define DECLARE_INTEGER_PARSER(name) \ + private: \ + static bool parse_ ## name(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _radix( \ + const char* str, int n, void* dest, int radix); \ + public: \ + static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \ + static bool parse_ ## name ## _cradix(const char* str, int n, void* dest) + + DECLARE_INTEGER_PARSER(short); + DECLARE_INTEGER_PARSER(ushort); + DECLARE_INTEGER_PARSER(int); + DECLARE_INTEGER_PARSER(uint); + DECLARE_INTEGER_PARSER(long); + DECLARE_INTEGER_PARSER(ulong); + DECLARE_INTEGER_PARSER(longlong); + DECLARE_INTEGER_PARSER(ulonglong); + +#undef DECLARE_INTEGER_PARSER +}; + +inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) { } +inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } + +inline bool PCRE::Arg::Parse(const char* str, int n) const { + return (*parser_)(str, n, arg_); +} + +// This part of the parser, appropriate only for ints, deals with bases +#define MAKE_INTEGER_PARSER(type, name) \ + inline PCRE::Arg Hex(type* ptr) { \ + return PCRE::Arg(ptr, PCRE::Arg::parse_ ## name ## _hex); } \ + inline PCRE::Arg Octal(type* ptr) { \ + return PCRE::Arg(ptr, PCRE::Arg::parse_ ## name ## _octal); } \ + inline PCRE::Arg CRadix(type* ptr) { \ + return PCRE::Arg(ptr, PCRE::Arg::parse_ ## name ## _cradix); } + +MAKE_INTEGER_PARSER(short, short); +MAKE_INTEGER_PARSER(unsigned short, ushort); +MAKE_INTEGER_PARSER(int, int); +MAKE_INTEGER_PARSER(unsigned int, uint); +MAKE_INTEGER_PARSER(long, long); +MAKE_INTEGER_PARSER(unsigned long, ulong); +MAKE_INTEGER_PARSER(long long, longlong); +MAKE_INTEGER_PARSER(unsigned long long, ulonglong); + +#undef MAKE_INTEGER_PARSER + +} // namespace re2 diff --git a/src/openalpr/support/re2/util/random.h b/src/openalpr/support/re2/util/random.h new file mode 100644 index 0000000..6c6e701 --- /dev/null 
+++ b/src/openalpr/support/re2/util/random.h @@ -0,0 +1,29 @@ +// Copyright 2005-2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Modified from Google perftools's tcmalloc_unittest.cc. + +#ifndef RE2_UTIL_RANDOM_H__ +#define RE2_UTIL_RANDOM_H__ + +#include "util/util.h" + +namespace re2 { + +// ACM minimal standard random number generator. (re-entrant.) +class ACMRandom { + public: + ACMRandom(int32 seed) : seed_(seed) {} + int32 Next(); + int32 Uniform(int32); + + void Reset(int32 seed) { seed_ = seed; } + + private: + int32 seed_; +}; + +} // namespace re2 + +#endif // RE2_UTIL_RANDOM_H__ diff --git a/src/openalpr/support/re2/util/rune.cc b/src/openalpr/support/re2/util/rune.cc new file mode 100644 index 0000000..9792744 --- /dev/null +++ b/src/openalpr/support/re2/util/rune.cc @@ -0,0 +1,258 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include +#include +#include "re2/util/utf.h" + +namespace re2 { + +enum +{ + Bit1 = 7, + Bitx = 6, + Bit2 = 5, + Bit3 = 4, + Bit4 = 3, + Bit5 = 2, + + T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ + Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ + T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ + T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ + T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ + T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ + + Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ + Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ + Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ + Rune4 = (1<<(Bit4+3*Bitx))-1, + /* 0001 1111 1111 1111 1111 1111 */ + + Maskx = (1< T1 + */ + c = *(unsigned char*)str; + if(c < Tx) { + *rune = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + c1 = *(unsigned char*)(str+1) ^ Tx; + if(c1 & Testx) + goto bad; + if(c < T3) { + if(c < T2) + goto bad; + l = ((c << Bitx) | c1) & Rune2; + if(l <= Rune1) + goto bad; + *rune = l; + return 2; + } + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(unsigned char*)(str+2) ^ Tx; + if(c2 & Testx) + goto bad; + if(c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if(l <= Rune2) + goto bad; + *rune = l; + return 3; + } + + /* + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx + */ + c3 = *(unsigned char*)(str+3) ^ Tx; + if (c3 & Testx) + goto bad; + if (c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if (l <= Rune3) + goto bad; + *rune = l; + return 4; + } + + /* + * Support for 5-byte or longer UTF-8 would go here, but + * since we don't have that, we'll just fall through to bad. + */ + + /* + * bad decoding + */ +bad: + *rune = Bad; + return 1; +} + +int +runetochar(char *str, const Rune *rune) +{ + /* Runes are signed, so convert to unsigned for range check. 
*/ + unsigned long c; + + /* + * one character sequence + * 00000-0007F => 00-7F + */ + c = *rune; + if(c <= Rune1) { + str[0] = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + if(c <= Rune2) { + str[0] = T2 | (c >> 1*Bitx); + str[1] = Tx | (c & Maskx); + return 2; + } + + /* + * If the Rune is out of range, convert it to the error rune. + * Do this test here because the error rune encodes to three bytes. + * Doing it earlier would duplicate work, since an out of range + * Rune wouldn't have fit in one or two bytes. + */ + if (c > Runemax) + c = Runeerror; + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + if (c <= Rune3) { + str[0] = T3 | (c >> 2*Bitx); + str[1] = Tx | ((c >> 1*Bitx) & Maskx); + str[2] = Tx | (c & Maskx); + return 3; + } + + /* + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx + */ + str[0] = T4 | (c >> 3*Bitx); + str[1] = Tx | ((c >> 2*Bitx) & Maskx); + str[2] = Tx | ((c >> 1*Bitx) & Maskx); + str[3] = Tx | (c & Maskx); + return 4; +} + +int +runelen(Rune rune) +{ + char str[10]; + + return runetochar(str, &rune); +} + +int +fullrune(const char *str, int n) +{ + if (n > 0) { + int c = *(unsigned char*)str; + if (c < Tx) + return 1; + if (n > 1) { + if (c < T3) + return 1; + if (n > 2) { + if (c < T4 || n > 3) + return 1; + } + } + } + return 0; +} + + +int +utflen(const char *s) +{ + int c; + long n; + Rune rune; + + n = 0; + for(;;) { + c = *(unsigned char*)s; + if(c < Runeself) { + if(c == 0) + return n; + s++; + } else + s += chartorune(&rune, s); + n++; + } + return 0; +} + +char* +utfrune(const char *s, Rune c) +{ + long c1; + Rune r; + int n; + + if(c < Runesync) /* not part of utf sequence */ + return strchr((char*)s, c); + + for(;;) { + c1 = *(unsigned char*)s; + if(c1 < Runeself) { /* one byte rune */ + if(c1 == 0) + return 0; + if(c1 == c) + return (char*)s; + s++; + continue; + } + n = chartorune(&r, s); + if(r == c) + return (char*)s; + s += n; + } + return 
0; +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/util/sparse_array.h b/src/openalpr/support/re2/util/sparse_array.h new file mode 100644 index 0000000..e552f8f --- /dev/null +++ b/src/openalpr/support/re2/util/sparse_array.h @@ -0,0 +1,453 @@ +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DESCRIPTION +// +// SparseArray(m) is a map from integers in [0, m) to T values. +// It requires (sizeof(T)+sizeof(int))*m memory, but it provides +// fast iteration through the elements in the array and fast clearing +// of the array. The array has a concept of certain elements being +// uninitialized (having no value). +// +// Insertion and deletion are constant time operations. +// +// Allocating the array is a constant time operation +// when memory allocation is a constant time operation. +// +// Clearing the array is a constant time operation (unusual!). +// +// Iterating through the array is an O(n) operation, where n +// is the number of items in the array (not O(m)). +// +// The array iterator visits entries in the order they were first +// inserted into the array. It is safe to add items to the array while +// using an iterator: the iterator will visit indices added to the array +// during the iteration, but will not re-visit indices whose values +// change after visiting. Thus SparseArray can be a convenient +// implementation of a work queue. +// +// The SparseArray implementation is NOT thread-safe. It is up to the +// caller to make sure only one thread is accessing the array. (Typically +// these arrays are temporary values and used in situations where speed is +// important.) +// +// The SparseArray interface does not present all the usual STL bells and +// whistles. 
+// +// Implemented with reference to Briggs & Torczon, An Efficient +// Representation for Sparse Sets, ACM Letters on Programming Languages +// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69. +// +// Briggs & Torczon popularized this technique, but it had been known +// long before their paper. They point out that Aho, Hopcroft, and +// Ullman's 1974 Design and Analysis of Computer Algorithms and Bentley's +// 1986 Programming Pearls both hint at the technique in exercises to the +// reader (in Aho & Hopcroft, exercise 2.12; in Bentley, column 1 +// exercise 8). +// +// Briggs & Torczon describe a sparse set implementation. I have +// trivially generalized it to create a sparse array (actually the original +// target of the AHU and Bentley exercises). + +// IMPLEMENTATION +// +// SparseArray uses a vector dense_ and an array sparse_to_dense_, both of +// size max_size_. At any point, the number of elements in the sparse array is +// size_. +// +// The vector dense_ contains the size_ elements in the sparse array (with +// their indices), +// in the order that the elements were first inserted. This array is dense: +// the size_ pairs are dense_[0] through dense_[size_-1]. +// +// The array sparse_to_dense_ maps from indices in [0,m) to indices in +// [0,size_). +// For indices present in the array, dense_[sparse_to_dense_[i]].index_ == i. +// For indices not present in the array, sparse_to_dense_ can contain +// any value at all, perhaps outside the range [0, size_) but perhaps not. +// +// The lax requirement on sparse_to_dense_ values makes clearing +// the array very easy: set size_ to 0. Lookups are slightly more +// complicated. An index i has a value in the array if and only if: +// sparse_to_dense_[i] is in [0, size_) AND +// dense_[sparse_to_dense_[i]].index_ == i. +// If both these properties hold, only then it is safe to refer to +// dense_[sparse_to_dense_[i]].value_ +// as the value associated with index i. 
+// +// To insert a new entry, set sparse_to_dense_[i] to size_, +// initialize dense_[size_], and then increment size_. +// +// Deletion of specific values from the array is implemented by +// swapping dense_[size_-1] and the dense_ being deleted and then +// updating the appropriate sparse_to_dense_ entries. +// +// To make the sparse array as efficient as possible for non-primitive types, +// elements may or may not be destroyed when they are deleted from the sparse +// array through a call to erase(), erase_existing() or resize(). They +// immediately become inaccessible, but they are only guaranteed to be +// destroyed when the SparseArray destructor is called. + +#ifndef RE2_UTIL_SPARSE_ARRAY_H__ +#define RE2_UTIL_SPARSE_ARRAY_H__ + +#include "re2/util/util.h" + +namespace re2 { + +template +class SparseArray { + public: + SparseArray(); + SparseArray(int max_size); + ~SparseArray(); + + // IndexValue pairs: exposed in SparseArray::iterator. + class IndexValue; + + typedef IndexValue value_type; + typedef typename vector::iterator iterator; + typedef typename vector::const_iterator const_iterator; + + inline const IndexValue& iv(int i) const; + + // Return the number of entries in the array. + int size() const { + return size_; + } + + // Iterate over the array. + iterator begin() { + return dense_.begin(); + } + iterator end() { + return dense_.begin() + size_; + } + + const_iterator begin() const { + return dense_.begin(); + } + const_iterator end() const { + return dense_.begin() + size_; + } + + // Change the maximum size of the array. + // Invalidates all iterators. + void resize(int max_size); + + // Return the maximum size of the array. + // Indices can be in the range [0, max_size). + int max_size() const { + return max_size_; + } + + // Clear the array. + void clear() { + size_ = 0; + } + + // Check whether index i is in the array. + inline bool has_index(int i) const; + + // Comparison function for sorting. 
+ // Can sort the sparse array so that future iterations + // will visit indices in increasing order using + // sort(arr.begin(), arr.end(), arr.less); + static bool less(const IndexValue& a, const IndexValue& b); + + public: + // Set the value at index i to v. + inline iterator set(int i, Value v); + + pair insert(const value_type& new_value); + + // Returns the value at index i + // or defaultv if index i is not initialized in the array. + inline Value get(int i, Value defaultv) const; + + iterator find(int i); + + const_iterator find(int i) const; + + // Change the value at index i to v. + // Fast but unsafe: only use if has_index(i) is true. + inline iterator set_existing(int i, Value v); + + // Set the value at the new index i to v. + // Fast but unsafe: only use if has_index(i) is false. + inline iterator set_new(int i, Value v); + + // Get the value at index i from the array.. + // Fast but unsafe: only use if has_index(i) is true. + inline Value get_existing(int i) const; + + // Erasing items from the array during iteration is in general + // NOT safe. There is one special case, which is that the current + // index-value pair can be erased as long as the iterator is then + // checked for being at the end before being incremented. + // For example: + // + // for (i = m.begin(); i != m.end(); ++i) { + // if (ShouldErase(i->index(), i->value())) { + // m.erase(i->index()); + // --i; + // } + // } + // + // Except in the specific case just described, elements must + // not be erased from the array (including clearing the array) + // while iterators are walking over the array. Otherwise, + // the iterators could walk past the end of the array. + + // Erases the element at index i from the array. + inline void erase(int i); + + // Erases the element at index i from the array. + // Fast but unsafe: only use if has_index(i) is true. + inline void erase_existing(int i); + + private: + // Add the index i to the array. 
+ // Only use if has_index(i) is known to be false. + // Since it doesn't set the value associated with i, + // this function is private, only intended as a helper + // for other methods. + inline void create_index(int i); + + // In debug mode, verify that some invariant properties of the class + // are being maintained. This is called at the end of the constructor + // and at the beginning and end of all public non-const member functions. + inline void DebugCheckInvariants() const; + + int size_; + int max_size_; + int* sparse_to_dense_; + vector dense_; + bool valgrind_; + + DISALLOW_COPY_AND_ASSIGN(SparseArray); +}; + +template +SparseArray::SparseArray() + : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(), valgrind_(RunningOnValgrind()) {} + +// IndexValue pairs: exposed in SparseArray::iterator. +template +class SparseArray::IndexValue { + friend class SparseArray; + public: + typedef int first_type; + typedef Value second_type; + + IndexValue() {} + IndexValue(int index, const Value& value) : second(value), index_(index) {} + + int index() const { return index_; } + Value value() const { return second; } + + // Provide the data in the 'second' member so that the utilities + // in map-util work. + Value second; + + private: + int index_; +}; + +template +const typename SparseArray::IndexValue& +SparseArray::iv(int i) const { + DCHECK_GE(i, 0); + DCHECK_LT(i, size_); + return dense_[i]; +} + +// Change the maximum size of the array. +// Invalidates all iterators. +template +void SparseArray::resize(int new_max_size) { + DebugCheckInvariants(); + if (new_max_size > max_size_) { + int* a = new int[new_max_size]; + if (sparse_to_dense_) { + memmove(a, sparse_to_dense_, max_size_*sizeof a[0]); + // Don't need to zero the memory but appease Valgrind. 
+ if (valgrind_) { + for (int i = max_size_; i < new_max_size; i++) + a[i] = 0xababababU; + } + delete[] sparse_to_dense_; + } + sparse_to_dense_ = a; + + dense_.resize(new_max_size); + } + max_size_ = new_max_size; + if (size_ > max_size_) + size_ = max_size_; + DebugCheckInvariants(); +} + +// Check whether index i is in the array. +template +bool SparseArray::has_index(int i) const { + DCHECK_GE(i, 0); + DCHECK_LT(i, max_size_); + if (static_cast(i) >= static_cast(max_size_)) { + return false; + } + // Unsigned comparison avoids checking sparse_to_dense_[i] < 0. + return (uint)sparse_to_dense_[i] < (uint)size_ && + dense_[sparse_to_dense_[i]].index_ == i; +} + +// Set the value at index i to v. +template +typename SparseArray::iterator SparseArray::set(int i, Value v) { + DebugCheckInvariants(); + if (static_cast(i) >= static_cast(max_size_)) { + // Semantically, end() would be better here, but we already know + // the user did something stupid, so begin() insulates them from + // dereferencing an invalid pointer. 
+ return begin(); + } + if (!has_index(i)) + create_index(i); + return set_existing(i, v); +} + +template +pair::iterator, bool> SparseArray::insert( + const value_type& new_value) { + DebugCheckInvariants(); + pair::iterator, bool> p; + if (has_index(new_value.index_)) { + p = make_pair(dense_.begin() + sparse_to_dense_[new_value.index_], false); + } else { + p = make_pair(set_new(new_value.index_, new_value.second), true); + } + DebugCheckInvariants(); + return p; +} + +template +Value SparseArray::get(int i, Value defaultv) const { + if (!has_index(i)) + return defaultv; + return get_existing(i); +} + +template +typename SparseArray::iterator SparseArray::find(int i) { + if (has_index(i)) + return dense_.begin() + sparse_to_dense_[i]; + return end(); +} + +template +typename SparseArray::const_iterator +SparseArray::find(int i) const { + if (has_index(i)) { + return dense_.begin() + sparse_to_dense_[i]; + } + return end(); +} + +template +typename SparseArray::iterator +SparseArray::set_existing(int i, Value v) { + DebugCheckInvariants(); + DCHECK(has_index(i)); + dense_[sparse_to_dense_[i]].second = v; + DebugCheckInvariants(); + return dense_.begin() + sparse_to_dense_[i]; +} + +template +typename SparseArray::iterator +SparseArray::set_new(int i, Value v) { + DebugCheckInvariants(); + if (static_cast(i) >= static_cast(max_size_)) { + // Semantically, end() would be better here, but we already know + // the user did something stupid, so begin() insulates them from + // dereferencing an invalid pointer. 
+ return begin(); + } + DCHECK(!has_index(i)); + create_index(i); + return set_existing(i, v); +} + +template +Value SparseArray::get_existing(int i) const { + DCHECK(has_index(i)); + return dense_[sparse_to_dense_[i]].second; +} + +template +void SparseArray::erase(int i) { + DebugCheckInvariants(); + if (has_index(i)) + erase_existing(i); + DebugCheckInvariants(); +} + +template +void SparseArray::erase_existing(int i) { + DebugCheckInvariants(); + DCHECK(has_index(i)); + int di = sparse_to_dense_[i]; + if (di < size_ - 1) { + dense_[di] = dense_[size_ - 1]; + sparse_to_dense_[dense_[di].index_] = di; + } + size_--; + DebugCheckInvariants(); +} + +template +void SparseArray::create_index(int i) { + DCHECK(!has_index(i)); + DCHECK_LT(size_, max_size_); + sparse_to_dense_[i] = size_; + dense_[size_].index_ = i; + size_++; +} + +template SparseArray::SparseArray(int max_size) { + max_size_ = max_size; + sparse_to_dense_ = new int[max_size]; + valgrind_ = RunningOnValgrind(); + dense_.resize(max_size); + // Don't need to zero the new memory, but appease Valgrind. + if (valgrind_) { + for (int i = 0; i < max_size; i++) { + sparse_to_dense_[i] = 0xababababU; + dense_[i].index_ = 0xababababU; + } + } + size_ = 0; + DebugCheckInvariants(); +} + +template SparseArray::~SparseArray() { + DebugCheckInvariants(); + delete[] sparse_to_dense_; +} + +template void SparseArray::DebugCheckInvariants() const { + DCHECK_LE(0, size_); + DCHECK_LE(size_, max_size_); + DCHECK(size_ == 0 || sparse_to_dense_ != NULL); +} + +// Comparison function for sorting. +template bool SparseArray::less(const IndexValue& a, + const IndexValue& b) { + return a.index_ < b.index_; +} + +} // namespace re2 + +#endif // RE2_UTIL_SPARSE_ARRAY_H__ diff --git a/src/openalpr/support/re2/util/sparse_set.h b/src/openalpr/support/re2/util/sparse_set.h new file mode 100644 index 0000000..1612632 --- /dev/null +++ b/src/openalpr/support/re2/util/sparse_set.h @@ -0,0 +1,188 @@ +// Copyright 2006 The RE2 Authors. 
All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// DESCRIPTION +// +// SparseSet(m) is a set of integers in [0, m). +// It requires sizeof(int)*m memory, but it provides +// fast iteration through the elements in the set and fast clearing +// of the set. +// +// Insertion and deletion are constant time operations. +// +// Allocating the set is a constant time operation +// when memory allocation is a constant time operation. +// +// Clearing the set is a constant time operation (unusual!). +// +// Iterating through the set is an O(n) operation, where n +// is the number of items in the set (not O(m)). +// +// The set iterator visits entries in the order they were first +// inserted into the array. It is safe to add items to the set while +// using an iterator: the iterator will visit indices added to the set +// during the iteration, but will not re-visit indices whose values +// change after visiting. Thus SparseSet can be a convenient +// implementation of a work queue. +// +// The SparseSet implementation is NOT thread-safe. It is up to the +// caller to make sure only one thread is accessing the set. (Typically +// these sets are temporary values and used in situations where speed is +// important.) +// +// The SparseSet interface does not present all the usual STL bells and +// whistles. +// +// Implemented with reference to Briggs & Torczon, An Efficient +// Representation for Sparse Sets, ACM Letters on Programming Languages +// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69. +// +// For a generalization to sparse array, see sparse_array.h. 
+ +// IMPLEMENTATION +// +// See sparse_array.h for implementation details + +#ifndef RE2_UTIL_SPARSE_SET_H__ +#define RE2_UTIL_SPARSE_SET_H__ + +#include "re2/util/util.h" + +namespace re2 { + +static bool InitMemory() { +#ifdef MEMORY_SANITIZER + return true; +#else + return RunningOnValgrind(); +#endif +} + +class SparseSet { + public: + SparseSet() + : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL), + init_memory_(InitMemory()) {} + + SparseSet(int max_size) { + max_size_ = max_size; + sparse_to_dense_ = new int[max_size]; + dense_ = new int[max_size]; + init_memory_ = InitMemory(); + // Don't need to zero the memory, but do so anyway + // to appease Valgrind. + if (init_memory_) { + for (int i = 0; i < max_size; i++) { + dense_[i] = 0xababababU; + sparse_to_dense_[i] = 0xababababU; + } + } + size_ = 0; + } + + ~SparseSet() { + delete[] sparse_to_dense_; + delete[] dense_; + } + + typedef int* iterator; + typedef const int* const_iterator; + + int size() const { return size_; } + iterator begin() { return dense_; } + iterator end() { return dense_ + size_; } + const_iterator begin() const { return dense_; } + const_iterator end() const { return dense_ + size_; } + + // Change the maximum size of the array. + // Invalidates all iterators. + void resize(int new_max_size) { + if (size_ > new_max_size) + size_ = new_max_size; + if (new_max_size > max_size_) { + int* a = new int[new_max_size]; + if (sparse_to_dense_) { + memmove(a, sparse_to_dense_, max_size_*sizeof a[0]); + if (init_memory_) { + for (int i = max_size_; i < new_max_size; i++) + a[i] = 0xababababU; + } + delete[] sparse_to_dense_; + } + sparse_to_dense_ = a; + + a = new int[new_max_size]; + if (dense_) { + memmove(a, dense_, size_*sizeof a[0]); + if (init_memory_) { + for (int i = size_; i < new_max_size; i++) + a[i] = 0xababababU; + } + delete[] dense_; + } + dense_ = a; + } + max_size_ = new_max_size; + } + + // Return the maximum size of the array. 
+ // Indices can be in the range [0, max_size). + int max_size() const { return max_size_; } + + // Clear the array. + void clear() { size_ = 0; } + + // Check whether i is in the array. + bool contains(int i) const { + DCHECK_GE(i, 0); + DCHECK_LT(i, max_size_); + if (static_cast(i) >= static_cast(max_size_)) { + return false; + } + // Unsigned comparison avoids checking sparse_to_dense_[i] < 0. + return (uint)sparse_to_dense_[i] < (uint)size_ && + dense_[sparse_to_dense_[i]] == i; + } + + // Adds i to the set. + void insert(int i) { + if (!contains(i)) + insert_new(i); + } + + // Set the value at the new index i to v. + // Fast but unsafe: only use if contains(i) is false. + void insert_new(int i) { + if (static_cast(i) >= static_cast(max_size_)) { + // Semantically, end() would be better here, but we already know + // the user did something stupid, so begin() insulates them from + // dereferencing an invalid pointer. + return; + } + DCHECK(!contains(i)); + DCHECK_LT(size_, max_size_); + sparse_to_dense_[i] = size_; + dense_[size_] = i; + size_++; + } + + // Comparison function for sorting. + // Can sort the sparse array so that future iterations + // will visit indices in increasing order using + // sort(arr.begin(), arr.end(), arr.less); + static bool less(int a, int b) { return a < b; } + + private: + int size_; + int max_size_; + int* sparse_to_dense_; + int* dense_; + bool init_memory_; + + DISALLOW_COPY_AND_ASSIGN(SparseSet); +}; + +} // namespace re2 + +#endif // RE2_UTIL_SPARSE_SET_H__ diff --git a/src/openalpr/support/re2/util/stringprintf.cc b/src/openalpr/support/re2/util/stringprintf.cc new file mode 100644 index 0000000..3c9c14b --- /dev/null +++ b/src/openalpr/support/re2/util/stringprintf.cc @@ -0,0 +1,82 @@ +// Copyright 2002 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "re2/util/util.h" + +namespace re2 { + +static void StringAppendV(string* dst, const char* format, va_list ap) { + // First try with a small fixed size buffer + char space[1024]; + + // It's possible for methods that use a va_list to invalidate + // the data in it upon use. The fix is to make a copy + // of the structure before using it and use that copy instead. + va_list backup_ap; + va_copy(backup_ap, ap); + int result = vsnprintf(space, sizeof(space), format, backup_ap); + va_end(backup_ap); + + if ((result >= 0) && (static_cast(result) < sizeof(space))) { + // It fit + dst->append(space, result); + return; + } + + // Repeatedly increase buffer size until it fits + int length = sizeof(space); + while (true) { + if (result < 0) { + // Older behavior: just try doubling the buffer size + length *= 2; + } else { + // We need exactly "result+1" characters + length = result+1; + } + char* buf = new char[length]; + + // Restore the va_list before we use it again + va_copy(backup_ap, ap); + #ifdef WIN32 + result = vsnprintf_s(buf, length, length, format, backup_ap); + #else + result = vsnprintf(buf, length, format, backup_ap); + #endif + va_end(backup_ap); + + if ((result >= 0) && (result < length)) { + // It fit + dst->append(buf, result); + delete[] buf; + return; + } + delete[] buf; + } +} + +string StringPrintf(const char* format, ...) { + va_list ap; + va_start(ap, format); + string result; + StringAppendV(&result, format, ap); + va_end(ap); + return result; +} + +void SStringPrintf(string* dst, const char* format, ...) { + va_list ap; + va_start(ap, format); + dst->clear(); + StringAppendV(dst, format, ap); + va_end(ap); +} + +void StringAppendF(string* dst, const char* format, ...) 
{ + va_list ap; + va_start(ap, format); + StringAppendV(dst, format, ap); + va_end(ap); +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/util/strutil.cc b/src/openalpr/support/re2/util/strutil.cc new file mode 100644 index 0000000..215923a --- /dev/null +++ b/src/openalpr/support/re2/util/strutil.cc @@ -0,0 +1,102 @@ +// Copyright 1999-2005 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "re2/util/util.h" +#include "re2/stringpiece.h" + +namespace re2 { + +// ---------------------------------------------------------------------- +// CEscapeString() +// Copies 'src' to 'dest', escaping dangerous characters using +// C-style escape sequences. 'src' and 'dest' should not overlap. +// Returns the number of bytes written to 'dest' (not including the \0) +// or -1 if there was insufficient space. +// ---------------------------------------------------------------------- +int CEscapeString(const char* src, int src_len, char* dest, + int dest_len) { + const char* src_end = src + src_len; + int used = 0; + + for (; src < src_end; src++) { + if (dest_len - used < 2) // space for two-character escape + return -1; + + unsigned char c = *src; + switch (c) { + case '\n': dest[used++] = '\\'; dest[used++] = 'n'; break; + case '\r': dest[used++] = '\\'; dest[used++] = 'r'; break; + case '\t': dest[used++] = '\\'; dest[used++] = 't'; break; + case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break; + case '\'': dest[used++] = '\\'; dest[used++] = '\''; break; + case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break; + default: + // Note that if we emit \xNN and the src character after that is a hex + // digit then that digit must be escaped too to prevent it being + // interpreted as part of the character code by C. 
+ if (c < ' ' || c > '~') { + if (dest_len - used < 5) // space for four-character escape + \0 + return -1; + #ifdef WIN32 + sprintf_s(dest + used, dest_len, "\\%03o", c); + #else + sprintf(dest + used, "\\%03o", c); + #endif + + used += 4; + } else { + dest[used++] = c; break; + } + } + } + + if (dest_len - used < 1) // make sure that there is room for \0 + return -1; + + dest[used] = '\0'; // doesn't count towards return value though + return used; +} + + +// ---------------------------------------------------------------------- +// CEscape() +// Copies 'src' to result, escaping dangerous characters using +// C-style escape sequences. 'src' and 'dest' should not overlap. +// ---------------------------------------------------------------------- +string CEscape(const StringPiece& src) { + const int dest_length = src.size() * 4 + 1; // Maximum possible expansion + char* dest = new char[dest_length]; + const int len = CEscapeString(src.data(), src.size(), + dest, dest_length); + string s = string(dest, len); + delete[] dest; + return s; +} + +string PrefixSuccessor(const StringPiece& prefix) { + // We can increment the last character in the string and be done + // unless that character is 255, in which case we have to erase the + // last character and increment the previous character, unless that + // is 255, etc. If the string is empty or consists entirely of + // 255's, we just return the empty string. 
+ bool done = false; + string limit(prefix.data(), prefix.size()); + int index = limit.length() - 1; + while (!done && index >= 0) { + if ((limit[index]&255) == 255) { + limit.erase(index); + index--; + } else { + limit[index]++; + done = true; + } + } + if (!done) { + return ""; + } else { + return limit; + } +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/util/test.h b/src/openalpr/support/re2/util/test.h new file mode 100644 index 0000000..45ca6fa --- /dev/null +++ b/src/openalpr/support/re2/util/test.h @@ -0,0 +1,50 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef RE2_UTIL_TEST_H__ +#define RE2_UTIL_TEST_H__ + +#include "util/util.h" +#include "util/flags.h" + +#define TEST(x, y) \ + void x##y(void); \ + TestRegisterer r##x##y(x##y, # x "." # y); \ + void x##y(void) + +void RegisterTest(void (*)(void), const char*); + +class TestRegisterer { + public: + TestRegisterer(void (*fn)(void), const char *s) { + RegisterTest(fn, s); + } +}; + +// TODO(rsc): Do a better job. +#define EXPECT_EQ CHECK_EQ +#define EXPECT_TRUE CHECK +#define EXPECT_LT CHECK_LT +#define EXPECT_GT CHECK_GT +#define EXPECT_LE CHECK_LE +#define EXPECT_GE CHECK_GE +#define EXPECT_FALSE(x) CHECK(!(x)) + +const bool UsingMallocCounter = false; +namespace testing { +class MallocCounter { + public: + MallocCounter(int x) { } + static const int THIS_THREAD_ONLY = 0; + long long HeapGrowth() { return 0; } + long long PeakHeapGrowth() { return 0; } + void Reset() { } +}; +} // namespace testing + +namespace re2 { +int64 VirtualProcessSize(); +} // namespace re2 + +#endif // RE2_UTIL_TEST_H__ diff --git a/src/openalpr/support/re2/util/thread.h b/src/openalpr/support/re2/util/thread.h new file mode 100644 index 0000000..b9610e0 --- /dev/null +++ b/src/openalpr/support/re2/util/thread.h @@ -0,0 +1,26 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef RE2_UTIL_THREAD_H__ +#define RE2_UTIL_THREAD_H__ + +#include + +class Thread { + public: + Thread(); + virtual ~Thread(); + void Start(); + void Join(); + void SetJoinable(bool); + virtual void Run() = 0; + + private: + pthread_t pid_; + bool running_; + bool joinable_; +}; + +#endif // RE2_UTIL_THREAD_H__ + diff --git a/src/openalpr/support/re2/util/utf.h b/src/openalpr/support/re2/util/utf.h new file mode 100644 index 0000000..06ff8f0 --- /dev/null +++ b/src/openalpr/support/re2/util/utf.h @@ -0,0 +1,43 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * + * This file and rune.cc have been converted to compile as C++ code + * in name space re2. 
+ */ +#ifndef RE2_UTIL_UTF_H__ +#define RE2_UTIL_UTF_H__ + +#include + +namespace re2 { + +typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ + +enum +{ + UTFmax = 4, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0xFFFD, /* decoding error in UTF */ + Runemax = 0x10FFFF, /* maximum rune value */ +}; + +int runetochar(char* s, const Rune* r); +int chartorune(Rune* r, const char* s); +int fullrune(const char* s, int n); +int utflen(const char* s); +char* utfrune(const char*, Rune); + +} // namespace re2 + +#endif // RE2_UTIL_UTF_H__ diff --git a/src/openalpr/support/re2/util/util.h b/src/openalpr/support/re2/util/util.h new file mode 100644 index 0000000..974223e --- /dev/null +++ b/src/openalpr/support/re2/util/util.h @@ -0,0 +1,154 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef RE2_UTIL_UTIL_H__ +#define RE2_UTIL_UTIL_H__ + +// C +#include +#include +#include +#include // For size_t +#include +#include +#include // For clock_gettime, CLOCK_REALTIME +#include // For isdigit, isalpha + +#if !defined(_WIN32) +#include // For gettimeofday +#endif + +// C++ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Use std names. 
+using std::set; +using std::pair; +using std::vector; +using std::string; +using std::min; +using std::max; +using std::ostream; +using std::map; +using std::stack; +using std::sort; +using std::swap; +using std::make_pair; + +#if defined(__GNUC__) && !defined(USE_CXX0X) && !defined(_LIBCPP_ABI_VERSION) + +#include +using std::tr1::unordered_set; + +#else + +#include +#if defined(_WIN32) +using std::tr1::unordered_set; +#else +using std::unordered_set; +#endif + +#endif + +#ifdef _WIN32 + +#define snprintf _snprintf_s +#define sprintf sprintf_s +#define stricmp _stricmp +#define strtof strtod /* not really correct but best we can do */ +#define strtoll _strtoi64 +#define strtoull _strtoui64 +#define vsnprintf vsnprintf_s + +#pragma warning(disable: 4018) // signed/unsigned mismatch +#pragma warning(disable: 4244) // possible data loss in int conversion +#pragma warning(disable: 4800) // conversion from int to bool + +#endif + +namespace re2 { + +typedef int8_t int8; +typedef uint8_t uint8; +typedef int16_t int16; +typedef uint16_t uint16; +typedef int32_t int32; +typedef uint32_t uint32; +typedef int64_t int64; +typedef uint64_t uint64; + +typedef unsigned long ulong; +typedef unsigned int uint; +typedef unsigned short ushort; + +// Prevent the compiler from complaining about or optimizing away variables +// that appear unused. +#undef ATTRIBUTE_UNUSED +#if defined(__GNUC__) +#define ATTRIBUTE_UNUSED __attribute__ ((unused)) +#else +#define ATTRIBUTE_UNUSED +#endif + +// COMPILE_ASSERT causes a compile error about msg if expr is not true. +#if __cplusplus >= 201103L +#define COMPILE_ASSERT(expr, msg) static_assert(expr, #msg) +#else +template struct CompileAssert {}; +#define COMPILE_ASSERT(expr, msg) \ + typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] ATTRIBUTE_UNUSED +#endif + +// DISALLOW_COPY_AND_ASSIGN disallows the copy and operator= functions. +// It goes in the private: declarations in a class. 
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +#define arraysize(array) (int)(sizeof(array)/sizeof((array)[0])) + +class StringPiece; + +string CEscape(const StringPiece& src); +int CEscapeString(const char* src, int src_len, char* dest, int dest_len); + +extern string StringPrintf(const char* format, ...); +extern void SStringPrintf(string* dst, const char* format, ...); +extern void StringAppendF(string* dst, const char* format, ...); +extern string PrefixSuccessor(const StringPiece& prefix); + +uint32 hashword(const uint32*, size_t, uint32); +void hashword2(const uint32*, size_t, uint32*, uint32*); + +static inline uint32 Hash32StringWithSeed(const char* s, int len, uint32 seed) { + return hashword((uint32*)s, len/4, seed); +} + +static inline uint64 Hash64StringWithSeed(const char* s, int len, uint32 seed) { + uint32 x, y; + x = seed; + y = 0; + hashword2((uint32*)s, len/4, &x, &y); + return ((uint64)x << 32) | y; +} + +int RunningOnValgrind(); + +} // namespace re2 + +#include "re2/util/logging.h" +#include "re2/util/mutex.h" +#include "re2/util/utf.h" + +#endif // RE2_UTIL_UTIL_H__ diff --git a/src/openalpr/support/re2/util/valgrind.cc b/src/openalpr/support/re2/util/valgrind.cc new file mode 100644 index 0000000..88a6120 --- /dev/null +++ b/src/openalpr/support/re2/util/valgrind.cc @@ -0,0 +1,26 @@ +// Copyright 2009 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "re2/util/util.h" +#ifndef _WIN32 +#include "re2/util/valgrind.h" +#endif + +namespace re2 { + +#ifndef __has_feature +#define __has_feature(x) 0 +#endif + +int RunningOnValgrind() { +#if __has_feature(memory_sanitizer) + return true; +#elif defined(RUNNING_ON_VALGRIND) + return RUNNING_ON_VALGRIND; +#else + return 0; +#endif +} + +} // namespace re2 diff --git a/src/openalpr/support/re2/util/valgrind.h b/src/openalpr/support/re2/util/valgrind.h new file mode 100644 index 0000000..ca10b1a --- /dev/null +++ b/src/openalpr/support/re2/util/valgrind.h @@ -0,0 +1,4517 @@ +/* -*- c -*- + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one + file (valgrind.h) only. The rest of Valgrind is licensed under the + terms of the GNU General Public License, version 2, unless + otherwise indicated. See the COPYING file in the source + distribution for details. + + ---------------------------------------------------------------- + + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2009 Julian Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (valgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + + +/* This file is for inclusion into client (your!) code. + + You can use these macros to manipulate and query Valgrind's + execution inside your own programs. + + The resulting executables will still run without Valgrind, just a + little bit more slowly than they otherwise would, but otherwise + unchanged. When not running on valgrind, each client request + consumes very few (eg. 7) instructions, so the resulting performance + loss is negligible unless you plan to execute client requests + millions of times per second. Nevertheless, if that is still a + problem, you can compile with the NVALGRIND symbol defined (gcc + -DNVALGRIND) so that client requests are not even compiled in. */ + +#ifndef __VALGRIND_H +#define __VALGRIND_H + +#include + +/* Nb: this file might be included in a file compiled with -ansi. 
So + we can't use C++ style "//" comments nor the "asm" keyword (instead + use "__asm__"). */ + +/* Derive some tags indicating what the target platform is. Note + that in this file we're using the compiler's CPP symbols for + identifying architectures, which are different to the ones we use + within the rest of Valgrind. Note, __powerpc__ is active for both + 32 and 64-bit PPC, whereas __powerpc64__ is only active for the + latter (on Linux, that is). + + Misc note: how to find out what's predefined in gcc by default: + gcc -Wp,-dM somefile.c +*/ +#undef PLAT_ppc64_aix5 +#undef PLAT_ppc32_aix5 +#undef PLAT_x86_darwin +#undef PLAT_amd64_darwin +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64_linux +#undef PLAT_arm_linux + +#if defined(_AIX) && defined(__64BIT__) +# define PLAT_ppc64_aix5 1 +#elif defined(_AIX) && !defined(__64BIT__) +# define PLAT_ppc32_aix5 1 +#elif defined(__APPLE__) && defined(__i386__) +# define PLAT_x86_darwin 1 +#elif defined(__APPLE__) && defined(__x86_64__) +# define PLAT_amd64_darwin 1 +#elif defined(__linux__) && defined(__i386__) +# define PLAT_x86_linux 1 +#elif defined(__linux__) && defined(__x86_64__) +# define PLAT_amd64_linux 1 +#elif defined(__linux__) && defined(__powerpc__) && !defined(__powerpc64__) +# define PLAT_ppc32_linux 1 +#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) +# define PLAT_ppc64_linux 1 +#elif defined(__linux__) && defined(__arm__) +# define PLAT_arm_linux 1 +#else +/* If we're not compiling for our target platform, don't generate + any inline asms. */ +# if !defined(NVALGRIND) +# define NVALGRIND 1 +# endif +#endif + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing */ +/* in here of use to end-users -- skip to the next section. 
*/ +/* ------------------------------------------------------------------ */ + +#if defined(NVALGRIND) + +/* Define NVALGRIND to completely remove the Valgrind magic sequence + from the compiled code (analogous to NDEBUG's effects on + assert()) */ +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { \ + (_zzq_rlval) = (_zzq_default); \ + } + +#else /* ! NVALGRIND */ + +/* The following defines the magic code sequences which the JITter + spots and handles magically. Don't look too closely at them as + they will rot your brain. + + The assembly code sequences for all architectures is in this one + file. This is because this file must be stand-alone, and we don't + want to have multiple files. + + For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default + value gets put in the return slot, so that everything works when + this is executed not under Valgrind. Args are passed in a memory + block, and so there's no intrinsic limit to the number that could + be passed, but it's currently five. + + The macro args are: + _zzq_rlval result lvalue + _zzq_default default value (result returned when running on real CPU) + _zzq_request request code + _zzq_arg1..5 request params + + The other two macros are used to support function wrapping, and are + a lot simpler. VALGRIND_GET_NR_CONTEXT returns the value of the + guest's NRADDR pseudo-register and whatever other information is + needed to safely run the call original from the wrapper: on + ppc64-linux, the R2 value at the divert point is also needed. This + information is abstracted into a user-visible type, OrigFn. + + VALGRIND_CALL_NOREDIR_* behaves the same as the following on the + guest, but guarantees that the branch instruction will not be + redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64: + branch-and-link-to-r11. 
VALGRIND_CALL_NOREDIR is just text, not a + complete inline asm, since it needs to be combined with more magic + inline asm stuff to be useful. +*/ + +/* ------------------------- x86-{linux,darwin} ---------------- */ + +#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "roll $3, %%edi ; roll $13, %%edi\n\t" \ + "roll $29, %%edi ; roll $19, %%edi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EDX = client_request ( %EAX ) */ \ + "xchgl %%ebx,%%ebx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EAX = guest_NRADDR */ \ + "xchgl %%ecx,%%ecx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%EAX */ \ + "xchgl %%edx,%%edx\n\t" +#endif /* PLAT_x86_linux || PLAT_x86_darwin */ + +/* ------------------------ amd64-{linux,darwin} --------------- */ + +#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ + "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { volatile unsigned long long int _zzq_args[6]; \ + volatile unsigned long long int _zzq_result; \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RDX = client_request ( %RAX ) */ \ + "xchgq %%rbx,%%rbx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RAX = guest_NRADDR */ \ + "xchgq %%rcx,%%rcx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_RAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%RAX */ \ + "xchgq %%rdx,%%rdx\n\t" +#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned int _zzq_args[6]; \ + unsigned int _zzq_result; \ + unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? */ + unsigned long long int r2; /* what tocptr do we need? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned long long int _zzq_args[6]; \ + register unsigned long long int _zzq_result __asm__("r3"); \ + register unsigned long long int* _zzq_ptr __asm__("r4"); \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1" \ + : "=r" (_zzq_result) \ + : "0" (_zzq_default), "r" (_zzq_ptr) \ + : "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned long long int __addr __asm__("r3"); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2" \ + : "=r" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4" \ + : "=r" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc64_linux */ + +/* ------------------------- arm-linux ------------------------- */ + +#if defined(PLAT_arm_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "mov r12, r12, ror #3 ; mov r12, r12, ror #13 \n\t" \ + "mov r12, r12, ror #29 ; mov r12, r12, ror #19 \n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile("mov r3, %1\n\t" /*default*/ \ + "mov r4, %2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* R3 = client_request ( R4 ) */ \ + "orr r10, r10, r10\n\t" \ + "mov %0, r3" /*result*/ \ + : "=r" (_zzq_result) \ + : "r" (_zzq_default), "r" (&_zzq_args[0]) \ + : "cc","memory", "r3", "r4"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* R3 = guest_NRADDR */ \ + "orr r11, r11, r11\n\t" \ + "mov %0, r3" \ + : "=r" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R4 */ \ + "orr r12, r12, r12\n\t" + +#endif /* PLAT_arm_linux */ + +/* ------------------------ ppc32-aix5 ------------------------- */ + +#if defined(PLAT_ppc32_aix5) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + unsigned int r2; /* what tocptr do we need? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned int _zzq_args[7]; \ + register unsigned int _zzq_result; \ + register unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_args[6] = (unsigned int)(_zzq_default); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 4,%1\n\t" \ + "lwz 3, 24(4)\n\t" \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" \ + : "=b" (_zzq_result) \ + : "b" (_zzq_ptr) \ + : "r3", "r4", "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc32_aix5 */ + +/* ------------------------ ppc64-aix5 ------------------------- */ + +#if defined(PLAT_ppc64_aix5) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? 
*/ + unsigned long long int r2; /* what tocptr do we need? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned long long int _zzq_args[7]; \ + register unsigned long long int _zzq_result; \ + register unsigned long long int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int long long)(_zzq_request); \ + _zzq_args[1] = (unsigned int long long)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int long long)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int long long)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int long long)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int long long)(_zzq_arg5); \ + _zzq_args[6] = (unsigned int long long)(_zzq_default); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 4,%1\n\t" \ + "ld 3, 48(4)\n\t" \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" \ + : "=b" (_zzq_result) \ + : "b" (_zzq_ptr) \ + : "r3", "r4", "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc64_aix5 */ + +/* Insert assembly code for other platforms here... 
*/ + +#endif /* NVALGRIND */ + + +/* ------------------------------------------------------------------ */ +/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */ +/* ugly. It's the least-worst tradeoff I can think of. */ +/* ------------------------------------------------------------------ */ + +/* This section defines magic (a.k.a appalling-hack) macros for doing + guaranteed-no-redirection macros, so as to get from function + wrappers to the functions they are wrapping. The whole point is to + construct standard call sequences, but to do the call itself with a + special no-redirect call pseudo-instruction that the JIT + understands and handles specially. This section is long and + repetitious, and I can't see a way to make it shorter. + + The naming scheme is as follows: + + CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc} + + 'W' stands for "word" and 'v' for "void". Hence there are + different macros for calling arity 0, 1, 2, 3, 4, etc, functions, + and for each, the possibility of returning a word-typed result, or + no result. +*/ + +/* Use these to write the name of your wrapper. NOTE: duplicates + VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */ + +/* Use an extra level of macroisation so as to ensure the soname/fnname + args are fully macro-expanded before pasting them together. */ +#define VG_CONCAT4(_aa,_bb,_cc,_dd) _aa##_bb##_cc##_dd + +#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \ + VG_CONCAT4(_vgwZU_,soname,_,fnname) + +#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \ + VG_CONCAT4(_vgwZZ_,soname,_,fnname) + +/* Use this macro from within a wrapper function to collect the + context (address and possibly other info) of the original function. + Once you have that you can then use it in one of the CALL_FN_ + macros. The type of the argument _lval is OrigFn. */ +#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) + +/* Derivatives of the main macros below, for calling functions + returning void. 
*/ + +#define CALL_FN_v_v(fnptr) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_v(_junk,fnptr); } while (0) + +#define CALL_FN_v_W(fnptr, arg1) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_W(_junk,fnptr,arg1); } while (0) + +#define CALL_FN_v_WW(fnptr, arg1,arg2) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0) + +#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0) + +#define CALL_FN_v_WWWW(fnptr, arg1,arg2,arg3,arg4) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWWW(_junk,fnptr,arg1,arg2,arg3,arg4); } while (0) + +#define CALL_FN_v_5W(fnptr, arg1,arg2,arg3,arg4,arg5) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_5W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5); } while (0) + +#define CALL_FN_v_6W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_6W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6); } while (0) + +#define CALL_FN_v_7W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6,arg7) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_7W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6,arg7); } while (0) + +/* ------------------------- x86-{linux,darwin} ---------------- */ + +#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin) + +/* These regs are trashed by the hidden call. No need to mention eax + as gcc can already see that, plus causes gcc to bomb. */ +#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx" + +/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned + long) == 4. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $4, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $8, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); 
\ + __asm__ volatile( \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $12, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $16, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $20, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + 
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $24, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $28, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { 
\ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $32, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $36, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ 
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $40, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + 
_argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $44, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "pushl 48(%%eax)\n\t" \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $48, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* 
PLAT_x86_linux || PLAT_x86_darwin */ + +/* ------------------------ amd64-{linux,darwin} --------------- */ + +#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin) + +/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \ + "rdi", "r8", "r9", "r10", "r11" + +/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned + long) == 8. */ + +/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_ + macros. In order not to trash the stack redzone, we need to drop + %rsp by 128 before the hidden call, and restore afterwards. The + nastyness is that it is only by luck that the stack still appears + to be unwindable during the hidden call - since then the behaviour + of any routine using this macro does not match what the CFI data + says. Sigh. + + Why is this important? Imagine that a wrapper has a stack + allocated local, and passes to the hidden call, a pointer to it. + Because gcc does not know about the hidden call, it may allocate + that local in the redzone. Unfortunately the hidden call may then + trash it before it comes to use it. So we must step clear of the + redzone, for the duration of the hidden call, to make it safe. + + Probably the same problem afflicts the other redzone-style ABIs too + (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is + self describing (none of this CFI nonsense) so at least messing + with the stack pointer doesn't give a danger of non-unwindable + stack. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned 
long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 
8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" /* step back over the redzone only AFTER the hidden call */ \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ +
"movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $8, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $16, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ 
+ _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $24, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $32, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = 
(__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $40, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned 
long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 96(%%rax)\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $48, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_amd64_linux || PLAT_amd64_darwin */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +/* This is useful for finding out about the on-stack stuff: + + extern int f9 ( int,int,int,int,int,int,int,int,int ); + extern int f10 ( int,int,int,int,int,int,int,int,int,int ); + extern int f11 ( int,int,int,int,int,int,int,int,int,int,int ); + extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int ); + + int g9 ( void ) { + return f9(11,22,33,44,55,66,77,88,99); + } + int g10 ( void ) { + return f10(11,22,33,44,55,66,77,88,99,110); + } + int g11 ( void ) { + return f11(11,22,33,44,55,66,77,88,99,110,121); + } + int g12 ( void ) { + return f12(11,22,33,44,55,66,77,88,99,110,121,132); + } +*/ + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. 
*/ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc32-linux, + sizeof(unsigned long) == 4. */ + +#define CALL_FN_W_v(lval, orig) /* CALL_FN_W_ family (ppc32): _argvec[0] = target (_orig.nraddr), _argvec[n] = argn; r3 is copied to lval */ \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = 
(__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + __asm__ volatile( 
\ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" /* arg5->r7 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" /* arg5->r7 */ \ + "lwz 8,24(11)\n\t" /* arg6->r8 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 
6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" /* arg5->r7 */ \ + "lwz 8,24(11)\n\t" /* arg6->r8 */ \ + "lwz 9,28(11)\n\t" /* arg7->r9 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" /* arg5->r7 */ \ + "lwz 8,24(11)\n\t" /* arg6->r8 */ \ + "lwz 9,28(11)\n\t" /* arg7->r9 */ \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned 
long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "addi 1,1,-16\n\t" /* expand stack frame */ \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" /* arg5->r7 */ \ + "lwz 8,24(11)\n\t" /* arg6->r8 */ \ + "lwz 9,28(11)\n\t" /* arg7->r9 */ \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" /* restore frame */ \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "addi 1,1,-16\n\t" /* expand stack frame */ \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" /* arg5->r7 */ \ + "lwz 8,24(11)\n\t" /* arg6->r8 */ \ + "lwz 9,28(11)\n\t" /* arg7->r9 */ \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" /* restore frame */ \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", 
__CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "addi 1,1,-32\n\t" /* expand stack frame */ \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" /* arg5->r7 */ \ + "lwz 8,24(11)\n\t" /* arg6->r8 */ \ + "lwz 9,28(11)\n\t" /* arg7->r9 */ \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" /* restore frame */ \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned 
long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + _argvec[12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \ + "addi 1,1,-32\n\t" /* expand stack frame */ \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,20(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" /* arg2->r4 */ \ + "lwz 5,12(11)\n\t" /* arg3->r5 */ \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" /* arg5->r7 */ \ + "lwz 8,24(11)\n\t" /* arg6->r8 */ \ + "lwz 9,28(11)\n\t" /* arg7->r9 */ \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" /* restore frame */ \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned + long) == 8. 
*/ + +#define CALL_FN_W_v(lval, orig) /* CALL_FN_W_ family (ppc64): _argvec[1] = _orig.r2, _argvec[2] = target (_orig.nraddr), _argvec[2+n] = argn; r3 is copied to lval */ \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ 
volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = 
(unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* 
_argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 
*/ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long 
_argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned 
long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = 
(unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ 
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" /* r11 = &_argvec[2] again */ \ + "mr %0,3\n\t" /* r3->_res */ \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64_linux */ + +/* ------------------------- arm-linux ------------------------- */ + +#if defined(PLAT_arm_linux) + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS "r0", "r1", "r2", "r3","r4","r14" + +/* These CALL_FN_ macros assume that on arm-linux, sizeof(unsigned + long) == 4. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + 
_argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "ldr r0, [%1, #20] \n\t" \ + "push {r0} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #4 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + 
: /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "push {r0, r1} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #8 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "push {r0, r1, r2} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #12 \n\t" \ + "mov %0, r0" \ + : 
/*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "push {r0, r1, r2, r3} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #16 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" 
\ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #20 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "ldr r0, [%1, #40] \n\t" \ + "push {r0} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #24 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = 
(orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "ldr r0, [%1, #40] \n\t" \ + "ldr r1, [%1, #44] \n\t" \ + "push {r0, r1} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #28 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory",__CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + 
_argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "ldr r0, [%1, #40] \n\t" \ + "ldr r1, [%1, #44] \n\t" \ + "ldr r2, [%1, #48] \n\t" \ + "push {r0, r1, r2} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + "add sp, sp, #32 \n\t" \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_arm_linux */ + +/* ------------------------ ppc32-aix5 ------------------------- */ + +#if defined(PLAT_ppc32_aix5) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Expand the stack frame, copying enough info that unwinding + still works. Trashes r3. */ + +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr "\n\t" \ + "lwz 3," #_n_fr "(1)\n\t" \ + "stw 3,0(1)\n\t" + +#define VG_CONTRACT_FRAME_BY(_n_fr) \ + "addi 1,1," #_n_fr "\n\t" + +/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned + long) == 4. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned 
long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile 
unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 
11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + 
_argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 
6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(64) \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 
2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(64) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(64) \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(64) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", 
__CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(72) \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,64(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(72) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) 
+ +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(72) \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,68(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,64(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(72) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = 
(__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_aix5 */ + +/* ------------------------ ppc64-aix5 ------------------------- */ + +#if defined(PLAT_ppc64_aix5) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Expand the stack frame, copying enough info that unwinding + still works. Trashes r3. */ + +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr "\n\t" \ + "ld 3," #_n_fr "(1)\n\t" \ + "std 3,0(1)\n\t" + +#define VG_CONTRACT_FRAME_BY(_n_fr) \ + "addi 1,1," #_n_fr "\n\t" + +/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned + long) == 8. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned 
long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = 
(unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile 
unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" 
/* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); 
\ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned 
long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(128) \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(128) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's 
tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(128) \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(128) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(144) \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 
3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(144) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(144) \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" 
\ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(144) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64_aix5 */ + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ +/* */ +/* ------------------------------------------------------------------ */ + +/* Some request codes. There are many more of these, but most are not + exposed to end-user view. These are the public ones, all of the + form 0x1000 + small_number. + + Core ones are in the range 0x00000000--0x0000ffff. The non-public + ones start at 0x2000. +*/ + +/* These macros are used by tools -- they must be public, but don't + embed them into other programs. */ +#define VG_USERREQ_TOOL_BASE(a,b) \ + ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) +#define VG_IS_TOOL_USERREQ(a, b, v) \ + (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000)) + +/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! + This enum comprises an ABI exported by Valgrind to programs + which use client requests. DO NOT CHANGE THE ORDER OF THESE + ENTRIES, NOR DELETE ANY -- add new ones at the end. 
*/ +typedef + enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, + VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, + + /* These allow any function to be called from the simulated + CPU but run on the real CPU. Nb: the first arg passed to + the function is always the ThreadId of the running + thread! So CLIENT_CALL0 actually requires a 1 arg + function, etc. */ + VG_USERREQ__CLIENT_CALL0 = 0x1101, + VG_USERREQ__CLIENT_CALL1 = 0x1102, + VG_USERREQ__CLIENT_CALL2 = 0x1103, + VG_USERREQ__CLIENT_CALL3 = 0x1104, + + /* Can be useful in regression testing suites -- eg. can + send Valgrind's output to /dev/null and still count + errors. */ + VG_USERREQ__COUNT_ERRORS = 0x1201, + + /* These are useful and can be interpreted by any tool that + tracks malloc() et al, by using vg_replace_malloc.c. */ + VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301, + VG_USERREQ__FREELIKE_BLOCK = 0x1302, + /* Memory pool support. */ + VG_USERREQ__CREATE_MEMPOOL = 0x1303, + VG_USERREQ__DESTROY_MEMPOOL = 0x1304, + VG_USERREQ__MEMPOOL_ALLOC = 0x1305, + VG_USERREQ__MEMPOOL_FREE = 0x1306, + VG_USERREQ__MEMPOOL_TRIM = 0x1307, + VG_USERREQ__MOVE_MEMPOOL = 0x1308, + VG_USERREQ__MEMPOOL_CHANGE = 0x1309, + VG_USERREQ__MEMPOOL_EXISTS = 0x130a, + + /* Allow printfs to valgrind log. */ + /* The first two pass the va_list argument by value, which + assumes it is the same size as or smaller than a UWord, + which generally isn't the case. Hence are deprecated. + The second two pass the vargs by reference and so are + immune to this problem. */ + /* both :: char* fmt, va_list vargs (DEPRECATED) */ + VG_USERREQ__PRINTF = 0x1401, + VG_USERREQ__PRINTF_BACKTRACE = 0x1402, + /* both :: char* fmt, va_list* vargs */ + VG_USERREQ__PRINTF_VALIST_BY_REF = 0x1403, + VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF = 0x1404, + + /* Stack support. 
*/ + VG_USERREQ__STACK_REGISTER = 0x1501, + VG_USERREQ__STACK_DEREGISTER = 0x1502, + VG_USERREQ__STACK_CHANGE = 0x1503, + + /* Wine support */ + VG_USERREQ__LOAD_PDB_DEBUGINFO = 0x1601 + } Vg_ClientRequest; + +#if !defined(__GNUC__) +# define __extension__ /* */ +#endif + +/* Returns the number of Valgrinds this code is running under. That + is, 0 if running natively, 1 if running under Valgrind, 2 if + running under Valgrind which is running under another Valgrind, + etc. */ +#define RUNNING_ON_VALGRIND __extension__ \ + ({unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */, \ + VG_USERREQ__RUNNING_ON_VALGRIND, \ + 0, 0, 0, 0, 0); \ + _qzz_res; \ + }) + + +/* Discard translation of code in the range [_qzz_addr .. _qzz_addr + + _qzz_len - 1]. Useful if you are debugging a JITter or some such, + since it provides a way to make sure valgrind will retranslate the + invalidated area. Returns no value. */ +#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__DISCARD_TRANSLATIONS, \ + _qzz_addr, _qzz_len, 0, 0, 0); \ + } + + +/* These requests are for getting Valgrind itself to print something. + Possibly with a backtrace. This is a really ugly hack. The return value + is the number of characters printed, excluding the "**** " part at the + start and the backtrace (if present). */ + +#if defined(NVALGRIND) + +# define VALGRIND_PRINTF(...) +# define VALGRIND_PRINTF_BACKTRACE(...) + +#else /* NVALGRIND */ + +/* Modern GCC will optimize the static routine out if unused, + and unused attribute will shut down warnings about it. */ +static int VALGRIND_PRINTF(const char *format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +static int +VALGRIND_PRINTF(const char *format, ...) 
+{ + unsigned long _qzz_res; + va_list vargs; + va_start(vargs, format); + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, + VG_USERREQ__PRINTF_VALIST_BY_REF, + (unsigned long)format, + (unsigned long)&vargs, + 0, 0, 0); + va_end(vargs); + return (int)_qzz_res; +} + +static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +static int +VALGRIND_PRINTF_BACKTRACE(const char *format, ...) +{ + unsigned long _qzz_res; + va_list vargs; + va_start(vargs, format); + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, + VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF, + (unsigned long)format, + (unsigned long)&vargs, + 0, 0, 0); + va_end(vargs); + return (int)_qzz_res; +} + +#endif /* NVALGRIND */ + + +/* These requests allow control to move from the simulated CPU to the + real CPU, calling an arbitrary function. + + Note that the current ThreadId is inserted as the first argument. + So this call: + + VALGRIND_NON_SIMD_CALL2(f, arg1, arg2) + + requires f to have this signature: + + Word f(Word tid, Word arg1, Word arg2) + + where "Word" is a word-sized type. + + Note that these client requests are not entirely reliable. For example, + if you call a function with them that subsequently calls printf(), + there's a high chance Valgrind will crash. Generally, your prospects of + these working are made higher if the called function does not refer to + any global variables, and does not refer to any libc or other functions + (printf et al). Any kind of entanglement with libc or dynamic linking is + likely to have a bad outcome, for tricky reasons which we've grappled + with a lot in the past.
+*/ +#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) /* statement expression: issues VG_USERREQ__CLIENT_CALL0 for _qyy_fn and yields _qyy_res (unsigned long) */ \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL0, \ + _qyy_fn, \ + 0, 0, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) /* as CALL0, forwarding _qyy_arg1 with VG_USERREQ__CLIENT_CALL1 */ \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL1, \ + _qyy_fn, \ + _qyy_arg1, 0, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) /* as CALL0, forwarding two args with VG_USERREQ__CLIENT_CALL2 */ \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL2, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) /* as CALL0, forwarding three args with VG_USERREQ__CLIENT_CALL3 */ \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL3, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, \ + _qyy_arg3, 0); \ + _qyy_res; \ + }) + + +/* Counts the number of errors that have been recorded by a tool. Nb: + the tool must record the errors with VG_(maybe_record_error)() or + VG_(unique_error)() for them to be counted. Yields _qyy_res as an unsigned int. */ +#define VALGRIND_COUNT_ERRORS \ + __extension__ \ + ({unsigned int _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__COUNT_ERRORS, \ + 0, 0, 0, 0, 0); \ + _qyy_res; \ + }) + +/* Several Valgrind tools (Memcheck, Massif, Helgrind, DRD) rely on knowing + when heap blocks are allocated in order to give accurate results. This + happens automatically for the standard allocator functions such as + malloc(), calloc(), realloc(), memalign(), new, new[], free(), delete, + delete[], etc. + + But if your program uses a custom allocator, this doesn't automatically + happen, and Valgrind will not do as well.
For example, if you allocate + superblocks with mmap() and then allocate chunks of the superblocks, all + Valgrind's observations will be at the mmap() level and it won't know that + the chunks should be considered separate entities. In Memcheck's case, + that means you probably won't get heap block overrun detection (because + there won't be redzones marked as unaddressable) and you definitely won't + get any leak detection. + + The following client requests allow a custom allocator to be annotated so + that it can be handled accurately by Valgrind. + + VALGRIND_MALLOCLIKE_BLOCK marks a region of memory as having been allocated + by a malloc()-like function. For Memcheck (an illustrative case), this + does two things: + + - It records that the block has been allocated. This means any addresses + within the block mentioned in error messages will be + identified as belonging to the block. It also means that if the block + isn't freed it will be detected by the leak checker. + + - It marks the block as being addressable and undefined (if 'is_zeroed' is + not set), or addressable and defined (if 'is_zeroed' is set). This + controls how accesses to the block by the program are handled. + + 'addr' is the start of the usable block (ie. after any + redzone), 'sizeB' is its size. 'rzB' is the redzone size if the allocator + can apply redzones -- these are blocks of padding at the start and end of + each block. Adding redzones is recommended as it makes it much more likely + Valgrind will spot block overruns. 'is_zeroed' indicates if the memory is + zeroed (or filled with another predictable value), as is the case for + calloc(). + + VALGRIND_MALLOCLIKE_BLOCK should be put immediately after the point where a + heap block -- that will be used by the client program -- is allocated.
+ It's best to put it at the outermost level of the allocator if possible; + for example, if you have a function my_alloc() which calls + internal_alloc(), and the client request is put inside internal_alloc(), + stack traces relating to the heap block will contain entries for both + my_alloc() and internal_alloc(), which is probably not what you want. + + For Memcheck users: if you use VALGRIND_MALLOCLIKE_BLOCK to carve out + custom blocks from within a heap block, B, that has been allocated with + malloc/calloc/new/etc, then block B will be *ignored* during leak-checking + -- the custom blocks will take precedence. + + VALGRIND_FREELIKE_BLOCK is the partner to VALGRIND_MALLOCLIKE_BLOCK. For + Memcheck, it does two things: + + - It records that the block has been deallocated. This assumes that the + block was annotated as having been allocated via + VALGRIND_MALLOCLIKE_BLOCK. Otherwise, an error will be issued. + + - It marks the block as being unaddressable. + + VALGRIND_FREELIKE_BLOCK should be put immediately after the point where a + heap block is deallocated. + + In many cases, these two client requests will not be enough to get your + allocator working well with Memcheck. More specifically, if your allocator + writes to freed blocks in any way then a VALGRIND_MAKE_MEM_UNDEFINED call + will be necessary to mark the memory as addressable just before the zeroing + occurs, otherwise you'll get a lot of invalid write errors. For example, + you'll need to do this if your allocator recycles freed blocks, but it + zeroes them before handing them back out (via VALGRIND_MALLOCLIKE_BLOCK). + Alternatively, if your allocator reuses freed blocks for allocator-internal + data structures, VALGRIND_MAKE_MEM_UNDEFINED calls will also be necessary. + + Really, what's happening is a blurring of the lines between the client + program and the allocator... 
after VALGRIND_FREELIKE_BLOCK is called, the + memory should be considered unaddressable to the client program, but the + allocator knows more than the rest of the client program and so may be able + to safely access it. Extra client requests are necessary for Valgrind to + understand the distinction between the allocator and the rest of the + program. + + Note: there is currently no VALGRIND_REALLOCLIKE_BLOCK client request; it + has to be emulated with MALLOCLIKE/FREELIKE and memory copying. + + Ignored if addr == 0. +*/ +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) /* expands to a braced block, not an expression: it yields no value, and an unbraced `if (c) MACRO(...); else` will not compile */ \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MALLOCLIKE_BLOCK, \ + addr, sizeB, rzB, is_zeroed, 0); \ + } + +/* See the comment for VALGRIND_MALLOCLIKE_BLOCK for details. + Ignored if addr == 0. +*/ +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__FREELIKE_BLOCK, \ + addr, rzB, 0, 0, 0); \ + } + +/* Create a memory pool: issues VG_USERREQ__CREATE_MEMPOOL with (pool, rzB, is_zeroed); braced block, yields no value. */ +#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__CREATE_MEMPOOL, \ + pool, rzB, is_zeroed, 0, 0); \ + } + +/* Destroy a memory pool: issues VG_USERREQ__DESTROY_MEMPOOL with (pool); braced block, yields no value. */ +#define VALGRIND_DESTROY_MEMPOOL(pool) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__DESTROY_MEMPOOL, \ + pool, 0, 0, 0, 0); \ + } + +/* Associate a piece of memory (addr, size) with a memory pool; braced block, yields no value. */ +#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_ALLOC, \ + pool, addr, size, 0, 0); \ + } + +/* Disassociate a piece of memory (addr) from a memory pool; braced block, yields no value. */ +#define VALGRIND_MEMPOOL_FREE(pool, addr) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_FREE, \ + pool, addr, 0, 0, 0); \ + } + +/* Disassociate any pieces outside a particular range.
*/ +#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_TRIM, \ + pool, addr, size, 0, 0); \ + } + +/* Move a memory pool: issues VG_USERREQ__MOVE_MEMPOOL with (poolA, poolB); presumably the pool anchored at poolA becomes anchored at poolB -- confirm against the Memcheck manual. */ +#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MOVE_MEMPOOL, \ + poolA, poolB, 0, 0, 0); \ + } + +/* Resize and/or move a piece associated with a memory pool. */ +#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_CHANGE, \ + pool, addrA, addrB, size, 0); \ + } + +/* Return 1 if a mempool exists, else 0. Statement expression yielding _qzz_res (unsigned int). */ +#define VALGRIND_MEMPOOL_EXISTS(pool) \ + __extension__ \ + ({unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_EXISTS, \ + pool, 0, 0, 0, 0); \ + _qzz_res; \ + }) + +/* Mark a piece of memory as being a stack. Returns a stack id. */ +#define VALGRIND_STACK_REGISTER(start, end) \ + __extension__ \ + ({unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_REGISTER, \ + start, end, 0, 0, 0); \ + _qzz_res; \ + }) + +/* Unmark the piece of memory associated with a stack id as being a + stack. */ +#define VALGRIND_STACK_DEREGISTER(id) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_DEREGISTER, \ + id, 0, 0, 0, 0); \ + } + +/* Change the start and end address of the stack id. */ +#define VALGRIND_STACK_CHANGE(id, start, end) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_CHANGE, \ + id, start, end, 0, 0); \ + } + +/* Load PDB debug info for Wine PE image_map.
*/ +#define VALGRIND_LOAD_PDB_DEBUGINFO(fd, ptr, total_size, delta) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__LOAD_PDB_DEBUGINFO, \ + fd, ptr, total_size, delta, 0); \ + } + + +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64_linux +#undef PLAT_arm_linux +#undef PLAT_ppc32_aix5 +#undef PLAT_ppc64_aix5 + +#endif /* __VALGRIND_H */ diff --git a/src/openalpr/support/re2/variadic_function.h b/src/openalpr/support/re2/variadic_function.h new file mode 100644 index 0000000..7c7d6d5 --- /dev/null +++ b/src/openalpr/support/re2/variadic_function.h @@ -0,0 +1,344 @@ +// Copyright 2010 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef RE2_VARIADIC_FUNCTION_H_ +#define RE2_VARIADIC_FUNCTION_H_ + +namespace re2 { + +template +class VariadicFunction2 { + public: + Result operator()(Param0 p0, Param1 p1) const { + return Func(p0, p1, 0, 0); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0) const { + const Arg* const args[] = { &a0 }; + return Func(p0, p1, args, 1); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1) const { + const Arg* const args[] = { &a0, &a1 }; + return Func(p0, p1, args, 2); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2) const { + const Arg* const args[] = { &a0, &a1, &a2 }; + return Func(p0, p1, args, 3); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3 }; + return Func(p0, p1, args, 4); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4 }; + return Func(p0, p1, args, 5); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& 
a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5 }; + return Func(p0, p1, args, 6); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6 }; + return Func(p0, p1, args, 7); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7 }; + return Func(p0, p1, args, 8); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8 }; + return Func(p0, p1, args, 9); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9 }; + return Func(p0, p1, args, 10); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10 }; + return Func(p0, p1, args, 11); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11) const { + const Arg* const args[] = { &a0, &a1, 
&a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11 }; + return Func(p0, p1, args, 12); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12 }; + return Func(p0, p1, args, 13); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13 }; + return Func(p0, p1, args, 14); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14 }; + return Func(p0, p1, args, 15); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15 }; + return Func(p0, p1, args, 16); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const 
Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16 }; + return Func(p0, p1, args, 17); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17 }; + return Func(p0, p1, args, 18); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18 }; + return Func(p0, p1, args, 19); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19 }; + return Func(p0, p1, args, 20); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const 
Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, + &a20 }; + return Func(p0, p1, args, 21); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, + &a21 }; + return Func(p0, p1, args, 22); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, + const Arg& a22) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, + &a21, &a22 }; + return Func(p0, p1, args, 23); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& 
a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, + const Arg& a22, const Arg& a23) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, + &a21, &a22, &a23 }; + return Func(p0, p1, args, 24); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, + const Arg& a22, const Arg& a23, const Arg& a24) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, + &a21, &a22, &a23, &a24 }; + return Func(p0, p1, args, 25); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, + const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, + &a21, &a22, &a23, &a24, &a25 }; + return Func(p0, p1, args, 26); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const 
Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, + const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, + const Arg& a26) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, + &a21, &a22, &a23, &a24, &a25, &a26 }; + return Func(p0, p1, args, 27); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, + const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, + const Arg& a26, const Arg& a27) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, + &a21, &a22, &a23, &a24, &a25, &a26, &a27 }; + return Func(p0, p1, args, 28); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, + const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, + const Arg& a26, const Arg& a27, const Arg& a28) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, + &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28 }; + return Func(p0, p1, args, 29); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const 
Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, + const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, + const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, + &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29 }; + return Func(p0, p1, args, 30); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, + const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, + const Arg& a26, const Arg& a27, const Arg& a28, const Arg& a29, + const Arg& a30) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, + &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29, &a30 }; + return Func(p0, p1, args, 31); + } + + Result operator()(Param0 p0, Param1 p1, const Arg& a0, const Arg& a1, + const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, + const Arg& a6, const Arg& a7, const Arg& a8, const Arg& a9, + const Arg& a10, const Arg& a11, const Arg& a12, const Arg& a13, + const Arg& a14, const Arg& a15, const Arg& a16, const Arg& a17, + const Arg& a18, const Arg& a19, const Arg& a20, const Arg& a21, + const Arg& a22, const Arg& a23, const Arg& a24, const Arg& a25, + const Arg& a26, const Arg& a27, const Arg& a28, 
const Arg& a29, + const Arg& a30, const Arg& a31) const { + const Arg* const args[] = { &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8, + &a9, &a10, &a11, &a12, &a13, &a14, &a15, &a16, &a17, &a18, &a19, &a20, + &a21, &a22, &a23, &a24, &a25, &a26, &a27, &a28, &a29, &a30, &a31 }; + return Func(p0, p1, args, 32); + } +}; + +} // namespace re2 + +#endif // RE2_VARIADIC_FUNCTION_H_ diff --git a/src/openalpr/support/re2/walker-inl.h b/src/openalpr/support/re2/walker-inl.h new file mode 100644 index 0000000..bdcf7f5 --- /dev/null +++ b/src/openalpr/support/re2/walker-inl.h @@ -0,0 +1,244 @@ +// Copyright 2006 The RE2 Authors. All Rights Reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Helper class for traversing Regexps without recursion. +// Clients should declare their own subclasses that override +// the PreVisit and PostVisit methods, which are called before +// and after visiting the subexpressions. + +// Not quite the Visitor pattern, because (among other things) +// the Visitor pattern is recursive. + +#ifndef RE2_WALKER_INL_H__ +#define RE2_WALKER_INL_H__ + +#include "re2/regexp.h" + +namespace re2 { + +template struct WalkState; + +template class Regexp::Walker { + public: + Walker(); + virtual ~Walker(); + + // Virtual method called before visiting re's children. + // PreVisit passes ownership of its return value to its caller. + // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg + // and passed to the child PreVisits and PostVisits as parent_arg. + // At the top-most Regexp, parent_arg is arg passed to walk. + // If PreVisit sets *stop to true, the walk does not recurse + // into the children. Instead it behaves as though the return + // value from PreVisit is the return value from PostVisit. + // The default PreVisit returns parent_arg. + virtual T PreVisit(Regexp* re, T parent_arg, bool* stop); + + // Virtual method called after visiting re's children. 
+ // The pre_arg is the T that PreVisit returned. + // The child_args is a vector of the T that the child PostVisits returned. + // PostVisit takes ownership of pre_arg. + // PostVisit takes ownership of the Ts + // in *child_args, but not the vector itself. + // PostVisit passes ownership of its return value + // to its caller. + // The default PostVisit simply returns pre_arg. + virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg, + T* child_args, int nchild_args); + + // Virtual method called to copy a T, + // when Walk notices that more than one child is the same re. + virtual T Copy(T arg); + + // Virtual method called to do a "quick visit" of the re, + // but not its children. Only called once the visit budget + // has been used up and we're trying to abort the walk + // as quickly as possible. Should return a value that + // makes sense for the parent PostVisits still to be run. + // This function is (hopefully) only called by + // WalkExponential, but must be implemented by all clients, + // just in case. + virtual T ShortVisit(Regexp* re, T parent_arg) = 0; + + // Walks over a regular expression. + // Top_arg is passed as parent_arg to PreVisit and PostVisit of re. + // Returns the T returned by PostVisit on re. + T Walk(Regexp* re, T top_arg); + + // Like Walk, but doesn't use Copy. This can lead to + // exponential runtimes on cross-linked Regexps like the + // ones generated by Simplify. To help limit this, + // at most max_visits nodes will be visited and then + // the walk will be cut off early. + // If the walk *is* cut off early, ShortVisit(re) + // will be called on regexps that cannot be fully + // visited rather than calling PreVisit/PostVisit. + T WalkExponential(Regexp* re, T top_arg, int max_visits); + + // Clears the stack. Should never be necessary, since + // Walk always enters and exits with an empty stack. + // Logs DFATAL if stack is not already clear. + void Reset(); + + // Returns whether walk was cut off. 
+ bool stopped_early() { return stopped_early_; } + + private: + // Walk state for the entire traversal. + stack >* stack_; + bool stopped_early_; + int max_visits_; + + T WalkInternal(Regexp* re, T top_arg, bool use_copy); + + DISALLOW_COPY_AND_ASSIGN(Walker); +}; + +template T Regexp::Walker::PreVisit(Regexp* re, + T parent_arg, + bool* stop) { + return parent_arg; +} + +template T Regexp::Walker::PostVisit(Regexp* re, + T parent_arg, + T pre_arg, + T* child_args, + int nchild_args) { + return pre_arg; +} + +template T Regexp::Walker::Copy(T arg) { + return arg; +} + +// State about a single level in the traversal. +template struct WalkState { + WalkState(Regexp* re, T parent) + : re(re), + n(-1), + parent_arg(parent), + child_args(NULL) { } + + Regexp* re; // The regexp + int n; // The index of the next child to process; -1 means need to PreVisit + T parent_arg; // Accumulated arguments. + T pre_arg; + T child_arg; // One-element buffer for child_args. + T* child_args; +}; + +template Regexp::Walker::Walker() { + stack_ = new stack >; + stopped_early_ = false; +} + +template Regexp::Walker::~Walker() { + Reset(); + delete stack_; +} + +// Clears the stack. Should never be necessary, since +// Walk always enters and exits with an empty stack. +// Logs DFATAL if stack is not already clear. 
+template void Regexp::Walker::Reset() { + if (stack_ && stack_->size() > 0) { + LOG(DFATAL) << "Stack not empty."; + while (stack_->size() > 0) { + if (stack_->top().re->nsub_ > 1) delete[] stack_->top().child_args; // heap array (new T[]) only when nsub_ > 1; otherwise NULL or the in-struct child_arg buffer, which must not be freed + stack_->pop(); + } + } +} + +template T Regexp::Walker::WalkInternal(Regexp* re, T top_arg, + bool use_copy) { + Reset(); + + if (re == NULL) { + LOG(DFATAL) << "Walk NULL"; + return top_arg; + } + + stack_->push(WalkState(re, top_arg)); + + WalkState* s; + for (;;) { + T t; + s = &stack_->top(); + Regexp* re = s->re; + switch (s->n) { + case -1: { + if (--max_visits_ < 0) { + stopped_early_ = true; + t = ShortVisit(re, s->parent_arg); + break; + } + bool stop = false; + s->pre_arg = PreVisit(re, s->parent_arg, &stop); + if (stop) { + t = s->pre_arg; + break; + } + s->n = 0; + s->child_args = NULL; + if (re->nsub_ == 1) + s->child_args = &s->child_arg; + else if (re->nsub_ > 1) + s->child_args = new T[re->nsub_]; + // Fall through. + } + default: { + if (re->nsub_ > 0) { + Regexp** sub = re->sub(); + if (s->n < re->nsub_) { + if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) { + s->child_args[s->n] = Copy(s->child_args[s->n - 1]); + s->n++; + } else { + stack_->push(WalkState(sub[s->n], s->pre_arg)); + } + continue; + } + } + + t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n); + if (re->nsub_ > 1) + delete[] s->child_args; + break; + } + } + + // We've finished stack_->top(). + // Update next guy down. + stack_->pop(); + if (stack_->size() == 0) + return t; + s = &stack_->top(); + if (s->child_args != NULL) + s->child_args[s->n] = t; + else + s->child_arg = t; + s->n++; + } +} + +template T Regexp::Walker::Walk(Regexp* re, T top_arg) { + // Without the exponential walking behavior, + // this budget should be more than enough for any + // regexp, and yet not enough to get us in trouble + // as far as CPU time. 
+ max_visits_ = 1000000; + return WalkInternal(re, top_arg, true); +} + +template T Regexp::Walker::WalkExponential(Regexp* re, T top_arg, + int max_visits) { + max_visits_ = max_visits; + return WalkInternal(re, top_arg, false); +} + +} // namespace re2 + +#endif // RE2_WALKER_INL_H__ diff --git a/src/openalpr/support/regex/ascii.c b/src/openalpr/support/regex/ascii.c deleted file mode 100644 index c2715f4..0000000 --- a/src/openalpr/support/regex/ascii.c +++ /dev/null @@ -1,58 +0,0 @@ -/********************************************************************** - ascii.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2006 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -static int -ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; -} - -OnigEncodingType OnigEncodingASCII = { - onigenc_single_byte_mbc_enc_len, - "US-ASCII", /* name */ - 1, /* max byte length */ - 1, /* min byte length */ - onigenc_is_mbc_newline_0x0a, - onigenc_single_byte_mbc_to_code, - onigenc_single_byte_code_to_mbclen, - onigenc_single_byte_code_to_mbc, - onigenc_ascii_mbc_case_fold, - onigenc_ascii_apply_all_case_fold, - onigenc_ascii_get_case_fold_codes_by_str, - onigenc_minimum_property_name_to_ctype, - ascii_is_code_ctype, - onigenc_not_support_get_ctype_code_range, - onigenc_single_byte_left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/src/openalpr/support/regex/onig_config.h b/src/openalpr/support/regex/onig_config.h deleted file mode 100644 index 90e6473..0000000 --- a/src/openalpr/support/regex/onig_config.h +++ /dev/null @@ -1,130 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.in by autoheader. */ - -#ifdef _WIN32 -#include "win32/onig_config.h" -#else - -/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP - systems. This function is required for `alloca.c' support on those systems. - */ -/* #undef CRAY_STACKSEG_END */ - -/* Define to 1 if using `alloca.c'. 
*/ -/* #undef C_ALLOCA */ - -/* Define to 1 if you have `alloca', as a function or macro. */ -#define HAVE_ALLOCA 1 - -/* Define to 1 if you have and it should be used (not on Ultrix). - */ -#define HAVE_ALLOCA_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define if compilerr supports prototypes */ -#define HAVE_PROTOTYPES 1 - -/* Define if compiler supports stdarg prototypes */ -#define HAVE_STDARG_PROTOTYPES 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TIMES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TIME_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* Name of package */ -#define PACKAGE "onig" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "onig" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "onig 5.9.6" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "onig" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "5.9.6" - -/* The size of `int', as computed by sizeof. */ -#define SIZEOF_INT 4 - -/* The size of `long', as computed by sizeof. */ -#define SIZEOF_LONG 8 - -/* The size of `short', as computed by sizeof. */ -#define SIZEOF_SHORT 2 - -/* If using the C implementation of alloca, define if you know the - direction of stack growth for your system; otherwise it will be - automatically deduced at runtime. - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown */ -/* #undef STACK_DIRECTION */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Define to 1 if you can safely include both and . */ -#define TIME_WITH_SYS_TIME 1 - -/* Define if combination explosion check */ -/* #undef USE_COMBINATION_EXPLOSION_CHECK */ - -/* Define if enable CR+NL as line terminator */ -/* #undef USE_CRNL_AS_LINE_TERMINATOR */ - -/* Version number of package */ -#define VERSION "5.9.6" - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* Define to `unsigned int' if does not define. */ -/* #undef size_t */ - - - -#endif \ No newline at end of file diff --git a/src/openalpr/support/regex/oniggnu.h b/src/openalpr/support/regex/oniggnu.h deleted file mode 100644 index 3da9f23..0000000 --- a/src/openalpr/support/regex/oniggnu.h +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef ONIGGNU_H -#define ONIGGNU_H -/********************************************************************** - oniggnu.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "oniguruma.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define RE_MBCTYPE_ASCII 0 -#define RE_MBCTYPE_EUC 1 -#define RE_MBCTYPE_SJIS 2 -#define RE_MBCTYPE_UTF8 3 - -/* GNU regex options */ -#ifndef RE_NREGS -#define RE_NREGS ONIG_NREGION -#endif - -#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE -#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND -#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE -#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE -#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST -#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE) -#define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY -#define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE -#define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP -#define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP - - -ONIG_EXTERN -void re_mbcinit P_((int)); -ONIG_EXTERN -int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); -ONIG_EXTERN -int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); -ONIG_EXTERN -void re_free_pattern P_((struct re_pattern_buffer*)); -ONIG_EXTERN -int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int)); -ONIG_EXTERN -int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*)); -ONIG_EXTERN -int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*)); -ONIG_EXTERN -void re_set_casetable P_((const char*)); -ONIG_EXTERN -void re_free_registers P_((struct re_registers*)); -ONIG_EXTERN -int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */ - -#ifdef __cplusplus -} -#endif - -#endif /* ONIGGNU_H */ diff --git a/src/openalpr/support/regex/onigposix.h b/src/openalpr/support/regex/onigposix.h deleted file mode 100644 index f1cb35f..0000000 --- a/src/openalpr/support/regex/onigposix.h +++ /dev/null @@ -1,169 +0,0 @@ -#ifndef ONIGPOSIX_H -#define 
ONIGPOSIX_H -/********************************************************************** - onigposix.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2005 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* options */ -#define REG_ICASE (1<<0) -#define REG_NEWLINE (1<<1) -#define REG_NOTBOL (1<<2) -#define REG_NOTEOL (1<<3) -#define REG_EXTENDED (1<<4) /* if not setted, Basic Onigular Expression */ -#define REG_NOSUB (1<<5) - -/* POSIX error codes */ -#define REG_NOMATCH 1 -#define REG_BADPAT 2 -#define REG_ECOLLATE 3 -#define REG_ECTYPE 4 -#define REG_EESCAPE 5 -#define REG_ESUBREG 6 -#define REG_EBRACK 7 -#define REG_EPAREN 8 -#define REG_EBRACE 9 -#define REG_BADBR 10 -#define REG_ERANGE 11 -#define REG_ESPACE 12 -#define REG_BADRPT 13 - -/* extended error codes */ -#define REG_EONIG_INTERNAL 14 -#define REG_EONIG_BADWC 15 -#define REG_EONIG_BADARG 16 -#define REG_EONIG_THREAD 17 - -/* character encodings (for reg_set_encoding()) */ -#define REG_POSIX_ENCODING_ASCII 0 -#define REG_POSIX_ENCODING_EUC_JP 1 -#define REG_POSIX_ENCODING_SJIS 2 -#define REG_POSIX_ENCODING_UTF8 3 -#define REG_POSIX_ENCODING_UTF16_BE 4 -#define REG_POSIX_ENCODING_UTF16_LE 5 - - -typedef int regoff_t; - -typedef struct { - regoff_t rm_so; - regoff_t rm_eo; -} regmatch_t; - -/* POSIX regex_t */ -typedef struct { - void* onig; /* Oniguruma regex_t* */ - size_t re_nsub; - int comp_options; -} regex_t; - - -#ifndef P_ -#if defined(__STDC__) || defined(_WIN32) -# define P_(args) args -#else -# define P_(args) () -#endif -#endif - -#ifndef ONIG_EXTERN -#if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) -#define ONIG_EXTERN extern __declspec(dllexport) -#else -#define ONIG_EXTERN extern __declspec(dllimport) -#endif -#endif -#endif - -#ifndef ONIG_EXTERN -#define ONIG_EXTERN extern -#endif - -#ifndef ONIGURUMA_H -typedef unsigned int OnigOptionType; - -/* syntax */ -typedef struct { - unsigned int op; - unsigned int op2; - unsigned int behavior; - OnigOptionType options; /* default option */ -} OnigSyntaxType; - -ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; -ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; 
-ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; -ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; -ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; -ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; -ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; -ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; - -/* predefined syntaxes (see regsyntax.c) */ -#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) -#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) -#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) -#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) -#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) -#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) -#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) -#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) -/* default syntax */ -#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax - -ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; - -ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax)); -ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); -ONIG_EXTERN const char* onig_version P_((void)); -ONIG_EXTERN const char* onig_copyright P_((void)); - -#endif /* ONIGURUMA_H */ - - -ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options)); -ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options)); -ONIG_EXTERN void regfree P_((regex_t* reg)); -ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size)); - -/* extended API */ -ONIG_EXTERN void reg_set_encoding P_((int enc)); -ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums)); -ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg)); -ONIG_EXTERN int reg_number_of_names P_((regex_t* reg)); - -#ifdef __cplusplus -} -#endif - -#endif /* ONIGPOSIX_H */ diff --git a/src/openalpr/support/regex/oniguruma.h 
b/src/openalpr/support/regex/oniguruma.h deleted file mode 100644 index af2abeb..0000000 --- a/src/openalpr/support/regex/oniguruma.h +++ /dev/null @@ -1,827 +0,0 @@ -#ifndef ONIGURUMA_H -#define ONIGURUMA_H -/********************************************************************** - oniguruma.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2009 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#ifdef __cplusplus -extern "C" { -#endif - -#define ONIGURUMA -#define ONIGURUMA_VERSION_MAJOR 5 -#define ONIGURUMA_VERSION_MINOR 9 -#define ONIGURUMA_VERSION_TEENY 6 - -#ifdef __cplusplus -# ifndef HAVE_PROTOTYPES -# define HAVE_PROTOTYPES 1 -# endif -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ -#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -#ifdef HAVE_STDARG_H -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -#ifndef P_ -#if defined(__STDC__) || defined(_WIN32) -# define P_(args) args -#else -# define P_(args) () -#endif -#endif - -#ifndef PV_ -#ifdef HAVE_STDARG_PROTOTYPES -# define PV_(args) args -#else -# define PV_(args) () -#endif -#endif - -#ifndef ONIG_EXTERN -#if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) || defined(RUBY_EXPORT) -#define ONIG_EXTERN extern __declspec(dllexport) -#else -#define ONIG_EXTERN extern -#endif -#endif -#endif - -#ifndef ONIG_EXTERN -#define ONIG_EXTERN extern -#endif - -/* PART: character encoding */ - -#ifndef ONIG_ESCAPE_UCHAR_COLLISION -#define UChar OnigUChar -#endif - -#ifdef _WIN32 -# include -typedef ULONG_PTR OnigCodePoint; -#else -typedef unsigned long OnigCodePoint; -#endif -typedef unsigned char OnigUChar; -typedef unsigned int OnigCtype; -typedef unsigned int OnigDistance; - -#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) - -typedef unsigned int OnigCaseFoldType; /* case fold flag */ - -ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag; - -/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ -/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ -#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) -#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) - -#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR -#define ONIGENC_CASE_FOLD_DEFAULT 
OnigDefaultCaseFoldFlag - - -#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3 -#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13 -/* 13 => Unicode:0x1ffc */ - -/* code range */ -#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0]) -#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1] -#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2] - -typedef struct { - int byte_len; /* argument(original) character(s) byte length */ - int code_len; /* number of code */ - OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]; -} OnigCaseFoldCodeItem; - -typedef struct { - OnigCodePoint esc; - OnigCodePoint anychar; - OnigCodePoint anytime; - OnigCodePoint zero_or_one_time; - OnigCodePoint one_or_more_time; - OnigCodePoint anychar_anytime; -} OnigMetaCharTableType; - -typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg); - -typedef struct OnigEncodingTypeST { - int (*mbc_enc_len)(const OnigUChar* p); - const char* name; - int max_enc_len; - int min_enc_len; - int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end); - OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end); - int (*code_to_mbclen)(OnigCodePoint code); - int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf); - int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to); - int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg); - int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[]); - int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end); - int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype); - int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]); - OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p); - int (*is_allowed_reverse_match)(const OnigUChar* p, const 
OnigUChar* end); -} OnigEncodingType; - -typedef OnigEncodingType* OnigEncoding; - -ONIG_EXTERN OnigEncodingType OnigEncodingASCII; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15; -ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16; -ONIG_EXTERN OnigEncodingType OnigEncodingUTF8; -ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE; -ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE; -ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE; -ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE; -ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP; -ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW; -ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR; -ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN; -ONIG_EXTERN OnigEncodingType OnigEncodingSJIS; -ONIG_EXTERN OnigEncodingType OnigEncodingKOI8; -ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R; -ONIG_EXTERN OnigEncodingType OnigEncodingCP1251; -ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; -ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; - -#define ONIG_ENCODING_ASCII (&OnigEncodingASCII) -#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1) -#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2) -#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3) -#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4) -#define 
ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5) -#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6) -#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7) -#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8) -#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9) -#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10) -#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11) -#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13) -#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14) -#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15) -#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16) -#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8) -#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE) -#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE) -#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE) -#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE) -#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP) -#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW) -#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR) -#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN) -#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS) -#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) -#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R) -#define ONIG_ENCODING_CP1251 (&OnigEncodingCP1251) -#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5) -#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030) - -#define ONIG_ENCODING_UNDEF ((OnigEncoding )0) - - -/* work size */ -#define ONIGENC_CODE_TO_MBC_MAXLEN 7 -#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18 -/* 18: 6(max-byte) * 3(case-fold chars) */ - -/* character types */ -#define ONIGENC_CTYPE_NEWLINE 0 -#define ONIGENC_CTYPE_ALPHA 1 -#define ONIGENC_CTYPE_BLANK 2 -#define ONIGENC_CTYPE_CNTRL 3 -#define ONIGENC_CTYPE_DIGIT 4 -#define ONIGENC_CTYPE_GRAPH 5 -#define ONIGENC_CTYPE_LOWER 6 -#define ONIGENC_CTYPE_PRINT 7 -#define ONIGENC_CTYPE_PUNCT 8 -#define ONIGENC_CTYPE_SPACE 9 -#define 
ONIGENC_CTYPE_UPPER 10 -#define ONIGENC_CTYPE_XDIGIT 11 -#define ONIGENC_CTYPE_WORD 12 -#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */ -#define ONIGENC_CTYPE_ASCII 14 -#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII - - -#define onig_enc_len(enc,p,end) ONIGENC_MBC_ENC_LEN(enc,p) - -#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) -#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) -#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1) -#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) -#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) -#define ONIGENC_IS_MBC_WORD(enc,s,end) \ - ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end)) - - -#define ONIGENC_NAME(enc) ((enc)->name) - -#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \ - (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf) -#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ - (enc)->is_allowed_reverse_match(s,end) -#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ - (enc)->left_adjust_char_head(start, s) -#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \ - (enc)->apply_all_case_fold(case_fold_flag,f,arg) -#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \ - (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs) -#define ONIGENC_STEP_BACK(enc,start,s,n) \ - onigenc_step_back((enc),(start),(s),(n)) - -#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p) -#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) -#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) -#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) -#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end)) -#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end)) -#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code) -#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf) -#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \ - (enc)->property_name_to_ctype(enc,p,end) - 
-#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype) - -#define ONIGENC_IS_CODE_NEWLINE(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE) -#define ONIGENC_IS_CODE_GRAPH(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH) -#define ONIGENC_IS_CODE_PRINT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT) -#define ONIGENC_IS_CODE_ALNUM(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM) -#define ONIGENC_IS_CODE_ALPHA(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA) -#define ONIGENC_IS_CODE_LOWER(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER) -#define ONIGENC_IS_CODE_UPPER(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER) -#define ONIGENC_IS_CODE_CNTRL(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL) -#define ONIGENC_IS_CODE_PUNCT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT) -#define ONIGENC_IS_CODE_SPACE(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE) -#define ONIGENC_IS_CODE_BLANK(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK) -#define ONIGENC_IS_CODE_DIGIT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT) -#define ONIGENC_IS_CODE_XDIGIT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT) -#define ONIGENC_IS_CODE_WORD(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD) - -#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \ - (enc)->get_ctype_code_range(ctype,sbout,ranges) - -ONIG_EXTERN -OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n)); - - -/* encoding API */ -ONIG_EXTERN -int onigenc_init P_((void)); -ONIG_EXTERN -int onigenc_set_default_encoding P_((OnigEncoding enc)); -ONIG_EXTERN -OnigEncoding onigenc_get_default_encoding P_((void)); -ONIG_EXTERN -void onigenc_set_default_caseconv_table P_((const OnigUChar* table)); -ONIG_EXTERN -OnigUChar* 
onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev)); -ONIG_EXTERN -OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); -ONIG_EXTERN -OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); -ONIG_EXTERN -OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); -ONIG_EXTERN -int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end)); -ONIG_EXTERN -int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p)); -ONIG_EXTERN -int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); - - - -/* PART: regular expression */ - -/* config parameters */ -#define ONIG_NREGION 10 -#define ONIG_MAX_BACKREF_NUM 1000 -#define ONIG_MAX_REPEAT_NUM 100000 -#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000 -/* constants */ -#define ONIG_MAX_ERROR_MESSAGE_LEN 90 - -typedef unsigned int OnigOptionType; - -#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE - -/* options */ -#define ONIG_OPTION_NONE 0U -#define ONIG_OPTION_IGNORECASE 1U -#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) -#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) -#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) -#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1) -#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1) -#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1) -#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1) -#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) -/* options (search time) */ -#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) -#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) -#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) -#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION 
/* limit */ - -#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) -#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) -#define ONIG_IS_OPTION_ON(options,option) ((options) & (option)) - -/* syntax */ -typedef struct { - unsigned int op; - unsigned int op2; - unsigned int behavior; - OnigOptionType options; /* default option */ - OnigMetaCharTableType meta_char_table; -} OnigSyntaxType; - -ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS; -ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; -ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; -ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; -ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; -ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; -ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; -ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; -ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG; -ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; - -/* predefined syntaxes (see regsyntax.c) */ -#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS) -#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) -#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) -#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) -#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) -#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) -#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) -#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) -#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG) -#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) - -/* default syntax */ -ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; -#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax - -/* syntax (operators) */ -#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0) -#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */ -#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */ -#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3) -#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */ -#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5) -#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? 
*/ -#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7) -#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */ -#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */ -#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */ -#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */ -#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */ -#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */ -#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */ -#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */ -#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */ -#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */ -#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */ -#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */ -#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */ -#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */ -#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */ -#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */ -#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */ -#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */ -#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */ -#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */ -#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */ -#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */ -#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */ - -#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */ -#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */ -#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */ -#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */ -#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */ -#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */ -#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */ -#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?...) 
*/ -#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k */ -#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g, \g */ -#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@..) */ -#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */ -#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */ -#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */ -#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */ -#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */ -#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */ -#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */ -/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */ -#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */ -#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */ - -/* syntax (behavior) */ -#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ -#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */ -#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */ -#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */ -#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */ -#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */ -#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/ -#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */ -#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */ -#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?)(?) */ -#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */ - -/* syntax (behavior) in char class [...] */ -#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */ -#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. 
*/ -#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22) -#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */ -/* syntax (behavior) warning */ -#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */ -#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */ - -/* meta character specifiers (onig_set_meta_char()) */ -#define ONIG_META_CHAR_ESCAPE 0 -#define ONIG_META_CHAR_ANYCHAR 1 -#define ONIG_META_CHAR_ANYTIME 2 -#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3 -#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4 -#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5 - -#define ONIG_INEFFECTIVE_META_CHAR 0 - -/* error codes */ -#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000) -/* normal return */ -#define ONIG_NORMAL 0 -#define ONIG_MISMATCH -1 -#define ONIG_NO_SUPPORT_CONFIG -2 - -/* internal error */ -#define ONIGERR_MEMORY -5 -#define ONIGERR_TYPE_BUG -6 -#define ONIGERR_PARSER_BUG -11 -#define ONIGERR_STACK_BUG -12 -#define ONIGERR_UNDEFINED_BYTECODE -13 -#define ONIGERR_UNEXPECTED_BYTECODE -14 -#define ONIGERR_MATCH_STACK_LIMIT_OVER -15 -#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 -#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 -/* general error */ -#define ONIGERR_INVALID_ARGUMENT -30 -/* syntax error */ -#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100 -#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101 -#define ONIGERR_EMPTY_CHAR_CLASS -102 -#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103 -#define ONIGERR_END_PATTERN_AT_ESCAPE -104 -#define ONIGERR_END_PATTERN_AT_META -105 -#define ONIGERR_END_PATTERN_AT_CONTROL -106 -#define ONIGERR_META_CODE_SYNTAX -108 -#define ONIGERR_CONTROL_CODE_SYNTAX -109 -#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 -#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 -#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 -#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 -#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 -#define 
ONIGERR_NESTED_REPEAT_OPERATOR -115 -#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116 -#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 -#define ONIGERR_END_PATTERN_IN_GROUP -118 -#define ONIGERR_UNDEFINED_GROUP_OPTION -119 -#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121 -#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122 -#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123 -/* values error (syntax error) */ -#define ONIGERR_TOO_BIG_NUMBER -200 -#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 -#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 -#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 -#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 -#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205 -#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206 -#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207 -#define ONIGERR_INVALID_BACKREF -208 -#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209 -#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212 -#define ONIGERR_EMPTY_GROUP_NAME -214 -#define ONIGERR_INVALID_GROUP_NAME -215 -#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216 -#define ONIGERR_UNDEFINED_NAME_REFERENCE -217 -#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218 -#define ONIGERR_MULTIPLEX_DEFINED_NAME -219 -#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220 -#define ONIGERR_NEVER_ENDING_RECURSION -221 -#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 -#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 -#define ONIGERR_INVALID_CODE_POINT_VALUE -400 -#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 -#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 -#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 -#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403 - -/* errors related to thread */ -#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 - - -/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ -#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31 -#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \ - ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && 
(r)->list[i]) - -typedef struct OnigCaptureTreeNodeStruct { - int group; /* group number */ - int beg; - int end; - int allocated; - int num_childs; - struct OnigCaptureTreeNodeStruct** childs; -} OnigCaptureTreeNode; - -/* match result region type */ -struct re_registers { - int allocated; - int num_regs; - int* beg; - int* end; - /* extended */ - OnigCaptureTreeNode* history_root; /* capture history tree root */ -}; - -/* capture tree traverse */ -#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1 -#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2 -#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \ - ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST ) - - -#define ONIG_REGION_NOTPOS -1 - -typedef struct re_registers OnigRegion; - -typedef struct { - OnigEncoding enc; - OnigUChar* par; - OnigUChar* par_end; -} OnigErrorInfo; - -typedef struct { - int lower; - int upper; -} OnigRepeatRange; - -typedef void (*OnigWarnFunc) P_((const char* s)); -extern void onig_null_warn P_((const char* s)); -#define ONIG_NULL_WARN onig_null_warn - -#define ONIG_CHAR_TABLE_SIZE 256 - -/* regex_t state */ -#define ONIG_STATE_NORMAL 0 -#define ONIG_STATE_SEARCHING 1 -#define ONIG_STATE_COMPILING -1 -#define ONIG_STATE_MODIFY -2 - -#define ONIG_STATE(reg) \ - ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state) - -typedef struct re_pattern_buffer { - /* common members of BBuf(bytes-buffer) */ - unsigned char* p; /* compiled pattern */ - unsigned int used; /* used space for p */ - unsigned int alloc; /* allocated space for p */ - - int state; /* normal, searching, compiling */ - int num_mem; /* used memory(...) num counted from 1 */ - int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ - int num_null_check; /* OP_NULL_CHECK_START/END id counter */ - int num_comb_exp_check; /* combination explosion check */ - int num_call; /* number of subexp call */ - unsigned int capture_history; /* (?@...) 
flag (1-31) */ - unsigned int bt_mem_start; /* need backtrack flag */ - unsigned int bt_mem_end; /* need backtrack flag */ - int stack_pop_level; - int repeat_range_alloc; - OnigRepeatRange* repeat_range; - - OnigEncoding enc; - OnigOptionType options; - OnigSyntaxType* syntax; - OnigCaseFoldType case_fold_flag; - void* name_table; - - /* optimization info (string search, char-map and anchors) */ - int optimize; /* optimize flag */ - int threshold_len; /* search str-length for apply optimize */ - int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ - OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ - OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ - int sub_anchor; /* start-anchor for exact or map */ - unsigned char *exact; - unsigned char *exact_end; - unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ - int *int_map; /* BM skip for exact_len > 255 */ - int *int_map_backward; /* BM skip for backward search */ - OnigDistance dmin; /* min-distance of exact or map */ - OnigDistance dmax; /* max-distance of exact or map */ - - /* regex_t link chain */ - struct re_pattern_buffer* chain; /* escape compile-conflict */ -} OnigRegexType; - -typedef OnigRegexType* OnigRegex; - -#ifndef ONIG_ESCAPE_REGEX_T_COLLISION - typedef OnigRegexType regex_t; -#endif - - -typedef struct { - int num_of_elements; - OnigEncoding pattern_enc; - OnigEncoding target_enc; - OnigSyntaxType* syntax; - OnigOptionType option; - OnigCaseFoldType case_fold_flag; -} OnigCompileInfo; - -/* Oniguruma Native API */ -ONIG_EXTERN -int onig_init P_((void)); -ONIG_EXTERN -int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...)); -ONIG_EXTERN -void onig_set_warn_func P_((OnigWarnFunc f)); -ONIG_EXTERN -void onig_set_verb_warn_func P_((OnigWarnFunc f)); -ONIG_EXTERN -int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); 
-ONIG_EXTERN -int onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax)); -int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); -ONIG_EXTERN -int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); -ONIG_EXTERN -void onig_free P_((OnigRegex)); -ONIG_EXTERN -void onig_free_body P_((OnigRegex)); -ONIG_EXTERN -int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); -ONIG_EXTERN -int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); -ONIG_EXTERN -int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); -ONIG_EXTERN -int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option)); -ONIG_EXTERN -OnigRegion* onig_region_new P_((void)); -ONIG_EXTERN -void onig_region_init P_((OnigRegion* region)); -ONIG_EXTERN -void onig_region_free P_((OnigRegion* region, int free_self)); -ONIG_EXTERN -void onig_region_copy P_((OnigRegion* to, OnigRegion* from)); -ONIG_EXTERN -void onig_region_clear P_((OnigRegion* region)); -ONIG_EXTERN -int onig_region_resize P_((OnigRegion* region, int n)); -ONIG_EXTERN -int onig_region_set P_((OnigRegion* region, int at, int beg, int end)); -ONIG_EXTERN -int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums)); -ONIG_EXTERN -int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* 
name_end, OnigRegion *region)); -ONIG_EXTERN -int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg)); -ONIG_EXTERN -int onig_number_of_names P_((OnigRegex reg)); -ONIG_EXTERN -int onig_number_of_captures P_((OnigRegex reg)); -ONIG_EXTERN -int onig_number_of_capture_histories P_((OnigRegex reg)); -ONIG_EXTERN -OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region)); -ONIG_EXTERN -int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg)); -ONIG_EXTERN -int onig_noname_group_capture_is_active P_((OnigRegex reg)); -ONIG_EXTERN -OnigEncoding onig_get_encoding P_((OnigRegex reg)); -ONIG_EXTERN -OnigOptionType onig_get_options P_((OnigRegex reg)); -ONIG_EXTERN -OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg)); -ONIG_EXTERN -OnigSyntaxType* onig_get_syntax P_((OnigRegex reg)); -ONIG_EXTERN -int onig_set_default_syntax P_((OnigSyntaxType* syntax)); -ONIG_EXTERN -void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); -ONIG_EXTERN -unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax)); -ONIG_EXTERN -unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax)); -ONIG_EXTERN -unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax)); -ONIG_EXTERN -OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax)); -ONIG_EXTERN -void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op)); -ONIG_EXTERN -void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2)); -ONIG_EXTERN -void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)); -ONIG_EXTERN -void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options)); -ONIG_EXTERN -int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code)); -ONIG_EXTERN -void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from)); -ONIG_EXTERN -OnigCaseFoldType 
onig_get_default_case_fold_flag P_((void)); -ONIG_EXTERN -int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag)); -ONIG_EXTERN -unsigned int onig_get_match_stack_limit_size P_((void)); -ONIG_EXTERN -int onig_set_match_stack_limit_size P_((unsigned int size)); -ONIG_EXTERN -int onig_end P_((void)); -ONIG_EXTERN -const char* onig_version P_((void)); -ONIG_EXTERN -const char* onig_copyright P_((void)); - -#ifdef __cplusplus -} -#endif - -#endif /* ONIGURUMA_H */ diff --git a/src/openalpr/support/regex/regcomp.c b/src/openalpr/support/regex/regcomp.c deleted file mode 100644 index b93ca94..0000000 --- a/src/openalpr/support/regex/regcomp.c +++ /dev/null @@ -1,6285 +0,0 @@ -/********************************************************************** - regcomp.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2013 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regparse.h" - -OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; - -extern OnigCaseFoldType -onig_get_default_case_fold_flag(void) -{ - return OnigDefaultCaseFoldFlag; -} - -extern int -onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) -{ - OnigDefaultCaseFoldFlag = case_fold_flag; - return 0; -} - - -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS -static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; -#endif - -static UChar* -str_dup(UChar* s, UChar* end) -{ - int len = end - s; - - if (len > 0) { - UChar* r = (UChar* )xmalloc(len + 1); - CHECK_NULL_RETURN(r); - xmemcpy(r, s, len); - r[len] = (UChar )0; - return r; - } - else return NULL; -} - -static void -swap_node(Node* a, Node* b) -{ - Node c; - c = *a; *a = *b; *b = c; - - if (NTYPE(a) == NT_STR) { - StrNode* sn = NSTR(a); - if (sn->capa == 0) { - int len = sn->end - sn->s; - sn->s = sn->buf; - sn->end = sn->s + len; - } - } - - if (NTYPE(b) == NT_STR) { - StrNode* sn = NSTR(b); - if (sn->capa == 0) { - int len = sn->end - sn->s; - sn->s = sn->buf; - sn->end = sn->s + len; - } - } -} - -static OnigDistance -distance_add(OnigDistance d1, OnigDistance d2) -{ - if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) - return ONIG_INFINITE_DISTANCE; - else { - if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; - else return ONIG_INFINITE_DISTANCE; - } -} - -static OnigDistance -distance_multiply(OnigDistance d, int m) -{ - if (m == 0) 
return 0; - - if (d < ONIG_INFINITE_DISTANCE / m) - return d * m; - else - return ONIG_INFINITE_DISTANCE; -} - -static int -bitset_is_empty(BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { - if (bs[i] != 0) return 0; - } - return 1; -} - -#ifdef ONIG_DEBUG -static int -bitset_on_num(BitSetRef bs) -{ - int i, n; - - n = 0; - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT(bs, i)) n++; - } - return n; -} -#endif - -extern int -onig_bbuf_init(BBuf* buf, int size) -{ - if (size <= 0) { - size = 0; - buf->p = NULL; - } - else { - buf->p = (UChar* )xmalloc(size); - if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); - } - - buf->alloc = size; - buf->used = 0; - return 0; -} - - -#ifdef USE_SUBEXP_CALL - -static int -unset_addr_list_init(UnsetAddrList* uslist, int size) -{ - UnsetAddr* p; - - p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); - CHECK_NULL_RETURN_MEMERR(p); - uslist->num = 0; - uslist->alloc = size; - uslist->us = p; - return 0; -} - -static void -unset_addr_list_end(UnsetAddrList* uslist) -{ - if (IS_NOT_NULL(uslist->us)) - xfree(uslist->us); -} - -static int -unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) -{ - UnsetAddr* p; - int size; - - if (uslist->num >= uslist->alloc) { - size = uslist->alloc * 2; - p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); - CHECK_NULL_RETURN_MEMERR(p); - uslist->alloc = size; - uslist->us = p; - } - - uslist->us[uslist->num].offset = offset; - uslist->us[uslist->num].target = node; - uslist->num++; - return 0; -} -#endif /* USE_SUBEXP_CALL */ - - -static int -add_opcode(regex_t* reg, int opcode) -{ - BBUF_ADD1(reg, opcode); - return 0; -} - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -static int -add_state_check_num(regex_t* reg, int num) -{ - StateCheckNumType n = (StateCheckNumType )num; - - BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); - return 0; -} -#endif - -static int -add_rel_addr(regex_t* reg, int addr) -{ - RelAddrType ra = (RelAddrType )addr; - - 
BBUF_ADD(reg, &ra, SIZE_RELADDR); - return 0; -} - -static int -add_abs_addr(regex_t* reg, int addr) -{ - AbsAddrType ra = (AbsAddrType )addr; - - BBUF_ADD(reg, &ra, SIZE_ABSADDR); - return 0; -} - -static int -add_length(regex_t* reg, int len) -{ - LengthType l = (LengthType )len; - - BBUF_ADD(reg, &l, SIZE_LENGTH); - return 0; -} - -static int -add_mem_num(regex_t* reg, int num) -{ - MemNumType n = (MemNumType )num; - - BBUF_ADD(reg, &n, SIZE_MEMNUM); - return 0; -} - -static int -add_pointer(regex_t* reg, void* addr) -{ - PointerType ptr = (PointerType )addr; - - BBUF_ADD(reg, &ptr, SIZE_POINTER); - return 0; -} - -static int -add_option(regex_t* reg, OnigOptionType option) -{ - BBUF_ADD(reg, &option, SIZE_OPTION); - return 0; -} - -static int -add_opcode_rel_addr(regex_t* reg, int opcode, int addr) -{ - int r; - - r = add_opcode(reg, opcode); - if (r) return r; - r = add_rel_addr(reg, addr); - return r; -} - -static int -add_bytes(regex_t* reg, UChar* bytes, int len) -{ - BBUF_ADD(reg, bytes, len); - return 0; -} - -static int -add_bitset(regex_t* reg, BitSetRef bs) -{ - BBUF_ADD(reg, bs, SIZE_BITSET); - return 0; -} - -static int -add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) -{ - int r; - - r = add_opcode(reg, opcode); - if (r) return r; - r = add_option(reg, option); - return r; -} - -static int compile_length_tree(Node* node, regex_t* reg); -static int compile_tree(Node* node, regex_t* reg); - - -#define IS_NEED_STR_LEN_OP_EXACT(op) \ - ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ - (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) - -static int -select_str_opcode(int mb_len, int str_len, int ignore_case) -{ - int op; - - if (ignore_case) { - switch (str_len) { - case 1: op = OP_EXACT1_IC; break; - default: op = OP_EXACTN_IC; break; - } - } - else { - switch (mb_len) { - case 1: - switch (str_len) { - case 1: op = OP_EXACT1; break; - case 2: op = OP_EXACT2; break; - case 3: op = OP_EXACT3; break; - case 4: op = 
OP_EXACT4; break; - case 5: op = OP_EXACT5; break; - default: op = OP_EXACTN; break; - } - break; - - case 2: - switch (str_len) { - case 1: op = OP_EXACTMB2N1; break; - case 2: op = OP_EXACTMB2N2; break; - case 3: op = OP_EXACTMB2N3; break; - default: op = OP_EXACTMB2N; break; - } - break; - - case 3: - op = OP_EXACTMB3N; - break; - - default: - op = OP_EXACTMBN; - break; - } - } - return op; -} - -static int -compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) -{ - int r; - int saved_num_null_check = reg->num_null_check; - - if (empty_info != 0) { - r = add_opcode(reg, OP_NULL_CHECK_START); - if (r) return r; - r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ - if (r) return r; - reg->num_null_check++; - } - - r = compile_tree(node, reg); - if (r) return r; - - if (empty_info != 0) { - if (empty_info == NQ_TARGET_IS_EMPTY) - r = add_opcode(reg, OP_NULL_CHECK_END); - else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) - r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); - else if (empty_info == NQ_TARGET_IS_EMPTY_REC) - r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); - - if (r) return r; - r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ - } - return r; -} - -#ifdef USE_SUBEXP_CALL -static int -compile_call(CallNode* node, regex_t* reg) -{ - int r; - - r = add_opcode(reg, OP_CALL); - if (r) return r; - r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), - node->target); - if (r) return r; - r = add_abs_addr(reg, 0 /*dummy addr.*/); - return r; -} -#endif - -static int -compile_tree_n_times(Node* node, int n, regex_t* reg) -{ - int i, r; - - for (i = 0; i < n; i++) { - r = compile_tree(node, reg); - if (r) return r; - } - return 0; -} - -static int -add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len, - regex_t* reg ARG_UNUSED, int ignore_case) -{ - int len; - int op = select_str_opcode(mb_len, str_len, ignore_case); - - len = SIZE_OPCODE; - - if (op == OP_EXACTMBN) len += SIZE_LENGTH; - 
if (IS_NEED_STR_LEN_OP_EXACT(op)) - len += SIZE_LENGTH; - - len += mb_len * str_len; - return len; -} - -static int -add_compile_string(UChar* s, int mb_len, int str_len, - regex_t* reg, int ignore_case) -{ - int op = select_str_opcode(mb_len, str_len, ignore_case); - add_opcode(reg, op); - - if (op == OP_EXACTMBN) - add_length(reg, mb_len); - - if (IS_NEED_STR_LEN_OP_EXACT(op)) { - if (op == OP_EXACTN_IC) - add_length(reg, mb_len * str_len); - else - add_length(reg, str_len); - } - - add_bytes(reg, s, mb_len * str_len); - return 0; -} - - -static int -compile_length_string_node(Node* node, regex_t* reg) -{ - int rlen, r, len, prev_len, slen, ambig; - OnigEncoding enc = reg->enc; - UChar *p, *prev; - StrNode* sn; - - sn = NSTR(node); - if (sn->end <= sn->s) - return 0; - - ambig = NSTRING_IS_AMBIG(node); - - p = prev = sn->s; - prev_len = enclen(enc, p); - p += prev_len; - slen = 1; - rlen = 0; - - for (; p < sn->end; ) { - len = enclen(enc, p); - if (len == prev_len) { - slen++; - } - else { - r = add_compile_string_length(prev, prev_len, slen, reg, ambig); - rlen += r; - prev = p; - slen = 1; - prev_len = len; - } - p += len; - } - r = add_compile_string_length(prev, prev_len, slen, reg, ambig); - rlen += r; - return rlen; -} - -static int -compile_length_string_raw_node(StrNode* sn, regex_t* reg) -{ - if (sn->end <= sn->s) - return 0; - - return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); -} - -static int -compile_string_node(Node* node, regex_t* reg) -{ - int r, len, prev_len, slen, ambig; - OnigEncoding enc = reg->enc; - UChar *p, *prev, *end; - StrNode* sn; - - sn = NSTR(node); - if (sn->end <= sn->s) - return 0; - - end = sn->end; - ambig = NSTRING_IS_AMBIG(node); - - p = prev = sn->s; - prev_len = enclen(enc, p); - p += prev_len; - slen = 1; - - for (; p < end; ) { - len = enclen(enc, p); - if (len == prev_len) { - slen++; - } - else { - r = add_compile_string(prev, prev_len, slen, reg, ambig); - if (r) return r; - - prev = p; - 
slen = 1; - prev_len = len; - } - - p += len; - } - return add_compile_string(prev, prev_len, slen, reg, ambig); -} - -static int -compile_string_raw_node(StrNode* sn, regex_t* reg) -{ - if (sn->end <= sn->s) - return 0; - - return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); -} - -static int -add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) -{ -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - add_length(reg, mbuf->used); - return add_bytes(reg, mbuf->p, mbuf->used); -#else - int r, pad_size; - UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; - - GET_ALIGNMENT_PAD_SIZE(p, pad_size); - add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); - if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); - - r = add_bytes(reg, mbuf->p, mbuf->used); - - /* padding for return value from compile_length_cclass_node() to be fix. */ - pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; - if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); - return r; -#endif -} - -static int -compile_length_cclass_node(CClassNode* cc, regex_t* reg) -{ - int len; - - if (IS_NCCLASS_SHARE(cc)) { - len = SIZE_OPCODE + SIZE_POINTER; - return len; - } - - if (IS_NULL(cc->mbuf)) { - len = SIZE_OPCODE + SIZE_BITSET; - } - else { - if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { - len = SIZE_OPCODE; - } - else { - len = SIZE_OPCODE + SIZE_BITSET; - } -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - len += SIZE_LENGTH + cc->mbuf->used; -#else - len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); -#endif - } - - return len; -} - -static int -compile_cclass_node(CClassNode* cc, regex_t* reg) -{ - int r; - - if (IS_NCCLASS_SHARE(cc)) { - add_opcode(reg, OP_CCLASS_NODE); - r = add_pointer(reg, cc); - return r; - } - - if (IS_NULL(cc->mbuf)) { - if (IS_NCCLASS_NOT(cc)) - add_opcode(reg, OP_CCLASS_NOT); - else - add_opcode(reg, OP_CCLASS); - - r = add_bitset(reg, cc->bs); - } - else { - if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { - if (IS_NCCLASS_NOT(cc)) 
- add_opcode(reg, OP_CCLASS_MB_NOT); - else - add_opcode(reg, OP_CCLASS_MB); - - r = add_multi_byte_cclass(cc->mbuf, reg); - } - else { - if (IS_NCCLASS_NOT(cc)) - add_opcode(reg, OP_CCLASS_MIX_NOT); - else - add_opcode(reg, OP_CCLASS_MIX); - - r = add_bitset(reg, cc->bs); - if (r) return r; - r = add_multi_byte_cclass(cc->mbuf, reg); - } - } - - return r; -} - -static int -entry_repeat_range(regex_t* reg, int id, int lower, int upper) -{ -#define REPEAT_RANGE_ALLOC 4 - - OnigRepeatRange* p; - - if (reg->repeat_range_alloc == 0) { - p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); - CHECK_NULL_RETURN_MEMERR(p); - reg->repeat_range = p; - reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; - } - else if (reg->repeat_range_alloc <= id) { - int n; - n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; - p = (OnigRepeatRange* )xrealloc(reg->repeat_range, - sizeof(OnigRepeatRange) * n); - CHECK_NULL_RETURN_MEMERR(p); - reg->repeat_range = p; - reg->repeat_range_alloc = n; - } - else { - p = reg->repeat_range; - } - - p[id].lower = lower; - p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); - return 0; -} - -static int -compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, - regex_t* reg) -{ - int r; - int num_repeat = reg->num_repeat; - - r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); - if (r) return r; - r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ - reg->num_repeat++; - if (r) return r; - r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); - if (r) return r; - - r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); - if (r) return r; - - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - - if ( -#ifdef USE_SUBEXP_CALL - reg->num_call > 0 || -#endif - IS_QUANTIFIER_IN_REPEAT(qn)) { - r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); - } - else { - r = add_opcode(reg, qn->greedy ? 
OP_REPEAT_INC : OP_REPEAT_INC_NG); - } - if (r) return r; - r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ - return r; -} - -static int -is_anychar_star_quantifier(QtfrNode* qn) -{ - if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && - NTYPE(qn->target) == NT_CANY) - return 1; - else - return 0; -} - -#define QUANTIFIER_EXPAND_LIMIT_SIZE 50 -#define CKN_ON (ckn > 0) - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -static int -compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int len, mod_tlen, cklen; - int ckn; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - - cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); - - /* anychar repeat */ - if (NTYPE(qn->target) == NT_CANY) { - if (qn->greedy && infinite) { - if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) - return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; - else - return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && qn->lower <= 1) { - if (qn->greedy) { - if (qn->lower == 1) - len = SIZE_OP_JUMP; - else - len = 0; - - len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; - } - else { - if (qn->lower == 0) - len = SIZE_OP_JUMP; - else - len = 0; - - len += mod_tlen + SIZE_OP_PUSH + cklen; - } - } - else if (qn->upper == 0) { - if (qn->is_refered != 0) /* /(?..){0}/ */ - len = SIZE_OP_JUMP + tlen; - else - len = 0; - } - else if (qn->upper == 1 && qn->greedy) { - if (qn->lower == 0) { - if (CKN_ON) { - len = SIZE_OP_STATE_CHECK_PUSH + tlen; - } - else { - len = SIZE_OP_PUSH + tlen; - } - } - else { - len = tlen; - } - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ - len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; - } - else { - len = SIZE_OP_REPEAT_INC - + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; - if (CKN_ON) - len += SIZE_OP_STATE_CHECK; - } - - return len; -} - -static int -compile_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int r, mod_tlen; - int ckn; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - - if (is_anychar_star_quantifier(qn)) { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { - if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); - else - r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); - if (r) return r; - if (CKN_ON) { - r = add_state_check_num(reg, ckn); - if (r) return r; - } - - return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); - } - else { - if (IS_MULTILINE(reg->options)) { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_ML_STAR - : OP_ANYCHAR_ML_STAR)); - } - else { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_STAR - : OP_ANYCHAR_STAR)); - } - if (r) return r; - if (CKN_ON) - r = add_state_check_num(reg, ckn); - - return r; - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && qn->lower <= 1) { - if (qn->greedy) { - if (qn->lower == 1) { - r = add_opcode_rel_addr(reg, OP_JUMP, - (CKN_ON ? 
SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); - if (r) return r; - } - - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); - } - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP - + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); - } - else { - if (qn->lower == 0) { - r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); - if (r) return r; - } - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, - -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); - } - else - r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); - } - } - else if (qn->upper == 0) { - if (qn->is_refered != 0) { /* /(?..){0}/ */ - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else - r = 0; - } - else if (qn->upper == 1 && qn->greedy) { - if (qn->lower == 0) { - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, tlen); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, tlen); - } - if (r) return r; - } - - r = compile_tree(qn->target, reg); - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, SIZE_OP_JUMP); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); - } - - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else { - r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); - if (CKN_ON) { - if (r) return r; - r = add_opcode(reg, OP_STATE_CHECK); - if (r) return r; - r = add_state_check_num(reg, ckn); - } - } - return r; -} - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - -static int -compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int len, mod_tlen; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - /* anychar repeat */ - if (NTYPE(qn->target) == NT_CANY) { - if (qn->greedy && infinite) { - if (IS_NOT_NULL(qn->next_head_exact)) - return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; - else - return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { - len = SIZE_OP_JUMP; - } - else { - len = tlen * qn->lower; - } - - if (qn->greedy) { - if (IS_NOT_NULL(qn->head_exact)) - len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; - else if (IS_NOT_NULL(qn->next_head_exact)) - len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; - else - len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; - } - else - len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; - } - else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?..){0}/ */ - len = SIZE_OP_JUMP + tlen; - } - else if (!infinite && 
qn->greedy && - (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - len = tlen * qn->lower; - len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ - len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; - } - else { - len = SIZE_OP_REPEAT_INC - + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; - } - - return len; -} - -static int -compile_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int i, r, mod_tlen; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - if (is_anychar_star_quantifier(qn)) { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - if (IS_NOT_NULL(qn->next_head_exact)) { - if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); - else - r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); - if (r) return r; - return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); - } - else { - if (IS_MULTILINE(reg->options)) - return add_opcode(reg, OP_ANYCHAR_ML_STAR); - else - return add_opcode(reg, OP_ANYCHAR_STAR); - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { - if (qn->greedy) { - if (IS_NOT_NULL(qn->head_exact)) - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); - else if (IS_NOT_NULL(qn->next_head_exact)) - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); - else - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); - } - else { - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); - } - if (r) return r; - } - else { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - } - - if 
(qn->greedy) { - if (IS_NOT_NULL(qn->head_exact)) { - r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, - mod_tlen + SIZE_OP_JUMP); - if (r) return r; - add_bytes(reg, NSTR(qn->head_exact)->s, 1); - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); - } - else if (IS_NOT_NULL(qn->next_head_exact)) { - r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, - mod_tlen + SIZE_OP_JUMP); - if (r) return r; - add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); - } - } - else { - r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); - } - } - else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?..){0}/ */ - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else if (!infinite && qn->greedy && - (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - int n = qn->upper - qn->lower; - - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - - for (i = 0; i < n; i++) { - r = add_opcode_rel_addr(reg, OP_PUSH, - (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); - if (r) return r; - r = compile_tree(qn->target, reg); - if (r) return r; - } - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ - r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else { - r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); - } - return r; -} -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - -static int -compile_length_option_node(EncloseNode* node, regex_t* reg) -{ - int tlen; - OnigOptionType prev = reg->options; - - reg->options = node->option; - tlen = compile_length_tree(node->target, reg); - reg->options = prev; - - if (tlen < 0) return tlen; - - if (IS_DYNAMIC_OPTION(prev ^ node->option)) { - return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL - + tlen + SIZE_OP_SET_OPTION; - } - else - return tlen; -} - -static int -compile_option_node(EncloseNode* node, regex_t* reg) -{ - int r; - OnigOptionType prev = reg->options; - - if (IS_DYNAMIC_OPTION(prev ^ node->option)) { - r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); - if (r) return r; - r = add_opcode_option(reg, OP_SET_OPTION, prev); - if (r) return r; - r = add_opcode(reg, OP_FAIL); - if (r) return r; - } - - reg->options = node->option; - r = compile_tree(node->target, reg); - reg->options = prev; - - if (IS_DYNAMIC_OPTION(prev ^ node->option)) { - if (r) return r; - r = add_opcode_option(reg, OP_SET_OPTION, prev); - } - return r; -} - -static int -compile_length_enclose_node(EncloseNode* node, regex_t* reg) -{ - int len; - int tlen; - - if (node->type == ENCLOSE_OPTION) - return compile_length_option_node(node, reg); - - if (node->target) { - tlen = compile_length_tree(node->target, reg); - if (tlen < 0) return tlen; - } - else - tlen = 0; - - switch (node->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { - len = SIZE_OP_MEMORY_START_PUSH + tlen - + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_ENCLOSE_RECURSION(node) - ? 
SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); - else - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); - } - else -#endif - { - if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) - len = SIZE_OP_MEMORY_START_PUSH; - else - len = SIZE_OP_MEMORY_START; - - len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) - ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); - } - break; - - case ENCLOSE_STOP_BACKTRACK: - if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { - QtfrNode* qn = NQTFR(node->target); - tlen = compile_length_tree(qn->target, reg); - if (tlen < 0) return tlen; - - len = tlen * qn->lower - + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; - } - else { - len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; - } - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return len; -} - -static int get_char_length_tree(Node* node, regex_t* reg, int* len); - -static int -compile_enclose_node(EncloseNode* node, regex_t* reg) -{ - int r, len; - - if (node->type == ENCLOSE_OPTION) - return compile_option_node(node, reg); - - switch (node->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { - r = add_opcode(reg, OP_CALL); - if (r) return r; - node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; - node->state |= NST_ADDR_FIXED; - r = add_abs_addr(reg, (int )node->call_addr); - if (r) return r; - len = compile_length_tree(node->target, reg); - len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); - else - len += (IS_ENCLOSE_RECURSION(node) - ? 
SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); - - r = add_opcode_rel_addr(reg, OP_JUMP, len); - if (r) return r; - } -#endif - if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) - r = add_opcode(reg, OP_MEMORY_START_PUSH); - else - r = add_opcode(reg, OP_MEMORY_START); - if (r) return r; - r = add_mem_num(reg, node->regnum); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) - ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); - else - r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) - ? OP_MEMORY_END_REC : OP_MEMORY_END)); - - if (r) return r; - r = add_mem_num(reg, node->regnum); - if (r) return r; - r = add_opcode(reg, OP_RETURN); - } - else -#endif - { - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, OP_MEMORY_END_PUSH); - else - r = add_opcode(reg, OP_MEMORY_END); - if (r) return r; - r = add_mem_num(reg, node->regnum); - } - break; - - case ENCLOSE_STOP_BACKTRACK: - if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { - QtfrNode* qn = NQTFR(node->target); - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - - len = compile_length_tree(qn->target, reg); - if (len < 0) return len; - - r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); - if (r) return r; - r = compile_tree(qn->target, reg); - if (r) return r; - r = add_opcode(reg, OP_POP); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); - } - else { - r = add_opcode(reg, OP_PUSH_STOP_BT); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_POP_STOP_BT); - } - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -compile_length_anchor_node(AnchorNode* node, regex_t* reg) -{ - int len; - int tlen = 0; - 
- if (node->target) { - tlen = compile_length_tree(node->target, reg); - if (tlen < 0) return tlen; - } - - switch (node->type) { - case ANCHOR_PREC_READ: - len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; - break; - case ANCHOR_PREC_READ_NOT: - len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; - break; - case ANCHOR_LOOK_BEHIND: - len = SIZE_OP_LOOK_BEHIND + tlen; - break; - case ANCHOR_LOOK_BEHIND_NOT: - len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; - break; - - default: - len = SIZE_OPCODE; - break; - } - - return len; -} - -static int -compile_anchor_node(AnchorNode* node, regex_t* reg) -{ - int r, len; - - switch (node->type) { - case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; - case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; - case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; - case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; - case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; - case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; - - case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; - case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; -#ifdef USE_WORD_BEGIN_END - case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; - case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break; -#endif - - case ANCHOR_PREC_READ: - r = add_opcode(reg, OP_PUSH_POS); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_POP_POS); - break; - - case ANCHOR_PREC_READ_NOT: - len = compile_length_tree(node->target, reg); - if (len < 0) return len; - r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_FAIL_POS); - break; - - case ANCHOR_LOOK_BEHIND: - { - int n; - r = add_opcode(reg, OP_LOOK_BEHIND); - if (r) return r; - if 
(node->char_len < 0) { - r = get_char_length_tree(node->target, reg, &n); - if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - } - else - n = node->char_len; - r = add_length(reg, n); - if (r) return r; - r = compile_tree(node->target, reg); - } - break; - - case ANCHOR_LOOK_BEHIND_NOT: - { - int n; - len = compile_length_tree(node->target, reg); - r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, - len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); - if (r) return r; - if (node->char_len < 0) { - r = get_char_length_tree(node->target, reg, &n); - if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - } - else - n = node->char_len; - r = add_length(reg, n); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); - } - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -compile_length_tree(Node* node, regex_t* reg) -{ - int len, type, r; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - len = 0; - do { - r = compile_length_tree(NCAR(node), reg); - if (r < 0) return r; - len += r; - } while (IS_NOT_NULL(node = NCDR(node))); - r = len; - break; - - case NT_ALT: - { - int n; - - n = r = 0; - do { - r += compile_length_tree(NCAR(node), reg); - n++; - } while (IS_NOT_NULL(node = NCDR(node))); - r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); - } - break; - - case NT_STR: - if (NSTRING_IS_RAW(node)) - r = compile_length_string_raw_node(NSTR(node), reg); - else - r = compile_length_string_node(node, reg); - break; - - case NT_CCLASS: - r = compile_length_cclass_node(NCCLASS(node), reg); - break; - - case NT_CTYPE: - case NT_CANY: - r = SIZE_OPCODE; - break; - - case NT_BREF: - { - BRefNode* br = NBREF(node); - -#ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { - r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + - SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); - } - else -#endif - if (br->back_num == 1) { - r = ((!IS_IGNORECASE(reg->options) && 
br->back_static[0] <= 2) - ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); - } - else { - r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - r = SIZE_OP_CALL; - break; -#endif - - case NT_QTFR: - r = compile_length_quantifier_node(NQTFR(node), reg); - break; - - case NT_ENCLOSE: - r = compile_length_enclose_node(NENCLOSE(node), reg); - break; - - case NT_ANCHOR: - r = compile_length_anchor_node(NANCHOR(node), reg); - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -compile_tree(Node* node, regex_t* reg) -{ - int n, type, len, pos, r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - do { - r = compile_tree(NCAR(node), reg); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - { - Node* x = node; - len = 0; - do { - len += compile_length_tree(NCAR(x), reg); - if (NCDR(x) != NULL) { - len += SIZE_OP_PUSH + SIZE_OP_JUMP; - } - } while (IS_NOT_NULL(x = NCDR(x))); - pos = reg->used + len; /* goal position */ - - do { - len = compile_length_tree(NCAR(node), reg); - if (IS_NOT_NULL(NCDR(node))) { - r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); - if (r) break; - } - r = compile_tree(NCAR(node), reg); - if (r) break; - if (IS_NOT_NULL(NCDR(node))) { - len = pos - (reg->used + SIZE_OP_JUMP); - r = add_opcode_rel_addr(reg, OP_JUMP, len); - if (r) break; - } - } while (IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_STR: - if (NSTRING_IS_RAW(node)) - r = compile_string_raw_node(NSTR(node), reg); - else - r = compile_string_node(node, reg); - break; - - case NT_CCLASS: - r = compile_cclass_node(NCCLASS(node), reg); - break; - - case NT_CTYPE: - { - int op; - - switch (NCTYPE(node)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; - else op = OP_WORD; - break; - default: - return ONIGERR_TYPE_BUG; - break; - } - r = add_opcode(reg, op); - } - break; - - case NT_CANY: - 
if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML); - else - r = add_opcode(reg, OP_ANYCHAR); - break; - - case NT_BREF: - { - BRefNode* br = NBREF(node); - -#ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { - r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); - if (r) return r; - r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); - if (r) return r; - r = add_length(reg, br->nest_level); - if (r) return r; - - goto add_bacref_mems; - } - else -#endif - if (br->back_num == 1) { - n = br->back_static[0]; - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREFN_IC); - if (r) return r; - r = add_mem_num(reg, n); - } - else { - switch (n) { - case 1: r = add_opcode(reg, OP_BACKREF1); break; - case 2: r = add_opcode(reg, OP_BACKREF2); break; - default: - r = add_opcode(reg, OP_BACKREFN); - if (r) return r; - r = add_mem_num(reg, n); - break; - } - } - } - else { - int i; - int* p; - - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREF_MULTI_IC); - } - else { - r = add_opcode(reg, OP_BACKREF_MULTI); - } - if (r) return r; - -#ifdef USE_BACKREF_WITH_LEVEL - add_bacref_mems: -#endif - r = add_length(reg, br->back_num); - if (r) return r; - p = BACKREFS_P(br); - for (i = br->back_num - 1; i >= 0; i--) { - r = add_mem_num(reg, p[i]); - if (r) return r; - } - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - r = compile_call(NCALL(node), reg); - break; -#endif - - case NT_QTFR: - r = compile_quantifier_node(NQTFR(node), reg); - break; - - case NT_ENCLOSE: - r = compile_enclose_node(NENCLOSE(node), reg); - break; - - case NT_ANCHOR: - r = compile_anchor_node(NANCHOR(node), reg); - break; - - default: -#ifdef ONIG_DEBUG - fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); -#endif - break; - } - - return r; -} - -#ifdef USE_NAMED_GROUP - -static int -noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) -{ - int r = 0; - Node* node = *plink; - - switch (NTYPE(node)) { - 
case NT_LIST: - case NT_ALT: - do { - r = noname_disable_map(&(NCAR(node)), map, counter); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - { - Node** ptarget = &(NQTFR(node)->target); - Node* old = *ptarget; - r = noname_disable_map(ptarget, map, counter); - if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { - onig_reduce_nested_quantifier(node, *ptarget); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - if (en->type == ENCLOSE_MEMORY) { - if (IS_ENCLOSE_NAMED_GROUP(en)) { - (*counter)++; - map[en->regnum].new_val = *counter; - en->regnum = *counter; - r = noname_disable_map(&(en->target), map, counter); - } - else { - *plink = en->target; - en->target = NULL_NODE; - onig_node_free(node); - r = noname_disable_map(plink, map, counter); - } - } - else - r = noname_disable_map(&(en->target), map, counter); - } - break; - - default: - break; - } - - return r; -} - -static int -renumber_node_backref(Node* node, GroupNumRemap* map) -{ - int i, pos, n, old_num; - int *backs; - BRefNode* bn = NBREF(node); - - if (! 
IS_BACKREF_NAME_REF(bn)) - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - - old_num = bn->back_num; - if (IS_NULL(bn->back_dynamic)) - backs = bn->back_static; - else - backs = bn->back_dynamic; - - for (i = 0, pos = 0; i < old_num; i++) { - n = map[backs[i]].new_val; - if (n > 0) { - backs[pos] = n; - pos++; - } - } - - bn->back_num = pos; - return 0; -} - -static int -renumber_by_map(Node* node, GroupNumRemap* map) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - do { - r = renumber_by_map(NCAR(node), map); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - case NT_QTFR: - r = renumber_by_map(NQTFR(node)->target, map); - break; - case NT_ENCLOSE: - r = renumber_by_map(NENCLOSE(node)->target, map); - break; - - case NT_BREF: - r = renumber_node_backref(node, map); - break; - - default: - break; - } - - return r; -} - -static int -numbered_ref_check(Node* node) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - do { - r = numbered_ref_check(NCAR(node)); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - case NT_QTFR: - r = numbered_ref_check(NQTFR(node)->target); - break; - case NT_ENCLOSE: - r = numbered_ref_check(NENCLOSE(node)->target); - break; - - case NT_BREF: - if (! 
IS_BACKREF_NAME_REF(NBREF(node))) - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - break; - - default: - break; - } - - return r; -} - -static int -disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) -{ - int r, i, pos, counter; - BitStatusType loc; - GroupNumRemap* map; - - map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); - CHECK_NULL_RETURN_MEMERR(map); - for (i = 1; i <= env->num_mem; i++) { - map[i].new_val = 0; - } - counter = 0; - r = noname_disable_map(root, map, &counter); - if (r != 0) return r; - - r = renumber_by_map(*root, map); - if (r != 0) return r; - - for (i = 1, pos = 1; i <= env->num_mem; i++) { - if (map[i].new_val > 0) { - SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; - pos++; - } - } - - loc = env->capture_history; - BIT_STATUS_CLEAR(env->capture_history); - for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (BIT_STATUS_AT(loc, i)) { - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); - } - } - - env->num_mem = env->num_named; - reg->num_mem = env->num_named; - - return onig_renumber_name_table(reg, map); -} -#endif /* USE_NAMED_GROUP */ - -#ifdef USE_SUBEXP_CALL -static int -unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) -{ - int i, offset; - EncloseNode* en; - AbsAddrType addr; - - for (i = 0; i < uslist->num; i++) { - en = NENCLOSE(uslist->us[i].target); - if (! 
IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; - addr = en->call_addr; - offset = uslist->us[i].offset; - - BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); - } - return 0; -} -#endif - -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT -static int -quantifiers_memory_node_info(Node* node) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - { - int v; - do { - v = quantifiers_memory_node_info(NCAR(node)); - if (v > r) r = v; - } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) { - return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ - } - else - r = quantifiers_memory_node_info(NCALL(node)->target); - break; -#endif - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - if (qn->upper != 0) { - r = quantifiers_memory_node_info(qn->target); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: - return NQ_TARGET_IS_EMPTY_MEM; - break; - - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = quantifiers_memory_node_info(en->target); - break; - default: - break; - } - } - break; - - case NT_BREF: - case NT_STR: - case NT_CTYPE: - case NT_CCLASS: - case NT_CANY: - case NT_ANCHOR: - default: - break; - } - - return r; -} -#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ - -static int -get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) -{ - OnigDistance tmin; - int r = 0; - - *min = 0; - switch (NTYPE(node)) { - case NT_BREF: - { - int i; - int* backs; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - if (br->state & NST_RECURSION) break; - - backs = BACKREFS_P(br); - if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_min_match_length(nodes[backs[0]], min, env); - if (r != 0) break; - for (i = 1; i < br->back_num; i++) { - if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = 
get_min_match_length(nodes[backs[i]], &tmin, env); - if (r != 0) break; - if (*min > tmin) *min = tmin; - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) { - EncloseNode* en = NENCLOSE(NCALL(node)->target); - if (IS_ENCLOSE_MIN_FIXED(en)) - *min = en->min_len; - } - else - r = get_min_match_length(NCALL(node)->target, min, env); - break; -#endif - - case NT_LIST: - do { - r = get_min_match_length(NCAR(node), &tmin, env); - if (r == 0) *min += tmin; - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - { - Node *x, *y; - y = node; - do { - x = NCAR(y); - r = get_min_match_length(x, &tmin, env); - if (r != 0) break; - if (y == node) *min = tmin; - else if (*min > tmin) *min = tmin; - } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); - } - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - *min = sn->end - sn->s; - } - break; - - case NT_CTYPE: - *min = 1; - break; - - case NT_CCLASS: - case NT_CANY: - *min = 1; - break; - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - - if (qn->lower > 0) { - r = get_min_match_length(qn->target, min, env); - if (r == 0) - *min = distance_multiply(*min, qn->lower); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_MIN_FIXED(en)) - *min = en->min_len; - else { - r = get_min_match_length(en->target, min, env); - if (r == 0) { - en->min_len = *min; - SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); - } - } - break; -#endif - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_min_match_length(en->target, min, env); - break; - } - } - break; - - case NT_ANCHOR: - default: - break; - } - - return r; -} - -static int -get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) -{ - OnigDistance tmax; - int r = 0; - - *max = 0; - switch (NTYPE(node)) { - case NT_LIST: - do { - r = get_max_match_length(NCAR(node), &tmax, env); - if (r == 0) 
- *max = distance_add(*max, tmax); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - do { - r = get_max_match_length(NCAR(node), &tmax, env); - if (r == 0 && *max < tmax) *max = tmax; - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - *max = sn->end - sn->s; - } - break; - - case NT_CTYPE: - *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - break; - - case NT_CCLASS: - case NT_CANY: - *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - break; - - case NT_BREF: - { - int i; - int* backs; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - if (br->state & NST_RECURSION) { - *max = ONIG_INFINITE_DISTANCE; - break; - } - backs = BACKREFS_P(br); - for (i = 0; i < br->back_num; i++) { - if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_max_match_length(nodes[backs[i]], &tmax, env); - if (r != 0) break; - if (*max < tmax) *max = tmax; - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (! IS_CALL_RECURSION(NCALL(node))) - r = get_max_match_length(NCALL(node)->target, max, env); - else - *max = ONIG_INFINITE_DISTANCE; - break; -#endif - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - - if (qn->upper != 0) { - r = get_max_match_length(qn->target, max, env); - if (r == 0 && *max != 0) { - if (! 
IS_REPEAT_INFINITE(qn->upper)) - *max = distance_multiply(*max, qn->upper); - else - *max = ONIG_INFINITE_DISTANCE; - } - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_MAX_FIXED(en)) - *max = en->max_len; - else { - r = get_max_match_length(en->target, max, env); - if (r == 0) { - en->max_len = *max; - SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); - } - } - break; -#endif - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_max_match_length(en->target, max, env); - break; - } - } - break; - - case NT_ANCHOR: - default: - break; - } - - return r; -} - -#define GET_CHAR_LEN_VARLEN -1 -#define GET_CHAR_LEN_TOP_ALT_VARLEN -2 - -/* fixed size pattern node only */ -static int -get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) -{ - int tlen; - int r = 0; - - level++; - *len = 0; - switch (NTYPE(node)) { - case NT_LIST: - do { - r = get_char_length_tree1(NCAR(node), reg, &tlen, level); - if (r == 0) - *len = distance_add(*len, tlen); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - { - int tlen2; - int varlen = 0; - - r = get_char_length_tree1(NCAR(node), reg, &tlen, level); - while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { - r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); - if (r == 0) { - if (tlen != tlen2) - varlen = 1; - } - } - if (r == 0) { - if (varlen != 0) { - if (level == 1) - r = GET_CHAR_LEN_TOP_ALT_VARLEN; - else - r = GET_CHAR_LEN_VARLEN; - } - else - *len = tlen; - } - } - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - UChar *s = sn->s; - while (s < sn->end) { - s += enclen(reg->enc, s); - (*len)++; - } - } - break; - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - if (qn->lower == qn->upper) { - r = get_char_length_tree1(qn->target, reg, &tlen, level); - if (r == 0) - *len = distance_multiply(tlen, qn->lower); - } - else - r = GET_CHAR_LEN_VARLEN; - } - 
break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (! IS_CALL_RECURSION(NCALL(node))) - r = get_char_length_tree1(NCALL(node)->target, reg, len, level); - else - r = GET_CHAR_LEN_VARLEN; - break; -#endif - - case NT_CTYPE: - *len = 1; - break; - - case NT_CCLASS: - case NT_CANY: - *len = 1; - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CLEN_FIXED(en)) - *len = en->char_len; - else { - r = get_char_length_tree1(en->target, reg, len, level); - if (r == 0) { - en->char_len = *len; - SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); - } - } - break; -#endif - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_char_length_tree1(en->target, reg, len, level); - break; - default: - break; - } - } - break; - - case NT_ANCHOR: - break; - - default: - r = GET_CHAR_LEN_VARLEN; - break; - } - - return r; -} - -static int -get_char_length_tree(Node* node, regex_t* reg, int* len) -{ - return get_char_length_tree1(node, reg, len, 0); -} - -/* x is not included y ==> 1 : 0 */ -static int -is_not_included(Node* x, Node* y, regex_t* reg) -{ - int i, len; - OnigCodePoint code; - UChar *p; - int ytype; - - retry: - ytype = NTYPE(y); - switch (NTYPE(x)) { - case NT_CTYPE: - { - switch (ytype) { - case NT_CTYPE: - if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && - NCTYPE(y)->not != NCTYPE(x)->not) - return 1; - else - return 0; - break; - - case NT_CCLASS: - swap: - { - Node* tmp; - tmp = x; x = y; y = tmp; - goto retry; - } - break; - - case NT_STR: - goto swap; - break; - - default: - break; - } - } - break; - - case NT_CCLASS: - { - CClassNode* xc = NCCLASS(x); - switch (ytype) { - case NT_CTYPE: - switch (NCTYPE(y)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(y)->not == 0) { - if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT(xc->bs, i)) { - if (IS_CODE_SB_WORD(reg->enc, i)) return 0; - } - } - return 1; - } - 
return 0; - } - else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! IS_CODE_SB_WORD(reg->enc, i)) { - if (!IS_NCCLASS_NOT(xc)) { - if (BITSET_AT(xc->bs, i)) - return 0; - } - else { - if (! BITSET_AT(xc->bs, i)) - return 0; - } - } - } - return 1; - } - break; - - default: - break; - } - break; - - case NT_CCLASS: - { - int v; - CClassNode* yc = NCCLASS(y); - - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - v = BITSET_AT(xc->bs, i); - if ((v != 0 && !IS_NCCLASS_NOT(xc)) || - (v == 0 && IS_NCCLASS_NOT(xc))) { - v = BITSET_AT(yc->bs, i); - if ((v != 0 && !IS_NCCLASS_NOT(yc)) || - (v == 0 && IS_NCCLASS_NOT(yc))) - return 0; - } - } - if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || - (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) - return 1; - return 0; - } - break; - - case NT_STR: - goto swap; - break; - - default: - break; - } - } - break; - - case NT_STR: - { - StrNode* xs = NSTR(x); - if (NSTRING_LEN(x) == 0) - break; - - //c = *(xs->s); - switch (ytype) { - case NT_CTYPE: - switch (NCTYPE(y)->ctype) { - case ONIGENC_CTYPE_WORD: - if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) - return NCTYPE(y)->not; - else - return !(NCTYPE(y)->not); - break; - default: - break; - } - break; - - case NT_CCLASS: - { - CClassNode* cc = NCCLASS(y); - - code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, - xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); - return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 
0 : 1); - } - break; - - case NT_STR: - { - UChar *q; - StrNode* ys = NSTR(y); - len = NSTRING_LEN(x); - if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); - if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { - /* tiny version */ - return 0; - } - else { - for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { - if (*p != *q) return 1; - } - } - } - break; - - default: - break; - } - } - break; - - default: - break; - } - - return 0; -} - -static Node* -get_head_value_node(Node* node, int exact, regex_t* reg) -{ - Node* n = NULL_NODE; - - switch (NTYPE(node)) { - case NT_BREF: - case NT_ALT: - case NT_CANY: -#ifdef USE_SUBEXP_CALL - case NT_CALL: -#endif - break; - - case NT_CTYPE: - case NT_CCLASS: - if (exact == 0) { - n = node; - } - break; - - case NT_LIST: - n = get_head_value_node(NCAR(node), exact, reg); - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - - if (sn->end <= sn->s) - break; - - if (exact != 0 && - !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { - } - else { - n = node; - } - } - break; - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - if (qn->lower > 0) { - if (IS_NOT_NULL(qn->head_exact)) - n = qn->head_exact; - else - n = get_head_value_node(qn->target, exact, reg); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_OPTION: - { - OnigOptionType options = reg->options; - - reg->options = NENCLOSE(node)->option; - n = get_head_value_node(NENCLOSE(node)->target, exact, reg); - reg->options = options; - } - break; - - case ENCLOSE_MEMORY: - case ENCLOSE_STOP_BACKTRACK: - n = get_head_value_node(en->target, exact, reg); - break; - } - } - break; - - case NT_ANCHOR: - if (NANCHOR(node)->type == ANCHOR_PREC_READ) - n = get_head_value_node(NANCHOR(node)->target, exact, reg); - break; - - default: - break; - } - - return n; -} - -static int -check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) -{ - int type, r = 0; - - type = NTYPE(node); - if 
((NTYPE2BIT(type) & type_mask) == 0) - return 1; - - switch (type) { - case NT_LIST: - case NT_ALT: - do { - r = check_type_tree(NCAR(node), type_mask, enclose_mask, - anchor_mask); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, - anchor_mask); - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - if ((en->type & enclose_mask) == 0) - return 1; - - r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); - } - break; - - case NT_ANCHOR: - type = NANCHOR(node)->type; - if ((type & anchor_mask) == 0) - return 1; - - if (NANCHOR(node)->target) - r = check_type_tree(NANCHOR(node)->target, - type_mask, enclose_mask, anchor_mask); - break; - - default: - break; - } - return r; -} - -#ifdef USE_SUBEXP_CALL - -#define RECURSION_EXIST 1 -#define RECURSION_INFINITE 2 - -static int -subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - Node *x; - OnigDistance min; - int ret; - - x = node; - do { - ret = subexp_inf_recursive_check(NCAR(x), env, head); - if (ret < 0 || ret == RECURSION_INFINITE) return ret; - r |= ret; - if (head) { - ret = get_min_match_length(NCAR(x), &min, env); - if (ret != 0) return ret; - if (min != 0) head = 0; - } - } while (IS_NOT_NULL(x = NCDR(x))); - } - break; - - case NT_ALT: - { - int ret; - r = RECURSION_EXIST; - do { - ret = subexp_inf_recursive_check(NCAR(node), env, head); - if (ret < 0 || ret == RECURSION_INFINITE) return ret; - r &= ret; - } while (IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_QTFR: - r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); - if (r == RECURSION_EXIST) { - if (NQTFR(node)->lower == 0) r = 0; - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case 
ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_inf_recursive_check(an->target, env, head); - break; - } - } - break; - - case NT_CALL: - r = subexp_inf_recursive_check(NCALL(node)->target, env, head); - break; - - case NT_ENCLOSE: - if (IS_ENCLOSE_MARK2(NENCLOSE(node))) - return 0; - else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); - else { - SET_ENCLOSE_STATUS(node, NST_MARK2); - r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); - CLEAR_ENCLOSE_STATUS(node, NST_MARK2); - } - break; - - default: - break; - } - - return r; -} - -static int -subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - case NT_ALT: - do { - r = subexp_inf_recursive_check_trav(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_inf_recursive_check_trav(an->target, env); - break; - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - if (IS_ENCLOSE_RECURSION(en)) { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = subexp_inf_recursive_check(en->target, env, 1); - if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - } - r = subexp_inf_recursive_check_trav(en->target, env); - } - - break; - - default: - break; - } - - return r; -} - -static int -subexp_recursive_check(Node* node) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - do { - r |= subexp_recursive_check(NCAR(node)); - } while (IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = subexp_recursive_check(NQTFR(node)->target); - break; - - 
case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_recursive_check(an->target); - break; - } - } - break; - - case NT_CALL: - r = subexp_recursive_check(NCALL(node)->target); - if (r != 0) SET_CALL_RECURSION(node); - break; - - case NT_ENCLOSE: - if (IS_ENCLOSE_MARK2(NENCLOSE(node))) - return 0; - else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - return 1; /* recursion */ - else { - SET_ENCLOSE_STATUS(node, NST_MARK2); - r = subexp_recursive_check(NENCLOSE(node)->target); - CLEAR_ENCLOSE_STATUS(node, NST_MARK2); - } - break; - - default: - break; - } - - return r; -} - - -static int -subexp_recursive_check_trav(Node* node, ScanEnv* env) -{ -#define FOUND_CALLED_NODE 1 - - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - case NT_ALT: - { - int ret; - do { - ret = subexp_recursive_check_trav(NCAR(node), env); - if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; - else if (ret < 0) return ret; - } while (IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_QTFR: - r = subexp_recursive_check_trav(NQTFR(node)->target, env); - if (NQTFR(node)->upper == 0) { - if (r == FOUND_CALLED_NODE) - NQTFR(node)->is_refered = 1; - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_recursive_check_trav(an->target, env); - break; - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - if (! 
IS_ENCLOSE_RECURSION(en)) { - if (IS_ENCLOSE_CALLED(en)) { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = subexp_recursive_check(en->target); - if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - } - } - r = subexp_recursive_check_trav(en->target, env); - if (IS_ENCLOSE_CALLED(en)) - r |= FOUND_CALLED_NODE; - } - break; - - default: - break; - } - - return r; -} - -static int -setup_subexp_call(Node* node, ScanEnv* env) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - do { - r = setup_subexp_call(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - do { - r = setup_subexp_call(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = setup_subexp_call(NQTFR(node)->target, env); - break; - case NT_ENCLOSE: - r = setup_subexp_call(NENCLOSE(node)->target, env); - break; - - case NT_CALL: - { - CallNode* cn = NCALL(node); - Node** nodes = SCANENV_MEM_NODES(env); - - if (cn->group_num != 0) { - int gnum = cn->group_num; - -#ifdef USE_NAMED_GROUP - if (env->num_named > 0 && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - } -#endif - if (gnum > env->num_mem) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_GROUP_REFERENCE; - } - -#ifdef USE_NAMED_GROUP - set_call_attr: -#endif - cn->target = nodes[cn->group_num]; - if (IS_NULL(cn->target)) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - SET_ENCLOSE_STATUS(cn->target, NST_CALLED); - BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); - cn->unset_addr_list = env->unset_addr_list; - } -#ifdef USE_NAMED_GROUP - else { - int *refs; - - int 
n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, - &refs); - if (n <= 0) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - else if (n > 1) { - onig_scan_env_set_error_string(env, - ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); - return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; - } - else { - cn->group_num = refs[0]; - goto set_call_attr; - } - } -#endif - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = setup_subexp_call(an->target, env); - break; - } - } - break; - - default: - break; - } - - return r; -} -#endif - -/* divide different length alternatives in look-behind. - (?<=A|B) ==> (?<=A)|(?<=B) - (? (?type; - - head = an->target; - np = NCAR(head); - swap_node(node, head); - NCAR(node) = head; - NANCHOR(head)->target = np; - - np = node; - while ((np = NCDR(np)) != NULL_NODE) { - insert_node = onig_node_new_anchor(anc_type); - CHECK_NULL_RETURN_MEMERR(insert_node); - NANCHOR(insert_node)->target = NCAR(np); - NCAR(np) = insert_node; - } - - if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { - np = node; - do { - SET_NTYPE(np, NT_LIST); /* alt -> list */ - } while ((np = NCDR(np)) != NULL_NODE); - } - return 0; -} - -static int -setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) -{ - int r, len; - AnchorNode* an = NANCHOR(node); - - r = get_char_length_tree(an->target, reg, &len); - if (r == 0) - an->char_len = len; - else if (r == GET_CHAR_LEN_VARLEN) - r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) - r = divide_look_behind_alternatives(node); - else - r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - } - - return r; -} - -static int -next_setup(Node* node, Node* 
next_node, regex_t* reg) -{ - int type; - - retry: - type = NTYPE(node); - if (type == NT_QTFR) { - QtfrNode* qn = NQTFR(node); - if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { -#ifdef USE_QTFR_PEEK_NEXT - Node* n = get_head_value_node(next_node, 1, reg); - /* '\0': for UTF-16BE etc... */ - if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { - qn->next_head_exact = n; - } -#endif - /* automatic posseivation a*b ==> (?>a*)b */ - if (qn->lower <= 1) { - int ttype = NTYPE(qn->target); - if (IS_NODE_TYPE_SIMPLE(ttype)) { - Node *x, *y; - x = get_head_value_node(qn->target, 0, reg); - if (IS_NOT_NULL(x)) { - y = get_head_value_node(next_node, 0, reg); - if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { - Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); - CHECK_NULL_RETURN_MEMERR(en); - SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); - swap_node(node, en); - NENCLOSE(node)->target = en; - } - } - } - } - } - } - else if (type == NT_ENCLOSE) { - EncloseNode* en = NENCLOSE(node); - if (en->type == ENCLOSE_MEMORY) { - node = en->target; - goto retry; - } - } - return 0; -} - - -static int -update_string_node_case_fold(regex_t* reg, Node *node) -{ - UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - UChar *sbuf, *ebuf, *sp; - int r, i, len, sbuf_size; - StrNode* sn = NSTR(node); - - end = sn->end; - sbuf_size = (end - sn->s) * 2; - sbuf = (UChar* )xmalloc(sbuf_size); - CHECK_NULL_RETURN_MEMERR(sbuf); - ebuf = sbuf + sbuf_size; - - sp = sbuf; - p = sn->s; - while (p < end) { - len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); - for (i = 0; i < len; i++) { - if (sp >= ebuf) { - sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); - CHECK_NULL_RETURN_MEMERR(sbuf); - sp = sbuf + sbuf_size; - sbuf_size *= 2; - ebuf = sbuf + sbuf_size; - } - - *sp++ = buf[i]; - } - } - - r = onig_node_str_set(node, sbuf, sp); - if (r != 0) { - xfree(sbuf); - return r; - } - - xfree(sbuf); - return 0; -} - -static int -expand_case_fold_make_rem_string(Node** rnode, 
UChar *s, UChar *end, - regex_t* reg) -{ - int r; - Node *node; - - node = onig_node_new_str(s, end); - if (IS_NULL(node)) return ONIGERR_MEMORY; - - r = update_string_node_case_fold(reg, node); - if (r != 0) { - onig_node_free(node); - return r; - } - - NSTRING_SET_AMBIG(node); - NSTRING_SET_DONT_GET_OPT_INFO(node); - *rnode = node; - return 0; -} - -static int -expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], - UChar *p, int slen, UChar *end, - regex_t* reg, Node **rnode) -{ - int r, i, j, len, varlen; - Node *anode, *var_anode, *snode, *xnode, *an; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - - *rnode = var_anode = NULL_NODE; - - varlen = 0; - for (i = 0; i < item_num; i++) { - if (items[i].byte_len != slen) { - varlen = 1; - break; - } - } - - if (varlen != 0) { - *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(var_anode)) return ONIGERR_MEMORY; - - xnode = onig_node_new_list(NULL, NULL); - if (IS_NULL(xnode)) goto mem_err; - NCAR(var_anode) = xnode; - - anode = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(anode)) goto mem_err; - NCAR(xnode) = anode; - } - else { - *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(anode)) return ONIGERR_MEMORY; - } - - snode = onig_node_new_str(p, p + slen); - if (IS_NULL(snode)) goto mem_err; - - NCAR(anode) = snode; - - for (i = 0; i < item_num; i++) { - snode = onig_node_new_str(NULL, NULL); - if (IS_NULL(snode)) goto mem_err; - - for (j = 0; j < items[i].code_len; j++) { - len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); - if (len < 0) { - r = len; - goto mem_err2; - } - - r = onig_node_str_cat(snode, buf, buf + len); - if (r != 0) goto mem_err2; - } - - an = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(an)) { - goto mem_err2; - } - - if (items[i].byte_len != slen) { - Node *rem; - UChar *q = p + items[i].byte_len; - - if (q < end) { - r = expand_case_fold_make_rem_string(&rem, q, end, reg); - if (r != 0) { - 
onig_node_free(an); - goto mem_err2; - } - - xnode = onig_node_list_add(NULL_NODE, snode); - if (IS_NULL(xnode)) { - onig_node_free(an); - onig_node_free(rem); - goto mem_err2; - } - if (IS_NULL(onig_node_list_add(xnode, rem))) { - onig_node_free(an); - onig_node_free(xnode); - onig_node_free(rem); - goto mem_err; - } - - NCAR(an) = xnode; - } - else { - NCAR(an) = snode; - } - - NCDR(var_anode) = an; - var_anode = an; - } - else { - NCAR(an) = snode; - NCDR(anode) = an; - anode = an; - } - } - - return varlen; - - mem_err2: - onig_node_free(snode); - - mem_err: - onig_node_free(*rnode); - - return ONIGERR_MEMORY; -} - -static int -expand_case_fold_string(Node* node, regex_t* reg) -{ -#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 - - int r, n, len, alt_num; - UChar *start, *end, *p; - Node *top_root, *root, *snode, *prev_node; - OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - StrNode* sn = NSTR(node); - - if (NSTRING_IS_AMBIG(node)) return 0; - - start = sn->s; - end = sn->end; - if (start >= end) return 0; - - r = 0; - top_root = root = prev_node = snode = NULL_NODE; - alt_num = 1; - p = start; - while (p < end) { - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, - p, end, items); - if (n < 0) { - r = n; - goto err; - } - - len = enclen(reg->enc, p); - - if (n == 0) { - if (IS_NULL(snode)) { - if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { - top_root = root = onig_node_list_add(NULL_NODE, prev_node); - if (IS_NULL(root)) { - onig_node_free(prev_node); - goto mem_err; - } - } - - prev_node = snode = onig_node_new_str(NULL, NULL); - if (IS_NULL(snode)) goto mem_err; - if (IS_NOT_NULL(root)) { - if (IS_NULL(onig_node_list_add(root, snode))) { - onig_node_free(snode); - goto mem_err; - } - } - } - - r = onig_node_str_cat(snode, p, p + len); - if (r != 0) goto err; - } - else { - alt_num *= (n + 1); - if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; - - if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { - top_root = 
root = onig_node_list_add(NULL_NODE, prev_node); - if (IS_NULL(root)) { - onig_node_free(prev_node); - goto mem_err; - } - } - - r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); - if (r < 0) goto mem_err; - if (r == 1) { - if (IS_NULL(root)) { - top_root = prev_node; - } - else { - if (IS_NULL(onig_node_list_add(root, prev_node))) { - onig_node_free(prev_node); - goto mem_err; - } - } - - root = NCAR(prev_node); - } - else { /* r == 0 */ - if (IS_NOT_NULL(root)) { - if (IS_NULL(onig_node_list_add(root, prev_node))) { - onig_node_free(prev_node); - goto mem_err; - } - } - } - - snode = NULL_NODE; - } - - p += len; - } - - if (p < end) { - Node *srem; - - r = expand_case_fold_make_rem_string(&srem, p, end, reg); - if (r != 0) goto mem_err; - - if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { - top_root = root = onig_node_list_add(NULL_NODE, prev_node); - if (IS_NULL(root)) { - onig_node_free(srem); - onig_node_free(prev_node); - goto mem_err; - } - } - - if (IS_NULL(root)) { - prev_node = srem; - } - else { - if (IS_NULL(onig_node_list_add(root, srem))) { - onig_node_free(srem); - goto mem_err; - } - } - } - - /* ending */ - top_root = (IS_NOT_NULL(top_root) ? 
top_root : prev_node); - swap_node(node, top_root); - onig_node_free(top_root); - return 0; - - mem_err: - r = ONIGERR_MEMORY; - - err: - onig_node_free(top_root); - return r; -} - - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define CEC_THRES_NUM_BIG_REPEAT 512 -#define CEC_INFINITE_NUM 0x7fffffff - -#define CEC_IN_INFINITE_REPEAT (1<<0) -#define CEC_IN_FINITE_REPEAT (1<<1) -#define CEC_CONT_BIG_REPEAT (1<<2) - -static int -setup_comb_exp_check(Node* node, int state, ScanEnv* env) -{ - int type; - int r = state; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - Node* prev = NULL_NODE; - do { - r = setup_comb_exp_check(NCAR(node), r, env); - prev = NCAR(node); - } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_ALT: - { - int ret; - do { - ret = setup_comb_exp_check(NCAR(node), state, env); - r |= ret; - } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_QTFR: - { - int child_state = state; - int add_state = 0; - QtfrNode* qn = NQTFR(node); - Node* target = qn->target; - int var_num; - - if (! IS_REPEAT_INFINITE(qn->upper)) { - if (qn->upper > 1) { - /* {0,1}, {1,1} are allowed */ - child_state |= CEC_IN_FINITE_REPEAT; - - /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ - if (env->backrefed_mem == 0) { - if (NTYPE(qn->target) == NT_ENCLOSE) { - EncloseNode* en = NENCLOSE(qn->target); - if (en->type == ENCLOSE_MEMORY) { - if (NTYPE(en->target) == NT_QTFR) { - QtfrNode* q = NQTFR(en->target); - if (IS_REPEAT_INFINITE(q->upper) - && q->greedy == qn->greedy) { - qn->upper = (qn->lower == 0 ? 
1 : qn->lower); - if (qn->upper == 1) - child_state = state; - } - } - } - } - } - } - } - - if (state & CEC_IN_FINITE_REPEAT) { - qn->comb_exp_check_num = -1; - } - else { - if (IS_REPEAT_INFINITE(qn->upper)) { - var_num = CEC_INFINITE_NUM; - child_state |= CEC_IN_INFINITE_REPEAT; - } - else { - var_num = qn->upper - qn->lower; - } - - if (var_num >= CEC_THRES_NUM_BIG_REPEAT) - add_state |= CEC_CONT_BIG_REPEAT; - - if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || - ((state & CEC_CONT_BIG_REPEAT) != 0 && - var_num >= CEC_THRES_NUM_BIG_REPEAT)) { - if (qn->comb_exp_check_num == 0) { - env->num_comb_exp_check++; - qn->comb_exp_check_num = env->num_comb_exp_check; - if (env->curr_max_regnum > env->comb_exp_max_regnum) - env->comb_exp_max_regnum = env->curr_max_regnum; - } - } - } - - r = setup_comb_exp_check(target, child_state, env); - r |= add_state; - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - switch (en->type) { - case ENCLOSE_MEMORY: - { - if (env->curr_max_regnum < en->regnum) - env->curr_max_regnum = en->regnum; - - r = setup_comb_exp_check(en->target, state, env); - } - break; - - default: - r = setup_comb_exp_check(en->target, state, env); - break; - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) - env->has_recursion = 1; - else - r = setup_comb_exp_check(NCALL(node)->target, state, env); - break; -#endif - - default: - break; - } - - return r; -} -#endif - -#define IN_ALT (1<<0) -#define IN_NOT (1<<1) -#define IN_REPEAT (1<<2) -#define IN_VAR_REPEAT (1<<3) - -/* setup_tree does the following work. - 1. check empty loop. (set qn->target_empty_info) - 2. expand ignore-case in char class. - 3. set memory status bit flags. (reg->mem_stats) - 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. - 5. find invalid patterns in look-behind. - 6. expand repeated string. 
- */ -static int -setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - Node* prev = NULL_NODE; - do { - r = setup_tree(NCAR(node), reg, state, env); - if (IS_NOT_NULL(prev) && r == 0) { - r = next_setup(prev, NCAR(node), reg); - } - prev = NCAR(node); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_ALT: - do { - r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_CCLASS: - break; - - case NT_STR: - if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { - r = expand_case_fold_string(node, reg); - } - break; - - case NT_CTYPE: - case NT_CANY: - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - break; -#endif - - case NT_BREF: - { - int i; - int* p; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - p = BACKREFS_P(br); - for (i = 0; i < br->back_num; i++) { - if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); - BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); -#ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { - BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); - } -#endif - SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); - } - } - break; - - case NT_QTFR: - { - OnigDistance d; - QtfrNode* qn = NQTFR(node); - Node* target = qn->target; - - if ((state & IN_REPEAT) != 0) { - qn->state |= NST_IN_REPEAT; - } - - if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { - r = get_min_match_length(target, &d, env); - if (r) break; - if (d == 0) { - qn->target_empty_info = NQ_TARGET_IS_EMPTY; -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - r = quantifiers_memory_node_info(target); - if (r < 0) break; - if (r > 0) { - qn->target_empty_info = r; - } -#endif -#if 0 - r = get_max_match_length(target, &d, env); - if (r == 0 && d == 0) { - /* ()* ==> ()?, ()+ ==> () */ - qn->upper = 1; 
- if (qn->lower > 1) qn->lower = 1; - if (NTYPE(target) == NT_STR) { - qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ - } - } -#endif - } - } - - state |= IN_REPEAT; - if (qn->lower != qn->upper) - state |= IN_VAR_REPEAT; - r = setup_tree(target, reg, state, env); - if (r) break; - - /* expand string */ -#define EXPAND_STRING_MAX_LENGTH 100 - if (NTYPE(target) == NT_STR) { - if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && - qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { - int len = NSTRING_LEN(target); - StrNode* sn = NSTR(target); - - if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { - int i, n = qn->lower; - onig_node_conv_to_str_node(node, NSTR(target)->flag); - for (i = 0; i < n; i++) { - r = onig_node_str_cat(node, sn->s, sn->end); - if (r) break; - } - onig_node_free(target); - break; /* break case NT_QTFR: */ - } - } - } - -#ifdef USE_OP_PUSH_OR_JUMP_EXACT - if (qn->greedy && (qn->target_empty_info != 0)) { - if (NTYPE(target) == NT_QTFR) { - QtfrNode* tqn = NQTFR(target); - if (IS_NOT_NULL(tqn->head_exact)) { - qn->head_exact = tqn->head_exact; - tqn->head_exact = NULL; - } - } - else { - qn->head_exact = get_head_value_node(qn->target, 1, reg); - } - } -#endif - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - switch (en->type) { - case ENCLOSE_OPTION: - { - OnigOptionType options = reg->options; - reg->options = NENCLOSE(node)->option; - r = setup_tree(NENCLOSE(node)->target, reg, state, env); - reg->options = options; - } - break; - - case ENCLOSE_MEMORY: - if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { - BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); - /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ - } - r = setup_tree(en->target, reg, state, env); - break; - - case ENCLOSE_STOP_BACKTRACK: - { - Node* target = en->target; - r = setup_tree(target, reg, state, env); - if (NTYPE(target) == NT_QTFR) { - QtfrNode* tqn = NQTFR(target); - if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 
1 && - tqn->greedy != 0) { /* (?>a*), a*+ etc... */ - int qtype = NTYPE(tqn->target); - if (IS_NODE_TYPE_SIMPLE(qtype)) - SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); - } - } - } - break; - } - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - - switch (an->type) { - case ANCHOR_PREC_READ: - r = setup_tree(an->target, reg, state, env); - break; - case ANCHOR_PREC_READ_NOT: - r = setup_tree(an->target, reg, (state | IN_NOT), env); - break; - -/* allowed node types in look-behind */ -#define ALLOWED_TYPE_IN_LB \ - ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ - BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) - -#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) -#define ALLOWED_ENCLOSE_IN_LB_NOT 0 - -#define ALLOWED_ANCHOR_IN_LB \ -( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) -#define ALLOWED_ANCHOR_IN_LB_NOT \ -( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) - - case ANCHOR_LOOK_BEHIND: - { - r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = setup_look_behind(node, reg, env); - if (r != 0) return r; - r = setup_tree(an->target, reg, state, env); - } - break; - - case ANCHOR_LOOK_BEHIND_NOT: - { - r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = setup_look_behind(node, reg, env); - if (r != 0) return r; - r = setup_tree(an->target, reg, (state | IN_NOT), env); - } - break; - } - } - break; - - default: - break; - } - - return r; -} - -/* set skip map for Boyer-Moor search */ -static int -set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - UChar skip[], int** 
int_skip) -{ - int i, len; - - len = end - s; - if (len < ONIG_CHAR_TABLE_SIZE) { - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; - - for (i = 0; i < len - 1; i++) - skip[s[i]] = len - 1 - i; - } - else { - if (IS_NULL(*int_skip)) { - *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; - } - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; - - for (i = 0; i < len - 1; i++) - (*int_skip)[s[i]] = len - 1 - i; - } - return 0; -} - -#define OPT_EXACT_MAXLEN 24 - -typedef struct { - OnigDistance min; /* min byte length */ - OnigDistance max; /* max byte length */ -} MinMaxLen; - -typedef struct { - MinMaxLen mmd; - OnigEncoding enc; - OnigOptionType options; - OnigCaseFoldType case_fold_flag; - ScanEnv* scan_env; -} OptEnv; - -typedef struct { - int left_anchor; - int right_anchor; -} OptAncInfo; - -typedef struct { - MinMaxLen mmd; /* info position */ - OptAncInfo anc; - - int reach_end; - int ignore_case; - int len; - UChar s[OPT_EXACT_MAXLEN]; -} OptExactInfo; - -typedef struct { - MinMaxLen mmd; /* info position */ - OptAncInfo anc; - - int value; /* weighted value */ - UChar map[ONIG_CHAR_TABLE_SIZE]; -} OptMapInfo; - -typedef struct { - MinMaxLen len; - - OptAncInfo anc; - OptExactInfo exb; /* boundary */ - OptExactInfo exm; /* middle */ - OptExactInfo expr; /* prec read (?=...) 
*/ - - OptMapInfo map; /* boundary */ -} NodeOptInfo; - - -static int -map_position_value(OnigEncoding enc, int i) -{ - static const short int ByteValTable[] = { - 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, - 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, - 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 - }; - - if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { - if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) - return 20; - else - return (int )ByteValTable[i]; - } - else - return 4; /* Take it easy. */ -} - -static int -distance_value(MinMaxLen* mm) -{ - /* 1000 / (min-max-dist + 1) */ - static const short int dist_vals[] = { - 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, - 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, - 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, - 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, - 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, - 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, - 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, - 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, - 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 - }; - - int d; - - if (mm->max == ONIG_INFINITE_DISTANCE) return 0; - - d = mm->max - mm->min; - if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0]))) - /* return dist_vals[d] * 16 / (mm->min + 12); */ - return (int )dist_vals[d]; - else - return 1; -} - -static int -comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) -{ - if (v2 <= 0) return -1; - if (v1 <= 0) return 1; - - v1 *= distance_value(d1); - v2 *= distance_value(d2); - - if (v2 > v1) return 1; - if (v2 < v1) return -1; - - if (d2->min < d1->min) return 1; - if (d2->min > d1->min) return -1; - return 0; -} - -static int -is_equal_mml(MinMaxLen* a, MinMaxLen* b) -{ - return (a->min 
== b->min && a->max == b->max) ? 1 : 0; -} - - -static void -set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) -{ - mml->min = min; - mml->max = max; -} - -static void -clear_mml(MinMaxLen* mml) -{ - mml->min = mml->max = 0; -} - -static void -copy_mml(MinMaxLen* to, MinMaxLen* from) -{ - to->min = from->min; - to->max = from->max; -} - -static void -add_mml(MinMaxLen* to, MinMaxLen* from) -{ - to->min = distance_add(to->min, from->min); - to->max = distance_add(to->max, from->max); -} - -#if 0 -static void -add_len_mml(MinMaxLen* to, OnigDistance len) -{ - to->min = distance_add(to->min, len); - to->max = distance_add(to->max, len); -} -#endif - -static void -alt_merge_mml(MinMaxLen* to, MinMaxLen* from) -{ - if (to->min > from->min) to->min = from->min; - if (to->max < from->max) to->max = from->max; -} - -static void -copy_opt_env(OptEnv* to, OptEnv* from) -{ - *to = *from; -} - -static void -clear_opt_anc_info(OptAncInfo* anc) -{ - anc->left_anchor = 0; - anc->right_anchor = 0; -} - -static void -copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) -{ - *to = *from; -} - -static void -concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, - OnigDistance left_len, OnigDistance right_len) -{ - clear_opt_anc_info(to); - - to->left_anchor = left->left_anchor; - if (left_len == 0) { - to->left_anchor |= right->left_anchor; - } - - to->right_anchor = right->right_anchor; - if (right_len == 0) { - to->right_anchor |= left->right_anchor; - } -} - -static int -is_left_anchor(int anc) -{ - if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || - anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || - anc == ANCHOR_PREC_READ_NOT) - return 0; - - return 1; -} - -static int -is_set_opt_anc_info(OptAncInfo* to, int anc) -{ - if ((to->left_anchor & anc) != 0) return 1; - - return ((to->right_anchor & anc) != 0 ? 
1 : 0); -} - -static void -add_opt_anc_info(OptAncInfo* to, int anc) -{ - if (is_left_anchor(anc)) - to->left_anchor |= anc; - else - to->right_anchor |= anc; -} - -static void -remove_opt_anc_info(OptAncInfo* to, int anc) -{ - if (is_left_anchor(anc)) - to->left_anchor &= ~anc; - else - to->right_anchor &= ~anc; -} - -static void -alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) -{ - to->left_anchor &= add->left_anchor; - to->right_anchor &= add->right_anchor; -} - -static int -is_full_opt_exact_info(OptExactInfo* ex) -{ - return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); -} - -static void -clear_opt_exact_info(OptExactInfo* ex) -{ - clear_mml(&ex->mmd); - clear_opt_anc_info(&ex->anc); - ex->reach_end = 0; - ex->ignore_case = 0; - ex->len = 0; - ex->s[0] = '\0'; -} - -static void -copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) -{ - *to = *from; -} - -static void -concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) -{ - int i, j, len; - UChar *p, *end; - OptAncInfo tanc; - - if (! to->ignore_case && add->ignore_case) { - if (to->len >= add->len) return ; /* avoid */ - - to->ignore_case = 1; - } - - p = add->s; - end = p + add->len; - for (i = to->len; p < end; ) { - len = enclen(enc, p); - if (i + len > OPT_EXACT_MAXLEN) break; - for (j = 0; j < len && p < end; j++) - to->s[i++] = *p++; - } - - to->len = i; - to->reach_end = (p == end ? add->reach_end : 0); - - concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); - if (! 
to->reach_end) tanc.right_anchor = 0; - copy_opt_anc_info(&to->anc, &tanc); -} - -static void -concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, - int raw ARG_UNUSED, OnigEncoding enc) -{ - int i, j, len; - UChar *p; - - for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { - len = enclen(enc, p); - if (i + len > OPT_EXACT_MAXLEN) break; - for (j = 0; j < len && p < end; j++) - to->s[i++] = *p++; - } - - to->len = i; -} - -static void -alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) -{ - int i, j, len; - - if (add->len == 0 || to->len == 0) { - clear_opt_exact_info(to); - return ; - } - - if (! is_equal_mml(&to->mmd, &add->mmd)) { - clear_opt_exact_info(to); - return ; - } - - for (i = 0; i < to->len && i < add->len; ) { - if (to->s[i] != add->s[i]) break; - len = enclen(env->enc, to->s + i); - - for (j = 1; j < len; j++) { - if (to->s[i+j] != add->s[i+j]) break; - } - if (j < len) break; - i += len; - } - - if (! add->reach_end || i < add->len || i < to->len) { - to->reach_end = 0; - } - to->len = i; - to->ignore_case |= add->ignore_case; - - alt_merge_opt_anc_info(&to->anc, &add->anc); - if (! 
to->reach_end) to->anc.right_anchor = 0; -} - -static void -select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) -{ - int v1, v2; - - v1 = now->len; - v2 = alt->len; - - if (v2 == 0) { - return ; - } - else if (v1 == 0) { - copy_opt_exact_info(now, alt); - return ; - } - else if (v1 <= 2 && v2 <= 2) { - /* ByteValTable[x] is big value --> low price */ - v2 = map_position_value(enc, now->s[0]); - v1 = map_position_value(enc, alt->s[0]); - - if (now->len > 1) v1 += 5; - if (alt->len > 1) v2 += 5; - } - - if (now->ignore_case == 0) v1 *= 2; - if (alt->ignore_case == 0) v2 *= 2; - - if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) - copy_opt_exact_info(now, alt); -} - -static void -clear_opt_map_info(OptMapInfo* map) -{ - static const OptMapInfo clean_info = { - {0, 0}, {0, 0}, 0, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - } - }; - - xmemcpy(map, &clean_info, sizeof(OptMapInfo)); -} - -static void -copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) -{ - *to = *from; -} - -static void -add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) -{ - if (map->map[c] == 0) { - map->map[c] = 1; - map->value += map_position_value(enc, c); - } -} - -static int 
-add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, - OnigEncoding enc, OnigCaseFoldType case_fold_flag) -{ - OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - int i, n; - - add_char_opt_map_info(map, p[0], enc); - - case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); - if (n < 0) return n; - - for (i = 0; i < n; i++) { - ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); - add_char_opt_map_info(map, buf[0], enc); - } - - return 0; -} - -static void -select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) -{ - static int z = 1<<15; /* 32768: something big value */ - - int v1, v2; - - if (alt->value == 0) return ; - if (now->value == 0) { - copy_opt_map_info(now, alt); - return ; - } - - v1 = z / now->value; - v2 = z / alt->value; - if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) - copy_opt_map_info(now, alt); -} - -static int -comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) -{ -#define COMP_EM_BASE 20 - int ve, vm; - - if (m->value <= 0) return -1; - - ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); - vm = COMP_EM_BASE * 5 * 2 / m->value; - return comp_distance_value(&e->mmd, &m->mmd, ve, vm); -} - -static void -alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) -{ - int i, val; - - /* if (! 
is_equal_mml(&to->mmd, &add->mmd)) return ; */ - if (to->value == 0) return ; - if (add->value == 0 || to->mmd.max < add->mmd.min) { - clear_opt_map_info(to); - return ; - } - - alt_merge_mml(&to->mmd, &add->mmd); - - val = 0; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { - if (add->map[i]) - to->map[i] = 1; - - if (to->map[i]) - val += map_position_value(enc, i); - } - to->value = val; - - alt_merge_opt_anc_info(&to->anc, &add->anc); -} - -static void -set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) -{ - copy_mml(&(opt->exb.mmd), mmd); - copy_mml(&(opt->expr.mmd), mmd); - copy_mml(&(opt->map.mmd), mmd); -} - -static void -clear_node_opt_info(NodeOptInfo* opt) -{ - clear_mml(&opt->len); - clear_opt_anc_info(&opt->anc); - clear_opt_exact_info(&opt->exb); - clear_opt_exact_info(&opt->exm); - clear_opt_exact_info(&opt->expr); - clear_opt_map_info(&opt->map); -} - -static void -copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) -{ - *to = *from; -} - -static void -concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) -{ - int exb_reach, exm_reach; - OptAncInfo tanc; - - concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); - copy_opt_anc_info(&to->anc, &tanc); - - if (add->exb.len > 0 && to->len.max == 0) { - concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, - to->len.max, add->len.max); - copy_opt_anc_info(&add->exb.anc, &tanc); - } - - if (add->map.value > 0 && to->len.max == 0) { - if (add->map.mmd.max == 0) - add->map.anc.left_anchor |= to->anc.left_anchor; - } - - exb_reach = to->exb.reach_end; - exm_reach = to->exm.reach_end; - - if (add->len.max != 0) - to->exb.reach_end = to->exm.reach_end = 0; - - if (add->exb.len > 0) { - if (exb_reach) { - concat_opt_exact_info(&to->exb, &add->exb, enc); - clear_opt_exact_info(&add->exb); - } - else if (exm_reach) { - concat_opt_exact_info(&to->exm, &add->exb, enc); - clear_opt_exact_info(&add->exb); - } - } - select_opt_exact_info(enc, &to->exm, &add->exb); - 
select_opt_exact_info(enc, &to->exm, &add->exm); - - if (to->expr.len > 0) { - if (add->len.max > 0) { - if (to->expr.len > (int )add->len.max) - to->expr.len = add->len.max; - - if (to->expr.mmd.max == 0) - select_opt_exact_info(enc, &to->exb, &to->expr); - else - select_opt_exact_info(enc, &to->exm, &to->expr); - } - } - else if (add->expr.len > 0) { - copy_opt_exact_info(&to->expr, &add->expr); - } - - select_opt_map_info(&to->map, &add->map); - - add_mml(&to->len, &add->len); -} - -static void -alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) -{ - alt_merge_opt_anc_info (&to->anc, &add->anc); - alt_merge_opt_exact_info(&to->exb, &add->exb, env); - alt_merge_opt_exact_info(&to->exm, &add->exm, env); - alt_merge_opt_exact_info(&to->expr, &add->expr, env); - alt_merge_opt_map_info(env->enc, &to->map, &add->map); - - alt_merge_mml(&to->len, &add->len); -} - - -#define MAX_NODE_OPT_INFO_REF_COUNT 5 - -static int -optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) -{ - int type; - int r = 0; - - clear_node_opt_info(opt); - set_bound_node_opt_info(opt, &env->mmd); - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - OptEnv nenv; - NodeOptInfo nopt; - Node* nd = node; - - copy_opt_env(&nenv, env); - do { - r = optimize_node_left(NCAR(nd), &nopt, &nenv); - if (r == 0) { - add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(env->enc, opt, &nopt); - } - } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); - } - break; - - case NT_ALT: - { - NodeOptInfo nopt; - Node* nd = node; - - do { - r = optimize_node_left(NCAR(nd), &nopt, env); - if (r == 0) { - if (nd == node) copy_node_opt_info(opt, &nopt); - else alt_merge_node_opt_info(opt, &nopt, env); - } - } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); - } - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - int slen = sn->end - sn->s; - int is_raw = NSTRING_IS_RAW(node); - - if (! 
NSTRING_IS_AMBIG(node)) { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - NSTRING_IS_RAW(node), env->enc); - if (slen > 0) { - add_char_opt_map_info(&opt->map, *(sn->s), env->enc); - } - set_mml(&opt->len, slen, slen); - } - else { - int max; - - if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { - int n = onigenc_strlen(env->enc, sn->s, sn->end); - max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; - } - else { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - is_raw, env->enc); - opt->exb.ignore_case = 1; - - if (slen > 0) { - r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, - env->enc, env->case_fold_flag); - if (r != 0) break; - } - - max = slen; - } - - set_mml(&opt->len, slen, max); - } - - if (opt->exb.len == slen) - opt->exb.reach_end = 1; - } - break; - - case NT_CCLASS: - { - int i, z; - CClassNode* cc = NCCLASS(node); - - /* no need to check ignore case. (setted in setup_tree()) */ - - if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { - OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); - OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - - set_mml(&opt->len, min, max); - } - else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - z = BITSET_AT(cc->bs, i); - if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - set_mml(&opt->len, 1, 1); - } - } - break; - - case NT_CTYPE: - { - int i, min, max; - - max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - - if (max == 1) { - min = 1; - - switch (NCTYPE(node)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->not != 0) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! 
ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - } - else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - } - break; - } - } - else { - min = ONIGENC_MBC_MINLEN(env->enc); - } - set_mml(&opt->len, min, max); - } - break; - - case NT_CANY: - { - OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); - OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - set_mml(&opt->len, min, max); - } - break; - - case NT_ANCHOR: - switch (NANCHOR(node)->type) { - case ANCHOR_BEGIN_BUF: - case ANCHOR_BEGIN_POSITION: - case ANCHOR_BEGIN_LINE: - case ANCHOR_END_BUF: - case ANCHOR_SEMI_END_BUF: - case ANCHOR_END_LINE: - add_opt_anc_info(&opt->anc, NANCHOR(node)->type); - break; - - case ANCHOR_PREC_READ: - { - NodeOptInfo nopt; - - r = optimize_node_left(NANCHOR(node)->target, &nopt, env); - if (r == 0) { - if (nopt.exb.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exb); - else if (nopt.exm.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exm); - - opt->expr.reach_end = 0; - - if (nopt.map.value > 0) - copy_opt_map_info(&opt->map, &nopt.map); - } - } - break; - - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. 
*/ - case ANCHOR_LOOK_BEHIND_NOT: - break; - } - break; - - case NT_BREF: - { - int i; - int* backs; - OnigDistance min, max, tmin, tmax; - Node** nodes = SCANENV_MEM_NODES(env->scan_env); - BRefNode* br = NBREF(node); - - if (br->state & NST_RECURSION) { - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); - break; - } - backs = BACKREFS_P(br); - r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); - if (r != 0) break; - r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); - if (r != 0) break; - for (i = 1; i < br->back_num; i++) { - r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); - if (r != 0) break; - r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); - if (r != 0) break; - if (min > tmin) min = tmin; - if (max < tmax) max = tmax; - } - if (r == 0) set_mml(&opt->len, min, max); - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); - else { - OnigOptionType save = env->options; - env->options = NENCLOSE(NCALL(node)->target)->option; - r = optimize_node_left(NCALL(node)->target, opt, env); - env->options = save; - } - break; -#endif - - case NT_QTFR: - { - int i; - OnigDistance min, max; - NodeOptInfo nopt; - QtfrNode* qn = NQTFR(node); - - r = optimize_node_left(qn->target, &nopt, env); - if (r) break; - - if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { - if (env->mmd.max == 0 && - NTYPE(qn->target) == NT_CANY && qn->greedy) { - if (IS_MULTILINE(env->options)) - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); - else - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); - } - } - else { - if (qn->lower > 0) { - copy_node_opt_info(opt, &nopt); - if (nopt.exb.len > 0) { - if (nopt.exb.reach_end) { - for (i = 2; i <= qn->lower && - ! 
is_full_opt_exact_info(&opt->exb); i++) { - concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); - } - if (i < qn->lower) { - opt->exb.reach_end = 0; - } - } - } - - if (qn->lower != qn->upper) { - opt->exb.reach_end = 0; - opt->exm.reach_end = 0; - } - if (qn->lower > 1) - opt->exm.reach_end = 0; - } - } - - min = distance_multiply(nopt.len.min, qn->lower); - if (IS_REPEAT_INFINITE(qn->upper)) - max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); - else - max = distance_multiply(nopt.len.max, qn->upper); - - set_mml(&opt->len, min, max); - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - switch (en->type) { - case ENCLOSE_OPTION: - { - OnigOptionType save = env->options; - - env->options = en->option; - r = optimize_node_left(en->target, opt, env); - env->options = save; - } - break; - - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - en->opt_count++; - if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { - OnigDistance min, max; - - min = 0; - max = ONIG_INFINITE_DISTANCE; - if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; - if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; - set_mml(&opt->len, min, max); - } - else -#endif - { - r = optimize_node_left(en->target, opt, env); - - if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { - if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) - remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); - } - } - break; - - case ENCLOSE_STOP_BACKTRACK: - r = optimize_node_left(en->target, opt, env); - break; - } - } - break; - - default: -#ifdef ONIG_DEBUG - fprintf(stderr, "optimize_node_left: undefined node type %d\n", - NTYPE(node)); -#endif - r = ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -set_optimize_exact_info(regex_t* reg, OptExactInfo* e) -{ - int r; - - if (e->len == 0) return 0; - - if (e->ignore_case) { - reg->exact = (UChar* )xmalloc(e->len); - CHECK_NULL_RETURN_MEMERR(reg->exact); - xmemcpy(reg->exact, e->s, e->len); - reg->exact_end = 
reg->exact + e->len; - reg->optimize = ONIG_OPTIMIZE_EXACT_IC; - } - else { - int allow_reverse; - - reg->exact = str_dup(e->s, e->s + e->len); - CHECK_NULL_RETURN_MEMERR(reg->exact); - reg->exact_end = reg->exact + e->len; - - allow_reverse = - ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); - - if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { - r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, - reg->map, &(reg->int_map)); - if (r) return r; - - reg->optimize = (allow_reverse != 0 - ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); - } - else { - reg->optimize = ONIG_OPTIMIZE_EXACT; - } - } - - reg->dmin = e->mmd.min; - reg->dmax = e->mmd.max; - - if (reg->dmin != ONIG_INFINITE_DISTANCE) { - reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); - } - - return 0; -} - -static void -set_optimize_map_info(regex_t* reg, OptMapInfo* m) -{ - int i; - - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - reg->map[i] = m->map[i]; - - reg->optimize = ONIG_OPTIMIZE_MAP; - reg->dmin = m->mmd.min; - reg->dmax = m->mmd.max; - - if (reg->dmin != ONIG_INFINITE_DISTANCE) { - reg->threshold_len = reg->dmin + 1; - } -} - -static void -set_sub_anchor(regex_t* reg, OptAncInfo* anc) -{ - reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; - reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; -} - -#ifdef ONIG_DEBUG -static void print_optimize_info(FILE* f, regex_t* reg); -#endif - -static int -set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) -{ - - int r; - NodeOptInfo opt; - OptEnv env; - - env.enc = reg->enc; - env.options = reg->options; - env.case_fold_flag = reg->case_fold_flag; - env.scan_env = scan_env; - clear_mml(&env.mmd); - - r = optimize_node_left(node, &opt, &env); - if (r) return r; - - reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | - ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); - - reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); 
- - if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { - reg->anchor_dmin = opt.len.min; - reg->anchor_dmax = opt.len.max; - } - - if (opt.exb.len > 0 || opt.exm.len > 0) { - select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); - if (opt.map.value > 0 && - comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { - goto set_map; - } - else { - r = set_optimize_exact_info(reg, &opt.exb); - set_sub_anchor(reg, &opt.exb.anc); - } - } - else if (opt.map.value > 0) { - set_map: - set_optimize_map_info(reg, &opt.map); - set_sub_anchor(reg, &opt.map.anc); - } - else { - reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; - if (opt.len.max == 0) - reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; - } - -#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) - print_optimize_info(stderr, reg); -#endif - return r; -} - -static void -clear_optimize_info(regex_t* reg) -{ - reg->optimize = ONIG_OPTIMIZE_NONE; - reg->anchor = 0; - reg->anchor_dmin = 0; - reg->anchor_dmax = 0; - reg->sub_anchor = 0; - reg->exact_end = (UChar* )NULL; - reg->threshold_len = 0; - if (IS_NOT_NULL(reg->exact)) { - xfree(reg->exact); - reg->exact = (UChar* )NULL; - } -} - -#ifdef ONIG_DEBUG - -static void print_enc_string(FILE* fp, OnigEncoding enc, - const UChar *s, const UChar *end) -{ - fprintf(fp, "\nPATTERN: /"); - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - const UChar *p; - OnigCodePoint code; - - p = s; - while (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (code >= 0x80) { - fprintf(fp, " 0x%04x ", (int )code); - } - else { - fputc((int )code, fp); - } - - p += enclen(enc, p); - } - } - else { - while (s < end) { - fputc((int )*s, fp); - s++; - } - } - - fprintf(fp, "/\n"); -} - -static void -print_distance_range(FILE* f, OnigDistance a, OnigDistance b) -{ - if (a == ONIG_INFINITE_DISTANCE) - fputs("inf", f); - else - fprintf(f, "(%u)", a); - - fputs("-", f); - - if (b == ONIG_INFINITE_DISTANCE) - fputs("inf", f); - else - fprintf(f, "(%u)", b); -} - -static 
void -print_anchor(FILE* f, int anchor) -{ - int q = 0; - - fprintf(f, "["); - - if (anchor & ANCHOR_BEGIN_BUF) { - fprintf(f, "begin-buf"); - q = 1; - } - if (anchor & ANCHOR_BEGIN_LINE) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "begin-line"); - } - if (anchor & ANCHOR_BEGIN_POSITION) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "begin-pos"); - } - if (anchor & ANCHOR_END_BUF) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "end-buf"); - } - if (anchor & ANCHOR_SEMI_END_BUF) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "semi-end-buf"); - } - if (anchor & ANCHOR_END_LINE) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "end-line"); - } - if (anchor & ANCHOR_ANYCHAR_STAR) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "anychar-star"); - } - if (anchor & ANCHOR_ANYCHAR_STAR_ML) { - if (q) fprintf(f, ", "); - fprintf(f, "anychar-star-pl"); - } - - fprintf(f, "]"); -} - -static void -print_optimize_info(FILE* f, regex_t* reg) -{ - static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", - "EXACT_IC", "MAP" }; - - fprintf(f, "optimize: %s\n", on[reg->optimize]); - fprintf(f, " anchor: "); print_anchor(f, reg->anchor); - if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) - print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); - fprintf(f, "\n"); - - if (reg->optimize) { - fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); - fprintf(f, "\n"); - } - fprintf(f, "\n"); - - if (reg->exact) { - UChar *p; - fprintf(f, "exact: ["); - for (p = reg->exact; p < reg->exact_end; p++) { - fputc(*p, f); - } - fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact)); - } - else if (reg->optimize & ONIG_OPTIMIZE_MAP) { - int c, i, n = 0; - - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - if (reg->map[i]) n++; - - fprintf(f, "map: n=%d\n", n); - if (n > 0) { - c = 0; - fputc('[', f); - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { - if (reg->map[i] != 0) { - if (c > 0) fputs(", ", f); - c++; - if 
(ONIGENC_MBC_MAXLEN(reg->enc) == 1 && - ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) - fputc(i, f); - else - fprintf(f, "%d", i); - } - } - fprintf(f, "]\n"); - } - } -} -#endif /* ONIG_DEBUG */ - - -extern void -onig_free_body(regex_t* reg) -{ - if (IS_NOT_NULL(reg)) { - if (IS_NOT_NULL(reg->p)) xfree(reg->p); - if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); - if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); - if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); - if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); - -#ifdef USE_NAMED_GROUP - onig_names_free(reg); -#endif - } -} - -extern void -onig_free(regex_t* reg) -{ - if (IS_NOT_NULL(reg)) { - onig_free_body(reg); - xfree(reg); - } -} - -#define REGEX_TRANSFER(to,from) do {\ - (to)->state = ONIG_STATE_MODIFY;\ - onig_free_body(to);\ - xmemcpy(to, from, sizeof(regex_t));\ - xfree(from);\ -} while (0) - -extern void -onig_transfer(regex_t* to, regex_t* from) -{ - THREAD_ATOMIC_START; - REGEX_TRANSFER(to, from); - THREAD_ATOMIC_END; -} - -#define REGEX_CHAIN_HEAD(reg) do {\ - while (IS_NOT_NULL((reg)->chain)) {\ - (reg) = (reg)->chain;\ - }\ -} while (0) - -extern void -onig_chain_link_add(regex_t* to, regex_t* add) -{ - THREAD_ATOMIC_START; - REGEX_CHAIN_HEAD(to); - to->chain = add; - THREAD_ATOMIC_END; -} - -extern void -onig_chain_reduce(regex_t* reg) -{ - regex_t *head, *prev; - - prev = reg; - head = prev->chain; - if (IS_NOT_NULL(head)) { - reg->state = ONIG_STATE_MODIFY; - while (IS_NOT_NULL(head->chain)) { - prev = head; - head = head->chain; - } - prev->chain = (regex_t* )NULL; - REGEX_TRANSFER(reg, head); - } -} - -#ifdef ONIG_DEBUG -static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); -#endif -#ifdef ONIG_DEBUG_PARSE_TREE -static void print_tree P_((FILE* f, Node* node)); -#endif - -extern int -onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigErrorInfo* einfo) 
-{ -#define COMPILE_INIT_SIZE 20 - - int r, init_size; - Node* root; - ScanEnv scan_env; -#ifdef USE_SUBEXP_CALL - UnsetAddrList uslist; -#endif - - if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; - - reg->state = ONIG_STATE_COMPILING; - -#ifdef ONIG_DEBUG - print_enc_string(stderr, reg->enc, pattern, pattern_end); -#endif - - if (reg->alloc == 0) { - init_size = (pattern_end - pattern) * 2; - if (init_size <= 0) init_size = COMPILE_INIT_SIZE; - r = BBUF_INIT(reg, init_size); - if (r != 0) goto end; - } - else - reg->used = 0; - - reg->num_mem = 0; - reg->num_repeat = 0; - reg->num_null_check = 0; - reg->repeat_range_alloc = 0; - reg->repeat_range = (OnigRepeatRange* )NULL; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - reg->num_comb_exp_check = 0; -#endif - - r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); - if (r != 0) goto err; - -#ifdef USE_NAMED_GROUP - /* mixed use named group and no-named group */ - if (scan_env.num_named > 0 && - IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { - if (scan_env.num_named != scan_env.num_mem) - r = disable_noname_group_capture(&root, reg, &scan_env); - else - r = numbered_ref_check(root); - - if (r != 0) goto err; - } -#endif - -#ifdef USE_SUBEXP_CALL - if (scan_env.num_call > 0) { - r = unset_addr_list_init(&uslist, scan_env.num_call); - if (r != 0) goto err; - scan_env.unset_addr_list = &uslist; - r = setup_subexp_call(root, &scan_env); - if (r != 0) goto err_unset; - r = subexp_recursive_check_trav(root, &scan_env); - if (r < 0) goto err_unset; - r = subexp_inf_recursive_check_trav(root, &scan_env); - if (r != 0) goto err_unset; - - reg->num_call = scan_env.num_call; - } - else - reg->num_call = 0; -#endif - - r = setup_tree(root, reg, 0, &scan_env); - if (r != 0) goto err_unset; - -#ifdef ONIG_DEBUG_PARSE_TREE - print_tree(stderr, root); -#endif - - reg->capture_history = scan_env.capture_history; - reg->bt_mem_start = 
scan_env.bt_mem_start; - reg->bt_mem_start |= reg->capture_history; - if (IS_FIND_CONDITION(reg->options)) - BIT_STATUS_ON_ALL(reg->bt_mem_end); - else { - reg->bt_mem_end = scan_env.bt_mem_end; - reg->bt_mem_end |= reg->capture_history; - } - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (scan_env.backrefed_mem == 0 -#ifdef USE_SUBEXP_CALL - || scan_env.num_call == 0 -#endif - ) { - setup_comb_exp_check(root, 0, &scan_env); -#ifdef USE_SUBEXP_CALL - if (scan_env.has_recursion != 0) { - scan_env.num_comb_exp_check = 0; - } - else -#endif - if (scan_env.comb_exp_max_regnum > 0) { - int i; - for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { - if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { - scan_env.num_comb_exp_check = 0; - break; - } - } - } - } - - reg->num_comb_exp_check = scan_env.num_comb_exp_check; -#endif - - clear_optimize_info(reg); -#ifndef ONIG_DONT_OPTIMIZE - r = set_optimize_info_from_tree(root, reg, &scan_env); - if (r != 0) goto err_unset; -#endif - - if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { - xfree(scan_env.mem_nodes_dynamic); - scan_env.mem_nodes_dynamic = (Node** )NULL; - } - - r = compile_tree(root, reg); - if (r == 0) { - r = add_opcode(reg, OP_END); -#ifdef USE_SUBEXP_CALL - if (scan_env.num_call > 0) { - r = unset_addr_list_fix(&uslist, reg); - unset_addr_list_end(&uslist); - if (r) goto err; - } -#endif - - if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) - reg->stack_pop_level = STACK_POP_LEVEL_ALL; - else { - if (reg->bt_mem_start != 0) - reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; - else - reg->stack_pop_level = STACK_POP_LEVEL_FREE; - } - } -#ifdef USE_SUBEXP_CALL - else if (scan_env.num_call > 0) { - unset_addr_list_end(&uslist); - } -#endif - onig_node_free(root); - -#ifdef ONIG_DEBUG_COMPILE -#ifdef USE_NAMED_GROUP - onig_print_names(stderr, reg); -#endif - print_compiled_byte_code_list(stderr, reg); -#endif - - end: - reg->state = ONIG_STATE_NORMAL; - return r; - - err_unset: -#ifdef USE_SUBEXP_CALL - if 
(scan_env.num_call > 0) { - unset_addr_list_end(&uslist); - } -#endif - err: - if (IS_NOT_NULL(scan_env.error)) { - if (IS_NOT_NULL(einfo)) { - einfo->enc = scan_env.enc; - einfo->par = scan_env.error; - einfo->par_end = scan_env.error_end; - } - } - - onig_node_free(root); - if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) - xfree(scan_env.mem_nodes_dynamic); - return r; -} - -#ifdef USE_RECOMPILE_API -extern int -onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, - OnigErrorInfo* einfo) -{ - int r; - regex_t *new_reg; - - r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); - if (r) return r; - if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_transfer(reg, new_reg); - } - else { - onig_chain_link_add(reg, new_reg); - } - return 0; -} -#endif - -static int onig_inited = 0; - -extern int -onig_reg_init(regex_t* reg, OnigOptionType option, - OnigCaseFoldType case_fold_flag, - OnigEncoding enc, OnigSyntaxType* syntax) -{ - if (! 
onig_inited) - onig_init(); - - if (IS_NULL(reg)) - return ONIGERR_INVALID_ARGUMENT; - - if (ONIGENC_IS_UNDEF(enc)) - return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; - - if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) - == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { - return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; - } - - (reg)->state = ONIG_STATE_MODIFY; - - if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { - option |= syntax->options; - option &= ~ONIG_OPTION_SINGLELINE; - } - else - option |= syntax->options; - - (reg)->enc = enc; - (reg)->options = option; - (reg)->syntax = syntax; - (reg)->optimize = 0; - (reg)->exact = (UChar* )NULL; - (reg)->int_map = (int* )NULL; - (reg)->int_map_backward = (int* )NULL; - (reg)->chain = (regex_t* )NULL; - - (reg)->p = (UChar* )NULL; - (reg)->alloc = 0; - (reg)->used = 0; - (reg)->name_table = (void* )NULL; - - (reg)->case_fold_flag = case_fold_flag; - return 0; -} - -extern int -onig_new_without_alloc(regex_t* reg, const UChar* pattern, - const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, - OnigSyntaxType* syntax, OnigErrorInfo* einfo) -{ - int r; - - r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); - if (r) return r; - - r = onig_compile(reg, pattern, pattern_end, einfo); - return r; -} - -extern int -onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, - OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, - OnigErrorInfo* einfo) -{ - int r; - - *reg = (regex_t* )xmalloc(sizeof(regex_t)); - if (IS_NULL(*reg)) return ONIGERR_MEMORY; - - r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); - if (r) goto err; - - r = onig_compile(*reg, pattern, pattern_end, einfo); - if (r) { - err: - onig_free(*reg); - *reg = NULL; - } - return r; -} - - -extern int -onig_init(void) -{ - if (onig_inited != 0) - return 0; - - THREAD_SYSTEM_INIT; - THREAD_ATOMIC_START; - - onig_inited = 1; - - onigenc_init(); 
- /* onigenc_set_default_caseconv_table((UChar* )0); */ - -#ifdef ONIG_DEBUG_STATISTICS - onig_statistics_init(); -#endif - - THREAD_ATOMIC_END; - return 0; -} - - -static OnigEndCallListItemType* EndCallTop; - -extern void onig_add_end_call(void (*func)(void)) -{ - OnigEndCallListItemType* item; - - item = (OnigEndCallListItemType* )xmalloc(sizeof(*item)); - if (item == 0) return ; - - item->next = EndCallTop; - item->func = func; - - EndCallTop = item; -} - -static void -exec_end_call_list(void) -{ - OnigEndCallListItemType* prev; - void (*func)(void); - - while (EndCallTop != 0) { - func = EndCallTop->func; - (*func)(); - - prev = EndCallTop; - EndCallTop = EndCallTop->next; - xfree(prev); - } -} - -extern int -onig_end(void) -{ - THREAD_ATOMIC_START; - - exec_end_call_list(); - -#ifdef ONIG_DEBUG_STATISTICS - onig_print_statistics(stderr); -#endif - -#ifdef USE_SHARED_CCLASS_TABLE - onig_free_shared_cclass_table(); -#endif - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - onig_free_node_list(); -#endif - - onig_inited = 0; - - THREAD_ATOMIC_END; - THREAD_SYSTEM_END; - return 0; -} - -extern int -onig_is_in_code_range(const UChar* p, OnigCodePoint code) -{ - OnigCodePoint n, *data; - OnigCodePoint low, high, x; - - GET_CODE_POINT(n, p); - data = (OnigCodePoint* )p; - data++; - - for (low = 0, high = n; low < high; ) { - x = (low + high) >> 1; - if (code > data[x * 2 + 1]) - low = x + 1; - else - high = x; - } - - return ((low < n && code >= data[low * 2]) ? 1 : 0); -} - -extern int -onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) -{ - int found; - - if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { - if (IS_NULL(cc->mbuf)) { - found = 0; - } - else { - found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); - } - } - else { - found = (BITSET_AT(cc->bs, code) == 0 ? 
0 : 1); - } - - if (IS_NCCLASS_NOT(cc)) - return !found; - else - return found; -} - -extern int -onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) -{ - int len; - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - len = 2; - } - else { - len = ONIGENC_CODE_TO_MBCLEN(enc, code); - } - return onig_is_code_in_cc_len(len, code, cc); -} - - -#ifdef ONIG_DEBUG - -/* arguments type */ -#define ARG_SPECIAL -1 -#define ARG_NON 0 -#define ARG_RELADDR 1 -#define ARG_ABSADDR 2 -#define ARG_LENGTH 3 -#define ARG_MEMNUM 4 -#define ARG_OPTION 5 -#define ARG_STATE_CHECK 6 - -OnigOpInfoType OnigOpInfo[] = { - { OP_FINISH, "finish", ARG_NON }, - { OP_END, "end", ARG_NON }, - { OP_EXACT1, "exact1", ARG_SPECIAL }, - { OP_EXACT2, "exact2", ARG_SPECIAL }, - { OP_EXACT3, "exact3", ARG_SPECIAL }, - { OP_EXACT4, "exact4", ARG_SPECIAL }, - { OP_EXACT5, "exact5", ARG_SPECIAL }, - { OP_EXACTN, "exactn", ARG_SPECIAL }, - { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, - { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, - { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, - { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, - { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, - { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, - { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, - { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, - { OP_CCLASS, "cclass", ARG_SPECIAL }, - { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, - { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, - { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, - { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, - { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, - { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, - { OP_ANYCHAR, "anychar", ARG_NON }, - { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, - { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, - { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, - { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, - { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, - { OP_WORD, "word", ARG_NON }, - { 
OP_NOT_WORD, "not-word", ARG_NON }, - { OP_WORD_BOUND, "word-bound", ARG_NON }, - { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, - { OP_WORD_BEGIN, "word-begin", ARG_NON }, - { OP_WORD_END, "word-end", ARG_NON }, - { OP_BEGIN_BUF, "begin-buf", ARG_NON }, - { OP_END_BUF, "end-buf", ARG_NON }, - { OP_BEGIN_LINE, "begin-line", ARG_NON }, - { OP_END_LINE, "end-line", ARG_NON }, - { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, - { OP_BEGIN_POSITION, "begin-position", ARG_NON }, - { OP_BACKREF1, "backref1", ARG_NON }, - { OP_BACKREF2, "backref2", ARG_NON }, - { OP_BACKREFN, "backrefn", ARG_MEMNUM }, - { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, - { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, - { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, - { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, - { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, - { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, - { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, - { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, - { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, - { OP_SET_OPTION, "set-option", ARG_OPTION }, - { OP_FAIL, "fail", ARG_NON }, - { OP_JUMP, "jump", ARG_RELADDR }, - { OP_PUSH, "push", ARG_RELADDR }, - { OP_POP, "pop", ARG_NON }, - { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, - { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, - { OP_REPEAT, "repeat", ARG_SPECIAL }, - { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, - { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, - { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, - { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, - { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, - { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, - { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, - { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, - { 
OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, - { OP_PUSH_POS, "push-pos", ARG_NON }, - { OP_POP_POS, "pop-pos", ARG_NON }, - { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, - { OP_FAIL_POS, "fail-pos", ARG_NON }, - { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, - { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, - { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, - { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, - { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, - { OP_CALL, "call", ARG_ABSADDR }, - { OP_RETURN, "return", ARG_NON }, - { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, - { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, - { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, - { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, - { OP_STATE_CHECK_ANYCHAR_ML_STAR, - "state-check-anychar-ml*", ARG_STATE_CHECK }, - { -1, "", ARG_NON } -}; - -static char* -op2name(int opcode) -{ - int i; - - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - if (opcode == OnigOpInfo[i].opcode) - return OnigOpInfo[i].name; - } - return ""; -} - -static int -op2arg_type(int opcode) -{ - int i; - - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - if (opcode == OnigOpInfo[i].opcode) - return OnigOpInfo[i].arg_type; - } - return ARG_SPECIAL; -} - -static void -Indent(FILE* f, int indent) -{ - int i; - for (i = 0; i < indent; i++) putc(' ', f); -} - -static void -p_string(FILE* f, int len, UChar* s) -{ - fputs(":", f); - while (len-- > 0) { fputc(*s++, f); } -} - -static void -p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) -{ - int x = len * mb_len; - - fprintf(f, ":%d:", len); - while (x-- > 0) { fputc(*s++, f); } -} - -extern void -onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, - OnigEncoding enc) -{ - int i, n, arg_type; - RelAddrType addr; - LengthType len; - MemNumType mem; - StateCheckNumType scn; - OnigCodePoint code; - UChar *q; - - 
fprintf(f, "[%s", op2name(*bp)); - arg_type = op2arg_type(*bp); - if (arg_type != ARG_SPECIAL) { - bp++; - switch (arg_type) { - case ARG_NON: - break; - case ARG_RELADDR: - GET_RELADDR_INC(addr, bp); - fprintf(f, ":(%d)", addr); - break; - case ARG_ABSADDR: - GET_ABSADDR_INC(addr, bp); - fprintf(f, ":(%d)", addr); - break; - case ARG_LENGTH: - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d", len); - break; - case ARG_MEMNUM: - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - fprintf(f, ":%d", mem); - break; - case ARG_OPTION: - { - OnigOptionType option = *((OnigOptionType* )bp); - bp += SIZE_OPTION; - fprintf(f, ":%d", option); - } - break; - - case ARG_STATE_CHECK: - scn = *((StateCheckNumType* )bp); - bp += SIZE_STATE_CHECK_NUM; - fprintf(f, ":%d", scn); - break; - } - } - else { - switch (*bp++) { - case OP_EXACT1: - case OP_ANYCHAR_STAR_PEEK_NEXT: - case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p_string(f, 1, bp++); break; - case OP_EXACT2: - p_string(f, 2, bp); bp += 2; break; - case OP_EXACT3: - p_string(f, 3, bp); bp += 3; break; - case OP_EXACT4: - p_string(f, 4, bp); bp += 4; break; - case OP_EXACT5: - p_string(f, 5, bp); bp += 5; break; - case OP_EXACTN: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 1, bp); - bp += len; - break; - - case OP_EXACTMB2N1: - p_string(f, 2, bp); bp += 2; break; - case OP_EXACTMB2N2: - p_string(f, 4, bp); bp += 4; break; - case OP_EXACTMB2N3: - p_string(f, 6, bp); bp += 6; break; - case OP_EXACTMB2N: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 2, bp); - bp += len * 2; - break; - case OP_EXACTMB3N: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 3, bp); - bp += len * 3; - break; - case OP_EXACTMBN: - { - int mb_len; - - GET_LENGTH_INC(mb_len, bp); - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:%d:", mb_len, len); - n = len * mb_len; - while (n-- > 0) { fputc(*bp++, f); } - } - break; - - case OP_EXACT1_IC: - len = enclen(enc, bp); - p_string(f, len, bp); - bp += len; - break; - case OP_EXACTN_IC: - GET_LENGTH_INC(len, bp); - 
p_len_string(f, len, 1, bp); - bp += len; - break; - - case OP_CCLASS: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - fprintf(f, ":%d", n); - break; - - case OP_CCLASS_NOT: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - fprintf(f, ":%d", n); - break; - - case OP_CCLASS_MB: - case OP_CCLASS_MB_NOT: - GET_LENGTH_INC(len, bp); - q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS - ALIGNMENT_RIGHT(q); -#endif - GET_CODE_POINT(code, q); - bp += len; - fprintf(f, ":%d:%d", (int )code, len); - break; - - case OP_CCLASS_MIX: - case OP_CCLASS_MIX_NOT: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - GET_LENGTH_INC(len, bp); - q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS - ALIGNMENT_RIGHT(q); -#endif - GET_CODE_POINT(code, q); - bp += len; - fprintf(f, ":%d:%d:%d", n, (int )code, len); - break; - - case OP_CCLASS_NODE: - { - CClassNode *cc; - - GET_POINTER_INC(cc, bp); - n = bitset_on_num(cc->bs); - fprintf(f, ":%u:%d", (unsigned int )cc, n); - } - break; - - case OP_BACKREFN_IC: - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - fprintf(f, ":%d", mem); - break; - - case OP_BACKREF_MULTI_IC: - case OP_BACKREF_MULTI: - fputs(" ", f); - GET_LENGTH_INC(len, bp); - for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", ", f); - fprintf(f, "%d", mem); - } - break; - - case OP_BACKREF_WITH_LEVEL: - { - OnigOptionType option; - LengthType level; - - GET_OPTION_INC(option, bp); - fprintf(f, ":%d", option); - GET_LENGTH_INC(level, bp); - fprintf(f, ":%d", level); - - fputs(" ", f); - GET_LENGTH_INC(len, bp); - for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", ", f); - fprintf(f, "%d", mem); - } - } - break; - - case OP_REPEAT: - case OP_REPEAT_NG: - { - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fprintf(f, ":%d:%d", mem, addr); - } - break; - - case OP_PUSH_OR_JUMP_EXACT1: - case OP_PUSH_IF_PEEK_NEXT: - addr = *((RelAddrType* )bp); - bp += 
SIZE_RELADDR; - fprintf(f, ":(%d)", addr); - p_string(f, 1, bp); - bp += 1; - break; - - case OP_LOOK_BEHIND: - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d", len); - break; - - case OP_PUSH_LOOK_BEHIND_NOT: - GET_RELADDR_INC(addr, bp); - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:(%d)", len, addr); - break; - - case OP_STATE_CHECK_PUSH: - case OP_STATE_CHECK_PUSH_OR_JUMP: - scn = *((StateCheckNumType* )bp); - bp += SIZE_STATE_CHECK_NUM; - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fprintf(f, ":%d:(%d)", scn, addr); - break; - - default: - fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", - *--bp); - } - } - fputs("]", f); - if (nextp) *nextp = bp; -} - -static void -print_compiled_byte_code_list(FILE* f, regex_t* reg) -{ - int ncode; - UChar* bp = reg->p; - UChar* end = reg->p + reg->used; - - fprintf(f, "code length: %d\n", reg->used); - - ncode = 0; - while (bp < end) { - ncode++; - if (bp > reg->p) { - if (ncode % 5 == 0) - fprintf(f, "\n"); - else - fputs(" ", f); - } - onig_print_compiled_byte_code(f, bp, &bp, reg->enc); - } - - fprintf(f, "\n"); -} - -static void -print_indent_tree(FILE* f, Node* node, int indent) -{ - int i, type; - int add = 3; - UChar* p; - - Indent(f, indent); - if (IS_NULL(node)) { - fprintf(f, "ERROR: null node!!!\n"); - exit (0); - } - - type = NTYPE(node); - switch (type) { - case NT_LIST: - case NT_ALT: - if (NTYPE(node) == NT_LIST) - fprintf(f, "\n", (int )node); - else - fprintf(f, "\n", (int )node); - - print_indent_tree(f, NCAR(node), indent + add); - while (IS_NOT_NULL(node = NCDR(node))) { - if (NTYPE(node) != type) { - fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); - exit(0); - } - print_indent_tree(f, NCAR(node), indent + add); - } - break; - - case NT_STR: - fprintf(f, "", - (NSTRING_IS_RAW(node) ? 
"-raw" : ""), (int )node); - for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { - if (*p >= 0x20 && *p < 0x7f) - fputc(*p, f); - else { - fprintf(f, " 0x%02x", *p); - } - } - break; - - case NT_CCLASS: - fprintf(f, "", (int )node); - if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); - if (NCCLASS(node)->mbuf) { - BBuf* bbuf = NCCLASS(node)->mbuf; - for (i = 0; i < bbuf->used; i++) { - if (i > 0) fprintf(f, ","); - fprintf(f, "%0x", bbuf->p[i]); - } - } - break; - - case NT_CTYPE: - fprintf(f, " ", (int )node); - switch (NCTYPE(node)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->not != 0) - fputs("not word", f); - else - fputs("word", f); - break; - - default: - fprintf(f, "ERROR: undefined ctype.\n"); - exit(0); - } - break; - - case NT_CANY: - fprintf(f, "", (int )node); - break; - - case NT_ANCHOR: - fprintf(f, " ", (int )node); - switch (NANCHOR(node)->type) { - case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; - case ANCHOR_END_BUF: fputs("end buf", f); break; - case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; - case ANCHOR_END_LINE: fputs("end line", f); break; - case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; - case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; - - case ANCHOR_WORD_BOUND: fputs("word bound", f); break; - case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; -#ifdef USE_WORD_BEGIN_END - case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; - case ANCHOR_WORD_END: fputs("word end", f); break; -#endif - case ANCHOR_PREC_READ: fputs("prec read", f); break; - case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break; - case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break; - case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break; - - default: - fprintf(f, "ERROR: undefined anchor type.\n"); - break; - } - break; - - case NT_BREF: - { - int* p; - BRefNode* br = NBREF(node); - p = BACKREFS_P(br); - fprintf(f, "", (int )node); - for (i = 0; i < br->back_num; i++) { - if (i > 0) 
fputs(", ", f); - fprintf(f, "%d", p[i]); - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - { - CallNode* cn = NCALL(node); - fprintf(f, "", (int )node); - p_string(f, cn->name_end - cn->name, cn->name); - } - break; -#endif - - case NT_QTFR: - fprintf(f, "{%d,%d}%s\n", (int )node, - NQTFR(node)->lower, NQTFR(node)->upper, - (NQTFR(node)->greedy ? "" : "?")); - print_indent_tree(f, NQTFR(node)->target, indent + add); - break; - - case NT_ENCLOSE: - fprintf(f, " ", (int )node); - switch (NENCLOSE(node)->type) { - case ENCLOSE_OPTION: - fprintf(f, "option:%d", NENCLOSE(node)->option); - break; - case ENCLOSE_MEMORY: - fprintf(f, "memory:%d", NENCLOSE(node)->regnum); - break; - case ENCLOSE_STOP_BACKTRACK: - fprintf(f, "stop-bt"); - break; - - default: - break; - } - fprintf(f, "\n"); - print_indent_tree(f, NENCLOSE(node)->target, indent + add); - break; - - default: - fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); - break; - } - - if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && - type != NT_ENCLOSE) - fprintf(f, "\n"); - fflush(f); -} -#endif /* ONIG_DEBUG */ - -#ifdef ONIG_DEBUG_PARSE_TREE -static void -print_tree(FILE* f, Node* node) -{ - print_indent_tree(f, node, 0); -} -#endif diff --git a/src/openalpr/support/regex/regenc.c b/src/openalpr/support/regex/regenc.c deleted file mode 100644 index b5db6ae..0000000 --- a/src/openalpr/support/regex/regenc.c +++ /dev/null @@ -1,905 +0,0 @@ -/********************************************************************** - regenc.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2007 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -OnigEncoding OnigEncDefaultCharEncoding = NULL; - -extern int -onigenc_init(void) -{ - return 0; -} - -extern OnigEncoding -onigenc_get_default_encoding(void) -{ - if (OnigEncDefaultCharEncoding == NULL) - OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; - - return OnigEncDefaultCharEncoding; -} - -extern int -onigenc_set_default_encoding(OnigEncoding enc) -{ - OnigEncDefaultCharEncoding = enc; - return 0; -} - -extern UChar* -onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) -{ - UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); - if (p < s) { - p += enclen(enc, p); - } - return p; -} - -extern UChar* -onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, - const UChar* start, const UChar* s, const UChar** prev) -{ - UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); - - if (p < s) { - if (prev) *prev = (const UChar* )p; - p += enclen(enc, p); - } - else { - if (prev) *prev = (const UChar* )NULL; /* Sorry */ - } - return p; -} - -extern UChar* -onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) -{ - if (s <= start) - return (UChar* )NULL; - - return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); -} - -extern UChar* -onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n) -{ - while (ONIG_IS_NOT_NULL(s) && n-- > 0) { - if (s <= start) - return (UChar* )NULL; - - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); - } - return (UChar* )s; -} - -extern UChar* -onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) -{ - UChar* q = (UChar* )p; - while (n-- > 0) { - q += ONIGENC_MBC_ENC_LEN(enc, q); - } - return (q <= end ? 
q : NULL); -} - -extern int -onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) -{ - int n = 0; - UChar* q = (UChar* )p; - - while (q < end) { - q += ONIGENC_MBC_ENC_LEN(enc, q); - n++; - } - return n; -} - -extern int -onigenc_strlen_null(OnigEncoding enc, const UChar* s) -{ - int n = 0; - UChar* p = (UChar* )s; - - while (1) { - if (*p == '\0') { - UChar* q; - int len = ONIGENC_MBC_MINLEN(enc); - - if (len == 1) return n; - q = p + 1; - while (len > 1) { - if (*q != '\0') break; - q++; - len--; - } - if (len == 1) return n; - } - p += ONIGENC_MBC_ENC_LEN(enc, p); - n++; - } -} - -extern int -onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) -{ - UChar* start = (UChar* )s; - UChar* p = (UChar* )s; - - while (1) { - if (*p == '\0') { - UChar* q; - int len = ONIGENC_MBC_MINLEN(enc); - - if (len == 1) return (int )(p - start); - q = p + 1; - while (len > 1) { - if (*q != '\0') break; - q++; - len--; - } - if (len == 1) return (int )(p - start); - } - p += ONIGENC_MBC_ENC_LEN(enc, p); - } -} - -const UChar OnigEncAsciiToLowerCaseTable[] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', 
'\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', -}; - -#ifdef USE_UPPER_CASE_TABLE -const UChar OnigEncAsciiToUpperCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', - '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', - '\120', '\121', '\122', '\123', '\124', '\125', '\126', 
'\127', - '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', - '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', - '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', - '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', - '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', -}; -#endif - -const unsigned short OnigEncAsciiCtypeTable[256] = { - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, - 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x41a0, 
0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, - 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -}; - -const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', 
'\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -#ifdef USE_UPPER_CASE_TABLE -const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', 
'\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', - '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', - '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', - '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', - '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', - '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', - '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', - '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377', -}; -#endif - -extern void 
-onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) -{ - /* nothing */ - /* obsoleted. */ -} - -extern UChar* -onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) -{ - return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); -} - -const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { - { 0x41, 0x61 }, - { 0x42, 0x62 }, - { 0x43, 0x63 }, - { 0x44, 0x64 }, - { 0x45, 0x65 }, - { 0x46, 0x66 }, - { 0x47, 0x67 }, - { 0x48, 0x68 }, - { 0x49, 0x69 }, - { 0x4a, 0x6a }, - { 0x4b, 0x6b }, - { 0x4c, 0x6c }, - { 0x4d, 0x6d }, - { 0x4e, 0x6e }, - { 0x4f, 0x6f }, - { 0x50, 0x70 }, - { 0x51, 0x71 }, - { 0x52, 0x72 }, - { 0x53, 0x73 }, - { 0x54, 0x74 }, - { 0x55, 0x75 }, - { 0x56, 0x76 }, - { 0x57, 0x77 }, - { 0x58, 0x78 }, - { 0x59, 0x79 }, - { 0x5a, 0x7a } -}; - -extern int -onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, - OnigApplyAllCaseFoldFunc f, void* arg) -{ - OnigCodePoint code; - int i, r; - - for (i = 0; - i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); - i++) { - code = OnigAsciiLowerMap[i].to; - r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); - if (r != 0) return r; - - code = OnigAsciiLowerMap[i].from; - r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg); - if (r != 0) return r; - } - - return 0; -} - -extern int -onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, - const OnigUChar* p, const OnigUChar* end ARG_UNUSED, - OnigCaseFoldCodeItem items[]) -{ - if (0x41 <= *p && *p <= 0x5a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p + 0x20); - return 1; - } - else if (0x61 <= *p && *p <= 0x7a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p - 0x20); - return 1; - } - else - return 0; -} - -static int -ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, - OnigApplyAllCaseFoldFunc f, void* arg) -{ - static OnigCodePoint ss[] = { 0x73, 0x73 }; - - return 
(*f)((OnigCodePoint )0xdf, ss, 2, arg); -} - -extern int -onigenc_apply_all_case_fold_with_map(int map_size, - const OnigPairCaseFoldCodes map[], - int ess_tsett_flag, OnigCaseFoldType flag, - OnigApplyAllCaseFoldFunc f, void* arg) -{ - OnigCodePoint code; - int i, r; - - r = onigenc_ascii_apply_all_case_fold(flag, f, arg); - if (r != 0) return r; - - for (i = 0; i < map_size; i++) { - code = map[i].to; - r = (*f)(map[i].from, &code, 1, arg); - if (r != 0) return r; - - code = map[i].from; - r = (*f)(map[i].to, &code, 1, arg); - if (r != 0) return r; - } - - if (ess_tsett_flag != 0) - return ss_apply_all_case_fold(flag, f, arg); - - return 0; -} - -extern int -onigenc_get_case_fold_codes_by_str_with_map(int map_size, - const OnigPairCaseFoldCodes map[], - int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, - const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) -{ - if (0x41 <= *p && *p <= 0x5a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p + 0x20); - if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x53 || *(p+1) == 0x73)) { - /* SS */ - items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; - } - else if (0x61 <= *p && *p <= 0x7a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p - 0x20); - if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x73 || *(p+1) == 0x53)) { - /* ss */ - items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; - } - else if (*p == 0xdf && ess_tsett_flag != 0) { - items[0].byte_len = 1; - items[0].code_len = 2; - items[0].code[0] = (OnigCodePoint )'s'; - items[0].code[1] = (OnigCodePoint )'s'; - - items[1].byte_len = 1; - items[1].code_len = 2; - items[1].code[0] = (OnigCodePoint )'S'; - items[1].code[1] = (OnigCodePoint )'S'; - - items[2].byte_len = 1; - 
items[2].code_len = 2; - items[2].code[0] = (OnigCodePoint )'s'; - items[2].code[1] = (OnigCodePoint )'S'; - - items[3].byte_len = 1; - items[3].code_len = 2; - items[3].code[0] = (OnigCodePoint )'S'; - items[3].code[1] = (OnigCodePoint )'s'; - - return 4; - } - else { - int i; - - for (i = 0; i < map_size; i++) { - if (*p == map[i].from) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = map[i].to; - return 1; - } - else if (*p == map[i].to) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = map[i].from; - return 1; - } - } - } - - return 0; -} - - -extern int -onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED, - OnigCodePoint* sb_out ARG_UNUSED, - const OnigCodePoint* ranges[] ARG_UNUSED) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) -{ - if (p < end) { - if (*p == 0x0a) return 1; - } - return 0; -} - -/* for single byte encodings */ -extern int -onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, - const UChar*end ARG_UNUSED, UChar* lower) -{ - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); - - (*p)++; - return 1; /* return byte length of converted char to lower */ -} - -#if 0 -extern int -onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - (*pp)++; - return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); -} -#endif - -extern int -onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED) -{ - return 1; -} - -extern OnigCodePoint -onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) -{ - return (OnigCodePoint )(*p); -} - -extern int -onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED) -{ - return (code < 0x100 ? 
1 : ONIGERR_INVALID_CODE_POINT_VALUE); -} - -extern int -onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf) -{ - *buf = (UChar )(code & 0xff); - return 1; -} - -extern UChar* -onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, - const UChar* s) -{ - return (UChar* )s; -} - -extern int -onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, - const UChar* end ARG_UNUSED) -{ - return TRUE; -} - -extern int -onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, - const UChar* end ARG_UNUSED) -{ - return FALSE; -} - -extern OnigCodePoint -onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) -{ - int c, i, len; - OnigCodePoint n; - - len = enclen(enc, p); - n = (OnigCodePoint )(*p++); - if (len == 1) return n; - - for (i = 1; i < len; i++) { - if (p >= end) break; - c = *p++; - n <<= 8; n += c; - } - return n; -} - -extern int -onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, - const UChar** pp, const UChar* end ARG_UNUSED, - UChar* lower) -{ - int len; - const UChar *p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - (*pp)++; - return 1; - } - else { - int i; - - len = enclen(enc, p); - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - (*pp) += len; - return len; /* return byte length of converted to lower char */ - } -} - -#if 0 -extern int -onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, - const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { - (*pp)++; - return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); - } - - (*pp) += enclen(enc, p); - return FALSE; -} -#endif - -extern int -onigenc_mb2_code_to_mbclen(OnigCodePoint code) -{ - if ((code & 0xff00) != 0) return 2; - else return 1; -} - -extern int -onigenc_mb4_code_to_mbclen(OnigCodePoint code) -{ - if ((code & 0xff000000) != 0) return 4; - else if ((code & 0xff0000) != 0) return 3; - else 
if ((code & 0xff00) != 0) return 2; - else return 1; -} - -extern int -onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) -{ - UChar *p = buf; - - if ((code & 0xff00) != 0) { - *p++ = (UChar )((code >> 8) & 0xff); - } - *p++ = (UChar )(code & 0xff); - -#if 1 - if (enclen(enc, buf) != (p - buf)) - return ONIGERR_INVALID_CODE_POINT_VALUE; -#endif - return p - buf; -} - -extern int -onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) -{ - UChar *p = buf; - - if ((code & 0xff000000) != 0) { - *p++ = (UChar )((code >> 24) & 0xff); - } - if ((code & 0xff0000) != 0 || p != buf) { - *p++ = (UChar )((code >> 16) & 0xff); - } - if ((code & 0xff00) != 0 || p != buf) { - *p++ = (UChar )((code >> 8) & 0xff); - } - *p++ = (UChar )(code & 0xff); - -#if 1 - if (enclen(enc, buf) != (p - buf)) - return ONIGERR_INVALID_CODE_POINT_VALUE; -#endif - return p - buf; -} - -extern int -onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) -{ - static PosixBracketEntryType PBS[] = { - { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, - { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, - { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, - { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, - { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, - { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, - { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, - { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, - { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, - { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 }, - { (UChar* )NULL, -1, 0 } - }; - - PosixBracketEntryType *pb; - int len; - - len = onigenc_strlen(enc, p, end); - for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { - if (len == pb->len && - onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) - return pb->ctype; - } - - 
return ONIGERR_INVALID_CHAR_PROPERTY_NAME; -} - -extern int -onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, - unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else { - if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { - return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); - } - } - - return FALSE; -} - -extern int -onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, - unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else { - if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { - return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); - } - } - - return FALSE; -} - -extern int -onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, - const UChar* sascii /* ascii */, int n) -{ - int x, c; - - while (n-- > 0) { - if (p >= end) return (int )(*sascii); - - c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); - x = *sascii - c; - if (x) return x; - - sascii++; - p += enclen(enc, p); - } - return 0; -} - -/* Property management */ -static int -resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize) -{ - int size; - const OnigCodePoint **list = *plist; - - size = sizeof(OnigCodePoint*) * new_size; - if (IS_NULL(list)) { - list = (const OnigCodePoint** )xmalloc(size); - } - else { - list = (const OnigCodePoint** )xrealloc((void* )list, size); - } - - if (IS_NULL(list)) return ONIGERR_MEMORY; - - *plist = list; - *psize = new_size; - - return 0; -} - -extern int -onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, - hash_table_type **table, const OnigCodePoint*** plist, int *pnum, - int *psize) -{ -#define PROP_INIT_SIZE 16 - - int r; - - if (*psize <= *pnum) { - int new_size = (*psize == 0 ? 
PROP_INIT_SIZE : *psize * 2); - r = resize_property_list(new_size, plist, psize); - if (r != 0) return r; - } - - (*plist)[*pnum] = prop; - - if (ONIG_IS_NULL(*table)) { - *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE); - if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY; - } - - *pnum = *pnum + 1; - onig_st_insert_strend(*table, name, name + strlen((char* )name), - (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); - return 0; -} - -extern int -onigenc_property_list_init(int (*f)(void)) -{ - int r; - - THREAD_ATOMIC_START; - - r = f(); - - THREAD_ATOMIC_END; - return r; -} diff --git a/src/openalpr/support/regex/regenc.h b/src/openalpr/support/regex/regenc.h deleted file mode 100644 index ac12207..0000000 --- a/src/openalpr/support/regex/regenc.h +++ /dev/null @@ -1,189 +0,0 @@ -#ifndef REGENC_H -#define REGENC_H -/********************************************************************** - regenc.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef PACKAGE -/* PACKAGE is defined in config.h */ -#include "onig_config.h" -#endif - -#ifdef ONIG_ESCAPE_UCHAR_COLLISION -#undef ONIG_ESCAPE_UCHAR_COLLISION -#endif - -#include "oniguruma.h" - -typedef struct { - OnigCodePoint from; - OnigCodePoint to; -} OnigPairCaseFoldCodes; - - -#ifndef NULL -#define NULL ((void* )0) -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef ARG_UNUSED -#if defined(__GNUC__) -# define ARG_UNUSED __attribute__ ((unused)) -#else -# define ARG_UNUSED -#endif -#endif - -#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) -#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) -#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL -#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) - -#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p) - -/* character types bit flag */ -#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE) -#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA) -#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK) -#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL) -#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT) -#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH) -#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER) -#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT) -#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT) -#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE) -#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER) 
-#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT) -#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD) -#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM) -#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII) - -#define CTYPE_TO_BIT(ctype) (1<<(ctype)) -#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \ - ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\ - (ctype) == ONIGENC_CTYPE_PRINT) - - -typedef struct { - UChar *name; - int ctype; - short int len; -} PosixBracketEntryType; - - -/* #define USE_CRNL_AS_LINE_TERMINATOR */ -#define USE_UNICODE_PROPERTIES -/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ -/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ - - -#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII - -/* for encoding system implementation (internal) */ -ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); - - -/* methods for single byte encoding */ -ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); -ONIG_EXTERN OnigCodePoint 
onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); -ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); -ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); -ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); - -/* methods for multi byte encoding */ -ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); -ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); -ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); -ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); -ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); -ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); - - -/* in enc/unicode.c */ -ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str 
P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); -ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); - - -#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) -#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) - -#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ - OnigEncISO_8859_1_ToLowerCaseTable[c] -#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \ - OnigEncISO_8859_1_ToUpperCaseTable[c] - -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; - -ONIG_EXTERN int -onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n)); -ONIG_EXTERN UChar* -onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n)); - -/* defined in regexec.c, but used in enc/xxx.c */ -extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); - -ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; -ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]; -ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; -ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; - -#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80) -#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] -#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c] -#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \ - ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) -#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \ - (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\ - ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) - - -#endif /* REGENC_H */ diff --git a/src/openalpr/support/regex/regerror.c 
b/src/openalpr/support/regex/regerror.c deleted file mode 100644 index 385e560..0000000 --- a/src/openalpr/support/regex/regerror.c +++ /dev/null @@ -1,387 +0,0 @@ -/********************************************************************** - regerror.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2007 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" -#include /* for vsnprintf() */ - -#ifdef HAVE_STDARG_PROTOTYPES -#include -#define va_init_list(a,b) va_start(a,b) -#else -#include -#define va_init_list(a,b) va_start(a) -#endif - -extern UChar* -onig_error_code_to_format(int code) -{ - char *p; - - if (code >= 0) return (UChar* )0; - - switch (code) { - case ONIG_MISMATCH: - p = "mismatch"; break; - case ONIG_NO_SUPPORT_CONFIG: - p = "no support in this configuration"; break; - case ONIGERR_MEMORY: - p = "fail to memory allocation"; break; - case ONIGERR_MATCH_STACK_LIMIT_OVER: - p = "match-stack limit over"; break; - case ONIGERR_TYPE_BUG: - p = "undefined type (bug)"; break; - case ONIGERR_PARSER_BUG: - p = "internal parser error (bug)"; break; - case ONIGERR_STACK_BUG: - p = "stack error (bug)"; break; - case ONIGERR_UNDEFINED_BYTECODE: - p = "undefined bytecode (bug)"; break; - case ONIGERR_UNEXPECTED_BYTECODE: - p = "unexpected bytecode (bug)"; break; - case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED: - p = "default multibyte-encoding is not setted"; break; - case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: - p = "can't convert to wide-char on specified multibyte-encoding"; break; - case ONIGERR_INVALID_ARGUMENT: - p = "invalid argument"; break; - case ONIGERR_END_PATTERN_AT_LEFT_BRACE: - p = "end pattern at left brace"; break; - case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: - p = "end pattern at left bracket"; break; - case ONIGERR_EMPTY_CHAR_CLASS: - p = "empty char-class"; break; - case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: - p = "premature end of char-class"; break; - case ONIGERR_END_PATTERN_AT_ESCAPE: - p = "end pattern at escape"; break; - case ONIGERR_END_PATTERN_AT_META: - p = "end pattern at meta"; break; - case ONIGERR_END_PATTERN_AT_CONTROL: - p = "end pattern at control"; break; - case ONIGERR_META_CODE_SYNTAX: - p = "invalid meta-code syntax"; break; - case ONIGERR_CONTROL_CODE_SYNTAX: - p = "invalid control-code syntax"; break; - case 
ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: - p = "char-class value at end of range"; break; - case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: - p = "char-class value at start of range"; break; - case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: - p = "unmatched range specifier in char-class"; break; - case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: - p = "target of repeat operator is not specified"; break; - case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: - p = "target of repeat operator is invalid"; break; - case ONIGERR_NESTED_REPEAT_OPERATOR: - p = "nested repeat operator"; break; - case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: - p = "unmatched close parenthesis"; break; - case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: - p = "end pattern with unmatched parenthesis"; break; - case ONIGERR_END_PATTERN_IN_GROUP: - p = "end pattern in group"; break; - case ONIGERR_UNDEFINED_GROUP_OPTION: - p = "undefined group option"; break; - case ONIGERR_INVALID_POSIX_BRACKET_TYPE: - p = "invalid POSIX bracket type"; break; - case ONIGERR_INVALID_LOOK_BEHIND_PATTERN: - p = "invalid pattern in look-behind"; break; - case ONIGERR_INVALID_REPEAT_RANGE_PATTERN: - p = "invalid repeat range {lower,upper}"; break; - case ONIGERR_TOO_BIG_NUMBER: - p = "too big number"; break; - case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: - p = "too big number for repeat range"; break; - case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: - p = "upper is smaller than lower in repeat range"; break; - case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: - p = "empty range in char class"; break; - case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: - p = "mismatch multibyte code length in char-class range"; break; - case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: - p = "too many multibyte code ranges are specified"; break; - case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: - p = "too short multibyte code string"; break; - case ONIGERR_TOO_BIG_BACKREF_NUMBER: - p = "too big backref number"; break; - case 
ONIGERR_INVALID_BACKREF: -#ifdef USE_NAMED_GROUP - p = "invalid backref number/name"; break; -#else - p = "invalid backref number"; break; -#endif - case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED: - p = "numbered backref/call is not allowed. (use name)"; break; - case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE: - p = "too big wide-char value"; break; - case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: - p = "too long wide-char value"; break; - case ONIGERR_INVALID_CODE_POINT_VALUE: - p = "invalid code point value"; break; - case ONIGERR_EMPTY_GROUP_NAME: - p = "group name is empty"; break; - case ONIGERR_INVALID_GROUP_NAME: - p = "invalid group name <%n>"; break; - case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: -#ifdef USE_NAMED_GROUP - p = "invalid char in group name <%n>"; break; -#else - p = "invalid char in group number <%n>"; break; -#endif - case ONIGERR_UNDEFINED_NAME_REFERENCE: - p = "undefined name <%n> reference"; break; - case ONIGERR_UNDEFINED_GROUP_REFERENCE: - p = "undefined group <%n> reference"; break; - case ONIGERR_MULTIPLEX_DEFINED_NAME: - p = "multiplex defined name <%n>"; break; - case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: - p = "multiplex definition name <%n> call"; break; - case ONIGERR_NEVER_ENDING_RECURSION: - p = "never ending recursion"; break; - case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: - p = "group number is too big for capture history"; break; - case ONIGERR_INVALID_CHAR_PROPERTY_NAME: - p = "invalid character property name {%n}"; break; - case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: - p = "not supported encoding combination"; break; - case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: - p = "invalid combination of options"; break; - case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: - p = "over thread pass limit count"; break; - - default: - p = "undefined error code"; break; - } - - return (UChar* )p; -} - -static void sprint_byte(char* s, unsigned int v) -{ - sprintf(s, "%02x", (v & 0377)); -} - -static void sprint_byte_with_x(char* s, unsigned int v) -{ 
- sprintf(s, "\\x%02x", (v & 0377)); -} - -static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, - UChar buf[], int buf_size, int *is_over) -{ - int len; - UChar *p; - OnigCodePoint code; - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - p = s; - len = 0; - while (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (code >= 0x80) { - if (code > 0xffff && len + 10 <= buf_size) { - sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); - sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); - sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); - sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); - len += 10; - } - else if (len + 6 <= buf_size) { - sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); - sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); - len += 6; - } - else { - break; - } - } - else { - buf[len++] = (UChar )code; - } - - p += enclen(enc, p); - if (len >= buf_size) break; - } - - *is_over = ((p < end) ? 1 : 0); - } - else { - len = MIN((end - s), buf_size); - xmemcpy(buf, s, (size_t )len); - *is_over = ((buf_size < (end - s)) ? 1 : 0); - } - - return len; -} - - -/* for ONIG_MAX_ERROR_MESSAGE_LEN */ -#define MAX_ERROR_PAR_LEN 30 - -extern int -#ifdef HAVE_STDARG_PROTOTYPES -onig_error_code_to_str(UChar* s, int code, ...) 
-#else -onig_error_code_to_str(s, code, va_alist) - UChar* s; - int code; - va_dcl -#endif -{ - UChar *p, *q; - OnigErrorInfo* einfo; - int len, is_over; - UChar parbuf[MAX_ERROR_PAR_LEN]; - va_list vargs; - - va_init_list(vargs, code); - - switch (code) { - case ONIGERR_UNDEFINED_NAME_REFERENCE: - case ONIGERR_UNDEFINED_GROUP_REFERENCE: - case ONIGERR_MULTIPLEX_DEFINED_NAME: - case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: - case ONIGERR_INVALID_GROUP_NAME: - case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: - case ONIGERR_INVALID_CHAR_PROPERTY_NAME: - einfo = va_arg(vargs, OnigErrorInfo*); - len = to_ascii(einfo->enc, einfo->par, einfo->par_end, - parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); - q = onig_error_code_to_format(code); - p = s; - while (*q != '\0') { - if (*q == '%') { - q++; - if (*q == 'n') { /* '%n': name */ - xmemcpy(p, parbuf, len); - p += len; - if (is_over != 0) { - xmemcpy(p, "...", 3); - p += 3; - } - q++; - } - else - goto normal_char; - } - else { - normal_char: - *p++ = *q++; - } - } - *p = '\0'; - len = p - s; - break; - - default: - q = onig_error_code_to_format(code); - len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q); - xmemcpy(s, q, len); - s[len] = '\0'; - break; - } - - va_end(vargs); - return len; -} - - -void -#ifdef HAVE_STDARG_PROTOTYPES -onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, - UChar* pat, UChar* pat_end, const UChar *fmt, ...) 
-#else -onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) - UChar buf[]; - int bufsize; - OnigEncoding enc; - UChar* pat; - UChar* pat_end; - const UChar *fmt; - va_dcl -#endif -{ - int n, need, len; - UChar *p, *s, *bp; - UChar bs[6]; - va_list args; - - va_init_list(args, fmt); - n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args); - va_end(args); - - need = (pat_end - pat) * 4 + 4; - - if (n + need < bufsize) { - strcat((char* )buf, ": /"); - s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf); - - p = pat; - while (p < pat_end) { - if (*p == '\\') { - *s++ = *p++; - len = enclen(enc, p); - while (len-- > 0) *s++ = *p++; - } - else if (*p == '/') { - *s++ = (unsigned char )'\\'; - *s++ = *p++; - } - else if (ONIGENC_IS_MBC_HEAD(enc, p)) { - len = enclen(enc, p); - if (ONIGENC_MBC_MINLEN(enc) == 1) { - while (len-- > 0) *s++ = *p++; - } - else { /* for UTF16 */ - int blen; - - while (len-- > 0) { - sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); - blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); - bp = bs; - while (blen-- > 0) *s++ = *bp++; - } - } - } - else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && - !ONIGENC_IS_CODE_SPACE(enc, *p)) { - sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); - len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); - bp = bs; - while (len-- > 0) *s++ = *bp++; - } - else { - *s++ = *p++; - } - } - - *s++ = '/'; - *s = '\0'; - } -} diff --git a/src/openalpr/support/regex/regexec.c b/src/openalpr/support/regex/regexec.c deleted file mode 100644 index 7430d78..0000000 --- a/src/openalpr/support/regex/regexec.c +++ /dev/null @@ -1,3803 +0,0 @@ -/********************************************************************** - regexec.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - -#ifdef USE_CRNL_AS_LINE_TERMINATOR -#define ONIGENC_IS_MBC_CRNL(enc,p,end) \ - (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ - ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) -#endif - -#ifdef USE_CAPTURE_HISTORY -static void history_tree_free(OnigCaptureTreeNode* node); - -static void -history_tree_clear(OnigCaptureTreeNode* node) -{ - int i; - - if (IS_NOT_NULL(node)) { - for (i = 0; i < node->num_childs; i++) { - if (IS_NOT_NULL(node->childs[i])) { - history_tree_free(node->childs[i]); - } - } - for (i = 0; i < node->allocated; i++) { - node->childs[i] = (OnigCaptureTreeNode* )0; - } - node->num_childs = 0; - node->beg = ONIG_REGION_NOTPOS; - node->end = ONIG_REGION_NOTPOS; - node->group = -1; - } -} - -static void -history_tree_free(OnigCaptureTreeNode* node) -{ - history_tree_clear(node); - xfree(node); -} - -static void -history_root_free(OnigRegion* r) -{ - if (IS_NOT_NULL(r->history_root)) { - history_tree_free(r->history_root); - r->history_root = (OnigCaptureTreeNode* )0; - } -} - -static OnigCaptureTreeNode* -history_node_new(void) -{ - OnigCaptureTreeNode* node; - - node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); - CHECK_NULL_RETURN(node); - node->childs = (OnigCaptureTreeNode** )0; - node->allocated = 0; - node->num_childs = 0; - node->group = -1; - node->beg = ONIG_REGION_NOTPOS; - node->end = ONIG_REGION_NOTPOS; - - return node; -} - -static int -history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) -{ -#define HISTORY_TREE_INIT_ALLOC_SIZE 8 - - if (parent->num_childs >= parent->allocated) { - int n, i; - - if (IS_NULL(parent->childs)) { - n = HISTORY_TREE_INIT_ALLOC_SIZE; - parent->childs = - (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); - } - else { - n = parent->allocated * 2; - parent->childs = - (OnigCaptureTreeNode** )xrealloc(parent->childs, - sizeof(OnigCaptureTreeNode*) * n); - } - 
CHECK_NULL_RETURN_MEMERR(parent->childs); - for (i = parent->allocated; i < n; i++) { - parent->childs[i] = (OnigCaptureTreeNode* )0; - } - parent->allocated = n; - } - - parent->childs[parent->num_childs] = child; - parent->num_childs++; - return 0; -} - -static OnigCaptureTreeNode* -history_tree_clone(OnigCaptureTreeNode* node) -{ - int i; - OnigCaptureTreeNode *clone, *child; - - clone = history_node_new(); - CHECK_NULL_RETURN(clone); - - clone->beg = node->beg; - clone->end = node->end; - for (i = 0; i < node->num_childs; i++) { - child = history_tree_clone(node->childs[i]); - if (IS_NULL(child)) { - history_tree_free(clone); - return (OnigCaptureTreeNode* )0; - } - history_tree_add_child(clone, child); - } - - return clone; -} - -extern OnigCaptureTreeNode* -onig_get_capture_tree(OnigRegion* region) -{ - return region->history_root; -} -#endif /* USE_CAPTURE_HISTORY */ - -extern void -onig_region_clear(OnigRegion* region) -{ - int i; - - for (i = 0; i < region->num_regs; i++) { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } -#ifdef USE_CAPTURE_HISTORY - history_root_free(region); -#endif -} - -extern int -onig_region_resize(OnigRegion* region, int n) -{ - region->num_regs = n; - - if (n < ONIG_NREGION) - n = ONIG_NREGION; - - if (region->allocated == 0) { - region->beg = (int* )xmalloc(n * sizeof(int)); - region->end = (int* )xmalloc(n * sizeof(int)); - - if (region->beg == 0 || region->end == 0) - return ONIGERR_MEMORY; - - region->allocated = n; - } - else if (region->allocated < n) { - region->beg = (int* )xrealloc(region->beg, n * sizeof(int)); - region->end = (int* )xrealloc(region->end, n * sizeof(int)); - - if (region->beg == 0 || region->end == 0) - return ONIGERR_MEMORY; - - region->allocated = n; - } - - return 0; -} - -static int -onig_region_resize_clear(OnigRegion* region, int n) -{ - int r; - - r = onig_region_resize(region, n); - if (r != 0) return r; - onig_region_clear(region); - return 0; -} - -extern int 
-onig_region_set(OnigRegion* region, int at, int beg, int end) -{ - if (at < 0) return ONIGERR_INVALID_ARGUMENT; - - if (at >= region->allocated) { - int r = onig_region_resize(region, at + 1); - if (r < 0) return r; - } - - region->beg[at] = beg; - region->end[at] = end; - return 0; -} - -extern void -onig_region_init(OnigRegion* region) -{ - region->num_regs = 0; - region->allocated = 0; - region->beg = (int* )0; - region->end = (int* )0; - region->history_root = (OnigCaptureTreeNode* )0; -} - -extern OnigRegion* -onig_region_new(void) -{ - OnigRegion* r; - - r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); - onig_region_init(r); - return r; -} - -extern void -onig_region_free(OnigRegion* r, int free_self) -{ - if (r) { - if (r->allocated > 0) { - if (r->beg) xfree(r->beg); - if (r->end) xfree(r->end); - r->allocated = 0; - } -#ifdef USE_CAPTURE_HISTORY - history_root_free(r); -#endif - if (free_self) xfree(r); - } -} - -extern void -onig_region_copy(OnigRegion* to, OnigRegion* from) -{ -#define RREGC_SIZE (sizeof(int) * from->num_regs) - int i; - - if (to == from) return; - - if (to->allocated == 0) { - if (from->num_regs > 0) { - to->beg = (int* )xmalloc(RREGC_SIZE); - to->end = (int* )xmalloc(RREGC_SIZE); - to->allocated = from->num_regs; - } - } - else if (to->allocated < from->num_regs) { - to->beg = (int* )xrealloc(to->beg, RREGC_SIZE); - to->end = (int* )xrealloc(to->end, RREGC_SIZE); - to->allocated = from->num_regs; - } - - for (i = 0; i < from->num_regs; i++) { - to->beg[i] = from->beg[i]; - to->end[i] = from->end[i]; - } - to->num_regs = from->num_regs; - -#ifdef USE_CAPTURE_HISTORY - history_root_free(to); - - if (IS_NOT_NULL(from->history_root)) { - to->history_root = history_tree_clone(from->history_root); - } -#endif -} - - -/** stack **/ -#define INVALID_STACK_INDEX -1 - -/* stack type */ -/* used by normal-POP */ -#define STK_ALT 0x0001 -#define STK_LOOK_BEHIND_NOT 0x0002 -#define STK_POS_NOT 0x0003 -/* handled by normal-POP */ -#define 
STK_MEM_START 0x0100 -#define STK_MEM_END 0x8200 -#define STK_REPEAT_INC 0x0300 -#define STK_STATE_CHECK_MARK 0x1000 -/* avoided by normal-POP */ -#define STK_NULL_CHECK_START 0x3000 -#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ -#define STK_MEM_END_MARK 0x8400 -#define STK_POS 0x0500 /* used when POP-POS */ -#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ -#define STK_REPEAT 0x0700 -#define STK_CALL_FRAME 0x0800 -#define STK_RETURN 0x0900 -#define STK_VOID 0x0a00 /* for fill a blank */ - -/* stack type check mask */ -#define STK_MASK_POP_USED 0x00ff -#define STK_MASK_TO_VOID_TARGET 0x10ff -#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ - -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ - (msa).stack_p = (void* )0;\ - (msa).options = (arg_option);\ - (msa).region = (arg_region);\ - (msa).start = (arg_start);\ - (msa).best_len = ONIG_MISMATCH;\ -} while(0) -#else -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ - (msa).stack_p = (void* )0;\ - (msa).options = (arg_option);\ - (msa).region = (arg_region);\ - (msa).start = (arg_start);\ -} while(0) -#endif - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 - -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ - if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ - unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ - offset = ((offset) * (state_num)) >> 3;\ - if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ - if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ - (msa).state_check_buff = (void* )xmalloc(size);\ - else \ - (msa).state_check_buff = (void* )xalloca(size);\ - xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ - (size_t )(size - (offset))); \ - (msa).state_check_buff_size = size;\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - 
(msa).state_check_buff_size = 0;\ - }\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ - } while(0) - -#define MATCH_ARG_FREE(msa) do {\ - if ((msa).stack_p) xfree((msa).stack_p);\ - if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ - if ((msa).state_check_buff) xfree((msa).state_check_buff);\ - }\ -} while(0) -#else -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) -#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) -#endif - - - -#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ - if (msa->stack_p) {\ - alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ - stk_alloc = (OnigStackType* )(msa->stack_p);\ - stk_base = stk_alloc;\ - stk = stk_base;\ - stk_end = stk_base + msa->stack_n;\ - }\ - else {\ - alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\ - + sizeof(OnigStackType) * (stack_num));\ - stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ - stk_base = stk_alloc;\ - stk = stk_base;\ - stk_end = stk_base + (stack_num);\ - }\ -} while(0) - -#define STACK_SAVE do{\ - if (stk_base != stk_alloc) {\ - msa->stack_p = stk_base;\ - msa->stack_n = stk_end - stk_base;\ - };\ -} while(0) - -static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; - -extern unsigned int -onig_get_match_stack_limit_size(void) -{ - return MatchStackLimitSize; -} - -extern int -onig_set_match_stack_limit_size(unsigned int size) -{ - MatchStackLimitSize = size; - return 0; -} - -static int -stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, - OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) -{ - unsigned int n; - OnigStackType *x, *stk_base, *stk_end, *stk; - - stk_base = *arg_stk_base; - stk_end = *arg_stk_end; - stk = *arg_stk; - - n = stk_end - stk_base; - if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { - x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); - if (IS_NULL(x)) 
{ - STACK_SAVE; - return ONIGERR_MEMORY; - } - xmemcpy(x, stk_base, n * sizeof(OnigStackType)); - n *= 2; - } - else { - n *= 2; - if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { - if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) - return ONIGERR_MATCH_STACK_LIMIT_OVER; - else - n = MatchStackLimitSize; - } - x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); - if (IS_NULL(x)) { - STACK_SAVE; - return ONIGERR_MEMORY; - } - } - *arg_stk = x + (stk - stk_base); - *arg_stk_base = x; - *arg_stk_end = x + n; - return 0; -} - -#define STACK_ENSURE(n) do {\ - if (stk_end - stk < (n)) {\ - int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ - if (r != 0) { STACK_SAVE; return r; } \ - }\ -} while(0) - -#define STACK_AT(index) (stk_base + (index)) -#define GET_STACK_INDEX(stk) ((stk) - stk_base) - -#define STACK_PUSH_TYPE(stack_type) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - STACK_INC;\ -} while(0) - -#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define STATE_CHECK_POS(s,snum) \ - (((s) - str) * num_comb_exp_check + ((snum) - 1)) -#define STATE_CHECK_VAL(v,snum) do {\ - if (state_check_buff != NULL) {\ - int x = STATE_CHECK_POS(s,snum);\ - (v) = state_check_buff[x/8] & (1<<(x%8));\ - }\ - else (v) = 0;\ -} while(0) - - -#define ELSE_IF_STATE_CHECK_MARK(stk) \ - else if ((stk)->type == STK_STATE_CHECK_MARK) { \ - int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ - state_check_buff[x/8] |= (1<<(x%8)); \ - } - -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.state_check = 0;\ - 
STACK_INC;\ -} while(0) - -#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_ALT;\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_STATE_CHECK(s,snum) do {\ - if (state_check_buff != NULL) {\ - STACK_ENSURE(1);\ - stk->type = STK_STATE_CHECK_MARK;\ - stk->u.state.pstr = (s);\ - stk->u.state.state_check = (snum);\ - STACK_INC;\ - }\ -} while(0) - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - -#define ELSE_IF_STATE_CHECK_MARK(stk) - -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - STACK_INC;\ -} while(0) -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - -#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) -#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) -#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) -#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) -#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ - STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) - -#define STACK_PUSH_REPEAT(id, pat) do {\ - STACK_ENSURE(1);\ - stk->type = STK_REPEAT;\ - stk->u.repeat.num = (id);\ - stk->u.repeat.pcode = (pat);\ - stk->u.repeat.count = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_REPEAT_INC(sindex) do {\ - STACK_ENSURE(1);\ - stk->type = STK_REPEAT_INC;\ - stk->u.repeat_inc.si = (sindex);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_START(mnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_START;\ - stk->u.mem.num = (mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - 
stk->u.mem.end = mem_end_stk[mnum];\ - mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ - mem_end_stk[mnum] = INVALID_STACK_INDEX;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_END(mnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_END;\ - stk->u.mem.num = (mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - stk->u.mem.end = mem_end_stk[mnum];\ - mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_END_MARK(mnum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_END_MARK;\ - stk->u.mem.num = (mnum);\ - STACK_INC;\ -} while(0) - -#define STACK_GET_MEM_START(mnum, k) do {\ - int level = 0;\ - k = stk;\ - while (k > stk_base) {\ - k--;\ - if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ - && k->u.mem.num == (mnum)) {\ - level++;\ - }\ - else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ - if (level == 0) break;\ - level--;\ - }\ - }\ -} while(0) - -#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ - int level = 0;\ - while (k < stk) {\ - if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ - if (level == 0) (start) = k->u.mem.pstr;\ - level++;\ - }\ - else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ - level--;\ - if (level == 0) {\ - (end) = k->u.mem.pstr;\ - break;\ - }\ - }\ - k++;\ - }\ -} while(0) - -#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_NULL_CHECK_START;\ - stk->u.null_check.num = (cnum);\ - stk->u.null_check.pstr = (s);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_NULL_CHECK_END;\ - stk->u.null_check.num = (cnum);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_CALL_FRAME(pat) do {\ - STACK_ENSURE(1);\ - stk->type = STK_CALL_FRAME;\ - stk->u.call_frame.ret_addr = (pat);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_RETURN do {\ - STACK_ENSURE(1);\ - stk->type = STK_RETURN;\ - STACK_INC;\ -} while(0) - - -#ifdef ONIG_DEBUG -#define 
STACK_BASE_CHECK(p, at) \ - if ((p) < stk_base) {\ - fprintf(stderr, "at %s\n", at);\ - goto stack_error;\ - } -#else -#define STACK_BASE_CHECK(p, at) -#endif - -#define STACK_POP_ONE do {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ -} while(0) - -#define STACK_POP do {\ - switch (pop_level) {\ - case STACK_POP_LEVEL_FREE:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP"); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ - break;\ - case STACK_POP_LEVEL_MEM_START:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP 2"); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ - break;\ - default:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP 3"); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ - break;\ - }\ -} while(0) - -#define STACK_POP_TIL_POS_NOT do {\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ - if (stk->type == STK_POS_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - 
}\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ -} while(0) - -#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ - if (stk->type == STK_LOOK_BEHIND_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ -} while(0) - -#define STACK_POS_END(k) do {\ - k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_POS_END"); \ - if (IS_TO_VOID_TARGET(k)) {\ - k->type = STK_VOID;\ - }\ - else if (k->type == STK_POS) {\ - k->type = STK_VOID;\ - break;\ - }\ - }\ -} while(0) - -#define STACK_STOP_BT_END do {\ - OnigStackType *k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ - if (IS_TO_VOID_TARGET(k)) {\ - k->type = STK_VOID;\ - }\ - else if (k->type == STK_STOP_BT) {\ - k->type = STK_VOID;\ - break;\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK(isnull,id,s) do {\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - (isnull) = (k->u.null_check.pstr == (s));\ - break;\ - }\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ - int level = 0;\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (level == 0) {\ - (isnull) = (k->u.null_check.pstr == (s));\ - break;\ - }\ - else level--;\ - }\ - }\ - else if (k->type == STK_NULL_CHECK_END) {\ - level++;\ - }\ - }\ -} while(0) - -#define 
STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (k->u.null_check.pstr != (s)) {\ - (isnull) = 0;\ - break;\ - }\ - else {\ - UChar* endp;\ - (isnull) = 1;\ - while (k < stk) {\ - if (k->type == STK_MEM_START) {\ - if (k->u.mem.end == INVALID_STACK_INDEX) {\ - (isnull) = 0; break;\ - }\ - if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ - endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ - else\ - endp = (UChar* )k->u.mem.end;\ - if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ - (isnull) = 0; break;\ - }\ - else if (endp != s) {\ - (isnull) = -1; /* empty, but position changed */ \ - }\ - }\ - k++;\ - }\ - break;\ - }\ - }\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ - int level = 0;\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (level == 0) {\ - if (k->u.null_check.pstr != (s)) {\ - (isnull) = 0;\ - break;\ - }\ - else {\ - UChar* endp;\ - (isnull) = 1;\ - while (k < stk) {\ - if (k->type == STK_MEM_START) {\ - if (k->u.mem.end == INVALID_STACK_INDEX) {\ - (isnull) = 0; break;\ - }\ - if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ - endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ - else\ - endp = (UChar* )k->u.mem.end;\ - if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ - (isnull) = 0; break;\ - }\ - else if (endp != s) {\ - (isnull) = -1; /* empty, but position changed */ \ - }\ - }\ - k++;\ - }\ - break;\ - }\ - }\ - else {\ - level--;\ - }\ - }\ - }\ - else if (k->type == STK_NULL_CHECK_END) {\ - if (k->u.null_check.num == (id)) level++;\ - }\ - }\ -} while(0) - -#define STACK_GET_REPEAT(id, k) do {\ - int level = 0;\ - k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ 
- if (k->type == STK_REPEAT) {\ - if (level == 0) {\ - if (k->u.repeat.num == (id)) {\ - break;\ - }\ - }\ - }\ - else if (k->type == STK_CALL_FRAME) level--;\ - else if (k->type == STK_RETURN) level++;\ - }\ -} while(0) - -#define STACK_RETURN(addr) do {\ - int level = 0;\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_RETURN"); \ - if (k->type == STK_CALL_FRAME) {\ - if (level == 0) {\ - (addr) = k->u.call_frame.ret_addr;\ - break;\ - }\ - else level--;\ - }\ - else if (k->type == STK_RETURN)\ - level++;\ - }\ -} while(0) - - -#define STRING_CMP(s1,s2,len) do {\ - while (len-- > 0) {\ - if (*s1++ != *s2++) goto fail;\ - }\ -} while(0) - -#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\ - if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \ - goto fail; \ -} while(0) - -static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, - UChar* s1, UChar** ps2, int mblen) -{ - UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - UChar *p1, *p2, *end1, *s2, *end2; - int len1, len2; - - s2 = *ps2; - end1 = s1 + mblen; - end2 = s2 + mblen; - while (s1 < end1) { - len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1); - len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2); - if (len1 != len2) return 0; - p1 = buf1; - p2 = buf2; - while (len1-- > 0) { - if (*p1 != *p2) return 0; - p1++; - p2++; - } - } - - *ps2 = s2; - return 1; -} - -#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ - is_fail = 0;\ - while (len-- > 0) {\ - if (*s1++ != *s2++) {\ - is_fail = 1; break;\ - }\ - }\ -} while(0) - -#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\ - if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \ - is_fail = 1; \ - else \ - is_fail = 0; \ -} while(0) - - -#define IS_EMPTY_STR (str == end) -#define ON_STR_BEGIN(s) ((s) == str) -#define ON_STR_END(s) ((s) == end) -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -#define 
DATA_ENSURE_CHECK1 (s < right_range) -#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) -#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail -#else -#define DATA_ENSURE_CHECK1 (s < end) -#define DATA_ENSURE_CHECK(n) (s + (n) <= end) -#define DATA_ENSURE(n) if (s + (n) > end) goto fail -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ - - -#ifdef USE_CAPTURE_HISTORY -static int -make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, - OnigStackType* stk_top, UChar* str, regex_t* reg) -{ - int n, r; - OnigCaptureTreeNode* child; - OnigStackType* k = *kp; - - while (k < stk_top) { - if (k->type == STK_MEM_START) { - n = k->u.mem.num; - if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && - BIT_STATUS_AT(reg->capture_history, n) != 0) { - child = history_node_new(); - CHECK_NULL_RETURN_MEMERR(child); - child->group = n; - child->beg = (int )(k->u.mem.pstr - str); - r = history_tree_add_child(node, child); - if (r != 0) return r; - *kp = (k + 1); - r = make_capture_history_tree(child, kp, stk_top, str, reg); - if (r != 0) return r; - - k = *kp; - child->end = (int )(k->u.mem.pstr - str); - } - } - else if (k->type == STK_MEM_END) { - if (k->u.mem.num == node->group) { - node->end = (int )(k->u.mem.pstr - str); - *kp = k; - return 0; - } - } - k++; - } - - return 1; /* 1: root node ending. 
*/ -} -#endif - -#ifdef USE_BACKREF_WITH_LEVEL -static int mem_is_in_memp(int mem, int num, UChar* memp) -{ - int i; - MemNumType m; - - for (i = 0; i < num; i++) { - GET_MEMNUM_INC(m, memp); - if (mem == (int )m) return 1; - } - return 0; -} - -static int backref_match_at_nested_level(regex_t* reg - , OnigStackType* top, OnigStackType* stk_base - , int ignore_case, int case_fold_flag - , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) -{ - UChar *ss, *p, *pstart, *pend = NULL_UCHARP; - int level; - OnigStackType* k; - - level = 0; - k = top; - k--; - while (k >= stk_base) { - if (k->type == STK_CALL_FRAME) { - level--; - } - else if (k->type == STK_RETURN) { - level++; - } - else if (level == nest) { - if (k->type == STK_MEM_START) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pstart = k->u.mem.pstr; - if (pend != NULL_UCHARP) { - if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ - p = pstart; - ss = *s; - - if (ignore_case != 0) { - if (string_cmp_ic(reg->enc, case_fold_flag, - pstart, &ss, (int )(pend - pstart)) == 0) - return 0; /* or goto next_mem; */ - } - else { - while (p < pend) { - if (*p++ != *ss++) return 0; /* or goto next_mem; */ - } - } - - *s = ss; - return 1; - } - } - } - else if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pend = k->u.mem.pstr; - } - } - } - k--; - } - - return 0; -} -#endif /* USE_BACKREF_WITH_LEVEL */ - - -#ifdef ONIG_DEBUG_STATISTICS - -#define USE_TIMEOFDAY - -#ifdef USE_TIMEOFDAY -#ifdef HAVE_SYS_TIME_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif -static struct timeval ts, te; -#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) -#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ - (((te).tv_sec - (ts).tv_sec)*1000000)) -#else -#ifdef HAVE_SYS_TIMES_H -#include -#endif -static struct tms ts, te; -#define GETTIME(t) times(&(t)) -#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) -#endif - -static int 
OpCounter[256]; -static int OpPrevCounter[256]; -static unsigned long OpTime[256]; -static int OpCurr = OP_FINISH; -static int OpPrevTarget = OP_FAIL; -static int MaxStackDepth = 0; - -#define MOP_IN(opcode) do {\ - if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ - OpCurr = opcode;\ - OpCounter[opcode]++;\ - GETTIME(ts);\ -} while(0) - -#define MOP_OUT do {\ - GETTIME(te);\ - OpTime[OpCurr] += TIMEDIFF(te, ts);\ -} while(0) - -extern void -onig_statistics_init(void) -{ - int i; - for (i = 0; i < 256; i++) { - OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; - } - MaxStackDepth = 0; -} - -extern void -onig_print_statistics(FILE* f) -{ - int i; - fprintf(f, " count prev time\n"); - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - fprintf(f, "%8d: %8d: %10ld: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); - } - fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); -} - -#define STACK_INC do {\ - stk++;\ - if (stk - stk_base > MaxStackDepth) \ - MaxStackDepth = stk - stk_base;\ -} while(0) - -#else -#define STACK_INC stk++ - -#define MOP_IN(opcode) -#define MOP_OUT -#endif - - -/* matching region of POSIX API */ -typedef int regoff_t; - -typedef struct { - regoff_t rm_so; - regoff_t rm_eo; -} posix_regmatch_t; - -/* match data(str - end) from position (sstart). */ -/* if sstart == str then set sprev to NULL. 
*/ -static int -match_at(regex_t* reg, const UChar* str, const UChar* end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar* right_range, -#endif - const UChar* sstart, UChar* sprev, OnigMatchArg* msa) -{ - static UChar FinishCode[] = { OP_FINISH }; - - int i, n, num_mem, best_len, pop_level; - LengthType tlen, tlen2; - MemNumType mem; - RelAddrType addr; - OnigOptionType option = reg->options; - OnigEncoding encode = reg->enc; - OnigCaseFoldType case_fold_flag = reg->case_fold_flag; - UChar *s, *q, *sbegin; - UChar *p = reg->p; - char *alloca_base; - OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; - OnigStackType *stkp; /* used as any purpose. */ - OnigStackIndex si; - OnigStackIndex *repeat_stk; - OnigStackIndex *mem_start_stk, *mem_end_stk; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int scv; - unsigned char* state_check_buff = msa->state_check_buff; - int num_comb_exp_check = reg->num_comb_exp_check; -#endif - n = reg->num_repeat + reg->num_mem * 2; - - STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); - pop_level = reg->stack_pop_level; - num_mem = reg->num_mem; - repeat_stk = (OnigStackIndex* )alloca_base; - - mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); - mem_end_stk = mem_start_stk + num_mem; - mem_start_stk--; /* for index start from 1, - mem_start_stk[1]..mem_start_stk[num_mem] */ - mem_end_stk--; /* for index start from 1, - mem_end_stk[1]..mem_end_stk[num_mem] */ - for (i = 1; i <= num_mem; i++) { - mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; - } - -#ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", - (int )str, (int )end, (int )sstart, (int )sprev); - fprintf(stderr, "size: %d, start offset: %d\n", - (int )(end - str), (int )(sstart - str)); -#endif - - STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ - best_len = ONIG_MISMATCH; - s = (UChar* )sstart; - while (1) { -#ifdef ONIG_DEBUG_MATCH - { - UChar *q, *bp, buf[50]; - int len; - 
fprintf(stderr, "%4d> \"", (int )(s - str)); - bp = buf; - for (i = 0, q = s; i < 7 && q < end; i++) { - len = enclen(encode, q); - while (len-- > 0) *bp++ = *q++; - } - if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } - else { xmemcpy(bp, "\"", 1); bp += 1; } - *bp = 0; - fputs((char* )buf, stderr); - for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - onig_print_compiled_byte_code(stderr, p, NULL, encode); - fprintf(stderr, "\n"); - } -#endif - - sbegin = s; - switch (*p++) { - case OP_END: MOP_IN(OP_END); - n = s - sstart; - if (n > best_len) { - OnigRegion* region; -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - if (IS_FIND_LONGEST(option)) { - if (n > msa->best_len) { - msa->best_len = n; - msa->best_s = (UChar* )sstart; - } - else - goto end_best_len; - } -#endif - best_len = n; - region = msa->region; - if (region) { -#ifdef USE_POSIX_API_REGION_OPTION - if (IS_POSIX_REGION(msa->options)) { - posix_regmatch_t* rmt = (posix_regmatch_t* )region; - - rmt[0].rm_so = sstart - str; - rmt[0].rm_eo = s - str; - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; - else - rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; - - rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str; - } - else { - rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; - } - } - } - else { -#endif /* USE_POSIX_API_REGION_OPTION */ - region->beg[0] = sstart - str; - region->end[0] = s - str; - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; - else - region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; - - region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) - ? 
STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str; - } - else { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } - } - -#ifdef USE_CAPTURE_HISTORY - if (reg->capture_history != 0) { - int r; - OnigCaptureTreeNode* node; - - if (IS_NULL(region->history_root)) { - region->history_root = node = history_node_new(); - CHECK_NULL_RETURN_MEMERR(node); - } - else { - node = region->history_root; - history_tree_clear(node); - } - - node->group = 0; - node->beg = sstart - str; - node->end = s - str; - - stkp = stk_base; - r = make_capture_history_tree(region->history_root, &stkp, - stk, (UChar* )str, reg); - if (r < 0) { - best_len = r; /* error code */ - goto finish; - } - } -#endif /* USE_CAPTURE_HISTORY */ -#ifdef USE_POSIX_API_REGION_OPTION - } /* else IS_POSIX_REGION() */ -#endif - } /* if (region) */ - } /* n > best_len */ - -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - end_best_len: -#endif - MOP_OUT; - - if (IS_FIND_CONDITION(option)) { - if (IS_FIND_NOT_EMPTY(option) && s == sstart) { - best_len = ONIG_MISMATCH; - goto fail; /* for retry */ - } - if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { - goto fail; /* for retry */ - } - } - - /* default behavior: return first-matching result. 
*/ - goto finish; - break; - - case OP_EXACT1: MOP_IN(OP_EXACT1); -#if 0 - DATA_ENSURE(1); - if (*p != *s) goto fail; - p++; s++; -#endif - if (*p != *s++) goto fail; - DATA_ENSURE(0); - p++; - MOP_OUT; - break; - - case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); - { - int len; - UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - while (len-- > 0) { - if (*p != *q) { - goto fail; - } - p++; q++; - } - } - MOP_OUT; - break; - - case OP_EXACT2: MOP_IN(OP_EXACT2); - DATA_ENSURE(2); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACT3: MOP_IN(OP_EXACT3); - DATA_ENSURE(3); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACT4: MOP_IN(OP_EXACT4); - DATA_ENSURE(4); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACT5: MOP_IN(OP_EXACT5); - DATA_ENSURE(5); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACTN: MOP_IN(OP_EXACTN); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen); - while (tlen-- > 0) { - if (*p++ != *s++) goto fail; - } - sprev = s - 1; - MOP_OUT; - continue; - break; - - case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); - { - int len; - UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - GET_LENGTH_INC(tlen, p); - endp = p + tlen; - - while (p < endp) { - sprev = s; - DATA_ENSURE(1); - len = 
ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - while (len-- > 0) { - if (*p != *q) goto fail; - p++; q++; - } - } - } - - MOP_OUT; - continue; - break; - - case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); - DATA_ENSURE(2); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - MOP_OUT; - break; - - case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); - DATA_ENSURE(4); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - sprev = s; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); - DATA_ENSURE(6); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - sprev = s; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen * 2); - while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - 2; - MOP_OUT; - continue; - break; - - case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen * 3); - while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - 3; - MOP_OUT; - continue; - break; - - case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); - GET_LENGTH_INC(tlen, p); /* mb-len */ - GET_LENGTH_INC(tlen2, p); /* string len */ - tlen2 *= tlen; - DATA_ENSURE(tlen2); - while (tlen2-- > 0) { - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - tlen; - MOP_OUT; - continue; - break; - - case OP_CCLASS: MOP_IN(OP_CCLASS); - DATA_ENSURE(1); - if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; - p += SIZE_BITSET; - s += 
enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ - MOP_OUT; - break; - - case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); - if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; - - cclass_mb: - GET_LENGTH_INC(tlen, p); - { - OnigCodePoint code; - UChar *ss; - int mb_len; - - DATA_ENSURE(1); - mb_len = enclen(encode, s); - DATA_ENSURE(mb_len); - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (! onig_is_in_code_range(p, code)) goto fail; -#else - q = p; - ALIGNMENT_RIGHT(q); - if (! onig_is_in_code_range(q, code)) goto fail; -#endif - } - p += tlen; - MOP_OUT; - break; - - case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); - DATA_ENSURE(1); - if (ONIGENC_IS_MBC_HEAD(encode, s)) { - p += SIZE_BITSET; - goto cclass_mb; - } - else { - if (BITSET_AT(((BitSetRef )p), *s) == 0) - goto fail; - - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; - } - MOP_OUT; - break; - - case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); - DATA_ENSURE(1); - if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; - p += SIZE_BITSET; - s += enclen(encode, s); - MOP_OUT; - break; - - case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_HEAD(encode, s)) { - s++; - GET_LENGTH_INC(tlen, p); - p += tlen; - goto cc_mb_not_success; - } - - cclass_mb_not: - GET_LENGTH_INC(tlen, p); - { - OnigCodePoint code; - UChar *ss; - int mb_len = enclen(encode, s); - - if (! 
DATA_ENSURE_CHECK(mb_len)) { - DATA_ENSURE(1); - s = (UChar* )end; - p += tlen; - goto cc_mb_not_success; - } - - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (onig_is_in_code_range(p, code)) goto fail; -#else - q = p; - ALIGNMENT_RIGHT(q); - if (onig_is_in_code_range(q, code)) goto fail; -#endif - } - p += tlen; - - cc_mb_not_success: - MOP_OUT; - break; - - case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); - DATA_ENSURE(1); - if (ONIGENC_IS_MBC_HEAD(encode, s)) { - p += SIZE_BITSET; - goto cclass_mb_not; - } - else { - if (BITSET_AT(((BitSetRef )p), *s) != 0) - goto fail; - - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; - } - MOP_OUT; - break; - - case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); - { - OnigCodePoint code; - void *node; - int mb_len; - UChar *ss; - - DATA_ENSURE(1); - GET_POINTER_INC(node, p); - mb_len = enclen(encode, s); - ss = s; - s += mb_len; - DATA_ENSURE(0); - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; - } - MOP_OUT; - break; - - case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); - DATA_ENSURE(1); - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - s += n; - MOP_OUT; - break; - - case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); - DATA_ENSURE(1); - n = enclen(encode, s); - DATA_ENSURE(n); - s += n; - MOP_OUT; - break; - - case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); - while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - MOP_OUT; - break; - - case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); - while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - MOP_OUT; - break; - - case OP_ANYCHAR_STAR_PEEK_NEXT: 
MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); - while (DATA_ENSURE_CHECK1) { - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev); - } - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - p++; - MOP_OUT; - break; - - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); - while (DATA_ENSURE_CHECK1) { - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev); - } - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - p++; - MOP_OUT; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - MOP_OUT; - break; - - case OP_STATE_CHECK_ANYCHAR_ML_STAR: - MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); - - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - MOP_OUT; - break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - - case OP_WORD: MOP_IN(OP_WORD); - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - - s += enclen(encode, s); - MOP_OUT; - break; - - case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); - DATA_ENSURE(1); - if (ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - - s += enclen(encode, s); - MOP_OUT; - break; - - case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (! 
ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - == ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - MOP_OUT; - continue; - break; - - case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - != ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - MOP_OUT; - continue; - break; - -#ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { - if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - MOP_OUT; - continue; - } - } - goto fail; - break; - - case OP_WORD_END: MOP_IN(OP_WORD_END); - if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { - MOP_OUT; - continue; - } - } - goto fail; - break; -#endif - - case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); - if (! ON_STR_BEGIN(s)) goto fail; - - MOP_OUT; - continue; - break; - - case OP_END_BUF: MOP_IN(OP_END_BUF); - if (! 
ON_STR_END(s)) goto fail; - - MOP_OUT; - continue; - break; - - case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); - if (ON_STR_BEGIN(s)) { - if (IS_NOTBOL(msa->options)) goto fail; - MOP_OUT; - continue; - } - else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - MOP_OUT; - continue; - } - goto fail; - break; - - case OP_END_LINE: MOP_IN(OP_END_LINE); - if (ON_STR_END(s)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { -#endif - if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - continue; -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } -#endif - } - else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - MOP_OUT; - continue; - } -#ifdef USE_CRNL_AS_LINE_TERMINATOR - else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - MOP_OUT; - continue; - } -#endif - goto fail; - break; - - case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); - if (ON_STR_END(s)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { -#endif - if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - continue; -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } -#endif - } - else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && - ON_STR_END(s + enclen(encode, s))) { - MOP_OUT; - continue; - } -#ifdef USE_CRNL_AS_LINE_TERMINATOR - else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - UChar* ss = s + enclen(encode, s); - ss += enclen(encode, ss); - if (ON_STR_END(ss)) { - MOP_OUT; - continue; - } - } -#endif - goto fail; - break; - - case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); - if (s != msa->start) - goto fail; - - MOP_OUT; - continue; - break; - - case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); - GET_MEMNUM_INC(mem, p); - STACK_PUSH_MEM_START(mem, s); - MOP_OUT; - continue; - break; - - case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); - GET_MEMNUM_INC(mem, p); - mem_start_stk[mem] = (OnigStackIndex )((void* )s); - MOP_OUT; - continue; 
- break; - - case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); - GET_MEMNUM_INC(mem, p); - STACK_PUSH_MEM_END(mem, s); - MOP_OUT; - continue; - break; - - case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); - GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (OnigStackIndex )((void* )s); - MOP_OUT; - continue; - break; - -#ifdef USE_SUBEXP_CALL - case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); - GET_MEMNUM_INC(mem, p); - STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ - STACK_PUSH_MEM_END(mem, s); - mem_start_stk[mem] = GET_STACK_INDEX(stkp); - MOP_OUT; - continue; - break; - - case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); - GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (OnigStackIndex )((void* )s); - STACK_GET_MEM_START(mem, stkp); - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - mem_start_stk[mem] = GET_STACK_INDEX(stkp); - else - mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); - - STACK_PUSH_MEM_END_MARK(mem); - MOP_OUT; - continue; - break; -#endif - - case OP_BACKREF1: MOP_IN(OP_BACKREF1); - mem = 1; - goto backref; - break; - - case OP_BACKREF2: MOP_IN(OP_BACKREF2); - mem = 2; - goto backref; - break; - - case OP_BACKREFN: MOP_IN(OP_BACKREFN); - GET_MEMNUM_INC(mem, p); - backref: - { - int len; - UChar *pstart, *pend; - - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - STRING_CMP(pstart, s, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - MOP_OUT; - continue; - } - break; - - case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); - GET_MEMNUM_INC(mem, p); - { - int len; - UChar *pstart, *pend; - - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - STRING_CMP_IC(case_fold_flag, pstart, &s, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - MOP_OUT; - continue; - } - break; - - case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); - { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE(pstart, swork, n, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - continue; - } - break; - - case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); - { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - continue; - } - break; - -#ifdef USE_BACKREF_WITH_LEVEL - case OP_BACKREF_WITH_LEVEL: - { - int len; - OnigOptionType ic; - LengthType level; - - GET_OPTION_INC(ic, p); - GET_LENGTH_INC(level, p); - GET_LENGTH_INC(tlen, p); - - sprev = s; - if (backref_match_at_nested_level(reg, stk, stk_base, ic - , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - p += (SIZE_MEMNUM * tlen); - } - else - goto fail; - - MOP_OUT; - continue; - } - - break; -#endif - -#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ - case OP_SET_OPTION_PUSH: 
MOP_IN(OP_SET_OPTION_PUSH); - GET_OPTION_INC(option, p); - STACK_PUSH_ALT(p, s, sprev); - p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; - MOP_OUT; - continue; - break; - - case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); - GET_OPTION_INC(option, p); - MOP_OUT; - continue; - break; -#endif - - case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_PUSH_NULL_CHECK_START(mem, s); - MOP_OUT; - continue; - break; - - case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); - { - int isnull; - - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK(isnull, mem, s); - if (isnull) { -#ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - null_check_found: - /* empty loop founded, skip next instruction */ - switch (*p++) { - case OP_JUMP: - case OP_PUSH: - p += SIZE_RELADDR; - break; - case OP_REPEAT_INC: - case OP_REPEAT_INC_NG: - case OP_REPEAT_INC_SG: - case OP_REPEAT_INC_NG_SG: - p += SIZE_MEMNUM; - break; - default: - goto unexpected_bytecode_error; - break; - } - } - } - MOP_OUT; - continue; - break; - -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); - { - int isnull; - - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); - if (isnull) { -#ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - if (isnull == -1) goto fail; - goto null_check_found; - } - } - MOP_OUT; - continue; - break; -#endif - -#ifdef USE_SUBEXP_CALL - case OP_NULL_CHECK_END_MEMST_PUSH: - MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); - { - int isnull; - - GET_MEMNUM_INC(mem, p); /* mem: null check id */ -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); -#else - STACK_NULL_CHECK_REC(isnull, mem, s); -#endif - if (isnull) { -#ifdef ONIG_DEBUG_MATCH - fprintf(stderr, 
"NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - if (isnull == -1) goto fail; - goto null_check_found; - } - else { - STACK_PUSH_NULL_CHECK_END(mem); - } - } - MOP_OUT; - continue; - break; -#endif - - case OP_JUMP: MOP_IN(OP_JUMP); - GET_RELADDR_INC(addr, p); - p += addr; - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; - continue; - break; - - case OP_PUSH: MOP_IN(OP_PUSH); - GET_RELADDR_INC(addr, p); - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - GET_RELADDR_INC(addr, p); - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); - GET_STATE_CHECK_NUM_INC(mem, p); - GET_RELADDR_INC(addr, p); - STATE_CHECK_VAL(scv, mem); - if (scv) { - p += addr; - } - else { - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - } - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_STATE_CHECK(s, mem); - MOP_OUT; - continue; - break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - - case OP_POP: MOP_IN(OP_POP); - STACK_POP_ONE; - MOP_OUT; - continue; - break; - - case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); - GET_RELADDR_INC(addr, p); - if (*p == *s && DATA_ENSURE_CHECK1) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - } - p += (addr + 1); - MOP_OUT; - continue; - break; - - case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); - GET_RELADDR_INC(addr, p); - if (*p == *s) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - } - p++; - MOP_OUT; - continue; - break; - - case OP_REPEAT: MOP_IN(OP_REPEAT); - { - GET_MEMNUM_INC(mem, p); /* mem: 
OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); - - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); - - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + addr, s, sprev); - } - } - MOP_OUT; - continue; - break; - - case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); - { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); - - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); - - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p, s, sprev); - p += addr; - } - } - MOP_OUT; - continue; - break; - - case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - si = repeat_stk[mem]; - stkp = STACK_AT(si); - - repeat_inc: - stkp->u.repeat.count++; - if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { - /* end of repeat. Nothing to do. */ - } - else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - STACK_PUSH_ALT(p, s, sprev); - p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. 
*/ - } - else { - p = stkp->u.repeat.pcode; - } - STACK_PUSH_REPEAT_INC(si); - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; - continue; - break; - - case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - STACK_GET_REPEAT(mem, stkp); - si = GET_STACK_INDEX(stkp); - goto repeat_inc; - break; - - case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - si = repeat_stk[mem]; - stkp = STACK_AT(si); - - repeat_inc_ng: - stkp->u.repeat.count++; - if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { - if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - UChar* pcode = stkp->u.repeat.pcode; - - STACK_PUSH_REPEAT_INC(si); - STACK_PUSH_ALT(pcode, s, sprev); - } - else { - p = stkp->u.repeat.pcode; - STACK_PUSH_REPEAT_INC(si); - } - } - else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { - STACK_PUSH_REPEAT_INC(si); - } - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; - continue; - break; - - case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - STACK_GET_REPEAT(mem, stkp); - si = GET_STACK_INDEX(stkp); - goto repeat_inc_ng; - break; - - case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); - STACK_PUSH_POS(s, sprev); - MOP_OUT; - continue; - break; - - case OP_POP_POS: MOP_IN(OP_POP_POS); - { - STACK_POS_END(stkp); - s = stkp->u.state.pstr; - sprev = stkp->u.state.pstr_prev; - } - MOP_OUT; - continue; - break; - - case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); - GET_RELADDR_INC(addr, p); - STACK_PUSH_POS_NOT(p + addr, s, sprev); - MOP_OUT; - continue; - break; - - case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); - STACK_POP_TIL_POS_NOT; - goto fail; - break; - - case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); - STACK_PUSH_STOP_BT; - MOP_OUT; - continue; - break; - - case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); - STACK_STOP_BT_END; - MOP_OUT; - continue; - break; - - case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); - GET_LENGTH_INC(tlen, p); - s = 
(UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); - if (IS_NULL(s)) goto fail; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - MOP_OUT; - continue; - break; - - case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); - GET_RELADDR_INC(addr, p); - GET_LENGTH_INC(tlen, p); - q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); - if (IS_NULL(q)) { - /* too short case -> success. ex. /(?p + addr; - MOP_OUT; - continue; - break; - - case OP_RETURN: MOP_IN(OP_RETURN); - STACK_RETURN(p); - STACK_PUSH_RETURN; - MOP_OUT; - continue; - break; -#endif - - case OP_FINISH: - goto finish; - break; - - fail: - MOP_OUT; - /* fall */ - case OP_FAIL: MOP_IN(OP_FAIL); - STACK_POP; - p = stk->u.state.pcode; - s = stk->u.state.pstr; - sprev = stk->u.state.pstr_prev; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (stk->u.state.state_check != 0) { - stk->type = STK_STATE_CHECK_MARK; - stk++; - } -#endif - - MOP_OUT; - continue; - break; - - default: - goto bytecode_error; - - } /* end of switch */ - sprev = sbegin; - } /* end of while(1) */ - - finish: - STACK_SAVE; - return best_len; - -#ifdef ONIG_DEBUG - stack_error: - STACK_SAVE; - return ONIGERR_STACK_BUG; -#endif - - bytecode_error: - STACK_SAVE; - return ONIGERR_UNDEFINED_BYTECODE; - - unexpected_bytecode_error: - STACK_SAVE; - return ONIGERR_UNEXPECTED_BYTECODE; -} - - -static UChar* -slow_search(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) -{ - UChar *t, *p, *s, *end; - - end = (UChar* )text_end; - end -= target_end - target - 1; - if (end > text_range) - end = text_range; - - s = (UChar* )text; - - while (s < end) { - if (*s == *target) { - p = s + 1; - t = target + 1; - while (t < target_end) { - if (*t != *p++) - break; - t++; - } - if (t == target_end) - return s; - } - s += enclen(enc, s); - } - - return (UChar* )NULL; -} - -static int -str_lower_case_match(OnigEncoding enc, int case_fold_flag, - const UChar* t, const 
UChar* tend, - const UChar* p, const UChar* end) -{ - int lowlen; - UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - while (t < tend) { - lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); - q = lowbuf; - while (lowlen > 0) { - if (*t++ != *q++) return 0; - lowlen--; - } - } - - return 1; -} - -static UChar* -slow_search_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) -{ - UChar *s, *end; - - end = (UChar* )text_end; - end -= target_end - target - 1; - if (end > text_range) - end = text_range; - - s = (UChar* )text; - - while (s < end) { - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, text_end)) - return s; - - s += enclen(enc, s); - } - - return (UChar* )NULL; -} - -static UChar* -slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - UChar *t, *p, *s; - - s = (UChar* )text_end; - s -= (target_end - target); - if (s > text_start) - s = (UChar* )text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); - - while (s >= text) { - if (*s == *target) { - p = s + 1; - t = target + 1; - while (t < target_end) { - if (*t != *p++) - break; - t++; - } - if (t == target_end) - return s; - } - s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); - } - - return (UChar* )NULL; -} - -static UChar* -slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - UChar *s; - - s = (UChar* )text_end; - s -= (target_end - target); - if (s > text_start) - s = (UChar* )text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); - - while (s >= text) { - if (str_lower_case_match(enc, case_fold_flag, - target, target_end, s, text_end)) - return s; - - s = 
(UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); - } - - return (UChar* )NULL; -} - -static UChar* -bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) -{ - const UChar *s, *se, *t, *p, *end; - const UChar *tail; - int skip, tlen1; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", - (int )text, (int )text_end, (int )text_range); -#endif - - tail = target_end - 1; - tlen1 = tail - target; - end = text_range; - if (end + tlen1 > text_end) - end = text_end - tlen1; - - s = text; - - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->map[*se]; - t = s; - do { - s += enclen(reg->enc, s); - } while ((s - t) < skip && s < end); - } - } - else { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->int_map[*se]; - t = s; - do { - s += enclen(reg->enc, s); - } while ((s - t) < skip && s < end); - } - } - - return (UChar* )NULL; -} - -static UChar* -bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) -{ - const UChar *s, *t, *p, *end; - const UChar *tail; - - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - - tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->map[*s]; - } - } - else { /* see int_map[] */ - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->int_map[*s]; - } - } - return (UChar* )NULL; -} - -static int 
-set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - int** skip) - -{ - int i, len; - - if (IS_NULL(*skip)) { - *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*skip)) return ONIGERR_MEMORY; - } - - len = end - s; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - (*skip)[i] = len; - - for (i = len - 1; i > 0; i--) - (*skip)[s[i]] = i; - - return 0; -} - -static UChar* -bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - const UChar *s, *t, *p; - - s = text_end - (target_end - target); - if (text_start < s) - s = text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); - - while (s >= text) { - p = s; - t = target; - while (t < target_end && *p == *t) { - p++; t++; - } - if (t == target_end) - return (UChar* )s; - - s -= reg->int_map_backward[*s]; - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); - } - - return (UChar* )NULL; -} - -static UChar* -map_search(OnigEncoding enc, UChar map[], - const UChar* text, const UChar* text_range) -{ - const UChar *s = text; - - while (s < text_range) { - if (map[*s]) return (UChar* )s; - - s += enclen(enc, s); - } - return (UChar* )NULL; -} - -static UChar* -map_search_backward(OnigEncoding enc, UChar map[], - const UChar* text, const UChar* adjust_text, - const UChar* text_start) -{ - const UChar *s = text_start; - - while (s >= text) { - if (map[*s]) return (UChar* )s; - - s = onigenc_get_prev_char_head(enc, adjust_text, s); - } - return (UChar* )NULL; -} - -extern int -onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, - OnigOptionType option) -{ - int r; - UChar *prev; - OnigMatchArg msa; - -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) - start: - THREAD_ATOMIC_START; - if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(reg); - if 
(IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(reg); - ONIG_STATE_INC(reg); - } - } - else { - int n; - - THREAD_ATOMIC_END; - n = 0; - while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - goto start; - } - THREAD_ATOMIC_END; -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - - MATCH_ARG_INIT(msa, option, region, at); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = at - str; - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif - - if (region -#ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) -#endif - ) { - r = onig_region_resize_clear(region, reg->num_mem + 1); - } - else - r = 0; - - if (r == 0) { - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - end, -#endif - at, prev, &msa); - } - - MATCH_ARG_FREE(msa); - ONIG_STATE_DEC_THREAD(reg); - return r; -} - -static int -forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, - UChar* range, UChar** low, UChar** high, UChar** low_prev) -{ - UChar *p, *pprev = (UChar* )NULL; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", - (int )str, (int )end, (int )s, (int )range); -#endif - - p = s; - if (reg->dmin > 0) { - if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { - p += reg->dmin; - } - else { - UChar *q = p + reg->dmin; - while (p < q) p += enclen(reg->enc, p); - } - } - - retry: - switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: - p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); - break; - case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, p, end, range); - break; - - case ONIG_OPTIMIZE_EXACT_BM: - p = bm_search(reg, reg->exact, reg->exact_end, p, end, 
range); - break; - - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: - p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); - break; - - case ONIG_OPTIMIZE_MAP: - p = map_search(reg->enc, reg->map, p, range); - break; - } - - if (p && p < range) { - if (p - reg->dmin < s) { - retry_gate: - pprev = p; - p += enclen(reg->enc, p); - goto retry; - } - - if (reg->sub_anchor) { - UChar* prev; - - switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) - goto retry_gate; - } - break; - - case ANCHOR_END_LINE: - if (ON_STR_END(p)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) - goto retry_gate; -#endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) -#ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) -#endif - ) - goto retry_gate; - break; - } - } - - if (reg->dmax == 0) { - *low = p; - if (low_prev) { - if (*low > s) - *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); - else - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - } - } - else { - if (reg->dmax != ONIG_INFINITE_DISTANCE) { - *low = p - reg->dmax; - if (*low > s) { - *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, - *low, (const UChar** )low_prev); - if (low_prev && IS_NULL(*low_prev)) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : s), *low); - } - else { - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? 
pprev : str), *low); - } - } - } - /* no needs to adjust *high, *high is used as range check only */ - *high = p - reg->dmin; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, - "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", - (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); -#endif - return 1; /* success */ - } - - return 0; /* fail */ -} - -static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, - int** skip)); - -#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 - -static int -backward_search_range(regex_t* reg, const UChar* str, const UChar* end, - UChar* s, const UChar* range, UChar* adjrange, - UChar** low, UChar** high) -{ - int r; - UChar *p; - - range += reg->dmin; - p = s; - - retry: - switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: - exact_method: - p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, - range, adjrange, end, p); - break; - - case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, - range, adjrange, end, p); - break; - - case ONIG_OPTIMIZE_EXACT_BM: - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: - if (IS_NULL(reg->int_map_backward)) { - if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; - - r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); - if (r) return r; - } - p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); - break; - - case ONIG_OPTIMIZE_MAP: - p = map_search_backward(reg->enc, reg->map, range, adjrange, p); - break; - } - - if (p) { - if (reg->sub_anchor) { - UChar* prev; - - switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, str, p); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; - } - } - break; - - case ANCHOR_END_LINE: - if (ON_STR_END(p)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev 
= onigenc_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(prev)) goto fail; - if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; - } -#endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) -#ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) -#endif - ) { - p = onigenc_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(p)) goto fail; - goto retry; - } - break; - } - } - - /* no needs to adjust *high, *high is used as range check only */ - if (reg->dmax != ONIG_INFINITE_DISTANCE) { - *low = p - reg->dmax; - *high = p - reg->dmin; - *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high); - } - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "backward_search_range: low: %d, high: %d\n", - (int )(*low - str), (int )(*high - str)); -#endif - return 1; /* success */ - } - - fail: -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "backward_search_range: fail.\n"); -#endif - return 0; /* fail */ -} - - -extern int -onig_search(regex_t* reg, const UChar* str, const UChar* end, - const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) -{ - int r; - UChar *s, *prev; - OnigMatchArg msa; - const UChar *orig_start = start; -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar *orig_range = range; -#endif - -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) - start: - THREAD_ATOMIC_START; - if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(reg); - if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(reg); - ONIG_STATE_INC(reg); - } - } - else { - int n; - - THREAD_ATOMIC_END; - n = 0; - while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - goto start; - } - THREAD_ATOMIC_END; -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, - "onig_search 
(entry point): str: %d, end: %d, start: %d, range: %d\n", - (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); -#endif - - if (region -#ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) -#endif - ) { - r = onig_region_resize_clear(region, reg->num_mem + 1); - if (r) goto finish_no_msa; - } - - if (start > end || start < str) goto mismatch_no_msa; - - -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! IS_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ - } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#else -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! 
IS_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ - } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ - - - /* anchor optimize: resume search range */ - if (reg->anchor != 0 && str < end) { - UChar *min_semi_end, *max_semi_end; - - if (reg->anchor & ANCHOR_BEGIN_POSITION) { - /* search start-position only */ - begin_position: - if (range > start) - range = start + 1; - else - range = start; - } - else if (reg->anchor & ANCHOR_BEGIN_BUF) { - /* search str-position only */ - if (range > start) { - if (start != str) goto mismatch_no_msa; - range = str + 1; - } - else { - if (range <= str) { - start = str; - range = str; - } - else - goto mismatch_no_msa; - } - } - else if (reg->anchor & ANCHOR_END_BUF) { - min_semi_end = max_semi_end = (UChar* )end; - - end_buf: - if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) - goto mismatch_no_msa; - - if (range > start) { - if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { - start = min_semi_end - reg->anchor_dmax; - if (start < end) - start = onigenc_get_right_adjust_char_head(reg->enc, str, start); - else { /* match with empty at end */ - start = onigenc_get_prev_char_head(reg->enc, str, end); - } - } - if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { - range = max_semi_end - reg->anchor_dmin + 1; - } - - if (start >= range) goto mismatch_no_msa; - } - else { - if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { - range = min_semi_end - reg->anchor_dmax; - } - if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { - start = max_semi_end - reg->anchor_dmin; - start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); - } - if (range > start) goto 
mismatch_no_msa; - } - } - else if (reg->anchor & ANCHOR_SEMI_END_BUF) { - UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1); - - max_semi_end = (UChar* )end; - if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { - min_semi_end = pre_end; - -#ifdef USE_CRNL_AS_LINE_TERMINATOR - pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1); - if (IS_NOT_NULL(pre_end) && - ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { - min_semi_end = pre_end; - } -#endif - if (min_semi_end > str && start <= min_semi_end) { - goto end_buf; - } - } - else { - min_semi_end = (UChar* )end; - goto end_buf; - } - } - else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { - goto begin_position; - } - } - else if (str == end) { /* empty string */ - static const UChar* address_for_empty_string = (UChar* )""; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "onig_search: empty string.\n"); -#endif - - if (reg->threshold_len == 0) { - start = end = str = address_for_empty_string; - s = (UChar* )start; - prev = (UChar* )NULL; - - MATCH_ARG_INIT(msa, option, region, start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - msa.state_check_buff = (void* )0; - msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ -#endif - MATCH_AND_RETURN_CHECK(end); - goto mismatch; - } - goto mismatch_no_msa; - } - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", - (int )(end - str), (int )(start - str), (int )(range - str)); -#endif - - MATCH_ARG_INIT(msa, option, region, orig_start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = (MIN(start, range) - str); - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif - - s = (UChar* )start; - if (range > start) { /* forward search */ - if (s > str) - prev = onigenc_get_prev_char_head(reg->enc, str, s); - else - prev = (UChar* )NULL; - - if (reg->optimize != ONIG_OPTIMIZE_NONE) { - UChar *sch_range, *low, *high, *low_prev; - - sch_range = (UChar* )range; - if (reg->dmax != 
0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_range = (UChar* )end; - else { - sch_range += reg->dmax; - if (sch_range > end) sch_range = (UChar* )end; - } - } - - if ((end - start) < reg->threshold_len) - goto mismatch; - - if (reg->dmax != ONIG_INFINITE_DISTANCE) { - do { - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, &low_prev)) goto mismatch; - if (s < low) { - s = low; - prev = low_prev; - } - while (s <= high) { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s); - } - } while (s < range); - goto mismatch; - } - else { /* check only. */ - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, (UChar** )NULL)) goto mismatch; - - if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { - do { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s); - - while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { - prev = s; - s += enclen(reg->enc, s); - } - } while (s < range); - goto mismatch; - } - } - } - - do { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s); - } while (s < range); - - if (s == range) { /* because empty match with /$/. 
*/ - MATCH_AND_RETURN_CHECK(orig_range); - } - } - else { /* backward search */ -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - if (orig_start < end) - orig_start += enclen(reg->enc, orig_start); /* is upper range */ -#endif - - if (reg->optimize != ONIG_OPTIMIZE_NONE) { - UChar *low, *high, *adjrange, *sch_start; - - if (range < end) - adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); - else - adjrange = (UChar* )end; - - if (reg->dmax != ONIG_INFINITE_DISTANCE && - (end - range) >= reg->threshold_len) { - do { - sch_start = s + reg->dmax; - if (sch_start > end) sch_start = (UChar* )end; - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) - goto mismatch; - - if (s > high) - s = high; - - while (s >= low) { - prev = onigenc_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK(orig_start); - s = prev; - } - } while (s >= range); - goto mismatch; - } - else { /* check only. */ - if ((end - range) < reg->threshold_len) goto mismatch; - - sch_start = s; - if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_start = (UChar* )end; - else { - sch_start += reg->dmax; - if (sch_start > end) sch_start = (UChar* )end; - else - sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, - start, sch_start); - } - } - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) goto mismatch; - } - } - - do { - prev = onigenc_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK(orig_start); - s = prev; - } while (s >= range); - } - - mismatch: -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - if (IS_FIND_LONGEST(reg->options)) { - if (msa.best_len >= 0) { - s = msa.best_s; - goto match; - } - } -#endif - r = ONIG_MISMATCH; - - finish: - MATCH_ARG_FREE(msa); - ONIG_STATE_DEC_THREAD(reg); - - /* If result is mismatch and no FIND_NOT_EMPTY option, - then the region is not setted in match_at(). 
*/ - if (IS_FIND_NOT_EMPTY(reg->options) && region -#ifdef USE_POSIX_API_REGION_OPTION - && !IS_POSIX_REGION(option) -#endif - ) { - onig_region_clear(region); - } - -#ifdef ONIG_DEBUG - if (r != ONIG_MISMATCH) - fprintf(stderr, "onig_search: error %d\n", r); -#endif - return r; - - mismatch_no_msa: - r = ONIG_MISMATCH; - finish_no_msa: - ONIG_STATE_DEC_THREAD(reg); -#ifdef ONIG_DEBUG - if (r != ONIG_MISMATCH) - fprintf(stderr, "onig_search: error %d\n", r); -#endif - return r; - - match: - ONIG_STATE_DEC_THREAD(reg); - MATCH_ARG_FREE(msa); - return s - str; -} - -extern OnigEncoding -onig_get_encoding(regex_t* reg) -{ - return reg->enc; -} - -extern OnigOptionType -onig_get_options(regex_t* reg) -{ - return reg->options; -} - -extern OnigCaseFoldType -onig_get_case_fold_flag(regex_t* reg) -{ - return reg->case_fold_flag; -} - -extern OnigSyntaxType* -onig_get_syntax(regex_t* reg) -{ - return reg->syntax; -} - -extern int -onig_number_of_captures(regex_t* reg) -{ - return reg->num_mem; -} - -extern int -onig_number_of_capture_histories(regex_t* reg) -{ -#ifdef USE_CAPTURE_HISTORY - int i, n; - - n = 0; - for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (BIT_STATUS_AT(reg->capture_history, i) != 0) - n++; - } - return n; -#else - return 0; -#endif -} - -extern void -onig_copy_encoding(OnigEncoding to, OnigEncoding from) -{ - *to = *from; -} - diff --git a/src/openalpr/support/regex/regext.c b/src/openalpr/support/regex/regext.c deleted file mode 100644 index b1b957b..0000000 --- a/src/openalpr/support/regex/regext.c +++ /dev/null @@ -1,222 +0,0 @@ -/********************************************************************** - regext.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -static void -conv_ext0be32(const UChar* s, const UChar* end, UChar* conv) -{ - while (s < end) { - *conv++ = '\0'; - *conv++ = '\0'; - *conv++ = '\0'; - *conv++ = *s++; - } -} - -static void -conv_ext0le32(const UChar* s, const UChar* end, UChar* conv) -{ - while (s < end) { - *conv++ = *s++; - *conv++ = '\0'; - *conv++ = '\0'; - *conv++ = '\0'; - } -} - -static void -conv_ext0be(const UChar* s, const UChar* end, UChar* conv) -{ - while (s < end) { - *conv++ = '\0'; - *conv++ = *s++; - } -} - -static void -conv_ext0le(const UChar* s, const UChar* end, UChar* conv) -{ - while (s < end) { - *conv++ = *s++; - *conv++ = '\0'; - } -} - -static void -conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv) -{ - while (s < end) { - *conv++ = s[3]; - *conv++ = s[2]; - *conv++ = s[1]; - *conv++ = s[0]; - s += 4; - } -} - -static void -conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv) -{ - while (s < end) { - *conv++ = s[1]; - *conv++ = s[0]; - s += 2; - } -} - -static int -conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end, - UChar** conv, UChar** conv_end) -{ - int len = end - s; - - if (to == ONIG_ENCODING_UTF16_BE) { - if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { - *conv = (UChar* )xmalloc(len * 2); - CHECK_NULL_RETURN_MEMERR(*conv); - *conv_end = *conv + (len * 2); - conv_ext0be(s, end, *conv); - return 0; - } - else if (from == ONIG_ENCODING_UTF16_LE) { - swap16: - *conv = (UChar* )xmalloc(len); - CHECK_NULL_RETURN_MEMERR(*conv); - *conv_end = *conv + len; - conv_swap2bytes(s, end, *conv); - return 0; - } - } - else if (to == ONIG_ENCODING_UTF16_LE) { - if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { - *conv = (UChar* )xmalloc(len * 2); - CHECK_NULL_RETURN_MEMERR(*conv); - *conv_end = *conv + (len * 2); - conv_ext0le(s, end, *conv); - return 0; - } - else if (from == ONIG_ENCODING_UTF16_BE) { - goto swap16; - } - } - if (to == 
ONIG_ENCODING_UTF32_BE) { - if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { - *conv = (UChar* )xmalloc(len * 4); - CHECK_NULL_RETURN_MEMERR(*conv); - *conv_end = *conv + (len * 4); - conv_ext0be32(s, end, *conv); - return 0; - } - else if (from == ONIG_ENCODING_UTF32_LE) { - swap32: - *conv = (UChar* )xmalloc(len); - CHECK_NULL_RETURN_MEMERR(*conv); - *conv_end = *conv + len; - conv_swap4bytes(s, end, *conv); - return 0; - } - } - else if (to == ONIG_ENCODING_UTF32_LE) { - if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { - *conv = (UChar* )xmalloc(len * 4); - CHECK_NULL_RETURN_MEMERR(*conv); - *conv_end = *conv + (len * 4); - conv_ext0le32(s, end, *conv); - return 0; - } - else if (from == ONIG_ENCODING_UTF32_BE) { - goto swap32; - } - } - - return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION; -} - -extern int -onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, - OnigCompileInfo* ci, OnigErrorInfo* einfo) -{ - int r; - UChar *cpat, *cpat_end; - - if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; - - if (ci->pattern_enc != ci->target_enc) { - r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end, - &cpat, &cpat_end); - if (r) return r; - } - else { - cpat = (UChar* )pattern; - cpat_end = (UChar* )pattern_end; - } - - *reg = (regex_t* )xmalloc(sizeof(regex_t)); - if (IS_NULL(*reg)) { - r = ONIGERR_MEMORY; - goto err2; - } - - r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc, - ci->syntax); - if (r) goto err; - - r = onig_compile(*reg, cpat, cpat_end, einfo); - if (r) { - err: - onig_free(*reg); - *reg = NULL; - } - - err2: - if (cpat != pattern) xfree(cpat); - - return r; -} - -#ifdef USE_RECOMPILE_API -extern int -onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigCompileInfo* ci, OnigErrorInfo* einfo) -{ - int r; - regex_t *new_reg; - - r = onig_new_deluxe(&new_reg, pattern, pattern_end, ci, einfo); - if (r) return 
r; - if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_transfer(reg, new_reg); - } - else { - onig_chain_link_add(reg, new_reg); - } - return 0; -} -#endif diff --git a/src/openalpr/support/regex/reggnu.c b/src/openalpr/support/regex/reggnu.c deleted file mode 100644 index 4bd18c4..0000000 --- a/src/openalpr/support/regex/reggnu.c +++ /dev/null @@ -1,167 +0,0 @@ -/********************************************************************** - reggnu.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -#ifndef ONIGGNU_H -#include "oniggnu.h" -#endif - -extern void -re_free_registers(OnigRegion* r) -{ - /* 0: don't free self */ - onig_region_free(r, 0); -} - -extern int -re_adjust_startpos(regex_t* reg, const char* string, int size, - int startpos, int range) -{ - if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) { - UChar *p; - UChar *s = (UChar* )string + startpos; - - if (range > 0) { - p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s); - } - else { - p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s); - } - return p - (UChar* )string; - } - - return startpos; -} - -extern int -re_match(regex_t* reg, const char* str, int size, int pos, - struct re_registers* regs) -{ - return onig_match(reg, (UChar* )str, (UChar* )(str + size), - (UChar* )(str + pos), regs, ONIG_OPTION_NONE); -} - -extern int -re_search(regex_t* bufp, const char* string, int size, int startpos, int range, - struct re_registers* regs) -{ - return onig_search(bufp, (UChar* )string, (UChar* )(string + size), - (UChar* )(string + startpos), - (UChar* )(string + startpos + range), - regs, ONIG_OPTION_NONE); -} - -extern int -re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) -{ - int r; - OnigErrorInfo einfo; - - r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo); - if (r != ONIG_NORMAL) { - if (IS_NOT_NULL(ebuf)) - (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); - } - - return r; -} - -#ifdef USE_RECOMPILE_API -extern int -re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) -{ - int r; - OnigErrorInfo einfo; - OnigEncoding enc; - - /* I think encoding and options should be arguments of this function. - But this is adapted to present re.c. 
(2002/11/29) - */ - enc = OnigEncDefaultCharEncoding; - - r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size), - reg->options, enc, OnigDefaultSyntax, &einfo); - if (r != ONIG_NORMAL) { - if (IS_NOT_NULL(ebuf)) - (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); - } - return r; -} -#endif - -extern void -re_free_pattern(regex_t* reg) -{ - onig_free(reg); -} - -extern int -re_alloc_pattern(regex_t** reg) -{ - *reg = (regex_t* )xmalloc(sizeof(regex_t)); - if (IS_NULL(*reg)) return ONIGERR_MEMORY; - - return onig_reg_init(*reg, ONIG_OPTION_DEFAULT, - ONIGENC_CASE_FOLD_DEFAULT, - OnigEncDefaultCharEncoding, - OnigDefaultSyntax); -} - -extern void -re_set_casetable(const char* table) -{ - onigenc_set_default_caseconv_table((UChar* )table); -} - -extern void -re_mbcinit(int mb_code) -{ - OnigEncoding enc; - - switch (mb_code) { - case RE_MBCTYPE_ASCII: - enc = ONIG_ENCODING_ASCII; - break; - case RE_MBCTYPE_EUC: - enc = ONIG_ENCODING_EUC_JP; - break; - case RE_MBCTYPE_SJIS: - enc = ONIG_ENCODING_SJIS; - break; - case RE_MBCTYPE_UTF8: - enc = ONIG_ENCODING_UTF8; - break; - default: - return ; - break; - } - - onigenc_set_default_encoding(enc); -} diff --git a/src/openalpr/support/regex/regint.h b/src/openalpr/support/regex/regint.h deleted file mode 100644 index a901878..0000000 --- a/src/openalpr/support/regex/regint.h +++ /dev/null @@ -1,817 +0,0 @@ -#ifndef REGINT_H -#define REGINT_H -/********************************************************************** - regint.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2013 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* for debug */ -/* #define ONIG_DEBUG_PARSE_TREE */ -/* #define ONIG_DEBUG_COMPILE */ -/* #define ONIG_DEBUG_SEARCH */ -/* #define ONIG_DEBUG_MATCH */ -/* #define ONIG_DONT_OPTIMIZE */ - -/* for byte-code statistical data. */ -/* #define ONIG_DEBUG_STATISTICS */ - -#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ - defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ - defined(ONIG_DEBUG_STATISTICS) -#ifndef ONIG_DEBUG -#define ONIG_DEBUG -#endif -#endif - -#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ - (defined(__ppc__) && defined(__APPLE__)) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(__mc68020__) -#define PLATFORM_UNALIGNED_WORD_ACCESS -#endif - -/* config */ -/* spec. 
config */ -#define USE_NAMED_GROUP -#define USE_SUBEXP_CALL -#define USE_BACKREF_WITH_LEVEL /* \k, \k */ -#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ -#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ -#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR -/* #define USE_RECOMPILE_API */ -/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */ - -/* internal config */ -#define USE_PARSE_TREE_NODE_RECYCLE -#define USE_OP_PUSH_OR_JUMP_EXACT -#define USE_QTFR_PEEK_NEXT -#define USE_ST_LIBRARY -#define USE_SHARED_CCLASS_TABLE - -#define INIT_MATCH_STACK_SIZE 160 -#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ - -#if defined(__GNUC__) -# define ARG_UNUSED __attribute__ ((unused)) -#else -# define ARG_UNUSED -#endif - -/* */ -/* escape other system UChar definition */ -#include "onig_config.h" -#ifdef ONIG_ESCAPE_UCHAR_COLLISION -#undef ONIG_ESCAPE_UCHAR_COLLISION -#endif - -#define USE_WORD_BEGIN_END /* "\<", "\>" */ -#define USE_CAPTURE_HISTORY -#define USE_VARIABLE_META_CHARS -#define USE_POSIX_API_REGION_OPTION -#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ - -/* #define USE_MULTI_THREAD_SYSTEM */ -#define THREAD_SYSTEM_INIT /* depend on thread system */ -#define THREAD_SYSTEM_END /* depend on thread system */ -#define THREAD_ATOMIC_START /* depend on thread system */ -#define THREAD_ATOMIC_END /* depend on thread system */ -#define THREAD_PASS /* depend on thread system */ -#define xmalloc malloc -#define xrealloc realloc -#define xcalloc calloc -#define xfree free - -#define CHECK_INTERRUPT_IN_MATCH_AT - -#define st_init_table onig_st_init_table -#define st_init_table_with_size onig_st_init_table_with_size -#define st_init_numtable onig_st_init_numtable -#define st_init_numtable_with_size onig_st_init_numtable_with_size -#define st_init_strtable onig_st_init_strtable -#define st_init_strtable_with_size 
onig_st_init_strtable_with_size -#define st_delete onig_st_delete -#define st_delete_safe onig_st_delete_safe -#define st_insert onig_st_insert -#define st_lookup onig_st_lookup -#define st_foreach onig_st_foreach -#define st_add_direct onig_st_add_direct -#define st_free_table onig_st_free_table -#define st_cleanup_safe onig_st_cleanup_safe -#define st_copy onig_st_copy -#define st_nothing_key_clone onig_st_nothing_key_clone -#define st_nothing_key_free onig_st_nothing_key_free -/* */ -#define onig_st_is_member st_is_member - -#define STATE_CHECK_STRING_THRESHOLD_LEN 7 -#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 - -#define THREAD_PASS_LIMIT_COUNT 8 -#define xmemset memset -#define xmemcpy memcpy -#define xmemmove memmove - -#if defined(_WIN32) && !defined(__GNUC__) -#define xalloca _alloca -#define xvsnprintf _vsnprintf -#else -#define xalloca alloca -#define xvsnprintf vsnprintf -#endif - - -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) -#define ONIG_STATE_INC(reg) (reg)->state++ -#define ONIG_STATE_DEC(reg) (reg)->state-- - -#define ONIG_STATE_INC_THREAD(reg) do {\ - THREAD_ATOMIC_START;\ - (reg)->state++;\ - THREAD_ATOMIC_END;\ -} while(0) -#define ONIG_STATE_DEC_THREAD(reg) do {\ - THREAD_ATOMIC_START;\ - (reg)->state--;\ - THREAD_ATOMIC_END;\ -} while(0) -#else -#define ONIG_STATE_INC(reg) /* Nothing */ -#define ONIG_STATE_DEC(reg) /* Nothing */ -#define ONIG_STATE_INC_THREAD(reg) /* Nothing */ -#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - -#ifdef HAVE_STDLIB_H -#include -#endif - -#if defined(HAVE_ALLOCA_H) && !defined(_WIN32) && !defined(__GNUC__) -#include -#endif - -#ifdef HAVE_STRING_H -# include -#else -# include -#endif - -#include -#ifdef HAVE_SYS_TYPES_H -#ifndef __BORLANDC__ -#include -#endif -#endif - -#ifdef __BORLANDC__ -#include -#endif - -#ifdef ONIG_DEBUG -# include -#endif - -#include "regenc.h" - -#ifdef MIN -#undef MIN -#endif -#ifdef MAX -#undef MAX -#endif 
-#define MIN(a,b) (((a)>(b))?(b):(a)) -#define MAX(a,b) (((a)<(b))?(b):(a)) - -#define IS_NULL(p) (((void*)(p)) == (void*)0) -#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) -#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL -#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY -#define NULL_UCHARP ((UChar* )0) - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - -#define PLATFORM_GET_INC(val,p,type) do{\ - val = *(type* )p;\ - (p) += sizeof(type);\ -} while(0) - -#else - -#define PLATFORM_GET_INC(val,p,type) do{\ - xmemcpy(&val, (p), sizeof(type));\ - (p) += sizeof(type);\ -} while(0) - -/* sizeof(OnigCodePoint) */ -#define WORD_ALIGNMENT_SIZE SIZEOF_LONG - -#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ - (pad_size) = WORD_ALIGNMENT_SIZE \ - - ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ - if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ -} while (0) - -#define ALIGNMENT_RIGHT(addr) do {\ - (addr) += (WORD_ALIGNMENT_SIZE - 1);\ - (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ -} while (0) - -#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ - -/* stack pop level */ -#define STACK_POP_LEVEL_FREE 0 -#define STACK_POP_LEVEL_MEM_START 1 -#define STACK_POP_LEVEL_ALL 2 - -/* optimize flags */ -#define ONIG_OPTIMIZE_NONE 0 -#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ -#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ -#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */ -#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ -#define ONIG_OPTIMIZE_MAP 5 /* char map */ - -/* bit status */ -typedef unsigned int BitStatusType; - -#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8) -#define BIT_STATUS_CLEAR(stats) (stats) = 0 -#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) -#define BIT_STATUS_AT(stats,n) \ - ((n) < (int )BIT_STATUS_BITS_NUM ? 
((stats) & (1 << n)) : ((stats) & 1)) - -#define BIT_STATUS_ON_AT(stats,n) do {\ - if ((n) < (int )BIT_STATUS_BITS_NUM) \ - (stats) |= (1 << (n));\ - else\ - (stats) |= 1;\ -} while (0) - -#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ - if ((n) < (int )BIT_STATUS_BITS_NUM)\ - (stats) |= (1 << (n));\ -} while (0) - - -#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1) - -#define DIGITVAL(code) ((code) - '0') -#define ODIGITVAL(code) DIGITVAL(code) -#define XDIGITVAL(enc,code) \ - (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \ - : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10)) - -#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) -#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE) -#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE) -#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) -#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) -#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) -#define IS_FIND_CONDITION(option) ((option) & \ - (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) -#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) -#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) -#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) - -/* OP_SET_OPTION is required for these options. -#define IS_DYNAMIC_OPTION(option) \ - (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0) -*/ -/* ignore-case and multibyte status are included in compiled code. 
*/ -#define IS_DYNAMIC_OPTION(option) 0 - -#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \ - ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) - -#define REPEAT_INFINITE -1 -#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE) - -/* bitset */ -#define BITS_PER_BYTE 8 -#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) -#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE) -#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS -typedef unsigned int Bits; -#else -typedef unsigned char Bits; -#endif -typedef Bits BitSet[BITSET_SIZE]; -typedef Bits* BitSetRef; - -#define SIZE_BITSET sizeof(BitSet) - -#define BITSET_CLEAR(bs) do {\ - int i;\ - for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ -} while (0) - -#define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM] -#define BS_BIT(pos) (1 << (pos % BITS_IN_ROOM)) - -#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) -#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) -#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos)) -#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos) - -/* bytes buffer */ -typedef struct _BBuf { - UChar* p; - unsigned int used; - unsigned int alloc; -} BBuf; - -#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size)) - -#define BBUF_SIZE_INC(buf,inc) do{\ - (buf)->alloc += (inc);\ - (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ - if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ -} while (0) - -#define BBUF_EXPAND(buf,low) do{\ - do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\ - (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ - if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ -} while (0) - -#define BBUF_ENSURE_SIZE(buf,size) do{\ - unsigned int new_alloc = (buf)->alloc;\ - while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\ - if ((buf)->alloc != new_alloc) {\ - (buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\ - if (IS_NULL((buf)->p)) 
return(ONIGERR_MEMORY);\ - (buf)->alloc = new_alloc;\ - }\ -} while (0) - -#define BBUF_WRITE(buf,pos,bytes,n) do{\ - int used = (pos) + (n);\ - if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ - xmemcpy((buf)->p + (pos), (bytes), (n));\ - if ((buf)->used < (unsigned int )used) (buf)->used = used;\ -} while (0) - -#define BBUF_WRITE1(buf,pos,byte) do{\ - int used = (pos) + 1;\ - if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ - (buf)->p[(pos)] = (byte);\ - if ((buf)->used < (unsigned int )used) (buf)->used = used;\ -} while (0) - -#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n)) -#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte)) -#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used) -#define BBUF_GET_OFFSET_POS(buf) ((buf)->used) - -/* from < to */ -#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\ - if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\ - xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ - if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\ -} while (0) - -/* from > to */ -#define BBUF_MOVE_LEFT(buf,from,to,n) do {\ - xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ -} while (0) - -/* from > to */ -#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\ - xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\ - (buf)->used -= (from - to);\ -} while (0) - -#define BBUF_INSERT(buf,pos,bytes,n) do {\ - if (pos >= (buf)->used) {\ - BBUF_WRITE(buf,pos,bytes,n);\ - }\ - else {\ - BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\ - xmemcpy((buf)->p + (pos), (bytes), (n));\ - }\ -} while (0) - -#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)] - - -#define ANCHOR_BEGIN_BUF (1<<0) -#define ANCHOR_BEGIN_LINE (1<<1) -#define ANCHOR_BEGIN_POSITION (1<<2) -#define ANCHOR_END_BUF (1<<3) -#define ANCHOR_SEMI_END_BUF (1<<4) -#define ANCHOR_END_LINE (1<<5) - -#define ANCHOR_WORD_BOUND (1<<6) -#define ANCHOR_NOT_WORD_BOUND 
(1<<7) -#define ANCHOR_WORD_BEGIN (1<<8) -#define ANCHOR_WORD_END (1<<9) -#define ANCHOR_PREC_READ (1<<10) -#define ANCHOR_PREC_READ_NOT (1<<11) -#define ANCHOR_LOOK_BEHIND (1<<12) -#define ANCHOR_LOOK_BEHIND_NOT (1<<13) - -#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ -#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ - -/* operation code */ -enum OpCode { - OP_FINISH = 0, /* matching process terminator (no more alternative) */ - OP_END = 1, /* pattern code terminator (success end) */ - - OP_EXACT1 = 2, /* single byte, N = 1 */ - OP_EXACT2, /* single byte, N = 2 */ - OP_EXACT3, /* single byte, N = 3 */ - OP_EXACT4, /* single byte, N = 4 */ - OP_EXACT5, /* single byte, N = 5 */ - OP_EXACTN, /* single byte */ - OP_EXACTMB2N1, /* mb-length = 2 N = 1 */ - OP_EXACTMB2N2, /* mb-length = 2 N = 2 */ - OP_EXACTMB2N3, /* mb-length = 2 N = 3 */ - OP_EXACTMB2N, /* mb-length = 2 */ - OP_EXACTMB3N, /* mb-length = 3 */ - OP_EXACTMBN, /* other length */ - - OP_EXACT1_IC, /* single byte, N = 1, ignore case */ - OP_EXACTN_IC, /* single byte, ignore case */ - - OP_CCLASS, - OP_CCLASS_MB, - OP_CCLASS_MIX, - OP_CCLASS_NOT, - OP_CCLASS_MB_NOT, - OP_CCLASS_MIX_NOT, - OP_CCLASS_NODE, /* pointer to CClassNode node */ - - OP_ANYCHAR, /* "." */ - OP_ANYCHAR_ML, /* "." 
multi-line */ - OP_ANYCHAR_STAR, /* ".*" */ - OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ - OP_ANYCHAR_STAR_PEEK_NEXT, - OP_ANYCHAR_ML_STAR_PEEK_NEXT, - - OP_WORD, - OP_NOT_WORD, - OP_WORD_BOUND, - OP_NOT_WORD_BOUND, - OP_WORD_BEGIN, - OP_WORD_END, - - OP_BEGIN_BUF, - OP_END_BUF, - OP_BEGIN_LINE, - OP_END_LINE, - OP_SEMI_END_BUF, - OP_BEGIN_POSITION, - - OP_BACKREF1, - OP_BACKREF2, - OP_BACKREFN, - OP_BACKREFN_IC, - OP_BACKREF_MULTI, - OP_BACKREF_MULTI_IC, - OP_BACKREF_WITH_LEVEL, /* \k, \k */ - - OP_MEMORY_START, - OP_MEMORY_START_PUSH, /* push back-tracker to stack */ - OP_MEMORY_END_PUSH, /* push back-tracker to stack */ - OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ - OP_MEMORY_END, - OP_MEMORY_END_REC, /* push marker to stack */ - - OP_FAIL, /* pop stack and move */ - OP_JUMP, - OP_PUSH, - OP_POP, - OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ - OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ - OP_REPEAT, /* {n,m} */ - OP_REPEAT_NG, /* {n,m}? (non greedy) */ - OP_REPEAT_INC, - OP_REPEAT_INC_NG, /* non greedy */ - OP_REPEAT_INC_SG, /* search and get in stack */ - OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ - OP_NULL_CHECK_START, /* null loop checker start */ - OP_NULL_CHECK_END, /* null loop checker end */ - OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ - OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ - - OP_PUSH_POS, /* (?=...) start */ - OP_POP_POS, /* (?=...) end */ - OP_PUSH_POS_NOT, /* (?!...) start */ - OP_FAIL_POS, /* (?!...) end */ - OP_PUSH_STOP_BT, /* (?>...) start */ - OP_POP_STOP_BT, /* (?>...) end */ - OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ - OP_PUSH_LOOK_BEHIND_NOT, /* (? 
*/ - OP_RETURN, - - OP_STATE_CHECK_PUSH, /* combination explosion check and push */ - OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ - OP_STATE_CHECK, /* check only */ - OP_STATE_CHECK_ANYCHAR_STAR, - OP_STATE_CHECK_ANYCHAR_ML_STAR, - - /* no need: IS_DYNAMIC_OPTION() == 0 */ - OP_SET_OPTION_PUSH, /* set option and push recover option */ - OP_SET_OPTION /* set option */ -}; - -typedef int RelAddrType; -typedef int AbsAddrType; -typedef int LengthType; -typedef int RepeatNumType; -typedef short int MemNumType; -typedef short int StateCheckNumType; -typedef void* PointerType; - -#define SIZE_OPCODE 1 -#define SIZE_RELADDR sizeof(RelAddrType) -#define SIZE_ABSADDR sizeof(AbsAddrType) -#define SIZE_LENGTH sizeof(LengthType) -#define SIZE_MEMNUM sizeof(MemNumType) -#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType) -#define SIZE_REPEATNUM sizeof(RepeatNumType) -#define SIZE_OPTION sizeof(OnigOptionType) -#define SIZE_CODE_POINT sizeof(OnigCodePoint) -#define SIZE_POINTER sizeof(PointerType) - - -#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) -#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) -#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType) -#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType) -#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) -#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) -#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) -#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) - -/* code point's address must be aligned address. 
*/ -#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) -#define GET_BYTE_INC(byte,p) do{\ - byte = *(p);\ - (p)++;\ -} while(0) - - -/* op-code + arg size */ -#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE -#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1) -#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP SIZE_OPCODE -#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1) -#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) -#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_PUSH_POS SIZE_OPCODE -#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP_POS SIZE_OPCODE -#define SIZE_OP_FAIL_POS SIZE_OPCODE -#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION) -#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION) -#define SIZE_OP_FAIL SIZE_OPCODE -#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE -#define SIZE_OP_POP_STOP_BT SIZE_OPCODE -#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH) -#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) -#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE -#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) -#define SIZE_OP_RETURN SIZE_OPCODE - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) -#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + 
SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) -#endif - -#define MC_ESC(syn) (syn)->meta_char_table.esc -#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar -#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime -#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time -#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time -#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime - -#define IS_MC_ESC_CODE(code, syn) \ - ((code) == MC_ESC(syn) && \ - !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) - - -#define SYN_POSIX_COMMON_OP \ - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ - ONIG_SYN_OP_DECIMAL_BACKREF | \ - ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ - ONIG_SYN_OP_LINE_ANCHOR | \ - ONIG_SYN_OP_ESC_CONTROL_CHARS ) - -#define SYN_GNU_REGEX_OP \ - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ - ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ - ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ - ONIG_SYN_OP_VBAR_ALT | \ - ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ - ONIG_SYN_OP_QMARK_ZERO_ONE | \ - ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ - ONIG_SYN_OP_ESC_W_WORD | \ - ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ - ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ - ONIG_SYN_OP_LINE_ANCHOR ) - -#define SYN_GNU_REGEX_BV \ - ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ - ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ - ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) - - -#define NCCLASS_FLAGS(cc) ((cc)->flags) -#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) -#define NCCLASS_FLAG_CLEAR(cc,flag) 
(NCCLASS_FLAGS(cc) &= ~(flag)) -#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) - -/* cclass node */ -#define FLAG_NCCLASS_NOT (1<<0) -#define FLAG_NCCLASS_SHARE (1<<1) - -#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) -#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE) -#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) -#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) -#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE) - -typedef struct { - int type; - /* struct _Node* next; */ - /* unsigned int flags; */ -} NodeBase; - -typedef struct { - NodeBase base; - unsigned int flags; - BitSet bs; - BBuf* mbuf; /* multi-byte info or NULL */ -} CClassNode; - -typedef long OnigStackIndex; - -typedef struct _OnigStackType { - unsigned int type; - union { - struct { - UChar *pcode; /* byte code position */ - UChar *pstr; /* string position */ - UChar *pstr_prev; /* previous char position of pstr */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - unsigned int state_check; -#endif - } state; - struct { - int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ - UChar *pcode; /* byte code position (head of repeated target) */ - int num; /* repeat id */ - } repeat; - struct { - OnigStackIndex si; /* index of stack */ - } repeat_inc; - struct { - int num; /* memory num */ - UChar *pstr; /* start/end position */ - /* Following information is setted, if this stack type is MEM-START */ - OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ - OnigStackIndex end; /* prev. 
info (for backtrack "(...)*" ) */ - } mem; - struct { - int num; /* null check id */ - UChar *pstr; /* start position */ - } null_check; -#ifdef USE_SUBEXP_CALL - struct { - UChar *ret_addr; /* byte code position */ - int num; /* null check id */ - UChar *pstr; /* string position */ - } call_frame; -#endif - } u; -} OnigStackType; - -typedef struct { - void* stack_p; - int stack_n; - OnigOptionType options; - OnigRegion* region; - const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - int best_len; /* for ONIG_OPTION_FIND_LONGEST */ - UChar* best_s; -#endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - void* state_check_buff; - int state_check_buff_size; -#endif -} OnigMatchArg; - - -#define IS_CODE_SB_WORD(enc,code) \ - (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) - -typedef struct OnigEndCallListItem { - struct OnigEndCallListItem* next; - void (*func)(void); -} OnigEndCallListItemType; - -extern void onig_add_end_call(void (*func)(void)); - - -#ifdef ONIG_DEBUG - -typedef struct { - short int opcode; - char* name; - short int arg_type; -} OnigOpInfoType; - -extern OnigOpInfoType OnigOpInfo[]; - - -extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc)); - -#ifdef ONIG_DEBUG_STATISTICS -extern void onig_statistics_init P_((void)); -extern void onig_print_statistics P_((FILE* f)); -#endif -#endif - -extern UChar* onig_error_code_to_format P_((int code)); -extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); -extern int onig_bbuf_init P_((BBuf* buf, int size)); -extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); -extern void onig_chain_reduce P_((regex_t* reg)); -extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); -extern void onig_transfer P_((regex_t* to, regex_t* from)); -extern 
int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); -extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc)); - -/* strend hash */ -typedef void hash_table_type; -typedef unsigned long hash_data_type; - -extern hash_table_type* onig_st_init_strend_table_with_size P_((int size)); -extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); -extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value)); - -/* encoding property management */ -#define PROPERTY_LIST_ADD_PROP(Name, CR) \ - r = onigenc_property_list_add_property((UChar* )Name, CR,\ - &PropertyNameTable, &PropertyList, &PropertyListNum,\ - &PropertyListSize);\ - if (r != 0) goto end - -#define PROPERTY_LIST_INIT_CHECK \ - if (PropertyInited == 0) {\ - int r = onigenc_property_list_init(init_property_list);\ - if (r != 0) return r;\ - } - -extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize)); - -typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); - -extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)); - -#endif /* REGINT_H */ diff --git a/src/openalpr/support/regex/regparse.c b/src/openalpr/support/regex/regparse.c deleted file mode 100644 index bf72300..0000000 --- a/src/openalpr/support/regex/regparse.c +++ /dev/null @@ -1,5554 +0,0 @@ -/********************************************************************** - regparse.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regparse.h" -#include "st.h" - -#define WARN_BUFSIZE 256 - -#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS - - -OnigSyntaxType OnigSyntaxRuby = { - (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | - ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | - ONIG_SYN_OP_ESC_C_CONTROL ) - & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) - , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | - ONIG_SYN_OP2_OPTION_RUBY | - ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | - ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | - ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | - ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | - ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | - ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | - ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | - ONIG_SYN_OP2_ESC_H_XDIGIT ) - , ( SYN_GNU_REGEX_BV | - ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | - ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | - ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | - ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | - ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | - ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | - ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) - , ONIG_OPTION_NONE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; - -extern void onig_null_warn(const char* s ARG_UNUSED) { } - -#ifdef DEFAULT_WARN_FUNCTION -static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; -#else -static OnigWarnFunc onig_warn = onig_null_warn; -#endif - -#ifdef DEFAULT_VERB_WARN_FUNCTION -static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION; -#else -static OnigWarnFunc onig_verb_warn = onig_null_warn; -#endif - -extern void onig_set_warn_func(OnigWarnFunc f) -{ - onig_warn = f; -} - -extern void onig_set_verb_warn_func(OnigWarnFunc f) -{ - onig_verb_warn = f; -} - -static void -bbuf_free(BBuf* bbuf) -{ - if (IS_NOT_NULL(bbuf)) { - if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p); - xfree(bbuf); - } -} - -static int -bbuf_clone(BBuf** rto, BBuf* from) -{ - int r; - BBuf *to; - - *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_MEMERR(to); - r = BBUF_INIT(to, from->alloc); - if (r != 0) return r; - to->used = from->used; - xmemcpy(to->p, from->p, from->used); - return 0; -} - -#define BACKREF_REL_TO_ABS(rel_no, env) \ - ((env)->num_mem + 1 + (rel_no)) - -#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) - -#define MBCODE_START_POS(enc) \ - (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80) - -#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ - add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) - -#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ - if (! 
ONIGENC_IS_SINGLEBYTE(enc)) {\ - r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\ - if (r) return r;\ - }\ -} while (0) - - -#define BITSET_IS_EMPTY(bs,empty) do {\ - int i;\ - empty = 1;\ - for (i = 0; i < (int )BITSET_SIZE; i++) {\ - if ((bs)[i] != 0) {\ - empty = 0; break;\ - }\ - }\ -} while (0) - -static void -bitset_set_range(BitSetRef bs, int from, int to) -{ - int i; - for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { - BITSET_SET_BIT(bs, i); - } -} - -#if 0 -static void -bitset_set_all(BitSetRef bs) -{ - int i; - for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); } -} -#endif - -static void -bitset_invert(BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } -} - -static void -bitset_invert_to(BitSetRef from, BitSetRef to) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } -} - -static void -bitset_and(BitSetRef dest, BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } -} - -static void -bitset_or(BitSetRef dest, BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } -} - -static void -bitset_copy(BitSetRef dest, BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } -} - -extern int -onig_strncmp(const UChar* s1, const UChar* s2, int n) -{ - int x; - - while (n-- > 0) { - x = *s2++ - *s1++; - if (x) return x; - } - return 0; -} - -extern void -onig_strcpy(UChar* dest, const UChar* src, const UChar* end) -{ - int len = end - src; - if (len > 0) { - xmemcpy(dest, src, len); - dest[len] = (UChar )0; - } -} - -#ifdef USE_NAMED_GROUP -static UChar* -strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) -{ - int slen, term_len, i; - UChar *r; - - slen = end - s; - term_len = ONIGENC_MBC_MINLEN(enc); - - r = (UChar* )xmalloc(slen + term_len); - CHECK_NULL_RETURN(r); - xmemcpy(r, s, slen); - - for (i = 0; i < term_len; i++) - r[slen + i] = (UChar )0; - - return r; -} -#endif - -/* scan 
pattern methods */ -#define PEND_VALUE 0 - -#define PFETCH_READY UChar* pfetch_prev -#define PEND (p < end ? 0 : 1) -#define PUNFETCH p = pfetch_prev -#define PINC do { \ - pfetch_prev = p; \ - p += ONIGENC_MBC_ENC_LEN(enc, p); \ -} while (0) -#define PFETCH(c) do { \ - c = ONIGENC_MBC_TO_CODE(enc, p, end); \ - pfetch_prev = p; \ - p += ONIGENC_MBC_ENC_LEN(enc, p); \ -} while (0) - -#define PINC_S do { \ - p += ONIGENC_MBC_ENC_LEN(enc, p); \ -} while (0) -#define PFETCH_S(c) do { \ - c = ONIGENC_MBC_TO_CODE(enc, p, end); \ - p += ONIGENC_MBC_ENC_LEN(enc, p); \ -} while (0) - -#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE) -#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c) - -static UChar* -strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, - int capa) -{ - UChar* r; - - if (dest) - r = (UChar* )xrealloc(dest, capa + 1); - else - r = (UChar* )xmalloc(capa + 1); - - CHECK_NULL_RETURN(r); - onig_strcpy(r + (dest_end - dest), src, src_end); - return r; -} - -/* dest on static area */ -static UChar* -strcat_capa_from_static(UChar* dest, UChar* dest_end, - const UChar* src, const UChar* src_end, int capa) -{ - UChar* r; - - r = (UChar* )xmalloc(capa + 1); - CHECK_NULL_RETURN(r); - onig_strcpy(r, dest, dest_end); - onig_strcpy(r + (dest_end - dest), src, src_end); - return r; -} - - -#ifdef USE_ST_LIBRARY - -typedef struct { - UChar* s; - UChar* end; -} st_str_end_key; - -static int -str_end_cmp(st_str_end_key* x, st_str_end_key* y) -{ - UChar *p, *q; - int c; - - if ((x->end - x->s) != (y->end - y->s)) - return 1; - - p = x->s; - q = y->s; - while (p < x->end) { - c = (int )*p - (int )*q; - if (c != 0) return c; - - p++; q++; - } - - return 0; -} - -static int -str_end_hash(st_str_end_key* x) -{ - UChar *p; - int val = 0; - - p = x->s; - while (p < x->end) { - val = val * 997 + (int )*p++; - } - - return val + (val >> 5); -} - -extern hash_table_type* -onig_st_init_strend_table_with_size(int size) -{ - static struct 
st_hash_type hashType = { - str_end_cmp, - str_end_hash, - }; - - return (hash_table_type* ) - onig_st_init_table_with_size(&hashType, size); -} - -extern int -onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, - const UChar* end_key, hash_data_type *value) -{ - st_str_end_key key; - - key.s = (UChar* )str_key; - key.end = (UChar* )end_key; - - return onig_st_lookup(table, (st_data_t )(&key), value); -} - -extern int -onig_st_insert_strend(hash_table_type* table, const UChar* str_key, - const UChar* end_key, hash_data_type value) -{ - st_str_end_key* key; - int result; - - key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key)); - key->s = (UChar* )str_key; - key->end = (UChar* )end_key; - result = onig_st_insert(table, (st_data_t )key, value); - if (result) { - xfree(key); - } - return result; -} - -#endif /* USE_ST_LIBRARY */ - - -#ifdef USE_NAMED_GROUP - -#define INIT_NAME_BACKREFS_ALLOC_NUM 8 - -typedef struct { - UChar* name; - int name_len; /* byte length */ - int back_num; /* number of backrefs */ - int back_alloc; - int back_ref1; - int* back_refs; -} NameEntry; - -#ifdef USE_ST_LIBRARY - -typedef st_table NameTable; -typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ - -#define NAMEBUF_SIZE 24 -#define NAMEBUF_SIZE_1 25 - -#ifdef ONIG_DEBUG -static int -i_print_name_entry(UChar* key, NameEntry* e, void* arg) -{ - int i; - FILE* fp = (FILE* )arg; - - fprintf(fp, "%s: ", e->name); - if (e->back_num == 0) - fputs("-", fp); - else if (e->back_num == 1) - fprintf(fp, "%d", e->back_ref1); - else { - for (i = 0; i < e->back_num; i++) { - if (i > 0) fprintf(fp, ", "); - fprintf(fp, "%d", e->back_refs[i]); - } - } - fputs("\n", fp); - return ST_CONTINUE; -} - -extern int -onig_print_names(FILE* fp, regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - fprintf(fp, "name table\n"); - onig_st_foreach(t, i_print_name_entry, (HashDataType )fp); - fputs("\n", fp); - } - return 0; -} -#endif 
/* ONIG_DEBUG */ - -static int -i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED) -{ - xfree(e->name); - if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); - xfree(key); - xfree(e); - return ST_DELETE; -} - -static int -names_clear(regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - onig_st_foreach(t, i_free_name_entry, 0); - } - return 0; -} - -extern int -onig_names_free(regex_t* reg) -{ - int r; - NameTable* t; - - r = names_clear(reg); - if (r) return r; - - t = (NameTable* )reg->name_table; - if (IS_NOT_NULL(t)) onig_st_free_table(t); - reg->name_table = (void* )NULL; - return 0; -} - -static NameEntry* -name_find(regex_t* reg, const UChar* name, const UChar* name_end) -{ - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - e = (NameEntry* )NULL; - if (IS_NOT_NULL(t)) { - onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); - } - return e; -} - -typedef struct { - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); - regex_t* reg; - void* arg; - int ret; - OnigEncoding enc; -} INamesArg; - -static int -i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg) -{ - int r = (*(arg->func))(e->name, - e->name + e->name_len, - e->back_num, - (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), - arg->reg, arg->arg); - if (r != 0) { - arg->ret = r; - return ST_STOP; - } - return ST_CONTINUE; -} - -extern int -onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) -{ - INamesArg narg; - NameTable* t = (NameTable* )reg->name_table; - - narg.ret = 0; - if (IS_NOT_NULL(t)) { - narg.func = func; - narg.reg = reg; - narg.arg = arg; - narg.enc = reg->enc; /* should be pattern encoding. 
*/ - onig_st_foreach(t, i_names, (HashDataType )&narg); - } - return narg.ret; -} - -static int -i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map) -{ - int i; - - if (e->back_num > 1) { - for (i = 0; i < e->back_num; i++) { - e->back_refs[i] = map[e->back_refs[i]].new_val; - } - } - else if (e->back_num == 1) { - e->back_ref1 = map[e->back_ref1].new_val; - } - - return ST_CONTINUE; -} - -extern int -onig_renumber_name_table(regex_t* reg, GroupNumRemap* map) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - onig_st_foreach(t, i_renumber_name, (HashDataType )map); - } - return 0; -} - - -extern int -onig_number_of_names(regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) - return t->num_entries; - else - return 0; -} - -#else /* USE_ST_LIBRARY */ - -#define INIT_NAMES_ALLOC_NUM 8 - -typedef struct { - NameEntry* e; - int num; - int alloc; -} NameTable; - -#ifdef ONIG_DEBUG -extern int -onig_print_names(FILE* fp, regex_t* reg) -{ - int i, j; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t) && t->num > 0) { - fprintf(fp, "name table\n"); - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - fprintf(fp, "%s: ", e->name); - if (e->back_num == 0) { - fputs("-", fp); - } - else if (e->back_num == 1) { - fprintf(fp, "%d", e->back_ref1); - } - else { - for (j = 0; j < e->back_num; j++) { - if (j > 0) fprintf(fp, ", "); - fprintf(fp, "%d", e->back_refs[j]); - } - } - fputs("\n", fp); - } - fputs("\n", fp); - } - return 0; -} -#endif - -static int -names_clear(regex_t* reg) -{ - int i; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - if (IS_NOT_NULL(e->name)) { - xfree(e->name); - e->name = NULL; - e->name_len = 0; - e->back_num = 0; - e->back_alloc = 0; - if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); - e->back_refs = (int* )NULL; - } - } - if 
(IS_NOT_NULL(t->e)) { - xfree(t->e); - t->e = NULL; - } - t->num = 0; - } - return 0; -} - -extern int -onig_names_free(regex_t* reg) -{ - int r; - NameTable* t; - - r = names_clear(reg); - if (r) return r; - - t = (NameTable* )reg->name_table; - if (IS_NOT_NULL(t)) xfree(t); - reg->name_table = NULL; - return 0; -} - -static NameEntry* -name_find(regex_t* reg, UChar* name, UChar* name_end) -{ - int i, len; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - len = name_end - name; - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) - return e; - } - } - return (NameEntry* )NULL; -} - -extern int -onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) -{ - int i, r; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - r = (*func)(e->name, e->name + e->name_len, e->back_num, - (e->back_num > 1 ? 
e->back_refs : &(e->back_ref1)), - reg, arg); - if (r != 0) return r; - } - } - return 0; -} - -extern int -onig_number_of_names(regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) - return t->num; - else - return 0; -} - -#endif /* else USE_ST_LIBRARY */ - -static int -name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) -{ - int alloc; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (name_end - name <= 0) - return ONIGERR_EMPTY_GROUP_NAME; - - e = name_find(reg, name, name_end); - if (IS_NULL(e)) { -#ifdef USE_ST_LIBRARY - if (IS_NULL(t)) { - t = onig_st_init_strend_table_with_size(5); - reg->name_table = (void* )t; - } - e = (NameEntry* )xmalloc(sizeof(NameEntry)); - CHECK_NULL_RETURN_MEMERR(e); - - e->name = strdup_with_null(reg->enc, name, name_end); - if (IS_NULL(e->name)) { - xfree(e); return ONIGERR_MEMORY; - } - onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), - (HashDataType )e); - - e->name_len = name_end - name; - e->back_num = 0; - e->back_alloc = 0; - e->back_refs = (int* )NULL; - -#else - - if (IS_NULL(t)) { - alloc = INIT_NAMES_ALLOC_NUM; - t = (NameTable* )xmalloc(sizeof(NameTable)); - CHECK_NULL_RETURN_MEMERR(t); - t->e = NULL; - t->alloc = 0; - t->num = 0; - - t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc); - if (IS_NULL(t->e)) { - xfree(t); - return ONIGERR_MEMORY; - } - t->alloc = alloc; - reg->name_table = t; - goto clear; - } - else if (t->num == t->alloc) { - int i; - - alloc = t->alloc * 2; - t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); - CHECK_NULL_RETURN_MEMERR(t->e); - t->alloc = alloc; - - clear: - for (i = t->num; i < t->alloc; i++) { - t->e[i].name = NULL; - t->e[i].name_len = 0; - t->e[i].back_num = 0; - t->e[i].back_alloc = 0; - t->e[i].back_refs = (int* )NULL; - } - } - e = &(t->e[t->num]); - t->num++; - e->name = strdup_with_null(reg->enc, name, name_end); - if (IS_NULL(e->name)) return ONIGERR_MEMORY; - 
e->name_len = name_end - name; -#endif - } - - if (e->back_num >= 1 && - ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) { - onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME, - name, name_end); - return ONIGERR_MULTIPLEX_DEFINED_NAME; - } - - e->back_num++; - if (e->back_num == 1) { - e->back_ref1 = backref; - } - else { - if (e->back_num == 2) { - alloc = INIT_NAME_BACKREFS_ALLOC_NUM; - e->back_refs = (int* )xmalloc(sizeof(int) * alloc); - CHECK_NULL_RETURN_MEMERR(e->back_refs); - e->back_alloc = alloc; - e->back_refs[0] = e->back_ref1; - e->back_refs[1] = backref; - } - else { - if (e->back_num > e->back_alloc) { - alloc = e->back_alloc * 2; - e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); - CHECK_NULL_RETURN_MEMERR(e->back_refs); - e->back_alloc = alloc; - } - e->back_refs[e->back_num - 1] = backref; - } - } - - return 0; -} - -extern int -onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) -{ - NameEntry* e = name_find(reg, name, name_end); - - if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE; - - switch (e->back_num) { - case 0: - break; - case 1: - *nums = &(e->back_ref1); - break; - default: - *nums = e->back_refs; - break; - } - return e->back_num; -} - -extern int -onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, OnigRegion *region) -{ - int i, n, *nums; - - n = onig_name_to_group_numbers(reg, name, name_end, &nums); - if (n < 0) - return n; - else if (n == 0) - return ONIGERR_PARSER_BUG; - else if (n == 1) - return nums[0]; - else { - if (IS_NOT_NULL(region)) { - for (i = n - 1; i >= 0; i--) { - if (region->beg[nums[i]] != ONIG_REGION_NOTPOS) - return nums[i]; - } - } - return nums[n - 1]; - } -} - -#else /* USE_NAMED_GROUP */ - -extern int -onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int 
-onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, OnigRegion* region) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onig_number_of_names(regex_t* reg) -{ - return 0; -} -#endif /* else USE_NAMED_GROUP */ - -extern int -onig_noname_group_capture_is_active(regex_t* reg) -{ - if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) - return 0; - -#ifdef USE_NAMED_GROUP - if (onig_number_of_names(reg) > 0 && - IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { - return 0; - } -#endif - - return 1; -} - - -#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 - -static void -scan_env_clear(ScanEnv* env) -{ - int i; - - BIT_STATUS_CLEAR(env->capture_history); - BIT_STATUS_CLEAR(env->bt_mem_start); - BIT_STATUS_CLEAR(env->bt_mem_end); - BIT_STATUS_CLEAR(env->backrefed_mem); - env->error = (UChar* )NULL; - env->error_end = (UChar* )NULL; - env->num_call = 0; - env->num_mem = 0; -#ifdef USE_NAMED_GROUP - env->num_named = 0; -#endif - env->mem_alloc = 0; - env->mem_nodes_dynamic = (Node** )NULL; - - for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) - env->mem_nodes_static[i] = NULL_NODE; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - env->num_comb_exp_check = 0; - env->comb_exp_max_regnum = 0; - env->curr_max_regnum = 0; - env->has_recursion = 0; -#endif -} - -static int -scan_env_add_mem_entry(ScanEnv* env) -{ - int i, need, alloc; - Node** p; - - need = env->num_mem + 1; - if (need >= SCANENV_MEMNODES_SIZE) { - if (env->mem_alloc <= need) { - if (IS_NULL(env->mem_nodes_dynamic)) { - alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; - p = (Node** )xmalloc(sizeof(Node*) * alloc); - xmemcpy(p, env->mem_nodes_static, - sizeof(Node*) * SCANENV_MEMNODES_SIZE); - } - else { - alloc = env->mem_alloc * 2; - p = 
(Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); - } - CHECK_NULL_RETURN_MEMERR(p); - - for (i = env->num_mem + 1; i < alloc; i++) - p[i] = NULL_NODE; - - env->mem_nodes_dynamic = p; - env->mem_alloc = alloc; - } - } - - env->num_mem++; - return env->num_mem; -} - -static int -scan_env_set_mem_node(ScanEnv* env, int num, Node* node) -{ - if (env->num_mem >= num) - SCANENV_MEM_NODES(env)[num] = node; - else - return ONIGERR_PARSER_BUG; - return 0; -} - - -#ifdef USE_PARSE_TREE_NODE_RECYCLE -typedef struct _FreeNode { - struct _FreeNode* next; -} FreeNode; - -static FreeNode* FreeNodeList = (FreeNode* )NULL; -#endif - -extern void -onig_node_free(Node* node) -{ - start: - if (IS_NULL(node)) return ; - - switch (NTYPE(node)) { - case NT_STR: - if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { - xfree(NSTR(node)->s); - } - break; - - case NT_LIST: - case NT_ALT: - onig_node_free(NCAR(node)); - { - Node* next_node = NCDR(node); - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - { - FreeNode* n = (FreeNode* )node; - - THREAD_ATOMIC_START; - n->next = FreeNodeList; - FreeNodeList = n; - THREAD_ATOMIC_END; - } -#else - xfree(node); -#endif - node = next_node; - goto start; - } - break; - - case NT_CCLASS: - { - CClassNode* cc = NCCLASS(node); - - if (IS_NCCLASS_SHARE(cc)) return ; - if (cc->mbuf) - bbuf_free(cc->mbuf); - } - break; - - case NT_QTFR: - if (NQTFR(node)->target) - onig_node_free(NQTFR(node)->target); - break; - - case NT_ENCLOSE: - if (NENCLOSE(node)->target) - onig_node_free(NENCLOSE(node)->target); - break; - - case NT_BREF: - if (IS_NOT_NULL(NBREF(node)->back_dynamic)) - xfree(NBREF(node)->back_dynamic); - break; - - case NT_ANCHOR: - if (NANCHOR(node)->target) - onig_node_free(NANCHOR(node)->target); - break; - } - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - { - FreeNode* n = (FreeNode* )node; - - THREAD_ATOMIC_START; - n->next = FreeNodeList; - FreeNodeList = n; - THREAD_ATOMIC_END; - } -#else - xfree(node); 
-#endif -} - -#ifdef USE_PARSE_TREE_NODE_RECYCLE -extern int -onig_free_node_list(void) -{ - FreeNode* n; - - /* THREAD_ATOMIC_START; */ - while (IS_NOT_NULL(FreeNodeList)) { - n = FreeNodeList; - FreeNodeList = FreeNodeList->next; - xfree(n); - } - /* THREAD_ATOMIC_END; */ - return 0; -} -#endif - -static Node* -node_new(void) -{ - Node* node; - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - THREAD_ATOMIC_START; - if (IS_NOT_NULL(FreeNodeList)) { - node = (Node* )FreeNodeList; - FreeNodeList = FreeNodeList->next; - THREAD_ATOMIC_END; - return node; - } - THREAD_ATOMIC_END; -#endif - - node = (Node* )xmalloc(sizeof(Node)); - /* xmemset(node, 0, sizeof(Node)); */ - return node; -} - - -static void -initialize_cclass(CClassNode* cc) -{ - BITSET_CLEAR(cc->bs); - /* cc->base.flags = 0; */ - cc->flags = 0; - cc->mbuf = NULL; -} - -static Node* -node_new_cclass(void) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CCLASS); - initialize_cclass(NCCLASS(node)); - return node; -} - -static Node* -node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out, - const OnigCodePoint ranges[]) -{ - int n, i; - CClassNode* cc; - OnigCodePoint j; - - Node* node = node_new_cclass(); - CHECK_NULL_RETURN(node); - - cc = NCCLASS(node); - if (not != 0) NCCLASS_SET_NOT(cc); - - BITSET_CLEAR(cc->bs); - if (sb_out > 0 && IS_NOT_NULL(ranges)) { - n = ONIGENC_CODE_RANGE_NUM(ranges); - for (i = 0; i < n; i++) { - for (j = ONIGENC_CODE_RANGE_FROM(ranges, i); - j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) { - if (j >= sb_out) goto sb_end; - - BITSET_SET_BIT(cc->bs, j); - } - } - } - - sb_end: - if (IS_NULL(ranges)) { - is_null: - cc->mbuf = NULL; - } - else { - BBuf* bbuf; - - n = ONIGENC_CODE_RANGE_NUM(ranges); - if (n == 0) goto is_null; - - bbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN(bbuf); - bbuf->alloc = n + 1; - bbuf->used = n + 1; - bbuf->p = (UChar* )((void* )ranges); - - cc->mbuf = bbuf; - } - - return node; -} - -static Node* 
-node_new_ctype(int type, int not) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CTYPE); - NCTYPE(node)->ctype = type; - NCTYPE(node)->not = not; - return node; -} - -static Node* -node_new_anychar(void) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CANY); - return node; -} - -static Node* -node_new_list(Node* left, Node* right) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_LIST); - NCAR(node) = left; - NCDR(node) = right; - return node; -} - -extern Node* -onig_node_new_list(Node* left, Node* right) -{ - return node_new_list(left, right); -} - -extern Node* -onig_node_list_add(Node* list, Node* x) -{ - Node *n; - - n = onig_node_new_list(x, NULL); - if (IS_NULL(n)) return NULL_NODE; - - if (IS_NOT_NULL(list)) { - while (IS_NOT_NULL(NCDR(list))) - list = NCDR(list); - - NCDR(list) = n; - } - - return n; -} - -extern Node* -onig_node_new_alt(Node* left, Node* right) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_ALT); - NCAR(node) = left; - NCDR(node) = right; - return node; -} - -extern Node* -onig_node_new_anchor(int type) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_ANCHOR); - NANCHOR(node)->type = type; - NANCHOR(node)->target = NULL; - NANCHOR(node)->char_len = -1; - return node; -} - -static Node* -node_new_backref(int back_num, int* backrefs, int by_name, -#ifdef USE_BACKREF_WITH_LEVEL - int exist_level, int nest_level, -#endif - ScanEnv* env) -{ - int i; - Node* node = node_new(); - - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_BREF); - NBREF(node)->state = 0; - NBREF(node)->back_num = back_num; - NBREF(node)->back_dynamic = (int* )NULL; - if (by_name != 0) - NBREF(node)->state |= NST_NAME_REF; - -#ifdef USE_BACKREF_WITH_LEVEL - if (exist_level != 0) { - NBREF(node)->state |= NST_NEST_LEVEL; - NBREF(node)->nest_level = nest_level; - } -#endif - - for (i = 0; i < back_num; i++) { - if 
(backrefs[i] <= env->num_mem && - IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { - NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */ - break; - } - } - - if (back_num <= NODE_BACKREFS_SIZE) { - for (i = 0; i < back_num; i++) - NBREF(node)->back_static[i] = backrefs[i]; - } - else { - int* p = (int* )xmalloc(sizeof(int) * back_num); - if (IS_NULL(p)) { - onig_node_free(node); - return NULL; - } - NBREF(node)->back_dynamic = p; - for (i = 0; i < back_num; i++) - p[i] = backrefs[i]; - } - return node; -} - -#ifdef USE_SUBEXP_CALL -static Node* -node_new_call(UChar* name, UChar* name_end, int gnum) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CALL); - NCALL(node)->state = 0; - NCALL(node)->target = NULL_NODE; - NCALL(node)->name = name; - NCALL(node)->name_end = name_end; - NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */ - return node; -} -#endif - -static Node* -node_new_quantifier(int lower, int upper, int by_number) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_QTFR); - NQTFR(node)->state = 0; - NQTFR(node)->target = NULL; - NQTFR(node)->lower = lower; - NQTFR(node)->upper = upper; - NQTFR(node)->greedy = 1; - NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY; - NQTFR(node)->head_exact = NULL_NODE; - NQTFR(node)->next_head_exact = NULL_NODE; - NQTFR(node)->is_refered = 0; - if (by_number != 0) - NQTFR(node)->state |= NST_BY_NUMBER; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - NQTFR(node)->comb_exp_check_num = 0; -#endif - - return node; -} - -static Node* -node_new_enclose(int type) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_ENCLOSE); - NENCLOSE(node)->type = type; - NENCLOSE(node)->state = 0; - NENCLOSE(node)->regnum = 0; - NENCLOSE(node)->option = 0; - NENCLOSE(node)->target = NULL; - NENCLOSE(node)->call_addr = -1; - NENCLOSE(node)->opt_count = 0; - return node; -} - -extern Node* -onig_node_new_enclose(int type) -{ - return 
node_new_enclose(type); -} - -static Node* -node_new_enclose_memory(OnigOptionType option, int is_named) -{ - Node* node = node_new_enclose(ENCLOSE_MEMORY); - CHECK_NULL_RETURN(node); - if (is_named != 0) - SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP); - -#ifdef USE_SUBEXP_CALL - NENCLOSE(node)->option = option; -#endif - return node; -} - -static Node* -node_new_option(OnigOptionType option) -{ - Node* node = node_new_enclose(ENCLOSE_OPTION); - CHECK_NULL_RETURN(node); - NENCLOSE(node)->option = option; - return node; -} - -extern int -onig_node_str_cat(Node* node, const UChar* s, const UChar* end) -{ - int addlen = end - s; - - if (addlen > 0) { - int len = NSTR(node)->end - NSTR(node)->s; - - if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { - UChar* p; - int capa = len + addlen + NODE_STR_MARGIN; - - if (capa <= NSTR(node)->capa) { - onig_strcpy(NSTR(node)->s + len, s, end); - } - else { - if (NSTR(node)->s == NSTR(node)->buf) - p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, - s, end, capa); - else - p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa); - - CHECK_NULL_RETURN_MEMERR(p); - NSTR(node)->s = p; - NSTR(node)->capa = capa; - } - } - else { - onig_strcpy(NSTR(node)->s + len, s, end); - } - NSTR(node)->end = NSTR(node)->s + len + addlen; - } - - return 0; -} - -extern int -onig_node_str_set(Node* node, const UChar* s, const UChar* end) -{ - onig_node_str_clear(node); - return onig_node_str_cat(node, s, end); -} - -static int -node_str_cat_char(Node* node, UChar c) -{ - UChar s[1]; - - s[0] = c; - return onig_node_str_cat(node, s, s + 1); -} - -extern void -onig_node_conv_to_str_node(Node* node, int flag) -{ - SET_NTYPE(node, NT_STR); - NSTR(node)->flag = flag; - NSTR(node)->capa = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; -} - -extern void -onig_node_str_clear(Node* node) -{ - if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { - 
xfree(NSTR(node)->s); - } - - NSTR(node)->capa = 0; - NSTR(node)->flag = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; -} - -static Node* -node_new_str(const UChar* s, const UChar* end) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_STR); - NSTR(node)->capa = 0; - NSTR(node)->flag = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; - if (onig_node_str_cat(node, s, end)) { - onig_node_free(node); - return NULL; - } - return node; -} - -extern Node* -onig_node_new_str(const UChar* s, const UChar* end) -{ - return node_new_str(s, end); -} - -static Node* -node_new_str_raw(UChar* s, UChar* end) -{ - Node* node = node_new_str(s, end); - NSTRING_SET_RAW(node); - return node; -} - -static Node* -node_new_empty(void) -{ - return node_new_str(NULL, NULL); -} - -static Node* -node_new_str_raw_char(UChar c) -{ - UChar p[1]; - - p[0] = c; - return node_new_str_raw(p, p + 1); -} - -static Node* -str_node_split_last_char(StrNode* sn, OnigEncoding enc) -{ - const UChar *p; - Node* n = NULL_NODE; - - if (sn->end > sn->s) { - p = onigenc_get_prev_char_head(enc, sn->s, sn->end); - if (p && p > sn->s) { /* can be splitted. */ - n = node_new_str(p, sn->end); - if ((sn->flag & NSTR_RAW) != 0) - NSTRING_SET_RAW(n); - sn->end = (UChar* )p; - } - } - return n; -} - -static int -str_node_can_be_split(StrNode* sn, OnigEncoding enc) -{ - if (sn->end > sn->s) { - return ((enclen(enc, sn->s) < sn->end - sn->s) ? 
1 : 0); - } - return 0; -} - -#ifdef USE_PAD_TO_SHORT_BYTE_CHAR -static int -node_str_head_pad(StrNode* sn, int num, UChar val) -{ - UChar buf[NODE_STR_BUF_SIZE]; - int i, len; - - len = sn->end - sn->s; - onig_strcpy(buf, sn->s, sn->end); - onig_strcpy(&(sn->s[num]), buf, buf + len); - sn->end += num; - - for (i = 0; i < num; i++) { - sn->s[i] = val; - } -} -#endif - -extern int -onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) -{ - unsigned int num, val; - OnigCodePoint c; - UChar* p = *src; - PFETCH_READY; - - num = 0; - while (!PEND) { - PFETCH(c); - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - val = (unsigned int )DIGITVAL(c); - if ((INT_MAX_LIMIT - val) / 10UL < num) - return -1; /* overflow */ - - num = num * 10 + val; - } - else { - PUNFETCH; - break; - } - } - *src = p; - return num; -} - -static int -scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, - OnigEncoding enc) -{ - OnigCodePoint c; - unsigned int num, val; - UChar* p = *src; - PFETCH_READY; - - num = 0; - while (!PEND && maxlen-- != 0) { - PFETCH(c); - if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { - val = (unsigned int )XDIGITVAL(enc,c); - if ((INT_MAX_LIMIT - val) / 16UL < num) - return -1; /* overflow */ - - num = (num << 4) + XDIGITVAL(enc,c); - } - else { - PUNFETCH; - break; - } - } - *src = p; - return num; -} - -static int -scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, - OnigEncoding enc) -{ - OnigCodePoint c; - unsigned int num, val; - UChar* p = *src; - PFETCH_READY; - - num = 0; - while (!PEND && maxlen-- != 0) { - PFETCH(c); - if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { - val = ODIGITVAL(c); - if ((INT_MAX_LIMIT - val) / 8UL < num) - return -1; /* overflow */ - - num = (num << 3) + val; - } - else { - PUNFETCH; - break; - } - } - *src = p; - return num; -} - - -#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \ - BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) - -/* data format: - [n][from-1][to-1][from-2][to-2] ... 
[from-n][to-n] - (all data size is OnigCodePoint) - */ -static int -new_code_range(BBuf** pbuf) -{ -#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5) - int r; - OnigCodePoint n; - BBuf* bbuf; - - bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_MEMERR(*pbuf); - r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); - if (r) return r; - - n = 0; - BBUF_WRITE_CODE_POINT(bbuf, 0, n); - return 0; -} - -static int -add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) -{ - int r, inc_n, pos; - int low, high, bound, x; - OnigCodePoint n, *data; - BBuf* bbuf; - - if (from > to) { - n = from; from = to; to = n; - } - - if (IS_NULL(*pbuf)) { - r = new_code_range(pbuf); - if (r) return r; - bbuf = *pbuf; - n = 0; - } - else { - bbuf = *pbuf; - GET_CODE_POINT(n, bbuf->p); - } - data = (OnigCodePoint* )(bbuf->p); - data++; - - for (low = 0, bound = n; low < bound; ) { - x = (low + bound) >> 1; - if (from > data[x*2 + 1]) - low = x + 1; - else - bound = x; - } - - for (high = low, bound = n; high < bound; ) { - x = (high + bound) >> 1; - if (to >= data[x*2] - 1) - high = x + 1; - else - bound = x; - } - - inc_n = low + 1 - high; - if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM) - return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES; - - if (inc_n != 1) { - if (from > data[low*2]) - from = data[low*2]; - if (to < data[(high - 1)*2 + 1]) - to = data[(high - 1)*2 + 1]; - } - - if (inc_n != 0 && (OnigCodePoint )high < n) { - int from_pos = SIZE_CODE_POINT * (1 + high * 2); - int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2); - int size = (n - high) * 2 * SIZE_CODE_POINT; - - if (inc_n > 0) { - BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); - } - else { - BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos); - } - } - - pos = SIZE_CODE_POINT * (1 + low * 2); - BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2); - BBUF_WRITE_CODE_POINT(bbuf, pos, from); - BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to); - n += inc_n; - BBUF_WRITE_CODE_POINT(bbuf, 0, n); - - 
return 0; -} - -static int -add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) -{ - if (from > to) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - return 0; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - - return add_code_range_to_buf(pbuf, from, to); -} - -static int -not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) -{ - int r, i, n; - OnigCodePoint pre, from, *data, to = 0; - - *pbuf = (BBuf* )NULL; - if (IS_NULL(bbuf)) { - set_all: - return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); - } - - data = (OnigCodePoint* )(bbuf->p); - GET_CODE_POINT(n, data); - data++; - if (n <= 0) goto set_all; - - r = 0; - pre = MBCODE_START_POS(enc); - for (i = 0; i < n; i++) { - from = data[i*2]; - to = data[i*2+1]; - if (pre <= from - 1) { - r = add_code_range_to_buf(pbuf, pre, from - 1); - if (r != 0) return r; - } - if (to == ~((OnigCodePoint )0)) break; - pre = to + 1; - } - if (to < ~((OnigCodePoint )0)) { - r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0)); - } - return r; -} - -#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\ - BBuf *tbuf; \ - int tnot; \ - tnot = not1; not1 = not2; not2 = tnot; \ - tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \ -} while (0) - -static int -or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, - BBuf* bbuf2, int not2, BBuf** pbuf) -{ - int r; - OnigCodePoint i, n1, *data1; - OnigCodePoint from, to; - - *pbuf = (BBuf* )NULL; - if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) { - if (not1 != 0 || not2 != 0) - return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); - return 0; - } - - r = 0; - if (IS_NULL(bbuf2)) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); - - if (IS_NULL(bbuf1)) { - if (not1 != 0) { - return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); - } - else { - if (not2 == 0) { - return bbuf_clone(pbuf, bbuf2); - } - else { - return not_code_range_buf(enc, bbuf2, pbuf); - } - } - } - - if (not1 != 0) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); - - data1 = (OnigCodePoint* )(bbuf1->p); 
- GET_CODE_POINT(n1, data1); - data1++; - - if (not2 == 0 && not1 == 0) { /* 1 OR 2 */ - r = bbuf_clone(pbuf, bbuf2); - } - else if (not1 == 0) { /* 1 OR (not 2) */ - r = not_code_range_buf(enc, bbuf2, pbuf); - } - if (r != 0) return r; - - for (i = 0; i < n1; i++) { - from = data1[i*2]; - to = data1[i*2+1]; - r = add_code_range_to_buf(pbuf, from, to); - if (r != 0) return r; - } - return 0; -} - -static int -and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1, - OnigCodePoint* data, int n) -{ - int i, r; - OnigCodePoint from2, to2; - - for (i = 0; i < n; i++) { - from2 = data[i*2]; - to2 = data[i*2+1]; - if (from2 < from1) { - if (to2 < from1) continue; - else { - from1 = to2 + 1; - } - } - else if (from2 <= to1) { - if (to2 < to1) { - if (from1 <= from2 - 1) { - r = add_code_range_to_buf(pbuf, from1, from2-1); - if (r != 0) return r; - } - from1 = to2 + 1; - } - else { - to1 = from2 - 1; - } - } - else { - from1 = from2; - } - if (from1 > to1) break; - } - if (from1 <= to1) { - r = add_code_range_to_buf(pbuf, from1, to1); - if (r != 0) return r; - } - return 0; -} - -static int -and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) -{ - int r; - OnigCodePoint i, j, n1, n2, *data1, *data2; - OnigCodePoint from, to, from1, to1, from2, to2; - - *pbuf = (BBuf* )NULL; - if (IS_NULL(bbuf1)) { - if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */ - return bbuf_clone(pbuf, bbuf2); - return 0; - } - else if (IS_NULL(bbuf2)) { - if (not2 != 0) - return bbuf_clone(pbuf, bbuf1); - return 0; - } - - if (not1 != 0) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); - - data1 = (OnigCodePoint* )(bbuf1->p); - data2 = (OnigCodePoint* )(bbuf2->p); - GET_CODE_POINT(n1, data1); - GET_CODE_POINT(n2, data2); - data1++; - data2++; - - if (not2 == 0 && not1 == 0) { /* 1 AND 2 */ - for (i = 0; i < n1; i++) { - from1 = data1[i*2]; - to1 = data1[i*2+1]; - for (j = 0; j < n2; j++) { - from2 = data2[j*2]; - to2 = data2[j*2+1]; - if (from2 > to1) 
break; - if (to2 < from1) continue; - from = MAX(from1, from2); - to = MIN(to1, to2); - r = add_code_range_to_buf(pbuf, from, to); - if (r != 0) return r; - } - } - } - else if (not1 == 0) { /* 1 AND (not 2) */ - for (i = 0; i < n1; i++) { - from1 = data1[i*2]; - to1 = data1[i*2+1]; - r = and_code_range1(pbuf, from1, to1, data2, n2); - if (r != 0) return r; - } - } - - return 0; -} - -static int -and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) -{ - int r, not1, not2; - BBuf *buf1, *buf2, *pbuf; - BitSetRef bsr1, bsr2; - BitSet bs1, bs2; - - not1 = IS_NCCLASS_NOT(dest); - bsr1 = dest->bs; - buf1 = dest->mbuf; - not2 = IS_NCCLASS_NOT(cc); - bsr2 = cc->bs; - buf2 = cc->mbuf; - - if (not1 != 0) { - bitset_invert_to(bsr1, bs1); - bsr1 = bs1; - } - if (not2 != 0) { - bitset_invert_to(bsr2, bs2); - bsr2 = bs2; - } - bitset_and(bsr1, bsr2); - if (bsr1 != dest->bs) { - bitset_copy(dest->bs, bsr1); - bsr1 = dest->bs; - } - if (not1 != 0) { - bitset_invert(dest->bs); - } - - if (! ONIGENC_IS_SINGLEBYTE(enc)) { - if (not1 != 0 && not2 != 0) { - r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf); - } - else { - r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf); - if (r == 0 && not1 != 0) { - BBuf *tbuf; - r = not_code_range_buf(enc, pbuf, &tbuf); - if (r != 0) { - bbuf_free(pbuf); - return r; - } - bbuf_free(pbuf); - pbuf = tbuf; - } - } - if (r != 0) return r; - - dest->mbuf = pbuf; - bbuf_free(buf1); - return r; - } - return 0; -} - -static int -or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) -{ - int r, not1, not2; - BBuf *buf1, *buf2, *pbuf; - BitSetRef bsr1, bsr2; - BitSet bs1, bs2; - - not1 = IS_NCCLASS_NOT(dest); - bsr1 = dest->bs; - buf1 = dest->mbuf; - not2 = IS_NCCLASS_NOT(cc); - bsr2 = cc->bs; - buf2 = cc->mbuf; - - if (not1 != 0) { - bitset_invert_to(bsr1, bs1); - bsr1 = bs1; - } - if (not2 != 0) { - bitset_invert_to(bsr2, bs2); - bsr2 = bs2; - } - bitset_or(bsr1, bsr2); - if (bsr1 != dest->bs) { - bitset_copy(dest->bs, bsr1); - 
bsr1 = dest->bs; - } - if (not1 != 0) { - bitset_invert(dest->bs); - } - - if (! ONIGENC_IS_SINGLEBYTE(enc)) { - if (not1 != 0 && not2 != 0) { - r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf); - } - else { - r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf); - if (r == 0 && not1 != 0) { - BBuf *tbuf; - r = not_code_range_buf(enc, pbuf, &tbuf); - if (r != 0) { - bbuf_free(pbuf); - return r; - } - bbuf_free(pbuf); - pbuf = tbuf; - } - } - if (r != 0) return r; - - dest->mbuf = pbuf; - bbuf_free(buf1); - return r; - } - else - return 0; -} - -static int -conv_backslash_value(int c, ScanEnv* env) -{ - if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { - switch (c) { - case 'n': return '\n'; - case 't': return '\t'; - case 'r': return '\r'; - case 'f': return '\f'; - case 'a': return '\007'; - case 'b': return '\010'; - case 'e': return '\033'; - case 'v': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) - return '\v'; - break; - - default: - break; - } - } - return c; -} - -static int -is_invalid_quantifier_target(Node* node) -{ - switch (NTYPE(node)) { - case NT_ANCHOR: - return 1; - break; - - case NT_ENCLOSE: - /* allow enclosed elements */ - /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */ - break; - - case NT_LIST: - do { - if (! 
is_invalid_quantifier_target(NCAR(node))) return 0; - } while (IS_NOT_NULL(node = NCDR(node))); - return 0; - break; - - case NT_ALT: - do { - if (is_invalid_quantifier_target(NCAR(node))) return 1; - } while (IS_NOT_NULL(node = NCDR(node))); - break; - - default: - break; - } - return 0; -} - -/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ -static int -popular_quantifier_num(QtfrNode* q) -{ - if (q->greedy) { - if (q->lower == 0) { - if (q->upper == 1) return 0; - else if (IS_REPEAT_INFINITE(q->upper)) return 1; - } - else if (q->lower == 1) { - if (IS_REPEAT_INFINITE(q->upper)) return 2; - } - } - else { - if (q->lower == 0) { - if (q->upper == 1) return 3; - else if (IS_REPEAT_INFINITE(q->upper)) return 4; - } - else if (q->lower == 1) { - if (IS_REPEAT_INFINITE(q->upper)) return 5; - } - } - return -1; -} - - -enum ReduceType { - RQ_ASIS = 0, /* as is */ - RQ_DEL = 1, /* delete parent */ - RQ_A, /* to '*' */ - RQ_AQ, /* to '*?' */ - RQ_QQ, /* to '??' */ - RQ_P_QQ, /* to '+)??' */ - RQ_PQ_Q /* to '+?)?' */ -}; - -static enum ReduceType ReduceTypeTable[6][6] = { - {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */ - {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */ - {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */ - {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */ - {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */ - {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' 
*/ -}; - -extern void -onig_reduce_nested_quantifier(Node* pnode, Node* cnode) -{ - int pnum, cnum; - QtfrNode *p, *c; - - p = NQTFR(pnode); - c = NQTFR(cnode); - pnum = popular_quantifier_num(p); - cnum = popular_quantifier_num(c); - if (pnum < 0 || cnum < 0) return ; - - switch(ReduceTypeTable[cnum][pnum]) { - case RQ_DEL: - *pnode = *cnode; - break; - case RQ_A: - p->target = c->target; - p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1; - break; - case RQ_AQ: - p->target = c->target; - p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0; - break; - case RQ_QQ: - p->target = c->target; - p->lower = 0; p->upper = 1; p->greedy = 0; - break; - case RQ_P_QQ: - p->target = cnode; - p->lower = 0; p->upper = 1; p->greedy = 0; - c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1; - return ; - break; - case RQ_PQ_Q: - p->target = cnode; - p->lower = 0; p->upper = 1; p->greedy = 1; - c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0; - return ; - break; - case RQ_ASIS: - p->target = cnode; - return ; - break; - } - - c->target = NULL_NODE; - onig_node_free(cnode); -} - - -enum TokenSyms { - TK_EOT = 0, /* end of token */ - TK_RAW_BYTE = 1, - TK_CHAR, - TK_STRING, - TK_CODE_POINT, - TK_ANYCHAR, - TK_CHAR_TYPE, - TK_BACKREF, - TK_CALL, - TK_ANCHOR, - TK_OP_REPEAT, - TK_INTERVAL, - TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */ - TK_ALT, - TK_SUBEXP_OPEN, - TK_SUBEXP_CLOSE, - TK_CC_OPEN, - TK_QUOTE_OPEN, - TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ - /* in cc */ - TK_CC_CLOSE, - TK_CC_RANGE, - TK_POSIX_BRACKET_OPEN, - TK_CC_AND, /* && */ - TK_CC_CC_OPEN /* [ */ -}; - -typedef struct { - enum TokenSyms type; - int escaped; - int base; /* is number: 8, 16 (used in [....]) */ - UChar* backp; - union { - UChar* s; - int c; - OnigCodePoint code; - int anchor; - int subtype; - struct { - int lower; - int upper; - int greedy; - int possessive; - } repeat; - struct { - int num; - int ref1; - int* refs; - int by_name; -#ifdef USE_BACKREF_WITH_LEVEL - int exist_level; - 
int level; /* \k */ -#endif - } backref; - struct { - UChar* name; - UChar* name_end; - int gnum; - } call; - struct { - int ctype; - int not; - } prop; - } u; -} OnigToken; - - -static int -fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) -{ - int low, up, syn_allow, non_low = 0; - int r = 0; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar* p = *src; - PFETCH_READY; - - syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL); - - if (PEND) { - if (syn_allow) - return 1; /* "....{" : OK! */ - else - return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */ - } - - if (! syn_allow) { - c = PPEEK; - if (c == ')' || c == '(' || c == '|') { - return ONIGERR_END_PATTERN_AT_LEFT_BRACE; - } - } - - low = onig_scan_unsigned_number(&p, end, env->enc); - if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - if (low > ONIG_MAX_REPEAT_NUM) - return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - - if (p == *src) { /* can't read low */ - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) { - /* allow {,n} as {0,n} */ - low = 0; - non_low = 1; - } - else - goto invalid; - } - - if (PEND) goto invalid; - PFETCH(c); - if (c == ',') { - UChar* prev = p; - up = onig_scan_unsigned_number(&p, end, env->enc); - if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - if (up > ONIG_MAX_REPEAT_NUM) - return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - - if (p == prev) { - if (non_low != 0) - goto invalid; - up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ - } - } - else { - if (non_low != 0) - goto invalid; - - PUNFETCH; - up = low; /* {n} : exact n times */ - r = 2; /* fixed */ - } - - if (PEND) goto invalid; - PFETCH(c); - if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { - if (c != MC_ESC(env->syntax)) goto invalid; - PFETCH(c); - } - if (c != '}') goto invalid; - - if (!IS_REPEAT_INFINITE(up) && low > up) { - return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; - } - - tok->type 
= TK_INTERVAL; - tok->u.repeat.lower = low; - tok->u.repeat.upper = up; - *src = p; - return r; /* 0: normal {n,m}, 2: fixed {n} */ - - invalid: - if (syn_allow) - return 1; /* OK */ - else - return ONIGERR_INVALID_REPEAT_RANGE_PATTERN; -} - -/* \M-, \C-, \c, or \... */ -static int -fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) -{ - int v; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar* p = *src; - - if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; - - PFETCH_S(c); - switch (c) { - case 'M': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) { - if (PEND) return ONIGERR_END_PATTERN_AT_META; - PFETCH_S(c); - if (c != '-') return ONIGERR_META_CODE_SYNTAX; - if (PEND) return ONIGERR_END_PATTERN_AT_META; - PFETCH_S(c); - if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env); - if (v < 0) return v; - c = (OnigCodePoint )v; - } - c = ((c & 0xff) | 0x80); - } - else - goto backslash; - break; - - case 'C': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { - if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; - PFETCH_S(c); - if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX; - goto control; - } - else - goto backslash; - - case 'c': - if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) { - control: - if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; - PFETCH_S(c); - if (c == '?') { - c = 0177; - } - else { - if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env); - if (v < 0) return v; - c = (OnigCodePoint )v; - } - c &= 0x9f; - } - break; - } - /* fall through */ - - default: - { - backslash: - c = conv_backslash_value(c, env); - } - break; - } - - *src = p; - return c; -} - -static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); - -static OnigCodePoint -get_name_end_code_point(OnigCodePoint start) -{ - switch (start) { - case '<': return (OnigCodePoint )'>'; break; - case '\'': return (OnigCodePoint )'\''; break; - default: - break; - } - - 
return (OnigCodePoint )0; -} - -#ifdef USE_NAMED_GROUP -#ifdef USE_BACKREF_WITH_LEVEL -/* - \k, \k - \k, \k - \k<-num+n>, \k<-num-n> -*/ -static int -fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, - int* rback_num, int* rlevel) -{ - int r, sign, is_num, exist_level; - OnigCodePoint end_code; - OnigCodePoint c = 0; - OnigEncoding enc = env->enc; - UChar *name_end; - UChar *pnum_head; - UChar *p = *src; - PFETCH_READY; - - *rback_num = 0; - is_num = exist_level = 0; - sign = 1; - pnum_head = *src; - - end_code = get_name_end_code_point(start_code); - - name_end = end; - r = 0; - if (PEND) { - return ONIGERR_EMPTY_GROUP_NAME; - } - else { - PFETCH(c); - if (c == end_code) - return ONIGERR_EMPTY_GROUP_NAME; - - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else if (c == '-') { - is_num = 2; - sign = -1; - pnum_head = p; - } - else if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - while (!PEND) { - name_end = p; - PFETCH(c); - if (c == end_code || c == ')' || c == '+' || c == '-') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; - break; - } - - if (is_num != 0) { - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; - } - } - else if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - if (r == 0 && c != end_code) { - if (c == '+' || c == '-') { - int level; - int flag = (c == '-' ? -1 : 1); - - PFETCH(c); - if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; - PUNFETCH; - level = onig_scan_unsigned_number(&p, end, enc); - if (level < 0) return ONIGERR_TOO_BIG_NUMBER; - *rlevel = (level * flag); - exist_level = 1; - - PFETCH(c); - if (c == end_code) - goto end; - } - - err: - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } - - end: - if (r == 0) { - if (is_num != 0) { - *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); - if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) goto err; - - *rback_num *= sign; - } - - *rname_end = name_end; - *src = p; - return (exist_level ? 1 : 0); - } - else { - onig_scan_env_set_error_string(env, r, *src, name_end); - return r; - } -} -#endif /* USE_BACKREF_WITH_LEVEL */ - -/* - def: 0 -> define name (don't allow number name) - 1 -> reference name (allow number name) -*/ -static int -fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) -{ - int r, is_num, sign; - OnigCodePoint end_code; - OnigCodePoint c = 0; - OnigEncoding enc = env->enc; - UChar *name_end; - UChar *pnum_head; - UChar *p = *src; - - *rback_num = 0; - - end_code = get_name_end_code_point(start_code); - - name_end = end; - pnum_head = *src; - r = 0; - is_num = 0; - sign = 1; - if (PEND) { - return ONIGERR_EMPTY_GROUP_NAME; - } - else { - PFETCH_S(c); - if (c == end_code) - return ONIGERR_EMPTY_GROUP_NAME; - - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - if (ref == 1) - is_num = 1; - else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; - } - } - else if (c == '-') { - if (ref == 1) { - is_num = 2; - sign = -1; - pnum_head = p; - } - else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; - } - } - else if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - if (r == 0) { - while (!PEND) { - name_end = p; - PFETCH_S(c); - if (c == end_code || c == ')') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; - break; - } - - if (is_num != 0) 
{ - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else { - if (!ONIGENC_IS_CODE_WORD(enc, c)) - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - else - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; - } - } - else { - if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - } - - if (c != end_code) { - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } - - if (is_num != 0) { - *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); - if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; - } - - *rback_num *= sign; - } - - *rname_end = name_end; - *src = p; - return 0; - } - else { - while (!PEND) { - name_end = p; - PFETCH_S(c); - if (c == end_code || c == ')') - break; - } - if (PEND) - name_end = end; - - err: - onig_scan_env_set_error_string(env, r, *src, name_end); - return r; - } -} -#else -static int -fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) -{ - int r, is_num, sign; - OnigCodePoint end_code; - OnigCodePoint c = 0; - UChar *name_end; - OnigEncoding enc = env->enc; - UChar *pnum_head; - UChar *p = *src; - PFETCH_READY; - - *rback_num = 0; - - end_code = get_name_end_code_point(start_code); - - *rname_end = name_end = end; - r = 0; - pnum_head = *src; - is_num = 0; - sign = 1; - - if (PEND) { - return ONIGERR_EMPTY_GROUP_NAME; - } - else { - PFETCH(c); - if (c == end_code) - return ONIGERR_EMPTY_GROUP_NAME; - - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else if (c == '-') { - is_num = 2; - sign = -1; - pnum_head = p; - } - else { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - while (!PEND) { - name_end = p; - - PFETCH(c); - if (c == end_code || c == ')') break; - if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - if (r == 0 && c != end_code) { - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } - - if (r == 0) { - *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); - if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; - } - *rback_num *= sign; - - *rname_end = name_end; - *src = p; - return 0; - } - else { - err: - onig_scan_env_set_error_string(env, r, *src, name_end); - return r; - } -} -#endif /* USE_NAMED_GROUP */ - -static void -CC_ESC_WARN(ScanEnv* env, UChar *c) -{ - if (onig_warn == onig_null_warn) return ; - - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { - UChar buf[WARN_BUFSIZE]; - onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - (UChar* )"character class has '%s' without escape", c); - (*onig_warn)((char* )buf); - } -} - -static void -CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c) -{ - if (onig_warn == onig_null_warn) return ; - - if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { - UChar buf[WARN_BUFSIZE]; - onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, - (env)->pattern, (env)->pattern_end, - (UChar* )"regular expression has '%s' without escape", c); - (*onig_warn)((char* )buf); - } -} - -static UChar* -find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, - UChar **next, OnigEncoding enc) -{ - int i; - OnigCodePoint x; - UChar *q; - UChar *p = from; - - while (p < to) { - x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enclen(enc, p); - if (x == s[0]) { - for (i = 1; i < n && q < to; i++) { - x = ONIGENC_MBC_TO_CODE(enc, q, to); - if (x != s[i]) break; - q += enclen(enc, q); - } - if (i >= n) { - if (IS_NOT_NULL(next)) - *next = q; - return p; - } - } - p = q; - } - return NULL_UCHARP; -} - -static int 
-str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, - OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn) -{ - int i, in_esc; - OnigCodePoint x; - UChar *q; - UChar *p = from; - - in_esc = 0; - while (p < to) { - if (in_esc) { - in_esc = 0; - p += enclen(enc, p); - } - else { - x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enclen(enc, p); - if (x == s[0]) { - for (i = 1; i < n && q < to; i++) { - x = ONIGENC_MBC_TO_CODE(enc, q, to); - if (x != s[i]) break; - q += enclen(enc, q); - } - if (i >= n) return 1; - p += enclen(enc, p); - } - else { - x = ONIGENC_MBC_TO_CODE(enc, p, to); - if (x == bad) return 0; - else if (x == MC_ESC(syn)) in_esc = 1; - p = q; - } - } - } - return 0; -} - -static int -fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) -{ - int num; - OnigCodePoint c, c2; - OnigSyntaxType* syn = env->syntax; - OnigEncoding enc = env->enc; - UChar* prev; - UChar* p = *src; - PFETCH_READY; - - if (PEND) { - tok->type = TK_EOT; - return tok->type; - } - - PFETCH(c); - tok->type = TK_CHAR; - tok->base = 0; - tok->u.c = c; - tok->escaped = 0; - - if (c == ']') { - tok->type = TK_CC_CLOSE; - } - else if (c == '-') { - tok->type = TK_CC_RANGE; - } - else if (c == MC_ESC(syn)) { - if (! 
IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) - goto end; - - if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; - - PFETCH(c); - tok->escaped = 1; - tok->u.c = c; - switch (c) { - case 'w': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_WORD; - tok->u.prop.not = 0; - break; - case 'W': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_WORD; - tok->u.prop.not = 1; - break; - case 'd': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; - tok->u.prop.not = 0; - break; - case 'D': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; - tok->u.prop.not = 1; - break; - case 's': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; - tok->u.prop.not = 0; - break; - case 'S': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; - tok->u.prop.not = 1; - break; - case 'h': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.not = 0; - break; - case 'H': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.not = 1; - break; - - case 'p': - case 'P': - c2 = PPEEK; - if (c2 == '{' && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { - PINC; - tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); - - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { - PFETCH(c2); - if (c2 == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 
1 : 0); - } - else - PUNFETCH; - } - } - break; - - case 'x': - if (PEND) break; - - prev = p; - if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { - PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND) { - c2 = PPEEK; - if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) - return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; - } - - if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) { - PINC; - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } - } - else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 16; - tok->u.c = num; - } - break; - - case 'u': - if (PEND) break; - - prev = p; - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; - } - break; - - case '0': - case '1': case '2': case '3': case '4': case '5': case '6': case '7': - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { - PUNFETCH; - prev = p; - num = scan_unsigned_octal_number(&p, end, 3, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. 
*/ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 8; - tok->u.c = num; - } - break; - - default: - PUNFETCH; - num = fetch_escaped_value(&p, end, env); - if (num < 0) return num; - if (tok->u.c != num) { - tok->u.code = (OnigCodePoint )num; - tok->type = TK_CODE_POINT; - } - break; - } - } - else if (c == '[') { - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) { - OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; - tok->backp = p; /* point at '[' is readed */ - PINC; - if (str_exist_check_with_esc(send, 2, p, end, - (OnigCodePoint )']', enc, syn)) { - tok->type = TK_POSIX_BRACKET_OPEN; - } - else { - PUNFETCH; - goto cc_in_cc; - } - } - else { - cc_in_cc: - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { - tok->type = TK_CC_CC_OPEN; - } - else { - CC_ESC_WARN(env, (UChar* )"["); - } - } - } - else if (c == '&') { - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) && - !PEND && (PPEEK_IS('&'))) { - PINC; - tok->type = TK_CC_AND; - } - } - - end: - *src = p; - return tok->type; -} - -static int -fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) -{ - int r, num; - OnigCodePoint c; - OnigEncoding enc = env->enc; - OnigSyntaxType* syn = env->syntax; - UChar* prev; - UChar* p = *src; - PFETCH_READY; - - start: - if (PEND) { - tok->type = TK_EOT; - return tok->type; - } - - tok->type = TK_STRING; - tok->base = 0; - tok->backp = p; - - PFETCH(c); - if (IS_MC_ESC_CODE(c, syn)) { - if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; - - tok->backp = p; - PFETCH(c); - - tok->u.c = c; - tok->escaped = 1; - switch (c) { - case '*': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break; - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '+': - if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break; - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 1; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '?': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break; - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = 1; - greedy_check: - if (!PEND && PPEEK_IS('?') && - IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { - PFETCH(c); - tok->u.repeat.greedy = 0; - tok->u.repeat.possessive = 0; - } - else { - possessive_check: - if (!PEND && PPEEK_IS('+') && - ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && - tok->type != TK_INTERVAL) || - (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && - tok->type == TK_INTERVAL))) { - PFETCH(c); - tok->u.repeat.greedy = 1; - tok->u.repeat.possessive = 1; - } - else { - tok->u.repeat.greedy = 1; - tok->u.repeat.possessive = 0; - } - } - break; - - case '{': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; - r = fetch_range_quantifier(&p, end, tok, env); - if (r < 0) return r; /* error */ - if (r == 0) goto greedy_check; - else if (r == 2) { /* {n} */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) - goto possessive_check; - - goto greedy_check; - } - /* r == 1 : normal char */ - break; - - case '|': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break; - tok->type = TK_ALT; - break; - - case '(': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_OPEN; - break; - - case ')': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_CLOSE; - break; - - case 'w': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_WORD; - tok->u.prop.not = 0; - break; - - case 'W': - if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_WORD; - tok->u.prop.not = 1; - break; - - case 'b': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BOUND; - break; - - case 'B': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_NOT_WORD_BOUND; - break; - -#ifdef USE_WORD_BEGIN_END - case '<': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BEGIN; - break; - - case '>': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_END; - break; -#endif - - case 's': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; - tok->u.prop.not = 0; - break; - - case 'S': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; - tok->u.prop.not = 1; - break; - - case 'd': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; - tok->u.prop.not = 0; - break; - - case 'D': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; - tok->u.prop.not = 1; - break; - - case 'h': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.not = 0; - break; - - case 'H': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.not = 1; - break; - - case 'A': - if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; - begin_buf: - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_BUF; - break; - - case 'Z': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_SEMI_END_BUF; - break; - - case 'z': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; - end_buf: - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_END_BUF; - break; - - case 'G': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_POSITION; - break; - - case '`': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; - goto begin_buf; - break; - - case '\'': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; - goto end_buf; - break; - - case 'x': - if (PEND) break; - - prev = p; - if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { - PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND) { - if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) - return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; - } - - if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) { - PINC; - tok->type = TK_CODE_POINT; - tok->u.code = (OnigCodePoint )num; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } - } - else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 16; - tok->u.c = num; - } - break; - - case 'u': - if (PEND) break; - - prev = p; - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. 
*/ - num = 0; /* but, it's not error */ - } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; - } - break; - - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - PUNFETCH; - prev = p; - num = onig_scan_unsigned_number(&p, end, enc); - if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { - goto skip_backref; - } - - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && - (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) - return ONIGERR_INVALID_BACKREF; - } - - tok->type = TK_BACKREF; - tok->u.backref.num = 1; - tok->u.backref.ref1 = num; - tok->u.backref.by_name = 0; -#ifdef USE_BACKREF_WITH_LEVEL - tok->u.backref.exist_level = 0; -#endif - break; - } - - skip_backref: - if (c == '8' || c == '9') { - /* normal char */ - p = prev; PINC; - break; - } - - p = prev; - /* fall through */ - case '0': - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { - prev = p; - num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 8; - tok->u.c = num; - } - else if (c != '0') { - PINC; - } - break; - -#ifdef USE_NAMED_GROUP - case 'k': - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { - PFETCH(c); - if (c == '<' || c == '\'') { - UChar* name_end; - int* backs; - int back_num; - - prev = p; - -#ifdef USE_BACKREF_WITH_LEVEL - name_end = NULL_UCHARP; /* no need. escape gcc warning. 
*/ - r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end, - env, &back_num, &tok->u.backref.level); - if (r == 1) tok->u.backref.exist_level = 1; - else tok->u.backref.exist_level = 0; -#else - r = fetch_name(&p, end, &name_end, env, &back_num, 1); -#endif - if (r < 0) return r; - - if (back_num != 0) { - if (back_num < 0) { - back_num = BACKREF_REL_TO_ABS(back_num, env); - if (back_num <= 0) - return ONIGERR_INVALID_BACKREF; - } - - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - if (back_num > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[back_num])) - return ONIGERR_INVALID_BACKREF; - } - tok->type = TK_BACKREF; - tok->u.backref.by_name = 0; - tok->u.backref.num = 1; - tok->u.backref.ref1 = back_num; - } - else { - num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); - if (num <= 0) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - int i; - for (i = 0; i < num; i++) { - if (backs[i] > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) - return ONIGERR_INVALID_BACKREF; - } - } - - tok->type = TK_BACKREF; - tok->u.backref.by_name = 1; - if (num == 1) { - tok->u.backref.num = 1; - tok->u.backref.ref1 = backs[0]; - } - else { - tok->u.backref.num = num; - tok->u.backref.refs = backs; - } - } - } - else - PUNFETCH; - } - break; -#endif - -#ifdef USE_SUBEXP_CALL - case 'g': - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { - PFETCH(c); - if (c == '<' || c == '\'') { - int gnum; - UChar* name_end; - - prev = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); - if (r < 0) return r; - - tok->type = TK_CALL; - tok->u.call.name = prev; - tok->u.call.name_end = name_end; - tok->u.call.gnum = gnum; - } - else - PUNFETCH; - } - break; -#endif - - case 'Q': - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) { - tok->type = TK_QUOTE_OPEN; 
- } - break; - - case 'p': - case 'P': - if (PPEEK_IS('{') && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { - PINC; - tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); - - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { - PFETCH(c); - if (c == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); - } - else - PUNFETCH; - } - } - break; - - default: - PUNFETCH; - num = fetch_escaped_value(&p, end, env); - if (num < 0) return num; - /* set_raw: */ - if (tok->u.c != num) { - tok->type = TK_CODE_POINT; - tok->u.code = (OnigCodePoint )num; - } - else { /* string */ - p = tok->backp + enclen(enc, tok->backp); - } - break; - } - } - else { - tok->u.c = c; - tok->escaped = 0; - -#ifdef USE_VARIABLE_META_CHARS - if ((c != ONIG_INEFFECTIVE_META_CHAR) && - IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { - if (c == MC_ANYCHAR(syn)) - goto any_char; - else if (c == MC_ANYTIME(syn)) - goto anytime; - else if (c == MC_ZERO_OR_ONE_TIME(syn)) - goto zero_or_one_time; - else if (c == MC_ONE_OR_MORE_TIME(syn)) - goto one_or_more_time; - else if (c == MC_ANYCHAR_ANYTIME(syn)) { - tok->type = TK_ANYCHAR_ANYTIME; - goto out; - } - } -#endif - - switch (c) { - case '.': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; -#ifdef USE_VARIABLE_META_CHARS - any_char: -#endif - tok->type = TK_ANYCHAR; - break; - - case '*': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; -#ifdef USE_VARIABLE_META_CHARS - anytime: -#endif - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '+': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; -#ifdef USE_VARIABLE_META_CHARS - one_or_more_time: -#endif - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 1; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '?': - if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; -#ifdef USE_VARIABLE_META_CHARS - zero_or_one_time: -#endif - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = 1; - goto greedy_check; - break; - - case '{': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; - r = fetch_range_quantifier(&p, end, tok, env); - if (r < 0) return r; /* error */ - if (r == 0) goto greedy_check; - else if (r == 2) { /* {n} */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) - goto possessive_check; - - goto greedy_check; - } - /* r == 1 : normal char */ - break; - - case '|': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break; - tok->type = TK_ALT; - break; - - case '(': - if (PPEEK_IS('?') && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { - PINC; - if (PPEEK_IS('#')) { - PFETCH(c); - while (1) { - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - PFETCH(c); - if (c == MC_ESC(syn)) { - if (!PEND) PFETCH(c); - } - else { - if (c == ')') break; - } - } - goto start; - } - PUNFETCH; - } - - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_OPEN; - break; - - case ')': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_CLOSE; - break; - - case '^': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = (IS_SINGLELINE(env->option) - ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); - break; - - case '$': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = (IS_SINGLELINE(env->option) - ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); - break; - - case '[': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break; - tok->type = TK_CC_OPEN; - break; - - case ']': - if (*src > env->pattern) /* /].../ is allowed. 
*/ - CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); - break; - - case '#': - if (IS_EXTEND(env->option)) { - while (!PEND) { - PFETCH(c); - if (ONIGENC_IS_CODE_NEWLINE(enc, c)) - break; - } - goto start; - break; - } - break; - - case ' ': case '\t': case '\n': case '\r': case '\f': - if (IS_EXTEND(env->option)) - goto start; - break; - - default: - /* string */ - break; - } - } - -#ifdef USE_VARIABLE_META_CHARS - out: -#endif - *src = p; - return tok->type; -} - -static int -add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, - OnigEncoding enc ARG_UNUSED, - OnigCodePoint sb_out, const OnigCodePoint mbr[]) -{ - int i, r; - OnigCodePoint j; - - int n = ONIGENC_CODE_RANGE_NUM(mbr); - - if (not == 0) { - for (i = 0; i < n; i++) { - for (j = ONIGENC_CODE_RANGE_FROM(mbr, i); - j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { - if (j >= sb_out) { - if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++; - else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { - r = add_code_range_to_buf(&(cc->mbuf), j, - ONIGENC_CODE_RANGE_TO(mbr, i)); - if (r != 0) return r; - i++; - } - - goto sb_end; - } - BITSET_SET_BIT(cc->bs, j); - } - } - - sb_end: - for ( ; i < n; i++) { - r = add_code_range_to_buf(&(cc->mbuf), - ONIGENC_CODE_RANGE_FROM(mbr, i), - ONIGENC_CODE_RANGE_TO(mbr, i)); - if (r != 0) return r; - } - } - else { - OnigCodePoint prev = 0; - - for (i = 0; i < n; i++) { - for (j = prev; - j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { - if (j >= sb_out) { - goto sb_end2; - } - BITSET_SET_BIT(cc->bs, j); - } - prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; - } - for (j = prev; j < sb_out; j++) { - BITSET_SET_BIT(cc->bs, j); - } - - sb_end2: - prev = sb_out; - - for (i = 0; i < n; i++) { - if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { - r = add_code_range_to_buf(&(cc->mbuf), prev, - ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); - if (r != 0) return r; - } - prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; - } - if (prev < 0x7fffffff) { - r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff); - if 
(r != 0) return r; - } - } - - return 0; -} - -static int -add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) -{ - int c, r; - const OnigCodePoint *ranges; - OnigCodePoint sb_out; - OnigEncoding enc = env->enc; - - r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); - if (r == 0) { - return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges); - } - else if (r != ONIG_NO_SUPPORT_CONFIG) { - return r; - } - - r = 0; - switch (ctype) { - case ONIGENC_CTYPE_ALPHA: - case ONIGENC_CTYPE_BLANK: - case ONIGENC_CTYPE_CNTRL: - case ONIGENC_CTYPE_DIGIT: - case ONIGENC_CTYPE_LOWER: - case ONIGENC_CTYPE_PUNCT: - case ONIGENC_CTYPE_SPACE: - case ONIGENC_CTYPE_UPPER: - case ONIGENC_CTYPE_XDIGIT: - case ONIGENC_CTYPE_ASCII: - case ONIGENC_CTYPE_ALNUM: - if (not != 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); - } - else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); - } - } - break; - - case ONIGENC_CTYPE_GRAPH: - case ONIGENC_CTYPE_PRINT: - if (not != 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); - } - } - else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); - } - break; - - case ONIGENC_CTYPE_WORD: - if (not == 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); - } - else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ - && ! 
ONIGENC_IS_CODE_WORD(enc, c)) - BITSET_SET_BIT(cc->bs, c); - } - } - break; - - default: - return ONIGERR_PARSER_BUG; - break; - } - - return r; -} - -static int -parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) -{ -#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 -#define POSIX_BRACKET_NAME_MIN_LEN 4 - - static PosixBracketEntryType PBS[] = { - { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, - { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, - { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, - { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, - { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, - { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, - { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, - { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, - { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, - { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 }, - { (UChar* )NULL, -1, 0 } - }; - - PosixBracketEntryType *pb; - int not, i, r; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar *p = *src; - - if (PPEEK_IS('^')) { - PINC_S; - not = 1; - } - else - not = 0; - - if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3) - goto not_posix_bracket; - - for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { - if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { - p = (UChar* )onigenc_step(enc, p, end, pb->len); - if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) - return ONIGERR_INVALID_POSIX_BRACKET_TYPE; - - r = add_ctype_to_cc(cc, pb->ctype, not, env); - if (r != 0) return r; - - PINC_S; PINC_S; - *src = p; - return 0; - } - } - - not_posix_bracket: - c = 0; - i = 0; - while (!PEND && ((c = PPEEK) != ':') && c != ']') { - PINC_S; - if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break; - } - if (c == ':' && ! PEND) { - PINC_S; - if (! 
PEND) { - PFETCH_S(c); - if (c == ']') - return ONIGERR_INVALID_POSIX_BRACKET_TYPE; - } - } - - return 1; /* 1: is not POSIX bracket, but no error. */ -} - -static int -fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) -{ - int r; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar *prev, *start, *p = *src; - - r = 0; - start = prev = p; - - while (!PEND) { - prev = p; - PFETCH_S(c); - if (c == '}') { - r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev); - if (r < 0) break; - - *src = p; - return r; - } - else if (c == '(' || c == ')' || c == '{' || c == '|') { - r = ONIGERR_INVALID_CHAR_PROPERTY_NAME; - break; - } - } - - onig_scan_env_set_error_string(env, r, *src, prev); - return r; -} - -static int -parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, - ScanEnv* env) -{ - int r, ctype; - CClassNode* cc; - - ctype = fetch_char_property_to_ctype(src, end, env); - if (ctype < 0) return ctype; - - *np = node_new_cclass(); - CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); - r = add_ctype_to_cc(cc, ctype, 0, env); - if (r != 0) return r; - if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); - - return 0; -} - - -enum CCSTATE { - CCS_VALUE, - CCS_RANGE, - CCS_COMPLETE, - CCS_START -}; - -enum CCVALTYPE { - CCV_SB, - CCV_CODE_POINT, - CCV_CLASS -}; - -static int -next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, - enum CCSTATE* state, ScanEnv* env) -{ - int r; - - if (*state == CCS_RANGE) - return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; - - if (*state == CCS_VALUE && *type != CCV_CLASS) { - if (*type == CCV_SB) - BITSET_SET_BIT(cc->bs, (int )(*vs)); - else if (*type == CCV_CODE_POINT) { - r = add_code_range(&(cc->mbuf), env, *vs, *vs); - if (r < 0) return r; - } - } - - *state = CCS_VALUE; - *type = CCV_CLASS; - return 0; -} - -static int -next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, - int* vs_israw, int v_israw, - enum CCVALTYPE intype, enum CCVALTYPE* type, - enum CCSTATE* 
state, ScanEnv* env) -{ - int r; - - switch (*state) { - case CCS_VALUE: - if (*type == CCV_SB) - BITSET_SET_BIT(cc->bs, (int )(*vs)); - else if (*type == CCV_CODE_POINT) { - r = add_code_range(&(cc->mbuf), env, *vs, *vs); - if (r < 0) return r; - } - break; - - case CCS_RANGE: - if (intype == *type) { - if (intype == CCV_SB) { - if (*vs > 0xff || v > 0xff) - return ONIGERR_INVALID_CODE_POINT_VALUE; - - if (*vs > v) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - bitset_set_range(cc->bs, (int )*vs, (int )v); - } - else { - r = add_code_range(&(cc->mbuf), env, *vs, v); - if (r < 0) return r; - } - } - else { -#if 0 - if (intype == CCV_CODE_POINT && *type == CCV_SB) { -#endif - if (*vs > v) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); - r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); - if (r < 0) return r; -#if 0 - } - else - return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; -#endif - } - ccs_range_end: - *state = CCS_COMPLETE; - break; - - case CCS_COMPLETE: - case CCS_START: - *state = CCS_VALUE; - break; - - default: - break; - } - - *vs_israw = v_israw; - *vs = v; - *type = intype; - return 0; -} - -static int -code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, - ScanEnv* env) -{ - int in_esc; - OnigCodePoint code; - OnigEncoding enc = env->enc; - UChar* p = from; - - in_esc = 0; - while (! 
PEND) { - if (ignore_escaped && in_esc) { - in_esc = 0; - } - else { - PFETCH_S(code); - if (code == c) return 1; - if (code == MC_ESC(env->syntax)) in_esc = 1; - } - } - return 0; -} - -static int -parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, - ScanEnv* env) -{ - int r, neg, len, fetched, and_start; - OnigCodePoint v, vs; - UChar *p; - Node* node; - CClassNode *cc, *prev_cc; - CClassNode work_cc; - - enum CCSTATE state; - enum CCVALTYPE val_type, in_type; - int val_israw, in_israw; - - prev_cc = (CClassNode* )NULL; - *np = NULL_NODE; - r = fetch_token_in_cc(tok, src, end, env); - if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { - neg = 1; - r = fetch_token_in_cc(tok, src, end, env); - } - else { - neg = 0; - } - - if (r < 0) return r; - if (r == TK_CC_CLOSE) { - if (! code_exist_check((OnigCodePoint )']', - *src, env->pattern_end, 1, env)) - return ONIGERR_EMPTY_CHAR_CLASS; - - CC_ESC_WARN(env, (UChar* )"]"); - r = tok->type = TK_CHAR; /* allow []...] */ - } - - *np = node = node_new_cclass(); - CHECK_NULL_RETURN_MEMERR(node); - cc = NCCLASS(node); - - and_start = 0; - state = CCS_START; - p = *src; - while (r != TK_CC_CLOSE) { - fetched = 0; - switch (r) { - case TK_CHAR: - len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c); - if (len > 1) { - in_type = CCV_CODE_POINT; - } - else if (len < 0) { - r = len; - goto err; - } - else { - sb_char: - in_type = CCV_SB; - } - v = (OnigCodePoint )tok->u.c; - in_israw = 0; - goto val_entry2; - break; - - case TK_RAW_BYTE: - /* tok->base != 0 : octal or hexadec. */ - if (! 
ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; - UChar* psave = p; - int i, base = tok->base; - - buf[0] = tok->u.c; - for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - if (r != TK_RAW_BYTE || tok->base != base) { - fetched = 1; - break; - } - buf[i] = tok->u.c; - } - - if (i < ONIGENC_MBC_MINLEN(env->enc)) { - r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - goto err; - } - - len = enclen(env->enc, buf); - if (i < len) { - r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - goto err; - } - else if (i > len) { /* fetch back */ - p = psave; - for (i = 1; i < len; i++) { - r = fetch_token_in_cc(tok, &p, end, env); - } - fetched = 0; - } - - if (i == 1) { - v = (OnigCodePoint )buf[0]; - goto raw_single; - } - else { - v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); - in_type = CCV_CODE_POINT; - } - } - else { - v = (OnigCodePoint )tok->u.c; - raw_single: - in_type = CCV_SB; - } - in_israw = 1; - goto val_entry2; - break; - - case TK_CODE_POINT: - v = tok->u.code; - in_israw = 1; - val_entry: - len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); - if (len < 0) { - r = len; - goto err; - } - in_type = (len == 1 ? 
CCV_SB : CCV_CODE_POINT); - val_entry2: - r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, - &state, env); - if (r != 0) goto err; - break; - - case TK_POSIX_BRACKET_OPEN: - r = parse_posix_bracket(cc, &p, end, env); - if (r < 0) goto err; - if (r == 1) { /* is not POSIX bracket */ - CC_ESC_WARN(env, (UChar* )"["); - p = tok->backp; - v = (OnigCodePoint )tok->u.c; - in_israw = 0; - goto val_entry; - } - goto next_class; - break; - - case TK_CHAR_TYPE: - r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env); - if (r != 0) return r; - - next_class: - r = next_state_class(cc, &vs, &val_type, &state, env); - if (r != 0) goto err; - break; - - case TK_CHAR_PROPERTY: - { - int ctype; - - ctype = fetch_char_property_to_ctype(&p, end, env); - if (ctype < 0) return ctype; - r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env); - if (r != 0) return r; - goto next_class; - } - break; - - case TK_CC_RANGE: - if (state == CCS_VALUE) { - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - if (r == TK_CC_CLOSE) { /* allow [x-] */ - range_end_val: - v = (OnigCodePoint )'-'; - in_israw = 0; - goto val_entry; - } - else if (r == TK_CC_AND) { - CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; - } - state = CCS_RANGE; - } - else if (state == CCS_START) { - /* [-xa] is allowed */ - v = (OnigCodePoint )tok->u.c; - in_israw = 0; - - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - /* [--x] or [a&&-x] is warned. 
*/ - if (r == TK_CC_RANGE || and_start != 0) - CC_ESC_WARN(env, (UChar* )"-"); - - goto val_entry; - } - else if (state == CCS_RANGE) { - CC_ESC_WARN(env, (UChar* )"-"); - goto sb_char; /* [!--x] is allowed */ - } - else { /* CCS_COMPLETE */ - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ - else if (r == TK_CC_AND) { - CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; - } - - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { - CC_ESC_WARN(env, (UChar* )"-"); - goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ - } - r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; - goto err; - } - break; - - case TK_CC_CC_OPEN: /* [ */ - { - Node *anode; - CClassNode* acc; - - r = parse_char_class(&anode, tok, &p, end, env); - if (r != 0) goto cc_open_err; - acc = NCCLASS(anode); - r = or_cclass(cc, acc, env->enc); - - onig_node_free(anode); - cc_open_err: - if (r != 0) goto err; - } - break; - - case TK_CC_AND: /* && */ - { - if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, - &val_type, &state, env); - if (r != 0) goto err; - } - /* initialize local variables */ - and_start = 1; - state = CCS_START; - - if (IS_NOT_NULL(prev_cc)) { - r = and_cclass(prev_cc, cc, env->enc); - if (r != 0) goto err; - bbuf_free(cc->mbuf); - } - else { - prev_cc = cc; - cc = &work_cc; - } - initialize_cclass(cc); - } - break; - - case TK_EOT: - r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS; - goto err; - break; - default: - r = ONIGERR_PARSER_BUG; - goto err; - break; - } - - if (fetched) - r = tok->type; - else { - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - } - } - - if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, - &val_type, &state, env); - if (r != 0) goto err; - } - - if (IS_NOT_NULL(prev_cc)) { - r = and_cclass(prev_cc, cc, env->enc); - if (r != 0) goto err; - bbuf_free(cc->mbuf); - cc 
= prev_cc; - } - - if (neg != 0) - NCCLASS_SET_NOT(cc); - else - NCCLASS_CLEAR_NOT(cc); - if (IS_NCCLASS_NOT(cc) && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { - int is_empty; - - is_empty = (IS_NULL(cc->mbuf) ? 1 : 0); - if (is_empty != 0) - BITSET_IS_EMPTY(cc->bs, is_empty); - - if (is_empty == 0) { -#define NEWLINE_CODE 0x0a - - if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { - if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) - BITSET_SET_BIT(cc->bs, NEWLINE_CODE); - else - add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); - } - } - } - *src = p; - return 0; - - err: - if (cc != NCCLASS(*np)) - bbuf_free(cc->mbuf); - onig_node_free(*np); - return r; -} - -static int parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env); - -static int -parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) -{ - int r, num; - Node *target; - OnigOptionType option; - OnigCodePoint c; - OnigEncoding enc = env->enc; - -#ifdef USE_NAMED_GROUP - int list_capture; -#endif - - UChar* p = *src; - PFETCH_READY; - - *np = NULL; - if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; - - option = env->option; - if (PPEEK_IS('?') && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { - PINC; - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - - PFETCH(c); - switch (c) { - case ':': /* (?:...) grouping only */ - group: - r = fetch_token(tok, &p, end, env); - if (r < 0) return r; - r = parse_subexp(np, tok, term, &p, end, env); - if (r < 0) return r; - *src = p; - return 1; /* group */ - break; - - case '=': - *np = onig_node_new_anchor(ANCHOR_PREC_READ); - break; - case '!': /* preceding read */ - *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); - break; - case '>': /* (?>...) 
stop backtrack */ - *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); - break; - -#ifdef USE_NAMED_GROUP - case '\'': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - goto named_group1; - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; -#endif - - case '<': /* look behind (?<=...), (?syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - UChar *name; - UChar *name_end; - - PUNFETCH; - c = '<'; - - named_group1: - list_capture = 0; - - named_group2: - name = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0); - if (r < 0) return r; - - num = scan_env_add_mem_entry(env); - if (num < 0) return num; - if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM) - return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; - - r = name_add(env->reg, name, name_end, num, env); - if (r != 0) return r; - *np = node_new_enclose_memory(env->option, 1); - CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->regnum = num; - if (list_capture != 0) - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); - env->num_named++; - } - else { - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - } -#else - else { - return ONIGERR_UNDEFINED_GROUP_OPTION; - } -#endif - break; - - case '@': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { -#ifdef USE_NAMED_GROUP - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - PFETCH(c); - if (c == '<' || c == '\'') { - list_capture = 1; - goto named_group2; /* (?@...) 
*/ - } - PUNFETCH; - } -#endif - *np = node_new_enclose_memory(env->option, 0); - CHECK_NULL_RETURN_MEMERR(*np); - num = scan_env_add_mem_entry(env); - if (num < 0) { - onig_node_free(*np); - return num; - } - else if (num >= (int )BIT_STATUS_BITS_NUM) { - onig_node_free(*np); - return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; - } - NENCLOSE(*np)->regnum = num; - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); - } - else { - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - break; - -#ifdef USE_POSIXLINE_OPTION - case 'p': -#endif - case '-': case 'i': case 'm': case 's': case 'x': - { - int neg = 0; - - while (1) { - switch (c) { - case ':': - case ')': - break; - - case '-': neg = 1; break; - case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break; - case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; - case 's': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, ONIG_OPTION_MULTILINE, neg); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; - - case 'm': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 
1 : 0)); - } - else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { - ONOFF(option, ONIG_OPTION_MULTILINE, neg); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; -#ifdef USE_POSIXLINE_OPTION - case 'p': - ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); - break; -#endif - default: - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - - if (c == ')') { - *np = node_new_option(option); - CHECK_NULL_RETURN_MEMERR(*np); - *src = p; - return 2; /* option only */ - } - else if (c == ':') { - OnigOptionType prev = env->option; - - env->option = option; - r = fetch_token(tok, &p, end, env); - if (r < 0) return r; - r = parse_subexp(&target, tok, term, &p, end, env); - env->option = prev; - if (r < 0) return r; - *np = node_new_option(option); - CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->target = target; - *src = p; - return 0; - } - - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - PFETCH(c); - } - } - break; - - default: - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - } - else { - if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) - goto group; - - *np = node_new_enclose_memory(env->option, 0); - CHECK_NULL_RETURN_MEMERR(*np); - num = scan_env_add_mem_entry(env); - if (num < 0) return num; - NENCLOSE(*np)->regnum = num; - } - - CHECK_NULL_RETURN_MEMERR(*np); - r = fetch_token(tok, &p, end, env); - if (r < 0) return r; - r = parse_subexp(&target, tok, term, &p, end, env); - if (r < 0) return r; - - if (NTYPE(*np) == NT_ANCHOR) - NANCHOR(*np)->target = target; - else { - NENCLOSE(*np)->target = target; - if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) { - /* Don't move this to previous of parse_subexp() */ - r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np); - if (r != 0) return r; - } - } - - *src = p; - return 0; -} - -static const char* PopularQStr[] = { - "?", "*", "+", "??", "*?", "+?" -}; - -static const char* ReduceQStr[] = { - "", "", "*", "*?", "??", "+ and ??", "+? and ?" 
-}; - -static int -set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) -{ - QtfrNode* qn; - - qn = NQTFR(qnode); - if (qn->lower == 1 && qn->upper == 1) { - return 1; - } - - switch (NTYPE(target)) { - case NT_STR: - if (! group) { - StrNode* sn = NSTR(target); - if (str_node_can_be_split(sn, env->enc)) { - Node* n = str_node_split_last_char(sn, env->enc); - if (IS_NOT_NULL(n)) { - qn->target = n; - return 2; - } - } - } - break; - - case NT_QTFR: - { /* check redundant double repeat. */ - /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ - QtfrNode* qnt = NQTFR(target); - int nestq_num = popular_quantifier_num(qn); - int targetq_num = popular_quantifier_num(qnt); - -#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { - UChar buf[WARN_BUFSIZE]; - - switch(ReduceTypeTable[targetq_num][nestq_num]) { - case RQ_ASIS: - break; - - case RQ_DEL: - if (onig_verb_warn != onig_null_warn) { - onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - (UChar* )"redundant nested repeat operator"); - (*onig_verb_warn)((char* )buf); - } - goto warn_exit; - break; - - default: - if (onig_verb_warn != onig_null_warn) { - onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - (UChar* )"nested repeat operator %s and %s was replaced with '%s'", - PopularQStr[targetq_num], PopularQStr[nestq_num], - ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); - (*onig_verb_warn)((char* )buf); - } - goto warn_exit; - break; - } - } - - warn_exit: -#endif - if (targetq_num >= 0) { - if (nestq_num >= 0) { - onig_reduce_nested_quantifier(qnode, target); - goto q_exit; - } - else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ - /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ - if (! 
IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { - qn->upper = (qn->lower == 0 ? 1 : qn->lower); - } - } - } - } - break; - - default: - break; - } - - qn->target = target; - q_exit: - return 0; -} - - -#ifdef USE_SHARED_CCLASS_TABLE - -#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8 - -/* for ctype node hash table */ - -typedef struct { - OnigEncoding enc; - int not; - int type; -} type_cclass_key; - -static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y) -{ - if (x->type != y->type) return 1; - if (x->enc != y->enc) return 1; - if (x->not != y->not) return 1; - return 0; -} - -static int type_cclass_hash(type_cclass_key* key) -{ - int i, val; - UChar *p; - - val = 0; - - p = (UChar* )&(key->enc); - for (i = 0; i < (int )sizeof(key->enc); i++) { - val = val * 997 + (int )*p++; - } - - p = (UChar* )(&key->type); - for (i = 0; i < (int )sizeof(key->type); i++) { - val = val * 997 + (int )*p++; - } - - val += key->not; - return val + (val >> 5); -} - -static struct st_hash_type type_type_cclass_hash = { - type_cclass_cmp, - type_cclass_hash, -}; - -static st_table* OnigTypeCClassTable; - - -static int -i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED) -{ - if (IS_NOT_NULL(node)) { - CClassNode* cc = NCCLASS(node); - if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); - xfree(node); - } - - if (IS_NOT_NULL(key)) xfree(key); - return ST_DELETE; -} - -extern int -onig_free_shared_cclass_table(void) -{ - if (IS_NOT_NULL(OnigTypeCClassTable)) { - onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); - onig_st_free_table(OnigTypeCClassTable); - OnigTypeCClassTable = NULL; - } - - return 0; -} - -#endif /* USE_SHARED_CCLASS_TABLE */ - - -#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS -static int -clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) -{ - BBuf *tbuf; - int r; - - if (IS_NCCLASS_NOT(cc)) { - bitset_invert(cc->bs); - - if (! 
ONIGENC_IS_SINGLEBYTE(enc)) { - r = not_code_range_buf(enc, cc->mbuf, &tbuf); - if (r != 0) return r; - - bbuf_free(cc->mbuf); - cc->mbuf = tbuf; - } - - NCCLASS_CLEAR_NOT(cc); - } - - return 0; -} -#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ - -typedef struct { - ScanEnv* env; - CClassNode* cc; - Node* alt_root; - Node** ptail; -} IApplyCaseFoldArg; - -static int -i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], - int to_len, void* arg) -{ - IApplyCaseFoldArg* iarg; - ScanEnv* env; - CClassNode* cc; - BitSetRef bs; - - iarg = (IApplyCaseFoldArg* )arg; - env = iarg->env; - cc = iarg->cc; - bs = cc->bs; - - if (to_len == 1) { - int is_in = onig_is_code_in_cc(env->enc, from, cc); -#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS - if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) || - (is_in == 0 && IS_NCCLASS_NOT(cc))) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { - add_code_range(&(cc->mbuf), env, *to, *to); - } - else { - BITSET_SET_BIT(bs, *to); - } - } -#else - if (is_in != 0) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { - if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); - add_code_range(&(cc->mbuf), env, *to, *to); - } - else { - if (IS_NCCLASS_NOT(cc)) { - BITSET_CLEAR_BIT(bs, *to); - } - else - BITSET_SET_BIT(bs, *to); - } - } -#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ - } - else { - int r, i, len; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - Node *snode = NULL_NODE; - - if (onig_is_code_in_cc(env->enc, from, cc) -#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS - && !IS_NCCLASS_NOT(cc) -#endif - ) { - for (i = 0; i < to_len; i++) { - len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); - if (i == 0) { - snode = onig_node_new_str(buf, buf + len); - CHECK_NULL_RETURN_MEMERR(snode); - - /* char-class expanded multi-char only - compare with string folded at match time. 
*/ - NSTRING_SET_AMBIG(snode); - } - else { - r = onig_node_str_cat(snode, buf, buf + len); - if (r < 0) { - onig_node_free(snode); - return r; - } - } - } - - *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE); - CHECK_NULL_RETURN_MEMERR(*(iarg->ptail)); - iarg->ptail = &(NCDR((*(iarg->ptail)))); - } - } - - return 0; -} - -static int -parse_exp(Node** np, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) -{ - int r, len, group = 0; - Node* qn; - Node** targetp; - - *np = NULL; - if (tok->type == (enum TokenSyms )term) - goto end_of_token; - - switch (tok->type) { - case TK_ALT: - case TK_EOT: - end_of_token: - *np = node_new_empty(); - return tok->type; - break; - - case TK_SUBEXP_OPEN: - r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env); - if (r < 0) return r; - if (r == 1) group = 1; - else if (r == 2) { /* option only */ - Node* target; - OnigOptionType prev = env->option; - - env->option = NENCLOSE(*np)->option; - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - r = parse_subexp(&target, tok, term, src, end, env); - env->option = prev; - if (r < 0) return r; - NENCLOSE(*np)->target = target; - return tok->type; - } - break; - - case TK_SUBEXP_CLOSE: - if (! 
IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP)) - return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS; - - if (tok->escaped) goto tk_raw_byte; - else goto tk_byte; - break; - - case TK_STRING: - tk_byte: - { - *np = node_new_str(tok->backp, *src); - CHECK_NULL_RETURN_MEMERR(*np); - - while (1) { - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - if (r != TK_STRING) break; - - r = onig_node_str_cat(*np, tok->backp, *src); - if (r < 0) return r; - } - - string_end: - targetp = np; - goto repeat; - } - break; - - case TK_RAW_BYTE: - tk_raw_byte: - { - *np = node_new_str_raw_char((UChar )tok->u.c); - CHECK_NULL_RETURN_MEMERR(*np); - len = 1; - while (1) { - if (len >= ONIGENC_MBC_MINLEN(env->enc)) { - if (len == enclen(env->enc, NSTR(*np)->s)) { - r = fetch_token(tok, src, end, env); - NSTRING_CLEAR_RAW(*np); - goto string_end; - } - } - - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - if (r != TK_RAW_BYTE) { - /* Don't use this, it is wrong for little endian encodings. 
*/ -#ifdef USE_PAD_TO_SHORT_BYTE_CHAR - int rem; - if (len < ONIGENC_MBC_MINLEN(env->enc)) { - rem = ONIGENC_MBC_MINLEN(env->enc) - len; - (void )node_str_head_pad(NSTR(*np), rem, (UChar )0); - if (len + rem == enclen(env->enc, NSTR(*np)->s)) { - NSTRING_CLEAR_RAW(*np); - goto string_end; - } - } -#endif - return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - } - - r = node_str_cat_char(*np, (UChar )tok->u.c); - if (r < 0) return r; - - len++; - } - } - break; - - case TK_CODE_POINT: - { - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); - if (num < 0) return num; -#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG - *np = node_new_str_raw(buf, buf + num); -#else - *np = node_new_str(buf, buf + num); -#endif - CHECK_NULL_RETURN_MEMERR(*np); - } - break; - - case TK_QUOTE_OPEN: - { - OnigCodePoint end_op[2]; - UChar *qstart, *qend, *nextp; - - end_op[0] = (OnigCodePoint )MC_ESC(env->syntax); - end_op[1] = (OnigCodePoint )'E'; - qstart = *src; - qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc); - if (IS_NULL(qend)) { - nextp = qend = end; - } - *np = node_new_str(qstart, qend); - CHECK_NULL_RETURN_MEMERR(*np); - *src = nextp; - } - break; - - case TK_CHAR_TYPE: - { - switch (tok->u.prop.ctype) { - case ONIGENC_CTYPE_WORD: - *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not); - CHECK_NULL_RETURN_MEMERR(*np); - break; - - case ONIGENC_CTYPE_SPACE: - case ONIGENC_CTYPE_DIGIT: - case ONIGENC_CTYPE_XDIGIT: - { - CClassNode* cc; - -#ifdef USE_SHARED_CCLASS_TABLE - const OnigCodePoint *mbr; - OnigCodePoint sb_out; - - r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype, - &sb_out, &mbr); - if (r == 0 && - ONIGENC_CODE_RANGE_NUM(mbr) - >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) { - type_cclass_key key; - type_cclass_key* new_key; - - key.enc = env->enc; - key.not = tok->u.prop.not; - key.type = tok->u.prop.ctype; - - THREAD_ATOMIC_START; - - if (IS_NULL(OnigTypeCClassTable)) { - OnigTypeCClassTable - = 
onig_st_init_table_with_size(&type_type_cclass_hash, 10); - if (IS_NULL(OnigTypeCClassTable)) { - THREAD_ATOMIC_END; - return ONIGERR_MEMORY; - } - } - else { - if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key, - (st_data_t* )np)) { - THREAD_ATOMIC_END; - break; - } - } - - *np = node_new_cclass_by_codepoint_range(tok->u.prop.not, - sb_out, mbr); - if (IS_NULL(*np)) { - THREAD_ATOMIC_END; - return ONIGERR_MEMORY; - } - - cc = NCCLASS(*np); - NCCLASS_SET_SHARE(cc); - new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); - xmemcpy(new_key, &key, sizeof(type_cclass_key)); - onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key, - (st_data_t )*np); - - THREAD_ATOMIC_END; - } - else { -#endif - *np = node_new_cclass(); - CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); - add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); - if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); -#ifdef USE_SHARED_CCLASS_TABLE - } -#endif - } - break; - - default: - return ONIGERR_PARSER_BUG; - break; - } - } - break; - - case TK_CHAR_PROPERTY: - r = parse_char_property(np, tok, src, end, env); - if (r != 0) return r; - break; - - case TK_CC_OPEN: - { - CClassNode* cc; - - r = parse_char_class(np, tok, src, end, env); - if (r != 0) return r; - - cc = NCCLASS(*np); - if (IS_IGNORECASE(env->option)) { - IApplyCaseFoldArg iarg; - - iarg.env = env; - iarg.cc = cc; - iarg.alt_root = NULL_NODE; - iarg.ptail = &(iarg.alt_root); - - r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag, - i_apply_case_fold, &iarg); - if (r != 0) { - onig_node_free(iarg.alt_root); - return r; - } - if (IS_NOT_NULL(iarg.alt_root)) { - Node* work = onig_node_new_alt(*np, iarg.alt_root); - if (IS_NULL(work)) { - onig_node_free(iarg.alt_root); - return ONIGERR_MEMORY; - } - *np = work; - } - } - } - break; - - case TK_ANYCHAR: - *np = node_new_anychar(); - CHECK_NULL_RETURN_MEMERR(*np); - break; - - case TK_ANYCHAR_ANYTIME: - *np = node_new_anychar(); - CHECK_NULL_RETURN_MEMERR(*np); - qn = 
node_new_quantifier(0, REPEAT_INFINITE, 0); - CHECK_NULL_RETURN_MEMERR(qn); - NQTFR(qn)->target = *np; - *np = qn; - break; - - case TK_BACKREF: - len = tok->u.backref.num; - *np = node_new_backref(len, - (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), - tok->u.backref.by_name, -#ifdef USE_BACKREF_WITH_LEVEL - tok->u.backref.exist_level, - tok->u.backref.level, -#endif - env); - CHECK_NULL_RETURN_MEMERR(*np); - break; - -#ifdef USE_SUBEXP_CALL - case TK_CALL: - { - int gnum = tok->u.call.gnum; - - if (gnum < 0) { - gnum = BACKREF_REL_TO_ABS(gnum, env); - if (gnum <= 0) - return ONIGERR_INVALID_BACKREF; - } - *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum); - CHECK_NULL_RETURN_MEMERR(*np); - env->num_call++; - } - break; -#endif - - case TK_ANCHOR: - *np = onig_node_new_anchor(tok->u.anchor); - break; - - case TK_OP_REPEAT: - case TK_INTERVAL: - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS)) - return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; - else - *np = node_new_empty(); - } - else { - goto tk_byte; - } - break; - - default: - return ONIGERR_PARSER_BUG; - break; - } - - { - targetp = np; - - re_entry: - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - - repeat: - if (r == TK_OP_REPEAT || r == TK_INTERVAL) { - if (is_invalid_quantifier_target(*targetp)) - return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; - - qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, - (r == TK_INTERVAL ? 
1 : 0)); - CHECK_NULL_RETURN_MEMERR(qn); - NQTFR(qn)->greedy = tok->u.repeat.greedy; - r = set_quantifier(qn, *targetp, group, env); - if (r < 0) { - onig_node_free(qn); - return r; - } - - if (tok->u.repeat.possessive != 0) { - Node* en; - en = node_new_enclose(ENCLOSE_STOP_BACKTRACK); - if (IS_NULL(en)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - NENCLOSE(en)->target = qn; - qn = en; - } - - if (r == 0) { - *targetp = qn; - } - else if (r == 1) { - onig_node_free(qn); - } - else if (r == 2) { /* split case: /abc+/ */ - Node *tmp; - - *targetp = node_new_list(*targetp, NULL); - if (IS_NULL(*targetp)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - tmp = NCDR(*targetp) = node_new_list(qn, NULL); - if (IS_NULL(tmp)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - targetp = &(NCAR(tmp)); - } - goto re_entry; - } - } - - return r; -} - -static int -parse_branch(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) -{ - int r; - Node *node, **headp; - - *top = NULL; - r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) return r; - - if (r == TK_EOT || r == term || r == TK_ALT) { - *top = node; - } - else { - *top = node_new_list(node, NULL); - headp = &(NCDR(*top)); - while (r != TK_EOT && r != term && r != TK_ALT) { - r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) return r; - - if (NTYPE(node) == NT_LIST) { - *headp = node; - while (IS_NOT_NULL(NCDR(node))) node = NCDR(node); - headp = &(NCDR(node)); - } - else { - *headp = node_new_list(node, NULL); - headp = &(NCDR(*headp)); - } - } - } - - return r; -} - -/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ -static int -parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) -{ - int r; - Node *node, **headp; - - *top = NULL; - r = parse_branch(&node, tok, term, src, end, env); - if (r < 0) { - onig_node_free(node); - return r; - } - - if (r == term) { - *top = node; - } - else if (r == TK_ALT) { - *top = 
onig_node_new_alt(node, NULL); - headp = &(NCDR(*top)); - while (r == TK_ALT) { - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - r = parse_branch(&node, tok, term, src, end, env); - if (r < 0) return r; - - *headp = onig_node_new_alt(node, NULL); - headp = &(NCDR(*headp)); - } - - if (tok->type != (enum TokenSyms )term) - goto err; - } - else { - err: - if (term == TK_SUBEXP_CLOSE) - return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; - else - return ONIGERR_PARSER_BUG; - } - - return r; -} - -static int -parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) -{ - int r; - OnigToken tok; - - r = fetch_token(&tok, src, end, env); - if (r < 0) return r; - r = parse_subexp(top, &tok, TK_EOT, src, end, env); - if (r < 0) return r; - return 0; -} - -extern int -onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, - regex_t* reg, ScanEnv* env) -{ - int r; - UChar* p; - -#ifdef USE_NAMED_GROUP - names_clear(reg); -#endif - - scan_env_clear(env); - env->option = reg->options; - env->case_fold_flag = reg->case_fold_flag; - env->enc = reg->enc; - env->syntax = reg->syntax; - env->pattern = (UChar* )pattern; - env->pattern_end = (UChar* )end; - env->reg = reg; - - *root = NULL; - p = (UChar* )pattern; - r = parse_regexp(root, &p, (UChar* )end, env); - reg->num_mem = env->num_mem; - return r; -} - -extern void -onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED, - UChar* arg, UChar* arg_end) -{ - env->error = arg; - env->error_end = arg_end; -} diff --git a/src/openalpr/support/regex/regparse.h b/src/openalpr/support/regex/regparse.h deleted file mode 100644 index 0c5c2c9..0000000 --- a/src/openalpr/support/regex/regparse.h +++ /dev/null @@ -1,351 +0,0 @@ -#ifndef REGPARSE_H -#define REGPARSE_H -/********************************************************************** - regparse.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 
2002-2007 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -/* node type */ -#define NT_STR 0 -#define NT_CCLASS 1 -#define NT_CTYPE 2 -#define NT_CANY 3 -#define NT_BREF 4 -#define NT_QTFR 5 -#define NT_ENCLOSE 6 -#define NT_ANCHOR 7 -#define NT_LIST 8 -#define NT_ALT 9 -#define NT_CALL 10 - -/* node type bit */ -#define NTYPE2BIT(type) (1<<(type)) - -#define BIT_NT_STR NTYPE2BIT(NT_STR) -#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS) -#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE) -#define BIT_NT_CANY NTYPE2BIT(NT_CANY) -#define BIT_NT_BREF NTYPE2BIT(NT_BREF) -#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR) -#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE) -#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR) -#define BIT_NT_LIST NTYPE2BIT(NT_LIST) -#define BIT_NT_ALT NTYPE2BIT(NT_ALT) -#define BIT_NT_CALL NTYPE2BIT(NT_CALL) - -#define IS_NODE_TYPE_SIMPLE(type) \ - ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\ - BIT_NT_CANY | BIT_NT_BREF)) != 0) - -#define NTYPE(node) ((node)->u.base.type) -#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype) - -#define NSTR(node) (&((node)->u.str)) -#define NCCLASS(node) (&((node)->u.cclass)) -#define NCTYPE(node) (&((node)->u.ctype)) -#define NBREF(node) (&((node)->u.bref)) -#define NQTFR(node) (&((node)->u.qtfr)) -#define NENCLOSE(node) (&((node)->u.enclose)) -#define NANCHOR(node) (&((node)->u.anchor)) -#define NCONS(node) (&((node)->u.cons)) -#define NCALL(node) (&((node)->u.call)) - -#define NCAR(node) (NCONS(node)->car) -#define NCDR(node) (NCONS(node)->cdr) - - - -#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) -#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) - -#define ENCLOSE_MEMORY (1<<0) -#define ENCLOSE_OPTION (1<<1) -#define ENCLOSE_STOP_BACKTRACK (1<<2) - -#define NODE_STR_MARGIN 16 -#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ -#define NODE_BACKREFS_SIZE 6 - -#define NSTR_RAW (1<<0) /* by backslashed number */ -#define NSTR_AMBIG (1<<1) -#define NSTR_DONT_GET_OPT_INFO 
(1<<2) - -#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) -#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW -#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW -#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG -#define NSTRING_SET_DONT_GET_OPT_INFO(node) \ - (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO -#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0) -#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0) -#define NSTRING_IS_DONT_GET_OPT_INFO(node) \ - (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0) - -#define BACKREFS_P(br) \ - (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static); - -#define NQ_TARGET_ISNOT_EMPTY 0 -#define NQ_TARGET_IS_EMPTY 1 -#define NQ_TARGET_IS_EMPTY_MEM 2 -#define NQ_TARGET_IS_EMPTY_REC 3 - -/* status bits */ -#define NST_MIN_FIXED (1<<0) -#define NST_MAX_FIXED (1<<1) -#define NST_CLEN_FIXED (1<<2) -#define NST_MARK1 (1<<3) -#define NST_MARK2 (1<<4) -#define NST_MEM_BACKREFED (1<<5) -#define NST_STOP_BT_SIMPLE_REPEAT (1<<6) -#define NST_RECURSION (1<<7) -#define NST_CALLED (1<<8) -#define NST_ADDR_FIXED (1<<9) -#define NST_NAMED_GROUP (1<<10) -#define NST_NAME_REF (1<<11) -#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. 
*/ -#define NST_NEST_LEVEL (1<<13) -#define NST_BY_NUMBER (1<<14) /* {n,m} */ - -#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f) -#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f) - -#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0) -#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) -#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0) -#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0) -#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0) -#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) -#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) -#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) -#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \ - (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0) -#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) - -#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION -#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) -#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) -#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) -#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) -#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) -#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) - -#define CALLNODE_REFNUM_UNDEF -1 - -typedef struct { - NodeBase base; - UChar* s; - UChar* end; - unsigned int flag; - int capa; /* (allocated size - 1) or 0: use buf[] */ - UChar buf[NODE_STR_BUF_SIZE]; -} StrNode; - -typedef struct { - NodeBase base; - int state; - struct _Node* target; - int lower; - int upper; - int greedy; - int target_empty_info; - struct _Node* head_exact; - struct _Node* next_head_exact; - int is_refered; /* include called node. 
don't eliminate even if {0} */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ -#endif -} QtfrNode; - -typedef struct { - NodeBase base; - int state; - int type; - int regnum; - OnigOptionType option; - struct _Node* target; - AbsAddrType call_addr; - /* for multiple call reference */ - OnigDistance min_len; /* min length (byte) */ - OnigDistance max_len; /* max length (byte) */ - int char_len; /* character length */ - int opt_count; /* referenced count in optimize_node_left() */ -} EncloseNode; - -#ifdef USE_SUBEXP_CALL - -typedef struct { - int offset; - struct _Node* target; -} UnsetAddr; - -typedef struct { - int num; - int alloc; - UnsetAddr* us; -} UnsetAddrList; - -typedef struct { - NodeBase base; - int state; - int group_num; - UChar* name; - UChar* name_end; - struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */ - UnsetAddrList* unset_addr_list; -} CallNode; - -#endif - -typedef struct { - NodeBase base; - int state; - int back_num; - int back_static[NODE_BACKREFS_SIZE]; - int* back_dynamic; - int nest_level; -} BRefNode; - -typedef struct { - NodeBase base; - int type; - struct _Node* target; - int char_len; -} AnchorNode; - -typedef struct { - NodeBase base; - struct _Node* car; - struct _Node* cdr; -} ConsAltNode; - -typedef struct { - NodeBase base; - int ctype; - int not; -} CtypeNode; - -typedef struct _Node { - union { - NodeBase base; - StrNode str; - CClassNode cclass; - QtfrNode qtfr; - EncloseNode enclose; - BRefNode bref; - AnchorNode anchor; - ConsAltNode cons; - CtypeNode ctype; -#ifdef USE_SUBEXP_CALL - CallNode call; -#endif - } u; -} Node; - - -#define NULL_NODE ((Node* )0) - -#define SCANENV_MEMNODES_SIZE 8 -#define SCANENV_MEM_NODES(senv) \ - (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? 
\ - (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static) - -typedef struct { - OnigOptionType option; - OnigCaseFoldType case_fold_flag; - OnigEncoding enc; - OnigSyntaxType* syntax; - BitStatusType capture_history; - BitStatusType bt_mem_start; - BitStatusType bt_mem_end; - BitStatusType backrefed_mem; - UChar* pattern; - UChar* pattern_end; - UChar* error; - UChar* error_end; - regex_t* reg; /* for reg->names only */ - int num_call; -#ifdef USE_SUBEXP_CALL - UnsetAddrList* unset_addr_list; -#endif - int num_mem; -#ifdef USE_NAMED_GROUP - int num_named; -#endif - int mem_alloc; - Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; - Node** mem_nodes_dynamic; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int num_comb_exp_check; - int comb_exp_max_regnum; - int curr_max_regnum; - int has_recursion; -#endif -} ScanEnv; - - -#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0) -#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) -#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) - -#ifdef USE_NAMED_GROUP -typedef struct { - int new_val; -} GroupNumRemap; - -extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); -#endif - -extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); -extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end)); -extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); -extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); -extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode)); -extern void onig_node_conv_to_str_node P_((Node* node, int raw)); -extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); -extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end)); -extern void onig_node_free P_((Node* node)); -extern Node* onig_node_new_enclose P_((int type)); -extern Node* onig_node_new_anchor P_((int type)); -extern Node* 
onig_node_new_str P_((const UChar* s, const UChar* end)); -extern Node* onig_node_new_list P_((Node* left, Node* right)); -extern Node* onig_node_list_add P_((Node* list, Node* x)); -extern Node* onig_node_new_alt P_((Node* left, Node* right)); -extern void onig_node_str_clear P_((Node* node)); -extern int onig_free_node_list P_((void)); -extern int onig_names_free P_((regex_t* reg)); -extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); -extern int onig_free_shared_cclass_table P_((void)); - -#ifdef ONIG_DEBUG -#ifdef USE_NAMED_GROUP -extern int onig_print_names(FILE*, regex_t*); -#endif -#endif - -#endif /* REGPARSE_H */ diff --git a/src/openalpr/support/regex/regposerr.c b/src/openalpr/support/regex/regposerr.c deleted file mode 100644 index 3a3dcd5..0000000 --- a/src/openalpr/support/regex/regposerr.c +++ /dev/null @@ -1,98 +0,0 @@ -/********************************************************************** - regposerr.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2007 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "onig_config.h" -#include "onigposix.h" - -#ifdef HAVE_STRING_H -# include -#else -# include -#endif - -#if defined(__GNUC__) -# define ARG_UNUSED __attribute__ ((unused)) -#else -# define ARG_UNUSED -#endif - -static char* ESTRING[] = { - NULL, - "failed to match", /* REG_NOMATCH */ - "Invalid regular expression", /* REG_BADPAT */ - "invalid collating element referenced", /* REG_ECOLLATE */ - "invalid character class type referenced", /* REG_ECTYPE */ - "bad backslash-escape sequence", /* REG_EESCAPE */ - "invalid back reference number", /* REG_ESUBREG */ - "imbalanced [ and ]", /* REG_EBRACK */ - "imbalanced ( and )", /* REG_EPAREN */ - "imbalanced { and }", /* REG_EBRACE */ - "invalid repeat range {n,m}", /* REG_BADBR */ - "invalid range", /* REG_ERANGE */ - "Out of memory", /* REG_ESPACE */ - "? 
* + not preceded by valid regular expression", /* REG_BADRPT */ - - /* Extended errors */ - "internal error", /* REG_EONIG_INTERNAL */ - "invalid wide char value", /* REG_EONIG_BADWC */ - "invalid argument", /* REG_EONIG_BADARG */ - "multi-thread error" /* REG_EONIG_THREAD */ -}; - -#include - - -extern size_t -regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf, - size_t size) -{ - char* s; - char tbuf[35]; - size_t len; - - if (posix_ecode > 0 - && posix_ecode < (int )(sizeof(ESTRING) / sizeof(ESTRING[0]))) { - s = ESTRING[posix_ecode]; - } - else if (posix_ecode == 0) { - s = ""; - } - else { - sprintf(tbuf, "undefined error code (%d)", posix_ecode); - s = tbuf; - } - - len = strlen(s) + 1; /* use strlen() because s is ascii encoding. */ - - if (buf != NULL && size > 0) { - strncpy(buf, s, size - 1); - buf[size - 1] = '\0'; - } - return len; -} diff --git a/src/openalpr/support/regex/regposix.c b/src/openalpr/support/regex/regposix.c deleted file mode 100644 index 7d1857c..0000000 --- a/src/openalpr/support/regex/regposix.c +++ /dev/null @@ -1,303 +0,0 @@ -/********************************************************************** - regposix.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#define regex_t onig_regex_t -#include "regint.h" -#undef regex_t -#include "onigposix.h" - -#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig)) -#define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig)) - -/* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */ -#define ENC_STRING_LEN(enc,s,len) do { \ - if (ONIGENC_MBC_MINLEN(enc) == 1) { \ - UChar* tmps = (UChar* )(s); \ - while (*tmps != 0) tmps++; \ - len = tmps - (UChar* )(s); \ - } \ - else { \ - len = onigenc_str_bytelen_null(enc, (UChar* )s); \ - } \ -} while(0) - -typedef struct { - int onig_err; - int posix_err; -} O2PERR; - -static int -onig2posix_error_code(int code) -{ - static const O2PERR o2p[] = { - { ONIG_MISMATCH, REG_NOMATCH }, - { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, - { ONIGERR_MEMORY, REG_ESPACE }, - { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, - { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL }, - { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL }, - { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL }, - { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL }, - { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL }, - { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG }, - { 
ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG }, - { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG }, - { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE }, - { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK }, - { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE }, - { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE }, - { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE }, - { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE }, - { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE }, - { ONIGERR_META_CODE_SYNTAX, REG_BADPAT }, - { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT }, - { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE }, - { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE }, - { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE }, - { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT }, - { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT }, - { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT }, - { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN }, - { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN }, - { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT }, - { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT }, - { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT }, - { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT }, - { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT }, - { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT }, - { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR }, - { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR }, - { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE }, - { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE }, - { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE }, - { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT }, - { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG }, - { ONIGERR_INVALID_BACKREF, REG_ESUBREG }, - { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT }, - { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, - { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, - { 
ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC }, - { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT }, - { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT }, - { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT }, - { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT }, - { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT }, - { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT }, - { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT }, - { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT }, - { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT }, - { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT }, - { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG }, - { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD } - - }; - - int i; - - if (code >= 0) return 0; - - for (i = 0; i < (int )(sizeof(o2p) / sizeof(o2p[0])); i++) { - if (code == o2p[i].onig_err) - return o2p[i].posix_err; - } - - return REG_EONIG_INTERNAL; /* but, unknown error code */ -} - -extern int -regcomp(regex_t* reg, const char* pattern, int posix_options) -{ - int r, len; - OnigSyntaxType* syntax = OnigDefaultSyntax; - OnigOptionType options; - - if ((posix_options & REG_EXTENDED) == 0) - syntax = ONIG_SYNTAX_POSIX_BASIC; - - options = syntax->options; - if ((posix_options & REG_ICASE) != 0) - ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE); - if ((posix_options & REG_NEWLINE) != 0) { - ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE); - ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE); - } - - reg->comp_options = posix_options; - - ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len); - r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len), - options, OnigEncDefaultCharEncoding, syntax, - (OnigErrorInfo* )NULL); - if (r != ONIG_NORMAL) { - return onig2posix_error_code(r); - } - - reg->re_nsub = ONIG_C(reg)->num_mem; - return 0; -} - -extern int -regexec(regex_t* reg, const char* str, size_t nmatch, - regmatch_t pmatch[], int posix_options) -{ - int r, i, len; - UChar* end; - 
regmatch_t* pm; - OnigOptionType options; - - options = ONIG_OPTION_POSIX_REGION; - if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL; - if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL; - - if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) { - pm = (regmatch_t* )NULL; - nmatch = 0; - } - else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) { - pm = (regmatch_t* )xmalloc(sizeof(regmatch_t) - * (ONIG_C(reg)->num_mem + 1)); - if (pm == NULL) - return REG_ESPACE; - } - else { - pm = pmatch; - } - - ENC_STRING_LEN(ONIG_C(reg)->enc, str, len); - end = (UChar* )(str + len); - r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end, - (OnigRegion* )pm, options); - - if (r >= 0) { - r = 0; /* Match */ - if (pm != pmatch && pm != NULL) { - xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch); - } - } - else if (r == ONIG_MISMATCH) { - r = REG_NOMATCH; - for (i = 0; i < (int )nmatch; i++) - pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS; - } - else { - r = onig2posix_error_code(r); - } - - if (pm != pmatch && pm != NULL) - xfree(pm); - -#if 0 - if (reg->re_nsub > nmatch - 1) - reg->re_nsub = (nmatch <= 1 ? 
0 : nmatch - 1); -#endif - - return r; -} - -extern void -regfree(regex_t* reg) -{ - onig_free(ONIG_C(reg)); -} - - -extern void -reg_set_encoding(int mb_code) -{ - OnigEncoding enc; - - switch (mb_code) { - case REG_POSIX_ENCODING_ASCII: - enc = ONIG_ENCODING_ASCII; - break; - case REG_POSIX_ENCODING_EUC_JP: - enc = ONIG_ENCODING_EUC_JP; - break; - case REG_POSIX_ENCODING_SJIS: - enc = ONIG_ENCODING_SJIS; - break; - case REG_POSIX_ENCODING_UTF8: - enc = ONIG_ENCODING_UTF8; - break; - case REG_POSIX_ENCODING_UTF16_BE: - enc = ONIG_ENCODING_UTF16_BE; - break; - case REG_POSIX_ENCODING_UTF16_LE: - enc = ONIG_ENCODING_UTF16_LE; - break; - - default: - return ; - break; - } - - onigenc_set_default_encoding(enc); -} - -extern int -reg_name_to_group_numbers(regex_t* reg, - const unsigned char* name, const unsigned char* name_end, int** nums) -{ - return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums); -} - -typedef struct { - int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*); - regex_t* reg; - void* arg; -} i_wrap; - -static int -i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs, - onig_regex_t* reg ARG_UNUSED, void* arg) -{ - i_wrap* warg = (i_wrap* )arg; - - return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg); -} - -extern int -reg_foreach_name(regex_t* reg, - int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), - void* arg) -{ - i_wrap warg; - - warg.func = func; - warg.reg = reg; - warg.arg = arg; - - return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg); -} - -extern int -reg_number_of_names(regex_t* reg) -{ - return onig_number_of_names(ONIG_C(reg)); -} diff --git a/src/openalpr/support/regex/regsyntax.c b/src/openalpr/support/regex/regsyntax.c deleted file mode 100644 index ade5b55..0000000 --- a/src/openalpr/support/regex/regsyntax.c +++ /dev/null @@ -1,315 +0,0 @@ -/********************************************************************** - regsyntax.c - 
Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2006 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regint.h" - -OnigSyntaxType OnigSyntaxASIS = { - 0 - , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE - , 0 - , ONIG_OPTION_NONE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxPosixBasic = { - ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | - ONIG_SYN_OP_ESC_BRACE_INTERVAL ) - , 0 - , 0 - , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxPosixExtended = { - ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP | - ONIG_SYN_OP_BRACE_INTERVAL | - ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT ) - , 0 - , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | - ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | - ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | - ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) - , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxEmacs = { - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | - ONIG_SYN_OP_ESC_BRACE_INTERVAL | - ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT | - ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | - ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF | - ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS ) - , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR - , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC - , ONIG_OPTION_NONE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxGrep = { - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET | - ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | - ONIG_SYN_OP_ESC_VBAR_ALT | - ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF | - ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR | - ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND | - ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF ) - , 0 - , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) - , ONIG_OPTION_NONE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxGnuRegex = { - SYN_GNU_REGEX_OP - , 0 - , SYN_GNU_REGEX_BV - , ONIG_OPTION_NONE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxJava = { - (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | - ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL | - ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 ) - & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) - , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | - ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | - ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP | - ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 | - ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) - , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) - , ONIG_OPTION_SINGLELINE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -OnigSyntaxType OnigSyntaxPerl = { - (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | - ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | - ONIG_SYN_OP_ESC_C_CONTROL ) - & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) - , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | - ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | - ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | - ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ) - , SYN_GNU_REGEX_BV - , ONIG_OPTION_SINGLELINE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -/* Perl + named group */ -OnigSyntaxType OnigSyntaxPerl_NG = { - (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | - ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | - ONIG_SYN_OP_ESC_C_CONTROL ) - & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) - , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | - ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | - ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | - ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | - ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | - ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | - ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) - , ( SYN_GNU_REGEX_BV | - ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | - ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) - , ONIG_OPTION_SINGLELINE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' 
*/ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - - - -extern int -onig_set_default_syntax(OnigSyntaxType* syntax) -{ - if (IS_NULL(syntax)) - syntax = ONIG_SYNTAX_RUBY; - - OnigDefaultSyntax = syntax; - return 0; -} - -extern void -onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) -{ - *to = *from; -} - -extern void -onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) -{ - syntax->op = op; -} - -extern void -onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) -{ - syntax->op2 = op2; -} - -extern void -onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) -{ - syntax->behavior = behavior; -} - -extern void -onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) -{ - syntax->options = options; -} - -extern unsigned int -onig_get_syntax_op(OnigSyntaxType* syntax) -{ - return syntax->op; -} - -extern unsigned int -onig_get_syntax_op2(OnigSyntaxType* syntax) -{ - return syntax->op2; -} - -extern unsigned int -onig_get_syntax_behavior(OnigSyntaxType* syntax) -{ - return syntax->behavior; -} - -extern OnigOptionType -onig_get_syntax_options(OnigSyntaxType* syntax) -{ - return syntax->options; -} - -#ifdef USE_VARIABLE_META_CHARS -extern int onig_set_meta_char(OnigSyntaxType* enc, - unsigned int what, OnigCodePoint code) -{ - switch (what) { - case ONIG_META_CHAR_ESCAPE: - enc->meta_char_table.esc = code; - break; - case ONIG_META_CHAR_ANYCHAR: - enc->meta_char_table.anychar = code; - break; - case ONIG_META_CHAR_ANYTIME: - enc->meta_char_table.anytime = code; - break; - case ONIG_META_CHAR_ZERO_OR_ONE_TIME: - enc->meta_char_table.zero_or_one_time = code; - break; - case ONIG_META_CHAR_ONE_OR_MORE_TIME: - enc->meta_char_table.one_or_more_time = code; - break; - case 
ONIG_META_CHAR_ANYCHAR_ANYTIME: - enc->meta_char_table.anychar_anytime = code; - break; - default: - return ONIGERR_INVALID_ARGUMENT; - break; - } - return 0; -} -#endif /* USE_VARIABLE_META_CHARS */ diff --git a/src/openalpr/support/regex/regtrav.c b/src/openalpr/support/regex/regtrav.c deleted file mode 100644 index 58a17f5..0000000 --- a/src/openalpr/support/regex/regtrav.c +++ /dev/null @@ -1,76 +0,0 @@ -/********************************************************************** - regtrav.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2004 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -#ifdef USE_CAPTURE_HISTORY - -static int -capture_tree_traverse(OnigCaptureTreeNode* node, int at, - int(*callback_func)(int,int,int,int,int,void*), - int level, void* arg) -{ - int r, i; - - if (node == (OnigCaptureTreeNode* )0) - return 0; - - if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) { - r = (*callback_func)(node->group, node->beg, node->end, - level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg); - if (r != 0) return r; - } - - for (i = 0; i < node->num_childs; i++) { - r = capture_tree_traverse(node->childs[i], at, - callback_func, level + 1, arg); - if (r != 0) return r; - } - - if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) != 0) { - r = (*callback_func)(node->group, node->beg, node->end, - level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg); - if (r != 0) return r; - } - - return 0; -} -#endif /* USE_CAPTURE_HISTORY */ - -extern int -onig_capture_tree_traverse(OnigRegion* region, int at, - int(*callback_func)(int,int,int,int,int,void*), void* arg) -{ -#ifdef USE_CAPTURE_HISTORY - return capture_tree_traverse(region->history_root, at, - callback_func, 0, arg); -#else - return ONIG_NO_SUPPORT_CONFIG; -#endif -} diff --git a/src/openalpr/support/regex/regversion.c b/src/openalpr/support/regex/regversion.c deleted file mode 100644 index 4953229..0000000 --- a/src/openalpr/support/regex/regversion.c +++ /dev/null @@ -1,56 +0,0 @@ -/********************************************************************** - regversion.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "onig_config.h" -#include "oniguruma.h" -#include - -extern const char* -onig_version(void) -{ - static char s[12]; - - sprintf(s, "%d.%d.%d", - ONIGURUMA_VERSION_MAJOR, - ONIGURUMA_VERSION_MINOR, - ONIGURUMA_VERSION_TEENY); - return s; -} - -extern const char* -onig_copyright(void) -{ - static char s[58]; - - sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2008 K.Kosako", - ONIGURUMA_VERSION_MAJOR, - ONIGURUMA_VERSION_MINOR, - ONIGURUMA_VERSION_TEENY); - return s; -} diff --git a/src/openalpr/support/regex/st.c b/src/openalpr/support/regex/st.c deleted file mode 100644 index 022880a..0000000 --- a/src/openalpr/support/regex/st.c +++ /dev/null @@ -1,578 +0,0 @@ -/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. 
*/ - -/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ - -#include -#include -#include - -#ifdef _WIN32 -#include -#endif - -#include "regint.h" -#include "st.h" - -typedef struct st_table_entry st_table_entry; - -struct st_table_entry { - unsigned int hash; - st_data_t key; - st_data_t record; - st_table_entry *next; -}; - -#define ST_DEFAULT_MAX_DENSITY 5 -#define ST_DEFAULT_INIT_TABLE_SIZE 11 - - /* - * DEFAULT_MAX_DENSITY is the default for the largest we allow the - * average number of items per bin before increasing the number of - * bins - * - * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins - * allocated initially - * - */ - -static int numcmp(long, long); -static int numhash(long); -static struct st_hash_type type_numhash = { - numcmp, - numhash, -}; - -/* extern int strcmp(const char *, const char *); */ -static int strhash(const char *); -static struct st_hash_type type_strhash = { - strcmp, - strhash, -}; - -static void rehash(st_table *); - -#define alloc(type) (type*)xmalloc((unsigned)sizeof(type)) -#define Calloc(n,s) (char*)xcalloc((n),(s)) - -#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0) - -#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key)) -#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) - -/* - * MINSIZE is the minimum size of a dictionary. - */ - -#define MINSIZE 8 - -/* -Table of prime numbers 2^n+a, 2<=n<=30. 
-*/ -static const long primes[] = { - 8 + 3, - 16 + 3, - 32 + 5, - 64 + 3, - 128 + 3, - 256 + 27, - 512 + 9, - 1024 + 9, - 2048 + 5, - 4096 + 3, - 8192 + 27, - 16384 + 43, - 32768 + 3, - 65536 + 45, - 131072 + 29, - 262144 + 3, - 524288 + 21, - 1048576 + 7, - 2097152 + 17, - 4194304 + 15, - 8388608 + 9, - 16777216 + 43, - 33554432 + 35, - 67108864 + 15, - 134217728 + 29, - 268435456 + 3, - 536870912 + 11, - 1073741824 + 85, - 0 -}; - -static int -new_size(size) - int size; -{ - int i; - -#if 0 - for (i=3; i<31; i++) { - if ((1< size) return 1< size) return primes[i]; - } - /* Ran out of polynomials */ - return -1; /* should raise exception */ -#endif -} - -#ifdef HASH_LOG -static int collision = 0; -static int init_st = 0; - -static void -stat_col() -{ - FILE *f = fopen("/tmp/col", "w"); - fprintf(f, "collision: %d\n", collision); - fclose(f); -} -#endif - -st_table* -st_init_table_with_size(type, size) - struct st_hash_type *type; - int size; -{ - st_table *tbl; - -#ifdef HASH_LOG - if (init_st == 0) { - init_st = 1; - atexit(stat_col); - } -#endif - - size = new_size(size); /* round up to prime number */ - - tbl = alloc(st_table); - tbl->type = type; - tbl->num_entries = 0; - tbl->num_bins = size; - tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*)); - - return tbl; -} - -st_table* -st_init_table(type) - struct st_hash_type *type; -{ - return st_init_table_with_size(type, 0); -} - -st_table* -st_init_numtable(void) -{ - return st_init_table(&type_numhash); -} - -st_table* -st_init_numtable_with_size(size) - int size; -{ - return st_init_table_with_size(&type_numhash, size); -} - -st_table* -st_init_strtable(void) -{ - return st_init_table(&type_strhash); -} - -st_table* -st_init_strtable_with_size(size) - int size; -{ - return st_init_table_with_size(&type_strhash, size); -} - -void -st_free_table(table) - st_table *table; -{ - register st_table_entry *ptr, *next; - int i; - - for(i = 0; i < table->num_bins; i++) { - ptr = table->bins[i]; - 
while (ptr != 0) { - next = ptr->next; - free(ptr); - ptr = next; - } - } - free(table->bins); - free(table); -} - -#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ -((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key))) - -#ifdef HASH_LOG -#define COLLISION collision++ -#else -#define COLLISION -#endif - -#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\ - bin_pos = hash_val%(table)->num_bins;\ - ptr = (table)->bins[bin_pos];\ - if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\ - COLLISION;\ - while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\ - ptr = ptr->next;\ - }\ - ptr = ptr->next;\ - }\ -} while (0) - -int -st_lookup(table, key, value) - st_table *table; - register st_data_t key; - st_data_t *value; -{ - unsigned int hash_val, bin_pos; - register st_table_entry *ptr; - - hash_val = do_hash(key, table); - FIND_ENTRY(table, ptr, hash_val, bin_pos); - - if (ptr == 0) { - return 0; - } - else { - if (value != 0) *value = ptr->record; - return 1; - } -} - -#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ -do {\ - st_table_entry *entry;\ - if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\ - rehash(table);\ - bin_pos = hash_val % table->num_bins;\ - }\ - \ - entry = alloc(st_table_entry);\ - \ - entry->hash = hash_val;\ - entry->key = key;\ - entry->record = value;\ - entry->next = table->bins[bin_pos];\ - table->bins[bin_pos] = entry;\ - table->num_entries++;\ -} while (0) - -int -st_insert(table, key, value) - register st_table *table; - register st_data_t key; - st_data_t value; -{ - unsigned int hash_val, bin_pos; - register st_table_entry *ptr; - - hash_val = do_hash(key, table); - FIND_ENTRY(table, ptr, hash_val, bin_pos); - - if (ptr == 0) { - ADD_DIRECT(table, key, value, hash_val, bin_pos); - return 0; - } - else { - ptr->record = value; - return 1; - } -} - -void -st_add_direct(table, key, value) - st_table *table; - st_data_t key; - st_data_t value; -{ - unsigned int hash_val, bin_pos; - 
- hash_val = do_hash(key, table); - bin_pos = hash_val % table->num_bins; - ADD_DIRECT(table, key, value, hash_val, bin_pos); -} - -static void -rehash(table) - register st_table *table; -{ - register st_table_entry *ptr, *next, **new_bins; - int i, old_num_bins = table->num_bins, new_num_bins; - unsigned int hash_val; - - new_num_bins = new_size(old_num_bins+1); - new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*)); - - for(i = 0; i < old_num_bins; i++) { - ptr = table->bins[i]; - while (ptr != 0) { - next = ptr->next; - hash_val = ptr->hash % new_num_bins; - ptr->next = new_bins[hash_val]; - new_bins[hash_val] = ptr; - ptr = next; - } - } - free(table->bins); - table->num_bins = new_num_bins; - table->bins = new_bins; -} - -st_table* -st_copy(old_table) - st_table *old_table; -{ - st_table *new_table; - st_table_entry *ptr, *entry; - int i, num_bins = old_table->num_bins; - - new_table = alloc(st_table); - if (new_table == 0) { - return 0; - } - - *new_table = *old_table; - new_table->bins = (st_table_entry**) - Calloc((unsigned)num_bins, sizeof(st_table_entry*)); - - if (new_table->bins == 0) { - free(new_table); - return 0; - } - - for(i = 0; i < num_bins; i++) { - new_table->bins[i] = 0; - ptr = old_table->bins[i]; - while (ptr != 0) { - entry = alloc(st_table_entry); - if (entry == 0) { - free(new_table->bins); - free(new_table); - return 0; - } - *entry = *ptr; - entry->next = new_table->bins[i]; - new_table->bins[i] = entry; - ptr = ptr->next; - } - } - return new_table; -} - -int -st_delete(table, key, value) - register st_table *table; - register st_data_t *key; - st_data_t *value; -{ - unsigned int hash_val; - st_table_entry *tmp; - register st_table_entry *ptr; - - hash_val = do_hash_bin(*key, table); - ptr = table->bins[hash_val]; - - if (ptr == 0) { - if (value != 0) *value = 0; - return 0; - } - - if (EQUAL(table, *key, ptr->key)) { - table->bins[hash_val] = ptr->next; - table->num_entries--; - if (value != 0) *value = 
ptr->record; - *key = ptr->key; - free(ptr); - return 1; - } - - for(; ptr->next != 0; ptr = ptr->next) { - if (EQUAL(table, ptr->next->key, *key)) { - tmp = ptr->next; - ptr->next = ptr->next->next; - table->num_entries--; - if (value != 0) *value = tmp->record; - *key = tmp->key; - free(tmp); - return 1; - } - } - - return 0; -} - -int -st_delete_safe(table, key, value, never) - register st_table *table; - register st_data_t *key; - st_data_t *value; - st_data_t never; -{ - unsigned int hash_val; - register st_table_entry *ptr; - - hash_val = do_hash_bin(*key, table); - ptr = table->bins[hash_val]; - - if (ptr == 0) { - if (value != 0) *value = 0; - return 0; - } - - for(; ptr != 0; ptr = ptr->next) { - if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { - table->num_entries--; - *key = ptr->key; - if (value != 0) *value = ptr->record; - ptr->key = ptr->record = never; - return 1; - } - } - - return 0; -} - -static int -#if defined(__GNUC__) -delete_never(st_data_t key __attribute__ ((unused)), st_data_t value, - st_data_t never) -#else -delete_never(key, value, never) - st_data_t key, value, never; -#endif -{ - if (value == never) return ST_DELETE; - return ST_CONTINUE; -} - -void -st_cleanup_safe(table, never) - st_table *table; - st_data_t never; -{ - int num_entries = table->num_entries; - - st_foreach(table, delete_never, never); - table->num_entries = num_entries; -} - -int -st_foreach(table, func, arg) - st_table *table; - int (*func)(); - st_data_t arg; -{ - st_table_entry *ptr, *last, *tmp; - enum st_retval retval; - int i; - - for(i = 0; i < table->num_bins; i++) { - last = 0; - for(ptr = table->bins[i]; ptr != 0;) { - retval = (*func)(ptr->key, ptr->record, arg); - switch (retval) { - case ST_CHECK: /* check if hash is modified during iteration */ - tmp = 0; - if (i < table->num_bins) { - for (tmp = table->bins[i]; tmp; tmp=tmp->next) { - if (tmp == ptr) break; - } - } - if (!tmp) { - /* call func with error notice */ - return 1; - } - /* fall 
through */ - case ST_CONTINUE: - last = ptr; - ptr = ptr->next; - break; - case ST_STOP: - return 0; - case ST_DELETE: - tmp = ptr; - if (last == 0) { - table->bins[i] = ptr->next; - } - else { - last->next = ptr->next; - } - ptr = ptr->next; - free(tmp); - table->num_entries--; - } - } - } - return 0; -} - -static int -strhash(string) - register const char *string; -{ - register int c; - -#ifdef HASH_ELFHASH - register unsigned int h = 0, g; - - while ((c = *string++) != '\0') { - h = ( h << 4 ) + c; - if ( g = h & 0xF0000000 ) - h ^= g >> 24; - h &= ~g; - } - return h; -#elif HASH_PERL - register int val = 0; - - while ((c = *string++) != '\0') { - val += c; - val += (val << 10); - val ^= (val >> 6); - } - val += (val << 3); - val ^= (val >> 11); - - return val + (val << 15); -#else - register int val = 0; - - while ((c = *string++) != '\0') { - val = val*997 + c; - } - - return val + (val>>5); -#endif -} - -static int -numcmp(x, y) - long x, y; -{ - return x != y; -} - -static int -numhash(n) - long n; -{ - return n; -} diff --git a/src/openalpr/support/regex/st.h b/src/openalpr/support/regex/st.h deleted file mode 100644 index 6f93870..0000000 --- a/src/openalpr/support/regex/st.h +++ /dev/null @@ -1,68 +0,0 @@ -/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. 
*/ - -/* @(#) st.h 5.1 89/12/14 */ - -#ifndef ST_INCLUDED - -#define ST_INCLUDED - -#ifdef _WIN32 -# include -typedef ULONG_PTR st_data_t; -#else -typedef unsigned long st_data_t; -#endif -#define ST_DATA_T_DEFINED - -typedef struct st_table st_table; - -struct st_hash_type { - int (*compare)(); - int (*hash)(); -}; - -struct st_table { - struct st_hash_type *type; - int num_bins; - int num_entries; - struct st_table_entry **bins; -}; - -#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0) - -enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; - -#ifndef _ -# define _(args) args -#endif -#ifndef ANYARGS -# ifdef __cplusplus -# define ANYARGS ... -# else -# define ANYARGS -# endif -#endif - -st_table *st_init_table _((struct st_hash_type *)); -st_table *st_init_table_with_size _((struct st_hash_type *, int)); -st_table *st_init_numtable _((void)); -st_table *st_init_numtable_with_size _((int)); -st_table *st_init_strtable _((void)); -st_table *st_init_strtable_with_size _((int)); -int st_delete _((st_table *, st_data_t *, st_data_t *)); -int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t)); -int st_insert _((st_table *, st_data_t, st_data_t)); -int st_lookup _((st_table *, st_data_t, st_data_t *)); -int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); -void st_add_direct _((st_table *, st_data_t, st_data_t)); -void st_free_table _((st_table *)); -void st_cleanup_safe _((st_table *, st_data_t)); -st_table *st_copy _((st_table *)); - -#define ST_NUMCMP ((int (*)()) 0) -#define ST_NUMHASH ((int (*)()) -2) - -#define st_numcmp ST_NUMCMP -#define st_numhash ST_NUMHASH - -#endif /* ST_INCLUDED */ diff --git a/src/openalpr/support/regex/unicode.c b/src/openalpr/support/regex/unicode.c deleted file mode 100644 index e13429f..0000000 --- a/src/openalpr/support/regex/unicode.c +++ /dev/null @@ -1,11374 +0,0 @@ -/********************************************************************** - unicode.c - Oniguruma (regular expression 
library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2013 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include "regint.h" - -#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ - ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) -#if 0 -#define ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(code,cbit) \ - ((EncUNICODE_ISO_8859_1_CtypeTable[code] & (cbit)) != 0) -#endif - -static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = { - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008, - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, - 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, - 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, - 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, - 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, - 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 
0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, - 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, - 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 -}; - -/* 'NEWLINE' */ -static const OnigCodePoint CR_NEWLINE[] = { - 1, - 0x000a, 0x000a -}; /* CR_NEWLINE */ - -/* 'Alpha': [[:Alpha:]] */ -static const OnigCodePoint CR_Alpha[] = { - 418, - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0241, - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c5, - 0x05c7, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x065e, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06ef, - 0x06fa, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09f0, 0x09f1, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 
0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a70, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0edc, 
0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fa, - 0x10fc, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x135f, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x180b, 0x180d, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x1a00, 0x1a1b, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x2090, 0x2094, - 0x20d0, 0x20eb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 
0x2ce4, - 0x2d00, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3005, 0x3006, - 0x302a, 0x302f, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xa000, 0xa48c, - 0xa800, 0xa827, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x10400, 0x1049d, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a3f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 
0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -}; /* CR_Alpha */ - -/* 'Blank': [[:Blank:]] */ -static const OnigCodePoint CR_Blank[] = { - 9, - 0x0009, 0x0009, - 0x0020, 0x0020, - 0x00a0, 0x00a0, - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; /* CR_Blank */ - -/* 'Cntrl': [[:Cntrl:]] */ -static const OnigCodePoint CR_Cntrl[] = { - 19, - 0x0000, 0x001f, - 0x007f, 0x009f, - 0x00ad, 0x00ad, - 0x0600, 0x0603, - 0x06dd, 0x06dd, - 0x070f, 0x070f, - 0x17b4, 0x17b5, - 0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x2063, - 0x206a, 0x206f, - 0xd800, 0xf8ff, - 0xfeff, 0xfeff, - 0xfff9, 0xfffb, - 0x1d173, 0x1d17a, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Cntrl */ - -/* 'Digit': [[:Digit:]] */ -static const OnigCodePoint CR_Digit[] = { - 23, - 0x0030, 0x0039, - 0x0660, 0x0669, - 0x06f0, 0x06f9, - 0x0966, 0x096f, - 0x09e6, 0x09ef, - 0x0a66, 0x0a6f, - 0x0ae6, 0x0aef, - 0x0b66, 0x0b6f, - 0x0be6, 0x0bef, - 0x0c66, 0x0c6f, - 0x0ce6, 0x0cef, - 0x0d66, 0x0d6f, - 0x0e50, 0x0e59, - 0x0ed0, 0x0ed9, - 0x0f20, 0x0f29, - 0x1040, 0x1049, - 0x17e0, 0x17e9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0x19d0, 0x19d9, - 0xff10, 0xff19, - 0x104a0, 0x104a9, - 0x1d7ce, 0x1d7ff -}; /* CR_Digit */ - -/* 'Graph': [[:Graph:]] */ -static const OnigCodePoint CR_Graph[] = { - 424, - 0x0021, 0x007e, - 0x00a1, 0x0241, - 0x0250, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 
0x05b9, - 0x05bb, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060b, 0x0615, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x065e, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 
0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fd1, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x137c, - 0x1380, 0x1399, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 0x1681, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x19de, 0x1a1b, - 0x1a1e, 
0x1a1f, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x200b, 0x2027, - 0x202a, 0x202e, - 0x2030, 0x205e, - 0x2060, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x2090, 0x2094, - 0x20a0, 0x20b5, - 0x20d0, 0x20eb, - 0x2100, 0x214c, - 0x2153, 0x2183, - 0x2190, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27c0, 0x27c6, - 0x27d0, 0x27eb, - 0x27f0, 0x2b13, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2cea, - 0x2cf9, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3001, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31c0, 0x31cf, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fbb, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xa700, 0xa716, - 0xa800, 0xa82b, - 0xac00, 0xd7a3, - 0xe000, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe19, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 
0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1018a, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x103c3, - 0x103c8, 0x103d5, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a47, - 0x10a50, 0x10a58, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d200, 0x1d245, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Graph */ - -/* 'Lower': [[:Lower:]] */ -static const OnigCodePoint CR_Lower[] = { - 480, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00df, 0x00f6, - 0x00f8, 0x00ff, - 0x0101, 0x0101, - 0x0103, 0x0103, - 0x0105, 0x0105, - 0x0107, 0x0107, - 0x0109, 0x0109, - 0x010b, 0x010b, - 0x010d, 0x010d, - 0x010f, 0x010f, - 0x0111, 0x0111, - 0x0113, 0x0113, - 0x0115, 0x0115, - 0x0117, 0x0117, - 0x0119, 0x0119, - 0x011b, 0x011b, - 0x011d, 0x011d, - 0x011f, 0x011f, - 0x0121, 0x0121, - 0x0123, 0x0123, - 0x0125, 0x0125, - 0x0127, 
0x0127, - 0x0129, 0x0129, - 0x012b, 0x012b, - 0x012d, 0x012d, - 0x012f, 0x012f, - 0x0131, 0x0131, - 0x0133, 0x0133, - 0x0135, 0x0135, - 0x0137, 0x0138, - 0x013a, 0x013a, - 0x013c, 0x013c, - 0x013e, 0x013e, - 0x0140, 0x0140, - 0x0142, 0x0142, - 0x0144, 0x0144, - 0x0146, 0x0146, - 0x0148, 0x0149, - 0x014b, 0x014b, - 0x014d, 0x014d, - 0x014f, 0x014f, - 0x0151, 0x0151, - 0x0153, 0x0153, - 0x0155, 0x0155, - 0x0157, 0x0157, - 0x0159, 0x0159, - 0x015b, 0x015b, - 0x015d, 0x015d, - 0x015f, 0x015f, - 0x0161, 0x0161, - 0x0163, 0x0163, - 0x0165, 0x0165, - 0x0167, 0x0167, - 0x0169, 0x0169, - 0x016b, 0x016b, - 0x016d, 0x016d, - 0x016f, 0x016f, - 0x0171, 0x0171, - 0x0173, 0x0173, - 0x0175, 0x0175, - 0x0177, 0x0177, - 0x017a, 0x017a, - 0x017c, 0x017c, - 0x017e, 0x0180, - 0x0183, 0x0183, - 0x0185, 0x0185, - 0x0188, 0x0188, - 0x018c, 0x018d, - 0x0192, 0x0192, - 0x0195, 0x0195, - 0x0199, 0x019b, - 0x019e, 0x019e, - 0x01a1, 0x01a1, - 0x01a3, 0x01a3, - 0x01a5, 0x01a5, - 0x01a8, 0x01a8, - 0x01aa, 0x01ab, - 0x01ad, 0x01ad, - 0x01b0, 0x01b0, - 0x01b4, 0x01b4, - 0x01b6, 0x01b6, - 0x01b9, 0x01ba, - 0x01bd, 0x01bf, - 0x01c6, 0x01c6, - 0x01c9, 0x01c9, - 0x01cc, 0x01cc, - 0x01ce, 0x01ce, - 0x01d0, 0x01d0, - 0x01d2, 0x01d2, - 0x01d4, 0x01d4, - 0x01d6, 0x01d6, - 0x01d8, 0x01d8, - 0x01da, 0x01da, - 0x01dc, 0x01dd, - 0x01df, 0x01df, - 0x01e1, 0x01e1, - 0x01e3, 0x01e3, - 0x01e5, 0x01e5, - 0x01e7, 0x01e7, - 0x01e9, 0x01e9, - 0x01eb, 0x01eb, - 0x01ed, 0x01ed, - 0x01ef, 0x01f0, - 0x01f3, 0x01f3, - 0x01f5, 0x01f5, - 0x01f9, 0x01f9, - 0x01fb, 0x01fb, - 0x01fd, 0x01fd, - 0x01ff, 0x01ff, - 0x0201, 0x0201, - 0x0203, 0x0203, - 0x0205, 0x0205, - 0x0207, 0x0207, - 0x0209, 0x0209, - 0x020b, 0x020b, - 0x020d, 0x020d, - 0x020f, 0x020f, - 0x0211, 0x0211, - 0x0213, 0x0213, - 0x0215, 0x0215, - 0x0217, 0x0217, - 0x0219, 0x0219, - 0x021b, 0x021b, - 0x021d, 0x021d, - 0x021f, 0x021f, - 0x0221, 0x0221, - 0x0223, 0x0223, - 0x0225, 0x0225, - 0x0227, 0x0227, - 0x0229, 0x0229, - 0x022b, 0x022b, - 0x022d, 0x022d, - 0x022f, 
0x022f, - 0x0231, 0x0231, - 0x0233, 0x0239, - 0x023c, 0x023c, - 0x023f, 0x0240, - 0x0250, 0x02af, - 0x0390, 0x0390, - 0x03ac, 0x03ce, - 0x03d0, 0x03d1, - 0x03d5, 0x03d7, - 0x03d9, 0x03d9, - 0x03db, 0x03db, - 0x03dd, 0x03dd, - 0x03df, 0x03df, - 0x03e1, 0x03e1, - 0x03e3, 0x03e3, - 0x03e5, 0x03e5, - 0x03e7, 0x03e7, - 0x03e9, 0x03e9, - 0x03eb, 0x03eb, - 0x03ed, 0x03ed, - 0x03ef, 0x03f3, - 0x03f5, 0x03f5, - 0x03f8, 0x03f8, - 0x03fb, 0x03fc, - 0x0430, 0x045f, - 0x0461, 0x0461, - 0x0463, 0x0463, - 0x0465, 0x0465, - 0x0467, 0x0467, - 0x0469, 0x0469, - 0x046b, 0x046b, - 0x046d, 0x046d, - 0x046f, 0x046f, - 0x0471, 0x0471, - 0x0473, 0x0473, - 0x0475, 0x0475, - 0x0477, 0x0477, - 0x0479, 0x0479, - 0x047b, 0x047b, - 0x047d, 0x047d, - 0x047f, 0x047f, - 0x0481, 0x0481, - 0x048b, 0x048b, - 0x048d, 0x048d, - 0x048f, 0x048f, - 0x0491, 0x0491, - 0x0493, 0x0493, - 0x0495, 0x0495, - 0x0497, 0x0497, - 0x0499, 0x0499, - 0x049b, 0x049b, - 0x049d, 0x049d, - 0x049f, 0x049f, - 0x04a1, 0x04a1, - 0x04a3, 0x04a3, - 0x04a5, 0x04a5, - 0x04a7, 0x04a7, - 0x04a9, 0x04a9, - 0x04ab, 0x04ab, - 0x04ad, 0x04ad, - 0x04af, 0x04af, - 0x04b1, 0x04b1, - 0x04b3, 0x04b3, - 0x04b5, 0x04b5, - 0x04b7, 0x04b7, - 0x04b9, 0x04b9, - 0x04bb, 0x04bb, - 0x04bd, 0x04bd, - 0x04bf, 0x04bf, - 0x04c2, 0x04c2, - 0x04c4, 0x04c4, - 0x04c6, 0x04c6, - 0x04c8, 0x04c8, - 0x04ca, 0x04ca, - 0x04cc, 0x04cc, - 0x04ce, 0x04ce, - 0x04d1, 0x04d1, - 0x04d3, 0x04d3, - 0x04d5, 0x04d5, - 0x04d7, 0x04d7, - 0x04d9, 0x04d9, - 0x04db, 0x04db, - 0x04dd, 0x04dd, - 0x04df, 0x04df, - 0x04e1, 0x04e1, - 0x04e3, 0x04e3, - 0x04e5, 0x04e5, - 0x04e7, 0x04e7, - 0x04e9, 0x04e9, - 0x04eb, 0x04eb, - 0x04ed, 0x04ed, - 0x04ef, 0x04ef, - 0x04f1, 0x04f1, - 0x04f3, 0x04f3, - 0x04f5, 0x04f5, - 0x04f7, 0x04f7, - 0x04f9, 0x04f9, - 0x0501, 0x0501, - 0x0503, 0x0503, - 0x0505, 0x0505, - 0x0507, 0x0507, - 0x0509, 0x0509, - 0x050b, 0x050b, - 0x050d, 0x050d, - 0x050f, 0x050f, - 0x0561, 0x0587, - 0x1d00, 0x1d2b, - 0x1d62, 0x1d77, - 0x1d79, 0x1d9a, - 0x1e01, 0x1e01, - 0x1e03, 
0x1e03, - 0x1e05, 0x1e05, - 0x1e07, 0x1e07, - 0x1e09, 0x1e09, - 0x1e0b, 0x1e0b, - 0x1e0d, 0x1e0d, - 0x1e0f, 0x1e0f, - 0x1e11, 0x1e11, - 0x1e13, 0x1e13, - 0x1e15, 0x1e15, - 0x1e17, 0x1e17, - 0x1e19, 0x1e19, - 0x1e1b, 0x1e1b, - 0x1e1d, 0x1e1d, - 0x1e1f, 0x1e1f, - 0x1e21, 0x1e21, - 0x1e23, 0x1e23, - 0x1e25, 0x1e25, - 0x1e27, 0x1e27, - 0x1e29, 0x1e29, - 0x1e2b, 0x1e2b, - 0x1e2d, 0x1e2d, - 0x1e2f, 0x1e2f, - 0x1e31, 0x1e31, - 0x1e33, 0x1e33, - 0x1e35, 0x1e35, - 0x1e37, 0x1e37, - 0x1e39, 0x1e39, - 0x1e3b, 0x1e3b, - 0x1e3d, 0x1e3d, - 0x1e3f, 0x1e3f, - 0x1e41, 0x1e41, - 0x1e43, 0x1e43, - 0x1e45, 0x1e45, - 0x1e47, 0x1e47, - 0x1e49, 0x1e49, - 0x1e4b, 0x1e4b, - 0x1e4d, 0x1e4d, - 0x1e4f, 0x1e4f, - 0x1e51, 0x1e51, - 0x1e53, 0x1e53, - 0x1e55, 0x1e55, - 0x1e57, 0x1e57, - 0x1e59, 0x1e59, - 0x1e5b, 0x1e5b, - 0x1e5d, 0x1e5d, - 0x1e5f, 0x1e5f, - 0x1e61, 0x1e61, - 0x1e63, 0x1e63, - 0x1e65, 0x1e65, - 0x1e67, 0x1e67, - 0x1e69, 0x1e69, - 0x1e6b, 0x1e6b, - 0x1e6d, 0x1e6d, - 0x1e6f, 0x1e6f, - 0x1e71, 0x1e71, - 0x1e73, 0x1e73, - 0x1e75, 0x1e75, - 0x1e77, 0x1e77, - 0x1e79, 0x1e79, - 0x1e7b, 0x1e7b, - 0x1e7d, 0x1e7d, - 0x1e7f, 0x1e7f, - 0x1e81, 0x1e81, - 0x1e83, 0x1e83, - 0x1e85, 0x1e85, - 0x1e87, 0x1e87, - 0x1e89, 0x1e89, - 0x1e8b, 0x1e8b, - 0x1e8d, 0x1e8d, - 0x1e8f, 0x1e8f, - 0x1e91, 0x1e91, - 0x1e93, 0x1e93, - 0x1e95, 0x1e9b, - 0x1ea1, 0x1ea1, - 0x1ea3, 0x1ea3, - 0x1ea5, 0x1ea5, - 0x1ea7, 0x1ea7, - 0x1ea9, 0x1ea9, - 0x1eab, 0x1eab, - 0x1ead, 0x1ead, - 0x1eaf, 0x1eaf, - 0x1eb1, 0x1eb1, - 0x1eb3, 0x1eb3, - 0x1eb5, 0x1eb5, - 0x1eb7, 0x1eb7, - 0x1eb9, 0x1eb9, - 0x1ebb, 0x1ebb, - 0x1ebd, 0x1ebd, - 0x1ebf, 0x1ebf, - 0x1ec1, 0x1ec1, - 0x1ec3, 0x1ec3, - 0x1ec5, 0x1ec5, - 0x1ec7, 0x1ec7, - 0x1ec9, 0x1ec9, - 0x1ecb, 0x1ecb, - 0x1ecd, 0x1ecd, - 0x1ecf, 0x1ecf, - 0x1ed1, 0x1ed1, - 0x1ed3, 0x1ed3, - 0x1ed5, 0x1ed5, - 0x1ed7, 0x1ed7, - 0x1ed9, 0x1ed9, - 0x1edb, 0x1edb, - 0x1edd, 0x1edd, - 0x1edf, 0x1edf, - 0x1ee1, 0x1ee1, - 0x1ee3, 0x1ee3, - 0x1ee5, 0x1ee5, - 0x1ee7, 0x1ee7, - 0x1ee9, 0x1ee9, - 0x1eeb, 
0x1eeb, - 0x1eed, 0x1eed, - 0x1eef, 0x1eef, - 0x1ef1, 0x1ef1, - 0x1ef3, 0x1ef3, - 0x1ef5, 0x1ef5, - 0x1ef7, 0x1ef7, - 0x1ef9, 0x1ef9, - 0x1f00, 0x1f07, - 0x1f10, 0x1f15, - 0x1f20, 0x1f27, - 0x1f30, 0x1f37, - 0x1f40, 0x1f45, - 0x1f50, 0x1f57, - 0x1f60, 0x1f67, - 0x1f70, 0x1f7d, - 0x1f80, 0x1f87, - 0x1f90, 0x1f97, - 0x1fa0, 0x1fa7, - 0x1fb0, 0x1fb4, - 0x1fb6, 0x1fb7, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fc7, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fd7, - 0x1fe0, 0x1fe7, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ff7, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x210a, 0x210a, - 0x210e, 0x210f, - 0x2113, 0x2113, - 0x212f, 0x212f, - 0x2134, 0x2134, - 0x2139, 0x2139, - 0x213c, 0x213d, - 0x2146, 0x2149, - 0x2c30, 0x2c5e, - 0x2c81, 0x2c81, - 0x2c83, 0x2c83, - 0x2c85, 0x2c85, - 0x2c87, 0x2c87, - 0x2c89, 0x2c89, - 0x2c8b, 0x2c8b, - 0x2c8d, 0x2c8d, - 0x2c8f, 0x2c8f, - 0x2c91, 0x2c91, - 0x2c93, 0x2c93, - 0x2c95, 0x2c95, - 0x2c97, 0x2c97, - 0x2c99, 0x2c99, - 0x2c9b, 0x2c9b, - 0x2c9d, 0x2c9d, - 0x2c9f, 0x2c9f, - 0x2ca1, 0x2ca1, - 0x2ca3, 0x2ca3, - 0x2ca5, 0x2ca5, - 0x2ca7, 0x2ca7, - 0x2ca9, 0x2ca9, - 0x2cab, 0x2cab, - 0x2cad, 0x2cad, - 0x2caf, 0x2caf, - 0x2cb1, 0x2cb1, - 0x2cb3, 0x2cb3, - 0x2cb5, 0x2cb5, - 0x2cb7, 0x2cb7, - 0x2cb9, 0x2cb9, - 0x2cbb, 0x2cbb, - 0x2cbd, 0x2cbd, - 0x2cbf, 0x2cbf, - 0x2cc1, 0x2cc1, - 0x2cc3, 0x2cc3, - 0x2cc5, 0x2cc5, - 0x2cc7, 0x2cc7, - 0x2cc9, 0x2cc9, - 0x2ccb, 0x2ccb, - 0x2ccd, 0x2ccd, - 0x2ccf, 0x2ccf, - 0x2cd1, 0x2cd1, - 0x2cd3, 0x2cd3, - 0x2cd5, 0x2cd5, - 0x2cd7, 0x2cd7, - 0x2cd9, 0x2cd9, - 0x2cdb, 0x2cdb, - 0x2cdd, 0x2cdd, - 0x2cdf, 0x2cdf, - 0x2ce1, 0x2ce1, - 0x2ce3, 0x2ce4, - 0x2d00, 0x2d25, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xff41, 0xff5a, - 0x10428, 0x1044f, - 0x1d41a, 0x1d433, - 0x1d44e, 0x1d454, - 0x1d456, 0x1d467, - 0x1d482, 0x1d49b, - 0x1d4b6, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d4cf, - 0x1d4ea, 0x1d503, - 0x1d51e, 0x1d537, - 0x1d552, 0x1d56b, - 0x1d586, 0x1d59f, - 0x1d5ba, 0x1d5d3, - 0x1d5ee, 0x1d607, - 0x1d622, 
0x1d63b, - 0x1d656, 0x1d66f, - 0x1d68a, 0x1d6a5, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6e1, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d71b, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d755, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d78f, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9 -}; /* CR_Lower */ - -/* 'Print': [[:Print:]] */ -static const OnigCodePoint CR_Print[] = { - 423, - 0x0009, 0x000d, - 0x0020, 0x007e, - 0x0085, 0x0085, - 0x00a0, 0x0241, - 0x0250, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 0x05b9, - 0x05bb, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060b, 0x0615, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x065e, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 
0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fd1, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 
0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x137c, - 0x1380, 0x1399, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 0x1680, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180e, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x19de, 0x1a1b, - 0x1a1e, 0x1a1f, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x2000, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x2090, 0x2094, - 0x20a0, 0x20b5, - 0x20d0, 0x20eb, - 0x2100, 0x214c, - 0x2153, 0x2183, - 0x2190, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27c0, 0x27c6, - 0x27d0, 0x27eb, - 0x27f0, 0x2b13, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2cea, - 0x2cf9, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3000, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 
0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31c0, 0x31cf, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fbb, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xa700, 0xa716, - 0xa800, 0xa82b, - 0xac00, 0xd7a3, - 0xe000, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe19, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1018a, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x103c3, - 0x103c8, 0x103d5, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a47, - 0x10a50, 0x10a58, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d200, 0x1d245, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, 
- 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Print */ - -/* 'Punct': [[:Punct:]] */ -static const OnigCodePoint CR_Punct[] = { - 96, - 0x0021, 0x0023, - 0x0025, 0x002a, - 0x002c, 0x002f, - 0x003a, 0x003b, - 0x003f, 0x0040, - 0x005b, 0x005d, - 0x005f, 0x005f, - 0x007b, 0x007b, - 0x007d, 0x007d, - 0x00a1, 0x00a1, - 0x00ab, 0x00ab, - 0x00b7, 0x00b7, - 0x00bb, 0x00bb, - 0x00bf, 0x00bf, - 0x037e, 0x037e, - 0x0387, 0x0387, - 0x055a, 0x055f, - 0x0589, 0x058a, - 0x05be, 0x05be, - 0x05c0, 0x05c0, - 0x05c3, 0x05c3, - 0x05c6, 0x05c6, - 0x05f3, 0x05f4, - 0x060c, 0x060d, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x066a, 0x066d, - 0x06d4, 0x06d4, - 0x0700, 0x070d, - 0x0964, 0x0965, - 0x0970, 0x0970, - 0x0df4, 0x0df4, - 0x0e4f, 0x0e4f, - 0x0e5a, 0x0e5b, - 0x0f04, 0x0f12, - 0x0f3a, 0x0f3d, - 0x0f85, 0x0f85, - 0x0fd0, 0x0fd1, - 0x104a, 0x104f, - 0x10fb, 0x10fb, - 0x1361, 0x1368, - 0x166d, 0x166e, - 0x169b, 0x169c, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x17d4, 0x17d6, - 0x17d8, 0x17da, - 0x1800, 0x180a, - 0x1944, 0x1945, - 0x19de, 0x19df, - 0x1a1e, 0x1a1f, - 0x2010, 0x2027, - 0x2030, 0x2043, - 0x2045, 0x2051, - 0x2053, 0x205e, - 0x207d, 0x207e, - 0x208d, 0x208e, - 0x2329, 0x232a, - 0x23b4, 0x23b6, - 0x2768, 0x2775, - 0x27c5, 0x27c6, - 0x27e6, 0x27eb, - 0x2983, 0x2998, - 0x29d8, 0x29db, - 0x29fc, 0x29fd, - 0x2cf9, 0x2cfc, - 0x2cfe, 0x2cff, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x3001, 0x3003, - 0x3008, 0x3011, - 0x3014, 0x301f, - 0x3030, 0x3030, - 0x303d, 0x303d, - 0x30a0, 0x30a0, - 0x30fb, 0x30fb, - 0xfd3e, 0xfd3f, - 0xfe10, 0xfe19, - 0xfe30, 0xfe52, - 0xfe54, 0xfe61, - 0xfe63, 0xfe63, - 0xfe68, 0xfe68, - 0xfe6a, 0xfe6b, - 0xff01, 0xff03, - 0xff05, 0xff0a, - 0xff0c, 0xff0f, - 0xff1a, 0xff1b, - 0xff1f, 0xff20, - 0xff3b, 0xff3d, - 0xff3f, 0xff3f, - 0xff5b, 0xff5b, - 0xff5d, 0xff5d, - 0xff5f, 0xff65, - 0x10100, 0x10101, - 0x1039f, 0x1039f, - 0x10a50, 0x10a58 -}; /* CR_Punct */ - -/* 'Space': [[:Space:]] */ -static const 
OnigCodePoint CR_Space[] = { - 11, - 0x0009, 0x000d, - 0x0020, 0x0020, - 0x0085, 0x0085, - 0x00a0, 0x00a0, - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x2028, 0x2029, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; /* CR_Space */ - -/* 'Upper': [[:Upper:]] */ -static const OnigCodePoint CR_Upper[] = { - 476, - 0x0041, 0x005a, - 0x00c0, 0x00d6, - 0x00d8, 0x00de, - 0x0100, 0x0100, - 0x0102, 0x0102, - 0x0104, 0x0104, - 0x0106, 0x0106, - 0x0108, 0x0108, - 0x010a, 0x010a, - 0x010c, 0x010c, - 0x010e, 0x010e, - 0x0110, 0x0110, - 0x0112, 0x0112, - 0x0114, 0x0114, - 0x0116, 0x0116, - 0x0118, 0x0118, - 0x011a, 0x011a, - 0x011c, 0x011c, - 0x011e, 0x011e, - 0x0120, 0x0120, - 0x0122, 0x0122, - 0x0124, 0x0124, - 0x0126, 0x0126, - 0x0128, 0x0128, - 0x012a, 0x012a, - 0x012c, 0x012c, - 0x012e, 0x012e, - 0x0130, 0x0130, - 0x0132, 0x0132, - 0x0134, 0x0134, - 0x0136, 0x0136, - 0x0139, 0x0139, - 0x013b, 0x013b, - 0x013d, 0x013d, - 0x013f, 0x013f, - 0x0141, 0x0141, - 0x0143, 0x0143, - 0x0145, 0x0145, - 0x0147, 0x0147, - 0x014a, 0x014a, - 0x014c, 0x014c, - 0x014e, 0x014e, - 0x0150, 0x0150, - 0x0152, 0x0152, - 0x0154, 0x0154, - 0x0156, 0x0156, - 0x0158, 0x0158, - 0x015a, 0x015a, - 0x015c, 0x015c, - 0x015e, 0x015e, - 0x0160, 0x0160, - 0x0162, 0x0162, - 0x0164, 0x0164, - 0x0166, 0x0166, - 0x0168, 0x0168, - 0x016a, 0x016a, - 0x016c, 0x016c, - 0x016e, 0x016e, - 0x0170, 0x0170, - 0x0172, 0x0172, - 0x0174, 0x0174, - 0x0176, 0x0176, - 0x0178, 0x0179, - 0x017b, 0x017b, - 0x017d, 0x017d, - 0x0181, 0x0182, - 0x0184, 0x0184, - 0x0186, 0x0187, - 0x0189, 0x018b, - 0x018e, 0x0191, - 0x0193, 0x0194, - 0x0196, 0x0198, - 0x019c, 0x019d, - 0x019f, 0x01a0, - 0x01a2, 0x01a2, - 0x01a4, 0x01a4, - 0x01a6, 0x01a7, - 0x01a9, 0x01a9, - 0x01ac, 0x01ac, - 0x01ae, 0x01af, - 0x01b1, 0x01b3, - 0x01b5, 0x01b5, - 0x01b7, 0x01b8, - 0x01bc, 0x01bc, - 0x01c4, 0x01c4, - 0x01c7, 0x01c7, - 0x01ca, 0x01ca, - 0x01cd, 0x01cd, - 0x01cf, 0x01cf, - 0x01d1, 0x01d1, - 0x01d3, 0x01d3, - 0x01d5, 0x01d5, - 0x01d7, 
0x01d7, - 0x01d9, 0x01d9, - 0x01db, 0x01db, - 0x01de, 0x01de, - 0x01e0, 0x01e0, - 0x01e2, 0x01e2, - 0x01e4, 0x01e4, - 0x01e6, 0x01e6, - 0x01e8, 0x01e8, - 0x01ea, 0x01ea, - 0x01ec, 0x01ec, - 0x01ee, 0x01ee, - 0x01f1, 0x01f1, - 0x01f4, 0x01f4, - 0x01f6, 0x01f8, - 0x01fa, 0x01fa, - 0x01fc, 0x01fc, - 0x01fe, 0x01fe, - 0x0200, 0x0200, - 0x0202, 0x0202, - 0x0204, 0x0204, - 0x0206, 0x0206, - 0x0208, 0x0208, - 0x020a, 0x020a, - 0x020c, 0x020c, - 0x020e, 0x020e, - 0x0210, 0x0210, - 0x0212, 0x0212, - 0x0214, 0x0214, - 0x0216, 0x0216, - 0x0218, 0x0218, - 0x021a, 0x021a, - 0x021c, 0x021c, - 0x021e, 0x021e, - 0x0220, 0x0220, - 0x0222, 0x0222, - 0x0224, 0x0224, - 0x0226, 0x0226, - 0x0228, 0x0228, - 0x022a, 0x022a, - 0x022c, 0x022c, - 0x022e, 0x022e, - 0x0230, 0x0230, - 0x0232, 0x0232, - 0x023a, 0x023b, - 0x023d, 0x023e, - 0x0241, 0x0241, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x038f, - 0x0391, 0x03a1, - 0x03a3, 0x03ab, - 0x03d2, 0x03d4, - 0x03d8, 0x03d8, - 0x03da, 0x03da, - 0x03dc, 0x03dc, - 0x03de, 0x03de, - 0x03e0, 0x03e0, - 0x03e2, 0x03e2, - 0x03e4, 0x03e4, - 0x03e6, 0x03e6, - 0x03e8, 0x03e8, - 0x03ea, 0x03ea, - 0x03ec, 0x03ec, - 0x03ee, 0x03ee, - 0x03f4, 0x03f4, - 0x03f7, 0x03f7, - 0x03f9, 0x03fa, - 0x03fd, 0x042f, - 0x0460, 0x0460, - 0x0462, 0x0462, - 0x0464, 0x0464, - 0x0466, 0x0466, - 0x0468, 0x0468, - 0x046a, 0x046a, - 0x046c, 0x046c, - 0x046e, 0x046e, - 0x0470, 0x0470, - 0x0472, 0x0472, - 0x0474, 0x0474, - 0x0476, 0x0476, - 0x0478, 0x0478, - 0x047a, 0x047a, - 0x047c, 0x047c, - 0x047e, 0x047e, - 0x0480, 0x0480, - 0x048a, 0x048a, - 0x048c, 0x048c, - 0x048e, 0x048e, - 0x0490, 0x0490, - 0x0492, 0x0492, - 0x0494, 0x0494, - 0x0496, 0x0496, - 0x0498, 0x0498, - 0x049a, 0x049a, - 0x049c, 0x049c, - 0x049e, 0x049e, - 0x04a0, 0x04a0, - 0x04a2, 0x04a2, - 0x04a4, 0x04a4, - 0x04a6, 0x04a6, - 0x04a8, 0x04a8, - 0x04aa, 0x04aa, - 0x04ac, 0x04ac, - 0x04ae, 0x04ae, - 0x04b0, 0x04b0, - 0x04b2, 0x04b2, - 0x04b4, 0x04b4, - 0x04b6, 0x04b6, - 0x04b8, 0x04b8, - 0x04ba, 
0x04ba, - 0x04bc, 0x04bc, - 0x04be, 0x04be, - 0x04c0, 0x04c1, - 0x04c3, 0x04c3, - 0x04c5, 0x04c5, - 0x04c7, 0x04c7, - 0x04c9, 0x04c9, - 0x04cb, 0x04cb, - 0x04cd, 0x04cd, - 0x04d0, 0x04d0, - 0x04d2, 0x04d2, - 0x04d4, 0x04d4, - 0x04d6, 0x04d6, - 0x04d8, 0x04d8, - 0x04da, 0x04da, - 0x04dc, 0x04dc, - 0x04de, 0x04de, - 0x04e0, 0x04e0, - 0x04e2, 0x04e2, - 0x04e4, 0x04e4, - 0x04e6, 0x04e6, - 0x04e8, 0x04e8, - 0x04ea, 0x04ea, - 0x04ec, 0x04ec, - 0x04ee, 0x04ee, - 0x04f0, 0x04f0, - 0x04f2, 0x04f2, - 0x04f4, 0x04f4, - 0x04f6, 0x04f6, - 0x04f8, 0x04f8, - 0x0500, 0x0500, - 0x0502, 0x0502, - 0x0504, 0x0504, - 0x0506, 0x0506, - 0x0508, 0x0508, - 0x050a, 0x050a, - 0x050c, 0x050c, - 0x050e, 0x050e, - 0x0531, 0x0556, - 0x10a0, 0x10c5, - 0x1e00, 0x1e00, - 0x1e02, 0x1e02, - 0x1e04, 0x1e04, - 0x1e06, 0x1e06, - 0x1e08, 0x1e08, - 0x1e0a, 0x1e0a, - 0x1e0c, 0x1e0c, - 0x1e0e, 0x1e0e, - 0x1e10, 0x1e10, - 0x1e12, 0x1e12, - 0x1e14, 0x1e14, - 0x1e16, 0x1e16, - 0x1e18, 0x1e18, - 0x1e1a, 0x1e1a, - 0x1e1c, 0x1e1c, - 0x1e1e, 0x1e1e, - 0x1e20, 0x1e20, - 0x1e22, 0x1e22, - 0x1e24, 0x1e24, - 0x1e26, 0x1e26, - 0x1e28, 0x1e28, - 0x1e2a, 0x1e2a, - 0x1e2c, 0x1e2c, - 0x1e2e, 0x1e2e, - 0x1e30, 0x1e30, - 0x1e32, 0x1e32, - 0x1e34, 0x1e34, - 0x1e36, 0x1e36, - 0x1e38, 0x1e38, - 0x1e3a, 0x1e3a, - 0x1e3c, 0x1e3c, - 0x1e3e, 0x1e3e, - 0x1e40, 0x1e40, - 0x1e42, 0x1e42, - 0x1e44, 0x1e44, - 0x1e46, 0x1e46, - 0x1e48, 0x1e48, - 0x1e4a, 0x1e4a, - 0x1e4c, 0x1e4c, - 0x1e4e, 0x1e4e, - 0x1e50, 0x1e50, - 0x1e52, 0x1e52, - 0x1e54, 0x1e54, - 0x1e56, 0x1e56, - 0x1e58, 0x1e58, - 0x1e5a, 0x1e5a, - 0x1e5c, 0x1e5c, - 0x1e5e, 0x1e5e, - 0x1e60, 0x1e60, - 0x1e62, 0x1e62, - 0x1e64, 0x1e64, - 0x1e66, 0x1e66, - 0x1e68, 0x1e68, - 0x1e6a, 0x1e6a, - 0x1e6c, 0x1e6c, - 0x1e6e, 0x1e6e, - 0x1e70, 0x1e70, - 0x1e72, 0x1e72, - 0x1e74, 0x1e74, - 0x1e76, 0x1e76, - 0x1e78, 0x1e78, - 0x1e7a, 0x1e7a, - 0x1e7c, 0x1e7c, - 0x1e7e, 0x1e7e, - 0x1e80, 0x1e80, - 0x1e82, 0x1e82, - 0x1e84, 0x1e84, - 0x1e86, 0x1e86, - 0x1e88, 0x1e88, - 0x1e8a, 0x1e8a, - 0x1e8c, 
0x1e8c, - 0x1e8e, 0x1e8e, - 0x1e90, 0x1e90, - 0x1e92, 0x1e92, - 0x1e94, 0x1e94, - 0x1ea0, 0x1ea0, - 0x1ea2, 0x1ea2, - 0x1ea4, 0x1ea4, - 0x1ea6, 0x1ea6, - 0x1ea8, 0x1ea8, - 0x1eaa, 0x1eaa, - 0x1eac, 0x1eac, - 0x1eae, 0x1eae, - 0x1eb0, 0x1eb0, - 0x1eb2, 0x1eb2, - 0x1eb4, 0x1eb4, - 0x1eb6, 0x1eb6, - 0x1eb8, 0x1eb8, - 0x1eba, 0x1eba, - 0x1ebc, 0x1ebc, - 0x1ebe, 0x1ebe, - 0x1ec0, 0x1ec0, - 0x1ec2, 0x1ec2, - 0x1ec4, 0x1ec4, - 0x1ec6, 0x1ec6, - 0x1ec8, 0x1ec8, - 0x1eca, 0x1eca, - 0x1ecc, 0x1ecc, - 0x1ece, 0x1ece, - 0x1ed0, 0x1ed0, - 0x1ed2, 0x1ed2, - 0x1ed4, 0x1ed4, - 0x1ed6, 0x1ed6, - 0x1ed8, 0x1ed8, - 0x1eda, 0x1eda, - 0x1edc, 0x1edc, - 0x1ede, 0x1ede, - 0x1ee0, 0x1ee0, - 0x1ee2, 0x1ee2, - 0x1ee4, 0x1ee4, - 0x1ee6, 0x1ee6, - 0x1ee8, 0x1ee8, - 0x1eea, 0x1eea, - 0x1eec, 0x1eec, - 0x1eee, 0x1eee, - 0x1ef0, 0x1ef0, - 0x1ef2, 0x1ef2, - 0x1ef4, 0x1ef4, - 0x1ef6, 0x1ef6, - 0x1ef8, 0x1ef8, - 0x1f08, 0x1f0f, - 0x1f18, 0x1f1d, - 0x1f28, 0x1f2f, - 0x1f38, 0x1f3f, - 0x1f48, 0x1f4d, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f5f, - 0x1f68, 0x1f6f, - 0x1fb8, 0x1fbb, - 0x1fc8, 0x1fcb, - 0x1fd8, 0x1fdb, - 0x1fe8, 0x1fec, - 0x1ff8, 0x1ffb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210b, 0x210d, - 0x2110, 0x2112, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x2130, 0x2131, - 0x2133, 0x2133, - 0x213e, 0x213f, - 0x2145, 0x2145, - 0x2c00, 0x2c2e, - 0x2c80, 0x2c80, - 0x2c82, 0x2c82, - 0x2c84, 0x2c84, - 0x2c86, 0x2c86, - 0x2c88, 0x2c88, - 0x2c8a, 0x2c8a, - 0x2c8c, 0x2c8c, - 0x2c8e, 0x2c8e, - 0x2c90, 0x2c90, - 0x2c92, 0x2c92, - 0x2c94, 0x2c94, - 0x2c96, 0x2c96, - 0x2c98, 0x2c98, - 0x2c9a, 0x2c9a, - 0x2c9c, 0x2c9c, - 0x2c9e, 0x2c9e, - 0x2ca0, 0x2ca0, - 0x2ca2, 0x2ca2, - 0x2ca4, 0x2ca4, - 0x2ca6, 0x2ca6, - 0x2ca8, 0x2ca8, - 0x2caa, 0x2caa, - 0x2cac, 0x2cac, - 0x2cae, 0x2cae, - 0x2cb0, 0x2cb0, - 0x2cb2, 0x2cb2, - 0x2cb4, 0x2cb4, - 0x2cb6, 0x2cb6, - 0x2cb8, 0x2cb8, - 0x2cba, 0x2cba, - 0x2cbc, 0x2cbc, - 0x2cbe, 
0x2cbe, - 0x2cc0, 0x2cc0, - 0x2cc2, 0x2cc2, - 0x2cc4, 0x2cc4, - 0x2cc6, 0x2cc6, - 0x2cc8, 0x2cc8, - 0x2cca, 0x2cca, - 0x2ccc, 0x2ccc, - 0x2cce, 0x2cce, - 0x2cd0, 0x2cd0, - 0x2cd2, 0x2cd2, - 0x2cd4, 0x2cd4, - 0x2cd6, 0x2cd6, - 0x2cd8, 0x2cd8, - 0x2cda, 0x2cda, - 0x2cdc, 0x2cdc, - 0x2cde, 0x2cde, - 0x2ce0, 0x2ce0, - 0x2ce2, 0x2ce2, - 0xff21, 0xff3a, - 0x10400, 0x10427, - 0x1d400, 0x1d419, - 0x1d434, 0x1d44d, - 0x1d468, 0x1d481, - 0x1d49c, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b5, - 0x1d4d0, 0x1d4e9, - 0x1d504, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d538, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d56c, 0x1d585, - 0x1d5a0, 0x1d5b9, - 0x1d5d4, 0x1d5ed, - 0x1d608, 0x1d621, - 0x1d63c, 0x1d655, - 0x1d670, 0x1d689, - 0x1d6a8, 0x1d6c0, - 0x1d6e2, 0x1d6fa, - 0x1d71c, 0x1d734, - 0x1d756, 0x1d76e, - 0x1d790, 0x1d7a8 -}; /* CR_Upper */ - -/* 'XDigit': [[:XDigit:]] */ -static const OnigCodePoint CR_XDigit[] = { - 3, - 0x0030, 0x0039, - 0x0041, 0x0046, - 0x0061, 0x0066 -}; /* CR_XDigit */ - -/* 'Word': [[:Word:]] */ -static const OnigCodePoint CR_Word[] = { - 464, - 0x0030, 0x0039, - 0x0041, 0x005a, - 0x005f, 0x005f, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b2, 0x00b3, - 0x00b5, 0x00b5, - 0x00b9, 0x00ba, - 0x00bc, 0x00be, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0241, - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c5, - 0x05c7, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x065e, - 
0x0660, 0x0669, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09f1, - 0x09f4, 0x09f9, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b6f, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bf2, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 
0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e50, 0x0e59, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f20, 0x0f33, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1049, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fa, - 0x10fc, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x135f, - 0x1369, 0x137c, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x16ee, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x180b, 0x180d, - 0x1810, 0x1819, - 
0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x1a00, 0x1a1b, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x203f, 0x2040, - 0x2054, 0x2054, - 0x2070, 0x2071, - 0x2074, 0x2079, - 0x207f, 0x2089, - 0x2090, 0x2094, - 0x20d0, 0x20eb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x2153, 0x2183, - 0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2ce4, - 0x2cfd, 0x2cfd, - 0x2d00, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3005, 0x3007, - 0x3021, 0x302f, - 0x3031, 0x3035, - 0x3038, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3192, 0x3195, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3220, 0x3229, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xa000, 0xa48c, - 0xa800, 0xa827, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 
0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe33, 0xfe34, - 0xfe4d, 0xfe4f, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff3f, 0xff3f, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10107, 0x10133, - 0x10140, 0x10178, - 0x1018a, 0x1018a, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x103d1, 0x103d5, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a47, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -}; /* CR_Word */ - -/* 'Alnum': [[:Alnum:]] */ -static const OnigCodePoint CR_Alnum[] = { - 436, - 0x0030, 0x0039, - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0241, - 0x0250, 
0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x0300, 0x036f, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x0481, - 0x0483, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x0591, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c5, - 0x05c7, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0610, 0x0615, - 0x0621, 0x063a, - 0x0640, 0x065e, - 0x0660, 0x0669, - 0x066e, 0x06d3, - 0x06d5, 0x06dc, - 0x06de, 0x06e8, - 0x06ea, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09f1, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b6f, - 0x0b71, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 
0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bef, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e4e, - 0x0e50, 0x0e59, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f18, 0x0f19, - 0x0f20, 0x0f29, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f84, - 0x0f86, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1049, - 0x1050, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fa, - 0x10fc, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 
0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x135f, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1734, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17b3, - 0x17b6, 0x17d3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dd, - 0x17e0, 0x17e9, - 0x180b, 0x180d, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x1a00, 0x1a1b, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x2090, 0x2094, - 0x20d0, 0x20eb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2ce4, - 0x2d00, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3005, 0x3006, - 0x302a, 0x302f, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x3099, 0x309a, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xa000, 0xa48c, - 0xa800, 0xa827, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 
0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a3f, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0100, 0xe01ef -}; /* CR_Alnum */ - -/* 'ASCII': [[:ASCII:]] */ -static const OnigCodePoint CR_ASCII[] = { - 1, - 0x0000, 0x007f -}; /* CR_ASCII */ - -#ifdef USE_UNICODE_PROPERTIES - -/* 'Any': - */ 
-static const OnigCodePoint CR_Any[] = { - 1, - 0x0000, 0x10ffff -}; /* CR_Any */ - -/* 'Assigned': - */ -static const OnigCodePoint CR_Assigned[] = { - 420, - 0x0000, 0x0241, - 0x0250, 0x036f, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x037e, 0x037e, - 0x0384, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x055f, - 0x0561, 0x0587, - 0x0589, 0x058a, - 0x0591, 0x05b9, - 0x05bb, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0x0600, 0x0603, - 0x060b, 0x0615, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x0621, 0x063a, - 0x0640, 0x065e, - 0x0660, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x076d, - 0x0780, 0x07b1, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0970, - 0x097d, 0x097d, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 
0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bfa, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4, - 0x0e01, 0x0e3a, - 0x0e3f, 0x0e5b, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fd1, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059, - 0x10a0, 0x10c5, - 0x10d0, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x137c, - 0x1380, 0x1399, - 0x13a0, 0x13f4, - 0x1401, 0x1676, - 
0x1680, 0x169c, - 0x16a0, 0x16f0, - 0x1700, 0x170c, - 0x170e, 0x1714, - 0x1720, 0x1736, - 0x1740, 0x1753, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1800, 0x180e, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x19de, 0x1a1b, - 0x1a1e, 0x1a1f, - 0x1d00, 0x1dc3, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x2000, 0x2063, - 0x206a, 0x2071, - 0x2074, 0x208e, - 0x2090, 0x2094, - 0x20a0, 0x20b5, - 0x20d0, 0x20eb, - 0x2100, 0x214c, - 0x2153, 0x2183, - 0x2190, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27c0, 0x27c6, - 0x27d0, 0x27eb, - 0x27f0, 0x2b13, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2cea, - 0x2cf9, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3000, 0x303f, - 0x3041, 0x3096, - 0x3099, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x3190, 0x31b7, - 0x31c0, 0x31cf, - 0x31f0, 0x321e, - 0x3220, 0x3243, - 0x3250, 0x32fe, - 0x3300, 0x4db5, - 0x4dc0, 0x9fbb, - 0xa000, 0xa48c, - 0xa490, 0xa4c6, - 0xa700, 0xa716, - 0xa800, 0xa82b, - 0xac00, 0xd7a3, - 
0xd800, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3f, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe00, 0xfe19, - 0xfe20, 0xfe23, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xfeff, 0xfeff, - 0xff01, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1018a, - 0x10300, 0x1031e, - 0x10320, 0x10323, - 0x10330, 0x1034a, - 0x10380, 0x1039d, - 0x1039f, 0x103c3, - 0x103c8, 0x103d5, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a47, - 0x10a50, 0x10a58, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d1dd, - 0x1d200, 0x1d245, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xe0100, 0xe01ef, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Assigned */ - -/* 'C': Major Category */ -static const OnigCodePoint CR_C[] = { - 422, - 0x0000, 0x001f, - 0x007f, 0x009f, - 0x00ad, 
0x00ad, - 0x0242, 0x024f, - 0x0370, 0x0373, - 0x0376, 0x0379, - 0x037b, 0x037d, - 0x037f, 0x0383, - 0x038b, 0x038b, - 0x038d, 0x038d, - 0x03a2, 0x03a2, - 0x03cf, 0x03cf, - 0x0487, 0x0487, - 0x04cf, 0x04cf, - 0x04fa, 0x04ff, - 0x0510, 0x0530, - 0x0557, 0x0558, - 0x0560, 0x0560, - 0x0588, 0x0588, - 0x058b, 0x0590, - 0x05ba, 0x05ba, - 0x05c8, 0x05cf, - 0x05eb, 0x05ef, - 0x05f5, 0x060a, - 0x0616, 0x061a, - 0x061c, 0x061d, - 0x0620, 0x0620, - 0x063b, 0x063f, - 0x065f, 0x065f, - 0x06dd, 0x06dd, - 0x070e, 0x070f, - 0x074b, 0x074c, - 0x076e, 0x077f, - 0x07b2, 0x0900, - 0x093a, 0x093b, - 0x094e, 0x094f, - 0x0955, 0x0957, - 0x0971, 0x097c, - 0x097e, 0x0980, - 0x0984, 0x0984, - 0x098d, 0x098e, - 0x0991, 0x0992, - 0x09a9, 0x09a9, - 0x09b1, 0x09b1, - 0x09b3, 0x09b5, - 0x09ba, 0x09bb, - 0x09c5, 0x09c6, - 0x09c9, 0x09ca, - 0x09cf, 0x09d6, - 0x09d8, 0x09db, - 0x09de, 0x09de, - 0x09e4, 0x09e5, - 0x09fb, 0x0a00, - 0x0a04, 0x0a04, - 0x0a0b, 0x0a0e, - 0x0a11, 0x0a12, - 0x0a29, 0x0a29, - 0x0a31, 0x0a31, - 0x0a34, 0x0a34, - 0x0a37, 0x0a37, - 0x0a3a, 0x0a3b, - 0x0a3d, 0x0a3d, - 0x0a43, 0x0a46, - 0x0a49, 0x0a4a, - 0x0a4e, 0x0a58, - 0x0a5d, 0x0a5d, - 0x0a5f, 0x0a65, - 0x0a75, 0x0a80, - 0x0a84, 0x0a84, - 0x0a8e, 0x0a8e, - 0x0a92, 0x0a92, - 0x0aa9, 0x0aa9, - 0x0ab1, 0x0ab1, - 0x0ab4, 0x0ab4, - 0x0aba, 0x0abb, - 0x0ac6, 0x0ac6, - 0x0aca, 0x0aca, - 0x0ace, 0x0acf, - 0x0ad1, 0x0adf, - 0x0ae4, 0x0ae5, - 0x0af0, 0x0af0, - 0x0af2, 0x0b00, - 0x0b04, 0x0b04, - 0x0b0d, 0x0b0e, - 0x0b11, 0x0b12, - 0x0b29, 0x0b29, - 0x0b31, 0x0b31, - 0x0b34, 0x0b34, - 0x0b3a, 0x0b3b, - 0x0b44, 0x0b46, - 0x0b49, 0x0b4a, - 0x0b4e, 0x0b55, - 0x0b58, 0x0b5b, - 0x0b5e, 0x0b5e, - 0x0b62, 0x0b65, - 0x0b72, 0x0b81, - 0x0b84, 0x0b84, - 0x0b8b, 0x0b8d, - 0x0b91, 0x0b91, - 0x0b96, 0x0b98, - 0x0b9b, 0x0b9b, - 0x0b9d, 0x0b9d, - 0x0ba0, 0x0ba2, - 0x0ba5, 0x0ba7, - 0x0bab, 0x0bad, - 0x0bba, 0x0bbd, - 0x0bc3, 0x0bc5, - 0x0bc9, 0x0bc9, - 0x0bce, 0x0bd6, - 0x0bd8, 0x0be5, - 0x0bfb, 0x0c00, - 0x0c04, 0x0c04, - 0x0c0d, 0x0c0d, - 0x0c11, 
0x0c11, - 0x0c29, 0x0c29, - 0x0c34, 0x0c34, - 0x0c3a, 0x0c3d, - 0x0c45, 0x0c45, - 0x0c49, 0x0c49, - 0x0c4e, 0x0c54, - 0x0c57, 0x0c5f, - 0x0c62, 0x0c65, - 0x0c70, 0x0c81, - 0x0c84, 0x0c84, - 0x0c8d, 0x0c8d, - 0x0c91, 0x0c91, - 0x0ca9, 0x0ca9, - 0x0cb4, 0x0cb4, - 0x0cba, 0x0cbb, - 0x0cc5, 0x0cc5, - 0x0cc9, 0x0cc9, - 0x0cce, 0x0cd4, - 0x0cd7, 0x0cdd, - 0x0cdf, 0x0cdf, - 0x0ce2, 0x0ce5, - 0x0cf0, 0x0d01, - 0x0d04, 0x0d04, - 0x0d0d, 0x0d0d, - 0x0d11, 0x0d11, - 0x0d29, 0x0d29, - 0x0d3a, 0x0d3d, - 0x0d44, 0x0d45, - 0x0d49, 0x0d49, - 0x0d4e, 0x0d56, - 0x0d58, 0x0d5f, - 0x0d62, 0x0d65, - 0x0d70, 0x0d81, - 0x0d84, 0x0d84, - 0x0d97, 0x0d99, - 0x0db2, 0x0db2, - 0x0dbc, 0x0dbc, - 0x0dbe, 0x0dbf, - 0x0dc7, 0x0dc9, - 0x0dcb, 0x0dce, - 0x0dd5, 0x0dd5, - 0x0dd7, 0x0dd7, - 0x0de0, 0x0df1, - 0x0df5, 0x0e00, - 0x0e3b, 0x0e3e, - 0x0e5c, 0x0e80, - 0x0e83, 0x0e83, - 0x0e85, 0x0e86, - 0x0e89, 0x0e89, - 0x0e8b, 0x0e8c, - 0x0e8e, 0x0e93, - 0x0e98, 0x0e98, - 0x0ea0, 0x0ea0, - 0x0ea4, 0x0ea4, - 0x0ea6, 0x0ea6, - 0x0ea8, 0x0ea9, - 0x0eac, 0x0eac, - 0x0eba, 0x0eba, - 0x0ebe, 0x0ebf, - 0x0ec5, 0x0ec5, - 0x0ec7, 0x0ec7, - 0x0ece, 0x0ecf, - 0x0eda, 0x0edb, - 0x0ede, 0x0eff, - 0x0f48, 0x0f48, - 0x0f6b, 0x0f70, - 0x0f8c, 0x0f8f, - 0x0f98, 0x0f98, - 0x0fbd, 0x0fbd, - 0x0fcd, 0x0fce, - 0x0fd2, 0x0fff, - 0x1022, 0x1022, - 0x1028, 0x1028, - 0x102b, 0x102b, - 0x1033, 0x1035, - 0x103a, 0x103f, - 0x105a, 0x109f, - 0x10c6, 0x10cf, - 0x10fd, 0x10ff, - 0x115a, 0x115e, - 0x11a3, 0x11a7, - 0x11fa, 0x11ff, - 0x1249, 0x1249, - 0x124e, 0x124f, - 0x1257, 0x1257, - 0x1259, 0x1259, - 0x125e, 0x125f, - 0x1289, 0x1289, - 0x128e, 0x128f, - 0x12b1, 0x12b1, - 0x12b6, 0x12b7, - 0x12bf, 0x12bf, - 0x12c1, 0x12c1, - 0x12c6, 0x12c7, - 0x12d7, 0x12d7, - 0x1311, 0x1311, - 0x1316, 0x1317, - 0x135b, 0x135e, - 0x137d, 0x137f, - 0x139a, 0x139f, - 0x13f5, 0x1400, - 0x1677, 0x167f, - 0x169d, 0x169f, - 0x16f1, 0x16ff, - 0x170d, 0x170d, - 0x1715, 0x171f, - 0x1737, 0x173f, - 0x1754, 0x175f, - 0x176d, 0x176d, - 0x1771, 0x1771, - 0x1774, 
0x177f, - 0x17b4, 0x17b5, - 0x17de, 0x17df, - 0x17ea, 0x17ef, - 0x17fa, 0x17ff, - 0x180f, 0x180f, - 0x181a, 0x181f, - 0x1878, 0x187f, - 0x18aa, 0x18ff, - 0x191d, 0x191f, - 0x192c, 0x192f, - 0x193c, 0x193f, - 0x1941, 0x1943, - 0x196e, 0x196f, - 0x1975, 0x197f, - 0x19aa, 0x19af, - 0x19ca, 0x19cf, - 0x19da, 0x19dd, - 0x1a1c, 0x1a1d, - 0x1a20, 0x1cff, - 0x1dc4, 0x1dff, - 0x1e9c, 0x1e9f, - 0x1efa, 0x1eff, - 0x1f16, 0x1f17, - 0x1f1e, 0x1f1f, - 0x1f46, 0x1f47, - 0x1f4e, 0x1f4f, - 0x1f58, 0x1f58, - 0x1f5a, 0x1f5a, - 0x1f5c, 0x1f5c, - 0x1f5e, 0x1f5e, - 0x1f7e, 0x1f7f, - 0x1fb5, 0x1fb5, - 0x1fc5, 0x1fc5, - 0x1fd4, 0x1fd5, - 0x1fdc, 0x1fdc, - 0x1ff0, 0x1ff1, - 0x1ff5, 0x1ff5, - 0x1fff, 0x1fff, - 0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x206f, - 0x2072, 0x2073, - 0x208f, 0x208f, - 0x2095, 0x209f, - 0x20b6, 0x20cf, - 0x20ec, 0x20ff, - 0x214d, 0x2152, - 0x2184, 0x218f, - 0x23dc, 0x23ff, - 0x2427, 0x243f, - 0x244b, 0x245f, - 0x269d, 0x269f, - 0x26b2, 0x2700, - 0x2705, 0x2705, - 0x270a, 0x270b, - 0x2728, 0x2728, - 0x274c, 0x274c, - 0x274e, 0x274e, - 0x2753, 0x2755, - 0x2757, 0x2757, - 0x275f, 0x2760, - 0x2795, 0x2797, - 0x27b0, 0x27b0, - 0x27bf, 0x27bf, - 0x27c7, 0x27cf, - 0x27ec, 0x27ef, - 0x2b14, 0x2bff, - 0x2c2f, 0x2c2f, - 0x2c5f, 0x2c7f, - 0x2ceb, 0x2cf8, - 0x2d26, 0x2d2f, - 0x2d66, 0x2d6e, - 0x2d70, 0x2d7f, - 0x2d97, 0x2d9f, - 0x2da7, 0x2da7, - 0x2daf, 0x2daf, - 0x2db7, 0x2db7, - 0x2dbf, 0x2dbf, - 0x2dc7, 0x2dc7, - 0x2dcf, 0x2dcf, - 0x2dd7, 0x2dd7, - 0x2ddf, 0x2dff, - 0x2e18, 0x2e1b, - 0x2e1e, 0x2e7f, - 0x2e9a, 0x2e9a, - 0x2ef4, 0x2eff, - 0x2fd6, 0x2fef, - 0x2ffc, 0x2fff, - 0x3040, 0x3040, - 0x3097, 0x3098, - 0x3100, 0x3104, - 0x312d, 0x3130, - 0x318f, 0x318f, - 0x31b8, 0x31bf, - 0x31d0, 0x31ef, - 0x321f, 0x321f, - 0x3244, 0x324f, - 0x32ff, 0x32ff, - 0x4db6, 0x4dbf, - 0x9fbc, 0x9fff, - 0xa48d, 0xa48f, - 0xa4c7, 0xa6ff, - 0xa717, 0xa7ff, - 0xa82c, 0xabff, - 0xd7a4, 0xf8ff, - 0xfa2e, 0xfa2f, - 0xfa6b, 0xfa6f, - 0xfada, 0xfaff, - 0xfb07, 0xfb12, - 0xfb18, 0xfb1c, - 0xfb37, 
0xfb37, - 0xfb3d, 0xfb3d, - 0xfb3f, 0xfb3f, - 0xfb42, 0xfb42, - 0xfb45, 0xfb45, - 0xfbb2, 0xfbd2, - 0xfd40, 0xfd4f, - 0xfd90, 0xfd91, - 0xfdc8, 0xfdef, - 0xfdfe, 0xfdff, - 0xfe1a, 0xfe1f, - 0xfe24, 0xfe2f, - 0xfe53, 0xfe53, - 0xfe67, 0xfe67, - 0xfe6c, 0xfe6f, - 0xfe75, 0xfe75, - 0xfefd, 0xff00, - 0xffbf, 0xffc1, - 0xffc8, 0xffc9, - 0xffd0, 0xffd1, - 0xffd8, 0xffd9, - 0xffdd, 0xffdf, - 0xffe7, 0xffe7, - 0xffef, 0xfffb, - 0xfffe, 0xffff, - 0x1000c, 0x1000c, - 0x10027, 0x10027, - 0x1003b, 0x1003b, - 0x1003e, 0x1003e, - 0x1004e, 0x1004f, - 0x1005e, 0x1007f, - 0x100fb, 0x100ff, - 0x10103, 0x10106, - 0x10134, 0x10136, - 0x1018b, 0x102ff, - 0x1031f, 0x1031f, - 0x10324, 0x1032f, - 0x1034b, 0x1037f, - 0x1039e, 0x1039e, - 0x103c4, 0x103c7, - 0x103d6, 0x103ff, - 0x1049e, 0x1049f, - 0x104aa, 0x107ff, - 0x10806, 0x10807, - 0x10809, 0x10809, - 0x10836, 0x10836, - 0x10839, 0x1083b, - 0x1083d, 0x1083e, - 0x10840, 0x109ff, - 0x10a04, 0x10a04, - 0x10a07, 0x10a0b, - 0x10a14, 0x10a14, - 0x10a18, 0x10a18, - 0x10a34, 0x10a37, - 0x10a3b, 0x10a3e, - 0x10a48, 0x10a4f, - 0x10a59, 0x1cfff, - 0x1d0f6, 0x1d0ff, - 0x1d127, 0x1d129, - 0x1d173, 0x1d17a, - 0x1d1de, 0x1d1ff, - 0x1d246, 0x1d2ff, - 0x1d357, 0x1d3ff, - 0x1d455, 0x1d455, - 0x1d49d, 0x1d49d, - 0x1d4a0, 0x1d4a1, - 0x1d4a3, 0x1d4a4, - 0x1d4a7, 0x1d4a8, - 0x1d4ad, 0x1d4ad, - 0x1d4ba, 0x1d4ba, - 0x1d4bc, 0x1d4bc, - 0x1d4c4, 0x1d4c4, - 0x1d506, 0x1d506, - 0x1d50b, 0x1d50c, - 0x1d515, 0x1d515, - 0x1d51d, 0x1d51d, - 0x1d53a, 0x1d53a, - 0x1d53f, 0x1d53f, - 0x1d545, 0x1d545, - 0x1d547, 0x1d549, - 0x1d551, 0x1d551, - 0x1d6a6, 0x1d6a7, - 0x1d7ca, 0x1d7cd, - 0x1d800, 0x1ffff, - 0x2a6d7, 0x2f7ff, - 0x2fa1e, 0xe00ff, - 0xe01f0, 0x10ffff -}; /* CR_C */ - -/* 'Cc': General Category */ -static const OnigCodePoint CR_Cc[] = { - 2, - 0x0000, 0x001f, - 0x007f, 0x009f -}; /* CR_Cc */ - -/* 'Cf': General Category */ -static const OnigCodePoint CR_Cf[] = { - 14, - 0x00ad, 0x00ad, - 0x0600, 0x0603, - 0x06dd, 0x06dd, - 0x070f, 0x070f, - 0x17b4, 0x17b5, - 
0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x2063, - 0x206a, 0x206f, - 0xfeff, 0xfeff, - 0xfff9, 0xfffb, - 0x1d173, 0x1d17a, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f -}; /* CR_Cf */ - -/* 'Cn': General Category */ -static const OnigCodePoint CR_Cn[] = { - 420, - 0x0242, 0x024f, - 0x0370, 0x0373, - 0x0376, 0x0379, - 0x037b, 0x037d, - 0x037f, 0x0383, - 0x038b, 0x038b, - 0x038d, 0x038d, - 0x03a2, 0x03a2, - 0x03cf, 0x03cf, - 0x0487, 0x0487, - 0x04cf, 0x04cf, - 0x04fa, 0x04ff, - 0x0510, 0x0530, - 0x0557, 0x0558, - 0x0560, 0x0560, - 0x0588, 0x0588, - 0x058b, 0x0590, - 0x05ba, 0x05ba, - 0x05c8, 0x05cf, - 0x05eb, 0x05ef, - 0x05f5, 0x05ff, - 0x0604, 0x060a, - 0x0616, 0x061a, - 0x061c, 0x061d, - 0x0620, 0x0620, - 0x063b, 0x063f, - 0x065f, 0x065f, - 0x070e, 0x070e, - 0x074b, 0x074c, - 0x076e, 0x077f, - 0x07b2, 0x0900, - 0x093a, 0x093b, - 0x094e, 0x094f, - 0x0955, 0x0957, - 0x0971, 0x097c, - 0x097e, 0x0980, - 0x0984, 0x0984, - 0x098d, 0x098e, - 0x0991, 0x0992, - 0x09a9, 0x09a9, - 0x09b1, 0x09b1, - 0x09b3, 0x09b5, - 0x09ba, 0x09bb, - 0x09c5, 0x09c6, - 0x09c9, 0x09ca, - 0x09cf, 0x09d6, - 0x09d8, 0x09db, - 0x09de, 0x09de, - 0x09e4, 0x09e5, - 0x09fb, 0x0a00, - 0x0a04, 0x0a04, - 0x0a0b, 0x0a0e, - 0x0a11, 0x0a12, - 0x0a29, 0x0a29, - 0x0a31, 0x0a31, - 0x0a34, 0x0a34, - 0x0a37, 0x0a37, - 0x0a3a, 0x0a3b, - 0x0a3d, 0x0a3d, - 0x0a43, 0x0a46, - 0x0a49, 0x0a4a, - 0x0a4e, 0x0a58, - 0x0a5d, 0x0a5d, - 0x0a5f, 0x0a65, - 0x0a75, 0x0a80, - 0x0a84, 0x0a84, - 0x0a8e, 0x0a8e, - 0x0a92, 0x0a92, - 0x0aa9, 0x0aa9, - 0x0ab1, 0x0ab1, - 0x0ab4, 0x0ab4, - 0x0aba, 0x0abb, - 0x0ac6, 0x0ac6, - 0x0aca, 0x0aca, - 0x0ace, 0x0acf, - 0x0ad1, 0x0adf, - 0x0ae4, 0x0ae5, - 0x0af0, 0x0af0, - 0x0af2, 0x0b00, - 0x0b04, 0x0b04, - 0x0b0d, 0x0b0e, - 0x0b11, 0x0b12, - 0x0b29, 0x0b29, - 0x0b31, 0x0b31, - 0x0b34, 0x0b34, - 0x0b3a, 0x0b3b, - 0x0b44, 0x0b46, - 0x0b49, 0x0b4a, - 0x0b4e, 0x0b55, - 0x0b58, 0x0b5b, - 0x0b5e, 0x0b5e, - 0x0b62, 0x0b65, - 0x0b72, 0x0b81, - 0x0b84, 0x0b84, - 0x0b8b, 0x0b8d, - 0x0b91, 0x0b91, - 0x0b96, 
0x0b98, - 0x0b9b, 0x0b9b, - 0x0b9d, 0x0b9d, - 0x0ba0, 0x0ba2, - 0x0ba5, 0x0ba7, - 0x0bab, 0x0bad, - 0x0bba, 0x0bbd, - 0x0bc3, 0x0bc5, - 0x0bc9, 0x0bc9, - 0x0bce, 0x0bd6, - 0x0bd8, 0x0be5, - 0x0bfb, 0x0c00, - 0x0c04, 0x0c04, - 0x0c0d, 0x0c0d, - 0x0c11, 0x0c11, - 0x0c29, 0x0c29, - 0x0c34, 0x0c34, - 0x0c3a, 0x0c3d, - 0x0c45, 0x0c45, - 0x0c49, 0x0c49, - 0x0c4e, 0x0c54, - 0x0c57, 0x0c5f, - 0x0c62, 0x0c65, - 0x0c70, 0x0c81, - 0x0c84, 0x0c84, - 0x0c8d, 0x0c8d, - 0x0c91, 0x0c91, - 0x0ca9, 0x0ca9, - 0x0cb4, 0x0cb4, - 0x0cba, 0x0cbb, - 0x0cc5, 0x0cc5, - 0x0cc9, 0x0cc9, - 0x0cce, 0x0cd4, - 0x0cd7, 0x0cdd, - 0x0cdf, 0x0cdf, - 0x0ce2, 0x0ce5, - 0x0cf0, 0x0d01, - 0x0d04, 0x0d04, - 0x0d0d, 0x0d0d, - 0x0d11, 0x0d11, - 0x0d29, 0x0d29, - 0x0d3a, 0x0d3d, - 0x0d44, 0x0d45, - 0x0d49, 0x0d49, - 0x0d4e, 0x0d56, - 0x0d58, 0x0d5f, - 0x0d62, 0x0d65, - 0x0d70, 0x0d81, - 0x0d84, 0x0d84, - 0x0d97, 0x0d99, - 0x0db2, 0x0db2, - 0x0dbc, 0x0dbc, - 0x0dbe, 0x0dbf, - 0x0dc7, 0x0dc9, - 0x0dcb, 0x0dce, - 0x0dd5, 0x0dd5, - 0x0dd7, 0x0dd7, - 0x0de0, 0x0df1, - 0x0df5, 0x0e00, - 0x0e3b, 0x0e3e, - 0x0e5c, 0x0e80, - 0x0e83, 0x0e83, - 0x0e85, 0x0e86, - 0x0e89, 0x0e89, - 0x0e8b, 0x0e8c, - 0x0e8e, 0x0e93, - 0x0e98, 0x0e98, - 0x0ea0, 0x0ea0, - 0x0ea4, 0x0ea4, - 0x0ea6, 0x0ea6, - 0x0ea8, 0x0ea9, - 0x0eac, 0x0eac, - 0x0eba, 0x0eba, - 0x0ebe, 0x0ebf, - 0x0ec5, 0x0ec5, - 0x0ec7, 0x0ec7, - 0x0ece, 0x0ecf, - 0x0eda, 0x0edb, - 0x0ede, 0x0eff, - 0x0f48, 0x0f48, - 0x0f6b, 0x0f70, - 0x0f8c, 0x0f8f, - 0x0f98, 0x0f98, - 0x0fbd, 0x0fbd, - 0x0fcd, 0x0fce, - 0x0fd2, 0x0fff, - 0x1022, 0x1022, - 0x1028, 0x1028, - 0x102b, 0x102b, - 0x1033, 0x1035, - 0x103a, 0x103f, - 0x105a, 0x109f, - 0x10c6, 0x10cf, - 0x10fd, 0x10ff, - 0x115a, 0x115e, - 0x11a3, 0x11a7, - 0x11fa, 0x11ff, - 0x1249, 0x1249, - 0x124e, 0x124f, - 0x1257, 0x1257, - 0x1259, 0x1259, - 0x125e, 0x125f, - 0x1289, 0x1289, - 0x128e, 0x128f, - 0x12b1, 0x12b1, - 0x12b6, 0x12b7, - 0x12bf, 0x12bf, - 0x12c1, 0x12c1, - 0x12c6, 0x12c7, - 0x12d7, 0x12d7, - 0x1311, 0x1311, - 0x1316, 
0x1317, - 0x135b, 0x135e, - 0x137d, 0x137f, - 0x139a, 0x139f, - 0x13f5, 0x1400, - 0x1677, 0x167f, - 0x169d, 0x169f, - 0x16f1, 0x16ff, - 0x170d, 0x170d, - 0x1715, 0x171f, - 0x1737, 0x173f, - 0x1754, 0x175f, - 0x176d, 0x176d, - 0x1771, 0x1771, - 0x1774, 0x177f, - 0x17de, 0x17df, - 0x17ea, 0x17ef, - 0x17fa, 0x17ff, - 0x180f, 0x180f, - 0x181a, 0x181f, - 0x1878, 0x187f, - 0x18aa, 0x18ff, - 0x191d, 0x191f, - 0x192c, 0x192f, - 0x193c, 0x193f, - 0x1941, 0x1943, - 0x196e, 0x196f, - 0x1975, 0x197f, - 0x19aa, 0x19af, - 0x19ca, 0x19cf, - 0x19da, 0x19dd, - 0x1a1c, 0x1a1d, - 0x1a20, 0x1cff, - 0x1dc4, 0x1dff, - 0x1e9c, 0x1e9f, - 0x1efa, 0x1eff, - 0x1f16, 0x1f17, - 0x1f1e, 0x1f1f, - 0x1f46, 0x1f47, - 0x1f4e, 0x1f4f, - 0x1f58, 0x1f58, - 0x1f5a, 0x1f5a, - 0x1f5c, 0x1f5c, - 0x1f5e, 0x1f5e, - 0x1f7e, 0x1f7f, - 0x1fb5, 0x1fb5, - 0x1fc5, 0x1fc5, - 0x1fd4, 0x1fd5, - 0x1fdc, 0x1fdc, - 0x1ff0, 0x1ff1, - 0x1ff5, 0x1ff5, - 0x1fff, 0x1fff, - 0x2064, 0x2069, - 0x2072, 0x2073, - 0x208f, 0x208f, - 0x2095, 0x209f, - 0x20b6, 0x20cf, - 0x20ec, 0x20ff, - 0x214d, 0x2152, - 0x2184, 0x218f, - 0x23dc, 0x23ff, - 0x2427, 0x243f, - 0x244b, 0x245f, - 0x269d, 0x269f, - 0x26b2, 0x2700, - 0x2705, 0x2705, - 0x270a, 0x270b, - 0x2728, 0x2728, - 0x274c, 0x274c, - 0x274e, 0x274e, - 0x2753, 0x2755, - 0x2757, 0x2757, - 0x275f, 0x2760, - 0x2795, 0x2797, - 0x27b0, 0x27b0, - 0x27bf, 0x27bf, - 0x27c7, 0x27cf, - 0x27ec, 0x27ef, - 0x2b14, 0x2bff, - 0x2c2f, 0x2c2f, - 0x2c5f, 0x2c7f, - 0x2ceb, 0x2cf8, - 0x2d26, 0x2d2f, - 0x2d66, 0x2d6e, - 0x2d70, 0x2d7f, - 0x2d97, 0x2d9f, - 0x2da7, 0x2da7, - 0x2daf, 0x2daf, - 0x2db7, 0x2db7, - 0x2dbf, 0x2dbf, - 0x2dc7, 0x2dc7, - 0x2dcf, 0x2dcf, - 0x2dd7, 0x2dd7, - 0x2ddf, 0x2dff, - 0x2e18, 0x2e1b, - 0x2e1e, 0x2e7f, - 0x2e9a, 0x2e9a, - 0x2ef4, 0x2eff, - 0x2fd6, 0x2fef, - 0x2ffc, 0x2fff, - 0x3040, 0x3040, - 0x3097, 0x3098, - 0x3100, 0x3104, - 0x312d, 0x3130, - 0x318f, 0x318f, - 0x31b8, 0x31bf, - 0x31d0, 0x31ef, - 0x321f, 0x321f, - 0x3244, 0x324f, - 0x32ff, 0x32ff, - 0x4db6, 0x4dbf, - 0x9fbc, 
0x9fff, - 0xa48d, 0xa48f, - 0xa4c7, 0xa6ff, - 0xa717, 0xa7ff, - 0xa82c, 0xabff, - 0xd7a4, 0xd7ff, - 0xfa2e, 0xfa2f, - 0xfa6b, 0xfa6f, - 0xfada, 0xfaff, - 0xfb07, 0xfb12, - 0xfb18, 0xfb1c, - 0xfb37, 0xfb37, - 0xfb3d, 0xfb3d, - 0xfb3f, 0xfb3f, - 0xfb42, 0xfb42, - 0xfb45, 0xfb45, - 0xfbb2, 0xfbd2, - 0xfd40, 0xfd4f, - 0xfd90, 0xfd91, - 0xfdc8, 0xfdef, - 0xfdfe, 0xfdff, - 0xfe1a, 0xfe1f, - 0xfe24, 0xfe2f, - 0xfe53, 0xfe53, - 0xfe67, 0xfe67, - 0xfe6c, 0xfe6f, - 0xfe75, 0xfe75, - 0xfefd, 0xfefe, - 0xff00, 0xff00, - 0xffbf, 0xffc1, - 0xffc8, 0xffc9, - 0xffd0, 0xffd1, - 0xffd8, 0xffd9, - 0xffdd, 0xffdf, - 0xffe7, 0xffe7, - 0xffef, 0xfff8, - 0xfffe, 0xffff, - 0x1000c, 0x1000c, - 0x10027, 0x10027, - 0x1003b, 0x1003b, - 0x1003e, 0x1003e, - 0x1004e, 0x1004f, - 0x1005e, 0x1007f, - 0x100fb, 0x100ff, - 0x10103, 0x10106, - 0x10134, 0x10136, - 0x1018b, 0x102ff, - 0x1031f, 0x1031f, - 0x10324, 0x1032f, - 0x1034b, 0x1037f, - 0x1039e, 0x1039e, - 0x103c4, 0x103c7, - 0x103d6, 0x103ff, - 0x1049e, 0x1049f, - 0x104aa, 0x107ff, - 0x10806, 0x10807, - 0x10809, 0x10809, - 0x10836, 0x10836, - 0x10839, 0x1083b, - 0x1083d, 0x1083e, - 0x10840, 0x109ff, - 0x10a04, 0x10a04, - 0x10a07, 0x10a0b, - 0x10a14, 0x10a14, - 0x10a18, 0x10a18, - 0x10a34, 0x10a37, - 0x10a3b, 0x10a3e, - 0x10a48, 0x10a4f, - 0x10a59, 0x1cfff, - 0x1d0f6, 0x1d0ff, - 0x1d127, 0x1d129, - 0x1d1de, 0x1d1ff, - 0x1d246, 0x1d2ff, - 0x1d357, 0x1d3ff, - 0x1d455, 0x1d455, - 0x1d49d, 0x1d49d, - 0x1d4a0, 0x1d4a1, - 0x1d4a3, 0x1d4a4, - 0x1d4a7, 0x1d4a8, - 0x1d4ad, 0x1d4ad, - 0x1d4ba, 0x1d4ba, - 0x1d4bc, 0x1d4bc, - 0x1d4c4, 0x1d4c4, - 0x1d506, 0x1d506, - 0x1d50b, 0x1d50c, - 0x1d515, 0x1d515, - 0x1d51d, 0x1d51d, - 0x1d53a, 0x1d53a, - 0x1d53f, 0x1d53f, - 0x1d545, 0x1d545, - 0x1d547, 0x1d549, - 0x1d551, 0x1d551, - 0x1d6a6, 0x1d6a7, - 0x1d7ca, 0x1d7cd, - 0x1d800, 0x1ffff, - 0x2a6d7, 0x2f7ff, - 0x2fa1e, 0xe0000, - 0xe0002, 0xe001f, - 0xe0080, 0xe00ff, - 0xe01f0, 0xeffff, - 0xffffe, 0xfffff, - 0x10fffe, 0x10ffff -}; /* CR_Cn */ - -/* 'Co': General 
Category */ -static const OnigCodePoint CR_Co[] = { - 3, - 0xe000, 0xf8ff, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Co */ - -/* 'Cs': General Category */ -static const OnigCodePoint CR_Cs[] = { - 1, - 0xd800, 0xdfff -}; /* CR_Cs */ - -/* 'L': Major Category */ -static const OnigCodePoint CR_L[] = { - 347, - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0241, - 0x0250, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x037a, 0x037a, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03f5, - 0x03f7, 0x0481, - 0x048a, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x0531, 0x0556, - 0x0559, 0x0559, - 0x0561, 0x0587, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0621, 0x063a, - 0x0640, 0x064a, - 0x066e, 0x066f, - 0x0671, 0x06d3, - 0x06d5, 0x06d5, - 0x06e5, 0x06e6, - 0x06ee, 0x06ef, - 0x06fa, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x0710, - 0x0712, 0x072f, - 0x074d, 0x076d, - 0x0780, 0x07a5, - 0x07b1, 0x07b1, - 0x0904, 0x0939, - 0x093d, 0x093d, - 0x0950, 0x0950, - 0x0958, 0x0961, - 0x097d, 0x097d, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bd, 0x09bd, - 0x09ce, 0x09ce, - 0x09dc, 0x09dd, - 0x09df, 0x09e1, - 0x09f0, 0x09f1, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a72, 0x0a74, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abd, 0x0abd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae1, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3d, 0x0b3d, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b71, 0x0b71, - 0x0b83, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 
0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c60, 0x0c61, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbd, 0x0cbd, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d60, 0x0d61, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0e01, 0x0e30, - 0x0e32, 0x0e33, - 0x0e40, 0x0e46, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb0, - 0x0eb2, 0x0eb3, - 0x0ebd, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f40, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f88, 0x0f8b, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x1050, 0x1055, - 0x10a0, 0x10c5, - 0x10d0, 0x10fa, - 0x10fc, 0x10fc, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1711, - 0x1720, 0x1731, - 0x1740, 0x1751, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1780, 0x17b3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dc, - 0x1820, 0x1877, - 0x1880, 0x18a8, - 0x1900, 0x191c, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19c1, 0x19c7, - 0x1a00, 0x1a16, - 0x1d00, 0x1dbf, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 
0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x2090, 0x2094, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2131, - 0x2133, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c80, 0x2ce4, - 0x2d00, 0x2d25, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3005, 0x3006, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xa000, 0xa48c, - 0xa800, 0xa801, - 0xa803, 0xa805, - 0xa807, 0xa80a, - 0xa80c, 0xa822, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb1d, - 0xfb1f, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x10400, 0x1049d, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a00, - 0x10a10, 
0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d -}; /* CR_L */ - -/* 'Ll': General Category */ -static const OnigCodePoint CR_Ll[] = { - 480, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 0x00b5, 0x00b5, - 0x00ba, 0x00ba, - 0x00df, 0x00f6, - 0x00f8, 0x00ff, - 0x0101, 0x0101, - 0x0103, 0x0103, - 0x0105, 0x0105, - 0x0107, 0x0107, - 0x0109, 0x0109, - 0x010b, 0x010b, - 0x010d, 0x010d, - 0x010f, 0x010f, - 0x0111, 0x0111, - 0x0113, 0x0113, - 0x0115, 0x0115, - 0x0117, 0x0117, - 0x0119, 0x0119, - 0x011b, 0x011b, - 0x011d, 0x011d, - 0x011f, 0x011f, - 0x0121, 0x0121, - 0x0123, 0x0123, - 0x0125, 0x0125, - 0x0127, 0x0127, - 0x0129, 0x0129, - 0x012b, 0x012b, - 0x012d, 0x012d, - 0x012f, 0x012f, - 0x0131, 0x0131, - 0x0133, 0x0133, - 0x0135, 0x0135, - 0x0137, 0x0138, - 0x013a, 0x013a, - 0x013c, 0x013c, - 0x013e, 0x013e, - 0x0140, 0x0140, - 0x0142, 0x0142, - 0x0144, 0x0144, - 0x0146, 0x0146, - 0x0148, 0x0149, - 0x014b, 0x014b, - 0x014d, 0x014d, - 0x014f, 0x014f, - 0x0151, 0x0151, - 0x0153, 0x0153, - 0x0155, 0x0155, - 0x0157, 0x0157, - 0x0159, 0x0159, - 0x015b, 0x015b, - 0x015d, 0x015d, - 0x015f, 0x015f, - 0x0161, 0x0161, - 0x0163, 0x0163, - 0x0165, 0x0165, - 0x0167, 0x0167, - 0x0169, 0x0169, - 0x016b, 0x016b, - 0x016d, 0x016d, - 0x016f, 0x016f, - 0x0171, 0x0171, - 0x0173, 0x0173, - 0x0175, 0x0175, - 0x0177, 0x0177, - 0x017a, 0x017a, - 0x017c, 0x017c, - 0x017e, 
0x0180, - 0x0183, 0x0183, - 0x0185, 0x0185, - 0x0188, 0x0188, - 0x018c, 0x018d, - 0x0192, 0x0192, - 0x0195, 0x0195, - 0x0199, 0x019b, - 0x019e, 0x019e, - 0x01a1, 0x01a1, - 0x01a3, 0x01a3, - 0x01a5, 0x01a5, - 0x01a8, 0x01a8, - 0x01aa, 0x01ab, - 0x01ad, 0x01ad, - 0x01b0, 0x01b0, - 0x01b4, 0x01b4, - 0x01b6, 0x01b6, - 0x01b9, 0x01ba, - 0x01bd, 0x01bf, - 0x01c6, 0x01c6, - 0x01c9, 0x01c9, - 0x01cc, 0x01cc, - 0x01ce, 0x01ce, - 0x01d0, 0x01d0, - 0x01d2, 0x01d2, - 0x01d4, 0x01d4, - 0x01d6, 0x01d6, - 0x01d8, 0x01d8, - 0x01da, 0x01da, - 0x01dc, 0x01dd, - 0x01df, 0x01df, - 0x01e1, 0x01e1, - 0x01e3, 0x01e3, - 0x01e5, 0x01e5, - 0x01e7, 0x01e7, - 0x01e9, 0x01e9, - 0x01eb, 0x01eb, - 0x01ed, 0x01ed, - 0x01ef, 0x01f0, - 0x01f3, 0x01f3, - 0x01f5, 0x01f5, - 0x01f9, 0x01f9, - 0x01fb, 0x01fb, - 0x01fd, 0x01fd, - 0x01ff, 0x01ff, - 0x0201, 0x0201, - 0x0203, 0x0203, - 0x0205, 0x0205, - 0x0207, 0x0207, - 0x0209, 0x0209, - 0x020b, 0x020b, - 0x020d, 0x020d, - 0x020f, 0x020f, - 0x0211, 0x0211, - 0x0213, 0x0213, - 0x0215, 0x0215, - 0x0217, 0x0217, - 0x0219, 0x0219, - 0x021b, 0x021b, - 0x021d, 0x021d, - 0x021f, 0x021f, - 0x0221, 0x0221, - 0x0223, 0x0223, - 0x0225, 0x0225, - 0x0227, 0x0227, - 0x0229, 0x0229, - 0x022b, 0x022b, - 0x022d, 0x022d, - 0x022f, 0x022f, - 0x0231, 0x0231, - 0x0233, 0x0239, - 0x023c, 0x023c, - 0x023f, 0x0240, - 0x0250, 0x02af, - 0x0390, 0x0390, - 0x03ac, 0x03ce, - 0x03d0, 0x03d1, - 0x03d5, 0x03d7, - 0x03d9, 0x03d9, - 0x03db, 0x03db, - 0x03dd, 0x03dd, - 0x03df, 0x03df, - 0x03e1, 0x03e1, - 0x03e3, 0x03e3, - 0x03e5, 0x03e5, - 0x03e7, 0x03e7, - 0x03e9, 0x03e9, - 0x03eb, 0x03eb, - 0x03ed, 0x03ed, - 0x03ef, 0x03f3, - 0x03f5, 0x03f5, - 0x03f8, 0x03f8, - 0x03fb, 0x03fc, - 0x0430, 0x045f, - 0x0461, 0x0461, - 0x0463, 0x0463, - 0x0465, 0x0465, - 0x0467, 0x0467, - 0x0469, 0x0469, - 0x046b, 0x046b, - 0x046d, 0x046d, - 0x046f, 0x046f, - 0x0471, 0x0471, - 0x0473, 0x0473, - 0x0475, 0x0475, - 0x0477, 0x0477, - 0x0479, 0x0479, - 0x047b, 0x047b, - 0x047d, 0x047d, - 0x047f, 0x047f, - 0x0481, 
0x0481, - 0x048b, 0x048b, - 0x048d, 0x048d, - 0x048f, 0x048f, - 0x0491, 0x0491, - 0x0493, 0x0493, - 0x0495, 0x0495, - 0x0497, 0x0497, - 0x0499, 0x0499, - 0x049b, 0x049b, - 0x049d, 0x049d, - 0x049f, 0x049f, - 0x04a1, 0x04a1, - 0x04a3, 0x04a3, - 0x04a5, 0x04a5, - 0x04a7, 0x04a7, - 0x04a9, 0x04a9, - 0x04ab, 0x04ab, - 0x04ad, 0x04ad, - 0x04af, 0x04af, - 0x04b1, 0x04b1, - 0x04b3, 0x04b3, - 0x04b5, 0x04b5, - 0x04b7, 0x04b7, - 0x04b9, 0x04b9, - 0x04bb, 0x04bb, - 0x04bd, 0x04bd, - 0x04bf, 0x04bf, - 0x04c2, 0x04c2, - 0x04c4, 0x04c4, - 0x04c6, 0x04c6, - 0x04c8, 0x04c8, - 0x04ca, 0x04ca, - 0x04cc, 0x04cc, - 0x04ce, 0x04ce, - 0x04d1, 0x04d1, - 0x04d3, 0x04d3, - 0x04d5, 0x04d5, - 0x04d7, 0x04d7, - 0x04d9, 0x04d9, - 0x04db, 0x04db, - 0x04dd, 0x04dd, - 0x04df, 0x04df, - 0x04e1, 0x04e1, - 0x04e3, 0x04e3, - 0x04e5, 0x04e5, - 0x04e7, 0x04e7, - 0x04e9, 0x04e9, - 0x04eb, 0x04eb, - 0x04ed, 0x04ed, - 0x04ef, 0x04ef, - 0x04f1, 0x04f1, - 0x04f3, 0x04f3, - 0x04f5, 0x04f5, - 0x04f7, 0x04f7, - 0x04f9, 0x04f9, - 0x0501, 0x0501, - 0x0503, 0x0503, - 0x0505, 0x0505, - 0x0507, 0x0507, - 0x0509, 0x0509, - 0x050b, 0x050b, - 0x050d, 0x050d, - 0x050f, 0x050f, - 0x0561, 0x0587, - 0x1d00, 0x1d2b, - 0x1d62, 0x1d77, - 0x1d79, 0x1d9a, - 0x1e01, 0x1e01, - 0x1e03, 0x1e03, - 0x1e05, 0x1e05, - 0x1e07, 0x1e07, - 0x1e09, 0x1e09, - 0x1e0b, 0x1e0b, - 0x1e0d, 0x1e0d, - 0x1e0f, 0x1e0f, - 0x1e11, 0x1e11, - 0x1e13, 0x1e13, - 0x1e15, 0x1e15, - 0x1e17, 0x1e17, - 0x1e19, 0x1e19, - 0x1e1b, 0x1e1b, - 0x1e1d, 0x1e1d, - 0x1e1f, 0x1e1f, - 0x1e21, 0x1e21, - 0x1e23, 0x1e23, - 0x1e25, 0x1e25, - 0x1e27, 0x1e27, - 0x1e29, 0x1e29, - 0x1e2b, 0x1e2b, - 0x1e2d, 0x1e2d, - 0x1e2f, 0x1e2f, - 0x1e31, 0x1e31, - 0x1e33, 0x1e33, - 0x1e35, 0x1e35, - 0x1e37, 0x1e37, - 0x1e39, 0x1e39, - 0x1e3b, 0x1e3b, - 0x1e3d, 0x1e3d, - 0x1e3f, 0x1e3f, - 0x1e41, 0x1e41, - 0x1e43, 0x1e43, - 0x1e45, 0x1e45, - 0x1e47, 0x1e47, - 0x1e49, 0x1e49, - 0x1e4b, 0x1e4b, - 0x1e4d, 0x1e4d, - 0x1e4f, 0x1e4f, - 0x1e51, 0x1e51, - 0x1e53, 0x1e53, - 0x1e55, 0x1e55, - 0x1e57, 
0x1e57, - 0x1e59, 0x1e59, - 0x1e5b, 0x1e5b, - 0x1e5d, 0x1e5d, - 0x1e5f, 0x1e5f, - 0x1e61, 0x1e61, - 0x1e63, 0x1e63, - 0x1e65, 0x1e65, - 0x1e67, 0x1e67, - 0x1e69, 0x1e69, - 0x1e6b, 0x1e6b, - 0x1e6d, 0x1e6d, - 0x1e6f, 0x1e6f, - 0x1e71, 0x1e71, - 0x1e73, 0x1e73, - 0x1e75, 0x1e75, - 0x1e77, 0x1e77, - 0x1e79, 0x1e79, - 0x1e7b, 0x1e7b, - 0x1e7d, 0x1e7d, - 0x1e7f, 0x1e7f, - 0x1e81, 0x1e81, - 0x1e83, 0x1e83, - 0x1e85, 0x1e85, - 0x1e87, 0x1e87, - 0x1e89, 0x1e89, - 0x1e8b, 0x1e8b, - 0x1e8d, 0x1e8d, - 0x1e8f, 0x1e8f, - 0x1e91, 0x1e91, - 0x1e93, 0x1e93, - 0x1e95, 0x1e9b, - 0x1ea1, 0x1ea1, - 0x1ea3, 0x1ea3, - 0x1ea5, 0x1ea5, - 0x1ea7, 0x1ea7, - 0x1ea9, 0x1ea9, - 0x1eab, 0x1eab, - 0x1ead, 0x1ead, - 0x1eaf, 0x1eaf, - 0x1eb1, 0x1eb1, - 0x1eb3, 0x1eb3, - 0x1eb5, 0x1eb5, - 0x1eb7, 0x1eb7, - 0x1eb9, 0x1eb9, - 0x1ebb, 0x1ebb, - 0x1ebd, 0x1ebd, - 0x1ebf, 0x1ebf, - 0x1ec1, 0x1ec1, - 0x1ec3, 0x1ec3, - 0x1ec5, 0x1ec5, - 0x1ec7, 0x1ec7, - 0x1ec9, 0x1ec9, - 0x1ecb, 0x1ecb, - 0x1ecd, 0x1ecd, - 0x1ecf, 0x1ecf, - 0x1ed1, 0x1ed1, - 0x1ed3, 0x1ed3, - 0x1ed5, 0x1ed5, - 0x1ed7, 0x1ed7, - 0x1ed9, 0x1ed9, - 0x1edb, 0x1edb, - 0x1edd, 0x1edd, - 0x1edf, 0x1edf, - 0x1ee1, 0x1ee1, - 0x1ee3, 0x1ee3, - 0x1ee5, 0x1ee5, - 0x1ee7, 0x1ee7, - 0x1ee9, 0x1ee9, - 0x1eeb, 0x1eeb, - 0x1eed, 0x1eed, - 0x1eef, 0x1eef, - 0x1ef1, 0x1ef1, - 0x1ef3, 0x1ef3, - 0x1ef5, 0x1ef5, - 0x1ef7, 0x1ef7, - 0x1ef9, 0x1ef9, - 0x1f00, 0x1f07, - 0x1f10, 0x1f15, - 0x1f20, 0x1f27, - 0x1f30, 0x1f37, - 0x1f40, 0x1f45, - 0x1f50, 0x1f57, - 0x1f60, 0x1f67, - 0x1f70, 0x1f7d, - 0x1f80, 0x1f87, - 0x1f90, 0x1f97, - 0x1fa0, 0x1fa7, - 0x1fb0, 0x1fb4, - 0x1fb6, 0x1fb7, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fc7, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fd7, - 0x1fe0, 0x1fe7, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ff7, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x210a, 0x210a, - 0x210e, 0x210f, - 0x2113, 0x2113, - 0x212f, 0x212f, - 0x2134, 0x2134, - 0x2139, 0x2139, - 0x213c, 0x213d, - 0x2146, 0x2149, - 0x2c30, 0x2c5e, - 0x2c81, 0x2c81, - 0x2c83, 0x2c83, - 0x2c85, 
0x2c85, - 0x2c87, 0x2c87, - 0x2c89, 0x2c89, - 0x2c8b, 0x2c8b, - 0x2c8d, 0x2c8d, - 0x2c8f, 0x2c8f, - 0x2c91, 0x2c91, - 0x2c93, 0x2c93, - 0x2c95, 0x2c95, - 0x2c97, 0x2c97, - 0x2c99, 0x2c99, - 0x2c9b, 0x2c9b, - 0x2c9d, 0x2c9d, - 0x2c9f, 0x2c9f, - 0x2ca1, 0x2ca1, - 0x2ca3, 0x2ca3, - 0x2ca5, 0x2ca5, - 0x2ca7, 0x2ca7, - 0x2ca9, 0x2ca9, - 0x2cab, 0x2cab, - 0x2cad, 0x2cad, - 0x2caf, 0x2caf, - 0x2cb1, 0x2cb1, - 0x2cb3, 0x2cb3, - 0x2cb5, 0x2cb5, - 0x2cb7, 0x2cb7, - 0x2cb9, 0x2cb9, - 0x2cbb, 0x2cbb, - 0x2cbd, 0x2cbd, - 0x2cbf, 0x2cbf, - 0x2cc1, 0x2cc1, - 0x2cc3, 0x2cc3, - 0x2cc5, 0x2cc5, - 0x2cc7, 0x2cc7, - 0x2cc9, 0x2cc9, - 0x2ccb, 0x2ccb, - 0x2ccd, 0x2ccd, - 0x2ccf, 0x2ccf, - 0x2cd1, 0x2cd1, - 0x2cd3, 0x2cd3, - 0x2cd5, 0x2cd5, - 0x2cd7, 0x2cd7, - 0x2cd9, 0x2cd9, - 0x2cdb, 0x2cdb, - 0x2cdd, 0x2cdd, - 0x2cdf, 0x2cdf, - 0x2ce1, 0x2ce1, - 0x2ce3, 0x2ce4, - 0x2d00, 0x2d25, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xff41, 0xff5a, - 0x10428, 0x1044f, - 0x1d41a, 0x1d433, - 0x1d44e, 0x1d454, - 0x1d456, 0x1d467, - 0x1d482, 0x1d49b, - 0x1d4b6, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d4cf, - 0x1d4ea, 0x1d503, - 0x1d51e, 0x1d537, - 0x1d552, 0x1d56b, - 0x1d586, 0x1d59f, - 0x1d5ba, 0x1d5d3, - 0x1d5ee, 0x1d607, - 0x1d622, 0x1d63b, - 0x1d656, 0x1d66f, - 0x1d68a, 0x1d6a5, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6e1, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d71b, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d755, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d78f, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9 -}; /* CR_Ll */ - -/* 'Lm': General Category */ -static const OnigCodePoint CR_Lm[] = { - 26, - 0x02b0, 0x02c1, - 0x02c6, 0x02d1, - 0x02e0, 0x02e4, - 0x02ee, 0x02ee, - 0x037a, 0x037a, - 0x0559, 0x0559, - 0x0640, 0x0640, - 0x06e5, 0x06e6, - 0x0e46, 0x0e46, - 0x0ec6, 0x0ec6, - 0x10fc, 0x10fc, - 0x17d7, 0x17d7, - 0x1843, 0x1843, - 0x1d2c, 0x1d61, - 0x1d78, 0x1d78, - 0x1d9b, 0x1dbf, - 0x2090, 0x2094, - 0x2d6f, 0x2d6f, - 0x3005, 0x3005, - 0x3031, 0x3035, - 0x303b, 0x303b, - 0x309d, 0x309e, - 0x30fc, 0x30fe, - 
0xa015, 0xa015, - 0xff70, 0xff70, - 0xff9e, 0xff9f -}; /* CR_Lm */ - -/* 'Lo': General Category */ -static const OnigCodePoint CR_Lo[] = { - 245, - 0x01bb, 0x01bb, - 0x01c0, 0x01c3, - 0x05d0, 0x05ea, - 0x05f0, 0x05f2, - 0x0621, 0x063a, - 0x0641, 0x064a, - 0x066e, 0x066f, - 0x0671, 0x06d3, - 0x06d5, 0x06d5, - 0x06ee, 0x06ef, - 0x06fa, 0x06fc, - 0x06ff, 0x06ff, - 0x0710, 0x0710, - 0x0712, 0x072f, - 0x074d, 0x076d, - 0x0780, 0x07a5, - 0x07b1, 0x07b1, - 0x0904, 0x0939, - 0x093d, 0x093d, - 0x0950, 0x0950, - 0x0958, 0x0961, - 0x097d, 0x097d, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bd, 0x09bd, - 0x09ce, 0x09ce, - 0x09dc, 0x09dd, - 0x09df, 0x09e1, - 0x09f0, 0x09f1, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a72, 0x0a74, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abd, 0x0abd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae1, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3d, 0x0b3d, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b71, 0x0b71, - 0x0b83, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c60, 0x0c61, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbd, 0x0cbd, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d60, 0x0d61, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0e01, 0x0e30, - 0x0e32, 0x0e33, - 0x0e40, 0x0e45, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, 
- 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb0, - 0x0eb2, 0x0eb3, - 0x0ebd, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0edc, 0x0edd, - 0x0f00, 0x0f00, - 0x0f40, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f88, 0x0f8b, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x1050, 0x1055, - 0x10d0, 0x10fa, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x1676, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x1700, 0x170c, - 0x170e, 0x1711, - 0x1720, 0x1731, - 0x1740, 0x1751, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1780, 0x17b3, - 0x17dc, 0x17dc, - 0x1820, 0x1842, - 0x1844, 0x1877, - 0x1880, 0x18a8, - 0x1900, 0x191c, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19a9, - 0x19c1, 0x19c7, - 0x1a00, 0x1a16, - 0x2135, 0x2138, - 0x2d30, 0x2d65, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3006, 0x3006, - 0x303c, 0x303c, - 0x3041, 0x3096, - 0x309f, 0x309f, - 0x30a1, 0x30fa, - 0x30ff, 0x30ff, - 0x3105, 0x312c, - 0x3131, 0x318e, - 0x31a0, 0x31b7, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xa000, 0xa014, - 0xa016, 0xa48c, - 0xa800, 0xa801, - 0xa803, 0xa805, - 0xa807, 0xa80a, - 0xa80c, 0xa822, - 0xac00, 0xd7a3, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0xfb1d, 0xfb1d, - 0xfb1f, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff66, 0xff6f, - 
0xff71, 0xff9d, - 0xffa0, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10300, 0x1031e, - 0x10330, 0x10349, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x10450, 0x1049d, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f, - 0x10a00, 0x10a00, - 0x10a10, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d -}; /* CR_Lo */ - -/* 'Lt': General Category */ -static const OnigCodePoint CR_Lt[] = { - 10, - 0x01c5, 0x01c5, - 0x01c8, 0x01c8, - 0x01cb, 0x01cb, - 0x01f2, 0x01f2, - 0x1f88, 0x1f8f, - 0x1f98, 0x1f9f, - 0x1fa8, 0x1faf, - 0x1fbc, 0x1fbc, - 0x1fcc, 0x1fcc, - 0x1ffc, 0x1ffc -}; /* CR_Lt */ - -/* 'Lu': General Category */ -static const OnigCodePoint CR_Lu[] = { - 476, - 0x0041, 0x005a, - 0x00c0, 0x00d6, - 0x00d8, 0x00de, - 0x0100, 0x0100, - 0x0102, 0x0102, - 0x0104, 0x0104, - 0x0106, 0x0106, - 0x0108, 0x0108, - 0x010a, 0x010a, - 0x010c, 0x010c, - 0x010e, 0x010e, - 0x0110, 0x0110, - 0x0112, 0x0112, - 0x0114, 0x0114, - 0x0116, 0x0116, - 0x0118, 0x0118, - 0x011a, 0x011a, - 0x011c, 0x011c, - 0x011e, 0x011e, - 0x0120, 0x0120, - 0x0122, 0x0122, - 0x0124, 0x0124, - 0x0126, 0x0126, - 0x0128, 0x0128, - 0x012a, 0x012a, - 0x012c, 0x012c, - 0x012e, 0x012e, - 0x0130, 0x0130, - 0x0132, 0x0132, - 0x0134, 0x0134, - 0x0136, 0x0136, - 0x0139, 0x0139, - 0x013b, 0x013b, - 0x013d, 0x013d, - 0x013f, 0x013f, - 0x0141, 0x0141, - 0x0143, 0x0143, - 0x0145, 0x0145, - 0x0147, 0x0147, - 0x014a, 0x014a, - 0x014c, 0x014c, - 0x014e, 0x014e, - 0x0150, 0x0150, - 0x0152, 0x0152, - 0x0154, 0x0154, - 0x0156, 0x0156, - 0x0158, 0x0158, - 0x015a, 0x015a, - 0x015c, 0x015c, - 0x015e, 0x015e, - 0x0160, 0x0160, - 0x0162, 0x0162, - 0x0164, 0x0164, - 0x0166, 0x0166, - 0x0168, 0x0168, - 0x016a, 0x016a, - 0x016c, 0x016c, 
- 0x016e, 0x016e, - 0x0170, 0x0170, - 0x0172, 0x0172, - 0x0174, 0x0174, - 0x0176, 0x0176, - 0x0178, 0x0179, - 0x017b, 0x017b, - 0x017d, 0x017d, - 0x0181, 0x0182, - 0x0184, 0x0184, - 0x0186, 0x0187, - 0x0189, 0x018b, - 0x018e, 0x0191, - 0x0193, 0x0194, - 0x0196, 0x0198, - 0x019c, 0x019d, - 0x019f, 0x01a0, - 0x01a2, 0x01a2, - 0x01a4, 0x01a4, - 0x01a6, 0x01a7, - 0x01a9, 0x01a9, - 0x01ac, 0x01ac, - 0x01ae, 0x01af, - 0x01b1, 0x01b3, - 0x01b5, 0x01b5, - 0x01b7, 0x01b8, - 0x01bc, 0x01bc, - 0x01c4, 0x01c4, - 0x01c7, 0x01c7, - 0x01ca, 0x01ca, - 0x01cd, 0x01cd, - 0x01cf, 0x01cf, - 0x01d1, 0x01d1, - 0x01d3, 0x01d3, - 0x01d5, 0x01d5, - 0x01d7, 0x01d7, - 0x01d9, 0x01d9, - 0x01db, 0x01db, - 0x01de, 0x01de, - 0x01e0, 0x01e0, - 0x01e2, 0x01e2, - 0x01e4, 0x01e4, - 0x01e6, 0x01e6, - 0x01e8, 0x01e8, - 0x01ea, 0x01ea, - 0x01ec, 0x01ec, - 0x01ee, 0x01ee, - 0x01f1, 0x01f1, - 0x01f4, 0x01f4, - 0x01f6, 0x01f8, - 0x01fa, 0x01fa, - 0x01fc, 0x01fc, - 0x01fe, 0x01fe, - 0x0200, 0x0200, - 0x0202, 0x0202, - 0x0204, 0x0204, - 0x0206, 0x0206, - 0x0208, 0x0208, - 0x020a, 0x020a, - 0x020c, 0x020c, - 0x020e, 0x020e, - 0x0210, 0x0210, - 0x0212, 0x0212, - 0x0214, 0x0214, - 0x0216, 0x0216, - 0x0218, 0x0218, - 0x021a, 0x021a, - 0x021c, 0x021c, - 0x021e, 0x021e, - 0x0220, 0x0220, - 0x0222, 0x0222, - 0x0224, 0x0224, - 0x0226, 0x0226, - 0x0228, 0x0228, - 0x022a, 0x022a, - 0x022c, 0x022c, - 0x022e, 0x022e, - 0x0230, 0x0230, - 0x0232, 0x0232, - 0x023a, 0x023b, - 0x023d, 0x023e, - 0x0241, 0x0241, - 0x0386, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x038f, - 0x0391, 0x03a1, - 0x03a3, 0x03ab, - 0x03d2, 0x03d4, - 0x03d8, 0x03d8, - 0x03da, 0x03da, - 0x03dc, 0x03dc, - 0x03de, 0x03de, - 0x03e0, 0x03e0, - 0x03e2, 0x03e2, - 0x03e4, 0x03e4, - 0x03e6, 0x03e6, - 0x03e8, 0x03e8, - 0x03ea, 0x03ea, - 0x03ec, 0x03ec, - 0x03ee, 0x03ee, - 0x03f4, 0x03f4, - 0x03f7, 0x03f7, - 0x03f9, 0x03fa, - 0x03fd, 0x042f, - 0x0460, 0x0460, - 0x0462, 0x0462, - 0x0464, 0x0464, - 0x0466, 0x0466, - 0x0468, 0x0468, - 0x046a, 0x046a, - 
0x046c, 0x046c, - 0x046e, 0x046e, - 0x0470, 0x0470, - 0x0472, 0x0472, - 0x0474, 0x0474, - 0x0476, 0x0476, - 0x0478, 0x0478, - 0x047a, 0x047a, - 0x047c, 0x047c, - 0x047e, 0x047e, - 0x0480, 0x0480, - 0x048a, 0x048a, - 0x048c, 0x048c, - 0x048e, 0x048e, - 0x0490, 0x0490, - 0x0492, 0x0492, - 0x0494, 0x0494, - 0x0496, 0x0496, - 0x0498, 0x0498, - 0x049a, 0x049a, - 0x049c, 0x049c, - 0x049e, 0x049e, - 0x04a0, 0x04a0, - 0x04a2, 0x04a2, - 0x04a4, 0x04a4, - 0x04a6, 0x04a6, - 0x04a8, 0x04a8, - 0x04aa, 0x04aa, - 0x04ac, 0x04ac, - 0x04ae, 0x04ae, - 0x04b0, 0x04b0, - 0x04b2, 0x04b2, - 0x04b4, 0x04b4, - 0x04b6, 0x04b6, - 0x04b8, 0x04b8, - 0x04ba, 0x04ba, - 0x04bc, 0x04bc, - 0x04be, 0x04be, - 0x04c0, 0x04c1, - 0x04c3, 0x04c3, - 0x04c5, 0x04c5, - 0x04c7, 0x04c7, - 0x04c9, 0x04c9, - 0x04cb, 0x04cb, - 0x04cd, 0x04cd, - 0x04d0, 0x04d0, - 0x04d2, 0x04d2, - 0x04d4, 0x04d4, - 0x04d6, 0x04d6, - 0x04d8, 0x04d8, - 0x04da, 0x04da, - 0x04dc, 0x04dc, - 0x04de, 0x04de, - 0x04e0, 0x04e0, - 0x04e2, 0x04e2, - 0x04e4, 0x04e4, - 0x04e6, 0x04e6, - 0x04e8, 0x04e8, - 0x04ea, 0x04ea, - 0x04ec, 0x04ec, - 0x04ee, 0x04ee, - 0x04f0, 0x04f0, - 0x04f2, 0x04f2, - 0x04f4, 0x04f4, - 0x04f6, 0x04f6, - 0x04f8, 0x04f8, - 0x0500, 0x0500, - 0x0502, 0x0502, - 0x0504, 0x0504, - 0x0506, 0x0506, - 0x0508, 0x0508, - 0x050a, 0x050a, - 0x050c, 0x050c, - 0x050e, 0x050e, - 0x0531, 0x0556, - 0x10a0, 0x10c5, - 0x1e00, 0x1e00, - 0x1e02, 0x1e02, - 0x1e04, 0x1e04, - 0x1e06, 0x1e06, - 0x1e08, 0x1e08, - 0x1e0a, 0x1e0a, - 0x1e0c, 0x1e0c, - 0x1e0e, 0x1e0e, - 0x1e10, 0x1e10, - 0x1e12, 0x1e12, - 0x1e14, 0x1e14, - 0x1e16, 0x1e16, - 0x1e18, 0x1e18, - 0x1e1a, 0x1e1a, - 0x1e1c, 0x1e1c, - 0x1e1e, 0x1e1e, - 0x1e20, 0x1e20, - 0x1e22, 0x1e22, - 0x1e24, 0x1e24, - 0x1e26, 0x1e26, - 0x1e28, 0x1e28, - 0x1e2a, 0x1e2a, - 0x1e2c, 0x1e2c, - 0x1e2e, 0x1e2e, - 0x1e30, 0x1e30, - 0x1e32, 0x1e32, - 0x1e34, 0x1e34, - 0x1e36, 0x1e36, - 0x1e38, 0x1e38, - 0x1e3a, 0x1e3a, - 0x1e3c, 0x1e3c, - 0x1e3e, 0x1e3e, - 0x1e40, 0x1e40, - 0x1e42, 0x1e42, - 0x1e44, 0x1e44, - 
0x1e46, 0x1e46, - 0x1e48, 0x1e48, - 0x1e4a, 0x1e4a, - 0x1e4c, 0x1e4c, - 0x1e4e, 0x1e4e, - 0x1e50, 0x1e50, - 0x1e52, 0x1e52, - 0x1e54, 0x1e54, - 0x1e56, 0x1e56, - 0x1e58, 0x1e58, - 0x1e5a, 0x1e5a, - 0x1e5c, 0x1e5c, - 0x1e5e, 0x1e5e, - 0x1e60, 0x1e60, - 0x1e62, 0x1e62, - 0x1e64, 0x1e64, - 0x1e66, 0x1e66, - 0x1e68, 0x1e68, - 0x1e6a, 0x1e6a, - 0x1e6c, 0x1e6c, - 0x1e6e, 0x1e6e, - 0x1e70, 0x1e70, - 0x1e72, 0x1e72, - 0x1e74, 0x1e74, - 0x1e76, 0x1e76, - 0x1e78, 0x1e78, - 0x1e7a, 0x1e7a, - 0x1e7c, 0x1e7c, - 0x1e7e, 0x1e7e, - 0x1e80, 0x1e80, - 0x1e82, 0x1e82, - 0x1e84, 0x1e84, - 0x1e86, 0x1e86, - 0x1e88, 0x1e88, - 0x1e8a, 0x1e8a, - 0x1e8c, 0x1e8c, - 0x1e8e, 0x1e8e, - 0x1e90, 0x1e90, - 0x1e92, 0x1e92, - 0x1e94, 0x1e94, - 0x1ea0, 0x1ea0, - 0x1ea2, 0x1ea2, - 0x1ea4, 0x1ea4, - 0x1ea6, 0x1ea6, - 0x1ea8, 0x1ea8, - 0x1eaa, 0x1eaa, - 0x1eac, 0x1eac, - 0x1eae, 0x1eae, - 0x1eb0, 0x1eb0, - 0x1eb2, 0x1eb2, - 0x1eb4, 0x1eb4, - 0x1eb6, 0x1eb6, - 0x1eb8, 0x1eb8, - 0x1eba, 0x1eba, - 0x1ebc, 0x1ebc, - 0x1ebe, 0x1ebe, - 0x1ec0, 0x1ec0, - 0x1ec2, 0x1ec2, - 0x1ec4, 0x1ec4, - 0x1ec6, 0x1ec6, - 0x1ec8, 0x1ec8, - 0x1eca, 0x1eca, - 0x1ecc, 0x1ecc, - 0x1ece, 0x1ece, - 0x1ed0, 0x1ed0, - 0x1ed2, 0x1ed2, - 0x1ed4, 0x1ed4, - 0x1ed6, 0x1ed6, - 0x1ed8, 0x1ed8, - 0x1eda, 0x1eda, - 0x1edc, 0x1edc, - 0x1ede, 0x1ede, - 0x1ee0, 0x1ee0, - 0x1ee2, 0x1ee2, - 0x1ee4, 0x1ee4, - 0x1ee6, 0x1ee6, - 0x1ee8, 0x1ee8, - 0x1eea, 0x1eea, - 0x1eec, 0x1eec, - 0x1eee, 0x1eee, - 0x1ef0, 0x1ef0, - 0x1ef2, 0x1ef2, - 0x1ef4, 0x1ef4, - 0x1ef6, 0x1ef6, - 0x1ef8, 0x1ef8, - 0x1f08, 0x1f0f, - 0x1f18, 0x1f1d, - 0x1f28, 0x1f2f, - 0x1f38, 0x1f3f, - 0x1f48, 0x1f4d, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f5f, - 0x1f68, 0x1f6f, - 0x1fb8, 0x1fbb, - 0x1fc8, 0x1fcb, - 0x1fd8, 0x1fdb, - 0x1fe8, 0x1fec, - 0x1ff8, 0x1ffb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210b, 0x210d, - 0x2110, 0x2112, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x2130, 0x2131, - 
0x2133, 0x2133, - 0x213e, 0x213f, - 0x2145, 0x2145, - 0x2c00, 0x2c2e, - 0x2c80, 0x2c80, - 0x2c82, 0x2c82, - 0x2c84, 0x2c84, - 0x2c86, 0x2c86, - 0x2c88, 0x2c88, - 0x2c8a, 0x2c8a, - 0x2c8c, 0x2c8c, - 0x2c8e, 0x2c8e, - 0x2c90, 0x2c90, - 0x2c92, 0x2c92, - 0x2c94, 0x2c94, - 0x2c96, 0x2c96, - 0x2c98, 0x2c98, - 0x2c9a, 0x2c9a, - 0x2c9c, 0x2c9c, - 0x2c9e, 0x2c9e, - 0x2ca0, 0x2ca0, - 0x2ca2, 0x2ca2, - 0x2ca4, 0x2ca4, - 0x2ca6, 0x2ca6, - 0x2ca8, 0x2ca8, - 0x2caa, 0x2caa, - 0x2cac, 0x2cac, - 0x2cae, 0x2cae, - 0x2cb0, 0x2cb0, - 0x2cb2, 0x2cb2, - 0x2cb4, 0x2cb4, - 0x2cb6, 0x2cb6, - 0x2cb8, 0x2cb8, - 0x2cba, 0x2cba, - 0x2cbc, 0x2cbc, - 0x2cbe, 0x2cbe, - 0x2cc0, 0x2cc0, - 0x2cc2, 0x2cc2, - 0x2cc4, 0x2cc4, - 0x2cc6, 0x2cc6, - 0x2cc8, 0x2cc8, - 0x2cca, 0x2cca, - 0x2ccc, 0x2ccc, - 0x2cce, 0x2cce, - 0x2cd0, 0x2cd0, - 0x2cd2, 0x2cd2, - 0x2cd4, 0x2cd4, - 0x2cd6, 0x2cd6, - 0x2cd8, 0x2cd8, - 0x2cda, 0x2cda, - 0x2cdc, 0x2cdc, - 0x2cde, 0x2cde, - 0x2ce0, 0x2ce0, - 0x2ce2, 0x2ce2, - 0xff21, 0xff3a, - 0x10400, 0x10427, - 0x1d400, 0x1d419, - 0x1d434, 0x1d44d, - 0x1d468, 0x1d481, - 0x1d49c, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b5, - 0x1d4d0, 0x1d4e9, - 0x1d504, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d538, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d56c, 0x1d585, - 0x1d5a0, 0x1d5b9, - 0x1d5d4, 0x1d5ed, - 0x1d608, 0x1d621, - 0x1d63c, 0x1d655, - 0x1d670, 0x1d689, - 0x1d6a8, 0x1d6c0, - 0x1d6e2, 0x1d6fa, - 0x1d71c, 0x1d734, - 0x1d756, 0x1d76e, - 0x1d790, 0x1d7a8 -}; /* CR_Lu */ - -/* 'M': Major Category */ -static const OnigCodePoint CR_M[] = { - 133, - 0x0300, 0x036f, - 0x0483, 0x0486, - 0x0488, 0x0489, - 0x0591, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c5, - 0x05c7, 0x05c7, - 0x0610, 0x0615, - 0x064b, 0x065e, - 0x0670, 0x0670, - 0x06d6, 0x06dc, - 0x06de, 0x06e4, - 0x06e7, 0x06e8, - 0x06ea, 0x06ed, - 0x0711, 
0x0711, - 0x0730, 0x074a, - 0x07a6, 0x07b0, - 0x0901, 0x0903, - 0x093c, 0x093c, - 0x093e, 0x094d, - 0x0951, 0x0954, - 0x0962, 0x0963, - 0x0981, 0x0983, - 0x09bc, 0x09bc, - 0x09be, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09cd, - 0x09d7, 0x09d7, - 0x09e2, 0x09e3, - 0x0a01, 0x0a03, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a70, 0x0a71, - 0x0a81, 0x0a83, - 0x0abc, 0x0abc, - 0x0abe, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ae2, 0x0ae3, - 0x0b01, 0x0b03, - 0x0b3c, 0x0b3c, - 0x0b3e, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b82, 0x0b82, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0c01, 0x0c03, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c82, 0x0c83, - 0x0cbc, 0x0cbc, - 0x0cbe, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0d02, 0x0d03, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d82, 0x0d83, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0e31, 0x0e31, - 0x0e34, 0x0e3a, - 0x0e47, 0x0e4e, - 0x0eb1, 0x0eb1, - 0x0eb4, 0x0eb9, - 0x0ebb, 0x0ebc, - 0x0ec8, 0x0ecd, - 0x0f18, 0x0f19, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f3e, 0x0f3f, - 0x0f71, 0x0f84, - 0x0f86, 0x0f87, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1056, 0x1059, - 0x135f, 0x135f, - 0x1712, 0x1714, - 0x1732, 0x1734, - 0x1752, 0x1753, - 0x1772, 0x1773, - 0x17b6, 0x17d3, - 0x17dd, 0x17dd, - 0x180b, 0x180d, - 0x18a9, 0x18a9, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x19b0, 0x19c0, - 0x19c8, 0x19c9, - 0x1a17, 0x1a1b, - 0x1dc0, 0x1dc3, - 0x20d0, 0x20eb, - 0x302a, 0x302f, - 0x3099, 0x309a, - 0xa802, 0xa802, - 0xa806, 0xa806, - 0xa80b, 0xa80b, - 0xa823, 0xa827, - 0xfb1e, 0xfb1e, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0x10a01, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a0f, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a3f, - 0x1d165, 0x1d169, 
- 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0xe0100, 0xe01ef -}; /* CR_M */ - -/* 'Mc': General Category */ -static const OnigCodePoint CR_Mc[] = { - 63, - 0x0903, 0x0903, - 0x093e, 0x0940, - 0x0949, 0x094c, - 0x0982, 0x0983, - 0x09be, 0x09c0, - 0x09c7, 0x09c8, - 0x09cb, 0x09cc, - 0x09d7, 0x09d7, - 0x0a03, 0x0a03, - 0x0a3e, 0x0a40, - 0x0a83, 0x0a83, - 0x0abe, 0x0ac0, - 0x0ac9, 0x0ac9, - 0x0acb, 0x0acc, - 0x0b02, 0x0b03, - 0x0b3e, 0x0b3e, - 0x0b40, 0x0b40, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4c, - 0x0b57, 0x0b57, - 0x0bbe, 0x0bbf, - 0x0bc1, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcc, - 0x0bd7, 0x0bd7, - 0x0c01, 0x0c03, - 0x0c41, 0x0c44, - 0x0c82, 0x0c83, - 0x0cbe, 0x0cbe, - 0x0cc0, 0x0cc4, - 0x0cc7, 0x0cc8, - 0x0cca, 0x0ccb, - 0x0cd5, 0x0cd6, - 0x0d02, 0x0d03, - 0x0d3e, 0x0d40, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4c, - 0x0d57, 0x0d57, - 0x0d82, 0x0d83, - 0x0dcf, 0x0dd1, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df3, - 0x0f3e, 0x0f3f, - 0x0f7f, 0x0f7f, - 0x102c, 0x102c, - 0x1031, 0x1031, - 0x1038, 0x1038, - 0x1056, 0x1057, - 0x17b6, 0x17b6, - 0x17be, 0x17c5, - 0x17c7, 0x17c8, - 0x1923, 0x1926, - 0x1929, 0x192b, - 0x1930, 0x1931, - 0x1933, 0x1938, - 0x19b0, 0x19c0, - 0x19c8, 0x19c9, - 0x1a19, 0x1a1b, - 0xa802, 0xa802, - 0xa823, 0xa824, - 0xa827, 0xa827, - 0x1d165, 0x1d166, - 0x1d16d, 0x1d172 -}; /* CR_Mc */ - -/* 'Me': General Category */ -static const OnigCodePoint CR_Me[] = { - 4, - 0x0488, 0x0489, - 0x06de, 0x06de, - 0x20dd, 0x20e0, - 0x20e2, 0x20e4 -}; /* CR_Me */ - -/* 'Mn': General Category */ -static const OnigCodePoint CR_Mn[] = { - 124, - 0x0300, 0x036f, - 0x0483, 0x0486, - 0x0591, 0x05b9, - 0x05bb, 0x05bd, - 0x05bf, 0x05bf, - 0x05c1, 0x05c2, - 0x05c4, 0x05c5, - 0x05c7, 0x05c7, - 0x0610, 0x0615, - 0x064b, 0x065e, - 0x0670, 0x0670, - 0x06d6, 0x06dc, - 0x06df, 0x06e4, - 0x06e7, 0x06e8, - 0x06ea, 0x06ed, - 0x0711, 0x0711, - 0x0730, 0x074a, - 0x07a6, 0x07b0, - 0x0901, 0x0902, - 0x093c, 0x093c, - 0x0941, 0x0948, - 0x094d, 
0x094d, - 0x0951, 0x0954, - 0x0962, 0x0963, - 0x0981, 0x0981, - 0x09bc, 0x09bc, - 0x09c1, 0x09c4, - 0x09cd, 0x09cd, - 0x09e2, 0x09e3, - 0x0a01, 0x0a02, - 0x0a3c, 0x0a3c, - 0x0a41, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a70, 0x0a71, - 0x0a81, 0x0a82, - 0x0abc, 0x0abc, - 0x0ac1, 0x0ac5, - 0x0ac7, 0x0ac8, - 0x0acd, 0x0acd, - 0x0ae2, 0x0ae3, - 0x0b01, 0x0b01, - 0x0b3c, 0x0b3c, - 0x0b3f, 0x0b3f, - 0x0b41, 0x0b43, - 0x0b4d, 0x0b4d, - 0x0b56, 0x0b56, - 0x0b82, 0x0b82, - 0x0bc0, 0x0bc0, - 0x0bcd, 0x0bcd, - 0x0c3e, 0x0c40, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0cbc, 0x0cbc, - 0x0cbf, 0x0cbf, - 0x0cc6, 0x0cc6, - 0x0ccc, 0x0ccd, - 0x0d41, 0x0d43, - 0x0d4d, 0x0d4d, - 0x0dca, 0x0dca, - 0x0dd2, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0e31, 0x0e31, - 0x0e34, 0x0e3a, - 0x0e47, 0x0e4e, - 0x0eb1, 0x0eb1, - 0x0eb4, 0x0eb9, - 0x0ebb, 0x0ebc, - 0x0ec8, 0x0ecd, - 0x0f18, 0x0f19, - 0x0f35, 0x0f35, - 0x0f37, 0x0f37, - 0x0f39, 0x0f39, - 0x0f71, 0x0f7e, - 0x0f80, 0x0f84, - 0x0f86, 0x0f87, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fc6, 0x0fc6, - 0x102d, 0x1030, - 0x1032, 0x1032, - 0x1036, 0x1037, - 0x1039, 0x1039, - 0x1058, 0x1059, - 0x135f, 0x135f, - 0x1712, 0x1714, - 0x1732, 0x1734, - 0x1752, 0x1753, - 0x1772, 0x1773, - 0x17b7, 0x17bd, - 0x17c6, 0x17c6, - 0x17c9, 0x17d3, - 0x17dd, 0x17dd, - 0x180b, 0x180d, - 0x18a9, 0x18a9, - 0x1920, 0x1922, - 0x1927, 0x1928, - 0x1932, 0x1932, - 0x1939, 0x193b, - 0x1a17, 0x1a18, - 0x1dc0, 0x1dc3, - 0x20d0, 0x20dc, - 0x20e1, 0x20e1, - 0x20e5, 0x20eb, - 0x302a, 0x302f, - 0x3099, 0x309a, - 0xa806, 0xa806, - 0xa80b, 0xa80b, - 0xa825, 0xa826, - 0xfb1e, 0xfb1e, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0x10a01, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a0f, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a3f, - 0x1d167, 0x1d169, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0xe0100, 0xe01ef -}; /* CR_Mn */ - -/* 'N': Major Category */ -static const OnigCodePoint CR_N[] = { - 53, - 0x0030, 0x0039, - 0x00b2, 0x00b3, - 
0x00b9, 0x00b9, - 0x00bc, 0x00be, - 0x0660, 0x0669, - 0x06f0, 0x06f9, - 0x0966, 0x096f, - 0x09e6, 0x09ef, - 0x09f4, 0x09f9, - 0x0a66, 0x0a6f, - 0x0ae6, 0x0aef, - 0x0b66, 0x0b6f, - 0x0be6, 0x0bf2, - 0x0c66, 0x0c6f, - 0x0ce6, 0x0cef, - 0x0d66, 0x0d6f, - 0x0e50, 0x0e59, - 0x0ed0, 0x0ed9, - 0x0f20, 0x0f33, - 0x1040, 0x1049, - 0x1369, 0x137c, - 0x16ee, 0x16f0, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0x19d0, 0x19d9, - 0x2070, 0x2070, - 0x2074, 0x2079, - 0x2080, 0x2089, - 0x2153, 0x2183, - 0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x2cfd, 0x2cfd, - 0x3007, 0x3007, - 0x3021, 0x3029, - 0x3038, 0x303a, - 0x3192, 0x3195, - 0x3220, 0x3229, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0xff10, 0xff19, - 0x10107, 0x10133, - 0x10140, 0x10178, - 0x1018a, 0x1018a, - 0x10320, 0x10323, - 0x1034a, 0x1034a, - 0x103d1, 0x103d5, - 0x104a0, 0x104a9, - 0x10a40, 0x10a47, - 0x1d7ce, 0x1d7ff -}; /* CR_N */ - -/* 'Nd': General Category */ -static const OnigCodePoint CR_Nd[] = { - 23, - 0x0030, 0x0039, - 0x0660, 0x0669, - 0x06f0, 0x06f9, - 0x0966, 0x096f, - 0x09e6, 0x09ef, - 0x0a66, 0x0a6f, - 0x0ae6, 0x0aef, - 0x0b66, 0x0b6f, - 0x0be6, 0x0bef, - 0x0c66, 0x0c6f, - 0x0ce6, 0x0cef, - 0x0d66, 0x0d6f, - 0x0e50, 0x0e59, - 0x0ed0, 0x0ed9, - 0x0f20, 0x0f29, - 0x1040, 0x1049, - 0x17e0, 0x17e9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0x19d0, 0x19d9, - 0xff10, 0xff19, - 0x104a0, 0x104a9, - 0x1d7ce, 0x1d7ff -}; /* CR_Nd */ - -/* 'Nl': General Category */ -static const OnigCodePoint CR_Nl[] = { - 8, - 0x16ee, 0x16f0, - 0x2160, 0x2183, - 0x3007, 0x3007, - 0x3021, 0x3029, - 0x3038, 0x303a, - 0x10140, 0x10174, - 0x1034a, 0x1034a, - 0x103d1, 0x103d5 -}; /* CR_Nl */ - -/* 'No': General Category */ -static const OnigCodePoint CR_No[] = { - 26, - 0x00b2, 0x00b3, - 0x00b9, 0x00b9, - 0x00bc, 0x00be, - 0x09f4, 0x09f9, - 0x0bf0, 0x0bf2, - 0x0f2a, 0x0f33, - 0x1369, 0x137c, - 0x17f0, 0x17f9, - 0x2070, 0x2070, - 0x2074, 0x2079, - 0x2080, 0x2089, - 0x2153, 0x215f, - 
0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x2cfd, 0x2cfd, - 0x3192, 0x3195, - 0x3220, 0x3229, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0x10107, 0x10133, - 0x10175, 0x10178, - 0x1018a, 0x1018a, - 0x10320, 0x10323, - 0x10a40, 0x10a47 -}; /* CR_No */ - -/* 'P': Major Category */ -static const OnigCodePoint CR_P[] = { - 96, - 0x0021, 0x0023, - 0x0025, 0x002a, - 0x002c, 0x002f, - 0x003a, 0x003b, - 0x003f, 0x0040, - 0x005b, 0x005d, - 0x005f, 0x005f, - 0x007b, 0x007b, - 0x007d, 0x007d, - 0x00a1, 0x00a1, - 0x00ab, 0x00ab, - 0x00b7, 0x00b7, - 0x00bb, 0x00bb, - 0x00bf, 0x00bf, - 0x037e, 0x037e, - 0x0387, 0x0387, - 0x055a, 0x055f, - 0x0589, 0x058a, - 0x05be, 0x05be, - 0x05c0, 0x05c0, - 0x05c3, 0x05c3, - 0x05c6, 0x05c6, - 0x05f3, 0x05f4, - 0x060c, 0x060d, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x066a, 0x066d, - 0x06d4, 0x06d4, - 0x0700, 0x070d, - 0x0964, 0x0965, - 0x0970, 0x0970, - 0x0df4, 0x0df4, - 0x0e4f, 0x0e4f, - 0x0e5a, 0x0e5b, - 0x0f04, 0x0f12, - 0x0f3a, 0x0f3d, - 0x0f85, 0x0f85, - 0x0fd0, 0x0fd1, - 0x104a, 0x104f, - 0x10fb, 0x10fb, - 0x1361, 0x1368, - 0x166d, 0x166e, - 0x169b, 0x169c, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x17d4, 0x17d6, - 0x17d8, 0x17da, - 0x1800, 0x180a, - 0x1944, 0x1945, - 0x19de, 0x19df, - 0x1a1e, 0x1a1f, - 0x2010, 0x2027, - 0x2030, 0x2043, - 0x2045, 0x2051, - 0x2053, 0x205e, - 0x207d, 0x207e, - 0x208d, 0x208e, - 0x2329, 0x232a, - 0x23b4, 0x23b6, - 0x2768, 0x2775, - 0x27c5, 0x27c6, - 0x27e6, 0x27eb, - 0x2983, 0x2998, - 0x29d8, 0x29db, - 0x29fc, 0x29fd, - 0x2cf9, 0x2cfc, - 0x2cfe, 0x2cff, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x3001, 0x3003, - 0x3008, 0x3011, - 0x3014, 0x301f, - 0x3030, 0x3030, - 0x303d, 0x303d, - 0x30a0, 0x30a0, - 0x30fb, 0x30fb, - 0xfd3e, 0xfd3f, - 0xfe10, 0xfe19, - 0xfe30, 0xfe52, - 0xfe54, 0xfe61, - 0xfe63, 0xfe63, - 0xfe68, 0xfe68, - 0xfe6a, 0xfe6b, - 0xff01, 0xff03, - 0xff05, 0xff0a, - 0xff0c, 0xff0f, - 0xff1a, 0xff1b, - 0xff1f, 0xff20, - 0xff3b, 0xff3d, - 0xff3f, 0xff3f, - 0xff5b, 0xff5b, - 0xff5d, 
0xff5d, - 0xff5f, 0xff65, - 0x10100, 0x10101, - 0x1039f, 0x1039f, - 0x10a50, 0x10a58 -}; /* CR_P */ - -/* 'Pc': General Category */ -static const OnigCodePoint CR_Pc[] = { - 6, - 0x005f, 0x005f, - 0x203f, 0x2040, - 0x2054, 0x2054, - 0xfe33, 0xfe34, - 0xfe4d, 0xfe4f, - 0xff3f, 0xff3f -}; /* CR_Pc */ - -/* 'Pd': General Category */ -static const OnigCodePoint CR_Pd[] = { - 12, - 0x002d, 0x002d, - 0x058a, 0x058a, - 0x1806, 0x1806, - 0x2010, 0x2015, - 0x2e17, 0x2e17, - 0x301c, 0x301c, - 0x3030, 0x3030, - 0x30a0, 0x30a0, - 0xfe31, 0xfe32, - 0xfe58, 0xfe58, - 0xfe63, 0xfe63, - 0xff0d, 0xff0d -}; /* CR_Pd */ - -/* 'Pe': General Category */ -static const OnigCodePoint CR_Pe[] = { - 65, - 0x0029, 0x0029, - 0x005d, 0x005d, - 0x007d, 0x007d, - 0x0f3b, 0x0f3b, - 0x0f3d, 0x0f3d, - 0x169c, 0x169c, - 0x2046, 0x2046, - 0x207e, 0x207e, - 0x208e, 0x208e, - 0x232a, 0x232a, - 0x23b5, 0x23b5, - 0x2769, 0x2769, - 0x276b, 0x276b, - 0x276d, 0x276d, - 0x276f, 0x276f, - 0x2771, 0x2771, - 0x2773, 0x2773, - 0x2775, 0x2775, - 0x27c6, 0x27c6, - 0x27e7, 0x27e7, - 0x27e9, 0x27e9, - 0x27eb, 0x27eb, - 0x2984, 0x2984, - 0x2986, 0x2986, - 0x2988, 0x2988, - 0x298a, 0x298a, - 0x298c, 0x298c, - 0x298e, 0x298e, - 0x2990, 0x2990, - 0x2992, 0x2992, - 0x2994, 0x2994, - 0x2996, 0x2996, - 0x2998, 0x2998, - 0x29d9, 0x29d9, - 0x29db, 0x29db, - 0x29fd, 0x29fd, - 0x3009, 0x3009, - 0x300b, 0x300b, - 0x300d, 0x300d, - 0x300f, 0x300f, - 0x3011, 0x3011, - 0x3015, 0x3015, - 0x3017, 0x3017, - 0x3019, 0x3019, - 0x301b, 0x301b, - 0x301e, 0x301f, - 0xfd3f, 0xfd3f, - 0xfe18, 0xfe18, - 0xfe36, 0xfe36, - 0xfe38, 0xfe38, - 0xfe3a, 0xfe3a, - 0xfe3c, 0xfe3c, - 0xfe3e, 0xfe3e, - 0xfe40, 0xfe40, - 0xfe42, 0xfe42, - 0xfe44, 0xfe44, - 0xfe48, 0xfe48, - 0xfe5a, 0xfe5a, - 0xfe5c, 0xfe5c, - 0xfe5e, 0xfe5e, - 0xff09, 0xff09, - 0xff3d, 0xff3d, - 0xff5d, 0xff5d, - 0xff60, 0xff60, - 0xff63, 0xff63 -}; /* CR_Pe */ - -/* 'Pf': General Category */ -static const OnigCodePoint CR_Pf[] = { - 9, - 0x00bb, 0x00bb, - 0x2019, 0x2019, - 0x201d, 
0x201d, - 0x203a, 0x203a, - 0x2e03, 0x2e03, - 0x2e05, 0x2e05, - 0x2e0a, 0x2e0a, - 0x2e0d, 0x2e0d, - 0x2e1d, 0x2e1d -}; /* CR_Pf */ - -/* 'Pi': General Category */ -static const OnigCodePoint CR_Pi[] = { - 10, - 0x00ab, 0x00ab, - 0x2018, 0x2018, - 0x201b, 0x201c, - 0x201f, 0x201f, - 0x2039, 0x2039, - 0x2e02, 0x2e02, - 0x2e04, 0x2e04, - 0x2e09, 0x2e09, - 0x2e0c, 0x2e0c, - 0x2e1c, 0x2e1c -}; /* CR_Pi */ - -/* 'Po': General Category */ -static const OnigCodePoint CR_Po[] = { - 88, - 0x0021, 0x0023, - 0x0025, 0x0027, - 0x002a, 0x002a, - 0x002c, 0x002c, - 0x002e, 0x002f, - 0x003a, 0x003b, - 0x003f, 0x0040, - 0x005c, 0x005c, - 0x00a1, 0x00a1, - 0x00b7, 0x00b7, - 0x00bf, 0x00bf, - 0x037e, 0x037e, - 0x0387, 0x0387, - 0x055a, 0x055f, - 0x0589, 0x0589, - 0x05be, 0x05be, - 0x05c0, 0x05c0, - 0x05c3, 0x05c3, - 0x05c6, 0x05c6, - 0x05f3, 0x05f4, - 0x060c, 0x060d, - 0x061b, 0x061b, - 0x061e, 0x061f, - 0x066a, 0x066d, - 0x06d4, 0x06d4, - 0x0700, 0x070d, - 0x0964, 0x0965, - 0x0970, 0x0970, - 0x0df4, 0x0df4, - 0x0e4f, 0x0e4f, - 0x0e5a, 0x0e5b, - 0x0f04, 0x0f12, - 0x0f85, 0x0f85, - 0x0fd0, 0x0fd1, - 0x104a, 0x104f, - 0x10fb, 0x10fb, - 0x1361, 0x1368, - 0x166d, 0x166e, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x17d4, 0x17d6, - 0x17d8, 0x17da, - 0x1800, 0x1805, - 0x1807, 0x180a, - 0x1944, 0x1945, - 0x19de, 0x19df, - 0x1a1e, 0x1a1f, - 0x2016, 0x2017, - 0x2020, 0x2027, - 0x2030, 0x2038, - 0x203b, 0x203e, - 0x2041, 0x2043, - 0x2047, 0x2051, - 0x2053, 0x2053, - 0x2055, 0x205e, - 0x23b6, 0x23b6, - 0x2cf9, 0x2cfc, - 0x2cfe, 0x2cff, - 0x2e00, 0x2e01, - 0x2e06, 0x2e08, - 0x2e0b, 0x2e0b, - 0x2e0e, 0x2e16, - 0x3001, 0x3003, - 0x303d, 0x303d, - 0x30fb, 0x30fb, - 0xfe10, 0xfe16, - 0xfe19, 0xfe19, - 0xfe30, 0xfe30, - 0xfe45, 0xfe46, - 0xfe49, 0xfe4c, - 0xfe50, 0xfe52, - 0xfe54, 0xfe57, - 0xfe5f, 0xfe61, - 0xfe68, 0xfe68, - 0xfe6a, 0xfe6b, - 0xff01, 0xff03, - 0xff05, 0xff07, - 0xff0a, 0xff0a, - 0xff0c, 0xff0c, - 0xff0e, 0xff0f, - 0xff1a, 0xff1b, - 0xff1f, 0xff20, - 0xff3c, 0xff3c, - 0xff61, 0xff61, - 
0xff64, 0xff65, - 0x10100, 0x10101, - 0x1039f, 0x1039f, - 0x10a50, 0x10a58 -}; /* CR_Po */ - -/* 'Ps': General Category */ -static const OnigCodePoint CR_Ps[] = { - 67, - 0x0028, 0x0028, - 0x005b, 0x005b, - 0x007b, 0x007b, - 0x0f3a, 0x0f3a, - 0x0f3c, 0x0f3c, - 0x169b, 0x169b, - 0x201a, 0x201a, - 0x201e, 0x201e, - 0x2045, 0x2045, - 0x207d, 0x207d, - 0x208d, 0x208d, - 0x2329, 0x2329, - 0x23b4, 0x23b4, - 0x2768, 0x2768, - 0x276a, 0x276a, - 0x276c, 0x276c, - 0x276e, 0x276e, - 0x2770, 0x2770, - 0x2772, 0x2772, - 0x2774, 0x2774, - 0x27c5, 0x27c5, - 0x27e6, 0x27e6, - 0x27e8, 0x27e8, - 0x27ea, 0x27ea, - 0x2983, 0x2983, - 0x2985, 0x2985, - 0x2987, 0x2987, - 0x2989, 0x2989, - 0x298b, 0x298b, - 0x298d, 0x298d, - 0x298f, 0x298f, - 0x2991, 0x2991, - 0x2993, 0x2993, - 0x2995, 0x2995, - 0x2997, 0x2997, - 0x29d8, 0x29d8, - 0x29da, 0x29da, - 0x29fc, 0x29fc, - 0x3008, 0x3008, - 0x300a, 0x300a, - 0x300c, 0x300c, - 0x300e, 0x300e, - 0x3010, 0x3010, - 0x3014, 0x3014, - 0x3016, 0x3016, - 0x3018, 0x3018, - 0x301a, 0x301a, - 0x301d, 0x301d, - 0xfd3e, 0xfd3e, - 0xfe17, 0xfe17, - 0xfe35, 0xfe35, - 0xfe37, 0xfe37, - 0xfe39, 0xfe39, - 0xfe3b, 0xfe3b, - 0xfe3d, 0xfe3d, - 0xfe3f, 0xfe3f, - 0xfe41, 0xfe41, - 0xfe43, 0xfe43, - 0xfe47, 0xfe47, - 0xfe59, 0xfe59, - 0xfe5b, 0xfe5b, - 0xfe5d, 0xfe5d, - 0xff08, 0xff08, - 0xff3b, 0xff3b, - 0xff5b, 0xff5b, - 0xff5f, 0xff5f, - 0xff62, 0xff62 -}; /* CR_Ps */ - -/* 'S': Major Category */ -static const OnigCodePoint CR_S[] = { - 162, - 0x0024, 0x0024, - 0x002b, 0x002b, - 0x003c, 0x003e, - 0x005e, 0x005e, - 0x0060, 0x0060, - 0x007c, 0x007c, - 0x007e, 0x007e, - 0x00a2, 0x00a9, - 0x00ac, 0x00ac, - 0x00ae, 0x00b1, - 0x00b4, 0x00b4, - 0x00b6, 0x00b6, - 0x00b8, 0x00b8, - 0x00d7, 0x00d7, - 0x00f7, 0x00f7, - 0x02c2, 0x02c5, - 0x02d2, 0x02df, - 0x02e5, 0x02ed, - 0x02ef, 0x02ff, - 0x0374, 0x0375, - 0x0384, 0x0385, - 0x03f6, 0x03f6, - 0x0482, 0x0482, - 0x060b, 0x060b, - 0x060e, 0x060f, - 0x06e9, 0x06e9, - 0x06fd, 0x06fe, - 0x09f2, 0x09f3, - 0x09fa, 0x09fa, - 0x0af1, 
0x0af1, - 0x0b70, 0x0b70, - 0x0bf3, 0x0bfa, - 0x0e3f, 0x0e3f, - 0x0f01, 0x0f03, - 0x0f13, 0x0f17, - 0x0f1a, 0x0f1f, - 0x0f34, 0x0f34, - 0x0f36, 0x0f36, - 0x0f38, 0x0f38, - 0x0fbe, 0x0fc5, - 0x0fc7, 0x0fcc, - 0x0fcf, 0x0fcf, - 0x1360, 0x1360, - 0x1390, 0x1399, - 0x17db, 0x17db, - 0x1940, 0x1940, - 0x19e0, 0x19ff, - 0x1fbd, 0x1fbd, - 0x1fbf, 0x1fc1, - 0x1fcd, 0x1fcf, - 0x1fdd, 0x1fdf, - 0x1fed, 0x1fef, - 0x1ffd, 0x1ffe, - 0x2044, 0x2044, - 0x2052, 0x2052, - 0x207a, 0x207c, - 0x208a, 0x208c, - 0x20a0, 0x20b5, - 0x2100, 0x2101, - 0x2103, 0x2106, - 0x2108, 0x2109, - 0x2114, 0x2114, - 0x2116, 0x2118, - 0x211e, 0x2123, - 0x2125, 0x2125, - 0x2127, 0x2127, - 0x2129, 0x2129, - 0x212e, 0x212e, - 0x2132, 0x2132, - 0x213a, 0x213b, - 0x2140, 0x2144, - 0x214a, 0x214c, - 0x2190, 0x2328, - 0x232b, 0x23b3, - 0x23b7, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x249c, 0x24e9, - 0x2500, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2767, - 0x2794, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27c0, 0x27c4, - 0x27d0, 0x27e5, - 0x27f0, 0x2982, - 0x2999, 0x29d7, - 0x29dc, 0x29fb, - 0x29fe, 0x2b13, - 0x2ce5, 0x2cea, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3004, 0x3004, - 0x3012, 0x3013, - 0x3020, 0x3020, - 0x3036, 0x3037, - 0x303e, 0x303f, - 0x309b, 0x309c, - 0x3190, 0x3191, - 0x3196, 0x319f, - 0x31c0, 0x31cf, - 0x3200, 0x321e, - 0x322a, 0x3243, - 0x3250, 0x3250, - 0x3260, 0x327f, - 0x328a, 0x32b0, - 0x32c0, 0x32fe, - 0x3300, 0x33ff, - 0x4dc0, 0x4dff, - 0xa490, 0xa4c6, - 0xa700, 0xa716, - 0xa828, 0xa82b, - 0xfb29, 0xfb29, - 0xfdfc, 0xfdfd, - 0xfe62, 0xfe62, - 0xfe64, 0xfe66, - 0xfe69, 0xfe69, - 0xff04, 0xff04, - 0xff0b, 0xff0b, - 0xff1c, 0xff1e, - 0xff3e, 0xff3e, - 0xff40, 0xff40, - 0xff5c, 0xff5c, - 0xff5e, 0xff5e, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfffc, 0xfffd, - 0x10102, 0x10102, - 0x10137, 0x1013f, - 
0x10179, 0x10189, - 0x103d0, 0x103d0, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d164, - 0x1d16a, 0x1d16c, - 0x1d183, 0x1d184, - 0x1d18c, 0x1d1a9, - 0x1d1ae, 0x1d1dd, - 0x1d200, 0x1d241, - 0x1d245, 0x1d245, - 0x1d300, 0x1d356, - 0x1d6c1, 0x1d6c1, - 0x1d6db, 0x1d6db, - 0x1d6fb, 0x1d6fb, - 0x1d715, 0x1d715, - 0x1d735, 0x1d735, - 0x1d74f, 0x1d74f, - 0x1d76f, 0x1d76f, - 0x1d789, 0x1d789, - 0x1d7a9, 0x1d7a9, - 0x1d7c3, 0x1d7c3 -}; /* CR_S */ - -/* 'Sc': General Category */ -static const OnigCodePoint CR_Sc[] = { - 14, - 0x0024, 0x0024, - 0x00a2, 0x00a5, - 0x060b, 0x060b, - 0x09f2, 0x09f3, - 0x0af1, 0x0af1, - 0x0bf9, 0x0bf9, - 0x0e3f, 0x0e3f, - 0x17db, 0x17db, - 0x20a0, 0x20b5, - 0xfdfc, 0xfdfc, - 0xfe69, 0xfe69, - 0xff04, 0xff04, - 0xffe0, 0xffe1, - 0xffe5, 0xffe6 -}; /* CR_Sc */ - -/* 'Sk': General Category */ -static const OnigCodePoint CR_Sk[] = { - 23, - 0x005e, 0x005e, - 0x0060, 0x0060, - 0x00a8, 0x00a8, - 0x00af, 0x00af, - 0x00b4, 0x00b4, - 0x00b8, 0x00b8, - 0x02c2, 0x02c5, - 0x02d2, 0x02df, - 0x02e5, 0x02ed, - 0x02ef, 0x02ff, - 0x0374, 0x0375, - 0x0384, 0x0385, - 0x1fbd, 0x1fbd, - 0x1fbf, 0x1fc1, - 0x1fcd, 0x1fcf, - 0x1fdd, 0x1fdf, - 0x1fed, 0x1fef, - 0x1ffd, 0x1ffe, - 0x309b, 0x309c, - 0xa700, 0xa716, - 0xff3e, 0xff3e, - 0xff40, 0xff40, - 0xffe3, 0xffe3 -}; /* CR_Sk */ - -/* 'Sm': General Category */ -static const OnigCodePoint CR_Sm[] = { - 59, - 0x002b, 0x002b, - 0x003c, 0x003e, - 0x007c, 0x007c, - 0x007e, 0x007e, - 0x00ac, 0x00ac, - 0x00b1, 0x00b1, - 0x00d7, 0x00d7, - 0x00f7, 0x00f7, - 0x03f6, 0x03f6, - 0x2044, 0x2044, - 0x2052, 0x2052, - 0x207a, 0x207c, - 0x208a, 0x208c, - 0x2140, 0x2144, - 0x214b, 0x214b, - 0x2190, 0x2194, - 0x219a, 0x219b, - 0x21a0, 0x21a0, - 0x21a3, 0x21a3, - 0x21a6, 0x21a6, - 0x21ae, 0x21ae, - 0x21ce, 0x21cf, - 0x21d2, 0x21d2, - 0x21d4, 0x21d4, - 0x21f4, 0x22ff, - 0x2308, 0x230b, - 0x2320, 0x2321, - 0x237c, 0x237c, - 0x239b, 0x23b3, - 0x25b7, 0x25b7, - 0x25c1, 0x25c1, - 0x25f8, 0x25ff, - 0x266f, 0x266f, - 0x27c0, 0x27c4, - 
0x27d0, 0x27e5, - 0x27f0, 0x27ff, - 0x2900, 0x2982, - 0x2999, 0x29d7, - 0x29dc, 0x29fb, - 0x29fe, 0x2aff, - 0xfb29, 0xfb29, - 0xfe62, 0xfe62, - 0xfe64, 0xfe66, - 0xff0b, 0xff0b, - 0xff1c, 0xff1e, - 0xff5c, 0xff5c, - 0xff5e, 0xff5e, - 0xffe2, 0xffe2, - 0xffe9, 0xffec, - 0x1d6c1, 0x1d6c1, - 0x1d6db, 0x1d6db, - 0x1d6fb, 0x1d6fb, - 0x1d715, 0x1d715, - 0x1d735, 0x1d735, - 0x1d74f, 0x1d74f, - 0x1d76f, 0x1d76f, - 0x1d789, 0x1d789, - 0x1d7a9, 0x1d7a9, - 0x1d7c3, 0x1d7c3 -}; /* CR_Sm */ - -/* 'So': General Category */ -static const OnigCodePoint CR_So[] = { - 120, - 0x00a6, 0x00a7, - 0x00a9, 0x00a9, - 0x00ae, 0x00ae, - 0x00b0, 0x00b0, - 0x00b6, 0x00b6, - 0x0482, 0x0482, - 0x060e, 0x060f, - 0x06e9, 0x06e9, - 0x06fd, 0x06fe, - 0x09fa, 0x09fa, - 0x0b70, 0x0b70, - 0x0bf3, 0x0bf8, - 0x0bfa, 0x0bfa, - 0x0f01, 0x0f03, - 0x0f13, 0x0f17, - 0x0f1a, 0x0f1f, - 0x0f34, 0x0f34, - 0x0f36, 0x0f36, - 0x0f38, 0x0f38, - 0x0fbe, 0x0fc5, - 0x0fc7, 0x0fcc, - 0x0fcf, 0x0fcf, - 0x1360, 0x1360, - 0x1390, 0x1399, - 0x1940, 0x1940, - 0x19e0, 0x19ff, - 0x2100, 0x2101, - 0x2103, 0x2106, - 0x2108, 0x2109, - 0x2114, 0x2114, - 0x2116, 0x2118, - 0x211e, 0x2123, - 0x2125, 0x2125, - 0x2127, 0x2127, - 0x2129, 0x2129, - 0x212e, 0x212e, - 0x2132, 0x2132, - 0x213a, 0x213b, - 0x214a, 0x214a, - 0x214c, 0x214c, - 0x2195, 0x2199, - 0x219c, 0x219f, - 0x21a1, 0x21a2, - 0x21a4, 0x21a5, - 0x21a7, 0x21ad, - 0x21af, 0x21cd, - 0x21d0, 0x21d1, - 0x21d3, 0x21d3, - 0x21d5, 0x21f3, - 0x2300, 0x2307, - 0x230c, 0x231f, - 0x2322, 0x2328, - 0x232b, 0x237b, - 0x237d, 0x239a, - 0x23b7, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x249c, 0x24e9, - 0x2500, 0x25b6, - 0x25b8, 0x25c0, - 0x25c2, 0x25f7, - 0x2600, 0x266e, - 0x2670, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2767, - 0x2794, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x2800, 0x28ff, - 0x2b00, 0x2b13, - 0x2ce5, 0x2cea, - 0x2e80, 
0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3004, 0x3004, - 0x3012, 0x3013, - 0x3020, 0x3020, - 0x3036, 0x3037, - 0x303e, 0x303f, - 0x3190, 0x3191, - 0x3196, 0x319f, - 0x31c0, 0x31cf, - 0x3200, 0x321e, - 0x322a, 0x3243, - 0x3250, 0x3250, - 0x3260, 0x327f, - 0x328a, 0x32b0, - 0x32c0, 0x32fe, - 0x3300, 0x33ff, - 0x4dc0, 0x4dff, - 0xa490, 0xa4c6, - 0xa828, 0xa82b, - 0xfdfd, 0xfdfd, - 0xffe4, 0xffe4, - 0xffe8, 0xffe8, - 0xffed, 0xffee, - 0xfffc, 0xfffd, - 0x10102, 0x10102, - 0x10137, 0x1013f, - 0x10179, 0x10189, - 0x103d0, 0x103d0, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d164, - 0x1d16a, 0x1d16c, - 0x1d183, 0x1d184, - 0x1d18c, 0x1d1a9, - 0x1d1ae, 0x1d1dd, - 0x1d200, 0x1d241, - 0x1d245, 0x1d245, - 0x1d300, 0x1d356 -}; /* CR_So */ - -/* 'Z': Major Category */ -static const OnigCodePoint CR_Z[] = { - 9, - 0x0020, 0x0020, - 0x00a0, 0x00a0, - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x2028, 0x2029, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; /* CR_Z */ - -/* 'Zl': General Category */ -static const OnigCodePoint CR_Zl[] = { - 1, - 0x2028, 0x2028 -}; /* CR_Zl */ - -/* 'Zp': General Category */ -static const OnigCodePoint CR_Zp[] = { - 1, - 0x2029, 0x2029 -}; /* CR_Zp */ - -/* 'Zs': General Category */ -static const OnigCodePoint CR_Zs[] = { - 8, - 0x0020, 0x0020, - 0x00a0, 0x00a0, - 0x1680, 0x1680, - 0x180e, 0x180e, - 0x2000, 0x200a, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; /* CR_Zs */ - -/* 'Arabic': Script */ -static const OnigCodePoint CR_Arabic[] = { - 17, - 0x060b, 0x060b, - 0x060d, 0x0615, - 0x061e, 0x061e, - 0x0621, 0x063a, - 0x0641, 0x064a, - 0x0656, 0x065e, - 0x066a, 0x066f, - 0x0671, 0x06dc, - 0x06de, 0x06ff, - 0x0750, 0x076d, - 0xfb50, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfc, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc -}; /* CR_Arabic */ - -/* 'Armenian': Script */ -static const OnigCodePoint CR_Armenian[] = { - 5, - 0x0531, 0x0556, - 0x0559, 0x055f, - 
0x0561, 0x0587, - 0x058a, 0x058a, - 0xfb13, 0xfb17 -}; /* CR_Armenian */ - -/* 'Bengali': Script */ -static const OnigCodePoint CR_Bengali[] = { - 14, - 0x0981, 0x0983, - 0x0985, 0x098c, - 0x098f, 0x0990, - 0x0993, 0x09a8, - 0x09aa, 0x09b0, - 0x09b2, 0x09b2, - 0x09b6, 0x09b9, - 0x09bc, 0x09c4, - 0x09c7, 0x09c8, - 0x09cb, 0x09ce, - 0x09d7, 0x09d7, - 0x09dc, 0x09dd, - 0x09df, 0x09e3, - 0x09e6, 0x09fa -}; /* CR_Bengali */ - -/* 'Bopomofo': Script */ -static const OnigCodePoint CR_Bopomofo[] = { - 2, - 0x3105, 0x312c, - 0x31a0, 0x31b7 -}; /* CR_Bopomofo */ - -/* 'Braille': Script */ -static const OnigCodePoint CR_Braille[] = { - 1, - 0x2800, 0x28ff -}; /* CR_Braille */ - -/* 'Buginese': Script */ -static const OnigCodePoint CR_Buginese[] = { - 2, - 0x1a00, 0x1a1b, - 0x1a1e, 0x1a1f -}; /* CR_Buginese */ - -/* 'Buhid': Script */ -static const OnigCodePoint CR_Buhid[] = { - 1, - 0x1740, 0x1753 -}; /* CR_Buhid */ - -/* 'Canadian_Aboriginal': Script */ -static const OnigCodePoint CR_Canadian_Aboriginal[] = { - 1, - 0x1401, 0x1676 -}; /* CR_Canadian_Aboriginal */ - -/* 'Cherokee': Script */ -static const OnigCodePoint CR_Cherokee[] = { - 1, - 0x13a0, 0x13f4 -}; /* CR_Cherokee */ - -/* 'Common': Script */ -static const OnigCodePoint CR_Common[] = { - 126, - 0x0000, 0x0040, - 0x005b, 0x0060, - 0x007b, 0x00a9, - 0x00ab, 0x00b9, - 0x00bb, 0x00bf, - 0x00d7, 0x00d7, - 0x00f7, 0x00f7, - 0x02b9, 0x02df, - 0x02e5, 0x02ff, - 0x037e, 0x037e, - 0x0387, 0x0387, - 0x0589, 0x0589, - 0x0600, 0x0603, - 0x060c, 0x060c, - 0x061b, 0x061b, - 0x061f, 0x061f, - 0x0640, 0x0640, - 0x0660, 0x0669, - 0x06dd, 0x06dd, - 0x0964, 0x0965, - 0x0970, 0x0970, - 0x0e3f, 0x0e3f, - 0x10fb, 0x10fb, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x2000, 0x200b, - 0x200e, 0x2063, - 0x206a, 0x2070, - 0x2074, 0x207e, - 0x2080, 0x208e, - 0x20a0, 0x20b5, - 0x2100, 0x2125, - 0x2127, 0x2129, - 0x212c, 0x214c, - 0x2153, 0x2183, - 0x2190, 0x23db, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x2460, 0x269c, - 0x26a0, 0x26b1, - 0x2701, 
0x2704, - 0x2706, 0x2709, - 0x270c, 0x2727, - 0x2729, 0x274b, - 0x274d, 0x274d, - 0x274f, 0x2752, - 0x2756, 0x2756, - 0x2758, 0x275e, - 0x2761, 0x2794, - 0x2798, 0x27af, - 0x27b1, 0x27be, - 0x27c0, 0x27c6, - 0x27d0, 0x27eb, - 0x27f0, 0x27ff, - 0x2900, 0x2b13, - 0x2e00, 0x2e17, - 0x2e1c, 0x2e1d, - 0x2ff0, 0x2ffb, - 0x3000, 0x3004, - 0x3006, 0x3006, - 0x3008, 0x3020, - 0x3030, 0x3037, - 0x303c, 0x303f, - 0x309b, 0x309c, - 0x30a0, 0x30a0, - 0x30fb, 0x30fc, - 0x3190, 0x319f, - 0x31c0, 0x31cf, - 0x3220, 0x3243, - 0x3250, 0x325f, - 0x327e, 0x32fe, - 0x3300, 0x33ff, - 0x4dc0, 0x4dff, - 0xa700, 0xa716, - 0xe000, 0xf8ff, - 0xfd3e, 0xfd3f, - 0xfdfd, 0xfdfd, - 0xfe10, 0xfe19, - 0xfe30, 0xfe52, - 0xfe54, 0xfe66, - 0xfe68, 0xfe6b, - 0xfeff, 0xfeff, - 0xff01, 0xff20, - 0xff3b, 0xff40, - 0xff5b, 0xff65, - 0xff70, 0xff70, - 0xff9e, 0xff9f, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfff9, 0xfffd, - 0x10100, 0x10102, - 0x10107, 0x10133, - 0x10137, 0x1013f, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d12a, 0x1d166, - 0x1d16a, 0x1d17a, - 0x1d183, 0x1d184, - 0x1d18c, 0x1d1a9, - 0x1d1ae, 0x1d1dd, - 0x1d300, 0x1d356, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d7c9, - 0x1d7ce, 0x1d7ff, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; /* CR_Common */ - -/* 'Coptic': Script */ -static const OnigCodePoint CR_Coptic[] = { - 3, - 0x03e2, 0x03ef, - 0x2c80, 0x2cea, - 0x2cf9, 0x2cff -}; /* CR_Coptic */ - -/* 'Cypriot': Script */ -static const OnigCodePoint CR_Cypriot[] = { - 6, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f -}; /* CR_Cypriot */ - 
-/* 'Cyrillic': Script */ -static const OnigCodePoint CR_Cyrillic[] = { - 6, - 0x0400, 0x0486, - 0x0488, 0x04ce, - 0x04d0, 0x04f9, - 0x0500, 0x050f, - 0x1d2b, 0x1d2b, - 0x1d78, 0x1d78 -}; /* CR_Cyrillic */ - -/* 'Deseret': Script */ -static const OnigCodePoint CR_Deseret[] = { - 1, - 0x10400, 0x1044f -}; /* CR_Deseret */ - -/* 'Devanagari': Script */ -static const OnigCodePoint CR_Devanagari[] = { - 6, - 0x0901, 0x0939, - 0x093c, 0x094d, - 0x0950, 0x0954, - 0x0958, 0x0963, - 0x0966, 0x096f, - 0x097d, 0x097d -}; /* CR_Devanagari */ - -/* 'Ethiopic': Script */ -static const OnigCodePoint CR_Ethiopic[] = { - 27, - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135f, 0x137c, - 0x1380, 0x1399, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde -}; /* CR_Ethiopic */ - -/* 'Georgian': Script */ -static const OnigCodePoint CR_Georgian[] = { - 4, - 0x10a0, 0x10c5, - 0x10d0, 0x10fa, - 0x10fc, 0x10fc, - 0x2d00, 0x2d25 -}; /* CR_Georgian */ - -/* 'Glagolitic': Script */ -static const OnigCodePoint CR_Glagolitic[] = { - 2, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e -}; /* CR_Glagolitic */ - -/* 'Gothic': Script */ -static const OnigCodePoint CR_Gothic[] = { - 1, - 0x10330, 0x1034a -}; /* CR_Gothic */ - -/* 'Greek': Script */ -static const OnigCodePoint CR_Greek[] = { - 31, - 0x0374, 0x0375, - 0x037a, 0x037a, - 0x0384, 0x0386, - 0x0388, 0x038a, - 0x038c, 0x038c, - 0x038e, 0x03a1, - 0x03a3, 0x03ce, - 0x03d0, 0x03e1, - 0x03f0, 0x03ff, - 0x1d26, 0x1d2a, - 0x1d5d, 0x1d61, - 0x1d66, 0x1d6a, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 
0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x2126, 0x2126, - 0x10140, 0x1018a, - 0x1d200, 0x1d245 -}; /* CR_Greek */ - -/* 'Gujarati': Script */ -static const OnigCodePoint CR_Gujarati[] = { - 14, - 0x0a81, 0x0a83, - 0x0a85, 0x0a8d, - 0x0a8f, 0x0a91, - 0x0a93, 0x0aa8, - 0x0aaa, 0x0ab0, - 0x0ab2, 0x0ab3, - 0x0ab5, 0x0ab9, - 0x0abc, 0x0ac5, - 0x0ac7, 0x0ac9, - 0x0acb, 0x0acd, - 0x0ad0, 0x0ad0, - 0x0ae0, 0x0ae3, - 0x0ae6, 0x0aef, - 0x0af1, 0x0af1 -}; /* CR_Gujarati */ - -/* 'Gurmukhi': Script */ -static const OnigCodePoint CR_Gurmukhi[] = { - 15, - 0x0a01, 0x0a03, - 0x0a05, 0x0a0a, - 0x0a0f, 0x0a10, - 0x0a13, 0x0a28, - 0x0a2a, 0x0a30, - 0x0a32, 0x0a33, - 0x0a35, 0x0a36, - 0x0a38, 0x0a39, - 0x0a3c, 0x0a3c, - 0x0a3e, 0x0a42, - 0x0a47, 0x0a48, - 0x0a4b, 0x0a4d, - 0x0a59, 0x0a5c, - 0x0a5e, 0x0a5e, - 0x0a66, 0x0a74 -}; /* CR_Gurmukhi */ - -/* 'Han': Script */ -static const OnigCodePoint CR_Han[] = { - 14, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x3005, 0x3005, - 0x3007, 0x3007, - 0x3021, 0x3029, - 0x3038, 0x303b, - 0x3400, 0x4db5, - 0x4e00, 0x9fbb, - 0xf900, 0xfa2d, - 0xfa30, 0xfa6a, - 0xfa70, 0xfad9, - 0x20000, 0x2a6d6, - 0x2f800, 0x2fa1d -}; /* CR_Han */ - -/* 'Hangul': Script */ -static const OnigCodePoint CR_Hangul[] = { - 12, - 0x1100, 0x1159, - 0x115f, 0x11a2, - 0x11a8, 0x11f9, - 0x3131, 0x318e, - 0x3200, 0x321e, - 0x3260, 0x327d, - 0xac00, 0xd7a3, - 0xffa0, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc -}; /* CR_Hangul */ - -/* 'Hanunoo': Script */ -static const OnigCodePoint CR_Hanunoo[] = { - 1, - 0x1720, 0x1734 -}; /* CR_Hanunoo */ - -/* 'Hebrew': Script */ -static const OnigCodePoint CR_Hebrew[] = { - 10, - 0x0591, 0x05b9, - 0x05bb, 0x05c7, - 0x05d0, 0x05ea, - 0x05f0, 0x05f4, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfb4f -}; /* CR_Hebrew */ - -/* 'Hiragana': 
Script */ -static const OnigCodePoint CR_Hiragana[] = { - 2, - 0x3041, 0x3096, - 0x309d, 0x309f -}; /* CR_Hiragana */ - -/* 'Inherited': Script */ -static const OnigCodePoint CR_Inherited[] = { - 15, - 0x0300, 0x036f, - 0x064b, 0x0655, - 0x0670, 0x0670, - 0x1dc0, 0x1dc3, - 0x200c, 0x200d, - 0x20d0, 0x20eb, - 0x302a, 0x302f, - 0x3099, 0x309a, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe23, - 0x1d167, 0x1d169, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0xe0100, 0xe01ef -}; /* CR_Inherited */ - -/* 'Kannada': Script */ -static const OnigCodePoint CR_Kannada[] = { - 13, - 0x0c82, 0x0c83, - 0x0c85, 0x0c8c, - 0x0c8e, 0x0c90, - 0x0c92, 0x0ca8, - 0x0caa, 0x0cb3, - 0x0cb5, 0x0cb9, - 0x0cbc, 0x0cc4, - 0x0cc6, 0x0cc8, - 0x0cca, 0x0ccd, - 0x0cd5, 0x0cd6, - 0x0cde, 0x0cde, - 0x0ce0, 0x0ce1, - 0x0ce6, 0x0cef -}; /* CR_Kannada */ - -/* 'Katakana': Script */ -static const OnigCodePoint CR_Katakana[] = { - 5, - 0x30a1, 0x30fa, - 0x30fd, 0x30ff, - 0x31f0, 0x31ff, - 0xff66, 0xff6f, - 0xff71, 0xff9d -}; /* CR_Katakana */ - -/* 'Kharoshthi': Script */ -static const OnigCodePoint CR_Kharoshthi[] = { - 8, - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a47, - 0x10a50, 0x10a58 -}; /* CR_Kharoshthi */ - -/* 'Khmer': Script */ -static const OnigCodePoint CR_Khmer[] = { - 4, - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x19e0, 0x19ff -}; /* CR_Khmer */ - -/* 'Lao': Script */ -static const OnigCodePoint CR_Lao[] = { - 18, - 0x0e81, 0x0e82, - 0x0e84, 0x0e84, - 0x0e87, 0x0e88, - 0x0e8a, 0x0e8a, - 0x0e8d, 0x0e8d, - 0x0e94, 0x0e97, - 0x0e99, 0x0e9f, - 0x0ea1, 0x0ea3, - 0x0ea5, 0x0ea5, - 0x0ea7, 0x0ea7, - 0x0eaa, 0x0eab, - 0x0ead, 0x0eb9, - 0x0ebb, 0x0ebd, - 0x0ec0, 0x0ec4, - 0x0ec6, 0x0ec6, - 0x0ec8, 0x0ecd, - 0x0ed0, 0x0ed9, - 0x0edc, 0x0edd -}; /* CR_Lao */ - -/* 'Latin': Script */ -static const OnigCodePoint CR_Latin[] = { - 23, - 0x0041, 0x005a, - 0x0061, 0x007a, - 0x00aa, 0x00aa, - 
0x00ba, 0x00ba, - 0x00c0, 0x00d6, - 0x00d8, 0x00f6, - 0x00f8, 0x0241, - 0x0250, 0x02b8, - 0x02e0, 0x02e4, - 0x1d00, 0x1d25, - 0x1d2c, 0x1d5c, - 0x1d62, 0x1d65, - 0x1d6b, 0x1d77, - 0x1d79, 0x1dbf, - 0x1e00, 0x1e9b, - 0x1ea0, 0x1ef9, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x2090, 0x2094, - 0x212a, 0x212b, - 0xfb00, 0xfb06, - 0xff21, 0xff3a, - 0xff41, 0xff5a -}; /* CR_Latin */ - -/* 'Limbu': Script */ -static const OnigCodePoint CR_Limbu[] = { - 5, - 0x1900, 0x191c, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x194f -}; /* CR_Limbu */ - -/* 'Linear_B': Script */ -static const OnigCodePoint CR_Linear_B[] = { - 7, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa -}; /* CR_Linear_B */ - -/* 'Malayalam': Script */ -static const OnigCodePoint CR_Malayalam[] = { - 11, - 0x0d02, 0x0d03, - 0x0d05, 0x0d0c, - 0x0d0e, 0x0d10, - 0x0d12, 0x0d28, - 0x0d2a, 0x0d39, - 0x0d3e, 0x0d43, - 0x0d46, 0x0d48, - 0x0d4a, 0x0d4d, - 0x0d57, 0x0d57, - 0x0d60, 0x0d61, - 0x0d66, 0x0d6f -}; /* CR_Malayalam */ - -/* 'Mongolian': Script */ -static const OnigCodePoint CR_Mongolian[] = { - 4, - 0x1800, 0x180e, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a9 -}; /* CR_Mongolian */ - -/* 'Myanmar': Script */ -static const OnigCodePoint CR_Myanmar[] = { - 6, - 0x1000, 0x1021, - 0x1023, 0x1027, - 0x1029, 0x102a, - 0x102c, 0x1032, - 0x1036, 0x1039, - 0x1040, 0x1059 -}; /* CR_Myanmar */ - -/* 'New_Tai_Lue': Script */ -static const OnigCodePoint CR_New_Tai_Lue[] = { - 4, - 0x1980, 0x19a9, - 0x19b0, 0x19c9, - 0x19d0, 0x19d9, - 0x19de, 0x19df -}; /* CR_New_Tai_Lue */ - -/* 'Ogham': Script */ -static const OnigCodePoint CR_Ogham[] = { - 1, - 0x1680, 0x169c -}; /* CR_Ogham */ - -/* 'Old_Italic': Script */ -static const OnigCodePoint CR_Old_Italic[] = { - 2, - 0x10300, 0x1031e, - 0x10320, 0x10323 -}; /* CR_Old_Italic */ - -/* 'Old_Persian': Script */ -static const OnigCodePoint CR_Old_Persian[] = { - 2, 
- 0x103a0, 0x103c3, - 0x103c8, 0x103d5 -}; /* CR_Old_Persian */ - -/* 'Oriya': Script */ -static const OnigCodePoint CR_Oriya[] = { - 14, - 0x0b01, 0x0b03, - 0x0b05, 0x0b0c, - 0x0b0f, 0x0b10, - 0x0b13, 0x0b28, - 0x0b2a, 0x0b30, - 0x0b32, 0x0b33, - 0x0b35, 0x0b39, - 0x0b3c, 0x0b43, - 0x0b47, 0x0b48, - 0x0b4b, 0x0b4d, - 0x0b56, 0x0b57, - 0x0b5c, 0x0b5d, - 0x0b5f, 0x0b61, - 0x0b66, 0x0b71 -}; /* CR_Oriya */ - -/* 'Osmanya': Script */ -static const OnigCodePoint CR_Osmanya[] = { - 2, - 0x10480, 0x1049d, - 0x104a0, 0x104a9 -}; /* CR_Osmanya */ - -/* 'Runic': Script */ -static const OnigCodePoint CR_Runic[] = { - 2, - 0x16a0, 0x16ea, - 0x16ee, 0x16f0 -}; /* CR_Runic */ - -/* 'Shavian': Script */ -static const OnigCodePoint CR_Shavian[] = { - 1, - 0x10450, 0x1047f -}; /* CR_Shavian */ - -/* 'Sinhala': Script */ -static const OnigCodePoint CR_Sinhala[] = { - 11, - 0x0d82, 0x0d83, - 0x0d85, 0x0d96, - 0x0d9a, 0x0db1, - 0x0db3, 0x0dbb, - 0x0dbd, 0x0dbd, - 0x0dc0, 0x0dc6, - 0x0dca, 0x0dca, - 0x0dcf, 0x0dd4, - 0x0dd6, 0x0dd6, - 0x0dd8, 0x0ddf, - 0x0df2, 0x0df4 -}; /* CR_Sinhala */ - -/* 'Syloti_Nagri': Script */ -static const OnigCodePoint CR_Syloti_Nagri[] = { - 1, - 0xa800, 0xa82b -}; /* CR_Syloti_Nagri */ - -/* 'Syriac': Script */ -static const OnigCodePoint CR_Syriac[] = { - 3, - 0x0700, 0x070d, - 0x070f, 0x074a, - 0x074d, 0x074f -}; /* CR_Syriac */ - -/* 'Tagalog': Script */ -static const OnigCodePoint CR_Tagalog[] = { - 2, - 0x1700, 0x170c, - 0x170e, 0x1714 -}; /* CR_Tagalog */ - -/* 'Tagbanwa': Script */ -static const OnigCodePoint CR_Tagbanwa[] = { - 3, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773 -}; /* CR_Tagbanwa */ - -/* 'Tai_Le': Script */ -static const OnigCodePoint CR_Tai_Le[] = { - 2, - 0x1950, 0x196d, - 0x1970, 0x1974 -}; /* CR_Tai_Le */ - -/* 'Tamil': Script */ -static const OnigCodePoint CR_Tamil[] = { - 15, - 0x0b82, 0x0b83, - 0x0b85, 0x0b8a, - 0x0b8e, 0x0b90, - 0x0b92, 0x0b95, - 0x0b99, 0x0b9a, - 0x0b9c, 0x0b9c, - 0x0b9e, 0x0b9f, - 0x0ba3, 0x0ba4, - 
0x0ba8, 0x0baa, - 0x0bae, 0x0bb9, - 0x0bbe, 0x0bc2, - 0x0bc6, 0x0bc8, - 0x0bca, 0x0bcd, - 0x0bd7, 0x0bd7, - 0x0be6, 0x0bfa -}; /* CR_Tamil */ - -/* 'Telugu': Script */ -static const OnigCodePoint CR_Telugu[] = { - 12, - 0x0c01, 0x0c03, - 0x0c05, 0x0c0c, - 0x0c0e, 0x0c10, - 0x0c12, 0x0c28, - 0x0c2a, 0x0c33, - 0x0c35, 0x0c39, - 0x0c3e, 0x0c44, - 0x0c46, 0x0c48, - 0x0c4a, 0x0c4d, - 0x0c55, 0x0c56, - 0x0c60, 0x0c61, - 0x0c66, 0x0c6f -}; /* CR_Telugu */ - -/* 'Thaana': Script */ -static const OnigCodePoint CR_Thaana[] = { - 1, - 0x0780, 0x07b1 -}; /* CR_Thaana */ - -/* 'Thai': Script */ -static const OnigCodePoint CR_Thai[] = { - 2, - 0x0e01, 0x0e3a, - 0x0e40, 0x0e5b -}; /* CR_Thai */ - -/* 'Tibetan': Script */ -static const OnigCodePoint CR_Tibetan[] = { - 7, - 0x0f00, 0x0f47, - 0x0f49, 0x0f6a, - 0x0f71, 0x0f8b, - 0x0f90, 0x0f97, - 0x0f99, 0x0fbc, - 0x0fbe, 0x0fcc, - 0x0fcf, 0x0fd1 -}; /* CR_Tibetan */ - -/* 'Tifinagh': Script */ -static const OnigCodePoint CR_Tifinagh[] = { - 2, - 0x2d30, 0x2d65, - 0x2d6f, 0x2d6f -}; /* CR_Tifinagh */ - -/* 'Ugaritic': Script */ -static const OnigCodePoint CR_Ugaritic[] = { - 2, - 0x10380, 0x1039d, - 0x1039f, 0x1039f -}; /* CR_Ugaritic */ - -/* 'Yi': Script */ -static const OnigCodePoint CR_Yi[] = { - 2, - 0xa000, 0xa48c, - 0xa490, 0xa4c6 -}; /* CR_Yi */ - - -#endif /* USE_UNICODE_PROPERTIES */ - - -typedef struct { - int n; - OnigCodePoint code[3]; -} CodePointList3; - -typedef struct { - OnigCodePoint from; - CodePointList3 to; -} CaseFold_11_Type; - -typedef struct { - OnigCodePoint from; - CodePointList3 to; -} CaseUnfold_11_Type; - -typedef struct { - int n; - OnigCodePoint code[2]; -} CodePointList2; - -typedef struct { - OnigCodePoint from[2]; - CodePointList2 to; -} CaseUnfold_12_Type; - -typedef struct { - OnigCodePoint from[3]; - CodePointList2 to; -} CaseUnfold_13_Type; - -static const CaseFold_11_Type CaseFold[] = { - { 0x0041, {1, {0x0061}}}, - { 0x0042, {1, {0x0062}}}, - { 0x0043, {1, {0x0063}}}, - { 0x0044, {1, 
{0x0064}}}, - { 0x0045, {1, {0x0065}}}, - { 0x0046, {1, {0x0066}}}, - { 0x0047, {1, {0x0067}}}, - { 0x0048, {1, {0x0068}}}, - { 0x004a, {1, {0x006a}}}, - { 0x004b, {1, {0x006b}}}, - { 0x004c, {1, {0x006c}}}, - { 0x004d, {1, {0x006d}}}, - { 0x004e, {1, {0x006e}}}, - { 0x004f, {1, {0x006f}}}, - { 0x0050, {1, {0x0070}}}, - { 0x0051, {1, {0x0071}}}, - { 0x0052, {1, {0x0072}}}, - { 0x0053, {1, {0x0073}}}, - { 0x0054, {1, {0x0074}}}, - { 0x0055, {1, {0x0075}}}, - { 0x0056, {1, {0x0076}}}, - { 0x0057, {1, {0x0077}}}, - { 0x0058, {1, {0x0078}}}, - { 0x0059, {1, {0x0079}}}, - { 0x005a, {1, {0x007a}}}, - { 0x00b5, {1, {0x03bc}}}, - { 0x00c0, {1, {0x00e0}}}, - { 0x00c1, {1, {0x00e1}}}, - { 0x00c2, {1, {0x00e2}}}, - { 0x00c3, {1, {0x00e3}}}, - { 0x00c4, {1, {0x00e4}}}, - { 0x00c5, {1, {0x00e5}}}, - { 0x00c6, {1, {0x00e6}}}, - { 0x00c7, {1, {0x00e7}}}, - { 0x00c8, {1, {0x00e8}}}, - { 0x00c9, {1, {0x00e9}}}, - { 0x00ca, {1, {0x00ea}}}, - { 0x00cb, {1, {0x00eb}}}, - { 0x00cc, {1, {0x00ec}}}, - { 0x00cd, {1, {0x00ed}}}, - { 0x00ce, {1, {0x00ee}}}, - { 0x00cf, {1, {0x00ef}}}, - { 0x00d0, {1, {0x00f0}}}, - { 0x00d1, {1, {0x00f1}}}, - { 0x00d2, {1, {0x00f2}}}, - { 0x00d3, {1, {0x00f3}}}, - { 0x00d4, {1, {0x00f4}}}, - { 0x00d5, {1, {0x00f5}}}, - { 0x00d6, {1, {0x00f6}}}, - { 0x00d8, {1, {0x00f8}}}, - { 0x00d9, {1, {0x00f9}}}, - { 0x00da, {1, {0x00fa}}}, - { 0x00db, {1, {0x00fb}}}, - { 0x00dc, {1, {0x00fc}}}, - { 0x00dd, {1, {0x00fd}}}, - { 0x00de, {1, {0x00fe}}}, - { 0x00df, {2, {0x0073, 0x0073}}}, - { 0x0100, {1, {0x0101}}}, - { 0x0102, {1, {0x0103}}}, - { 0x0104, {1, {0x0105}}}, - { 0x0106, {1, {0x0107}}}, - { 0x0108, {1, {0x0109}}}, - { 0x010a, {1, {0x010b}}}, - { 0x010c, {1, {0x010d}}}, - { 0x010e, {1, {0x010f}}}, - { 0x0110, {1, {0x0111}}}, - { 0x0112, {1, {0x0113}}}, - { 0x0114, {1, {0x0115}}}, - { 0x0116, {1, {0x0117}}}, - { 0x0118, {1, {0x0119}}}, - { 0x011a, {1, {0x011b}}}, - { 0x011c, {1, {0x011d}}}, - { 0x011e, {1, {0x011f}}}, - { 0x0120, {1, {0x0121}}}, - { 0x0122, {1, 
{0x0123}}}, - { 0x0124, {1, {0x0125}}}, - { 0x0126, {1, {0x0127}}}, - { 0x0128, {1, {0x0129}}}, - { 0x012a, {1, {0x012b}}}, - { 0x012c, {1, {0x012d}}}, - { 0x012e, {1, {0x012f}}}, - { 0x0132, {1, {0x0133}}}, - { 0x0134, {1, {0x0135}}}, - { 0x0136, {1, {0x0137}}}, - { 0x0139, {1, {0x013a}}}, - { 0x013b, {1, {0x013c}}}, - { 0x013d, {1, {0x013e}}}, - { 0x013f, {1, {0x0140}}}, - { 0x0141, {1, {0x0142}}}, - { 0x0143, {1, {0x0144}}}, - { 0x0145, {1, {0x0146}}}, - { 0x0147, {1, {0x0148}}}, - { 0x0149, {2, {0x02bc, 0x006e}}}, - { 0x014a, {1, {0x014b}}}, - { 0x014c, {1, {0x014d}}}, - { 0x014e, {1, {0x014f}}}, - { 0x0150, {1, {0x0151}}}, - { 0x0152, {1, {0x0153}}}, - { 0x0154, {1, {0x0155}}}, - { 0x0156, {1, {0x0157}}}, - { 0x0158, {1, {0x0159}}}, - { 0x015a, {1, {0x015b}}}, - { 0x015c, {1, {0x015d}}}, - { 0x015e, {1, {0x015f}}}, - { 0x0160, {1, {0x0161}}}, - { 0x0162, {1, {0x0163}}}, - { 0x0164, {1, {0x0165}}}, - { 0x0166, {1, {0x0167}}}, - { 0x0168, {1, {0x0169}}}, - { 0x016a, {1, {0x016b}}}, - { 0x016c, {1, {0x016d}}}, - { 0x016e, {1, {0x016f}}}, - { 0x0170, {1, {0x0171}}}, - { 0x0172, {1, {0x0173}}}, - { 0x0174, {1, {0x0175}}}, - { 0x0176, {1, {0x0177}}}, - { 0x0178, {1, {0x00ff}}}, - { 0x0179, {1, {0x017a}}}, - { 0x017b, {1, {0x017c}}}, - { 0x017d, {1, {0x017e}}}, - { 0x017f, {1, {0x0073}}}, - { 0x0181, {1, {0x0253}}}, - { 0x0182, {1, {0x0183}}}, - { 0x0184, {1, {0x0185}}}, - { 0x0186, {1, {0x0254}}}, - { 0x0187, {1, {0x0188}}}, - { 0x0189, {1, {0x0256}}}, - { 0x018a, {1, {0x0257}}}, - { 0x018b, {1, {0x018c}}}, - { 0x018e, {1, {0x01dd}}}, - { 0x018f, {1, {0x0259}}}, - { 0x0190, {1, {0x025b}}}, - { 0x0191, {1, {0x0192}}}, - { 0x0193, {1, {0x0260}}}, - { 0x0194, {1, {0x0263}}}, - { 0x0196, {1, {0x0269}}}, - { 0x0197, {1, {0x0268}}}, - { 0x0198, {1, {0x0199}}}, - { 0x019c, {1, {0x026f}}}, - { 0x019d, {1, {0x0272}}}, - { 0x019f, {1, {0x0275}}}, - { 0x01a0, {1, {0x01a1}}}, - { 0x01a2, {1, {0x01a3}}}, - { 0x01a4, {1, {0x01a5}}}, - { 0x01a6, {1, {0x0280}}}, - { 0x01a7, {1, 
{0x01a8}}}, - { 0x01a9, {1, {0x0283}}}, - { 0x01ac, {1, {0x01ad}}}, - { 0x01ae, {1, {0x0288}}}, - { 0x01af, {1, {0x01b0}}}, - { 0x01b1, {1, {0x028a}}}, - { 0x01b2, {1, {0x028b}}}, - { 0x01b3, {1, {0x01b4}}}, - { 0x01b5, {1, {0x01b6}}}, - { 0x01b7, {1, {0x0292}}}, - { 0x01b8, {1, {0x01b9}}}, - { 0x01bc, {1, {0x01bd}}}, - { 0x01c4, {1, {0x01c6}}}, - { 0x01c5, {1, {0x01c6}}}, - { 0x01c7, {1, {0x01c9}}}, - { 0x01c8, {1, {0x01c9}}}, - { 0x01ca, {1, {0x01cc}}}, - { 0x01cb, {1, {0x01cc}}}, - { 0x01cd, {1, {0x01ce}}}, - { 0x01cf, {1, {0x01d0}}}, - { 0x01d1, {1, {0x01d2}}}, - { 0x01d3, {1, {0x01d4}}}, - { 0x01d5, {1, {0x01d6}}}, - { 0x01d7, {1, {0x01d8}}}, - { 0x01d9, {1, {0x01da}}}, - { 0x01db, {1, {0x01dc}}}, - { 0x01de, {1, {0x01df}}}, - { 0x01e0, {1, {0x01e1}}}, - { 0x01e2, {1, {0x01e3}}}, - { 0x01e4, {1, {0x01e5}}}, - { 0x01e6, {1, {0x01e7}}}, - { 0x01e8, {1, {0x01e9}}}, - { 0x01ea, {1, {0x01eb}}}, - { 0x01ec, {1, {0x01ed}}}, - { 0x01ee, {1, {0x01ef}}}, - { 0x01f0, {2, {0x006a, 0x030c}}}, - { 0x01f1, {1, {0x01f3}}}, - { 0x01f2, {1, {0x01f3}}}, - { 0x01f4, {1, {0x01f5}}}, - { 0x01f6, {1, {0x0195}}}, - { 0x01f7, {1, {0x01bf}}}, - { 0x01f8, {1, {0x01f9}}}, - { 0x01fa, {1, {0x01fb}}}, - { 0x01fc, {1, {0x01fd}}}, - { 0x01fe, {1, {0x01ff}}}, - { 0x0200, {1, {0x0201}}}, - { 0x0202, {1, {0x0203}}}, - { 0x0204, {1, {0x0205}}}, - { 0x0206, {1, {0x0207}}}, - { 0x0208, {1, {0x0209}}}, - { 0x020a, {1, {0x020b}}}, - { 0x020c, {1, {0x020d}}}, - { 0x020e, {1, {0x020f}}}, - { 0x0210, {1, {0x0211}}}, - { 0x0212, {1, {0x0213}}}, - { 0x0214, {1, {0x0215}}}, - { 0x0216, {1, {0x0217}}}, - { 0x0218, {1, {0x0219}}}, - { 0x021a, {1, {0x021b}}}, - { 0x021c, {1, {0x021d}}}, - { 0x021e, {1, {0x021f}}}, - { 0x0220, {1, {0x019e}}}, - { 0x0222, {1, {0x0223}}}, - { 0x0224, {1, {0x0225}}}, - { 0x0226, {1, {0x0227}}}, - { 0x0228, {1, {0x0229}}}, - { 0x022a, {1, {0x022b}}}, - { 0x022c, {1, {0x022d}}}, - { 0x022e, {1, {0x022f}}}, - { 0x0230, {1, {0x0231}}}, - { 0x0232, {1, {0x0233}}}, - { 0x023b, {1, 
{0x023c}}}, - { 0x023d, {1, {0x019a}}}, - { 0x0241, {1, {0x0294}}}, - { 0x0345, {1, {0x03b9}}}, - { 0x0386, {1, {0x03ac}}}, - { 0x0388, {1, {0x03ad}}}, - { 0x0389, {1, {0x03ae}}}, - { 0x038a, {1, {0x03af}}}, - { 0x038c, {1, {0x03cc}}}, - { 0x038e, {1, {0x03cd}}}, - { 0x038f, {1, {0x03ce}}}, - { 0x0390, {3, {0x03b9, 0x0308, 0x0301}}}, - { 0x0391, {1, {0x03b1}}}, - { 0x0392, {1, {0x03b2}}}, - { 0x0393, {1, {0x03b3}}}, - { 0x0394, {1, {0x03b4}}}, - { 0x0395, {1, {0x03b5}}}, - { 0x0396, {1, {0x03b6}}}, - { 0x0397, {1, {0x03b7}}}, - { 0x0398, {1, {0x03b8}}}, - { 0x0399, {1, {0x03b9}}}, - { 0x039a, {1, {0x03ba}}}, - { 0x039b, {1, {0x03bb}}}, - { 0x039c, {1, {0x03bc}}}, - { 0x039d, {1, {0x03bd}}}, - { 0x039e, {1, {0x03be}}}, - { 0x039f, {1, {0x03bf}}}, - { 0x03a0, {1, {0x03c0}}}, - { 0x03a1, {1, {0x03c1}}}, - { 0x03a3, {1, {0x03c3}}}, - { 0x03a4, {1, {0x03c4}}}, - { 0x03a5, {1, {0x03c5}}}, - { 0x03a6, {1, {0x03c6}}}, - { 0x03a7, {1, {0x03c7}}}, - { 0x03a8, {1, {0x03c8}}}, - { 0x03a9, {1, {0x03c9}}}, - { 0x03aa, {1, {0x03ca}}}, - { 0x03ab, {1, {0x03cb}}}, - { 0x03b0, {3, {0x03c5, 0x0308, 0x0301}}}, - { 0x03c2, {1, {0x03c3}}}, - { 0x03d0, {1, {0x03b2}}}, - { 0x03d1, {1, {0x03b8}}}, - { 0x03d5, {1, {0x03c6}}}, - { 0x03d6, {1, {0x03c0}}}, - { 0x03d8, {1, {0x03d9}}}, - { 0x03da, {1, {0x03db}}}, - { 0x03dc, {1, {0x03dd}}}, - { 0x03de, {1, {0x03df}}}, - { 0x03e0, {1, {0x03e1}}}, - { 0x03e2, {1, {0x03e3}}}, - { 0x03e4, {1, {0x03e5}}}, - { 0x03e6, {1, {0x03e7}}}, - { 0x03e8, {1, {0x03e9}}}, - { 0x03ea, {1, {0x03eb}}}, - { 0x03ec, {1, {0x03ed}}}, - { 0x03ee, {1, {0x03ef}}}, - { 0x03f0, {1, {0x03ba}}}, - { 0x03f1, {1, {0x03c1}}}, - { 0x03f4, {1, {0x03b8}}}, - { 0x03f5, {1, {0x03b5}}}, - { 0x03f7, {1, {0x03f8}}}, - { 0x03f9, {1, {0x03f2}}}, - { 0x03fa, {1, {0x03fb}}}, - { 0x0400, {1, {0x0450}}}, - { 0x0401, {1, {0x0451}}}, - { 0x0402, {1, {0x0452}}}, - { 0x0403, {1, {0x0453}}}, - { 0x0404, {1, {0x0454}}}, - { 0x0405, {1, {0x0455}}}, - { 0x0406, {1, {0x0456}}}, - { 0x0407, {1, 
{0x0457}}}, - { 0x0408, {1, {0x0458}}}, - { 0x0409, {1, {0x0459}}}, - { 0x040a, {1, {0x045a}}}, - { 0x040b, {1, {0x045b}}}, - { 0x040c, {1, {0x045c}}}, - { 0x040d, {1, {0x045d}}}, - { 0x040e, {1, {0x045e}}}, - { 0x040f, {1, {0x045f}}}, - { 0x0410, {1, {0x0430}}}, - { 0x0411, {1, {0x0431}}}, - { 0x0412, {1, {0x0432}}}, - { 0x0413, {1, {0x0433}}}, - { 0x0414, {1, {0x0434}}}, - { 0x0415, {1, {0x0435}}}, - { 0x0416, {1, {0x0436}}}, - { 0x0417, {1, {0x0437}}}, - { 0x0418, {1, {0x0438}}}, - { 0x0419, {1, {0x0439}}}, - { 0x041a, {1, {0x043a}}}, - { 0x041b, {1, {0x043b}}}, - { 0x041c, {1, {0x043c}}}, - { 0x041d, {1, {0x043d}}}, - { 0x041e, {1, {0x043e}}}, - { 0x041f, {1, {0x043f}}}, - { 0x0420, {1, {0x0440}}}, - { 0x0421, {1, {0x0441}}}, - { 0x0422, {1, {0x0442}}}, - { 0x0423, {1, {0x0443}}}, - { 0x0424, {1, {0x0444}}}, - { 0x0425, {1, {0x0445}}}, - { 0x0426, {1, {0x0446}}}, - { 0x0427, {1, {0x0447}}}, - { 0x0428, {1, {0x0448}}}, - { 0x0429, {1, {0x0449}}}, - { 0x042a, {1, {0x044a}}}, - { 0x042b, {1, {0x044b}}}, - { 0x042c, {1, {0x044c}}}, - { 0x042d, {1, {0x044d}}}, - { 0x042e, {1, {0x044e}}}, - { 0x042f, {1, {0x044f}}}, - { 0x0460, {1, {0x0461}}}, - { 0x0462, {1, {0x0463}}}, - { 0x0464, {1, {0x0465}}}, - { 0x0466, {1, {0x0467}}}, - { 0x0468, {1, {0x0469}}}, - { 0x046a, {1, {0x046b}}}, - { 0x046c, {1, {0x046d}}}, - { 0x046e, {1, {0x046f}}}, - { 0x0470, {1, {0x0471}}}, - { 0x0472, {1, {0x0473}}}, - { 0x0474, {1, {0x0475}}}, - { 0x0476, {1, {0x0477}}}, - { 0x0478, {1, {0x0479}}}, - { 0x047a, {1, {0x047b}}}, - { 0x047c, {1, {0x047d}}}, - { 0x047e, {1, {0x047f}}}, - { 0x0480, {1, {0x0481}}}, - { 0x048a, {1, {0x048b}}}, - { 0x048c, {1, {0x048d}}}, - { 0x048e, {1, {0x048f}}}, - { 0x0490, {1, {0x0491}}}, - { 0x0492, {1, {0x0493}}}, - { 0x0494, {1, {0x0495}}}, - { 0x0496, {1, {0x0497}}}, - { 0x0498, {1, {0x0499}}}, - { 0x049a, {1, {0x049b}}}, - { 0x049c, {1, {0x049d}}}, - { 0x049e, {1, {0x049f}}}, - { 0x04a0, {1, {0x04a1}}}, - { 0x04a2, {1, {0x04a3}}}, - { 0x04a4, {1, {0x04a5}}}, 
- { 0x04a6, {1, {0x04a7}}}, - { 0x04a8, {1, {0x04a9}}}, - { 0x04aa, {1, {0x04ab}}}, - { 0x04ac, {1, {0x04ad}}}, - { 0x04ae, {1, {0x04af}}}, - { 0x04b0, {1, {0x04b1}}}, - { 0x04b2, {1, {0x04b3}}}, - { 0x04b4, {1, {0x04b5}}}, - { 0x04b6, {1, {0x04b7}}}, - { 0x04b8, {1, {0x04b9}}}, - { 0x04ba, {1, {0x04bb}}}, - { 0x04bc, {1, {0x04bd}}}, - { 0x04be, {1, {0x04bf}}}, - { 0x04c1, {1, {0x04c2}}}, - { 0x04c3, {1, {0x04c4}}}, - { 0x04c5, {1, {0x04c6}}}, - { 0x04c7, {1, {0x04c8}}}, - { 0x04c9, {1, {0x04ca}}}, - { 0x04cb, {1, {0x04cc}}}, - { 0x04cd, {1, {0x04ce}}}, - { 0x04d0, {1, {0x04d1}}}, - { 0x04d2, {1, {0x04d3}}}, - { 0x04d4, {1, {0x04d5}}}, - { 0x04d6, {1, {0x04d7}}}, - { 0x04d8, {1, {0x04d9}}}, - { 0x04da, {1, {0x04db}}}, - { 0x04dc, {1, {0x04dd}}}, - { 0x04de, {1, {0x04df}}}, - { 0x04e0, {1, {0x04e1}}}, - { 0x04e2, {1, {0x04e3}}}, - { 0x04e4, {1, {0x04e5}}}, - { 0x04e6, {1, {0x04e7}}}, - { 0x04e8, {1, {0x04e9}}}, - { 0x04ea, {1, {0x04eb}}}, - { 0x04ec, {1, {0x04ed}}}, - { 0x04ee, {1, {0x04ef}}}, - { 0x04f0, {1, {0x04f1}}}, - { 0x04f2, {1, {0x04f3}}}, - { 0x04f4, {1, {0x04f5}}}, - { 0x04f6, {1, {0x04f7}}}, - { 0x04f8, {1, {0x04f9}}}, - { 0x0500, {1, {0x0501}}}, - { 0x0502, {1, {0x0503}}}, - { 0x0504, {1, {0x0505}}}, - { 0x0506, {1, {0x0507}}}, - { 0x0508, {1, {0x0509}}}, - { 0x050a, {1, {0x050b}}}, - { 0x050c, {1, {0x050d}}}, - { 0x050e, {1, {0x050f}}}, - { 0x0531, {1, {0x0561}}}, - { 0x0532, {1, {0x0562}}}, - { 0x0533, {1, {0x0563}}}, - { 0x0534, {1, {0x0564}}}, - { 0x0535, {1, {0x0565}}}, - { 0x0536, {1, {0x0566}}}, - { 0x0537, {1, {0x0567}}}, - { 0x0538, {1, {0x0568}}}, - { 0x0539, {1, {0x0569}}}, - { 0x053a, {1, {0x056a}}}, - { 0x053b, {1, {0x056b}}}, - { 0x053c, {1, {0x056c}}}, - { 0x053d, {1, {0x056d}}}, - { 0x053e, {1, {0x056e}}}, - { 0x053f, {1, {0x056f}}}, - { 0x0540, {1, {0x0570}}}, - { 0x0541, {1, {0x0571}}}, - { 0x0542, {1, {0x0572}}}, - { 0x0543, {1, {0x0573}}}, - { 0x0544, {1, {0x0574}}}, - { 0x0545, {1, {0x0575}}}, - { 0x0546, {1, {0x0576}}}, - { 0x0547, 
{1, {0x0577}}}, - { 0x0548, {1, {0x0578}}}, - { 0x0549, {1, {0x0579}}}, - { 0x054a, {1, {0x057a}}}, - { 0x054b, {1, {0x057b}}}, - { 0x054c, {1, {0x057c}}}, - { 0x054d, {1, {0x057d}}}, - { 0x054e, {1, {0x057e}}}, - { 0x054f, {1, {0x057f}}}, - { 0x0550, {1, {0x0580}}}, - { 0x0551, {1, {0x0581}}}, - { 0x0552, {1, {0x0582}}}, - { 0x0553, {1, {0x0583}}}, - { 0x0554, {1, {0x0584}}}, - { 0x0555, {1, {0x0585}}}, - { 0x0556, {1, {0x0586}}}, - { 0x0587, {2, {0x0565, 0x0582}}}, - { 0x10a0, {1, {0x2d00}}}, - { 0x10a1, {1, {0x2d01}}}, - { 0x10a2, {1, {0x2d02}}}, - { 0x10a3, {1, {0x2d03}}}, - { 0x10a4, {1, {0x2d04}}}, - { 0x10a5, {1, {0x2d05}}}, - { 0x10a6, {1, {0x2d06}}}, - { 0x10a7, {1, {0x2d07}}}, - { 0x10a8, {1, {0x2d08}}}, - { 0x10a9, {1, {0x2d09}}}, - { 0x10aa, {1, {0x2d0a}}}, - { 0x10ab, {1, {0x2d0b}}}, - { 0x10ac, {1, {0x2d0c}}}, - { 0x10ad, {1, {0x2d0d}}}, - { 0x10ae, {1, {0x2d0e}}}, - { 0x10af, {1, {0x2d0f}}}, - { 0x10b0, {1, {0x2d10}}}, - { 0x10b1, {1, {0x2d11}}}, - { 0x10b2, {1, {0x2d12}}}, - { 0x10b3, {1, {0x2d13}}}, - { 0x10b4, {1, {0x2d14}}}, - { 0x10b5, {1, {0x2d15}}}, - { 0x10b6, {1, {0x2d16}}}, - { 0x10b7, {1, {0x2d17}}}, - { 0x10b8, {1, {0x2d18}}}, - { 0x10b9, {1, {0x2d19}}}, - { 0x10ba, {1, {0x2d1a}}}, - { 0x10bb, {1, {0x2d1b}}}, - { 0x10bc, {1, {0x2d1c}}}, - { 0x10bd, {1, {0x2d1d}}}, - { 0x10be, {1, {0x2d1e}}}, - { 0x10bf, {1, {0x2d1f}}}, - { 0x10c0, {1, {0x2d20}}}, - { 0x10c1, {1, {0x2d21}}}, - { 0x10c2, {1, {0x2d22}}}, - { 0x10c3, {1, {0x2d23}}}, - { 0x10c4, {1, {0x2d24}}}, - { 0x10c5, {1, {0x2d25}}}, - { 0x1e00, {1, {0x1e01}}}, - { 0x1e02, {1, {0x1e03}}}, - { 0x1e04, {1, {0x1e05}}}, - { 0x1e06, {1, {0x1e07}}}, - { 0x1e08, {1, {0x1e09}}}, - { 0x1e0a, {1, {0x1e0b}}}, - { 0x1e0c, {1, {0x1e0d}}}, - { 0x1e0e, {1, {0x1e0f}}}, - { 0x1e10, {1, {0x1e11}}}, - { 0x1e12, {1, {0x1e13}}}, - { 0x1e14, {1, {0x1e15}}}, - { 0x1e16, {1, {0x1e17}}}, - { 0x1e18, {1, {0x1e19}}}, - { 0x1e1a, {1, {0x1e1b}}}, - { 0x1e1c, {1, {0x1e1d}}}, - { 0x1e1e, {1, {0x1e1f}}}, - { 0x1e20, {1, 
{0x1e21}}}, - { 0x1e22, {1, {0x1e23}}}, - { 0x1e24, {1, {0x1e25}}}, - { 0x1e26, {1, {0x1e27}}}, - { 0x1e28, {1, {0x1e29}}}, - { 0x1e2a, {1, {0x1e2b}}}, - { 0x1e2c, {1, {0x1e2d}}}, - { 0x1e2e, {1, {0x1e2f}}}, - { 0x1e30, {1, {0x1e31}}}, - { 0x1e32, {1, {0x1e33}}}, - { 0x1e34, {1, {0x1e35}}}, - { 0x1e36, {1, {0x1e37}}}, - { 0x1e38, {1, {0x1e39}}}, - { 0x1e3a, {1, {0x1e3b}}}, - { 0x1e3c, {1, {0x1e3d}}}, - { 0x1e3e, {1, {0x1e3f}}}, - { 0x1e40, {1, {0x1e41}}}, - { 0x1e42, {1, {0x1e43}}}, - { 0x1e44, {1, {0x1e45}}}, - { 0x1e46, {1, {0x1e47}}}, - { 0x1e48, {1, {0x1e49}}}, - { 0x1e4a, {1, {0x1e4b}}}, - { 0x1e4c, {1, {0x1e4d}}}, - { 0x1e4e, {1, {0x1e4f}}}, - { 0x1e50, {1, {0x1e51}}}, - { 0x1e52, {1, {0x1e53}}}, - { 0x1e54, {1, {0x1e55}}}, - { 0x1e56, {1, {0x1e57}}}, - { 0x1e58, {1, {0x1e59}}}, - { 0x1e5a, {1, {0x1e5b}}}, - { 0x1e5c, {1, {0x1e5d}}}, - { 0x1e5e, {1, {0x1e5f}}}, - { 0x1e60, {1, {0x1e61}}}, - { 0x1e62, {1, {0x1e63}}}, - { 0x1e64, {1, {0x1e65}}}, - { 0x1e66, {1, {0x1e67}}}, - { 0x1e68, {1, {0x1e69}}}, - { 0x1e6a, {1, {0x1e6b}}}, - { 0x1e6c, {1, {0x1e6d}}}, - { 0x1e6e, {1, {0x1e6f}}}, - { 0x1e70, {1, {0x1e71}}}, - { 0x1e72, {1, {0x1e73}}}, - { 0x1e74, {1, {0x1e75}}}, - { 0x1e76, {1, {0x1e77}}}, - { 0x1e78, {1, {0x1e79}}}, - { 0x1e7a, {1, {0x1e7b}}}, - { 0x1e7c, {1, {0x1e7d}}}, - { 0x1e7e, {1, {0x1e7f}}}, - { 0x1e80, {1, {0x1e81}}}, - { 0x1e82, {1, {0x1e83}}}, - { 0x1e84, {1, {0x1e85}}}, - { 0x1e86, {1, {0x1e87}}}, - { 0x1e88, {1, {0x1e89}}}, - { 0x1e8a, {1, {0x1e8b}}}, - { 0x1e8c, {1, {0x1e8d}}}, - { 0x1e8e, {1, {0x1e8f}}}, - { 0x1e90, {1, {0x1e91}}}, - { 0x1e92, {1, {0x1e93}}}, - { 0x1e94, {1, {0x1e95}}}, - { 0x1e96, {2, {0x0068, 0x0331}}}, - { 0x1e97, {2, {0x0074, 0x0308}}}, - { 0x1e98, {2, {0x0077, 0x030a}}}, - { 0x1e99, {2, {0x0079, 0x030a}}}, - { 0x1e9a, {2, {0x0061, 0x02be}}}, - { 0x1e9b, {1, {0x1e61}}}, - { 0x1ea0, {1, {0x1ea1}}}, - { 0x1ea2, {1, {0x1ea3}}}, - { 0x1ea4, {1, {0x1ea5}}}, - { 0x1ea6, {1, {0x1ea7}}}, - { 0x1ea8, {1, {0x1ea9}}}, - { 0x1eaa, {1, 
{0x1eab}}}, - { 0x1eac, {1, {0x1ead}}}, - { 0x1eae, {1, {0x1eaf}}}, - { 0x1eb0, {1, {0x1eb1}}}, - { 0x1eb2, {1, {0x1eb3}}}, - { 0x1eb4, {1, {0x1eb5}}}, - { 0x1eb6, {1, {0x1eb7}}}, - { 0x1eb8, {1, {0x1eb9}}}, - { 0x1eba, {1, {0x1ebb}}}, - { 0x1ebc, {1, {0x1ebd}}}, - { 0x1ebe, {1, {0x1ebf}}}, - { 0x1ec0, {1, {0x1ec1}}}, - { 0x1ec2, {1, {0x1ec3}}}, - { 0x1ec4, {1, {0x1ec5}}}, - { 0x1ec6, {1, {0x1ec7}}}, - { 0x1ec8, {1, {0x1ec9}}}, - { 0x1eca, {1, {0x1ecb}}}, - { 0x1ecc, {1, {0x1ecd}}}, - { 0x1ece, {1, {0x1ecf}}}, - { 0x1ed0, {1, {0x1ed1}}}, - { 0x1ed2, {1, {0x1ed3}}}, - { 0x1ed4, {1, {0x1ed5}}}, - { 0x1ed6, {1, {0x1ed7}}}, - { 0x1ed8, {1, {0x1ed9}}}, - { 0x1eda, {1, {0x1edb}}}, - { 0x1edc, {1, {0x1edd}}}, - { 0x1ede, {1, {0x1edf}}}, - { 0x1ee0, {1, {0x1ee1}}}, - { 0x1ee2, {1, {0x1ee3}}}, - { 0x1ee4, {1, {0x1ee5}}}, - { 0x1ee6, {1, {0x1ee7}}}, - { 0x1ee8, {1, {0x1ee9}}}, - { 0x1eea, {1, {0x1eeb}}}, - { 0x1eec, {1, {0x1eed}}}, - { 0x1eee, {1, {0x1eef}}}, - { 0x1ef0, {1, {0x1ef1}}}, - { 0x1ef2, {1, {0x1ef3}}}, - { 0x1ef4, {1, {0x1ef5}}}, - { 0x1ef6, {1, {0x1ef7}}}, - { 0x1ef8, {1, {0x1ef9}}}, - { 0x1f08, {1, {0x1f00}}}, - { 0x1f09, {1, {0x1f01}}}, - { 0x1f0a, {1, {0x1f02}}}, - { 0x1f0b, {1, {0x1f03}}}, - { 0x1f0c, {1, {0x1f04}}}, - { 0x1f0d, {1, {0x1f05}}}, - { 0x1f0e, {1, {0x1f06}}}, - { 0x1f0f, {1, {0x1f07}}}, - { 0x1f18, {1, {0x1f10}}}, - { 0x1f19, {1, {0x1f11}}}, - { 0x1f1a, {1, {0x1f12}}}, - { 0x1f1b, {1, {0x1f13}}}, - { 0x1f1c, {1, {0x1f14}}}, - { 0x1f1d, {1, {0x1f15}}}, - { 0x1f28, {1, {0x1f20}}}, - { 0x1f29, {1, {0x1f21}}}, - { 0x1f2a, {1, {0x1f22}}}, - { 0x1f2b, {1, {0x1f23}}}, - { 0x1f2c, {1, {0x1f24}}}, - { 0x1f2d, {1, {0x1f25}}}, - { 0x1f2e, {1, {0x1f26}}}, - { 0x1f2f, {1, {0x1f27}}}, - { 0x1f38, {1, {0x1f30}}}, - { 0x1f39, {1, {0x1f31}}}, - { 0x1f3a, {1, {0x1f32}}}, - { 0x1f3b, {1, {0x1f33}}}, - { 0x1f3c, {1, {0x1f34}}}, - { 0x1f3d, {1, {0x1f35}}}, - { 0x1f3e, {1, {0x1f36}}}, - { 0x1f3f, {1, {0x1f37}}}, - { 0x1f48, {1, {0x1f40}}}, - { 0x1f49, {1, {0x1f41}}}, 
- { 0x1f4a, {1, {0x1f42}}}, - { 0x1f4b, {1, {0x1f43}}}, - { 0x1f4c, {1, {0x1f44}}}, - { 0x1f4d, {1, {0x1f45}}}, - { 0x1f50, {2, {0x03c5, 0x0313}}}, - { 0x1f52, {3, {0x03c5, 0x0313, 0x0300}}}, - { 0x1f54, {3, {0x03c5, 0x0313, 0x0301}}}, - { 0x1f56, {3, {0x03c5, 0x0313, 0x0342}}}, - { 0x1f59, {1, {0x1f51}}}, - { 0x1f5b, {1, {0x1f53}}}, - { 0x1f5d, {1, {0x1f55}}}, - { 0x1f5f, {1, {0x1f57}}}, - { 0x1f68, {1, {0x1f60}}}, - { 0x1f69, {1, {0x1f61}}}, - { 0x1f6a, {1, {0x1f62}}}, - { 0x1f6b, {1, {0x1f63}}}, - { 0x1f6c, {1, {0x1f64}}}, - { 0x1f6d, {1, {0x1f65}}}, - { 0x1f6e, {1, {0x1f66}}}, - { 0x1f6f, {1, {0x1f67}}}, - { 0x1f80, {2, {0x1f00, 0x03b9}}}, - { 0x1f81, {2, {0x1f01, 0x03b9}}}, - { 0x1f82, {2, {0x1f02, 0x03b9}}}, - { 0x1f83, {2, {0x1f03, 0x03b9}}}, - { 0x1f84, {2, {0x1f04, 0x03b9}}}, - { 0x1f85, {2, {0x1f05, 0x03b9}}}, - { 0x1f86, {2, {0x1f06, 0x03b9}}}, - { 0x1f87, {2, {0x1f07, 0x03b9}}}, - { 0x1f88, {2, {0x1f00, 0x03b9}}}, - { 0x1f89, {2, {0x1f01, 0x03b9}}}, - { 0x1f8a, {2, {0x1f02, 0x03b9}}}, - { 0x1f8b, {2, {0x1f03, 0x03b9}}}, - { 0x1f8c, {2, {0x1f04, 0x03b9}}}, - { 0x1f8d, {2, {0x1f05, 0x03b9}}}, - { 0x1f8e, {2, {0x1f06, 0x03b9}}}, - { 0x1f8f, {2, {0x1f07, 0x03b9}}}, - { 0x1f90, {2, {0x1f20, 0x03b9}}}, - { 0x1f91, {2, {0x1f21, 0x03b9}}}, - { 0x1f92, {2, {0x1f22, 0x03b9}}}, - { 0x1f93, {2, {0x1f23, 0x03b9}}}, - { 0x1f94, {2, {0x1f24, 0x03b9}}}, - { 0x1f95, {2, {0x1f25, 0x03b9}}}, - { 0x1f96, {2, {0x1f26, 0x03b9}}}, - { 0x1f97, {2, {0x1f27, 0x03b9}}}, - { 0x1f98, {2, {0x1f20, 0x03b9}}}, - { 0x1f99, {2, {0x1f21, 0x03b9}}}, - { 0x1f9a, {2, {0x1f22, 0x03b9}}}, - { 0x1f9b, {2, {0x1f23, 0x03b9}}}, - { 0x1f9c, {2, {0x1f24, 0x03b9}}}, - { 0x1f9d, {2, {0x1f25, 0x03b9}}}, - { 0x1f9e, {2, {0x1f26, 0x03b9}}}, - { 0x1f9f, {2, {0x1f27, 0x03b9}}}, - { 0x1fa0, {2, {0x1f60, 0x03b9}}}, - { 0x1fa1, {2, {0x1f61, 0x03b9}}}, - { 0x1fa2, {2, {0x1f62, 0x03b9}}}, - { 0x1fa3, {2, {0x1f63, 0x03b9}}}, - { 0x1fa4, {2, {0x1f64, 0x03b9}}}, - { 0x1fa5, {2, {0x1f65, 0x03b9}}}, - { 0x1fa6, {2, 
{0x1f66, 0x03b9}}}, - { 0x1fa7, {2, {0x1f67, 0x03b9}}}, - { 0x1fa8, {2, {0x1f60, 0x03b9}}}, - { 0x1fa9, {2, {0x1f61, 0x03b9}}}, - { 0x1faa, {2, {0x1f62, 0x03b9}}}, - { 0x1fab, {2, {0x1f63, 0x03b9}}}, - { 0x1fac, {2, {0x1f64, 0x03b9}}}, - { 0x1fad, {2, {0x1f65, 0x03b9}}}, - { 0x1fae, {2, {0x1f66, 0x03b9}}}, - { 0x1faf, {2, {0x1f67, 0x03b9}}}, - { 0x1fb2, {2, {0x1f70, 0x03b9}}}, - { 0x1fb3, {2, {0x03b1, 0x03b9}}}, - { 0x1fb4, {2, {0x03ac, 0x03b9}}}, - { 0x1fb6, {2, {0x03b1, 0x0342}}}, - { 0x1fb7, {3, {0x03b1, 0x0342, 0x03b9}}}, - { 0x1fb8, {1, {0x1fb0}}}, - { 0x1fb9, {1, {0x1fb1}}}, - { 0x1fba, {1, {0x1f70}}}, - { 0x1fbb, {1, {0x1f71}}}, - { 0x1fbc, {2, {0x03b1, 0x03b9}}}, - { 0x1fbe, {1, {0x03b9}}}, - { 0x1fc2, {2, {0x1f74, 0x03b9}}}, - { 0x1fc3, {2, {0x03b7, 0x03b9}}}, - { 0x1fc4, {2, {0x03ae, 0x03b9}}}, - { 0x1fc6, {2, {0x03b7, 0x0342}}}, - { 0x1fc7, {3, {0x03b7, 0x0342, 0x03b9}}}, - { 0x1fc8, {1, {0x1f72}}}, - { 0x1fc9, {1, {0x1f73}}}, - { 0x1fca, {1, {0x1f74}}}, - { 0x1fcb, {1, {0x1f75}}}, - { 0x1fcc, {2, {0x03b7, 0x03b9}}}, - { 0x1fd2, {3, {0x03b9, 0x0308, 0x0300}}}, - { 0x1fd3, {3, {0x03b9, 0x0308, 0x0301}}}, - { 0x1fd6, {2, {0x03b9, 0x0342}}}, - { 0x1fd7, {3, {0x03b9, 0x0308, 0x0342}}}, - { 0x1fd8, {1, {0x1fd0}}}, - { 0x1fd9, {1, {0x1fd1}}}, - { 0x1fda, {1, {0x1f76}}}, - { 0x1fdb, {1, {0x1f77}}}, - { 0x1fe2, {3, {0x03c5, 0x0308, 0x0300}}}, - { 0x1fe3, {3, {0x03c5, 0x0308, 0x0301}}}, - { 0x1fe4, {2, {0x03c1, 0x0313}}}, - { 0x1fe6, {2, {0x03c5, 0x0342}}}, - { 0x1fe7, {3, {0x03c5, 0x0308, 0x0342}}}, - { 0x1fe8, {1, {0x1fe0}}}, - { 0x1fe9, {1, {0x1fe1}}}, - { 0x1fea, {1, {0x1f7a}}}, - { 0x1feb, {1, {0x1f7b}}}, - { 0x1fec, {1, {0x1fe5}}}, - { 0x1ff2, {2, {0x1f7c, 0x03b9}}}, - { 0x1ff3, {2, {0x03c9, 0x03b9}}}, - { 0x1ff4, {2, {0x03ce, 0x03b9}}}, - { 0x1ff6, {2, {0x03c9, 0x0342}}}, - { 0x1ff7, {3, {0x03c9, 0x0342, 0x03b9}}}, - { 0x1ff8, {1, {0x1f78}}}, - { 0x1ff9, {1, {0x1f79}}}, - { 0x1ffa, {1, {0x1f7c}}}, - { 0x1ffb, {1, {0x1f7d}}}, - { 0x1ffc, {2, {0x03c9, 
0x03b9}}}, - { 0x2126, {1, {0x03c9}}}, - { 0x212a, {1, {0x006b}}}, - { 0x212b, {1, {0x00e5}}}, - { 0x2160, {1, {0x2170}}}, - { 0x2161, {1, {0x2171}}}, - { 0x2162, {1, {0x2172}}}, - { 0x2163, {1, {0x2173}}}, - { 0x2164, {1, {0x2174}}}, - { 0x2165, {1, {0x2175}}}, - { 0x2166, {1, {0x2176}}}, - { 0x2167, {1, {0x2177}}}, - { 0x2168, {1, {0x2178}}}, - { 0x2169, {1, {0x2179}}}, - { 0x216a, {1, {0x217a}}}, - { 0x216b, {1, {0x217b}}}, - { 0x216c, {1, {0x217c}}}, - { 0x216d, {1, {0x217d}}}, - { 0x216e, {1, {0x217e}}}, - { 0x216f, {1, {0x217f}}}, - { 0x24b6, {1, {0x24d0}}}, - { 0x24b7, {1, {0x24d1}}}, - { 0x24b8, {1, {0x24d2}}}, - { 0x24b9, {1, {0x24d3}}}, - { 0x24ba, {1, {0x24d4}}}, - { 0x24bb, {1, {0x24d5}}}, - { 0x24bc, {1, {0x24d6}}}, - { 0x24bd, {1, {0x24d7}}}, - { 0x24be, {1, {0x24d8}}}, - { 0x24bf, {1, {0x24d9}}}, - { 0x24c0, {1, {0x24da}}}, - { 0x24c1, {1, {0x24db}}}, - { 0x24c2, {1, {0x24dc}}}, - { 0x24c3, {1, {0x24dd}}}, - { 0x24c4, {1, {0x24de}}}, - { 0x24c5, {1, {0x24df}}}, - { 0x24c6, {1, {0x24e0}}}, - { 0x24c7, {1, {0x24e1}}}, - { 0x24c8, {1, {0x24e2}}}, - { 0x24c9, {1, {0x24e3}}}, - { 0x24ca, {1, {0x24e4}}}, - { 0x24cb, {1, {0x24e5}}}, - { 0x24cc, {1, {0x24e6}}}, - { 0x24cd, {1, {0x24e7}}}, - { 0x24ce, {1, {0x24e8}}}, - { 0x24cf, {1, {0x24e9}}}, - { 0x2c00, {1, {0x2c30}}}, - { 0x2c01, {1, {0x2c31}}}, - { 0x2c02, {1, {0x2c32}}}, - { 0x2c03, {1, {0x2c33}}}, - { 0x2c04, {1, {0x2c34}}}, - { 0x2c05, {1, {0x2c35}}}, - { 0x2c06, {1, {0x2c36}}}, - { 0x2c07, {1, {0x2c37}}}, - { 0x2c08, {1, {0x2c38}}}, - { 0x2c09, {1, {0x2c39}}}, - { 0x2c0a, {1, {0x2c3a}}}, - { 0x2c0b, {1, {0x2c3b}}}, - { 0x2c0c, {1, {0x2c3c}}}, - { 0x2c0d, {1, {0x2c3d}}}, - { 0x2c0e, {1, {0x2c3e}}}, - { 0x2c0f, {1, {0x2c3f}}}, - { 0x2c10, {1, {0x2c40}}}, - { 0x2c11, {1, {0x2c41}}}, - { 0x2c12, {1, {0x2c42}}}, - { 0x2c13, {1, {0x2c43}}}, - { 0x2c14, {1, {0x2c44}}}, - { 0x2c15, {1, {0x2c45}}}, - { 0x2c16, {1, {0x2c46}}}, - { 0x2c17, {1, {0x2c47}}}, - { 0x2c18, {1, {0x2c48}}}, - { 0x2c19, {1, {0x2c49}}}, 
- { 0x2c1a, {1, {0x2c4a}}}, - { 0x2c1b, {1, {0x2c4b}}}, - { 0x2c1c, {1, {0x2c4c}}}, - { 0x2c1d, {1, {0x2c4d}}}, - { 0x2c1e, {1, {0x2c4e}}}, - { 0x2c1f, {1, {0x2c4f}}}, - { 0x2c20, {1, {0x2c50}}}, - { 0x2c21, {1, {0x2c51}}}, - { 0x2c22, {1, {0x2c52}}}, - { 0x2c23, {1, {0x2c53}}}, - { 0x2c24, {1, {0x2c54}}}, - { 0x2c25, {1, {0x2c55}}}, - { 0x2c26, {1, {0x2c56}}}, - { 0x2c27, {1, {0x2c57}}}, - { 0x2c28, {1, {0x2c58}}}, - { 0x2c29, {1, {0x2c59}}}, - { 0x2c2a, {1, {0x2c5a}}}, - { 0x2c2b, {1, {0x2c5b}}}, - { 0x2c2c, {1, {0x2c5c}}}, - { 0x2c2d, {1, {0x2c5d}}}, - { 0x2c2e, {1, {0x2c5e}}}, - { 0x2c80, {1, {0x2c81}}}, - { 0x2c82, {1, {0x2c83}}}, - { 0x2c84, {1, {0x2c85}}}, - { 0x2c86, {1, {0x2c87}}}, - { 0x2c88, {1, {0x2c89}}}, - { 0x2c8a, {1, {0x2c8b}}}, - { 0x2c8c, {1, {0x2c8d}}}, - { 0x2c8e, {1, {0x2c8f}}}, - { 0x2c90, {1, {0x2c91}}}, - { 0x2c92, {1, {0x2c93}}}, - { 0x2c94, {1, {0x2c95}}}, - { 0x2c96, {1, {0x2c97}}}, - { 0x2c98, {1, {0x2c99}}}, - { 0x2c9a, {1, {0x2c9b}}}, - { 0x2c9c, {1, {0x2c9d}}}, - { 0x2c9e, {1, {0x2c9f}}}, - { 0x2ca0, {1, {0x2ca1}}}, - { 0x2ca2, {1, {0x2ca3}}}, - { 0x2ca4, {1, {0x2ca5}}}, - { 0x2ca6, {1, {0x2ca7}}}, - { 0x2ca8, {1, {0x2ca9}}}, - { 0x2caa, {1, {0x2cab}}}, - { 0x2cac, {1, {0x2cad}}}, - { 0x2cae, {1, {0x2caf}}}, - { 0x2cb0, {1, {0x2cb1}}}, - { 0x2cb2, {1, {0x2cb3}}}, - { 0x2cb4, {1, {0x2cb5}}}, - { 0x2cb6, {1, {0x2cb7}}}, - { 0x2cb8, {1, {0x2cb9}}}, - { 0x2cba, {1, {0x2cbb}}}, - { 0x2cbc, {1, {0x2cbd}}}, - { 0x2cbe, {1, {0x2cbf}}}, - { 0x2cc0, {1, {0x2cc1}}}, - { 0x2cc2, {1, {0x2cc3}}}, - { 0x2cc4, {1, {0x2cc5}}}, - { 0x2cc6, {1, {0x2cc7}}}, - { 0x2cc8, {1, {0x2cc9}}}, - { 0x2cca, {1, {0x2ccb}}}, - { 0x2ccc, {1, {0x2ccd}}}, - { 0x2cce, {1, {0x2ccf}}}, - { 0x2cd0, {1, {0x2cd1}}}, - { 0x2cd2, {1, {0x2cd3}}}, - { 0x2cd4, {1, {0x2cd5}}}, - { 0x2cd6, {1, {0x2cd7}}}, - { 0x2cd8, {1, {0x2cd9}}}, - { 0x2cda, {1, {0x2cdb}}}, - { 0x2cdc, {1, {0x2cdd}}}, - { 0x2cde, {1, {0x2cdf}}}, - { 0x2ce0, {1, {0x2ce1}}}, - { 0x2ce2, {1, {0x2ce3}}}, - { 0xfb00, 
{2, {0x0066, 0x0066}}}, - { 0xfb01, {2, {0x0066, 0x0069}}}, - { 0xfb02, {2, {0x0066, 0x006c}}}, - { 0xfb03, {3, {0x0066, 0x0066, 0x0069}}}, - { 0xfb04, {3, {0x0066, 0x0066, 0x006c}}}, - { 0xfb05, {2, {0x0073, 0x0074}}}, - { 0xfb06, {2, {0x0073, 0x0074}}}, - { 0xfb13, {2, {0x0574, 0x0576}}}, - { 0xfb14, {2, {0x0574, 0x0565}}}, - { 0xfb15, {2, {0x0574, 0x056b}}}, - { 0xfb16, {2, {0x057e, 0x0576}}}, - { 0xfb17, {2, {0x0574, 0x056d}}}, - { 0xff21, {1, {0xff41}}}, - { 0xff22, {1, {0xff42}}}, - { 0xff23, {1, {0xff43}}}, - { 0xff24, {1, {0xff44}}}, - { 0xff25, {1, {0xff45}}}, - { 0xff26, {1, {0xff46}}}, - { 0xff27, {1, {0xff47}}}, - { 0xff28, {1, {0xff48}}}, - { 0xff29, {1, {0xff49}}}, - { 0xff2a, {1, {0xff4a}}}, - { 0xff2b, {1, {0xff4b}}}, - { 0xff2c, {1, {0xff4c}}}, - { 0xff2d, {1, {0xff4d}}}, - { 0xff2e, {1, {0xff4e}}}, - { 0xff2f, {1, {0xff4f}}}, - { 0xff30, {1, {0xff50}}}, - { 0xff31, {1, {0xff51}}}, - { 0xff32, {1, {0xff52}}}, - { 0xff33, {1, {0xff53}}}, - { 0xff34, {1, {0xff54}}}, - { 0xff35, {1, {0xff55}}}, - { 0xff36, {1, {0xff56}}}, - { 0xff37, {1, {0xff57}}}, - { 0xff38, {1, {0xff58}}}, - { 0xff39, {1, {0xff59}}}, - { 0xff3a, {1, {0xff5a}}}, - { 0x10400, {1, {0x10428}}}, - { 0x10401, {1, {0x10429}}}, - { 0x10402, {1, {0x1042a}}}, - { 0x10403, {1, {0x1042b}}}, - { 0x10404, {1, {0x1042c}}}, - { 0x10405, {1, {0x1042d}}}, - { 0x10406, {1, {0x1042e}}}, - { 0x10407, {1, {0x1042f}}}, - { 0x10408, {1, {0x10430}}}, - { 0x10409, {1, {0x10431}}}, - { 0x1040a, {1, {0x10432}}}, - { 0x1040b, {1, {0x10433}}}, - { 0x1040c, {1, {0x10434}}}, - { 0x1040d, {1, {0x10435}}}, - { 0x1040e, {1, {0x10436}}}, - { 0x1040f, {1, {0x10437}}}, - { 0x10410, {1, {0x10438}}}, - { 0x10411, {1, {0x10439}}}, - { 0x10412, {1, {0x1043a}}}, - { 0x10413, {1, {0x1043b}}}, - { 0x10414, {1, {0x1043c}}}, - { 0x10415, {1, {0x1043d}}}, - { 0x10416, {1, {0x1043e}}}, - { 0x10417, {1, {0x1043f}}}, - { 0x10418, {1, {0x10440}}}, - { 0x10419, {1, {0x10441}}}, - { 0x1041a, {1, {0x10442}}}, - { 0x1041b, {1, 
{0x10443}}}, - { 0x1041c, {1, {0x10444}}}, - { 0x1041d, {1, {0x10445}}}, - { 0x1041e, {1, {0x10446}}}, - { 0x1041f, {1, {0x10447}}}, - { 0x10420, {1, {0x10448}}}, - { 0x10421, {1, {0x10449}}}, - { 0x10422, {1, {0x1044a}}}, - { 0x10423, {1, {0x1044b}}}, - { 0x10424, {1, {0x1044c}}}, - { 0x10425, {1, {0x1044d}}}, - { 0x10426, {1, {0x1044e}}}, - { 0x10427, {1, {0x1044f}}} -}; - -static const CaseFold_11_Type CaseFold_Locale[] = { - { 0x0049, {1, {0x0069}}}, - { 0x0130, {2, {0x0069, 0x0307}}} -}; - -static const CaseUnfold_11_Type CaseUnfold_11[] = { - { 0x0061, {1, {0x0041 }}}, - { 0x0062, {1, {0x0042 }}}, - { 0x0063, {1, {0x0043 }}}, - { 0x0064, {1, {0x0044 }}}, - { 0x0065, {1, {0x0045 }}}, - { 0x0066, {1, {0x0046 }}}, - { 0x0067, {1, {0x0047 }}}, - { 0x0068, {1, {0x0048 }}}, - { 0x006a, {1, {0x004a }}}, - { 0x006b, {2, {0x212a, 0x004b }}}, - { 0x006c, {1, {0x004c }}}, - { 0x006d, {1, {0x004d }}}, - { 0x006e, {1, {0x004e }}}, - { 0x006f, {1, {0x004f }}}, - { 0x0070, {1, {0x0050 }}}, - { 0x0071, {1, {0x0051 }}}, - { 0x0072, {1, {0x0052 }}}, - { 0x0073, {2, {0x0053, 0x017f }}}, - { 0x0074, {1, {0x0054 }}}, - { 0x0075, {1, {0x0055 }}}, - { 0x0076, {1, {0x0056 }}}, - { 0x0077, {1, {0x0057 }}}, - { 0x0078, {1, {0x0058 }}}, - { 0x0079, {1, {0x0059 }}}, - { 0x007a, {1, {0x005a }}}, - { 0x00e0, {1, {0x00c0 }}}, - { 0x00e1, {1, {0x00c1 }}}, - { 0x00e2, {1, {0x00c2 }}}, - { 0x00e3, {1, {0x00c3 }}}, - { 0x00e4, {1, {0x00c4 }}}, - { 0x00e5, {2, {0x212b, 0x00c5 }}}, - { 0x00e6, {1, {0x00c6 }}}, - { 0x00e7, {1, {0x00c7 }}}, - { 0x00e8, {1, {0x00c8 }}}, - { 0x00e9, {1, {0x00c9 }}}, - { 0x00ea, {1, {0x00ca }}}, - { 0x00eb, {1, {0x00cb }}}, - { 0x00ec, {1, {0x00cc }}}, - { 0x00ed, {1, {0x00cd }}}, - { 0x00ee, {1, {0x00ce }}}, - { 0x00ef, {1, {0x00cf }}}, - { 0x00f0, {1, {0x00d0 }}}, - { 0x00f1, {1, {0x00d1 }}}, - { 0x00f2, {1, {0x00d2 }}}, - { 0x00f3, {1, {0x00d3 }}}, - { 0x00f4, {1, {0x00d4 }}}, - { 0x00f5, {1, {0x00d5 }}}, - { 0x00f6, {1, {0x00d6 }}}, - { 0x00f8, {1, {0x00d8 }}}, - 
{ 0x00f9, {1, {0x00d9 }}}, - { 0x00fa, {1, {0x00da }}}, - { 0x00fb, {1, {0x00db }}}, - { 0x00fc, {1, {0x00dc }}}, - { 0x00fd, {1, {0x00dd }}}, - { 0x00fe, {1, {0x00de }}}, - { 0x00ff, {1, {0x0178 }}}, - { 0x0101, {1, {0x0100 }}}, - { 0x0103, {1, {0x0102 }}}, - { 0x0105, {1, {0x0104 }}}, - { 0x0107, {1, {0x0106 }}}, - { 0x0109, {1, {0x0108 }}}, - { 0x010b, {1, {0x010a }}}, - { 0x010d, {1, {0x010c }}}, - { 0x010f, {1, {0x010e }}}, - { 0x0111, {1, {0x0110 }}}, - { 0x0113, {1, {0x0112 }}}, - { 0x0115, {1, {0x0114 }}}, - { 0x0117, {1, {0x0116 }}}, - { 0x0119, {1, {0x0118 }}}, - { 0x011b, {1, {0x011a }}}, - { 0x011d, {1, {0x011c }}}, - { 0x011f, {1, {0x011e }}}, - { 0x0121, {1, {0x0120 }}}, - { 0x0123, {1, {0x0122 }}}, - { 0x0125, {1, {0x0124 }}}, - { 0x0127, {1, {0x0126 }}}, - { 0x0129, {1, {0x0128 }}}, - { 0x012b, {1, {0x012a }}}, - { 0x012d, {1, {0x012c }}}, - { 0x012f, {1, {0x012e }}}, - { 0x0133, {1, {0x0132 }}}, - { 0x0135, {1, {0x0134 }}}, - { 0x0137, {1, {0x0136 }}}, - { 0x013a, {1, {0x0139 }}}, - { 0x013c, {1, {0x013b }}}, - { 0x013e, {1, {0x013d }}}, - { 0x0140, {1, {0x013f }}}, - { 0x0142, {1, {0x0141 }}}, - { 0x0144, {1, {0x0143 }}}, - { 0x0146, {1, {0x0145 }}}, - { 0x0148, {1, {0x0147 }}}, - { 0x014b, {1, {0x014a }}}, - { 0x014d, {1, {0x014c }}}, - { 0x014f, {1, {0x014e }}}, - { 0x0151, {1, {0x0150 }}}, - { 0x0153, {1, {0x0152 }}}, - { 0x0155, {1, {0x0154 }}}, - { 0x0157, {1, {0x0156 }}}, - { 0x0159, {1, {0x0158 }}}, - { 0x015b, {1, {0x015a }}}, - { 0x015d, {1, {0x015c }}}, - { 0x015f, {1, {0x015e }}}, - { 0x0161, {1, {0x0160 }}}, - { 0x0163, {1, {0x0162 }}}, - { 0x0165, {1, {0x0164 }}}, - { 0x0167, {1, {0x0166 }}}, - { 0x0169, {1, {0x0168 }}}, - { 0x016b, {1, {0x016a }}}, - { 0x016d, {1, {0x016c }}}, - { 0x016f, {1, {0x016e }}}, - { 0x0171, {1, {0x0170 }}}, - { 0x0173, {1, {0x0172 }}}, - { 0x0175, {1, {0x0174 }}}, - { 0x0177, {1, {0x0176 }}}, - { 0x017a, {1, {0x0179 }}}, - { 0x017c, {1, {0x017b }}}, - { 0x017e, {1, {0x017d }}}, - { 0x0183, {1, {0x0182 }}}, 
- { 0x0185, {1, {0x0184 }}}, - { 0x0188, {1, {0x0187 }}}, - { 0x018c, {1, {0x018b }}}, - { 0x0192, {1, {0x0191 }}}, - { 0x0195, {1, {0x01f6 }}}, - { 0x0199, {1, {0x0198 }}}, - { 0x019a, {1, {0x023d }}}, - { 0x019e, {1, {0x0220 }}}, - { 0x01a1, {1, {0x01a0 }}}, - { 0x01a3, {1, {0x01a2 }}}, - { 0x01a5, {1, {0x01a4 }}}, - { 0x01a8, {1, {0x01a7 }}}, - { 0x01ad, {1, {0x01ac }}}, - { 0x01b0, {1, {0x01af }}}, - { 0x01b4, {1, {0x01b3 }}}, - { 0x01b6, {1, {0x01b5 }}}, - { 0x01b9, {1, {0x01b8 }}}, - { 0x01bd, {1, {0x01bc }}}, - { 0x01bf, {1, {0x01f7 }}}, - { 0x01c6, {2, {0x01c4, 0x01c5 }}}, - { 0x01c9, {2, {0x01c7, 0x01c8 }}}, - { 0x01cc, {2, {0x01ca, 0x01cb }}}, - { 0x01ce, {1, {0x01cd }}}, - { 0x01d0, {1, {0x01cf }}}, - { 0x01d2, {1, {0x01d1 }}}, - { 0x01d4, {1, {0x01d3 }}}, - { 0x01d6, {1, {0x01d5 }}}, - { 0x01d8, {1, {0x01d7 }}}, - { 0x01da, {1, {0x01d9 }}}, - { 0x01dc, {1, {0x01db }}}, - { 0x01dd, {1, {0x018e }}}, - { 0x01df, {1, {0x01de }}}, - { 0x01e1, {1, {0x01e0 }}}, - { 0x01e3, {1, {0x01e2 }}}, - { 0x01e5, {1, {0x01e4 }}}, - { 0x01e7, {1, {0x01e6 }}}, - { 0x01e9, {1, {0x01e8 }}}, - { 0x01eb, {1, {0x01ea }}}, - { 0x01ed, {1, {0x01ec }}}, - { 0x01ef, {1, {0x01ee }}}, - { 0x01f3, {2, {0x01f1, 0x01f2 }}}, - { 0x01f5, {1, {0x01f4 }}}, - { 0x01f9, {1, {0x01f8 }}}, - { 0x01fb, {1, {0x01fa }}}, - { 0x01fd, {1, {0x01fc }}}, - { 0x01ff, {1, {0x01fe }}}, - { 0x0201, {1, {0x0200 }}}, - { 0x0203, {1, {0x0202 }}}, - { 0x0205, {1, {0x0204 }}}, - { 0x0207, {1, {0x0206 }}}, - { 0x0209, {1, {0x0208 }}}, - { 0x020b, {1, {0x020a }}}, - { 0x020d, {1, {0x020c }}}, - { 0x020f, {1, {0x020e }}}, - { 0x0211, {1, {0x0210 }}}, - { 0x0213, {1, {0x0212 }}}, - { 0x0215, {1, {0x0214 }}}, - { 0x0217, {1, {0x0216 }}}, - { 0x0219, {1, {0x0218 }}}, - { 0x021b, {1, {0x021a }}}, - { 0x021d, {1, {0x021c }}}, - { 0x021f, {1, {0x021e }}}, - { 0x0223, {1, {0x0222 }}}, - { 0x0225, {1, {0x0224 }}}, - { 0x0227, {1, {0x0226 }}}, - { 0x0229, {1, {0x0228 }}}, - { 0x022b, {1, {0x022a }}}, - { 0x022d, {1, {0x022c 
}}}, - { 0x022f, {1, {0x022e }}}, - { 0x0231, {1, {0x0230 }}}, - { 0x0233, {1, {0x0232 }}}, - { 0x023c, {1, {0x023b }}}, - { 0x0253, {1, {0x0181 }}}, - { 0x0254, {1, {0x0186 }}}, - { 0x0256, {1, {0x0189 }}}, - { 0x0257, {1, {0x018a }}}, - { 0x0259, {1, {0x018f }}}, - { 0x025b, {1, {0x0190 }}}, - { 0x0260, {1, {0x0193 }}}, - { 0x0263, {1, {0x0194 }}}, - { 0x0268, {1, {0x0197 }}}, - { 0x0269, {1, {0x0196 }}}, - { 0x026f, {1, {0x019c }}}, - { 0x0272, {1, {0x019d }}}, - { 0x0275, {1, {0x019f }}}, - { 0x0280, {1, {0x01a6 }}}, - { 0x0283, {1, {0x01a9 }}}, - { 0x0288, {1, {0x01ae }}}, - { 0x028a, {1, {0x01b1 }}}, - { 0x028b, {1, {0x01b2 }}}, - { 0x0292, {1, {0x01b7 }}}, - { 0x0294, {1, {0x0241 }}}, - { 0x03ac, {1, {0x0386 }}}, - { 0x03ad, {1, {0x0388 }}}, - { 0x03ae, {1, {0x0389 }}}, - { 0x03af, {1, {0x038a }}}, - { 0x03b1, {1, {0x0391 }}}, - { 0x03b2, {2, {0x0392, 0x03d0 }}}, - { 0x03b3, {1, {0x0393 }}}, - { 0x03b4, {1, {0x0394 }}}, - { 0x03b5, {2, {0x03f5, 0x0395 }}}, - { 0x03b6, {1, {0x0396 }}}, - { 0x03b7, {1, {0x0397 }}}, - { 0x03b8, {3, {0x03f4, 0x0398, 0x03d1 }}}, - { 0x03b9, {3, {0x1fbe, 0x0399, 0x0345 }}}, - { 0x03ba, {2, {0x03f0, 0x039a }}}, - { 0x03bb, {1, {0x039b }}}, - { 0x03bc, {2, {0x00b5, 0x039c }}}, - { 0x03bd, {1, {0x039d }}}, - { 0x03be, {1, {0x039e }}}, - { 0x03bf, {1, {0x039f }}}, - { 0x03c0, {2, {0x03a0, 0x03d6 }}}, - { 0x03c1, {2, {0x03f1, 0x03a1 }}}, - { 0x03c3, {2, {0x03a3, 0x03c2 }}}, - { 0x03c4, {1, {0x03a4 }}}, - { 0x03c5, {1, {0x03a5 }}}, - { 0x03c6, {2, {0x03a6, 0x03d5 }}}, - { 0x03c7, {1, {0x03a7 }}}, - { 0x03c8, {1, {0x03a8 }}}, - { 0x03c9, {2, {0x03a9, 0x2126 }}}, - { 0x03ca, {1, {0x03aa }}}, - { 0x03cb, {1, {0x03ab }}}, - { 0x03cc, {1, {0x038c }}}, - { 0x03cd, {1, {0x038e }}}, - { 0x03ce, {1, {0x038f }}}, - { 0x03d9, {1, {0x03d8 }}}, - { 0x03db, {1, {0x03da }}}, - { 0x03dd, {1, {0x03dc }}}, - { 0x03df, {1, {0x03de }}}, - { 0x03e1, {1, {0x03e0 }}}, - { 0x03e3, {1, {0x03e2 }}}, - { 0x03e5, {1, {0x03e4 }}}, - { 0x03e7, {1, {0x03e6 }}}, - { 
0x03e9, {1, {0x03e8 }}}, - { 0x03eb, {1, {0x03ea }}}, - { 0x03ed, {1, {0x03ec }}}, - { 0x03ef, {1, {0x03ee }}}, - { 0x03f2, {1, {0x03f9 }}}, - { 0x03f8, {1, {0x03f7 }}}, - { 0x03fb, {1, {0x03fa }}}, - { 0x0430, {1, {0x0410 }}}, - { 0x0431, {1, {0x0411 }}}, - { 0x0432, {1, {0x0412 }}}, - { 0x0433, {1, {0x0413 }}}, - { 0x0434, {1, {0x0414 }}}, - { 0x0435, {1, {0x0415 }}}, - { 0x0436, {1, {0x0416 }}}, - { 0x0437, {1, {0x0417 }}}, - { 0x0438, {1, {0x0418 }}}, - { 0x0439, {1, {0x0419 }}}, - { 0x043a, {1, {0x041a }}}, - { 0x043b, {1, {0x041b }}}, - { 0x043c, {1, {0x041c }}}, - { 0x043d, {1, {0x041d }}}, - { 0x043e, {1, {0x041e }}}, - { 0x043f, {1, {0x041f }}}, - { 0x0440, {1, {0x0420 }}}, - { 0x0441, {1, {0x0421 }}}, - { 0x0442, {1, {0x0422 }}}, - { 0x0443, {1, {0x0423 }}}, - { 0x0444, {1, {0x0424 }}}, - { 0x0445, {1, {0x0425 }}}, - { 0x0446, {1, {0x0426 }}}, - { 0x0447, {1, {0x0427 }}}, - { 0x0448, {1, {0x0428 }}}, - { 0x0449, {1, {0x0429 }}}, - { 0x044a, {1, {0x042a }}}, - { 0x044b, {1, {0x042b }}}, - { 0x044c, {1, {0x042c }}}, - { 0x044d, {1, {0x042d }}}, - { 0x044e, {1, {0x042e }}}, - { 0x044f, {1, {0x042f }}}, - { 0x0450, {1, {0x0400 }}}, - { 0x0451, {1, {0x0401 }}}, - { 0x0452, {1, {0x0402 }}}, - { 0x0453, {1, {0x0403 }}}, - { 0x0454, {1, {0x0404 }}}, - { 0x0455, {1, {0x0405 }}}, - { 0x0456, {1, {0x0406 }}}, - { 0x0457, {1, {0x0407 }}}, - { 0x0458, {1, {0x0408 }}}, - { 0x0459, {1, {0x0409 }}}, - { 0x045a, {1, {0x040a }}}, - { 0x045b, {1, {0x040b }}}, - { 0x045c, {1, {0x040c }}}, - { 0x045d, {1, {0x040d }}}, - { 0x045e, {1, {0x040e }}}, - { 0x045f, {1, {0x040f }}}, - { 0x0461, {1, {0x0460 }}}, - { 0x0463, {1, {0x0462 }}}, - { 0x0465, {1, {0x0464 }}}, - { 0x0467, {1, {0x0466 }}}, - { 0x0469, {1, {0x0468 }}}, - { 0x046b, {1, {0x046a }}}, - { 0x046d, {1, {0x046c }}}, - { 0x046f, {1, {0x046e }}}, - { 0x0471, {1, {0x0470 }}}, - { 0x0473, {1, {0x0472 }}}, - { 0x0475, {1, {0x0474 }}}, - { 0x0477, {1, {0x0476 }}}, - { 0x0479, {1, {0x0478 }}}, - { 0x047b, {1, {0x047a }}}, - 
{ 0x047d, {1, {0x047c }}}, - { 0x047f, {1, {0x047e }}}, - { 0x0481, {1, {0x0480 }}}, - { 0x048b, {1, {0x048a }}}, - { 0x048d, {1, {0x048c }}}, - { 0x048f, {1, {0x048e }}}, - { 0x0491, {1, {0x0490 }}}, - { 0x0493, {1, {0x0492 }}}, - { 0x0495, {1, {0x0494 }}}, - { 0x0497, {1, {0x0496 }}}, - { 0x0499, {1, {0x0498 }}}, - { 0x049b, {1, {0x049a }}}, - { 0x049d, {1, {0x049c }}}, - { 0x049f, {1, {0x049e }}}, - { 0x04a1, {1, {0x04a0 }}}, - { 0x04a3, {1, {0x04a2 }}}, - { 0x04a5, {1, {0x04a4 }}}, - { 0x04a7, {1, {0x04a6 }}}, - { 0x04a9, {1, {0x04a8 }}}, - { 0x04ab, {1, {0x04aa }}}, - { 0x04ad, {1, {0x04ac }}}, - { 0x04af, {1, {0x04ae }}}, - { 0x04b1, {1, {0x04b0 }}}, - { 0x04b3, {1, {0x04b2 }}}, - { 0x04b5, {1, {0x04b4 }}}, - { 0x04b7, {1, {0x04b6 }}}, - { 0x04b9, {1, {0x04b8 }}}, - { 0x04bb, {1, {0x04ba }}}, - { 0x04bd, {1, {0x04bc }}}, - { 0x04bf, {1, {0x04be }}}, - { 0x04c2, {1, {0x04c1 }}}, - { 0x04c4, {1, {0x04c3 }}}, - { 0x04c6, {1, {0x04c5 }}}, - { 0x04c8, {1, {0x04c7 }}}, - { 0x04ca, {1, {0x04c9 }}}, - { 0x04cc, {1, {0x04cb }}}, - { 0x04ce, {1, {0x04cd }}}, - { 0x04d1, {1, {0x04d0 }}}, - { 0x04d3, {1, {0x04d2 }}}, - { 0x04d5, {1, {0x04d4 }}}, - { 0x04d7, {1, {0x04d6 }}}, - { 0x04d9, {1, {0x04d8 }}}, - { 0x04db, {1, {0x04da }}}, - { 0x04dd, {1, {0x04dc }}}, - { 0x04df, {1, {0x04de }}}, - { 0x04e1, {1, {0x04e0 }}}, - { 0x04e3, {1, {0x04e2 }}}, - { 0x04e5, {1, {0x04e4 }}}, - { 0x04e7, {1, {0x04e6 }}}, - { 0x04e9, {1, {0x04e8 }}}, - { 0x04eb, {1, {0x04ea }}}, - { 0x04ed, {1, {0x04ec }}}, - { 0x04ef, {1, {0x04ee }}}, - { 0x04f1, {1, {0x04f0 }}}, - { 0x04f3, {1, {0x04f2 }}}, - { 0x04f5, {1, {0x04f4 }}}, - { 0x04f7, {1, {0x04f6 }}}, - { 0x04f9, {1, {0x04f8 }}}, - { 0x0501, {1, {0x0500 }}}, - { 0x0503, {1, {0x0502 }}}, - { 0x0505, {1, {0x0504 }}}, - { 0x0507, {1, {0x0506 }}}, - { 0x0509, {1, {0x0508 }}}, - { 0x050b, {1, {0x050a }}}, - { 0x050d, {1, {0x050c }}}, - { 0x050f, {1, {0x050e }}}, - { 0x0561, {1, {0x0531 }}}, - { 0x0562, {1, {0x0532 }}}, - { 0x0563, {1, {0x0533 }}}, 
- { 0x0564, {1, {0x0534 }}}, - { 0x0565, {1, {0x0535 }}}, - { 0x0566, {1, {0x0536 }}}, - { 0x0567, {1, {0x0537 }}}, - { 0x0568, {1, {0x0538 }}}, - { 0x0569, {1, {0x0539 }}}, - { 0x056a, {1, {0x053a }}}, - { 0x056b, {1, {0x053b }}}, - { 0x056c, {1, {0x053c }}}, - { 0x056d, {1, {0x053d }}}, - { 0x056e, {1, {0x053e }}}, - { 0x056f, {1, {0x053f }}}, - { 0x0570, {1, {0x0540 }}}, - { 0x0571, {1, {0x0541 }}}, - { 0x0572, {1, {0x0542 }}}, - { 0x0573, {1, {0x0543 }}}, - { 0x0574, {1, {0x0544 }}}, - { 0x0575, {1, {0x0545 }}}, - { 0x0576, {1, {0x0546 }}}, - { 0x0577, {1, {0x0547 }}}, - { 0x0578, {1, {0x0548 }}}, - { 0x0579, {1, {0x0549 }}}, - { 0x057a, {1, {0x054a }}}, - { 0x057b, {1, {0x054b }}}, - { 0x057c, {1, {0x054c }}}, - { 0x057d, {1, {0x054d }}}, - { 0x057e, {1, {0x054e }}}, - { 0x057f, {1, {0x054f }}}, - { 0x0580, {1, {0x0550 }}}, - { 0x0581, {1, {0x0551 }}}, - { 0x0582, {1, {0x0552 }}}, - { 0x0583, {1, {0x0553 }}}, - { 0x0584, {1, {0x0554 }}}, - { 0x0585, {1, {0x0555 }}}, - { 0x0586, {1, {0x0556 }}}, - { 0x1e01, {1, {0x1e00 }}}, - { 0x1e03, {1, {0x1e02 }}}, - { 0x1e05, {1, {0x1e04 }}}, - { 0x1e07, {1, {0x1e06 }}}, - { 0x1e09, {1, {0x1e08 }}}, - { 0x1e0b, {1, {0x1e0a }}}, - { 0x1e0d, {1, {0x1e0c }}}, - { 0x1e0f, {1, {0x1e0e }}}, - { 0x1e11, {1, {0x1e10 }}}, - { 0x1e13, {1, {0x1e12 }}}, - { 0x1e15, {1, {0x1e14 }}}, - { 0x1e17, {1, {0x1e16 }}}, - { 0x1e19, {1, {0x1e18 }}}, - { 0x1e1b, {1, {0x1e1a }}}, - { 0x1e1d, {1, {0x1e1c }}}, - { 0x1e1f, {1, {0x1e1e }}}, - { 0x1e21, {1, {0x1e20 }}}, - { 0x1e23, {1, {0x1e22 }}}, - { 0x1e25, {1, {0x1e24 }}}, - { 0x1e27, {1, {0x1e26 }}}, - { 0x1e29, {1, {0x1e28 }}}, - { 0x1e2b, {1, {0x1e2a }}}, - { 0x1e2d, {1, {0x1e2c }}}, - { 0x1e2f, {1, {0x1e2e }}}, - { 0x1e31, {1, {0x1e30 }}}, - { 0x1e33, {1, {0x1e32 }}}, - { 0x1e35, {1, {0x1e34 }}}, - { 0x1e37, {1, {0x1e36 }}}, - { 0x1e39, {1, {0x1e38 }}}, - { 0x1e3b, {1, {0x1e3a }}}, - { 0x1e3d, {1, {0x1e3c }}}, - { 0x1e3f, {1, {0x1e3e }}}, - { 0x1e41, {1, {0x1e40 }}}, - { 0x1e43, {1, {0x1e42 
}}}, - { 0x1e45, {1, {0x1e44 }}}, - { 0x1e47, {1, {0x1e46 }}}, - { 0x1e49, {1, {0x1e48 }}}, - { 0x1e4b, {1, {0x1e4a }}}, - { 0x1e4d, {1, {0x1e4c }}}, - { 0x1e4f, {1, {0x1e4e }}}, - { 0x1e51, {1, {0x1e50 }}}, - { 0x1e53, {1, {0x1e52 }}}, - { 0x1e55, {1, {0x1e54 }}}, - { 0x1e57, {1, {0x1e56 }}}, - { 0x1e59, {1, {0x1e58 }}}, - { 0x1e5b, {1, {0x1e5a }}}, - { 0x1e5d, {1, {0x1e5c }}}, - { 0x1e5f, {1, {0x1e5e }}}, - { 0x1e61, {2, {0x1e9b, 0x1e60 }}}, - { 0x1e63, {1, {0x1e62 }}}, - { 0x1e65, {1, {0x1e64 }}}, - { 0x1e67, {1, {0x1e66 }}}, - { 0x1e69, {1, {0x1e68 }}}, - { 0x1e6b, {1, {0x1e6a }}}, - { 0x1e6d, {1, {0x1e6c }}}, - { 0x1e6f, {1, {0x1e6e }}}, - { 0x1e71, {1, {0x1e70 }}}, - { 0x1e73, {1, {0x1e72 }}}, - { 0x1e75, {1, {0x1e74 }}}, - { 0x1e77, {1, {0x1e76 }}}, - { 0x1e79, {1, {0x1e78 }}}, - { 0x1e7b, {1, {0x1e7a }}}, - { 0x1e7d, {1, {0x1e7c }}}, - { 0x1e7f, {1, {0x1e7e }}}, - { 0x1e81, {1, {0x1e80 }}}, - { 0x1e83, {1, {0x1e82 }}}, - { 0x1e85, {1, {0x1e84 }}}, - { 0x1e87, {1, {0x1e86 }}}, - { 0x1e89, {1, {0x1e88 }}}, - { 0x1e8b, {1, {0x1e8a }}}, - { 0x1e8d, {1, {0x1e8c }}}, - { 0x1e8f, {1, {0x1e8e }}}, - { 0x1e91, {1, {0x1e90 }}}, - { 0x1e93, {1, {0x1e92 }}}, - { 0x1e95, {1, {0x1e94 }}}, - { 0x1ea1, {1, {0x1ea0 }}}, - { 0x1ea3, {1, {0x1ea2 }}}, - { 0x1ea5, {1, {0x1ea4 }}}, - { 0x1ea7, {1, {0x1ea6 }}}, - { 0x1ea9, {1, {0x1ea8 }}}, - { 0x1eab, {1, {0x1eaa }}}, - { 0x1ead, {1, {0x1eac }}}, - { 0x1eaf, {1, {0x1eae }}}, - { 0x1eb1, {1, {0x1eb0 }}}, - { 0x1eb3, {1, {0x1eb2 }}}, - { 0x1eb5, {1, {0x1eb4 }}}, - { 0x1eb7, {1, {0x1eb6 }}}, - { 0x1eb9, {1, {0x1eb8 }}}, - { 0x1ebb, {1, {0x1eba }}}, - { 0x1ebd, {1, {0x1ebc }}}, - { 0x1ebf, {1, {0x1ebe }}}, - { 0x1ec1, {1, {0x1ec0 }}}, - { 0x1ec3, {1, {0x1ec2 }}}, - { 0x1ec5, {1, {0x1ec4 }}}, - { 0x1ec7, {1, {0x1ec6 }}}, - { 0x1ec9, {1, {0x1ec8 }}}, - { 0x1ecb, {1, {0x1eca }}}, - { 0x1ecd, {1, {0x1ecc }}}, - { 0x1ecf, {1, {0x1ece }}}, - { 0x1ed1, {1, {0x1ed0 }}}, - { 0x1ed3, {1, {0x1ed2 }}}, - { 0x1ed5, {1, {0x1ed4 }}}, - { 0x1ed7, 
{1, {0x1ed6 }}}, - { 0x1ed9, {1, {0x1ed8 }}}, - { 0x1edb, {1, {0x1eda }}}, - { 0x1edd, {1, {0x1edc }}}, - { 0x1edf, {1, {0x1ede }}}, - { 0x1ee1, {1, {0x1ee0 }}}, - { 0x1ee3, {1, {0x1ee2 }}}, - { 0x1ee5, {1, {0x1ee4 }}}, - { 0x1ee7, {1, {0x1ee6 }}}, - { 0x1ee9, {1, {0x1ee8 }}}, - { 0x1eeb, {1, {0x1eea }}}, - { 0x1eed, {1, {0x1eec }}}, - { 0x1eef, {1, {0x1eee }}}, - { 0x1ef1, {1, {0x1ef0 }}}, - { 0x1ef3, {1, {0x1ef2 }}}, - { 0x1ef5, {1, {0x1ef4 }}}, - { 0x1ef7, {1, {0x1ef6 }}}, - { 0x1ef9, {1, {0x1ef8 }}}, - { 0x1f00, {1, {0x1f08 }}}, - { 0x1f01, {1, {0x1f09 }}}, - { 0x1f02, {1, {0x1f0a }}}, - { 0x1f03, {1, {0x1f0b }}}, - { 0x1f04, {1, {0x1f0c }}}, - { 0x1f05, {1, {0x1f0d }}}, - { 0x1f06, {1, {0x1f0e }}}, - { 0x1f07, {1, {0x1f0f }}}, - { 0x1f10, {1, {0x1f18 }}}, - { 0x1f11, {1, {0x1f19 }}}, - { 0x1f12, {1, {0x1f1a }}}, - { 0x1f13, {1, {0x1f1b }}}, - { 0x1f14, {1, {0x1f1c }}}, - { 0x1f15, {1, {0x1f1d }}}, - { 0x1f20, {1, {0x1f28 }}}, - { 0x1f21, {1, {0x1f29 }}}, - { 0x1f22, {1, {0x1f2a }}}, - { 0x1f23, {1, {0x1f2b }}}, - { 0x1f24, {1, {0x1f2c }}}, - { 0x1f25, {1, {0x1f2d }}}, - { 0x1f26, {1, {0x1f2e }}}, - { 0x1f27, {1, {0x1f2f }}}, - { 0x1f30, {1, {0x1f38 }}}, - { 0x1f31, {1, {0x1f39 }}}, - { 0x1f32, {1, {0x1f3a }}}, - { 0x1f33, {1, {0x1f3b }}}, - { 0x1f34, {1, {0x1f3c }}}, - { 0x1f35, {1, {0x1f3d }}}, - { 0x1f36, {1, {0x1f3e }}}, - { 0x1f37, {1, {0x1f3f }}}, - { 0x1f40, {1, {0x1f48 }}}, - { 0x1f41, {1, {0x1f49 }}}, - { 0x1f42, {1, {0x1f4a }}}, - { 0x1f43, {1, {0x1f4b }}}, - { 0x1f44, {1, {0x1f4c }}}, - { 0x1f45, {1, {0x1f4d }}}, - { 0x1f51, {1, {0x1f59 }}}, - { 0x1f53, {1, {0x1f5b }}}, - { 0x1f55, {1, {0x1f5d }}}, - { 0x1f57, {1, {0x1f5f }}}, - { 0x1f60, {1, {0x1f68 }}}, - { 0x1f61, {1, {0x1f69 }}}, - { 0x1f62, {1, {0x1f6a }}}, - { 0x1f63, {1, {0x1f6b }}}, - { 0x1f64, {1, {0x1f6c }}}, - { 0x1f65, {1, {0x1f6d }}}, - { 0x1f66, {1, {0x1f6e }}}, - { 0x1f67, {1, {0x1f6f }}}, - { 0x1f70, {1, {0x1fba }}}, - { 0x1f71, {1, {0x1fbb }}}, - { 0x1f72, {1, {0x1fc8 }}}, - { 
0x1f73, {1, {0x1fc9 }}}, - { 0x1f74, {1, {0x1fca }}}, - { 0x1f75, {1, {0x1fcb }}}, - { 0x1f76, {1, {0x1fda }}}, - { 0x1f77, {1, {0x1fdb }}}, - { 0x1f78, {1, {0x1ff8 }}}, - { 0x1f79, {1, {0x1ff9 }}}, - { 0x1f7a, {1, {0x1fea }}}, - { 0x1f7b, {1, {0x1feb }}}, - { 0x1f7c, {1, {0x1ffa }}}, - { 0x1f7d, {1, {0x1ffb }}}, - { 0x1fb0, {1, {0x1fb8 }}}, - { 0x1fb1, {1, {0x1fb9 }}}, - { 0x1fd0, {1, {0x1fd8 }}}, - { 0x1fd1, {1, {0x1fd9 }}}, - { 0x1fe0, {1, {0x1fe8 }}}, - { 0x1fe1, {1, {0x1fe9 }}}, - { 0x1fe5, {1, {0x1fec }}}, - { 0x2170, {1, {0x2160 }}}, - { 0x2171, {1, {0x2161 }}}, - { 0x2172, {1, {0x2162 }}}, - { 0x2173, {1, {0x2163 }}}, - { 0x2174, {1, {0x2164 }}}, - { 0x2175, {1, {0x2165 }}}, - { 0x2176, {1, {0x2166 }}}, - { 0x2177, {1, {0x2167 }}}, - { 0x2178, {1, {0x2168 }}}, - { 0x2179, {1, {0x2169 }}}, - { 0x217a, {1, {0x216a }}}, - { 0x217b, {1, {0x216b }}}, - { 0x217c, {1, {0x216c }}}, - { 0x217d, {1, {0x216d }}}, - { 0x217e, {1, {0x216e }}}, - { 0x217f, {1, {0x216f }}}, - { 0x24d0, {1, {0x24b6 }}}, - { 0x24d1, {1, {0x24b7 }}}, - { 0x24d2, {1, {0x24b8 }}}, - { 0x24d3, {1, {0x24b9 }}}, - { 0x24d4, {1, {0x24ba }}}, - { 0x24d5, {1, {0x24bb }}}, - { 0x24d6, {1, {0x24bc }}}, - { 0x24d7, {1, {0x24bd }}}, - { 0x24d8, {1, {0x24be }}}, - { 0x24d9, {1, {0x24bf }}}, - { 0x24da, {1, {0x24c0 }}}, - { 0x24db, {1, {0x24c1 }}}, - { 0x24dc, {1, {0x24c2 }}}, - { 0x24dd, {1, {0x24c3 }}}, - { 0x24de, {1, {0x24c4 }}}, - { 0x24df, {1, {0x24c5 }}}, - { 0x24e0, {1, {0x24c6 }}}, - { 0x24e1, {1, {0x24c7 }}}, - { 0x24e2, {1, {0x24c8 }}}, - { 0x24e3, {1, {0x24c9 }}}, - { 0x24e4, {1, {0x24ca }}}, - { 0x24e5, {1, {0x24cb }}}, - { 0x24e6, {1, {0x24cc }}}, - { 0x24e7, {1, {0x24cd }}}, - { 0x24e8, {1, {0x24ce }}}, - { 0x24e9, {1, {0x24cf }}}, - { 0x2c30, {1, {0x2c00 }}}, - { 0x2c31, {1, {0x2c01 }}}, - { 0x2c32, {1, {0x2c02 }}}, - { 0x2c33, {1, {0x2c03 }}}, - { 0x2c34, {1, {0x2c04 }}}, - { 0x2c35, {1, {0x2c05 }}}, - { 0x2c36, {1, {0x2c06 }}}, - { 0x2c37, {1, {0x2c07 }}}, - { 0x2c38, {1, {0x2c08 }}}, - 
{ 0x2c39, {1, {0x2c09 }}}, - { 0x2c3a, {1, {0x2c0a }}}, - { 0x2c3b, {1, {0x2c0b }}}, - { 0x2c3c, {1, {0x2c0c }}}, - { 0x2c3d, {1, {0x2c0d }}}, - { 0x2c3e, {1, {0x2c0e }}}, - { 0x2c3f, {1, {0x2c0f }}}, - { 0x2c40, {1, {0x2c10 }}}, - { 0x2c41, {1, {0x2c11 }}}, - { 0x2c42, {1, {0x2c12 }}}, - { 0x2c43, {1, {0x2c13 }}}, - { 0x2c44, {1, {0x2c14 }}}, - { 0x2c45, {1, {0x2c15 }}}, - { 0x2c46, {1, {0x2c16 }}}, - { 0x2c47, {1, {0x2c17 }}}, - { 0x2c48, {1, {0x2c18 }}}, - { 0x2c49, {1, {0x2c19 }}}, - { 0x2c4a, {1, {0x2c1a }}}, - { 0x2c4b, {1, {0x2c1b }}}, - { 0x2c4c, {1, {0x2c1c }}}, - { 0x2c4d, {1, {0x2c1d }}}, - { 0x2c4e, {1, {0x2c1e }}}, - { 0x2c4f, {1, {0x2c1f }}}, - { 0x2c50, {1, {0x2c20 }}}, - { 0x2c51, {1, {0x2c21 }}}, - { 0x2c52, {1, {0x2c22 }}}, - { 0x2c53, {1, {0x2c23 }}}, - { 0x2c54, {1, {0x2c24 }}}, - { 0x2c55, {1, {0x2c25 }}}, - { 0x2c56, {1, {0x2c26 }}}, - { 0x2c57, {1, {0x2c27 }}}, - { 0x2c58, {1, {0x2c28 }}}, - { 0x2c59, {1, {0x2c29 }}}, - { 0x2c5a, {1, {0x2c2a }}}, - { 0x2c5b, {1, {0x2c2b }}}, - { 0x2c5c, {1, {0x2c2c }}}, - { 0x2c5d, {1, {0x2c2d }}}, - { 0x2c5e, {1, {0x2c2e }}}, - { 0x2c81, {1, {0x2c80 }}}, - { 0x2c83, {1, {0x2c82 }}}, - { 0x2c85, {1, {0x2c84 }}}, - { 0x2c87, {1, {0x2c86 }}}, - { 0x2c89, {1, {0x2c88 }}}, - { 0x2c8b, {1, {0x2c8a }}}, - { 0x2c8d, {1, {0x2c8c }}}, - { 0x2c8f, {1, {0x2c8e }}}, - { 0x2c91, {1, {0x2c90 }}}, - { 0x2c93, {1, {0x2c92 }}}, - { 0x2c95, {1, {0x2c94 }}}, - { 0x2c97, {1, {0x2c96 }}}, - { 0x2c99, {1, {0x2c98 }}}, - { 0x2c9b, {1, {0x2c9a }}}, - { 0x2c9d, {1, {0x2c9c }}}, - { 0x2c9f, {1, {0x2c9e }}}, - { 0x2ca1, {1, {0x2ca0 }}}, - { 0x2ca3, {1, {0x2ca2 }}}, - { 0x2ca5, {1, {0x2ca4 }}}, - { 0x2ca7, {1, {0x2ca6 }}}, - { 0x2ca9, {1, {0x2ca8 }}}, - { 0x2cab, {1, {0x2caa }}}, - { 0x2cad, {1, {0x2cac }}}, - { 0x2caf, {1, {0x2cae }}}, - { 0x2cb1, {1, {0x2cb0 }}}, - { 0x2cb3, {1, {0x2cb2 }}}, - { 0x2cb5, {1, {0x2cb4 }}}, - { 0x2cb7, {1, {0x2cb6 }}}, - { 0x2cb9, {1, {0x2cb8 }}}, - { 0x2cbb, {1, {0x2cba }}}, - { 0x2cbd, {1, {0x2cbc }}}, 
- { 0x2cbf, {1, {0x2cbe }}}, - { 0x2cc1, {1, {0x2cc0 }}}, - { 0x2cc3, {1, {0x2cc2 }}}, - { 0x2cc5, {1, {0x2cc4 }}}, - { 0x2cc7, {1, {0x2cc6 }}}, - { 0x2cc9, {1, {0x2cc8 }}}, - { 0x2ccb, {1, {0x2cca }}}, - { 0x2ccd, {1, {0x2ccc }}}, - { 0x2ccf, {1, {0x2cce }}}, - { 0x2cd1, {1, {0x2cd0 }}}, - { 0x2cd3, {1, {0x2cd2 }}}, - { 0x2cd5, {1, {0x2cd4 }}}, - { 0x2cd7, {1, {0x2cd6 }}}, - { 0x2cd9, {1, {0x2cd8 }}}, - { 0x2cdb, {1, {0x2cda }}}, - { 0x2cdd, {1, {0x2cdc }}}, - { 0x2cdf, {1, {0x2cde }}}, - { 0x2ce1, {1, {0x2ce0 }}}, - { 0x2ce3, {1, {0x2ce2 }}}, - { 0x2d00, {1, {0x10a0 }}}, - { 0x2d01, {1, {0x10a1 }}}, - { 0x2d02, {1, {0x10a2 }}}, - { 0x2d03, {1, {0x10a3 }}}, - { 0x2d04, {1, {0x10a4 }}}, - { 0x2d05, {1, {0x10a5 }}}, - { 0x2d06, {1, {0x10a6 }}}, - { 0x2d07, {1, {0x10a7 }}}, - { 0x2d08, {1, {0x10a8 }}}, - { 0x2d09, {1, {0x10a9 }}}, - { 0x2d0a, {1, {0x10aa }}}, - { 0x2d0b, {1, {0x10ab }}}, - { 0x2d0c, {1, {0x10ac }}}, - { 0x2d0d, {1, {0x10ad }}}, - { 0x2d0e, {1, {0x10ae }}}, - { 0x2d0f, {1, {0x10af }}}, - { 0x2d10, {1, {0x10b0 }}}, - { 0x2d11, {1, {0x10b1 }}}, - { 0x2d12, {1, {0x10b2 }}}, - { 0x2d13, {1, {0x10b3 }}}, - { 0x2d14, {1, {0x10b4 }}}, - { 0x2d15, {1, {0x10b5 }}}, - { 0x2d16, {1, {0x10b6 }}}, - { 0x2d17, {1, {0x10b7 }}}, - { 0x2d18, {1, {0x10b8 }}}, - { 0x2d19, {1, {0x10b9 }}}, - { 0x2d1a, {1, {0x10ba }}}, - { 0x2d1b, {1, {0x10bb }}}, - { 0x2d1c, {1, {0x10bc }}}, - { 0x2d1d, {1, {0x10bd }}}, - { 0x2d1e, {1, {0x10be }}}, - { 0x2d1f, {1, {0x10bf }}}, - { 0x2d20, {1, {0x10c0 }}}, - { 0x2d21, {1, {0x10c1 }}}, - { 0x2d22, {1, {0x10c2 }}}, - { 0x2d23, {1, {0x10c3 }}}, - { 0x2d24, {1, {0x10c4 }}}, - { 0x2d25, {1, {0x10c5 }}}, - { 0xff41, {1, {0xff21 }}}, - { 0xff42, {1, {0xff22 }}}, - { 0xff43, {1, {0xff23 }}}, - { 0xff44, {1, {0xff24 }}}, - { 0xff45, {1, {0xff25 }}}, - { 0xff46, {1, {0xff26 }}}, - { 0xff47, {1, {0xff27 }}}, - { 0xff48, {1, {0xff28 }}}, - { 0xff49, {1, {0xff29 }}}, - { 0xff4a, {1, {0xff2a }}}, - { 0xff4b, {1, {0xff2b }}}, - { 0xff4c, {1, {0xff2c 
}}}, - { 0xff4d, {1, {0xff2d }}}, - { 0xff4e, {1, {0xff2e }}}, - { 0xff4f, {1, {0xff2f }}}, - { 0xff50, {1, {0xff30 }}}, - { 0xff51, {1, {0xff31 }}}, - { 0xff52, {1, {0xff32 }}}, - { 0xff53, {1, {0xff33 }}}, - { 0xff54, {1, {0xff34 }}}, - { 0xff55, {1, {0xff35 }}}, - { 0xff56, {1, {0xff36 }}}, - { 0xff57, {1, {0xff37 }}}, - { 0xff58, {1, {0xff38 }}}, - { 0xff59, {1, {0xff39 }}}, - { 0xff5a, {1, {0xff3a }}}, - { 0x10428, {1, {0x10400 }}}, - { 0x10429, {1, {0x10401 }}}, - { 0x1042a, {1, {0x10402 }}}, - { 0x1042b, {1, {0x10403 }}}, - { 0x1042c, {1, {0x10404 }}}, - { 0x1042d, {1, {0x10405 }}}, - { 0x1042e, {1, {0x10406 }}}, - { 0x1042f, {1, {0x10407 }}}, - { 0x10430, {1, {0x10408 }}}, - { 0x10431, {1, {0x10409 }}}, - { 0x10432, {1, {0x1040a }}}, - { 0x10433, {1, {0x1040b }}}, - { 0x10434, {1, {0x1040c }}}, - { 0x10435, {1, {0x1040d }}}, - { 0x10436, {1, {0x1040e }}}, - { 0x10437, {1, {0x1040f }}}, - { 0x10438, {1, {0x10410 }}}, - { 0x10439, {1, {0x10411 }}}, - { 0x1043a, {1, {0x10412 }}}, - { 0x1043b, {1, {0x10413 }}}, - { 0x1043c, {1, {0x10414 }}}, - { 0x1043d, {1, {0x10415 }}}, - { 0x1043e, {1, {0x10416 }}}, - { 0x1043f, {1, {0x10417 }}}, - { 0x10440, {1, {0x10418 }}}, - { 0x10441, {1, {0x10419 }}}, - { 0x10442, {1, {0x1041a }}}, - { 0x10443, {1, {0x1041b }}}, - { 0x10444, {1, {0x1041c }}}, - { 0x10445, {1, {0x1041d }}}, - { 0x10446, {1, {0x1041e }}}, - { 0x10447, {1, {0x1041f }}}, - { 0x10448, {1, {0x10420 }}}, - { 0x10449, {1, {0x10421 }}}, - { 0x1044a, {1, {0x10422 }}}, - { 0x1044b, {1, {0x10423 }}}, - { 0x1044c, {1, {0x10424 }}}, - { 0x1044d, {1, {0x10425 }}}, - { 0x1044e, {1, {0x10426 }}}, - { 0x1044f, {1, {0x10427 }}} -}; - -static const CaseUnfold_11_Type CaseUnfold_11_Locale[] = { - { 0x0069, {1, {0x0049 }}} -}; - -static const CaseUnfold_12_Type CaseUnfold_12[] = { - { {0x0061, 0x02be}, {1, {0x1e9a }}}, - { {0x0066, 0x0066}, {1, {0xfb00 }}}, - { {0x0066, 0x0069}, {1, {0xfb01 }}}, - { {0x0066, 0x006c}, {1, {0xfb02 }}}, - { {0x0068, 0x0331}, {1, {0x1e96 }}}, - 
{ {0x006a, 0x030c}, {1, {0x01f0 }}}, - { {0x0073, 0x0073}, {1, {0x00df }}}, - { {0x0073, 0x0074}, {2, {0xfb05, 0xfb06 }}}, - { {0x0074, 0x0308}, {1, {0x1e97 }}}, - { {0x0077, 0x030a}, {1, {0x1e98 }}}, - { {0x0079, 0x030a}, {1, {0x1e99 }}}, - { {0x02bc, 0x006e}, {1, {0x0149 }}}, - { {0x03ac, 0x03b9}, {1, {0x1fb4 }}}, - { {0x03ae, 0x03b9}, {1, {0x1fc4 }}}, - { {0x03b1, 0x0342}, {1, {0x1fb6 }}}, - { {0x03b1, 0x03b9}, {2, {0x1fb3, 0x1fbc }}}, - { {0x03b7, 0x0342}, {1, {0x1fc6 }}}, - { {0x03b7, 0x03b9}, {2, {0x1fc3, 0x1fcc }}}, - { {0x03b9, 0x0342}, {1, {0x1fd6 }}}, - { {0x03c1, 0x0313}, {1, {0x1fe4 }}}, - { {0x03c5, 0x0313}, {1, {0x1f50 }}}, - { {0x03c5, 0x0342}, {1, {0x1fe6 }}}, - { {0x03c9, 0x0342}, {1, {0x1ff6 }}}, - { {0x03c9, 0x03b9}, {2, {0x1ff3, 0x1ffc }}}, - { {0x03ce, 0x03b9}, {1, {0x1ff4 }}}, - { {0x0565, 0x0582}, {1, {0x0587 }}}, - { {0x0574, 0x0565}, {1, {0xfb14 }}}, - { {0x0574, 0x056b}, {1, {0xfb15 }}}, - { {0x0574, 0x056d}, {1, {0xfb17 }}}, - { {0x0574, 0x0576}, {1, {0xfb13 }}}, - { {0x057e, 0x0576}, {1, {0xfb16 }}}, - { {0x1f00, 0x03b9}, {2, {0x1f88, 0x1f80 }}}, - { {0x1f01, 0x03b9}, {2, {0x1f81, 0x1f89 }}}, - { {0x1f02, 0x03b9}, {2, {0x1f82, 0x1f8a }}}, - { {0x1f03, 0x03b9}, {2, {0x1f83, 0x1f8b }}}, - { {0x1f04, 0x03b9}, {2, {0x1f84, 0x1f8c }}}, - { {0x1f05, 0x03b9}, {2, {0x1f85, 0x1f8d }}}, - { {0x1f06, 0x03b9}, {2, {0x1f86, 0x1f8e }}}, - { {0x1f07, 0x03b9}, {2, {0x1f87, 0x1f8f }}}, - { {0x1f20, 0x03b9}, {2, {0x1f90, 0x1f98 }}}, - { {0x1f21, 0x03b9}, {2, {0x1f91, 0x1f99 }}}, - { {0x1f22, 0x03b9}, {2, {0x1f92, 0x1f9a }}}, - { {0x1f23, 0x03b9}, {2, {0x1f93, 0x1f9b }}}, - { {0x1f24, 0x03b9}, {2, {0x1f94, 0x1f9c }}}, - { {0x1f25, 0x03b9}, {2, {0x1f95, 0x1f9d }}}, - { {0x1f26, 0x03b9}, {2, {0x1f96, 0x1f9e }}}, - { {0x1f27, 0x03b9}, {2, {0x1f97, 0x1f9f }}}, - { {0x1f60, 0x03b9}, {2, {0x1fa0, 0x1fa8 }}}, - { {0x1f61, 0x03b9}, {2, {0x1fa1, 0x1fa9 }}}, - { {0x1f62, 0x03b9}, {2, {0x1fa2, 0x1faa }}}, - { {0x1f63, 0x03b9}, {2, {0x1fa3, 0x1fab }}}, - { {0x1f64, 
0x03b9}, {2, {0x1fa4, 0x1fac }}}, - { {0x1f65, 0x03b9}, {2, {0x1fa5, 0x1fad }}}, - { {0x1f66, 0x03b9}, {2, {0x1fa6, 0x1fae }}}, - { {0x1f67, 0x03b9}, {2, {0x1fa7, 0x1faf }}}, - { {0x1f70, 0x03b9}, {1, {0x1fb2 }}}, - { {0x1f74, 0x03b9}, {1, {0x1fc2 }}}, - { {0x1f7c, 0x03b9}, {1, {0x1ff2 }}} -}; - -static const CaseUnfold_12_Type CaseUnfold_12_Locale[] = { - { {0x0069, 0x0307}, {1, {0x0130 }}} -}; - -static const CaseUnfold_13_Type CaseUnfold_13[] = { - { {0x0066, 0x0066, 0x0069}, {1, {0xfb03 }}}, - { {0x0066, 0x0066, 0x006c}, {1, {0xfb04 }}}, - { {0x03b1, 0x0342, 0x03b9}, {1, {0x1fb7 }}}, - { {0x03b7, 0x0342, 0x03b9}, {1, {0x1fc7 }}}, - { {0x03b9, 0x0308, 0x0300}, {1, {0x1fd2 }}}, - { {0x03b9, 0x0308, 0x0301}, {2, {0x0390, 0x1fd3 }}}, - { {0x03b9, 0x0308, 0x0342}, {1, {0x1fd7 }}}, - { {0x03c5, 0x0308, 0x0300}, {1, {0x1fe2 }}}, - { {0x03c5, 0x0308, 0x0301}, {2, {0x03b0, 0x1fe3 }}}, - { {0x03c5, 0x0308, 0x0342}, {1, {0x1fe7 }}}, - { {0x03c5, 0x0313, 0x0300}, {1, {0x1f52 }}}, - { {0x03c5, 0x0313, 0x0301}, {1, {0x1f54 }}}, - { {0x03c5, 0x0313, 0x0342}, {1, {0x1f56 }}}, - { {0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7 }}} -}; - - -static PosixBracketEntryType HashEntryData[] = { - { (UChar* )"NEWLINE", 0, 7 }, - { (UChar* )"Alpha", 1, 5 }, - { (UChar* )"Blank", 2, 5 }, - { (UChar* )"Cntrl", 3, 5 }, - { (UChar* )"Digit", 4, 5 }, - { (UChar* )"Graph", 5, 5 }, - { (UChar* )"Lower", 6, 5 }, - { (UChar* )"Print", 7, 5 }, - { (UChar* )"Punct", 8, 5 }, - { (UChar* )"Space", 9, 5 }, - { (UChar* )"Upper", 10, 5 }, - { (UChar* )"XDigit", 11, 6 }, - { (UChar* )"Word", 12, 4 }, - { (UChar* )"Alnum", 13, 5 }, - { (UChar* )"ASCII", 14, 5 }, - -#ifdef USE_UNICODE_PROPERTIES - { (UChar* )"Any", 15, 3 }, - { (UChar* )"Assigned", 16, 8 }, - { (UChar* )"C", 17, 1 }, - { (UChar* )"Cc", 18, 2 }, - { (UChar* )"Cf", 19, 2 }, - { (UChar* )"Cn", 20, 2 }, - { (UChar* )"Co", 21, 2 }, - { (UChar* )"Cs", 22, 2 }, - { (UChar* )"L", 23, 1 }, - { (UChar* )"Ll", 24, 2 }, - { (UChar* )"Lm", 25, 2 }, - { (UChar* 
)"Lo", 26, 2 }, - { (UChar* )"Lt", 27, 2 }, - { (UChar* )"Lu", 28, 2 }, - { (UChar* )"M", 29, 1 }, - { (UChar* )"Mc", 30, 2 }, - { (UChar* )"Me", 31, 2 }, - { (UChar* )"Mn", 32, 2 }, - { (UChar* )"N", 33, 1 }, - { (UChar* )"Nd", 34, 2 }, - { (UChar* )"Nl", 35, 2 }, - { (UChar* )"No", 36, 2 }, - { (UChar* )"P", 37, 1 }, - { (UChar* )"Pc", 38, 2 }, - { (UChar* )"Pd", 39, 2 }, - { (UChar* )"Pe", 40, 2 }, - { (UChar* )"Pf", 41, 2 }, - { (UChar* )"Pi", 42, 2 }, - { (UChar* )"Po", 43, 2 }, - { (UChar* )"Ps", 44, 2 }, - { (UChar* )"S", 45, 1 }, - { (UChar* )"Sc", 46, 2 }, - { (UChar* )"Sk", 47, 2 }, - { (UChar* )"Sm", 48, 2 }, - { (UChar* )"So", 49, 2 }, - { (UChar* )"Z", 50, 1 }, - { (UChar* )"Zl", 51, 2 }, - { (UChar* )"Zp", 52, 2 }, - { (UChar* )"Zs", 53, 2 }, - { (UChar* )"Arabic", 54, 6 }, - { (UChar* )"Armenian", 55, 8 }, - { (UChar* )"Bengali", 56, 7 }, - { (UChar* )"Bopomofo", 57, 8 }, - { (UChar* )"Braille", 58, 7 }, - { (UChar* )"Buginese", 59, 8 }, - { (UChar* )"Buhid", 60, 5 }, - { (UChar* )"Canadian_Aboriginal", 61, 19 }, - { (UChar* )"Cherokee", 62, 8 }, - { (UChar* )"Common", 63, 6 }, - { (UChar* )"Coptic", 64, 6 }, - { (UChar* )"Cypriot", 65, 7 }, - { (UChar* )"Cyrillic", 66, 8 }, - { (UChar* )"Deseret", 67, 7 }, - { (UChar* )"Devanagari", 68, 10 }, - { (UChar* )"Ethiopic", 69, 8 }, - { (UChar* )"Georgian", 70, 8 }, - { (UChar* )"Glagolitic", 71, 10 }, - { (UChar* )"Gothic", 72, 6 }, - { (UChar* )"Greek", 73, 5 }, - { (UChar* )"Gujarati", 74, 8 }, - { (UChar* )"Gurmukhi", 75, 8 }, - { (UChar* )"Han", 76, 3 }, - { (UChar* )"Hangul", 77, 6 }, - { (UChar* )"Hanunoo", 78, 7 }, - { (UChar* )"Hebrew", 79, 6 }, - { (UChar* )"Hiragana", 80, 8 }, - { (UChar* )"Inherited", 81, 9 }, - { (UChar* )"Kannada", 82, 7 }, - { (UChar* )"Katakana", 83, 8 }, - { (UChar* )"Kharoshthi", 84, 10 }, - { (UChar* )"Khmer", 85, 5 }, - { (UChar* )"Lao", 86, 3 }, - { (UChar* )"Latin", 87, 5 }, - { (UChar* )"Limbu", 88, 5 }, - { (UChar* )"Linear_B", 89, 8 }, - { (UChar* )"Malayalam", 90, 
9 }, - { (UChar* )"Mongolian", 91, 9 }, - { (UChar* )"Myanmar", 92, 7 }, - { (UChar* )"New_Tai_Lue", 93, 11 }, - { (UChar* )"Ogham", 94, 5 }, - { (UChar* )"Old_Italic", 95, 10 }, - { (UChar* )"Old_Persian", 96, 11 }, - { (UChar* )"Oriya", 97, 5 }, - { (UChar* )"Osmanya", 98, 7 }, - { (UChar* )"Runic", 99, 5 }, - { (UChar* )"Shavian", 100, 7 }, - { (UChar* )"Sinhala", 101, 7 }, - { (UChar* )"Syloti_Nagri", 102, 12 }, - { (UChar* )"Syriac", 103, 6 }, - { (UChar* )"Tagalog", 104, 7 }, - { (UChar* )"Tagbanwa", 105, 8 }, - { (UChar* )"Tai_Le", 106, 6 }, - { (UChar* )"Tamil", 107, 5 }, - { (UChar* )"Telugu", 108, 6 }, - { (UChar* )"Thaana", 109, 6 }, - { (UChar* )"Thai", 110, 4 }, - { (UChar* )"Tibetan", 111, 7 }, - { (UChar* )"Tifinagh", 112, 8 }, - { (UChar* )"Ugaritic", 113, 8 }, - { (UChar* )"Yi", 114, 2 }, -#endif /* USE_UNICODE_PROPERTIES */ - { (UChar* )NULL, -1, 0 } -}; - -#ifdef USE_UNICODE_PROPERTIES -#define CODE_RANGES_NUM 115 -#else -#define CODE_RANGES_NUM 15 -#endif - -static const OnigCodePoint* CodeRanges[CODE_RANGES_NUM]; -static int CodeRangeTableInited = 0; - -static void init_code_range_array(void) { - THREAD_ATOMIC_START; - - CodeRanges[0] = CR_NEWLINE; - CodeRanges[1] = CR_Alpha; - CodeRanges[2] = CR_Blank; - CodeRanges[3] = CR_Cntrl; - CodeRanges[4] = CR_Digit; - CodeRanges[5] = CR_Graph; - CodeRanges[6] = CR_Lower; - CodeRanges[7] = CR_Print; - CodeRanges[8] = CR_Punct; - CodeRanges[9] = CR_Space; - CodeRanges[10] = CR_Upper; - CodeRanges[11] = CR_XDigit; - CodeRanges[12] = CR_Word; - CodeRanges[13] = CR_Alnum; - CodeRanges[14] = CR_ASCII; - -#ifdef USE_UNICODE_PROPERTIES - CodeRanges[15] = CR_Any; - CodeRanges[16] = CR_Assigned; - CodeRanges[17] = CR_C; - CodeRanges[18] = CR_Cc; - CodeRanges[19] = CR_Cf; - CodeRanges[20] = CR_Cn; - CodeRanges[21] = CR_Co; - CodeRanges[22] = CR_Cs; - CodeRanges[23] = CR_L; - CodeRanges[24] = CR_Ll; - CodeRanges[25] = CR_Lm; - CodeRanges[26] = CR_Lo; - CodeRanges[27] = CR_Lt; - CodeRanges[28] = CR_Lu; - 
CodeRanges[29] = CR_M; - CodeRanges[30] = CR_Mc; - CodeRanges[31] = CR_Me; - CodeRanges[32] = CR_Mn; - CodeRanges[33] = CR_N; - CodeRanges[34] = CR_Nd; - CodeRanges[35] = CR_Nl; - CodeRanges[36] = CR_No; - CodeRanges[37] = CR_P; - CodeRanges[38] = CR_Pc; - CodeRanges[39] = CR_Pd; - CodeRanges[40] = CR_Pe; - CodeRanges[41] = CR_Pf; - CodeRanges[42] = CR_Pi; - CodeRanges[43] = CR_Po; - CodeRanges[44] = CR_Ps; - CodeRanges[45] = CR_S; - CodeRanges[46] = CR_Sc; - CodeRanges[47] = CR_Sk; - CodeRanges[48] = CR_Sm; - CodeRanges[49] = CR_So; - CodeRanges[50] = CR_Z; - CodeRanges[51] = CR_Zl; - CodeRanges[52] = CR_Zp; - CodeRanges[53] = CR_Zs; - CodeRanges[54] = CR_Arabic; - CodeRanges[55] = CR_Armenian; - CodeRanges[56] = CR_Bengali; - CodeRanges[57] = CR_Bopomofo; - CodeRanges[58] = CR_Braille; - CodeRanges[59] = CR_Buginese; - CodeRanges[60] = CR_Buhid; - CodeRanges[61] = CR_Canadian_Aboriginal; - CodeRanges[62] = CR_Cherokee; - CodeRanges[63] = CR_Common; - CodeRanges[64] = CR_Coptic; - CodeRanges[65] = CR_Cypriot; - CodeRanges[66] = CR_Cyrillic; - CodeRanges[67] = CR_Deseret; - CodeRanges[68] = CR_Devanagari; - CodeRanges[69] = CR_Ethiopic; - CodeRanges[70] = CR_Georgian; - CodeRanges[71] = CR_Glagolitic; - CodeRanges[72] = CR_Gothic; - CodeRanges[73] = CR_Greek; - CodeRanges[74] = CR_Gujarati; - CodeRanges[75] = CR_Gurmukhi; - CodeRanges[76] = CR_Han; - CodeRanges[77] = CR_Hangul; - CodeRanges[78] = CR_Hanunoo; - CodeRanges[79] = CR_Hebrew; - CodeRanges[80] = CR_Hiragana; - CodeRanges[81] = CR_Inherited; - CodeRanges[82] = CR_Kannada; - CodeRanges[83] = CR_Katakana; - CodeRanges[84] = CR_Kharoshthi; - CodeRanges[85] = CR_Khmer; - CodeRanges[86] = CR_Lao; - CodeRanges[87] = CR_Latin; - CodeRanges[88] = CR_Limbu; - CodeRanges[89] = CR_Linear_B; - CodeRanges[90] = CR_Malayalam; - CodeRanges[91] = CR_Mongolian; - CodeRanges[92] = CR_Myanmar; - CodeRanges[93] = CR_New_Tai_Lue; - CodeRanges[94] = CR_Ogham; - CodeRanges[95] = CR_Old_Italic; - CodeRanges[96] = CR_Old_Persian; 
- CodeRanges[97] = CR_Oriya; - CodeRanges[98] = CR_Osmanya; - CodeRanges[99] = CR_Runic; - CodeRanges[100] = CR_Shavian; - CodeRanges[101] = CR_Sinhala; - CodeRanges[102] = CR_Syloti_Nagri; - CodeRanges[103] = CR_Syriac; - CodeRanges[104] = CR_Tagalog; - CodeRanges[105] = CR_Tagbanwa; - CodeRanges[106] = CR_Tai_Le; - CodeRanges[107] = CR_Tamil; - CodeRanges[108] = CR_Telugu; - CodeRanges[109] = CR_Thaana; - CodeRanges[110] = CR_Thai; - CodeRanges[111] = CR_Tibetan; - CodeRanges[112] = CR_Tifinagh; - CodeRanges[113] = CR_Ugaritic; - CodeRanges[114] = CR_Yi; -#endif /* USE_UNICODE_PROPERTIES */ - - CodeRangeTableInited = 1; - THREAD_ATOMIC_END; -} - -extern int -onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) -{ - if ( -#ifdef USE_UNICODE_PROPERTIES - ctype <= ONIGENC_MAX_STD_CTYPE && -#endif - code < 256) { - return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); - } - - if (ctype >= CODE_RANGES_NUM) { - return ONIGERR_TYPE_BUG; - } - - if (CodeRangeTableInited == 0) init_code_range_array(); - - return onig_is_in_code_range((UChar* )CodeRanges[ctype], code); -} - - -extern int -onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]) -{ - if (ctype >= CODE_RANGES_NUM) { - return ONIGERR_TYPE_BUG; - } - - if (CodeRangeTableInited == 0) init_code_range_array(); - - *ranges = CodeRanges[ctype]; - - return 0; -} - -extern int -onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, - const OnigCodePoint* ranges[]) -{ - *sb_out = 0x00; - return onigenc_unicode_ctype_code_range(ctype, ranges); -} - -#include "st.h" - -#define PROPERTY_NAME_MAX_SIZE 20 - -static st_table* NameCtypeTable; -static int NameTableInited = 0; - -static int init_name_ctype_table(void) -{ - PosixBracketEntryType *pb; - - THREAD_ATOMIC_START; - - NameCtypeTable = onig_st_init_strend_table_with_size(100); - if (ONIG_IS_NULL(NameCtypeTable)) return ONIGERR_MEMORY; - - for (pb = HashEntryData; ONIG_IS_NOT_NULL(pb->name); pb++) { - 
onig_st_insert_strend(NameCtypeTable, pb->name, pb->name + pb->len, - (st_data_t )pb->ctype); - } - - NameTableInited = 1; - THREAD_ATOMIC_END; - return 0; -} - -extern int -onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end) -{ - int len; - hash_data_type ctype; - UChar buf[PROPERTY_NAME_MAX_SIZE]; - UChar *p; - OnigCodePoint code; - - p = name; - len = 0; - while (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (code >= 0x80) - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - - buf[len++] = (UChar )code; - if (len >= PROPERTY_NAME_MAX_SIZE) - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - - p += enclen(enc, p); - } - - buf[len] = 0; - - if (NameTableInited == 0) init_name_ctype_table(); - - if (onig_st_lookup_strend(NameCtypeTable, buf, buf + len, &ctype) == 0) { - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - } - - return (int )ctype; -} - - -static int -code2_cmp(OnigCodePoint* x, OnigCodePoint* y) -{ - if (x[0] == y[0] && x[1] == y[1]) return 0; - return 1; -} - -static int -code2_hash(OnigCodePoint* x) -{ - return (int )(x[0] + x[1]); -} - -static struct st_hash_type type_code2_hash = { - code2_cmp, - code2_hash, -}; - -static int -code3_cmp(OnigCodePoint* x, OnigCodePoint* y) -{ - if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0; - return 1; -} - -static int -code3_hash(OnigCodePoint* x) -{ - return (int )(x[0] + x[1] + x[2]); -} - -static struct st_hash_type type_code3_hash = { - code3_cmp, - code3_hash, -}; - - -static st_table* FoldTable; /* fold-1, fold-2, fold-3 */ -static st_table* Unfold1Table; -static st_table* Unfold2Table; -static st_table* Unfold3Table; -static int CaseFoldInited = 0; - - -extern void onigenc_end_unicode(void) -{ - THREAD_ATOMIC_START; - - if (FoldTable != 0) st_free_table(FoldTable); - if (Unfold1Table != 0) st_free_table(Unfold1Table); - if (Unfold2Table != 0) st_free_table(Unfold2Table); - if (Unfold3Table != 0) st_free_table(Unfold3Table); - - CaseFoldInited = 0; - - 
THREAD_ATOMIC_END; -} - -static int init_case_fold_table(void) -{ - const CaseFold_11_Type *p; - const CaseUnfold_11_Type *p1; - const CaseUnfold_12_Type *p2; - const CaseUnfold_13_Type *p3; - int i; - - THREAD_ATOMIC_START; - - FoldTable = st_init_numtable_with_size(1200); - if (ONIG_IS_NULL(FoldTable)) return ONIGERR_MEMORY; - for (i = 0; i < (int )(sizeof(CaseFold)/sizeof(CaseFold_11_Type)); i++) { - p = &CaseFold[i]; - st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); - } - for (i = 0; i < (int )(sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type)); - i++) { - p = &CaseFold_Locale[i]; - st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); - } - - Unfold1Table = st_init_numtable_with_size(1000); - if (ONIG_IS_NULL(Unfold1Table)) return ONIGERR_MEMORY; - - for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); - i++) { - p1 = &CaseUnfold_11[i]; - st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); - } - for (i = 0; - i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); - i++) { - p1 = &CaseUnfold_11_Locale[i]; - st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); - } - - Unfold2Table = st_init_table_with_size(&type_code2_hash, 200); - if (ONIG_IS_NULL(Unfold2Table)) return ONIGERR_MEMORY; - - for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); - i++) { - p2 = &CaseUnfold_12[i]; - st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); - } - for (i = 0; - i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); - i++) { - p2 = &CaseUnfold_12_Locale[i]; - st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); - } - - Unfold3Table = st_init_table_with_size(&type_code3_hash, 30); - if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY; - - for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); - i++) { - p3 = &CaseUnfold_13[i]; - st_add_direct(Unfold3Table, (st_data_t 
)p3->from, (st_data_t )(&p3->to)); - } - - - onig_add_end_call(onigenc_end_unicode); - - CaseFoldInited = 1; - THREAD_ATOMIC_END; - return 0; -} - -extern int -onigenc_unicode_mbc_case_fold(OnigEncoding enc, - OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end, - UChar* fold) -{ - CodePointList3 *to; - OnigCodePoint code; - int i, len, rlen; - const UChar *p = *pp; - - if (CaseFoldInited == 0) init_case_fold_table(); - - code = ONIGENC_MBC_TO_CODE(enc, p, end); - len = enclen(enc, p); - *pp += len; - -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { - if (code == 0x0049) { - return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold); - } - else if (code == 0x0130) { - return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold); - } - } -#endif - - if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { - if (to->n == 1) { - return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold); - } -#if 0 - /* NO NEEDS TO CHECK */ - else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { -#else - else { -#endif - rlen = 0; - for (i = 0; i < to->n; i++) { - len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold); - fold += len; - rlen += len; - } - return rlen; - } - } - - for (i = 0; i < len; i++) { - *fold++ = *p++; - } - return len; -} - -extern int -onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, - OnigApplyAllCaseFoldFunc f, void* arg) -{ - const CaseUnfold_11_Type* p11; - OnigCodePoint code; - int i, j, k, r; - - /* if (CaseFoldInited == 0) init_case_fold_table(); */ - - for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); - i++) { - p11 = &CaseUnfold_11[i]; - for (j = 0; j < p11->to.n; j++) { - code = p11->from; - r = (*f)(p11->to.code[j], &code, 1, arg); - if (r != 0) return r; - - code = p11->to.code[j]; - r = (*f)(p11->from, &code, 1, arg); - if (r != 0) return r; - - for (k = 0; k < j; k++) { - r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg); - if (r != 0) return r; 
- - r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg); - if (r != 0) return r; - } - } - } - -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { - code = 0x0131; - r = (*f)(0x0049, &code, 1, arg); - if (r != 0) return r; - code = 0x0049; - r = (*f)(0x0131, &code, 1, arg); - if (r != 0) return r; - - code = 0x0130; - r = (*f)(0x0069, &code, 1, arg); - if (r != 0) return r; - code = 0x0069; - r = (*f)(0x0130, &code, 1, arg); - if (r != 0) return r; - } - else { -#endif - for (i = 0; - i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); - i++) { - p11 = &CaseUnfold_11_Locale[i]; - for (j = 0; j < p11->to.n; j++) { - code = p11->from; - r = (*f)(p11->to.code[j], &code, 1, arg); - if (r != 0) return r; - - code = p11->to.code[j]; - r = (*f)(p11->from, &code, 1, arg); - if (r != 0) return r; - - for (k = 0; k < j; k++) { - r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), - 1, arg); - if (r != 0) return r; - - r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), - 1, arg); - if (r != 0) return r; - } - } - } -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - } -#endif - - if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); - i++) { - for (j = 0; j < CaseUnfold_12[i].to.n; j++) { - r = (*f)(CaseUnfold_12[i].to.code[j], - (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg); - if (r != 0) return r; - - for (k = 0; k < CaseUnfold_12[i].to.n; k++) { - if (k == j) continue; - - r = (*f)(CaseUnfold_12[i].to.code[j], - (OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg); - if (r != 0) return r; - } - } - } - -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) { -#endif - for (i = 0; - i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); - i++) { - for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) { - r = 
(*f)(CaseUnfold_12_Locale[i].to.code[j], - (OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg); - if (r != 0) return r; - - for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) { - if (k == j) continue; - - r = (*f)(CaseUnfold_12_Locale[i].to.code[j], - (OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]), - 1, arg); - if (r != 0) return r; - } - } - } -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - } -#endif - - for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); - i++) { - for (j = 0; j < CaseUnfold_13[i].to.n; j++) { - r = (*f)(CaseUnfold_13[i].to.code[j], - (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg); - if (r != 0) return r; - - for (k = 0; k < CaseUnfold_13[i].to.n; k++) { - if (k == j) continue; - - r = (*f)(CaseUnfold_13[i].to.code[j], - (OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg); - if (r != 0) return r; - } - } - } - } - - return 0; -} - -extern int -onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, - OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, - OnigCaseFoldCodeItem items[]) -{ - int n, i, j, k, len; - OnigCodePoint code, codes[3]; - CodePointList3 *to, *z3; - CodePointList2 *z2; - - if (CaseFoldInited == 0) init_case_fold_table(); - - n = 0; - - code = ONIGENC_MBC_TO_CODE(enc, p, end); - len = enclen(enc, p); - -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { - if (code == 0x0049) { - items[0].byte_len = len; - items[0].code_len = 1; - items[0].code[0] = 0x0131; - return 1; - } - else if (code == 0x0130) { - items[0].byte_len = len; - items[0].code_len = 1; - items[0].code[0] = 0x0069; - return 1; - } - else if (code == 0x0131) { - items[0].byte_len = len; - items[0].code_len = 1; - items[0].code[0] = 0x0049; - return 1; - } - else if (code == 0x0069) { - items[0].byte_len = len; - items[0].code_len = 1; - items[0].code[0] = 0x0130; - return 1; - } - } -#endif - - if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { - 
if (to->n == 1) { - OnigCodePoint orig_code = code; - - items[0].byte_len = len; - items[0].code_len = 1; - items[0].code[0] = to->code[0]; - n++; - - code = to->code[0]; - if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { - for (i = 0; i < to->n; i++) { - if (to->code[i] != orig_code) { - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = to->code[i]; - n++; - } - } - } - } - else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - OnigCodePoint cs[3][4]; - int fn, ncs[3]; - - for (fn = 0; fn < to->n; fn++) { - cs[fn][0] = to->code[fn]; - if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0], - (void* )&z3) != 0) { - for (i = 0; i < z3->n; i++) { - cs[fn][i+1] = z3->code[i]; - } - ncs[fn] = z3->n + 1; - } - else - ncs[fn] = 1; - } - - if (fn == 2) { - for (i = 0; i < ncs[0]; i++) { - for (j = 0; j < ncs[1]; j++) { - items[n].byte_len = len; - items[n].code_len = 2; - items[n].code[0] = cs[0][i]; - items[n].code[1] = cs[1][j]; - n++; - } - } - - if (onig_st_lookup(Unfold2Table, (st_data_t )to->code, - (void* )&z2) != 0) { - for (i = 0; i < z2->n; i++) { - if (z2->code[i] == code) continue; - - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = z2->code[i]; - n++; - } - } - } - else { - for (i = 0; i < ncs[0]; i++) { - for (j = 0; j < ncs[1]; j++) { - for (k = 0; k < ncs[2]; k++) { - items[n].byte_len = len; - items[n].code_len = 3; - items[n].code[0] = cs[0][i]; - items[n].code[1] = cs[1][j]; - items[n].code[2] = cs[2][k]; - n++; - } - } - } - - if (onig_st_lookup(Unfold3Table, (st_data_t )to->code, - (void* )&z2) != 0) { - for (i = 0; i < z2->n; i++) { - if (z2->code[i] == code) continue; - - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = z2->code[i]; - n++; - } - } - } - - /* multi char folded code is not head of another folded multi char */ - flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */ - } - } - else { - if (onig_st_lookup(Unfold1Table, (st_data_t )code, 
(void* )&to) != 0) { - for (i = 0; i < to->n; i++) { - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = to->code[i]; - n++; - } - } - } - - - if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - p += len; - if (p < end) { - int clen; - - codes[0] = code; - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 - && to->n == 1) { - codes[1] = to->code[0]; - } - else - codes[1] = code; - - clen = enclen(enc, p); - len += clen; - if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) { - for (i = 0; i < z2->n; i++) { - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = z2->code[i]; - n++; - } - } - - p += clen; - if (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 - && to->n == 1) { - codes[2] = to->code[0]; - } - else - codes[2] = code; - - clen = enclen(enc, p); - len += clen; - if (onig_st_lookup(Unfold3Table, (st_data_t )codes, - (void* )&z2) != 0) { - for (i = 0; i < z2->n; i++) { - items[n].byte_len = len; - items[n].code_len = 1; - items[n].code[0] = z2->code[i]; - n++; - } - } - } - } - } - - return n; -} diff --git a/src/openalpr/support/regex/utf8.c b/src/openalpr/support/regex/utf8.c deleted file mode 100644 index 5e2c172..0000000 --- a/src/openalpr/support/regex/utf8.c +++ /dev/null @@ -1,305 +0,0 @@ -/********************************************************************** - utf8.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2007 K.Kosako - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regenc.h" - -#define USE_INVALID_CODE_SCHEME - -#ifdef USE_INVALID_CODE_SCHEME -/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */ -#define INVALID_CODE_FE 0xfffffffe -#define INVALID_CODE_FF 0xffffffff -#define VALID_CODE_LIMIT 0x7fffffff -#endif - -#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) - -static const int EncLen_UTF8[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 -}; - -static int -mbc_enc_len(const UChar* p) -{ - return EncLen_UTF8[*p]; -} - -static int -is_mbc_newline(const UChar* p, const UChar* end) -{ - if (p < end) { - if (*p == 0x0a) return 1; - -#ifdef USE_UNICODE_ALL_LINE_TERMINATORS -#ifndef USE_CRNL_AS_LINE_TERMINATOR - if (*p == 0x0d) return 1; -#endif - if (p + 1 < end) { - if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ - return 1; - if (p + 2 < end) { - if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) - && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ - return 1; - } - } -#endif - } - - return 0; -} - -static OnigCodePoint -mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) -{ - int c, len; - OnigCodePoint n; - - len = enclen(ONIG_ENCODING_UTF8, p); - c = *p++; - if (len > 1) { - len--; - n = c & ((1 << (6 - len)) - 1); - while (len--) { - c = *p++; - n = (n << 6) | (c & ((1 << 6) - 1)); - } - return n; - } - else { -#ifdef USE_INVALID_CODE_SCHEME - if (c > 0xfd) { - return ((c == 0xfe) ? 
INVALID_CODE_FE : INVALID_CODE_FF); - } -#endif - return (OnigCodePoint )c; - } -} - -static int -code_to_mbclen(OnigCodePoint code) -{ - if ((code & 0xffffff80) == 0) return 1; - else if ((code & 0xfffff800) == 0) return 2; - else if ((code & 0xffff0000) == 0) return 3; - else if ((code & 0xffe00000) == 0) return 4; - else if ((code & 0xfc000000) == 0) return 5; - else if ((code & 0x80000000) == 0) return 6; -#ifdef USE_INVALID_CODE_SCHEME - else if (code == INVALID_CODE_FE) return 1; - else if (code == INVALID_CODE_FF) return 1; -#endif - else - return ONIGERR_INVALID_CODE_POINT_VALUE; -} - -static int -code_to_mbc(OnigCodePoint code, UChar *buf) -{ -#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80) -#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80) - - if ((code & 0xffffff80) == 0) { - *buf = (UChar )code; - return 1; - } - else { - UChar *p = buf; - - if ((code & 0xfffff800) == 0) { - *p++ = (UChar )(((code>>6)& 0x1f) | 0xc0); - } - else if ((code & 0xffff0000) == 0) { - *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0); - *p++ = UTF8_TRAILS(code, 6); - } - else if ((code & 0xffe00000) == 0) { - *p++ = (UChar )(((code>>18) & 0x07) | 0xf0); - *p++ = UTF8_TRAILS(code, 12); - *p++ = UTF8_TRAILS(code, 6); - } - else if ((code & 0xfc000000) == 0) { - *p++ = (UChar )(((code>>24) & 0x03) | 0xf8); - *p++ = UTF8_TRAILS(code, 18); - *p++ = UTF8_TRAILS(code, 12); - *p++ = UTF8_TRAILS(code, 6); - } - else if ((code & 0x80000000) == 0) { - *p++ = (UChar )(((code>>30) & 0x01) | 0xfc); - *p++ = UTF8_TRAILS(code, 24); - *p++ = UTF8_TRAILS(code, 18); - *p++ = UTF8_TRAILS(code, 12); - *p++ = UTF8_TRAILS(code, 6); - } -#ifdef USE_INVALID_CODE_SCHEME - else if (code == INVALID_CODE_FE) { - *p = 0xfe; - return 1; - } - else if (code == INVALID_CODE_FF) { - *p = 0xff; - return 1; - } -#endif - else { - return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - } - - *p++ = UTF8_TRAIL0(code); - return p - buf; - } -} - -static int -mbc_case_fold(OnigCaseFoldType flag, 
const UChar** pp, - const UChar* end, UChar* fold) -{ - const UChar* p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { -#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI - if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { - if (*p == 0x49) { - *fold++ = 0xc4; - *fold = 0xb1; - (*pp)++; - return 2; - } - } -#endif - - *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - (*pp)++; - return 1; /* return byte length of converted char to lower */ - } - else { - return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF8, flag, - pp, end, fold); - } -} - -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { - (*pp)++; - return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); - } - else { - (*pp) += enclen(ONIG_ENCODING_UTF8, p); - - if (*p == 0xc3) { - int c = *(p + 1); - if (c >= 0x80) { - if (c <= (UChar )0x9e) { /* upper */ - if (c == (UChar )0x97) return FALSE; - return TRUE; - } - else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */ - if (c == (UChar )'\267') return FALSE; - return TRUE; - } - else if (c == (UChar )0x9f && - (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - return TRUE; - } - } - } - } - - return FALSE; -} -#endif - - -static int -get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, - const OnigCodePoint* ranges[]) -{ - *sb_out = 0x80; - return onigenc_unicode_ctype_code_range(ctype, ranges); -} - - -static UChar* -left_adjust_char_head(const UChar* start, const UChar* s) -{ - const UChar *p; - - if (s <= start) return (UChar* )s; - p = s; - - while (!utf8_islead(*p) && p > start) p--; - return (UChar* )p; -} - -static int -get_case_fold_codes_by_str(OnigCaseFoldType flag, - const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) -{ - return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF8, - flag, p, end, items); -} - -OnigEncodingType OnigEncodingUTF8 = { - mbc_enc_len, - "UTF-8", /* name */ - 6, /* max byte length */ - 1, 
/* min byte length */ - is_mbc_newline, - mbc_to_code, - code_to_mbclen, - code_to_mbc, - mbc_case_fold, - onigenc_unicode_apply_all_case_fold, - get_case_fold_codes_by_str, - onigenc_unicode_property_name_to_ctype, - onigenc_unicode_is_code_ctype, - get_ctype_code_range, - left_adjust_char_head, - onigenc_always_true_is_allowed_reverse_match -}; diff --git a/src/openalpr/support/regex/win32/onig_config.h b/src/openalpr/support/regex/win32/onig_config.h deleted file mode 100644 index 7ee9e25..0000000 --- a/src/openalpr/support/regex/win32/onig_config.h +++ /dev/null @@ -1,84 +0,0 @@ -#define STDC_HEADERS 1 -#define HAVE_SYS_TYPES_H 1 -#define HAVE_SYS_STAT_H 1 -#define HAVE_STDLIB_H 1 -#define HAVE_STRING_H 1 -#define HAVE_MEMORY_H 1 -#define HAVE_FLOAT_H 1 -#define HAVE_OFF_T 1 -#define SIZEOF_INT 4 -#define SIZEOF_SHORT 2 -#define SIZEOF_LONG 4 -#define SIZEOF_LONG_LONG 0 -#define SIZEOF___INT64 8 -#define SIZEOF_OFF_T 4 -#define SIZEOF_VOIDP 4 -#define SIZEOF_FLOAT 4 -#define SIZEOF_DOUBLE 8 -#define HAVE_PROTOTYPES 1 -#define TOKEN_PASTE(x,y) x##y -#define HAVE_STDARG_PROTOTYPES 1 -#ifndef NORETURN -#if _MSC_VER > 1100 -#define NORETURN(x) __declspec(noreturn) x -#else -#define NORETURN(x) x -#endif -#endif -#define HAVE_DECL_SYS_NERR 1 -#define STDC_HEADERS 1 -#define HAVE_STDLIB_H 1 -#define HAVE_STRING_H 1 -#define HAVE_LIMITS_H 1 -#define HAVE_FCNTL_H 1 -#define HAVE_SYS_UTIME_H 1 -#define HAVE_MEMORY_H 1 -#define uid_t int -#define gid_t int -#define HAVE_STRUCT_STAT_ST_RDEV 1 -#define HAVE_ST_RDEV 1 -#define GETGROUPS_T int -#define RETSIGTYPE void -#define HAVE_ALLOCA 1 -#define HAVE_DUP2 1 -#define HAVE_MEMCMP 1 -#define HAVE_MEMMOVE 1 -#define HAVE_MKDIR 1 -#define HAVE_STRCASECMP 1 -#define HAVE_STRNCASECMP 1 -#define HAVE_STRERROR 1 -#define HAVE_STRFTIME 1 -#define HAVE_STRCHR 1 -#define HAVE_STRSTR 1 -#define HAVE_STRTOD 1 -#define HAVE_STRTOL 1 -#define HAVE_STRTOUL 1 -#define HAVE_FLOCK 1 -#define HAVE_VSNPRINTF 1 -#define HAVE_FINITE 1 -#define 
HAVE_FMOD 1 -#define HAVE_FREXP 1 -#define HAVE_HYPOT 1 -#define HAVE_MODF 1 -#define HAVE_WAITPID 1 -#define HAVE_CHSIZE 1 -#define HAVE_TIMES 1 -#define HAVE__SETJMP 1 -#define HAVE_TELLDIR 1 -#define HAVE_SEEKDIR 1 -#define HAVE_MKTIME 1 -#define HAVE_COSH 1 -#define HAVE_SINH 1 -#define HAVE_TANH 1 -#define HAVE_EXECVE 1 -#define HAVE_TZNAME 1 -#define HAVE_DAYLIGHT 1 -#define SETPGRP_VOID 1 -#define inline __inline -#define NEED_IO_SEEK_BETWEEN_RW 1 -#define RSHIFT(x,y) ((x)>>(int)y) -#define FILE_COUNT _cnt -#define FILE_READPTR _ptr -#define DEFAULT_KCODE KCODE_NONE -#define DLEXT ".so" -#define DLEXT2 ".dll"