diff --git a/oniguruma/doc/API b/oniguruma/doc/API index 2309e5ec6..049db02b0 100644 --- a/oniguruma/doc/API +++ b/oniguruma/doc/API @@ -1,4 +1,4 @@ -Oniguruma API Version 6.9.2 2019/03/25 +Oniguruma API Version 6.9.3 2019/07/06 #include @@ -168,6 +168,9 @@ Oniguruma API Version 6.9.2 2019/03/25 # int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo) + This function is deprecated, and it does not allow the case where + the encoding of pattern and target is different. + Create a regex object. This function is deluxe version of onig_new(). @@ -299,6 +302,7 @@ Oniguruma API Version 6.9.2 2019/03/25 const UChar* range, OnigRegion* region, OnigOptionType option) Search string and return search result and matching region. + Do not pass invalid byte string in the regex character encoding. normal return: match position offset (i.e. p - str >= 0) not found: ONIG_MISMATCH (< 0) @@ -323,15 +327,19 @@ Oniguruma API Version 6.9.2 2019/03/25 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp) - arguments - 1-7: same as onig_search() - 8 mp: match parameter values (match_stack_limit, retry_limit_in_match) + Search string and return search result and matching region. + Do not pass invalid byte string in the regex character encoding. + + arguments + 1-7: same as onig_search() + 8 mp: match parameter values (match_stack_limit, retry_limit_in_match) # int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option) Match string and return result and matching region. + Do not pass invalid byte string in the regex character encoding. normal return: match length (>= 0) not match: ONIG_MISMATCH ( < 0) @@ -353,6 +361,9 @@ Oniguruma API Version 6.9.2 2019/03/25 const UChar* at, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp) + Match string and return result and matching region. 
+ Do not pass invalid byte string in the regex character encoding. + arguments 1-6: same as onig_match() 7 mp: match parameter values (match_stack_limit, retry_limit_in_match) @@ -364,6 +375,7 @@ Oniguruma API Version 6.9.2 2019/03/25 void* callback_arg) Scan string and callback with matching region. + Do not pass invalid byte string in the regex character encoding. normal return: number of matching times error: error code @@ -611,14 +623,20 @@ Oniguruma API Version 6.9.2 2019/03/25 # int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end) + + Return number of characters in the string. + + # int onigenc_strlen_null(OnigEncoding enc, const UChar* s) Return number of characters in the string. + Do not pass invalid byte string in the character encoding. # int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) Return number of bytes in the string. + Do not pass invalid byte string in the character encoding. # int onig_set_default_syntax(OnigSyntaxType* syntax) diff --git a/oniguruma/src/ascii.c b/oniguruma/src/ascii.c index e83e4d64f..82c1d16b3 100644 --- a/oniguruma/src/ascii.c +++ b/oniguruma/src/ascii.c @@ -1,4 +1,4 @@ -/********************************************************************** +/********************************************************************** ascii.c - Oniguruma (regular expression library) **********************************************************************/ /*- @@ -98,7 +98,11 @@ OnigEncodingType OnigEncodingASCII = { "US-ASCII", /* name */ 1, /* max enc length */ 1, /* min enc length */ +#ifdef USE_CRNL_AS_LINE_TERMINATOR + onigenc_is_mbc_newline_0x0a_or_0x0d, +#else onigenc_is_mbc_newline_0x0a, +#endif onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, diff --git a/oniguruma/src/regenc.c b/oniguruma/src/regenc.c index 63765650f..75811a87a 100644 --- a/oniguruma/src/regenc.c +++ b/oniguruma/src/regenc.c @@ -1,4 +1,4 @@ 
-/********************************************************************** +/********************************************************************** regenc.c - Oniguruma (regular expression library) **********************************************************************/ /*- @@ -694,6 +694,17 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) return 0; } +#ifdef USE_CRNL_AS_LINE_TERMINATOR +extern int +onigenc_is_mbc_newline_0x0a_or_0x0d(const UChar* p, const UChar* end) +{ + if (p < end) { + if ((*p == 0x0a) || (*p == 0x0d)) return 1; + } + return 0; +} +#endif + /* for single byte encodings */ extern int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, diff --git a/oniguruma/src/regenc.h b/oniguruma/src/regenc.h index bd2819e21..99332c355 100644 --- a/oniguruma/src/regenc.h +++ b/oniguruma/src/regenc.h @@ -1,4 +1,4 @@ -#ifndef REGENC_H +#ifndef REGENC_H #define REGENC_H /********************************************************************** regenc.h - Oniguruma (regular expression library) @@ -111,15 +111,16 @@ struct PropertyNameCtype { int ctype; }; -/* #define USE_CRNL_AS_LINE_TERMINATOR */ +#define USE_CRNL_AS_LINE_TERMINATOR #define USE_UNICODE_PROPERTIES #define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER #define USE_UNICODE_WORD_BREAK /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ -/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ +/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ -#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII +//#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII +#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UTF8 #define ENC_SKIP_OFFSET_1_OR_0 7 @@ -146,7 +147,9 @@ extern int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPair extern int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, 
OnigCaseFoldCodeItem items[])); extern int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); extern int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); - +#ifdef USE_CRNL_AS_LINE_TERMINATOR +extern int onigenc_is_mbc_newline_0x0a_or_0x0d P_((const UChar* p, const UChar* end)); +#endif /* methods for single byte encoding */ extern int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); diff --git a/oniguruma/src/regexec.c b/oniguruma/src/regexec.c index 6618996c8..4bc577033 100644 --- a/oniguruma/src/regexec.c +++ b/oniguruma/src/regexec.c @@ -1,4 +1,4 @@ -/********************************************************************** +/********************************************************************** regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- diff --git a/oniguruma/src/regext.c b/oniguruma/src/regext.c index 8dc4c4c3b..f4e774cfa 100644 --- a/oniguruma/src/regext.c +++ b/oniguruma/src/regext.c @@ -29,6 +29,7 @@ #include "regint.h" +#if 0 static void conv_ext0be32(const UChar* s, const UChar* end, UChar* conv) { @@ -95,7 +96,6 @@ static int conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end, UChar** conv, UChar** conv_end) { -#if 0 int len = (int )(end - s); if (to == ONIG_ENCODING_UTF16_BE) { @@ -156,9 +156,9 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e goto swap32; } } -#endif return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION; } +#endif extern int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, diff --git a/oniguruma/src/regint.h b/oniguruma/src/regint.h index 56767e83f..2ea48321a 100644 --- a/oniguruma/src/regint.h +++ b/oniguruma/src/regint.h @@ -1,4 +1,4 @@ -#ifndef REGINT_H +#ifndef REGINT_H #define REGINT_H 
/********************************************************************** regint.h - Oniguruma (regular expression library) diff --git a/oniguruma/src/utf8.c b/oniguruma/src/utf8.c index 70c150367..74fbc6fb2 100644 --- a/oniguruma/src/utf8.c +++ b/oniguruma/src/utf8.c @@ -1,4 +1,4 @@ -/********************************************************************** +/********************************************************************** utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- @@ -297,7 +297,11 @@ OnigEncodingType OnigEncodingUTF8 = { 6, #endif 1, /* min enc length */ +#ifdef USE_CRNL_AS_LINE_TERMINATOR + onigenc_is_mbc_newline_0x0a_or_0x0d, +#else onigenc_is_mbc_newline_0x0a, +#endif mbc_to_code, code_to_mbclen, code_to_mbc, diff --git a/oniguruma/version.txt b/oniguruma/version.txt index 6b9255cf0..5f54f91ea 100644 --- a/oniguruma/version.txt +++ b/oniguruma/version.txt @@ -1 +1 @@ -6.9.2 +6.9.3 diff --git a/scioniguruma/OnigurumaRegExEngine.cxx b/scioniguruma/OnigurumaRegExEngine.cxx index b9a9e89df..3e939f9dd 100644 --- a/scioniguruma/OnigurumaRegExEngine.cxx +++ b/scioniguruma/OnigurumaRegExEngine.cxx @@ -75,6 +75,7 @@ static void SetSimpleOptions(OnigOptionType& onigOptions, ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_EXTEND); ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_SINGLELINE); ONIG_OPTION_ON(onigOptions, ONIG_OPTION_NEGATE_SINGLELINE); + //ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_ASCII_RANGE); //ONIG_OPTION_OFF(onigOptions, ONIG_OPTION_CAPTURE_GROUP);