diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..3f90bdf54 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.sh text eol=lf +*.bat text eol=crlf diff --git a/Versions/build.txt b/Versions/build.txt index 480d250f5..7e73f8fff 100644 --- a/Versions/build.txt +++ b/Versions/build.txt @@ -1 +1 @@ -1610 +1611 diff --git a/onigmo/.gitignore b/onigmo/.gitignore index 085b4a143..43dbb4835 100644 --- a/onigmo/.gitignore +++ b/onigmo/.gitignore @@ -38,16 +38,30 @@ /build*/ # autotools generated files +/aclocal.m4 /autom4te.cache +/compile +/config.guess /config.h +/config.h.in /config.log /config.status +/config.sub +/configure +/depcomp +/install-sh /libtool +/ltmain.sh +/m4/*.m4 +/missing /onigmo-config /onigmo.pc /Makefile +/Makefile.in /sample/Makefile +/sample/Makefile.in /stamp-h1 +/test-driver # generated executable files /enc/mktable diff --git a/onigmo/HISTORY b/onigmo/HISTORY index 43a14e5d3..9858cc638 100644 --- a/onigmo/HISTORY +++ b/onigmo/HISTORY @@ -1,5 +1,43 @@ History of Onigmo (Oniguruma-mod) +2019/01/30: Version 6.2.0 + +2019/01/30: [dist] Update LTVERSION to "6:5:0". +2019/01/30: [dist] Delete all autotools generated files from the repository. + (PR #115) +2019/01/30: [test] Update tests. (PR #127) +2019/01/30: [impl] Add USE_CASE_MAP_API configuration. (PR #125) +2019/01/29: [test] Add some tests. (PR #124) +2019/01/29: [impl] Revise the code for searching. (PR #123) +2019/01/28: [bug] Fix initialization of the table for quick search. + This was caused by PR #113. + (Issue #120) (PR #121) +2019/01/26: [spec] (thanks omochimetaru) + Make it possible to extend UTF-8 to 31 bits. (PR #111) +2019/01/25: [dist] Remove minor version from the py command. (PR #119) +2019/01/25: [impl] Avoid negative character. (PR #118) +2019/01/25: [impl] Fix lgtm.com warnings. (PR #117) +2019/01/25: [bug] Fix that "ss" in look-behind causes syntax error. + (Issue #92) (PR #116) +2019/01/24: [bug] Fix performance regression if quantifier lower bound is 1. + (Issue #100) (PR #114) +2019/01/24: [bug] Fix performance problem with /k/i and /s/i. + (Issue #97) (PR #113) +2019/01/24: [new] Update Unicode data: Unicode 11.0.0, Emoji 11.0 (PR #112) +2019/01/24: [bug] Import the latest code from Ruby (PR #112) +2019/01/24: [impl] (thanks aycabta) + Support gperf 3.1 with backward compatibility. (PR #101) +2018/12/10: [dist] (thanks Xavier RENE-CORAIL) + Add LGTM.com code quality badges. (PR #108) +2018/11/10: [impl] (thanks Urabe, Shyouhei) + Avoid negative character. (PR #107) +2018/01/19: [impl] (thanks Tom Lord) + Remove unused variable. (PR #99) +2017/09/27: [spec] Import Ruby r58965 + Change max byte length of UTF-8 to 4 bytes. +2017/09/26: [new] Update Unicode data: Unicode 10.0.0, Emoji 5.0 (PR #93) + + 2017/09/26: Version 6.1.3 2017/09/26: [dist] Update LTVERSION to "6:4:0". diff --git a/onigmo/README b/onigmo/README index 87a48fa25..8f468bb4a 100644 --- a/onigmo/README +++ b/onigmo/README @@ -92,9 +92,10 @@ Install Case 1: Unix and Cygwin platform - 1. ./configure - 2. make - 3. make install + 1. ./autogen.sh (If `configure` doesn't exist.) + 2. ./configure + 3. make + 4. make install * uninstall diff --git a/onigmo/README.md b/onigmo/README.md index 144640491..ca29c0b7e 100644 --- a/onigmo/README.md +++ b/onigmo/README.md @@ -32,9 +32,10 @@ Install ### Case 1: Unix and Cygwin platform - 1. `./configure` - 2. `make` - 3. `make install` + 1. `./autogen.sh` (If `configure` doesn't exist.) + 2. `./configure` + 3. `make` + 4. `make install` * test diff --git a/onigmo/enc/ascii.c b/onigmo/enc/ascii.c index 8b32c414f..233971e2b 100644 --- a/onigmo/enc/ascii.c +++ b/onigmo/enc/ascii.c @@ -54,7 +54,11 @@ OnigEncodingDefine(ascii, ASCII) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif ENCINDEX_ASCII, ONIGENC_FLAG_NONE, }; diff --git a/onigmo/enc/unicode.c b/onigmo/enc/unicode.c index b0fe4361e..71c1bf16c 100644 --- a/onigmo/enc/unicode.c +++ b/onigmo/enc/unicode.c @@ -651,6 +651,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, return n; } +#ifdef USE_CASE_MAP_API /* length in bytes for three characters in UTF-32; e.g. needed for ffi (U+FB03) */ #define CASE_MAPPING_SLACK 12 #define MODIFIED (flags |= ONIGENC_CASE_MODIFIED) @@ -794,6 +795,7 @@ SpecialsCopy: *flagP = flags; return (int )(to - to_start); } +#endif #if 0 const char onigenc_unicode_version_string[] = diff --git a/onigmo/enc/unicode/name2ctype.h b/onigmo/enc/unicode/name2ctype.h index 7cfa702f0..d1c7d8e3d 100644 --- a/onigmo/enc/unicode/name2ctype.h +++ b/onigmo/enc/unicode/name2ctype.h @@ -1,4 +1,4 @@ -/* ANSI-C code produced by gperf version 3.1 */ +/* ANSI-C code produced by gperf version 3.1 */ /* Command-line: gperf -7 -c -j1 -i1 -t -C -P -T -H uniname2ctype_hash -Q uniname2ctype_pool -N uniname2ctype_p */ #ifndef USE_UNICODE_PROPERTIES /* Computed positions: -k'1,3' */ @@ -36382,7 +36382,7 @@ uniname2ctype_hash (register const char *str, register size_t len) #ifndef USE_UNICODE_PROPERTIES return len + asso_values[(unsigned char)str[2]] + asso_values[(unsigned char)str[0]]; #else /* USE_UNICODE_PROPERTIES */ - register unsigned int hval = len; + register unsigned int hval = (unsigned int)len; switch (hval) { diff --git a/onigmo/enc/us_ascii.c b/onigmo/enc/us_ascii.c index 08f9072c4..253ee6957 100644 --- a/onigmo/enc/us_ascii.c +++ b/onigmo/enc/us_ascii.c @@ -32,7 +32,11 @@ OnigEncodingDefine(us_ascii, US_ASCII) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif ENCINDEX_US_ASCII, ONIGENC_FLAG_NONE, }; diff --git a/onigmo/enc/utf_8.c b/onigmo/enc/utf_8.c index bba6fd1f8..212aa37ad 100644 --- a/onigmo/enc/utf_8.c +++ b/onigmo/enc/utf_8.c @@ -572,7 +572,11 @@ OnigEncodingDefine(utf_8, UTF_8) = { get_ctype_code_range, left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_unicode_case_map, +#else + NULL, +#endif ENCINDEX_UTF_8, ONIGENC_FLAG_UNICODE, }; diff --git a/onigmo/onigmo.h b/onigmo/onigmo.h index 385f2d6a8..fbbf6c9b0 100644 --- a/onigmo/onigmo.h +++ b/onigmo/onigmo.h @@ -4,8 +4,8 @@ onigmo.h - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2009 K.Kosako - * Copyright (c) 2011-2017 K.Takata + * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,8 +38,8 @@ extern "C" { #endif #define ONIGMO_VERSION_MAJOR 6 -#define ONIGMO_VERSION_MINOR 1 -#define ONIGMO_VERSION_TEENY 3 +#define ONIGMO_VERSION_MINOR 2 +#define ONIGMO_VERSION_TEENY 0 #ifndef ONIG_EXTERN # ifdef RUBY_EXTERN @@ -784,8 +784,8 @@ typedef struct re_pattern_buffer { unsigned char *exact; unsigned char *exact_end; unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ - int *int_map; /* BM skip for exact_len > 255 */ - int *int_map_backward; /* BM skip for backward search */ + int *reserved1; + int *reserved2; OnigDistance dmin; /* min-distance of exact or map */ OnigDistance dmax; /* max-distance of exact or map */ diff --git a/onigmo/regcomp.c b/onigmo/regcomp.c index 919418bde..8d8631dc0 100644 --- a/onigmo/regcomp.c +++ b/onigmo/regcomp.c @@ -1,9 +1,9 @@ -/********************************************************************** +/********************************************************************** regcomp.c - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2013 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2002-2018 K.Kosako + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -4179,93 +4179,10 @@ restart: return r; } -#ifndef USE_SUNDAY_QUICK_SEARCH -/* set skip map for Boyer-Moore search */ -static int -set_bm_skip(UChar* s, UChar* end, regex_t* reg, - UChar skip[], int** int_skip, int ignore_case) -{ - OnigDistance i, len; - int clen, flen, n, j, k; - UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN]; - OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - OnigEncoding enc = reg->enc; - - len = end - s; - if (len < ONIG_CHAR_TABLE_SIZE) { - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len; - - n = 0; - for (i = 0; i < len - 1; i += clen) { - p = s + i; - if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); - clen = enclen(enc, p, end); - if (p + clen > end) - clen = (int )(end - p); - - for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ - } - for (j = 0; j < clen; j++) { - skip[s[i + j]] = (UChar )(len - 1 - i - j); - for (k = 0; k < n; k++) { - skip[buf[k][j]] = (UChar )(len - 1 - i - j); - } - } - } - } - else { -# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE - /* This should not happen. */ - return ONIGERR_TYPE_BUG; -# else - if (IS_NULL(*int_skip)) { - *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; - } - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len; - - n = 0; - for (i = 0; i < len - 1; i += clen) { - p = s + i; - if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); - clen = enclen(enc, p, end); - if (p + clen > end) - clen = (int )(end - p); - - for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ - } - for (j = 0; j < clen; j++) { - (*int_skip)[s[i + j]] = (int )(len - 1 - i - j); - for (k = 0; k < n; k++) { - (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j); - } - } - } -# endif - } - return 0; -} - -#else /* USE_SUNDAY_QUICK_SEARCH */ - /* set skip map for Sunday's quick search */ static int set_bm_skip(UChar* s, UChar* end, regex_t* reg, - UChar skip[], int** int_skip, int ignore_case) + UChar skip[], int ignore_case) { OnigDistance i, len; int clen, flen, n, j, k; @@ -4274,96 +4191,61 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, OnigEncoding enc = reg->enc; len = end - s; - if (len < ONIG_CHAR_TABLE_SIZE) { - if (ignore_case) { - for (i = 0; i < len; i += clen) { - p = s + i; - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); - clen = enclen(enc, p, end); - if (p + clen > end) - clen = (int )(end - p); - - for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) { - /* Different length isn't supported. Stop optimization at here. */ - end = p; - goto endcheck; - } - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf); - if (flen != clen) { - /* Different length isn't supported. Stop optimization at here. */ - end = p; - goto endcheck; - } - } - } -endcheck: - ; - } - - len = end - s; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - skip[i] = (UChar )(len + 1); - n = 0; - for (i = 0; i < len; i += clen) { - p = s + i; - if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); - clen = enclen(enc, p, end); - if (p + clen > end) - clen = (int )(end - p); - - for (j = 0; j < clen; j++) { - skip[s[i + j]] = (UChar )(len - i - j); - for (k = 0; k < n; k++) { - ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf); - skip[buf[j]] = (UChar )(len - i - j); - } - } - } - } - else { -# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE + if (len >= ONIG_CHAR_TABLE_SIZE) { /* This should not happen. */ return ONIGERR_TYPE_BUG; -# else - if (IS_NULL(*int_skip)) { - *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; - } - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1); + } - n = 0; + if (ignore_case) { for (i = 0; i < len; i += clen) { p = s + i; - if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); clen = enclen(enc, p, end); if (p + clen > end) clen = (int )(end - p); for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ - } - for (j = 0; j < clen; j++) { - (*int_skip)[s[i + j]] = (int )(len - i - j); - for (k = 0; k < n; k++) { - (*int_skip)[buf[k][j]] = (int )(len - i - j); + if ((items[j].code_len != 1) || (items[j].byte_len != clen)) { + /* Different length isn't supported. Stop optimization at here. */ + end = p; + goto endcheck; + } + flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf); + if (flen != clen) { + /* Different length isn't supported. Stop optimization at here. */ + end = p; + goto endcheck; } } } -# endif +endcheck: + len = end - s; } - return len; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + skip[i] = (UChar )(len + 1); + n = 0; + for (i = 0; i < len; i += clen) { + p = s + i; + if (ignore_case) + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); + clen = enclen(enc, p, end); + if (p + clen > end) + clen = (int )(end - p); + + for (j = 0; j < clen; j++) { + skip[s[i + j]] = (UChar )(len - i - j); + for (k = 0; k < n; k++) { + ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf); + skip[buf[j]] = (UChar )(len - i - j); + } + } + } + + return (int)len; } -#endif /* USE_SUNDAY_QUICK_SEARCH */ typedef struct { OnigDistance min; /* min byte length */ @@ -5349,7 +5231,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->ignore_case > 0) { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { e->len = set_bm_skip(reg->exact, reg->exact_end, reg, - reg->map, &(reg->int_map), 1); + reg->map, 1); reg->exact_end = reg->exact + e->len; if (e->len >= 3) { reg->optimize = (allow_reverse != 0 @@ -5368,7 +5250,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) else { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { set_bm_skip(reg->exact, reg->exact_end, reg, - reg->map, &(reg->int_map), 0); + reg->map, 0); reg->optimize = (allow_reverse != 0 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); } @@ -5653,8 +5535,6 @@ onig_free_body(regex_t* reg) if (IS_NOT_NULL(reg)) { if (IS_NOT_NULL(reg->p)) xfree(reg->p); if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); - if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); - if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); @@ -5681,8 +5561,6 @@ onig_memsize(const regex_t *reg) if (IS_NULL(reg)) return 0; if (IS_NOT_NULL(reg->p)) size += reg->alloc; if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact; - if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; - if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange); if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain); @@ -5966,8 +5844,6 @@ onig_reg_init(regex_t* reg, OnigOptionType option, (reg)->syntax = syntax; (reg)->optimize = 0; (reg)->exact = (UChar* )NULL; - (reg)->int_map = (int* )NULL; - (reg)->int_map_backward = (int* )NULL; (reg)->chain = (regex_t* )NULL; (reg)->p = (UChar* )NULL; diff --git a/onigmo/regenc.c b/onigmo/regenc.c index 686006639..64374d7bd 100644 --- a/onigmo/regenc.c +++ b/onigmo/regenc.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2007 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -951,6 +951,7 @@ onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, } #endif +#ifdef USE_CASE_MAP_API extern int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc) @@ -1010,3 +1011,4 @@ onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar *flagP = flags; return (int )(to - to_start); } +#endif diff --git a/onigmo/regenc.h b/onigmo/regenc.h index d21b5afc1..08f19c978 100644 --- a/onigmo/regenc.h +++ b/onigmo/regenc.h @@ -5,7 +5,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -129,12 +129,14 @@ typedef struct { #endif +/* config */ #define USE_CRNL_AS_LINE_TERMINATOR #define USE_UNICODE_PROPERTIES #define USE_UNICODE_AGE_PROPERTIES /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ #define USE_ASCII_ALL_LINE_BREAKS // LF, VT, FF, CR /* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ +/* #define USE_CASE_MAP_API */ #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII diff --git a/onigmo/regerror.c b/onigmo/regerror.c index 59cf53068..63b67fce1 100644 --- a/onigmo/regerror.c +++ b/onigmo/regerror.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2007 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -63,14 +63,18 @@ onig_error_code_to_format(OnigPosition code) p = "parse depth limit over"; break; case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET: p = "default multibyte-encoding is not set"; break; +#if 0 case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: p = "can't convert to wide-char on specified multibyte-encoding"; break; +#endif case ONIGERR_INVALID_ARGUMENT: p = "invalid argument"; break; case ONIGERR_END_PATTERN_AT_LEFT_BRACE: p = "end pattern at left brace"; break; +#if 0 case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: p = "end pattern at left bracket"; break; +#endif case ONIGERR_EMPTY_CHAR_CLASS: p = "empty char-class"; break; case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: @@ -87,16 +91,20 @@ onig_error_code_to_format(OnigPosition code) p = "invalid control-code syntax"; break; case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: p = "char-class value at end of range"; break; +#if 0 case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: p = "char-class value at start of range"; break; +#endif case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: p = "unmatched range specifier in char-class"; break; case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: p = "target of repeat operator is not specified"; break; case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: p = "target of repeat operator is invalid"; break; +#if 0 case ONIGERR_NESTED_REPEAT_OPERATOR: p = "nested repeat operator"; break; +#endif case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: p = "unmatched close parenthesis"; break; case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: @@ -121,14 +129,18 @@ onig_error_code_to_format(OnigPosition code) p = "upper is smaller than lower in repeat range"; break; case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: p = "empty range in char class"; break; +#if 0 case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: p = "mismatch multibyte code length in char-class range"; break; +#endif case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: p = "too many multibyte code ranges are specified"; break; case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: p = "too short multibyte code string"; break; +#if 0 case ONIGERR_TOO_BIG_BACKREF_NUMBER: p = "too big backref number"; break; +#endif case ONIGERR_INVALID_BACKREF: #ifdef USE_NAMED_GROUP p = "invalid backref number/name"; break; diff --git a/onigmo/regexec.c b/onigmo/regexec.c index 837c35e66..fdd67184d 100644 --- a/onigmo/regexec.c +++ b/onigmo/regexec.c @@ -2,8 +2,8 @@ regexec.c - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2002-2018 K.Kosako + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -3330,219 +3330,6 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, return (UChar* )NULL; } -#ifndef USE_SUNDAY_QUICK_SEARCH -/* Boyer-Moore-Horspool search applied to a multibyte string */ -static UChar* -bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) -{ - const UChar *s, *se, *t, *p, *end; - const UChar *tail; - ptrdiff_t skip, tlen1; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); -# endif - - tail = target_end - 1; - tlen1 = tail - target; - end = text_range; - if (end + tlen1 > text_end) - end = text_end - tlen1; - - s = text; - - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->int_map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } -# endif - } - - return (UChar* )NULL; -} - -/* Boyer-Moore-Horspool search */ -static UChar* -bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) -{ - const UChar *s, *t, *p, *end; - const UChar *tail; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", - (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); -# endif - - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - - tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n", - (intptr_t )(s - text), s); -# endif - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->map[*s]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->int_map[*s]; - } -# endif - } - return (UChar* )NULL; -} - -/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */ -static UChar* -bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) -{ - const UChar *s, *se, *t, *end; - const UChar *tail; - ptrdiff_t skip, tlen1; - OnigEncoding enc = reg->enc; - int case_fold_flag = reg->case_fold_flag; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", - (int )text, text, (int )text_end, text_end, (int )text_range, text_range); -# endif - - tail = target_end - 1; - tlen1 = tail - target; - end = text_range; - if (end + tlen1 > text_end) - end = text_end - tlen1; - - s = text; - - if (IS_NULL(reg->int_map)) { - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - skip = reg->map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - skip = reg->int_map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } -# endif - } - - return (UChar* )NULL; -} - -/* Boyer-Moore-Horspool search (ignore case) */ -static UChar* -bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) -{ - const UChar *s, *p, *end; - const UChar *tail; - OnigEncoding enc = reg->enc; - int case_fold_flag = reg->case_fold_flag; - -# ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", - (int )text, text, (int )text_end, text_end, (int )text_range, text_range); -# endif - - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - - tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s - (target_end - target) + 1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - s += reg->map[*s]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s - (target_end - target) + 1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - s += reg->int_map[*s]; - } -# endif - } - return (UChar* )NULL; -} - -#else /* USE_SUNDAY_QUICK_SEARCH */ - /* Sunday's quick search applied to a multibyte string */ static UChar* bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, @@ -3567,39 +3354,19 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, s = text; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - if (s + 1 >= end) break; - skip = reg->map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - if (s + 1 >= end) break; - skip = reg->int_map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } -# endif + if (s + 1 >= end) break; + skip = reg->map[se[1]]; + t = s; + do { + s += enclen(enc, s, end); + } while ((s - t) < skip && s < end); } return (UChar* )NULL; @@ -3626,32 +3393,17 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, end = text_end; s = text + tlen1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - if (s + 1 >= end) break; - s += reg->map[s[1]]; + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; } + if (s + 1 >= end) break; + s += reg->map[s[1]]; } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - if (s + 1 >= end) break; - s += reg->int_map[s[1]]; - } -# endif - } + return (UChar* )NULL; } @@ -3680,35 +3432,17 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, s = text; - if (IS_NULL(reg->int_map)) { - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - if (s + 1 >= end) break; - skip = reg->map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - if (s + 1 >= end) break; - skip = reg->int_map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } -# endif + while (s < end) { + se = s + tlen1; + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, se + 1)) + return (UChar* )s; + if (s + 1 >= end) break; + skip = reg->map[se[1]]; + t = s; + do { + s += enclen(enc, s, end); + } while ((s - t) < skip && s < end); } return (UChar* )NULL; @@ -3737,83 +3471,17 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, end = text_end; s = text + tlen1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s - tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - if (s + 1 >= end) break; - s += reg->map[s[1]]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s - tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - if (s + 1 >= end) break; - s += reg->int_map[s[1]]; - } -# endif - } - return (UChar* )NULL; -} -#endif /* USE_SUNDAY_QUICK_SEARCH */ - -#ifdef USE_INT_MAP_BACKWARD -static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - int** skip) -{ - int i, len; - - if (IS_NULL(*skip)) { - *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*skip)) return ONIGERR_MEMORY; - } - - len = (int )(end - s); - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - (*skip)[i] = len; - - for (i = len - 1; i > 0; i--) - (*skip)[s[i]] = i; - - return 0; -} - -static UChar* -bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - const UChar *s, *t, *p; - - s = text_end - (target_end - target); - if (text_start < s) - s = text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); - - while (s >= text) { - p = s; - t = target; - while (t < target_end && *p == *t) { - p++; t++; - } - if (t == target_end) - return (UChar* )s; - - s -= reg->int_map_backward[*s]; - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); + while (s < end) { + p = s - tlen1; + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + p, s + 1)) + return (UChar* )p; + if (s + 1 >= end) break; + s += reg->map[s[1]]; } return (UChar* )NULL; } -#endif static UChar* map_search(OnigEncoding enc, UChar map[], @@ -4048,21 +3716,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, case ONIG_OPTIMIZE_EXACT_BM: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: -#ifdef USE_INT_MAP_BACKWARD - if (IS_NULL(reg->int_map_backward)) { - int r; - if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; - - r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); - if (r) return r; - } - p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); -#else goto exact_method; -#endif break; case ONIG_OPTIMIZE_MAP: diff --git a/onigmo/regint.h b/onigmo/regint.h index 8dc8bf6a9..48f13fd8e 100644 --- a/onigmo/regint.h +++ b/onigmo/regint.h @@ -5,7 +5,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2013 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -81,13 +81,12 @@ /* #define USE_OP_PUSH_OR_JUMP_EXACT */ #define USE_QTFR_PEEK_NEXT #define USE_ST_LIBRARY -#define USE_SUNDAY_QUICK_SEARCH #define INIT_MATCH_STACK_SIZE 160 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ #define DEFAULT_PARSE_DEPTH_LIMIT 4096 -#define OPT_EXACT_MAXLEN 24 +#define OPT_EXACT_MAXLEN 24 /* This must be smaller than ONIG_CHAR_TABLE_SIZE. */ /* check config */ #if defined(USE_PERL_SUBEXP_CALL) || defined(USE_CAPITAL_P_NAMED_GROUP) @@ -128,14 +127,14 @@ # undef ONIG_ESCAPE_UCHAR_COLLISION #endif -#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ +/* #define USE_WORD_BEGIN_END */ /* "\<": word-begin, "\>": word-end */ #ifdef RUBY # undef USE_CAPTURE_HISTORY #else -# define USE_CAPTURE_HISTORY +/* # define USE_CAPTURE_HISTORY */ #endif -#define USE_VARIABLE_META_CHARS -#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +/* #define USE_VARIABLE_META_CHARS */ +/* #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ /* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ diff --git a/onigmo/regparse.c b/onigmo/regparse.c index 5e51e3950..b54a98914 100644 --- a/onigmo/regparse.c +++ b/onigmo/regparse.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/onigmo/regparse.h b/onigmo/regparse.h index acdd3e2f5..5e8b1f6d0 100644 --- a/onigmo/regparse.h +++ b/onigmo/regparse.h @@ -5,7 +5,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2007 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/onigmo/regversion.c b/onigmo/regversion.c index d80e8d280..9e9e3875b 100644 --- a/onigmo/regversion.c +++ b/onigmo/regversion.c @@ -2,8 +2,8 @@ regversion.c - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako - * Copyright (c) 2011-2017 K.Takata + * Copyright (c) 2002-2018 K.Kosako + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -48,8 +48,8 @@ extern const char* onig_copyright(void) { const char *s = - "Onigmo " ONIG_VERSION_STRING " : Copyright (C) 2002-2016 K.Kosako, " - "2011-2017 K.Takata"; + "Onigmo " ONIG_VERSION_STRING " : Copyright (C) 2002-2018 K.Kosako, " + "2011-2019 K.Takata"; return s; } diff --git a/onigmo/version.txt b/onigmo/version.txt index 88d06f108..6abaeb2f9 100644 --- a/onigmo/version.txt +++ b/onigmo/version.txt @@ -1 +1 @@ -6.1.3 +6.2.0 diff --git a/res/Notepad3.exe.manifest.conf b/res/Notepad3.exe.manifest.conf index b86f8f1a9..4ae33eccc 100644 --- a/res/Notepad3.exe.manifest.conf +++ b/res/Notepad3.exe.manifest.conf @@ -3,7 +3,7 @@ Notepad3 develop diff --git a/src/Notepad3.cppcheck b/src/Notepad3.cppcheck new file mode 100644 index 000000000..ee5822030 --- /dev/null +++ b/src/Notepad3.cppcheck @@ -0,0 +1,14 @@ + + + win64 + true + + windows + + + cert + + + clang-tidy + + diff --git a/src/VersionEx.h b/src/VersionEx.h index b9f0b2469..a702587da 100644 --- a/src/VersionEx.h +++ b/src/VersionEx.h @@ -6,8 +6,8 @@ #define APPNAME "Notepad3" #define VERSION_MAJOR 5 #define VERSION_MINOR 19 -#define VERSION_REV 128 -#define VERSION_BUILD 1610 +#define VERSION_REV 130 +#define VERSION_BUILD 1611 #define SCINTILLA_VER 413 -#define ONIGMO_REGEX_VER 6.1.3 +#define ONIGMO_REGEX_VER 6.2.0 #define VERSION_PATCH "develop"