diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..3f90bdf54
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.sh text eol=lf
+*.bat text eol=crlf
diff --git a/Versions/build.txt b/Versions/build.txt
index 8caa513e2..7e73f8fff 100644
--- a/Versions/build.txt
+++ b/Versions/build.txt
@@ -1 +1 @@
-1608
+1611
diff --git a/minipath/src/Dlapi.c b/minipath/src/Dlapi.c
index 8524b04f5..27aa9c2fa 100644
--- a/minipath/src/Dlapi.c
+++ b/minipath/src/Dlapi.c
@@ -1346,7 +1346,7 @@ BOOL DriveBox_SelectDrive(HWND hwnd,LPCWSTR lpszPath)
}
// Don't select anything
- SendMessage(hwnd,CB_SETCURSEL,(WPARAM)-1,0);
+ SendMessage(hwnd,CB_SETCURSEL,(WPARAM)-1,0); // -1 clears the selection; index 1 would select the second drive entry
return FALSE;
}
diff --git a/onigmo/.gitignore b/onigmo/.gitignore
index 085b4a143..43dbb4835 100644
--- a/onigmo/.gitignore
+++ b/onigmo/.gitignore
@@ -38,16 +38,30 @@
/build*/
# autotools generated files
+/aclocal.m4
/autom4te.cache
+/compile
+/config.guess
/config.h
+/config.h.in
/config.log
/config.status
+/config.sub
+/configure
+/depcomp
+/install-sh
/libtool
+/ltmain.sh
+/m4/*.m4
+/missing
/onigmo-config
/onigmo.pc
/Makefile
+/Makefile.in
/sample/Makefile
+/sample/Makefile.in
/stamp-h1
+/test-driver
# generated executable files
/enc/mktable
diff --git a/onigmo/HISTORY b/onigmo/HISTORY
index 43a14e5d3..9858cc638 100644
--- a/onigmo/HISTORY
+++ b/onigmo/HISTORY
@@ -1,5 +1,43 @@
History of Onigmo (Oniguruma-mod)
+2019/01/30: Version 6.2.0
+
+2019/01/30: [dist] Update LTVERSION to "6:5:0".
+2019/01/30: [dist] Delete all autotools generated files from the repository.
+ (PR #115)
+2019/01/30: [test] Update tests. (PR #127)
+2019/01/30: [impl] Add USE_CASE_MAP_API configuration. (PR #125)
+2019/01/29: [test] Add some tests. (PR #124)
+2019/01/29: [impl] Revise the code for searching. (PR #123)
+2019/01/28: [bug] Fix initialization of the table for quick search.
+ This was caused by PR #113.
+ (Issue #120) (PR #121)
+2019/01/26: [spec] (thanks omochimetaru)
+ Make it possible to extend UTF-8 to 31 bits. (PR #111)
+2019/01/25: [dist] Remove minor version from the py command. (PR #119)
+2019/01/25: [impl] Avoid negative character. (PR #118)
+2019/01/25: [impl] Fix lgtm.com warnings. (PR #117)
+2019/01/25: [bug] Fix that "ss" in look-behind causes syntax error.
+ (Issue #92) (PR #116)
+2019/01/24: [bug] Fix performance regression if quantifier lower bound is 1.
+ (Issue #100) (PR #114)
+2019/01/24: [bug] Fix performance problem with /k/i and /s/i.
+ (Issue #97) (PR #113)
+2019/01/24: [new] Update Unicode data: Unicode 11.0.0, Emoji 11.0 (PR #112)
+2019/01/24: [bug] Import the latest code from Ruby (PR #112)
+2019/01/24: [impl] (thanks aycabta)
+ Support gperf 3.1 with backward compatibility. (PR #101)
+2018/12/10: [dist] (thanks Xavier RENE-CORAIL)
+ Add LGTM.com code quality badges. (PR #108)
+2018/11/10: [impl] (thanks Urabe, Shyouhei)
+ Avoid negative character. (PR #107)
+2018/01/19: [impl] (thanks Tom Lord)
+ Remove unused variable. (PR #99)
+2017/09/27: [spec] Import Ruby r58965
+ Change max byte length of UTF-8 to 4 bytes.
+2017/09/26: [new] Update Unicode data: Unicode 10.0.0, Emoji 5.0 (PR #93)
+
+
2017/09/26: Version 6.1.3
2017/09/26: [dist] Update LTVERSION to "6:4:0".
diff --git a/onigmo/README b/onigmo/README
index 87a48fa25..8f468bb4a 100644
--- a/onigmo/README
+++ b/onigmo/README
@@ -92,9 +92,10 @@ Install
Case 1: Unix and Cygwin platform
- 1. ./configure
- 2. make
- 3. make install
+ 1. ./autogen.sh (If `configure` doesn't exist.)
+ 2. ./configure
+ 3. make
+ 4. make install
* uninstall
diff --git a/onigmo/README.md b/onigmo/README.md
index 144640491..ca29c0b7e 100644
--- a/onigmo/README.md
+++ b/onigmo/README.md
@@ -32,9 +32,10 @@ Install
### Case 1: Unix and Cygwin platform
- 1. `./configure`
- 2. `make`
- 3. `make install`
+ 1. `./autogen.sh` (If `configure` doesn't exist.)
+ 2. `./configure`
+ 3. `make`
+ 4. `make install`
* test
diff --git a/onigmo/enc/ascii.c b/onigmo/enc/ascii.c
index 8b32c414f..233971e2b 100644
--- a/onigmo/enc/ascii.c
+++ b/onigmo/enc/ascii.c
@@ -54,7 +54,11 @@ OnigEncodingDefine(ascii, ASCII) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+#ifdef USE_CASE_MAP_API
onigenc_single_byte_ascii_only_case_map,
+#else
+ NULL,
+#endif
ENCINDEX_ASCII,
ONIGENC_FLAG_NONE,
};
diff --git a/onigmo/enc/unicode.c b/onigmo/enc/unicode.c
index c41fe8fbd..71c1bf16c 100644
--- a/onigmo/enc/unicode.c
+++ b/onigmo/enc/unicode.c
@@ -651,6 +651,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
return n;
}
+#ifdef USE_CASE_MAP_API
/* length in bytes for three characters in UTF-32; e.g. needed for ffi (U+FB03) */
#define CASE_MAPPING_SLACK 12
#define MODIFIED (flags |= ONIGENC_CASE_MODIFIED)
@@ -678,15 +679,13 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
*pp += codepoint_length;
if (code <= 'z') { /* ASCII comes first */
- if (code >= 'a' && code <= 'z') {
+ if (code >= 'a' /*&& code <= 'z'*/) {
if (flags & ONIGENC_CASE_UPCASE) {
MODIFIED;
if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'i')
code = I_WITH_DOT_ABOVE;
- else {
- code -= 'a';
- code += 'A';
- }
+ else
+ code -= 'a' - 'A';
}
}
else if (code >= 'A' && code <= 'Z') {
@@ -796,6 +795,7 @@ SpecialsCopy:
*flagP = flags;
return (int )(to - to_start);
}
+#endif
#if 0
const char onigenc_unicode_version_string[] =
diff --git a/onigmo/enc/unicode/name2ctype.h b/onigmo/enc/unicode/name2ctype.h
index 7cfa702f0..d1c7d8e3d 100644
--- a/onigmo/enc/unicode/name2ctype.h
+++ b/onigmo/enc/unicode/name2ctype.h
@@ -1,4 +1,4 @@
-/* ANSI-C code produced by gperf version 3.1 */
+/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -7 -c -j1 -i1 -t -C -P -T -H uniname2ctype_hash -Q uniname2ctype_pool -N uniname2ctype_p */
#ifndef USE_UNICODE_PROPERTIES
/* Computed positions: -k'1,3' */
@@ -36382,7 +36382,7 @@ uniname2ctype_hash (register const char *str, register size_t len)
#ifndef USE_UNICODE_PROPERTIES
return len + asso_values[(unsigned char)str[2]] + asso_values[(unsigned char)str[0]];
#else /* USE_UNICODE_PROPERTIES */
- register unsigned int hval = len;
+ register unsigned int hval = (unsigned int)len;
switch (hval)
{
diff --git a/onigmo/enc/us_ascii.c b/onigmo/enc/us_ascii.c
index 08f9072c4..253ee6957 100644
--- a/onigmo/enc/us_ascii.c
+++ b/onigmo/enc/us_ascii.c
@@ -32,7 +32,11 @@ OnigEncodingDefine(us_ascii, US_ASCII) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+#ifdef USE_CASE_MAP_API
onigenc_single_byte_ascii_only_case_map,
+#else
+ NULL,
+#endif
ENCINDEX_US_ASCII,
ONIGENC_FLAG_NONE,
};
diff --git a/onigmo/enc/utf_8.c b/onigmo/enc/utf_8.c
index f5ec6b9ea..212aa37ad 100644
--- a/onigmo/enc/utf_8.c
+++ b/onigmo/enc/utf_8.c
@@ -37,13 +37,19 @@
#endif
#define USE_INVALID_CODE_SCHEME
+/* #define USE_UTF8_31BITS */
#ifdef USE_INVALID_CODE_SCHEME
/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
# define INVALID_CODE_FE 0xfffffffe
# define INVALID_CODE_FF 0xffffffff
#endif
+
+#ifndef USE_UTF8_31BITS
#define VALID_CODE_LIMIT 0x0010ffff
+#else
+#define VALID_CODE_LIMIT 0x7fffffff
+#endif
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
@@ -63,14 +69,19 @@ static const int EncLen_UTF8[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+#ifndef USE_UTF8_31BITS
4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+#else
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
+#endif
};
typedef enum {
FAILURE = -2,
ACCEPT,
S0, S1, S2, S3,
- S4, S5, S6, S7
+ S4, S5, S6, S7,
+ S8, S9,S10,S11,
} state_t;
#define A ACCEPT
#define F FAILURE
@@ -91,7 +102,11 @@ static const signed char trans[][0x100] = {
/* c */ F, F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* e */ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
+#ifndef USE_UTF8_31BITS
/* f */ 5, 6, 6, 6, 7, F, F, F, F, F, F, F, F, F, F, F
+#else
+ /* f */ 5, 6, 6, 6, 6, 6, 6, 6, 8, 9, 9, 9,10,11, F, F
+#endif
},
{ /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
@@ -219,6 +234,80 @@ static const signed char trans[][0x100] = {
/* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
/* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
},
+#ifdef USE_UTF8_31BITS
+ { /* S8 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 8 */ F, F, F, F, F, F, F, F, 6, 6, 6, 6, 6, 6, 6, 6,
+ /* 9 */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ /* a */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ /* b */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+ },
+ { /* S9 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 8 */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ /* 9 */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ /* a */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ /* b */ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+ },
+ { /* S10 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 8 */ F, F, F, F, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ /* 9 */ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ /* a */ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ /* b */ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+ },
+ { /* S11 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 8 */ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ /* 9 */ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ /* a */ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ /* b */ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+ },
+#endif // USE_UTF8_31BITS
};
#undef A
#undef F
@@ -244,8 +333,24 @@ mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-3);
s = trans[s][*p++];
+
+#ifndef USE_UTF8_31BITS
return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4) :
ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+#else
+ if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4) :
+ ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+
+ if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-4);
+ s = trans[s][*p++];
+ if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(5) :
+ ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+
+ if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-5);
+ s = trans[s][*p++];
+ return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(6) :
+ ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+#endif
}
static int
@@ -308,7 +413,13 @@ code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
if ((code & 0xffffff80) == 0) return 1;
else if ((code & 0xfffff800) == 0) return 2;
else if ((code & 0xffff0000) == 0) return 3;
+#ifndef USE_UTF8_31BITS
else if (code <= VALID_CODE_LIMIT) return 4;
+#else
+ else if ((code & 0xffe00000) == 0) return 4;
+ else if ((code & 0xfc000000) == 0) return 5;
+ else if (code <= VALID_CODE_LIMIT) return 6;
+#endif
#ifdef USE_INVALID_CODE_SCHEME
else if (code == INVALID_CODE_FE) return 1;
else if (code == INVALID_CODE_FF) return 1;
@@ -337,11 +448,33 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
*p++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
*p++ = UTF8_TRAILS(code, 6);
}
+#ifndef USE_UTF8_31BITS
else if (code <= VALID_CODE_LIMIT) {
*p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
+#else
+ else if ((code & 0xffe00000) == 0) {
+ *p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
+ *p++ = UTF8_TRAILS(code, 12);
+ *p++ = UTF8_TRAILS(code, 6);
+ }
+ else if ((code & 0xfc000000) == 0) {
+ *p++ = (UChar )(((code>>24) & 0x03) | 0xf8);
+ *p++ = UTF8_TRAILS(code, 18);
+ *p++ = UTF8_TRAILS(code, 12);
+ *p++ = UTF8_TRAILS(code, 6);
+ }
+ else if (code <= VALID_CODE_LIMIT) {
+ *p++ = (UChar )(((code>>30) & 0x01) | 0xfc);
+ *p++ = UTF8_TRAILS(code, 24);
+ *p++ = UTF8_TRAILS(code, 18);
+ *p++ = UTF8_TRAILS(code, 12);
+ *p++ = UTF8_TRAILS(code, 6);
+ }
+#endif
+
#ifdef USE_INVALID_CODE_SCHEME
else if (code == INVALID_CODE_FE) {
*p = 0xfe;
@@ -421,7 +554,11 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncodingDefine(utf_8, UTF_8) = {
mbc_enc_len,
"UTF-8", /* name */
+#ifndef USE_UTF8_31BITS
4, /* max byte length */
+#else
+ 6, /* max byte length */
+#endif
1, /* min byte length */
is_mbc_newline,
mbc_to_code,
@@ -435,7 +572,11 @@ OnigEncodingDefine(utf_8, UTF_8) = {
get_ctype_code_range,
left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+#ifdef USE_CASE_MAP_API
onigenc_unicode_case_map,
+#else
+ NULL,
+#endif
ENCINDEX_UTF_8,
ONIGENC_FLAG_UNICODE,
};
diff --git a/onigmo/onigmo.h b/onigmo/onigmo.h
index 385f2d6a8..fbbf6c9b0 100644
--- a/onigmo/onigmo.h
+++ b/onigmo/onigmo.h
@@ -4,8 +4,8 @@
onigmo.h - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2009 K.Kosako
- * Copyright (c) 2011-2017 K.Takata
+ * Copyright (c) 2002-2016 K.Kosako
+ * Copyright (c) 2011-2019 K.Takata
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,8 @@ extern "C" {
#endif
#define ONIGMO_VERSION_MAJOR 6
-#define ONIGMO_VERSION_MINOR 1
-#define ONIGMO_VERSION_TEENY 3
+#define ONIGMO_VERSION_MINOR 2
+#define ONIGMO_VERSION_TEENY 0
#ifndef ONIG_EXTERN
# ifdef RUBY_EXTERN
@@ -784,8 +784,8 @@ typedef struct re_pattern_buffer {
unsigned char *exact;
unsigned char *exact_end;
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
- int *int_map; /* BM skip for exact_len > 255 */
- int *int_map_backward; /* BM skip for backward search */
+ int *reserved1;
+ int *reserved2;
OnigDistance dmin; /* min-distance of exact or map */
OnigDistance dmax; /* max-distance of exact or map */
diff --git a/onigmo/regcomp.c b/onigmo/regcomp.c
index c843b1dea..8d8631dc0 100644
--- a/onigmo/regcomp.c
+++ b/onigmo/regcomp.c
@@ -1,9 +1,9 @@
-/**********************************************************************
+/**********************************************************************
regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2013 K.Kosako
- * Copyright (c) 2011-2016 K.Takata
+ * Copyright (c) 2002-2018 K.Kosako
+ * Copyright (c) 2011-2019 K.Takata
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -2770,10 +2770,8 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
if (sn->end <= sn->s)
break;
- if (exact != 0 &&
- !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
- }
- else {
+ if (exact == 0 ||
+ NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
n = node;
}
}
@@ -3264,6 +3262,14 @@ setup_subexp_call(Node* node, ScanEnv* env)
}
#endif
+#define IN_ALT (1<<0)
+#define IN_NOT (1<<1)
+#define IN_REPEAT (1<<2)
+#define IN_VAR_REPEAT (1<<3)
+#define IN_CALL (1<<4)
+#define IN_RECCALL (1<<5)
+#define IN_LOOK_BEHIND (1<<6)
+
/* divide different length alternatives in look-behind.
(?<=A|B) ==> (?<=A)|(?<=B)
(? (?s;
end = sn->end;
if (start >= end) return 0;
+ is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
+
r = 0;
top_root = root = prev_node = snode = NULL_NODE;
alt_num = 1;
@@ -3593,7 +3604,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
len = enclen(reg->enc, p, end);
varlen = is_case_fold_variable_len(n, items, len);
- if (n == 0 || varlen == 0) {
+ if (n == 0 || varlen == 0 || is_in_look_behind) {
if (IS_NULL(snode)) {
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
onig_node_free(top_root);
@@ -3854,13 +3865,6 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
}
#endif
-#define IN_ALT (1<<0)
-#define IN_NOT (1<<1)
-#define IN_REPEAT (1<<2)
-#define IN_VAR_REPEAT (1<<3)
-#define IN_CALL (1<<4)
-#define IN_RECCALL (1<<5)
-
/* setup_tree does the following work.
1. check empty loop. (set qn->target_empty_info)
2. expand ignore-case in char class.
@@ -3902,7 +3906,7 @@ restart:
case NT_STR:
if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
- r = expand_case_fold_string(node, reg);
+ r = expand_case_fold_string(node, reg, state);
}
break;
@@ -4145,7 +4149,7 @@ restart:
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
if (NTYPE(node) != NT_ANCHOR) goto restart;
- r = setup_tree(an->target, reg, state, env);
+ r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
if (r != 0) return r;
r = setup_look_behind(node, reg, env);
}
@@ -4158,7 +4162,8 @@ restart:
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
if (NTYPE(node) != NT_ANCHOR) goto restart;
- r = setup_tree(an->target, reg, (state | IN_NOT), env);
+ r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
+ env);
if (r != 0) return r;
r = setup_look_behind(node, reg, env);
}
@@ -4174,93 +4179,10 @@ restart:
return r;
}
-#ifndef USE_SUNDAY_QUICK_SEARCH
-/* set skip map for Boyer-Moore search */
-static int
-set_bm_skip(UChar* s, UChar* end, regex_t* reg,
- UChar skip[], int** int_skip, int ignore_case)
-{
- OnigDistance i, len;
- int clen, flen, n, j, k;
- UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
- OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- OnigEncoding enc = reg->enc;
-
- len = end - s;
- if (len < ONIG_CHAR_TABLE_SIZE) {
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
-
- n = 0;
- for (i = 0; i < len - 1; i += clen) {
- p = s + i;
- if (ignore_case)
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
- p, end, items);
- clen = enclen(enc, p, end);
- if (p + clen > end)
- clen = (int )(end - p);
-
- for (j = 0; j < n; j++) {
- if ((items[j].code_len != 1) || (items[j].byte_len != clen))
- return 1; /* different length isn't supported. */
- flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
- if (flen != clen)
- return 1; /* different length isn't supported. */
- }
- for (j = 0; j < clen; j++) {
- skip[s[i + j]] = (UChar )(len - 1 - i - j);
- for (k = 0; k < n; k++) {
- skip[buf[k][j]] = (UChar )(len - 1 - i - j);
- }
- }
- }
- }
- else {
-# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
- /* This should not happen. */
- return ONIGERR_TYPE_BUG;
-# else
- if (IS_NULL(*int_skip)) {
- *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
- }
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
-
- n = 0;
- for (i = 0; i < len - 1; i += clen) {
- p = s + i;
- if (ignore_case)
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
- p, end, items);
- clen = enclen(enc, p, end);
- if (p + clen > end)
- clen = (int )(end - p);
-
- for (j = 0; j < n; j++) {
- if ((items[j].code_len != 1) || (items[j].byte_len != clen))
- return 1; /* different length isn't supported. */
- flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
- if (flen != clen)
- return 1; /* different length isn't supported. */
- }
- for (j = 0; j < clen; j++) {
- (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
- for (k = 0; k < n; k++) {
- (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
- }
- }
- }
-# endif
- }
- return 0;
-}
-
-#else /* USE_SUNDAY_QUICK_SEARCH */
-
/* set skip map for Sunday's quick search */
static int
set_bm_skip(UChar* s, UChar* end, regex_t* reg,
- UChar skip[], int** int_skip, int ignore_case)
+ UChar skip[], int ignore_case)
{
OnigDistance i, len;
int clen, flen, n, j, k;
@@ -4269,96 +4191,61 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
OnigEncoding enc = reg->enc;
len = end - s;
- if (len < ONIG_CHAR_TABLE_SIZE) {
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
-
- if (ignore_case) {
- for (i = 0; i < len; i += clen) {
- p = s + i;
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
- p, end, items);
- clen = enclen(enc, p, end);
- if (p + clen > end)
- clen = (int )(end - p);
-
- for (j = 0; j < n; j++) {
- if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
- /* Different length isn't supported. Stop optimization at here. */
- end = p;
- goto endcheck;
- }
- flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
- if (flen != clen) {
- /* Different length isn't supported. Stop optimization at here. */
- end = p;
- goto endcheck;
- }
- }
- }
-endcheck:
- ;
- }
-
- len = end - s;
- n = 0;
- for (i = 0; i < len; i += clen) {
- p = s + i;
- if (ignore_case)
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
- p, end, items);
- clen = enclen(enc, p, end);
- if (p + clen > end)
- clen = (int )(end - p);
-
- for (j = 0; j < clen; j++) {
- skip[s[i + j]] = (UChar )(len - i - j);
- for (k = 0; k < n; k++) {
- ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
- skip[buf[j]] = (UChar )(len - i - j);
- }
- }
- }
- }
- else {
-# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
+ if (len >= ONIG_CHAR_TABLE_SIZE) {
/* This should not happen. */
return ONIGERR_TYPE_BUG;
-# else
- if (IS_NULL(*int_skip)) {
- *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
- }
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
+ }
- n = 0;
+ if (ignore_case) {
for (i = 0; i < len; i += clen) {
p = s + i;
- if (ignore_case)
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
- p, end, items);
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
+ p, end, items);
clen = enclen(enc, p, end);
if (p + clen > end)
clen = (int )(end - p);
for (j = 0; j < n; j++) {
- if ((items[j].code_len != 1) || (items[j].byte_len != clen))
- return 1; /* different length isn't supported. */
- flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
- if (flen != clen)
- return 1; /* different length isn't supported. */
- }
- for (j = 0; j < clen; j++) {
- (*int_skip)[s[i + j]] = (int )(len - i - j);
- for (k = 0; k < n; k++) {
- (*int_skip)[buf[k][j]] = (int )(len - i - j);
+ if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
+ /* Different length isn't supported. Stop optimization at here. */
+ end = p;
+ goto endcheck;
+ }
+ flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
+ if (flen != clen) {
+ /* Different length isn't supported. Stop optimization at here. */
+ end = p;
+ goto endcheck;
}
}
}
-# endif
+endcheck:
+ len = end - s;
}
- return len;
+
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
+ skip[i] = (UChar )(len + 1);
+ n = 0;
+ for (i = 0; i < len; i += clen) {
+ p = s + i;
+ if (ignore_case)
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
+ p, end, items);
+ clen = enclen(enc, p, end);
+ if (p + clen > end)
+ clen = (int )(end - p);
+
+ for (j = 0; j < clen; j++) {
+ skip[s[i + j]] = (UChar )(len - i - j);
+ for (k = 0; k < n; k++) {
+ ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
+ skip[buf[j]] = (UChar )(len - i - j);
+ }
+ }
+ }
+
+ return (int)len;
}
-#endif /* USE_SUNDAY_QUICK_SEARCH */
typedef struct {
OnigDistance min; /* min byte length */
@@ -5036,7 +4923,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
int n = onigenc_strlen(env->enc, sn->s, sn->end);
- max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
+ max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
}
else {
concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
@@ -5344,7 +5231,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
if (e->ignore_case > 0) {
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
- reg->map, &(reg->int_map), 1);
+ reg->map, 1);
reg->exact_end = reg->exact + e->len;
if (e->len >= 3) {
reg->optimize = (allow_reverse != 0
@@ -5363,7 +5250,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
else {
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
set_bm_skip(reg->exact, reg->exact_end, reg,
- reg->map, &(reg->int_map), 0);
+ reg->map, 0);
reg->optimize = (allow_reverse != 0
? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
}
@@ -5648,8 +5535,6 @@ onig_free_body(regex_t* reg)
if (IS_NOT_NULL(reg)) {
if (IS_NOT_NULL(reg->p)) xfree(reg->p);
if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
- if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
- if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
@@ -5676,8 +5561,6 @@ onig_memsize(const regex_t *reg)
if (IS_NULL(reg)) return 0;
if (IS_NOT_NULL(reg->p)) size += reg->alloc;
if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
- if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
- if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
@@ -5961,8 +5844,6 @@ onig_reg_init(regex_t* reg, OnigOptionType option,
(reg)->syntax = syntax;
(reg)->optimize = 0;
(reg)->exact = (UChar* )NULL;
- (reg)->int_map = (int* )NULL;
- (reg)->int_map_backward = (int* )NULL;
(reg)->chain = (regex_t* )NULL;
(reg)->p = (UChar* )NULL;
diff --git a/onigmo/regenc.c b/onigmo/regenc.c
index 7be3166ff..64374d7bd 100644
--- a/onigmo/regenc.c
+++ b/onigmo/regenc.c
@@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako
- * Copyright (c) 2011-2016 K.Takata
+ * Copyright (c) 2011-2019 K.Takata
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -951,6 +951,7 @@ onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
}
#endif
+#ifdef USE_CASE_MAP_API
extern int
onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end,
OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc)
@@ -969,7 +970,7 @@ onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const
if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- code += 'A' - 'a';
+ code -= 'a' - 'A';
} else if (code >= 'A' && code <= 'Z' &&
(flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
@@ -997,8 +998,7 @@ onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar
if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- code -= 'a';
- code += 'A';
+ code -= 'a' - 'A';
} else if (code >= 'A' && code <= 'Z' &&
(flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
@@ -1011,3 +1011,4 @@ onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar
*flagP = flags;
return (int )(to - to_start);
}
+#endif
diff --git a/onigmo/regenc.h b/onigmo/regenc.h
index d21b5afc1..08f19c978 100644
--- a/onigmo/regenc.h
+++ b/onigmo/regenc.h
@@ -5,7 +5,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako
- * Copyright (c) 2011-2016 K.Takata
+ * Copyright (c) 2011-2019 K.Takata
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -129,12 +129,14 @@ typedef struct {
#endif
+/* config */
#define USE_CRNL_AS_LINE_TERMINATOR
#define USE_UNICODE_PROPERTIES
#define USE_UNICODE_AGE_PROPERTIES
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
#define USE_ASCII_ALL_LINE_BREAKS // LF, VT, FF, CR
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
+/* #define USE_CASE_MAP_API */
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
diff --git a/onigmo/regerror.c b/onigmo/regerror.c
index 59cf53068..63b67fce1 100644
--- a/onigmo/regerror.c
+++ b/onigmo/regerror.c
@@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako
- * Copyright (c) 2011-2016 K.Takata
+ * Copyright (c) 2011-2019 K.Takata
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -63,14 +63,18 @@ onig_error_code_to_format(OnigPosition code)
p = "parse depth limit over"; break;
case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET:
p = "default multibyte-encoding is not set"; break;
+#if 0
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
p = "can't convert to wide-char on specified multibyte-encoding"; break;
+#endif
case ONIGERR_INVALID_ARGUMENT:
p = "invalid argument"; break;
case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
p = "end pattern at left brace"; break;
+#if 0
case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
p = "end pattern at left bracket"; break;
+#endif
case ONIGERR_EMPTY_CHAR_CLASS:
p = "empty char-class"; break;
case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
@@ -87,16 +91,20 @@ onig_error_code_to_format(OnigPosition code)
p = "invalid control-code syntax"; break;
case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
p = "char-class value at end of range"; break;
+#if 0
case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
p = "char-class value at start of range"; break;
+#endif
case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
p = "unmatched range specifier in char-class"; break;
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
p = "target of repeat operator is not specified"; break;
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
p = "target of repeat operator is invalid"; break;
+#if 0
case ONIGERR_NESTED_REPEAT_OPERATOR:
p = "nested repeat operator"; break;
+#endif
case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
p = "unmatched close parenthesis"; break;
case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
@@ -121,14 +129,18 @@ onig_error_code_to_format(OnigPosition code)
p = "upper is smaller than lower in repeat range"; break;
case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
p = "empty range in char class"; break;
+#if 0
case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
p = "mismatch multibyte code length in char-class range"; break;
+#endif
case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
p = "too many multibyte code ranges are specified"; break;
case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
p = "too short multibyte code string"; break;
+#if 0
case ONIGERR_TOO_BIG_BACKREF_NUMBER:
p = "too big backref number"; break;
+#endif
case ONIGERR_INVALID_BACKREF:
#ifdef USE_NAMED_GROUP
p = "invalid backref number/name"; break;
diff --git a/onigmo/regexec.c b/onigmo/regexec.c
index 9b6232e30..fdd67184d 100644
--- a/onigmo/regexec.c
+++ b/onigmo/regexec.c
@@ -2,8 +2,8 @@
regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2008 K.Kosako
- * Copyright (c) 2011-2016 K.Takata
+ * Copyright (c) 2002-2018 K.Kosako
+ * Copyright (c) 2011-2019 K.Takata
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -1808,7 +1808,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
/* default behavior: return first-matching result. */
goto finish;
- NEXT;
CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
DATA_ENSURE(1);
@@ -2369,7 +2368,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
goto fail;
- NEXT;
CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
@@ -2379,7 +2377,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
goto fail;
- NEXT;
CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
@@ -2389,7 +2386,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
goto fail;
- NEXT;
CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
@@ -2399,7 +2395,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
goto fail;
- NEXT;
#endif
CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
@@ -2432,7 +2427,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
JUMP;
}
goto fail;
- NEXT;
CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
if (ON_STR_END(s)) {
@@ -2451,7 +2445,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
JUMP;
}
goto fail;
- NEXT;
CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
if (ON_STR_END(s)) {
@@ -2483,7 +2476,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
}
goto fail;
- NEXT;
CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
if (s != msa->gpos)
@@ -2549,12 +2541,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
mem = 1;
goto backref;
- NEXT;
CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
mem = 2;
goto backref;
- NEXT;
CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
GET_MEMNUM_INC(mem, p);
@@ -2964,7 +2954,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc;
- NEXT;
CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
@@ -2997,7 +2986,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_GET_REPEAT(mem, stkp);
si = GET_STACK_INDEX(stkp);
goto repeat_inc_ng;
- NEXT;
CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
STACK_PUSH_POS(s, sprev, pkeep);
@@ -3022,7 +3010,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
STACK_POP_TIL_POS_NOT;
goto fail;
- NEXT;
CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
STACK_PUSH_STOP_BT;
@@ -3063,7 +3050,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
STACK_POP_TIL_LOOK_BEHIND_NOT;
goto fail;
- NEXT;
CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
/* Save the absent-start-pos and the original end-pos. */
@@ -3120,7 +3106,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
STACK_POP_TIL_ABSENT;
goto fail;
- NEXT;
#ifdef USE_SUBEXP_CALL
CASE(OP_CALL) MOP_IN(OP_CALL);
@@ -3150,7 +3135,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_FINISH)
goto finish;
- NEXT;
CASE(OP_FAIL)
if (0) {
@@ -3346,219 +3330,6 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
return (UChar* )NULL;
}
-#ifndef USE_SUNDAY_QUICK_SEARCH
-/* Boyer-Moore-Horspool search applied to a multibyte string */
-static UChar*
-bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *se, *t, *p, *end;
- const UChar *tail;
- ptrdiff_t skip, tlen1;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
- (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
-# endif
-
- tail = target_end - 1;
- tlen1 = tail - target;
- end = text_range;
- if (end + tlen1 > text_end)
- end = text_end - tlen1;
-
- s = text;
-
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- skip = reg->map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- skip = reg->int_map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
-# endif
- }
-
- return (UChar* )NULL;
-}
-
-/* Boyer-Moore-Horspool search */
-static UChar*
-bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end, const UChar* text_range)
-{
- const UChar *s, *t, *p, *end;
- const UChar *tail;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
- (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
-# endif
-
- end = text_range + (target_end - target) - 1;
- if (end > text_end)
- end = text_end;
-
- tail = target_end - 1;
- s = text + (target_end - target) - 1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s;
- t = tail;
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
- (intptr_t )(s - text), s);
-# endif
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- s += reg->map[*s];
- }
- }
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- s += reg->int_map[*s];
- }
-# endif
- }
- return (UChar* )NULL;
-}
-
-/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
-static UChar*
-bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *se, *t, *end;
- const UChar *tail;
- ptrdiff_t skip, tlen1;
- OnigEncoding enc = reg->enc;
- int case_fold_flag = reg->case_fold_flag;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
- (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
-# endif
-
- tail = target_end - 1;
- tlen1 = tail - target;
- end = text_range;
- if (end + tlen1 > text_end)
- end = text_end - tlen1;
-
- s = text;
-
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- skip = reg->map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- skip = reg->int_map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
-# endif
- }
-
- return (UChar* )NULL;
-}
-
-/* Boyer-Moore-Horspool search (ignore case) */
-static UChar*
-bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end, const UChar* text_range)
-{
- const UChar *s, *p, *end;
- const UChar *tail;
- OnigEncoding enc = reg->enc;
- int case_fold_flag = reg->case_fold_flag;
-
-# ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
- (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
-# endif
-
- end = text_range + (target_end - target) - 1;
- if (end > text_end)
- end = text_end;
-
- tail = target_end - 1;
- s = text + (target_end - target) - 1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s - (target_end - target) + 1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- s += reg->map[*s];
- }
- }
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s - (target_end - target) + 1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- s += reg->int_map[*s];
- }
-# endif
- }
- return (UChar* )NULL;
-}
-
-#else /* USE_SUNDAY_QUICK_SEARCH */
-
/* Sunday's quick search applied to a multibyte string */
static UChar*
bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
@@ -3583,39 +3354,19 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
s = text;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- if (s + 1 >= end) break;
- skip = reg->map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
+ while (s < end) {
+ p = se = s + tlen1;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )s;
+ p--; t--;
}
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- if (s + 1 >= end) break;
- skip = reg->int_map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
- }
-# endif
+ if (s + 1 >= end) break;
+ skip = reg->map[se[1]];
+ t = s;
+ do {
+ s += enclen(enc, s, end);
+ } while ((s - t) < skip && s < end);
}
return (UChar* )NULL;
@@ -3642,32 +3393,17 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
end = text_end;
s = text + tlen1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- if (s + 1 >= end) break;
- s += reg->map[s[1]];
+ while (s < end) {
+ p = s;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )p;
+ p--; t--;
}
+ if (s + 1 >= end) break;
+ s += reg->map[s[1]];
}
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- if (s + 1 >= end) break;
- s += reg->int_map[s[1]];
- }
-# endif
- }
+
return (UChar* )NULL;
}
@@ -3696,35 +3432,17 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
s = text;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- if (s + 1 >= end) break;
- skip = reg->map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- se = s + tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, se + 1))
- return (UChar* )s;
- if (s + 1 >= end) break;
- skip = reg->int_map[se[1]];
- t = s;
- do {
- s += enclen(enc, s, end);
- } while ((s - t) < skip && s < end);
- }
-# endif
+ while (s < end) {
+ se = s + tlen1;
+ if (str_lower_case_match(enc, case_fold_flag, target, target_end,
+ s, se + 1))
+ return (UChar* )s;
+ if (s + 1 >= end) break;
+ skip = reg->map[se[1]];
+ t = s;
+ do {
+ s += enclen(enc, s, end);
+ } while ((s - t) < skip && s < end);
}
return (UChar* )NULL;
@@ -3753,83 +3471,17 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
end = text_end;
s = text + tlen1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s - tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- if (s + 1 >= end) break;
- s += reg->map[s[1]];
- }
- }
- else { /* see int_map[] */
-# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
- while (s < end) {
- p = s - tlen1;
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- p, s + 1))
- return (UChar* )p;
- if (s + 1 >= end) break;
- s += reg->int_map[s[1]];
- }
-# endif
- }
- return (UChar* )NULL;
-}
-#endif /* USE_SUNDAY_QUICK_SEARCH */
-
-#ifdef USE_INT_MAP_BACKWARD
-static int
-set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
- int** skip)
-{
- int i, len;
-
- if (IS_NULL(*skip)) {
- *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*skip)) return ONIGERR_MEMORY;
- }
-
- len = (int )(end - s);
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- (*skip)[i] = len;
-
- for (i = len - 1; i > 0; i--)
- (*skip)[s[i]] = i;
-
- return 0;
-}
-
-static UChar*
-bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* adjust_text,
- const UChar* text_end, const UChar* text_start)
-{
- const UChar *s, *t, *p;
-
- s = text_end - (target_end - target);
- if (text_start < s)
- s = text_start;
- else
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
-
- while (s >= text) {
- p = s;
- t = target;
- while (t < target_end && *p == *t) {
- p++; t++;
- }
- if (t == target_end)
- return (UChar* )s;
-
- s -= reg->int_map_backward[*s];
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
+ while (s < end) {
+ p = s - tlen1;
+ if (str_lower_case_match(enc, case_fold_flag, target, target_end,
+ p, s + 1))
+ return (UChar* )p;
+ if (s + 1 >= end) break;
+ s += reg->map[s[1]];
}
return (UChar* )NULL;
}
-#endif
static UChar*
map_search(OnigEncoding enc, UChar map[],
@@ -4064,21 +3716,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
case ONIG_OPTIMIZE_EXACT_BM:
case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
-#ifdef USE_INT_MAP_BACKWARD
- if (IS_NULL(reg->int_map_backward)) {
- int r;
- if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
- goto exact_method;
-
- r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
- &(reg->int_map_backward));
- if (r) return r;
- }
- p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
- end, p);
-#else
goto exact_method;
-#endif
break;
case ONIG_OPTIMIZE_MAP:
diff --git a/onigmo/regint.h b/onigmo/regint.h
index 8dc8bf6a9..48f13fd8e 100644
--- a/onigmo/regint.h
+++ b/onigmo/regint.h
@@ -5,7 +5,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2013 K.Kosako
- * Copyright (c) 2011-2016 K.Takata
+ * Copyright (c) 2011-2019 K.Takata
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -81,13 +81,12 @@
/* #define USE_OP_PUSH_OR_JUMP_EXACT */
#define USE_QTFR_PEEK_NEXT
#define USE_ST_LIBRARY
-#define USE_SUNDAY_QUICK_SEARCH
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
#define DEFAULT_PARSE_DEPTH_LIMIT 4096
-#define OPT_EXACT_MAXLEN 24
+#define OPT_EXACT_MAXLEN 24 /* This must be smaller than ONIG_CHAR_TABLE_SIZE. */
/* check config */
#if defined(USE_PERL_SUBEXP_CALL) || defined(USE_CAPITAL_P_NAMED_GROUP)
@@ -128,14 +127,14 @@
# undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
-#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
+/* #define USE_WORD_BEGIN_END */ /* "\<": word-begin, "\>": word-end */
#ifdef RUBY
# undef USE_CAPTURE_HISTORY
#else
-# define USE_CAPTURE_HISTORY
+/* # define USE_CAPTURE_HISTORY */
#endif
-#define USE_VARIABLE_META_CHARS
-#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+/* #define USE_VARIABLE_META_CHARS */
+/* #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
diff --git a/onigmo/regparse.c b/onigmo/regparse.c
index 5e51e3950..b54a98914 100644
--- a/onigmo/regparse.c
+++ b/onigmo/regparse.c
@@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako
- * Copyright (c) 2011-2016 K.Takata
+ * Copyright (c) 2011-2019 K.Takata
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/onigmo/regparse.h b/onigmo/regparse.h
index acdd3e2f5..5e8b1f6d0 100644
--- a/onigmo/regparse.h
+++ b/onigmo/regparse.h
@@ -5,7 +5,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako
- * Copyright (c) 2011-2016 K.Takata
+ * Copyright (c) 2011-2019 K.Takata
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/onigmo/regversion.c b/onigmo/regversion.c
index d80e8d280..9e9e3875b 100644
--- a/onigmo/regversion.c
+++ b/onigmo/regversion.c
@@ -2,8 +2,8 @@
regversion.c - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako
- * Copyright (c) 2011-2017 K.Takata
+ * Copyright (c) 2002-2018 K.Kosako
+ * Copyright (c) 2011-2019 K.Takata
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -48,8 +48,8 @@ extern const char*
onig_copyright(void)
{
const char *s =
- "Onigmo " ONIG_VERSION_STRING " : Copyright (C) 2002-2016 K.Kosako, "
- "2011-2017 K.Takata";
+ "Onigmo " ONIG_VERSION_STRING " : Copyright (C) 2002-2018 K.Kosako, "
+ "2011-2019 K.Takata";
return s;
}
diff --git a/onigmo/version.txt b/onigmo/version.txt
index 88d06f108..6abaeb2f9 100644
--- a/onigmo/version.txt
+++ b/onigmo/version.txt
@@ -1 +1 @@
-6.1.3
+6.2.0
diff --git a/res/Notepad3.exe.manifest.conf b/res/Notepad3.exe.manifest.conf
index 29533d7bb..4ae33eccc 100644
--- a/res/Notepad3.exe.manifest.conf
+++ b/res/Notepad3.exe.manifest.conf
@@ -3,7 +3,7 @@
Notepad3 develop
diff --git a/scintilla/doc/ScintillaDoc.html b/scintilla/doc/ScintillaDoc.html
index 709b0e7ad..a3d2d27c6 100644
--- a/scintilla/doc/ScintillaDoc.html
+++ b/scintilla/doc/ScintillaDoc.html
@@ -5003,8 +5003,6 @@ struct Sci_TextToFind {
There is some interaction between call tips and autocompletion lists in that showing a
call tip cancels any active autocompletion list, and vice versa.
- Call tips are not implemented on Qt.
-
Call tips can highlight part of the text within them. You could use this to highlight the
current argument to a function by counting the number of commas (or whatever separator your
language uses). See SciTEBase::CharAdded() in SciTEBase.cxx for an
diff --git a/scintilla/doc/ScintillaHistory.html b/scintilla/doc/ScintillaHistory.html
index 6a90f17e0..11b388ce6 100644
--- a/scintilla/doc/ScintillaHistory.html
+++ b/scintilla/doc/ScintillaHistory.html
@@ -540,6 +540,7 @@
jj5 |
| Jad Altahan |
+ Andrea Ricchi |
@@ -558,6 +559,10 @@
Released 10 January 2019.
+
+ Calltips implemented on Qt.
+ Bug #1548.
+
The C++ lexer, with styling.within.preprocessor on, now interprets "(" in preprocessor "#if("
as an operator instead of part of the directive. This improves folding as well which could become
@@ -568,9 +573,21 @@
Feature #1253.
+ Fix inconsistency with dot styling in Nim.
+ Feature #1260.
+
+
+ Enhance the styling of backticks in Nim.
+ Feature #1261.
+
+
Fix fold behaviour with comments in nim.
Feature #1254.
+
+ Fix TCL lexer recognizing '"' after "," inside a bracketed substitution.
+ Bug #1947.
+
Release 4.1.3
diff --git a/scintilla/lexers/LexNim.cxx b/scintilla/lexers/LexNim.cxx
index 109b3547b..fec9159ef 100644
--- a/scintilla/lexers/LexNim.cxx
+++ b/scintilla/lexers/LexNim.cxx
@@ -424,13 +424,17 @@ void SCI_METHOD LexerNim::Lex(Sci_PositionU startPos, Sci_Position length,
sc.SetState(SCE_NIM_DEFAULT);
break;
case SCE_NIM_IDENTIFIER:
- if (!IsAWordChar(sc.ch)) {
+ if (sc.ch == '.' || !IsAWordChar(sc.ch)) {
char s[100];
sc.GetCurrent(s, sizeof(s));
int style = SCE_NIM_IDENTIFIER;
if (keywords.InList(s) && !funcNameExists) {
- style = SCE_NIM_WORD;
+ // Prevent styling keywords if they are sub-identifiers
+ Sci_Position segStart = styler.GetStartSegment() - 1;
+ if (segStart < 0 || styler.SafeGetCharAt(segStart, '\0') != '.') {
+ style = SCE_NIM_WORD;
+ }
} else if (funcNameExists) {
style = SCE_NIM_FUNCNAME;
}
@@ -450,6 +454,18 @@ void SCI_METHOD LexerNim::Lex(Sci_PositionU startPos, Sci_Position length,
sc.ForwardSetState(SCE_NIM_DEFAULT);
}
break;
+ case SCE_NIM_FUNCNAME:
+ if (sc.ch == '`') {
+ funcNameExists = false;
+ sc.ForwardSetState(SCE_NIM_DEFAULT);
+ } else if (sc.atLineEnd) {
+ // Prevent leaking the style to the next line if not closed
+ funcNameExists = false;
+
+ sc.ChangeState(SCE_NIM_STRINGEOL);
+ sc.ForwardSetState(SCE_NIM_DEFAULT);
+ }
+ break;
case SCE_NIM_COMMENT:
if (sc.Match(']', '#')) {
if (commentNestLevel > 0) {
@@ -523,7 +539,10 @@ void SCI_METHOD LexerNim::Lex(Sci_PositionU startPos, Sci_Position length,
}
break;
case SCE_NIM_BACKTICKS:
- if (sc.ch == '`' || sc.atLineEnd) {
+ if (sc.ch == '`' ) {
+ sc.ForwardSetState(SCE_NIM_DEFAULT);
+ } else if (sc.atLineEnd) {
+ sc.ChangeState(SCE_NIM_STRINGEOL);
sc.ForwardSetState(SCE_NIM_DEFAULT);
}
break;
@@ -627,10 +646,10 @@ void SCI_METHOD LexerNim::Lex(Sci_PositionU startPos, Sci_Position length,
}
// Operator definition
else if (sc.ch == '`') {
- sc.SetState(SCE_NIM_BACKTICKS);
-
if (funcNameExists) {
- funcNameExists = false;
+ sc.SetState(SCE_NIM_FUNCNAME);
+ } else {
+ sc.SetState(SCE_NIM_BACKTICKS);
}
}
// Keyword
diff --git a/scintilla/lexers/LexTCL.cxx b/scintilla/lexers/LexTCL.cxx
index 0948f4880..1ea6ecf6e 100644
--- a/scintilla/lexers/LexTCL.cxx
+++ b/scintilla/lexers/LexTCL.cxx
@@ -128,8 +128,10 @@ next:
continue;
case ',':
sc.SetState(SCE_TCL_OPERATOR);
- if (subParen)
+ if (subParen) {
sc.ForwardSetState(SCE_TCL_SUBSTITUTION);
+ goto next; // Already forwarded so avoid loop's Forward()
+ }
continue;
default :
// maybe spaces should be allowed ???
diff --git a/scionigmo/OnigmoRegExEngine.cxx b/scionigmo/OnigmoRegExEngine.cxx
index fbc543e8a..0eb925b60 100644
--- a/scionigmo/OnigmoRegExEngine.cxx
+++ b/scionigmo/OnigmoRegExEngine.cxx
@@ -251,7 +251,7 @@ Sci::Position OnigmoRegExEngine::FindText(Document* doc, Sci::Position minPos, S
ONIG_OPTION_ON(onigmoOptions, (rangeEnd != docLen) ? ONIG_OPTION_NOTEOL : ONIG_OPTION_NONE);
std::string sPattern(pattern);
- std::string const sRegExprStrg = translateRegExpr(sPattern, word, wordStart, doc->eolMode, onigmoOptions);
+ std::string const & sRegExprStrg = translateRegExpr(sPattern, word, wordStart, doc->eolMode, onigmoOptions);
bool const bReCompile = (m_RegExpr == nullptr) || (m_CmplOptions != onigmoOptions) || (m_RegExprStrg.compare(sRegExprStrg) != 0);
@@ -360,7 +360,7 @@ const char* OnigmoRegExEngine::SubstituteByPosition(Document* doc, const char* t
return nullptr;
}
std::string sText(text, *length);
- std::string const rawReplStrg = convertReplExpr(sText);
+ std::string const & rawReplStrg = convertReplExpr(sText);
m_SubstBuffer.clear();
diff --git a/src/Dialogs.c b/src/Dialogs.c
index 1ab60140a..900f85920 100644
--- a/src/Dialogs.c
+++ b/src/Dialogs.c
@@ -508,28 +508,26 @@ INT_PTR CALLBACK AboutDlgProc(HWND hwnd, UINT umsg, WPARAM wParam, LPARAM lParam
{
case WM_INITDIALOG:
{
- {
- if (Globals.hDlgIcon) { SendMessage(hwnd, WM_SETICON, ICON_SMALL, (LPARAM)Globals.hDlgIcon); }
+ if (Globals.hDlgIcon) { SendMessage(hwnd, WM_SETICON, ICON_SMALL, (LPARAM)Globals.hDlgIcon); }
- SetDlgItemText(hwnd, IDC_VERSION, MKWCS(VERSION_FILEVERSION_LONG));
+ SetDlgItemText(hwnd, IDC_VERSION, MKWCS(VERSION_FILEVERSION_LONG));
- if (hFontTitle) { DeleteObject(hFontTitle); }
+ if (hFontTitle) { DeleteObject(hFontTitle); }
- if (NULL == (hFontTitle = (HFONT)SendDlgItemMessage(hwnd, IDC_VERSION, WM_GETFONT, 0, 0))) {
- hFontTitle = GetStockObject(DEFAULT_GUI_FONT);
- }
-
- LOGFONT lf;
- GetObject(hFontTitle, sizeof(LOGFONT), &lf);
- lf.lfWeight = FW_BOLD;
- lf.lfWidth = ScaleIntFontSize(8);
- lf.lfHeight = ScaleIntFontSize(22);
- // lf.lfQuality = ANTIALIASED_QUALITY;
- hFontTitle = CreateFontIndirect(&lf);
-
- SendDlgItemMessage(hwnd, IDC_VERSION, WM_SETFONT, (WPARAM)hFontTitle, true);
+ if (NULL == (hFontTitle = (HFONT)SendDlgItemMessage(hwnd, IDC_VERSION, WM_GETFONT, 0, 0))) {
+ hFontTitle = GetStockObject(DEFAULT_GUI_FONT);
}
+ LOGFONT lf;
+ GetObject(hFontTitle, sizeof(LOGFONT), &lf);
+ lf.lfWeight = FW_BOLD;
+ lf.lfWidth = ScaleIntFontSize(8);
+ lf.lfHeight = ScaleIntFontSize(22);
+ // lf.lfQuality = ANTIALIASED_QUALITY;
+ hFontTitle = CreateFontIndirect(&lf);
+
+ SendDlgItemMessage(hwnd, IDC_VERSION, WM_SETFONT, (WPARAM)hFontTitle, true);
+
SetDlgItemText(hwnd, IDC_SCI_VERSION, VERSION_SCIVERSION);
SetDlgItemText(hwnd, IDC_COPYRIGHT, VERSION_LEGALCOPYRIGHT);
SetDlgItemText(hwnd, IDC_AUTHORNAME, VERSION_AUTHORNAME);
diff --git a/src/Dlapi.c b/src/Dlapi.c
index 2f8e4794d..909c7211c 100644
--- a/src/Dlapi.c
+++ b/src/Dlapi.c
@@ -1266,7 +1266,7 @@ bool DriveBox_SelectDrive(HWND hwnd,LPCWSTR lpszPath)
}
// Don't select anything
- SendMessage(hwnd,CB_SETCURSEL,(WPARAM)-1,0);
+ SendMessage(hwnd,CB_SETCURSEL,(WPARAM)-1,0);
return false;
}
diff --git a/src/Edit.c b/src/Edit.c
index 0ea9d2f7b..60d0c71cc 100644
--- a/src/Edit.c
+++ b/src/Edit.c
@@ -3479,7 +3479,7 @@ void EditStripLastCharacter(HWND hwnd, bool bIgnoreSelection, bool bTrailingBlan
if (bTrailingBlanksOnly)
{
DocPos i = iEndPos;
- char ch = '\0';
+ char ch;
do {
ch = SciCall_GetCharAt(--i);
} while ((i >= iStartPos) && IsBlankChar(ch));
diff --git a/src/Encoding.c b/src/Encoding.c
index 7f403b8de..bb00fb5f1 100644
--- a/src/Encoding.c
+++ b/src/Encoding.c
@@ -343,7 +343,7 @@ typedef struct _ee {
} ENCODINGENTRY, *PENCODINGENTRY;
int CmpEncoding(const void *s1, const void *s2) {
- return StrCmp(((PENCODINGENTRY)s1)->wch, ((PENCODINGENTRY)s2)->wch);
+ return StrCmp(((const ENCODINGENTRY *)s1)->wch, ((const ENCODINGENTRY *)s2)->wch);
}
// ============================================================================
diff --git a/src/Notepad3.c b/src/Notepad3.c
index bdf92934d..4e9c8f417 100644
--- a/src/Notepad3.c
+++ b/src/Notepad3.c
@@ -2937,7 +2937,7 @@ LRESULT MsgInitMenu(HWND hwnd, WPARAM wParam, LPARAM lParam)
EnableCmd(hmenu,IDM_VIEW_CUSTOMIZETB, Settings.ShowToolbar);
CheckCmd(hmenu,IDM_VIEW_STATUSBAR,Settings.ShowStatusbar);
- i = SciCall_GetLexer();
+ //i = SciCall_GetLexer();
//EnableCmd(hmenu,IDM_VIEW_AUTOCLOSETAGS,(i == SCLEX_HTML || i == SCLEX_XML));
CheckCmd(hmenu, IDM_VIEW_AUTOCLOSETAGS, Settings.AutoCloseTags /*&& (i == SCLEX_HTML || i == SCLEX_XML)*/);
diff --git a/src/Notepad3.cppcheck b/src/Notepad3.cppcheck
new file mode 100644
index 000000000..ee5822030
--- /dev/null
+++ b/src/Notepad3.cppcheck
@@ -0,0 +1,14 @@
+
+
+ win64
+ true
+
+ windows
+
+
+ cert
+
+
+ clang-tidy
+
+
diff --git a/src/Styles.c b/src/Styles.c
index 0b6bd76dc..85fc209c8 100644
--- a/src/Styles.c
+++ b/src/Styles.c
@@ -338,14 +338,13 @@ void Style_Load()
//
void Style_Save()
{
- WCHAR tch[32] = { L'\0' };;
- WCHAR szTmpStyle[BUFSIZE_STYLE_VALUE] = { L'\0' };
size_t const len = NUMLEXERS * AVG_NUM_OF_STYLES_PER_LEXER * 100;
WCHAR *pIniSection = AllocMem(len * sizeof(WCHAR), HEAP_ZERO_MEMORY);
if (pIniSection) {
// Custom colors
for (int i = 0; i < 16; i++) {
if (s_colorCustom[i] != s_colorDefault[i]) {
+ WCHAR tch[32] = { L'\0' };
WCHAR wch[32] = { L'\0' };
StringCchPrintf(tch, COUNTOF(tch), L"%02i", i + 1);
StringCchPrintf(wch, COUNTOF(wch), L"#%02X%02X%02X",
@@ -393,6 +392,7 @@ void Style_Save()
while (g_pLexArray[iLexer]->Styles[i].iStyle != -1) {
if (((*pLexFunction)(FCT_SETTING_CHANGE, 0) & (((__int64)1) << (i+2))) != 0LL) {
// normalize
+ WCHAR szTmpStyle[BUFSIZE_STYLE_VALUE];
szTmpStyle[0] = L'\0'; // clear
Style_CopyStyles_IfNotDefined(g_pLexArray[iLexer]->Styles[i].szValue, szTmpStyle, COUNTOF(szTmpStyle), true, true);
IniSectionSetString(pIniSection, g_pLexArray[iLexer]->Styles[i].pszName, szTmpStyle);
@@ -468,7 +468,6 @@ bool Style_Export(HWND hwnd)
WCHAR szFile[MAX_PATH * 2] = { L'\0' };
WCHAR szFilter[256] = { L'\0' };
OPENFILENAME ofn;
- DWORD dwError = ERROR_SUCCESS;
ZeroMemory(&ofn,sizeof(OPENFILENAME));
GetLngString(IDS_MUI_FILTER_INI,szFilter,COUNTOF(szFilter));
@@ -485,6 +484,7 @@ bool Style_Export(HWND hwnd)
if (GetSaveFileName(&ofn))
{
+ DWORD dwError = ERROR_SUCCESS;
size_t const len = NUMLEXERS * AVG_NUM_OF_STYLES_PER_LEXER * 100;
WCHAR *pIniSection = AllocMem(len * sizeof(WCHAR), HEAP_ZERO_MEMORY);
if (pIniSection) {
@@ -843,10 +843,10 @@ void Style_SetLexer(HWND hwnd, PEDITLEXER pLexNew)
else {
SendMessage(hwnd, SCI_SETCARETSTYLE, CARETSTYLE_LINE, 0);
- WCHAR wch[32] = { L'\0' };
iValue = 1;
fValue = 1.0f; // default caret width
if (Style_StrGetSize(pCurrentStandard->Styles[STY_CARET].szValue, &fValue)) {
+ WCHAR wch[32] = { L'\0' };
iValue = clampi(float2int(fValue), 1, 3); // don't allow invisible 0
StringCchPrintf(wch,COUNTOF(wch),L"size:%i",iValue);
StringCchCat(wchSpecificStyle,COUNTOF(wchSpecificStyle),wch);
@@ -1822,10 +1822,10 @@ bool Style_StrGetFont(LPCWSTR lpszStyle, LPWSTR lpszFont, int cchFont)
//
bool Style_StrGetFontQuality(LPCWSTR lpszStyle,LPWSTR lpszQuality,int cchQuality)
{
- WCHAR tch[BUFSIZE_STYLE_VALUE] = { L'\0' };
WCHAR *p = StrStrI(lpszStyle, L"smoothing:");
if (p)
{
+ WCHAR tch[BUFSIZE_STYLE_VALUE] = { L'\0' };
StringCchCopy(tch,COUNTOF(tch),p + CSTRLEN(L"smoothing:"));
p = StrChr(tch, L';');
if (p)
@@ -1929,12 +1929,10 @@ bool Style_StrGetSize(LPCWSTR lpszStyle, float* f)
//
bool Style_StrGetSizeStr(LPCWSTR lpszStyle,LPWSTR lpszSize,int cchSize)
{
- WCHAR tch[BUFSIZE_STYLE_VALUE] = { L'\0' };
- WCHAR wchFloatVal[64];
-
WCHAR *p = StrStrI(lpszStyle, L"size:");
if (p)
{
+ WCHAR tch[BUFSIZE_STYLE_VALUE] = { L'\0' };
StringCchCopy(tch, COUNTOF(tch), (p + CSTRLEN(L"size:")));
p = StrChr(tch, L';');
if (p) { *p = L'\0'; }
@@ -1942,6 +1940,7 @@ bool Style_StrGetSizeStr(LPCWSTR lpszStyle,LPWSTR lpszSize,int cchSize)
float fValue = 0.0f;
if (Char2FloatW(tch, &fValue)) {
+ WCHAR wchFloatVal[64];
fValue = (float)fabs(fValue);
Float2String(fValue, wchFloatVal, COUNTOF(wchFloatVal));
@@ -2034,12 +2033,12 @@ void Style_AppendWeightStr(LPWSTR lpszWeight, int cchSize, int fontWeight)
//
bool Style_StrGetColor(bool bFore, LPCWSTR lpszStyle, COLORREF* rgb)
{
- WCHAR tch[BUFSIZE_STYLE_VALUE] = { L'\0' };
WCHAR *pItem = (bFore) ? L"fore:" : L"back:";
WCHAR *p = StrStrI(lpszStyle, pItem);
if (p)
{
+ WCHAR tch[BUFSIZE_STYLE_VALUE] = { L'\0' };
StringCchCopy(tch, COUNTOF(tch), p + StringCchLenW(pItem,0));
if (tch[0] == L'#')
tch[0] = L' ';
@@ -2586,11 +2585,11 @@ bool Style_SelectFont(HWND hwnd,LPWSTR lpszStyle,int cchStyle, LPCWSTR sLexerNam
StringCchCat(szNewStyle, COUNTOF(szNewStyle), newSize);
- WCHAR chset[32] = { L'\0' };
if (bGlobalDefaultStyle &&
(lf.lfCharSet != DEFAULT_CHARSET) &&
(lf.lfCharSet != ANSI_CHARSET) &&
(lf.lfCharSet != Globals.iDefaultCharSet)) {
+ WCHAR chset[32] = { L'\0' };
if (lf.lfCharSet == iCharSet) {
if (StrStrI(lpszStyle, L"charset:"))
{
@@ -3129,7 +3128,6 @@ INT_PTR CALLBACK Style_CustomizeSchemesDlgProc(HWND hwnd,UINT umsg,WPARAM wParam
static PEDITLEXER pCurrentLexer = NULL;
static PEDITSTYLE pCurrentStyle = NULL;
static int iCurStyleIdx = -1;
- static HFONT hFontTitle;
static HBRUSH hbrFore;
static HBRUSH hbrBack;
static bool bIsStyleSelected = false;
@@ -3137,13 +3135,11 @@ INT_PTR CALLBACK Style_CustomizeSchemesDlgProc(HWND hwnd,UINT umsg,WPARAM wParam
static WCHAR* Style_StylesBackup[NUMLEXERS * AVG_NUM_OF_STYLES_PER_LEXER];
static __int64 Style_ChangedBackup[NUMLEXERS];
- WCHAR tchBuf[128] = { L'\0' };
- WCHAR wchText[512] = { L'\0' };
-
switch(umsg)
{
case WM_INITDIALOG:
{
+ WCHAR wchText[512] = { L'\0' };
if (Globals.hDlgIcon) { SendMessage(hwnd, WM_SETICON, ICON_SMALL, (LPARAM)Globals.hDlgIcon); }
GetLngString(IDS_MUI_STYLEEDIT_HELP, wchText, COUNTOF(wchText));
SetDlgItemText(hwnd, IDC_STYLEEDIT_HELP, wchText);
@@ -3207,6 +3203,7 @@ INT_PTR CALLBACK Style_CustomizeSchemesDlgProc(HWND hwnd,UINT umsg,WPARAM wParam
MakeBitmapButton(hwnd,IDC_NEXTSTYLE,Globals.hInstance,IDB_NEXT);
// Setup title font
+ static HFONT hFontTitle = NULL;
if (hFontTitle) {
DeleteObject(hFontTitle);
}
@@ -3225,6 +3222,7 @@ INT_PTR CALLBACK Style_CustomizeSchemesDlgProc(HWND hwnd,UINT umsg,WPARAM wParam
else
SetDlgPos(hwnd, Settings.CustomSchemesDlgPosX, Settings.CustomSchemesDlgPosY);
+ WCHAR tchBuf[128] = { L'\0' };
HMENU hmenu = GetSystemMenu(hwnd, false);
GetLngString(IDS_MUI_PREVIEW, tchBuf, COUNTOF(tchBuf));
InsertMenu(hmenu, 0, MF_BYPOSITION | MF_STRING | MF_ENABLED, IDS_MUI_PREVIEW, tchBuf);
diff --git a/src/Version.h b/src/Version.h
index 4fe3effde..2c8463108 100644
--- a/src/Version.h
+++ b/src/Version.h
@@ -71,7 +71,7 @@
#endif
#elif (_MSC_VER >= 1916)
#if(_MSC_FULL_VER >= 191627026)
- #define VER_CPL MS Visual C++ 2017 v15.9.5
+ #define VER_CPL MS Visual C++ 2017 v15.9.(5-6)
#elif(_MSC_FULL_VER >= 191627025)
#define VER_CPL MS Visual C++ 2017 v15.9.4
#elif(_MSC_FULL_VER >= 191627024)
diff --git a/src/VersionEx.h b/src/VersionEx.h
index 057886679..a702587da 100644
--- a/src/VersionEx.h
+++ b/src/VersionEx.h
@@ -6,8 +6,8 @@
#define APPNAME "Notepad3"
#define VERSION_MAJOR 5
#define VERSION_MINOR 19
-#define VERSION_REV 125
-#define VERSION_BUILD 1608
+#define VERSION_REV 130
+#define VERSION_BUILD 1611
#define SCINTILLA_VER 413
-#define ONIGMO_REGEX_VER 6.1.3
+#define ONIGMO_REGEX_VER 6.2.0
#define VERSION_PATCH "develop"