mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-11 21:03:05 +08:00
+fix: regex dot(.) not matching linefeed(LF)/newline character, if not enabled for line-breaks
This commit is contained in:
parent
7b35d58af3
commit
3985f999b5
@ -64,7 +64,7 @@ using namespace Scintilla::Internal;
|
||||
|
||||
enum class EOLmode : int { UDEF = -1, CRLF = SC_EOL_CRLF, CR = SC_EOL_CR, LF = SC_EOL_LF };
|
||||
|
||||
static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8_CR };
|
||||
static OnigEncoding s_UsedEncodingsTypes[] = { ONIG_ENCODING_UTF8 };
|
||||
|
||||
// ============================================================================
|
||||
// ============================================================================
|
||||
@ -328,11 +328,9 @@ Sci::Position OnigurumaRegExEngine::FindText(Document* doc, Sci::Position minPos
|
||||
|
||||
try {
|
||||
|
||||
OnigEncoding const onigEncType = ((eolMode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8);
|
||||
|
||||
OnigErrorInfo einfo;
|
||||
int const res = onig_new(&m_RegExpr, UCharCPtr(m_RegExprStrg.c_str()), UCharCPtr(m_RegExprStrg.c_str() + m_RegExprStrg.length()),
|
||||
m_CmplOptions, onigEncType, &m_OnigSyntax, &einfo);
|
||||
m_CmplOptions, ONIG_ENCODING_UTF8, &m_OnigSyntax, &einfo);
|
||||
|
||||
if (res != ONIG_NORMAL) {
|
||||
onig_error_code_to_str(UCharPtr(m_ErrorInfo), res, &einfo);
|
||||
@ -795,10 +793,8 @@ OnigPos SimpleRegExEngine::Find(const OnigUChar* pattern, const OnigUChar* docum
|
||||
try {
|
||||
onig_free(m_RegExpr);
|
||||
|
||||
OnigEncoding const onigEncType = ((m_EOLmode == EOLmode::CR) ? ONIG_ENCODING_UTF8_CR : ONIG_ENCODING_UTF8);
|
||||
|
||||
OnigErrorInfo einfo;
|
||||
int res = onig_new(&m_RegExpr, pattern, (pattern + patternLen), m_Options, onigEncType, &m_OnigSyntax, &einfo);
|
||||
int res = onig_new(&m_RegExpr, pattern, (pattern + patternLen), m_Options, ONIG_ENCODING_UTF8, &m_OnigSyntax, &einfo);
|
||||
|
||||
if (res != ONIG_NORMAL) {
|
||||
//onig_error_code_to_str(m_ErrorInfo, res, &einfo);
|
||||
|
||||
@ -93,12 +93,37 @@ ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static int
|
||||
ascii_is_newline(const UChar *p, const UChar *end) {
|
||||
#ifdef USE_CRNL_AS_LINE_TERMINATOR
|
||||
if (p + 1 < end) {
|
||||
if ((*p == CARRIAGE_RET) && (*(p+1) == NEWLINE_CODE))
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
if (p < end) {
|
||||
#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR
|
||||
if ((*p == CARRIAGE_RET) || (*p == NEWLINE_CODE) || (*p == END_OF_FILE))
|
||||
return 1;
|
||||
#else
|
||||
if ((*p == CARRIAGE_RET) || (*p == NEWLINE_CODE))
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR
|
||||
if (p == end)
|
||||
return 1;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
OnigEncodingType OnigEncodingASCII = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"US-ASCII", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
ascii_is_newline,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
@ -116,29 +141,3 @@ OnigEncodingType OnigEncodingASCII = {
|
||||
ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
|
||||
0, 0
|
||||
};
|
||||
|
||||
|
||||
/*
 * US-ASCII encoding table whose newline slot is onigenc_is_mbc_newline_0x0d
 * instead of the LF-based predicate; the name string stays "US-ASCII".
 * The initializer is positional, so the order of entries is significant.
 * NOTE(review): slot roles not labelled below are assumed to follow the
 * upstream Oniguruma OnigEncodingTypeST layout - confirm against the
 * header used by this tree.
 */
OnigEncodingType OnigEncodingASCII_CR = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"US-ASCII", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0d, /* newline predicate: CR-based variant */
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
onigenc_ascii_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
ascii_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
init, /* presumably the init slot (cf. "is_initialized" next) - confirm */
|
||||
0, /* is_initialized */
|
||||
onigenc_always_true_is_valid_mbc_string,
|
||||
ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
|
||||
0, 0
|
||||
};
|
||||
|
||||
|
||||
@ -4,16 +4,12 @@
|
||||
#ifndef _ONIGURUMA_CONFIG_H_
|
||||
#define _ONIGURUMA_CONFIG_H_
|
||||
|
||||
#define STDC_HEADERS 1
|
||||
#define HAVE_INTTYPES_H 1
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
#define HAVE_STDLIB_H 1
|
||||
#define HAVE_STRING_H 1
|
||||
#define HAVE_MEMORY_H 1
|
||||
#define HAVE_FLOAT_H 1
|
||||
#define HAVE_OFF_T 1
|
||||
#define SIZEOF_INT 4
|
||||
#define SIZEOF_SHORT 2
|
||||
#define SIZEOF_LONG 4
|
||||
#define SIZEOF_LONG_LONG 8
|
||||
#define SIZEOF___INT64 8
|
||||
@ -27,9 +23,7 @@
|
||||
#endif
|
||||
#define SIZEOF_FLOAT 4
|
||||
#define SIZEOF_DOUBLE 8
|
||||
#define HAVE_PROTOTYPES 1
|
||||
#define TOKEN_PASTE(x,y) x##y
|
||||
#define HAVE_STDARG_PROTOTYPES 1
|
||||
#ifndef NORETURN
|
||||
#if _MSC_VER > 1100
|
||||
#define NORETURN(x) __declspec(noreturn) x
|
||||
@ -38,53 +32,24 @@
|
||||
#endif
|
||||
#endif
|
||||
#define HAVE_DECL_SYS_NERR 1
|
||||
#define STDC_HEADERS 1
|
||||
#define HAVE_STDINT_H 1
|
||||
#define HAVE_STDLIB_H 1
|
||||
#define HAVE_STRING_H 1
|
||||
#define HAVE_LIMITS_H 1
|
||||
#define HAVE_FCNTL_H 1
|
||||
#define HAVE_SYS_UTIME_H 1
|
||||
#define HAVE_MEMORY_H 1
|
||||
#define uid_t int
|
||||
#define gid_t int
|
||||
#define HAVE_STRUCT_STAT_ST_RDEV 1
|
||||
#define HAVE_ST_RDEV 1
|
||||
#define GETGROUPS_T int
|
||||
#define RETSIGTYPE void
|
||||
#define HAVE_ALLOCA 1
|
||||
#define HAVE_DUP2 1
|
||||
#define HAVE_MEMCMP 1
|
||||
#define HAVE_MEMMOVE 1
|
||||
#define HAVE_MKDIR 1
|
||||
#define HAVE_STRCASECMP 1
|
||||
#define HAVE_STRNCASECMP 1
|
||||
#define HAVE_STRERROR 1
|
||||
#define HAVE_STRFTIME 1
|
||||
#define HAVE_STRCHR 1
|
||||
#define HAVE_STRSTR 1
|
||||
#define HAVE_STRTOD 1
|
||||
#define HAVE_STRTOL 1
|
||||
#define HAVE_STRTOUL 1
|
||||
#define HAVE_FLOCK 1
|
||||
#define HAVE_VSNPRINTF 1
|
||||
#define HAVE_FINITE 1
|
||||
#define HAVE_FMOD 1
|
||||
#define HAVE_FREXP 1
|
||||
#define HAVE_HYPOT 1
|
||||
#define HAVE_MODF 1
|
||||
#define HAVE_WAITPID 1
|
||||
#define HAVE_CHSIZE 1
|
||||
#define HAVE_TIMES 1
|
||||
#define HAVE__SETJMP 1
|
||||
#define HAVE_TELLDIR 1
|
||||
#define HAVE_SEEKDIR 1
|
||||
#define HAVE_MKTIME 1
|
||||
#define HAVE_COSH 1
|
||||
#define HAVE_SINH 1
|
||||
#define HAVE_TANH 1
|
||||
#define HAVE_EXECVE 1
|
||||
#define HAVE_TZNAME 1
|
||||
#define HAVE_DAYLIGHT 1
|
||||
#define SETPGRP_VOID 1
|
||||
#define inline __inline
|
||||
|
||||
@ -189,9 +189,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
|
||||
#else // lean and mean
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingASCII_CR;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8_CR;
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
@ -228,9 +226,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingUTF8_CR;
|
||||
#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
|
||||
#else // lean and mean
|
||||
#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
|
||||
#define ONIG_ENCODING_ASCII_CR (&OnigEncodingASCII_CR)
|
||||
#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
|
||||
#define ONIG_ENCODING_UTF8_CR (&OnigEncodingUTF8_CR)
|
||||
#endif
|
||||
|
||||
#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
|
||||
|
||||
@ -697,6 +697,7 @@ onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
|
||||
return ONIG_NO_SUPPORT_CONFIG;
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
|
||||
{
|
||||
@ -714,22 +715,6 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_is_mbc_newline_0x0d(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p < end) {
|
||||
#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR
|
||||
if ((*p == CARRIAGE_RET)||(*p == END_OF_FILE)) return 1; // CR
|
||||
#else
|
||||
if (*p == CARRIAGE_RET) return 1; // CR
|
||||
#endif
|
||||
}
|
||||
#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR
|
||||
if (p == end)
|
||||
return 1;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* for single byte encodings */
|
||||
extern int
|
||||
|
||||
@ -120,11 +120,12 @@ struct PropertyNameCtype {
|
||||
|
||||
#define USE_END_OF_FILE_AS_LINE_TERMINATOR
|
||||
#define USE_CRNL_AS_LINE_TERMINATOR
|
||||
|
||||
#define USE_UNICODE_PROPERTIES
|
||||
#define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
#define USE_UNICODE_WORD_BREAK
|
||||
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
|
||||
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
|
||||
#define USE_UNICODE_ALL_LINE_TERMINATORS /* see Unicode.org UTS #18 */
|
||||
|
||||
|
||||
//~#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
|
||||
@ -159,7 +160,7 @@ extern int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPair
|
||||
extern int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
extern int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
|
||||
extern int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
|
||||
extern int onigenc_is_mbc_newline_0x0d P_((const UChar* p, const UChar* end));
|
||||
|
||||
|
||||
/* methods for single byte encoding */
|
||||
extern int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
|
||||
|
||||
@ -261,6 +261,50 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
static int
|
||||
is_utf8_newline(const UChar *p, const UChar *end)
|
||||
{
|
||||
#ifdef USE_CRNL_AS_LINE_TERMINATOR
|
||||
if (p + 1 < end) {
|
||||
if ((*p == CARRIAGE_RET) && (*(p+1) == NEWLINE_CODE)) // CRLF
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if (p + 2 < end) {
|
||||
if ((*p == 0xe2) && (*(p+1) == 0x80) && ((*(p+2) == 0xa8) || (*(p+2) == 0xa9))) // LS or PS
|
||||
return 1;
|
||||
}
|
||||
if (p + 1 < end) {
|
||||
if ((*p == 0xc2) && (*(p+1) == 0x85)) // NEL
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (p < end) {
|
||||
#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR
|
||||
if ((*p == CARRIAGE_RET) || (*p == NEWLINE_CODE) || (*p == END_OF_FILE))
|
||||
return 1;
|
||||
#else
|
||||
if ((*p == CARRIAGE_RET) || (*p == NEWLINE_CODE))
|
||||
return 1;
|
||||
#endif
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if ((*p == 0x0b) || (*p == 0x0c)) // VT or FF
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef USE_END_OF_FILE_AS_LINE_TERMINATOR
|
||||
if (p == end)
|
||||
return 1;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
OnigEncodingType OnigEncodingUTF8 = {
|
||||
mbc_enc_len,
|
||||
"UTF-8", /* name */
|
||||
@ -270,7 +314,7 @@ OnigEncodingType OnigEncodingUTF8 = {
|
||||
6,
|
||||
#endif
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
is_utf8_newline,
|
||||
mbc_to_code,
|
||||
code_to_mbclen,
|
||||
code_to_mbc,
|
||||
@ -288,34 +332,3 @@ OnigEncodingType OnigEncodingUTF8 = {
|
||||
ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1_OR_0,
|
||||
0, 0
|
||||
};
|
||||
|
||||
|
||||
/*
 * UTF-8 encoding table whose newline slot is onigenc_is_mbc_newline_0x0d
 * instead of an LF-based predicate; the name string stays "UTF-8".
 * The initializer is positional, so the order of entries is significant.
 * NOTE(review): slot roles not labelled below are assumed to follow the
 * upstream Oniguruma OnigEncodingTypeST layout - confirm against the
 * header used by this tree.
 */
OnigEncodingType OnigEncodingUTF8_CR = {
|
||||
mbc_enc_len,
|
||||
"UTF-8", /* name */
|
||||
#ifdef USE_RFC3629_RANGE
|
||||
4, /* max enc length */
|
||||
#else
|
||||
|
||||
6, /* max enc length when USE_RFC3629_RANGE is not defined */
|
||||
#endif
|
||||
1, /* min enc length */
|
||||
//is_mbc_newline,
|
||||
onigenc_is_mbc_newline_0x0d, /* newline predicate: CR-based variant */
|
||||
mbc_to_code,
|
||||
code_to_mbclen,
|
||||
code_to_mbc,
|
||||
mbc_case_fold,
|
||||
onigenc_unicode_apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_unicode_property_name_to_ctype,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
NULL, /* init */
|
||||
NULL, /* is_initialized */
|
||||
is_valid_mbc_string,
|
||||
ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1_OR_0,
|
||||
0, 0
|
||||
};
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user