mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-14 21:09:05 +08:00
Merge pull request #3167 from RaiKoHoff/Dev_Lexilla
Hyperlink RegEx pattern fixed to use valid Unicode
This commit is contained in:
commit
d8fa7ff7fa
@ -31,8 +31,9 @@ Supported character encodings:
|
||||
|
||||
Master branch
|
||||
-------------
|
||||
* NEW API: ONIG_SYNTAX_PYTHON
|
||||
* NEW API: ONIG_OPTION_CALLBACK_EACH_MATCH
|
||||
* NEW API: ONIG_OPTION_IGNORECASE_IS_ASCII
|
||||
* NEW API: ONIG_SYNTAX_PYTHON
|
||||
|
||||
|
||||
Version 6.9.6
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
Oniguruma API Version 6.9.7 2021/01/18
|
||||
Oniguruma API Version 6.9.7 2021/03/01
|
||||
|
||||
#include <oniguruma.h>
|
||||
|
||||
@ -333,6 +333,9 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
not found: ONIG_MISMATCH (< 0)
|
||||
error: error code (< 0)
|
||||
|
||||
* If option ONIG_OPTION_CALLBACK_EACH_MATCH is used,
|
||||
it will return ONIG_MISMATCH even if there is a match.
|
||||
|
||||
arguments
|
||||
1 reg: regex object
|
||||
2 str: target string
|
||||
@ -344,11 +347,12 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
6 region: address for return group match range info (NULL is allowed)
|
||||
7 option: search time option
|
||||
|
||||
ONIG_OPTION_NOTBOL Do not regard the beginning of the (str) as the beginning of the line and the beginning of the string
|
||||
ONIG_OPTION_NOTEOL Do not regard the (end) as the end of a line and the end of a string
|
||||
ONIG_OPTION_NOT_BEGIN_STRING Do not regard the beginning of the (str) as the beginning of a string (* fail \A)
|
||||
ONIG_OPTION_NOT_END_STRING Do not regard the (end) as a string endpoint (* fail \z, \Z)
|
||||
ONIG_OPTION_NOT_BEGIN_POSITION Do not regard the (start) as start position of search (* fail \G)
|
||||
ONIG_OPTION_NOTBOL Do not regard the beginning of the (str) as the beginning of the line and the beginning of the string
|
||||
ONIG_OPTION_NOTEOL Do not regard the (end) as the end of a line and the end of a string
|
||||
ONIG_OPTION_NOT_BEGIN_STRING Do not regard the beginning of the (str) as the beginning of a string (* fail \A)
|
||||
ONIG_OPTION_NOT_END_STRING Do not regard the (end) as a string endpoint (* fail \z, \Z)
|
||||
ONIG_OPTION_NOT_BEGIN_POSITION Do not regard the (start) as start position of search (* fail \G)
|
||||
ONIG_OPTION_CALLBACK_EACH_MATCH Call back for all successful matches (including the case of the same matching start position).
|
||||
|
||||
|
||||
# int onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
|
||||
@ -374,6 +378,9 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
not match: ONIG_MISMATCH (< 0)
|
||||
error: error code (< 0)
|
||||
|
||||
* If option ONIG_OPTION_CALLBACK_EACH_MATCH is used,
|
||||
it will return ONIG_MISMATCH even if there is a match.
|
||||
|
||||
arguments
|
||||
1 reg: regex object
|
||||
2 str: target string
|
||||
@ -382,11 +389,12 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
5 region: address for return group match range info (NULL is allowed)
|
||||
6 option: search time option
|
||||
|
||||
ONIG_OPTION_NOTBOL Do not regard the beginning of the (str) as the beginning of the line and the beginning of the string
|
||||
ONIG_OPTION_NOTEOL Do not regard the (end) as the end of a line and the end of a string
|
||||
ONIG_OPTION_NOT_BEGIN_STRING Do not regard the beginning of the (str) as the beginning of a string (* fail \A)
|
||||
ONIG_OPTION_NOT_END_STRING Do not regard the (end) as a string endpoint (* fail \z, \Z)
|
||||
ONIG_OPTION_NOT_BEGIN_POSITION Do not regard the (start) as start position of search (* fail \G)
|
||||
ONIG_OPTION_NOTBOL Do not regard the beginning of the (str) as the beginning of the line and the beginning of the string
|
||||
ONIG_OPTION_NOTEOL Do not regard the (end) as the end of a line and the end of a string
|
||||
ONIG_OPTION_NOT_BEGIN_STRING Do not regard the beginning of the (str) as the beginning of a string (* fail \A)
|
||||
ONIG_OPTION_NOT_END_STRING Do not regard the (end) as a string endpoint (* fail \z, \Z)
|
||||
ONIG_OPTION_NOT_BEGIN_POSITION Do not regard the (start) as start position of search (* fail \G)
|
||||
ONIG_OPTION_CALLBACK_EACH_MATCH Call back for all successful matches.
|
||||
|
||||
|
||||
# int onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
|
||||
@ -703,6 +711,23 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
1 reg: regex object.
|
||||
|
||||
|
||||
# OnigCallbackEachMatchFunc onig_get_callback_each_match(void)
|
||||
|
||||
Return the current callback function for ONIG_OPTION_CALLBACK_EACH_MATCH.
|
||||
|
||||
|
||||
# int onig_set_callback_each_match(OnigCallbackEachMatchFunc func)
|
||||
|
||||
Set the callback function for ONIG_OPTION_CALLBACK_EACH_MATCH.
|
||||
If NULL is set, the callback will never be executed.
|
||||
|
||||
return value
|
||||
normal: 0
|
||||
|
||||
arguments
|
||||
1 func: callback function
|
||||
|
||||
|
||||
# int onig_number_of_capture_histories(regex_t* reg)
|
||||
|
||||
Return the number of capture histories defined in the pattern.
|
||||
@ -714,7 +739,6 @@ Oniguruma API Version 6.9.7 2021/01/18
|
||||
1 reg: regex object.
|
||||
|
||||
|
||||
|
||||
# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
|
||||
|
||||
Return the root node of capture history data tree.
|
||||
|
||||
@ -740,7 +740,7 @@ typedef struct {
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
} OnigCompileInfo;
|
||||
|
||||
typedef int (*OnigCallbackEachMatchFunc)(const UChar* str, const UChar* end, const UChar* range, const UChar* match_start, OnigRegion* region, void* user_data);
|
||||
typedef int (*OnigCallbackEachMatchFunc)(const UChar* str, const UChar* end, const UChar* match_start, OnigRegion* region, void* user_data);
|
||||
|
||||
|
||||
/* types for callout */
|
||||
|
||||
@ -3095,7 +3095,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
#ifdef USE_CAPTURE_HISTORY
|
||||
if (reg->capture_history != 0) {
|
||||
int r;
|
||||
OnigCaptureTreeNode* node;
|
||||
|
||||
if (IS_NULL(region->history_root)) {
|
||||
@ -3112,9 +3111,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
node->end = (int )(s - str);
|
||||
|
||||
stkp = stk_base;
|
||||
r = make_capture_history_tree(region->history_root, &stkp,
|
||||
i = make_capture_history_tree(region->history_root, &stkp,
|
||||
stk, (UChar* )str, reg);
|
||||
if (r < 0) MATCH_AT_ERROR_RETURN(r);
|
||||
if (i < 0) MATCH_AT_ERROR_RETURN(i);
|
||||
}
|
||||
#endif /* USE_CAPTURE_HISTORY */
|
||||
#ifdef USE_POSIX_API
|
||||
@ -3126,9 +3125,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
if (OPTON_CALLBACK_EACH_MATCH(options) &&
|
||||
IS_NOT_NULL(CallbackEachMatch)) {
|
||||
int r = CallbackEachMatch(str, end, in_right_range, sstart, region,
|
||||
msa->mp->callout_user_data);
|
||||
if (r < 0) MATCH_AT_ERROR_RETURN(r);
|
||||
i = CallbackEachMatch(str, end, sstart, region,
|
||||
msa->mp->callout_user_data);
|
||||
if (i < 0) MATCH_AT_ERROR_RETURN(i);
|
||||
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
if (! OPTON_FIND_LONGEST(options))
|
||||
|
||||
14
src/Edit.c
14
src/Edit.c
@ -88,10 +88,18 @@ static const char *const s_pColorRegEx_A = "#([0-9a-fA-F]){8}";
|
||||
static const char *const s_pUnicodeRegEx = "(\\\\[uU|xX]([0-9a-fA-F]){4}|\\\\[xX]([0-9a-fA-F]){2})+";
|
||||
|
||||
// https://mathiasbynens.be/demo/url-regex : @stephenhay
|
||||
//static const char* pUrlRegEx = "\\b(?:(?:https?|ftp|file)://|www\\.|ftp\\.)[^\\s/$.?#].[^\\s]*";
|
||||
//static const char* s_pUrlRegEx = "\\b(?:(?:https?|ftp|file)://|www\\.|ftp\\.)[^\\s/$.?#].[^\\s]*";
|
||||
|
||||
// using Gruber's Liberal Regex Pattern for All URLs (https://gist.github.com/gruber/249502)
|
||||
/// => unfortunately too slow to use as scanner
|
||||
//static const char *const s_pUrlRegEx = "(?i)\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)"
|
||||
// "(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+"
|
||||
// "(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))";
|
||||
|
||||
// pretty fast:
|
||||
static const char *const s_pUrlRegEx = "\\b(?:(?:https?|ftp|file)://|www\\.|ftp\\.)"
|
||||
"(?:\\([-a-z\\u00a1-\\uffff0-9+&@#/%=~_|$?!:,.]*\\)|[-a-z\\u00a1-\\uffff0-9+&@#/%=~_|$?!:,.])*"
|
||||
"(?:\\([-a-z\\u00a1-\\uffff0-9+&@#/%=~_|$?!:,.]*\\)|[a-z\\u00a1-\\uffff0-9+&@#/%=~_|$])";
|
||||
"(?:\\([-a-zA-Z0-9\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF+&@#/%=~_|$?!:,.]*\\)|[-a-zA-Z0-9\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF+&@#/%=~_|$?!:,.])*"
|
||||
"(?:\\([-a-zA-Z0-9\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF+&@#/%=~_|$?!:,.]*\\)|[a-zA-Z0-9\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF+&@#/%=~_|$])";
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user