Merge pull request #3167 from RaiKoHoff/Dev_Lexilla

Hyperlink RegEx pattern fixed to use valid Unicode
This commit is contained in:
Pairi Daiza 2021-03-01 20:28:03 +01:00 committed by GitHub
commit d8fa7ff7fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 55 additions and 23 deletions

View File

@ -31,8 +31,9 @@ Supported character encodings:
Master branch
-------------
* NEW API: ONIG_SYNTAX_PYTHON
* NEW API: ONIG_OPTION_CALLBACK_EACH_MATCH
* NEW API: ONIG_OPTION_IGNORECASE_IS_ASCII
* NEW API: ONIG_SYNTAX_PYTHON
Version 6.9.6

View File

@ -1,4 +1,4 @@
Oniguruma API Version 6.9.7 2021/01/18
Oniguruma API Version 6.9.7 2021/03/01
#include <oniguruma.h>
@ -333,6 +333,9 @@ Oniguruma API Version 6.9.7 2021/01/18
not found: ONIG_MISMATCH (< 0)
error: error code (< 0)
* If option ONIG_OPTION_CALLBACK_EACH_MATCH is used,
it will return ONIG_MISMATCH even if there is a match.
arguments
1 reg: regex object
2 str: target string
@ -344,11 +347,12 @@ Oniguruma API Version 6.9.7 2021/01/18
6 region: address for return group match range info (NULL is allowed)
7 option: search time option
ONIG_OPTION_NOTBOL Do not regard the beginning of the (str) as the beginning of the line and the beginning of the string
ONIG_OPTION_NOTEOL Do not regard the (end) as the end of a line and the end of a string
ONIG_OPTION_NOT_BEGIN_STRING Do not regard the beginning of the (str) as the beginning of a string (* fail \A)
ONIG_OPTION_NOT_END_STRING Do not regard the (end) as a string endpoint (* fail \z, \Z)
ONIG_OPTION_NOT_BEGIN_POSITION Do not regard the (start) as start position of search (* fail \G)
ONIG_OPTION_NOTBOL Do not regard the beginning of the (str) as the beginning of the line and the beginning of the string
ONIG_OPTION_NOTEOL Do not regard the (end) as the end of a line and the end of a string
ONIG_OPTION_NOT_BEGIN_STRING Do not regard the beginning of the (str) as the beginning of a string (* fail \A)
ONIG_OPTION_NOT_END_STRING Do not regard the (end) as a string endpoint (* fail \z, \Z)
ONIG_OPTION_NOT_BEGIN_POSITION Do not regard the (start) as start position of search (* fail \G)
ONIG_OPTION_CALLBACK_EACH_MATCH Call back for all successful matches (including the case of the same matching start position).
# int onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
@ -374,6 +378,9 @@ Oniguruma API Version 6.9.7 2021/01/18
not match: ONIG_MISMATCH (< 0)
error: error code (< 0)
* If option ONIG_OPTION_CALLBACK_EACH_MATCH is used,
it will return ONIG_MISMATCH even if there is a match.
arguments
1 reg: regex object
2 str: target string
@ -382,11 +389,12 @@ Oniguruma API Version 6.9.7 2021/01/18
5 region: address for return group match range info (NULL is allowed)
6 option: search time option
ONIG_OPTION_NOTBOL Do not regard the beginning of the (str) as the beginning of the line and the beginning of the string
ONIG_OPTION_NOTEOL Do not regard the (end) as the end of a line and the end of a string
ONIG_OPTION_NOT_BEGIN_STRING Do not regard the beginning of the (str) as the beginning of a string (* fail \A)
ONIG_OPTION_NOT_END_STRING Do not regard the (end) as a string endpoint (* fail \z, \Z)
ONIG_OPTION_NOT_BEGIN_POSITION Do not regard the (start) as start position of search (* fail \G)
ONIG_OPTION_NOTBOL Do not regard the beginning of the (str) as the beginning of the line and the beginning of the string
ONIG_OPTION_NOTEOL Do not regard the (end) as the end of a line and the end of a string
ONIG_OPTION_NOT_BEGIN_STRING Do not regard the beginning of the (str) as the beginning of a string (* fail \A)
ONIG_OPTION_NOT_END_STRING Do not regard the (end) as a string endpoint (* fail \z, \Z)
ONIG_OPTION_NOT_BEGIN_POSITION Do not regard the (start) as start position of search (* fail \G)
ONIG_OPTION_CALLBACK_EACH_MATCH Call back for all successful matches.
# int onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
@ -703,6 +711,23 @@ Oniguruma API Version 6.9.7 2021/01/18
1 reg: regex object.
# OnigCallbackEachMatchFunc onig_get_callback_each_match(void)
Return the current callback function for ONIG_OPTION_CALLBACK_EACH_MATCH.
# int onig_set_callback_each_match(OnigCallbackEachMatchFunc func)
Set the callback function for ONIG_OPTION_CALLBACK_EACH_MATCH.
If NULL is set, the callback will never be executed.
return value
normal: 0
arguments
1 func: callback function
# int onig_number_of_capture_histories(regex_t* reg)
Return the number of capture histories defined in the pattern.
@ -714,7 +739,6 @@ Oniguruma API Version 6.9.7 2021/01/18
1 reg: regex object.
# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
Return the root node of capture history data tree.

View File

@ -740,7 +740,7 @@ typedef struct {
OnigCaseFoldType case_fold_flag;
} OnigCompileInfo;
typedef int (*OnigCallbackEachMatchFunc)(const UChar* str, const UChar* end, const UChar* range, const UChar* match_start, OnigRegion* region, void* user_data);
typedef int (*OnigCallbackEachMatchFunc)(const UChar* str, const UChar* end, const UChar* match_start, OnigRegion* region, void* user_data);
/* types for callout */

View File

@ -3095,7 +3095,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_CAPTURE_HISTORY
if (reg->capture_history != 0) {
int r;
OnigCaptureTreeNode* node;
if (IS_NULL(region->history_root)) {
@ -3112,9 +3111,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
node->end = (int )(s - str);
stkp = stk_base;
r = make_capture_history_tree(region->history_root, &stkp,
i = make_capture_history_tree(region->history_root, &stkp,
stk, (UChar* )str, reg);
if (r < 0) MATCH_AT_ERROR_RETURN(r);
if (i < 0) MATCH_AT_ERROR_RETURN(i);
}
#endif /* USE_CAPTURE_HISTORY */
#ifdef USE_POSIX_API
@ -3126,9 +3125,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (OPTON_CALLBACK_EACH_MATCH(options) &&
IS_NOT_NULL(CallbackEachMatch)) {
int r = CallbackEachMatch(str, end, in_right_range, sstart, region,
msa->mp->callout_user_data);
if (r < 0) MATCH_AT_ERROR_RETURN(r);
i = CallbackEachMatch(str, end, sstart, region,
msa->mp->callout_user_data);
if (i < 0) MATCH_AT_ERROR_RETURN(i);
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
if (! OPTON_FIND_LONGEST(options))

View File

@ -88,10 +88,18 @@ static const char *const s_pColorRegEx_A = "#([0-9a-fA-F]){8}";
static const char *const s_pUnicodeRegEx = "(\\\\[uU|xX]([0-9a-fA-F]){4}|\\\\[xX]([0-9a-fA-F]){2})+";
// https://mathiasbynens.be/demo/url-regex : @stephenhay
//static const char* pUrlRegEx = "\\b(?:(?:https?|ftp|file)://|www\\.|ftp\\.)[^\\s/$.?#].[^\\s]*";
//static const char* s_pUrlRegEx = "\\b(?:(?:https?|ftp|file)://|www\\.|ftp\\.)[^\\s/$.?#].[^\\s]*";
// using Gruber's Liberal Regex Pattern for All URLs (https://gist.github.com/gruber/249502)
/// => unfortunately too slow to use as scanner
//static const char *const s_pUrlRegEx = "(?i)\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)"
// "(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+"
// "(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))";
// pretty fast:
static const char *const s_pUrlRegEx = "\\b(?:(?:https?|ftp|file)://|www\\.|ftp\\.)"
"(?:\\([-a-z\\u00a1-\\uffff0-9+&@#/%=~_|$?!:,.]*\\)|[-a-z\\u00a1-\\uffff0-9+&@#/%=~_|$?!:,.])*"
"(?:\\([-a-z\\u00a1-\\uffff0-9+&@#/%=~_|$?!:,.]*\\)|[a-z\\u00a1-\\uffff0-9+&@#/%=~_|$])";
"(?:\\([-a-zA-Z0-9\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF+&@#/%=~_|$?!:,.]*\\)|[-a-zA-Z0-9\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF+&@#/%=~_|$?!:,.])*"
"(?:\\([-a-zA-Z0-9\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF+&@#/%=~_|$?!:,.]*\\)|[a-zA-Z0-9\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF+&@#/%=~_|$])";
// ----------------------------------------------------------------------------