mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-14 21:09:05 +08:00
+upd: merge current (v.6.9.2) Oniguruma docs
This commit is contained in:
parent
564203f052
commit
ee0a6dcfe3
5
oniguruma/.gitignore
vendored
5
oniguruma/.gitignore
vendored
@ -21,12 +21,17 @@ Makefile.in
|
||||
*.lib
|
||||
*.exe
|
||||
*.exp
|
||||
*.gcno
|
||||
*.gcda
|
||||
*.gcov
|
||||
*~
|
||||
.libs/
|
||||
.deps/
|
||||
/build
|
||||
/onig-*.tar.gz
|
||||
m4/*.m4
|
||||
/coverage
|
||||
/coverage.info
|
||||
|
||||
# src/
|
||||
/src/CaseFolding.txt
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
Oniguruma LICENSE
|
||||
-----------------
|
||||
|
||||
Copyright (c) 2002-2018 K.Kosako <kkosako0@gmail.com>
|
||||
Copyright (c) 2002-2019 K.Kosako <kkosako0@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@ -1,5 +1,50 @@
|
||||
History
|
||||
|
||||
2019/05/07: Version 6.9.2 (same as Release Candidate 3)
|
||||
|
||||
2019/04/23: Release Candidate 3 for 6.9.2
|
||||
2019/04/23: add doc/SYNTAX.md into distribution file
|
||||
2019/04/09: Release Candidate 2 for 6.9.2
|
||||
2019/04/09: fix #139: UAF in match_at()
|
||||
2019/04/01: Release Candidate 1 for 6.9.2
|
||||
2019/04/01: update Unicode version to 12.1.0 (draft)
|
||||
2019/03/29: allow {n,m} (n>m) as possessive interval
|
||||
2019/03/25: add ONIG_SYN_OP2_OPTION_ONIGURUMA
|
||||
2019/03/22: add new options ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER and
|
||||
ONIG_OPTION_TEXT_SEGMENT_WORD
|
||||
2019/03/21: PR #137: fix cross-compilation
|
||||
2019/03/20: update Unicode version to 12.0.0
|
||||
2019/03/17: add doc/SYNTAX.md
|
||||
2019/03/13: {n,m}+ and {n,m}? are possessive and reluctant range operator
|
||||
in Perl syntax
|
||||
2019/03/04: fix #132: don't execute testp if ENABLE_POSIX_API == no
|
||||
2019/02/28: re-implement bytecode by using Operation struct
|
||||
2019/02/26: fix #130: Build error on UWP with VS2017
|
||||
2019/02/03: PR #128: regerror/toascii: do not attempt to serialize NULL pointer
|
||||
2019/01/30: Build breaks without autoreconf #73
|
||||
2019/01/02: fix #127: Windows VS 2008 build errors
|
||||
2018/12/19: fix #126: Unable to compile when USE_CALLOUT is not defined
|
||||
|
||||
2018/12/11: Version 6.9.1
|
||||
|
||||
2018/10/08: use ENC_FLAG_SKIP_OFFSET_XXX values
|
||||
2018/10/06: UTF-8 supports code range from 0x0000 to 0x10FFFF
|
||||
(https://tools.ietf.org/html/rfc3629)
|
||||
2018/10/05: speed improvement
|
||||
2018/10/03: use OPTIMIZE_STR_CASE_FOLD_FAST
|
||||
2018/10/01: convert CRLF line endings to LF
|
||||
2018/09/27: set SIZEOF_SIZE_T for windows platforms
|
||||
2018/09/22: use Sunday quick search algorithm instead of Boyer-Moore-Horspool
|
||||
2018/09/20: introduce threaded code into match_at()
|
||||
2018/09/17: remove HAVE_STRINGS_H
|
||||
2018/09/16: remove HAVE_PROTOTYPES and HAVE_STDARG_PROTOTYPES
|
||||
2018/09/14: add a command line option '-gc' for make_unicode_property_data.py.
|
||||
2018/09/08: remove AC_HEADER_STDC
|
||||
2018/09/06: remove AC_OUTPUT macro call
|
||||
2018/09/06: remove AC_FUNC_MEMCMP, AC_HEADER_TIME, AC_C_CONST, HAVE__SETJMP and
|
||||
HAVE_STRING_H
|
||||
2018/09/05: remove HAVE_LIMITS_H, HAVE_FLOAT_H and HAVE_STDLIB_H
|
||||
|
||||
2018/09/03: Version 6.9.0
|
||||
|
||||
2018/08/24: add Unicode Emoji properties
|
||||
|
||||
@ -24,6 +24,24 @@ Supported character encodings:
|
||||
|
||||
* GB18030: contributed by KUBO Takehiro
|
||||
* CP1251: contributed by Byte
|
||||
* doc/SYNTAX.md: contributed by seanofw
|
||||
|
||||
|
||||
New feature of version 6.9.2
|
||||
-----------------------------------
|
||||
|
||||
* Update Unicode version 12.1.0
|
||||
* NEW: Unicode Text Segment mode option (?y{g}) (?y{w})
|
||||
|
||||
g: Extended Grapheme Cluster mode / w: Word mode
|
||||
|
||||
(Unicode Standard Annex #29 [http://unicode.org/reports/tr29/])
|
||||
|
||||
|
||||
New feature of version 6.9.1
|
||||
--------------------------
|
||||
|
||||
* Speed improvement (* especially UTF-8)
|
||||
|
||||
|
||||
New feature of version 6.9.0
|
||||
@ -78,7 +96,7 @@ New feature of version 6.6.0
|
||||
--------------------------
|
||||
|
||||
* NEW: ASCII only mode options for character type/property (?WDSP)
|
||||
* NEW: Extended Grapheme Cluster boundary \y, \Y (*original)
|
||||
* NEW: Extended Grapheme Cluster boundary \y, \Y
|
||||
* NEW: Extended Grapheme Cluster \X
|
||||
* Range-clear (Absent-clear) operator restores previous range in retractions.
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
Oniguruma API Version 6.8.0 2018/03/13
|
||||
Oniguruma API Version 6.9.2 2019/03/25
|
||||
|
||||
#include <oniguruma.h>
|
||||
|
||||
@ -92,6 +92,8 @@ Oniguruma API Version 6.8.0 2018/03/13
|
||||
(alnum, alpha, blank, cntrl, digit, graph,
|
||||
lower, print, punct, space, upper, xdigit,
|
||||
word)
|
||||
ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER Extended Grapheme Cluster mode
|
||||
ONIG_OPTION_TEXT_SEGMENT_WORD Word mode
|
||||
|
||||
5 enc: character encoding.
|
||||
|
||||
|
||||
123
oniguruma/doc/RE
123
oniguruma/doc/RE
@ -1,4 +1,4 @@
|
||||
Oniguruma Regular Expressions Version 6.8.0 2018/07/26
|
||||
Oniguruma Regular Expressions Version 6.9.2 2019/03/29
|
||||
|
||||
syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
|
||||
@ -81,16 +81,24 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
|
||||
\O true anychar (?m:.) (* original function)
|
||||
|
||||
\X Extended Grapheme Cluster (?>\O(?:\Y\O)*)
|
||||
\X Text Segment \X === (?>\O(?:\Y\O)*)
|
||||
|
||||
\X doesn't check whether matching start position is boundary.
|
||||
Write as \y\X if you want to ensure it.
|
||||
The meaning of this operator changes depending on the setting of
|
||||
the option (?y{..}).
|
||||
|
||||
Unicode case:
|
||||
\X doesn't check whether the matching start position is a boundary or not.
|
||||
Please write as \y\X if you want to ensure it.
|
||||
|
||||
[Extended Grapheme Cluster mode] (default)
|
||||
Unicode case:
|
||||
See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
|
||||
|
||||
Not Unicode case: \X === (?>\r\n|\O)
|
||||
|
||||
[Word mode]
|
||||
Currently, this mode is supported in Unicode only.
|
||||
See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
|
||||
|
||||
Not Unicode: (?>\r\n|\O)
|
||||
|
||||
|
||||
Character Property
|
||||
|
||||
@ -119,17 +127,17 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
? 1 or 0 times
|
||||
* 0 or more times
|
||||
+ 1 or more times
|
||||
{n,m} at least n but no more than m times
|
||||
{n,m} (n <= m) at least n but no more than m times
|
||||
{n,} at least n times
|
||||
{,n} at least 0 but no more than n times ({0,n})
|
||||
{n} n times
|
||||
|
||||
reluctant
|
||||
|
||||
?? 1 or 0 times
|
||||
?? 0 or 1 times
|
||||
*? 0 or more times
|
||||
+? 1 or more times
|
||||
{n,m}? at least n but not more than m times
|
||||
{n,m}? (n <= m) at least n but not more than m times
|
||||
{n,}? at least n times
|
||||
{,n}? at least 0 but not more than n times (== {0,n}?)
|
||||
|
||||
@ -138,8 +146,10 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
?+ 1 or 0 times
|
||||
*+ 0 or more times
|
||||
++ 1 or more times
|
||||
{n,m} (n > m) at least m but not more than n times
|
||||
|
||||
({n,m}+, {n,}+, {n}+ are possessive op. in ONIG_SYNTAX_JAVA only)
|
||||
{n,m}+, {n,}+, {n}+ are possessive operators in ONIG_SYNTAX_JAVA and
|
||||
ONIG_SYNTAX_PERL only.
|
||||
|
||||
ex. /a*+/ === /(?>a*)/
|
||||
|
||||
@ -150,8 +160,6 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
$ end of the line
|
||||
\b word boundary
|
||||
\B non-word boundary
|
||||
\y Extended Grapheme Cluster boundary
|
||||
\Y Extended Grapheme Cluster non-boundary
|
||||
|
||||
\A beginning of string
|
||||
\Z end of string, or before newline at the end
|
||||
@ -160,6 +168,24 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
\K keep (keep start position of the result string)
|
||||
|
||||
|
||||
\y Text Segment boundary
|
||||
\Y Text Segment non-boundary
|
||||
|
||||
The meaning of these operators (\y, \Y) changes depending on the setting
|
||||
of the option (?y{..}).
|
||||
|
||||
[Extended Grapheme Cluster mode] (default)
|
||||
Unicode case:
|
||||
See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
|
||||
|
||||
Not Unicode:
|
||||
All positions except between \r and \n.
|
||||
|
||||
[Word mode]
|
||||
Currently, this mode is supported in Unicode only.
|
||||
See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
|
||||
|
||||
|
||||
|
||||
6. Character class
|
||||
|
||||
@ -221,20 +247,28 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
|
||||
(?#...) comment
|
||||
|
||||
(?imxWDSP-imxWDSP:subexp) option on/off for subexp
|
||||
(?imxWDSPy-imxWDSP:subexp) option on/off for subexp
|
||||
|
||||
i: ignore case
|
||||
m: multi-line (dot (.) also matches newline)
|
||||
x: extended form
|
||||
W: ASCII only word (\w, \p{Word}, [[:word:]])
|
||||
ASCII only word bound (\b)
|
||||
D: ASCII only digit (\d, \p{Digit}, [[:digit:]])
|
||||
S: ASCII only space (\s, \p{Space}, [[:space:]])
|
||||
P: ASCII only POSIX properties (includes W,D,S)
|
||||
(alnum, alpha, blank, cntrl, digit, graph,
|
||||
lower, print, punct, space, upper, xdigit, word)
|
||||
i: ignore case
|
||||
m: multi-line (dot (.) also matches newline)
|
||||
x: extended form
|
||||
W: ASCII only word (\w, \p{Word}, [[:word:]])
|
||||
ASCII only word bound (\b)
|
||||
D: ASCII only digit (\d, \p{Digit}, [[:digit:]])
|
||||
S: ASCII only space (\s, \p{Space}, [[:space:]])
|
||||
P: ASCII only POSIX properties (includes W,D,S)
|
||||
(alnum, alpha, blank, cntrl, digit, graph,
|
||||
lower, print, punct, space, upper, xdigit, word)
|
||||
|
||||
(?imxWDSP-imxWDSP) isolated option
|
||||
y{?}: Text Segment mode
|
||||
This option changes the meaning of \X, \y, \Y.
|
||||
Currently, this option is supported in Unicode only.
|
||||
|
||||
y{g}: Extended Grapheme Cluster mode (default)
|
||||
y{w}: Word mode
|
||||
See [Unicode Standard Annex #29]
|
||||
|
||||
(?imxWDSPy-imxWDSP) isolated option
|
||||
|
||||
* It makes a group that extends to the next ')' or the end of the pattern.
|
||||
/ab(?i)c|def|gh/ == /ab(?i:c|def|gh)/
|
||||
@ -336,7 +370,7 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
|
||||
else_exp can be omitted.
|
||||
Then it works as a backreference validity checker.
|
||||
|
||||
[ backreference validity checker ] (* original)
|
||||
[ Backreference validity checker ] (* original)
|
||||
|
||||
(?(n)), (?(-n)), (?(+n)), (?(n+level)) ...
|
||||
(?(<n>)), (?('-n')), (?(<+n>)) ...
|
||||
@ -470,10 +504,15 @@ A-1. Syntax-dependent options
|
||||
|
||||
A-2. Original extensions
|
||||
|
||||
+ hexadecimal digit char type \h, \H
|
||||
+ named group (?<name>...), (?'name'...)
|
||||
+ named backref \k<name>
|
||||
+ subexp call \g<name>, \g<group-num>
|
||||
+ hexadecimal digit char type \h, \H
|
||||
+ true anychar \O
|
||||
+ text segment boundary \y, \Y
|
||||
+ backreference validity checker (?(...))
|
||||
+ named group (?<name>...), (?'name'...)
|
||||
+ named backref \k<name>
|
||||
+ subexp call \g<name>, \g<group-num>
|
||||
+ absent expression (?~|...|...)
|
||||
+ absent stopper (?|...)
|
||||
|
||||
|
||||
A-3. Missing features compared with perl 5.8.0
|
||||
@ -528,28 +567,4 @@ A-4. Differences with Japanized GNU regex(version 0.12) of Ruby 1.8
|
||||
/(?:()|())*\1\2/ =~ ""
|
||||
/(?:\1a|())*/ =~ "a"
|
||||
|
||||
|
||||
A-5. Features disabled in default syntax
|
||||
|
||||
+ capture history
|
||||
|
||||
(?@...) and (?@<name>...)
|
||||
|
||||
ex. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>]
|
||||
|
||||
see sample/listcap.c file.
|
||||
|
||||
|
||||
A-6. Problems
|
||||
|
||||
+ Invalid encoding byte sequence is not checked.
|
||||
|
||||
ex. UTF-8
|
||||
|
||||
* Invalid first byte is treated as a character.
|
||||
/./u =~ "\xa3"
|
||||
|
||||
* Incomplete byte sequence is not checked.
|
||||
/\w+/ =~ "a\xf3\x8ec"
|
||||
|
||||
// END
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -8,7 +8,7 @@
|
||||
<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)
|
||||
|
||||
<p>
|
||||
(c) K.Kosako, updated at: 2018/08/31
|
||||
(c) K.Kosako, updated at: 2019/05/14
|
||||
</p>
|
||||
|
||||
<dl>
|
||||
@ -16,6 +16,8 @@
|
||||
<dt><b>What's new</b>
|
||||
</font>
|
||||
<ul>
|
||||
<li>2019/05/07: Version 6.9.2 released.</li>
|
||||
<li>2018/12/11: Version 6.9.1 released.</li>
|
||||
<li>2018/09/03: Version 6.9.0 released.</li>
|
||||
<li>2018/04/17: Version 6.8.2 released.</li>
|
||||
<li>2018/03/19: Version 6.8.1 released.</li>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user