diff --git a/oniguruma/.gitignore b/oniguruma/.gitignore index 398488c31..6af6a820c 100644 --- a/oniguruma/.gitignore +++ b/oniguruma/.gitignore @@ -21,12 +21,17 @@ Makefile.in *.lib *.exe *.exp +*.gcno +*.gcda +*.gcov *~ .libs/ .deps/ /build /onig-*.tar.gz m4/*.m4 +/coverage +/coverage.info # src/ /src/CaseFolding.txt diff --git a/oniguruma/COPYING b/oniguruma/COPYING index e999ab5bd..bb6959250 100644 --- a/oniguruma/COPYING +++ b/oniguruma/COPYING @@ -1,7 +1,7 @@ Oniguruma LICENSE ----------------- -Copyright (c) 2002-2018 K.Kosako +Copyright (c) 2002-2019 K.Kosako All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/oniguruma/HISTORY b/oniguruma/HISTORY index 2cd320793..0b8a9a257 100644 --- a/oniguruma/HISTORY +++ b/oniguruma/HISTORY @@ -1,5 +1,50 @@ History +2019/05/07: Version 6.9.2 (same as Release Candidate 3) + +2019/04/23: Release Candidate 3 for 6.9.2 +2019/04/23: add doc/SYNTAX.md into distribution file +2019/04/09: Release Candidate 2 for 6.9.2 +2019/04/09: fix #139: UAF in match_at() +2019/04/01: Release Candidate 1 for 6.9.2 +2019/04/01: update Unicode version to 12.1.0 (draft) +2019/03/29: allow {n,m} (n>m) as possessive interval +2019/03/25: add ONIG_SYN_OP2_OPTION_ONIGURUMA +2019/03/22: add new options ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER and + ONIG_OPTION_TEXT_SEGMENT_WORD +2019/03/21: PR #137: fix cross-compilation +2019/03/20: update Unicode version to 12.0.0 +2019/03/17: add doc/SYNTAX.md +2019/03/13: {n,m}+ and {n,m}? 
are possessive and reluctant range operators + in Perl syntax +2019/03/04: fix #132: don't execute testp if ENABLE_POSIX_API == no +2019/02/28: re-implement bytecode by using Operation struct +2019/02/26: fix #130: Build error on UWP with VS2017 +2019/02/03: PR #128: regerror/toascii: do not attempt to serialize NULL pointer +2019/01/30: Build breaks without autoreconf #73 +2019/01/02: fix #127: Windows VS 2008 build errors +2018/12/19: fix #126: Unable to compile when USE_CALLOUT is not defined + +2018/12/11: Version 6.9.1 + +2018/10/08: use ENC_FLAG_SKIP_OFFSET_XXX values +2018/10/06: UTF-8 supports code range from 0x0000 to 0x10FFFF + (https://tools.ietf.org/html/rfc3629) +2018/10/05: speed improvement +2018/10/03: use OPTIMIZE_STR_CASE_FOLD_FAST +2018/10/01: convert CRLF line endings to LF +2018/09/27: set SIZEOF_SIZE_T for windows platforms +2018/09/22: use Sunday quick search algorithm instead of Boyer-Moore-Horspool +2018/09/20: introduce threaded code into match_at() +2018/09/17: remove HAVE_STRINGS_H +2018/09/16: remove HAVE_PROTOTYPES and HAVE_STDARG_PROTOTYPES +2018/09/14: add a command line option '-gc' for make_unicode_property_data.py. 
+2018/09/08: remove AC_HEADER_STDC +2018/09/06: remove AC_OUTPUT macro call +2018/09/06: remove AC_FUNC_MEMCMP, AC_HEADER_TIME, AC_C_CONST, HAVE__SETJMP and + HAVE_STRING_H +2018/09/05: remove HAVE_LIMITS_H, HAVE_FLOAT_H and HAVE_STDLIB_H + 2018/09/03: Version 6.9.0 2018/08/24: add Unicode Emoji properties diff --git a/oniguruma/README.md b/oniguruma/README.md index 086f4ffb6..873f86d43 100644 --- a/oniguruma/README.md +++ b/oniguruma/README.md @@ -24,6 +24,24 @@ Supported character encodings: * GB18030: contributed by KUBO Takehiro * CP1251: contributed by Byte +* doc/SYNTAX.md: contributed by seanofw + + +New feature of version 6.9.2 +----------------------------------- + +* Update Unicode version 12.1.0 +* NEW: Unicode Text Segment mode option (?y{g}) (?y{w}) + + g: Extended Grapheme Cluster mode / w: Word mode + + (Unicode Standard Annex #29 [http://unicode.org/reports/tr29/]) + + +New feature of version 6.9.1 +-------------------------- + +* Speed improvement (* especially UTF-8) New feature of version 6.9.0 @@ -78,7 +96,7 @@ New feature of version 6.6.0 -------------------------- * NEW: ASCII only mode options for character type/property (?WDSP) -* NEW: Extended Grapheme Cluster boundary \y, \Y (*original) +* NEW: Extended Grapheme Cluster boundary \y, \Y * NEW: Extended Grapheme Cluster \X * Range-clear (Absent-clear) operator restores previous range in retractions. diff --git a/oniguruma/doc/API b/oniguruma/doc/API index 24b531a98..2309e5ec6 100644 --- a/oniguruma/doc/API +++ b/oniguruma/doc/API @@ -1,4 +1,4 @@ -Oniguruma API Version 6.8.0 2018/03/13 +Oniguruma API Version 6.9.2 2019/03/25 #include @@ -92,6 +92,8 @@ Oniguruma API Version 6.8.0 2018/03/13 (alnum, alpha, blank, cntrl, digit, graph, lower, print, punct, space, upper, xdigit, word) + ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER Extended Grapheme Cluster mode + ONIG_OPTION_TEXT_SEGMENT_WORD Word mode 5 enc: character encoding. 
diff --git a/oniguruma/doc/RE b/oniguruma/doc/RE index 963d0092f..72957dd2f 100644 --- a/oniguruma/doc/RE +++ b/oniguruma/doc/RE @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.8.0 2018/07/26 +Oniguruma Regular Expressions Version 6.9.2 2019/03/29 syntax: ONIG_SYNTAX_ONIGURUMA (default) @@ -81,16 +81,24 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) \O true anychar (?m:.) (* original function) - \X Extended Grapheme Cluster (?>\O(?:\Y\O)*) + \X Text Segment \X === (?>\O(?:\Y\O)*) - \X doesn't check whether matching start position is boundary. - Write as \y\X if you want to ensure it. + The meaning of this operator changes depending on the setting of + the option (?y{..}). - Unicode case: + \X doesn't check whether matching start position is boundary or not. + Please write as \y\X if you want to ensure it. + + [Extended Grapheme Cluster mode] (default) + Unicode case: + See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] + + Not Unicode case: \X === (?>\r\n|\O) + + [Word mode] + Currently, this mode is supported in Unicode only. See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] - Not Unicode: (?>\r\n|\O) - Character Property @@ -119,17 +127,17 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) ? 1 or 0 times * 0 or more times + 1 or more times - {n,m} at least n but no more than m times + {n,m} (n <= m) at least n but no more than m times {n,} at least n times {,n} at least 0 but no more than n times ({0,n}) {n} n times reluctant - ?? 1 or 0 times + ?? 0 or 1 times *? 0 or more times +? 1 or more times - {n,m}? at least n but not more than m times + {n,m}? (n <= m) at least n but not more than m times {n,}? at least n times {,n}? at least 0 but not more than n times (== {0,n}?) @@ -138,8 +146,10 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) ?+ 1 or 0 times *+ 0 or more times ++ 1 or more times + {n,m} (n > m) at least m but not more than n times - ({n,m}+, {n,}+, {n}+ are possessive op. 
in ONIG_SYNTAX_JAVA only) + {n,m}+, {n,}+, {n}+ are possessive operators in ONIG_SYNTAX_JAVA and + ONIG_SYNTAX_PERL only. ex. /a*+/ === /(?>a*)/ @@ -150,8 +160,6 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) $ end of the line \b word boundary \B non-word boundary - \y Extended Grapheme Cluster boundary - \Y Extended Grapheme Cluster non-boundary \A beginning of string \Z end of string, or before newline at the end @@ -160,6 +168,24 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) \K keep (keep start position of the result string) + \y Text Segment boundary + \Y Text Segment non-boundary + + The meaning of these operators(\y, \Y) changes depending on the setting + of the option (?y{..}). + + [Extended Grapheme Cluster mode] (default) + Unicode case: + See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] + + Not Unicode: + All positions except between \r and \n. + + [Word mode] + Currently, this mode is supported in Unicode only. + See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/] + + 6. Character class @@ -221,20 +247,28 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) (?#...) comment - (?imxWDSP-imxWDSP:subexp) option on/off for subexp + (?imxWDSPy-imxWDSP:subexp) option on/off for subexp - i: ignore case - m: multi-line (dot (.) also matches newline) - x: extended form - W: ASCII only word (\w, \p{Word}, [[:word:]]) - ASCII only word bound (\b) - D: ASCII only digit (\d, \p{Digit}, [[:digit:]]) - S: ASCII only space (\s, \p{Space}, [[:space:]]) - P: ASCII only POSIX properties (includes W,D,S) - (alnum, alpha, blank, cntrl, digit, graph, - lower, print, punct, space, upper, xdigit, word) + i: ignore case + m: multi-line (dot (.) 
also matches newline) + x: extended form + W: ASCII only word (\w, \p{Word}, [[:word:]]) + ASCII only word bound (\b) + D: ASCII only digit (\d, \p{Digit}, [[:digit:]]) + S: ASCII only space (\s, \p{Space}, [[:space:]]) + P: ASCII only POSIX properties (includes W,D,S) + (alnum, alpha, blank, cntrl, digit, graph, + lower, print, punct, space, upper, xdigit, word) - (?imxWDSP-imxWDSP) isolated option + y{?}: Text Segment mode + This option changes the meaning of \X, \y, \Y. + Currently, this option is supported in Unicode only. + + y{g}: Extended Grapheme Cluster mode (default) + y{w}: Word mode + See [Unicode Standard Annex #29] + + (?imxWDSPy-imxWDSP) isolated option * It makes a group to the next ')' or end of the pattern. /ab(?i)c|def|gh/ == /ab(?i:c|def|gh)/ @@ -336,7 +370,7 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) else_exp can be omitted. Then it works as a backreference validity checker. - [ backreference validity checker ] (* original) + [ Backreference validity checker ] (* original) (?(n)), (?(-n)), (?(+n)), (?(n+level)) ... (?()), (?('-n')), (?(<+n>)) ... @@ -470,10 +504,15 @@ A-1. Syntax-dependent options A-2. Original extensions - + hexadecimal digit char type \h, \H - + named group (?...), (?'name'...) - + named backref \k - + subexp call \g, \g + + hexadecimal digit char type \h, \H + + true anychar \O + + text segment boundary \y, \Y + + backreference validity checker (?(...)) + + named group (?...), (?'name'...) + + named backref \k + + subexp call \g, \g + + absent expression (?~|...|...) + + absent stopper (?|...) A-3. Missing features compared with perl 5.8.0 @@ -528,28 +567,4 @@ A-4. Differences with Japanized GNU regex(version 0.12) of Ruby 1.8 /(?:()|())*\1\2/ =~ "" /(?:\1a|())*/ =~ "a" - -A-5. Features disabled in default syntax - - + capture history - - (?@...) and (?@...) - - ex. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>] - - see sample/listcap.c file. - - -A-6. Problems - - + Invalid encoding byte sequence is not checked. - - ex. 
UTF-8 - - * Invalid first byte is treated as a character. - /./u =~ "\xa3" - - * Incomplete byte sequence is not checked. - /\w+/ =~ "a\xf3\x8ec" - // END diff --git a/oniguruma/doc/UNICODE_PROPERTIES b/oniguruma/doc/UNICODE_PROPERTIES index 1f961ebda..1148b4d01 100644 --- a/oniguruma/doc/UNICODE_PROPERTIES +++ b/oniguruma/doc/UNICODE_PROPERTIES @@ -1,4 +1,4 @@ -Unicode Properties (from Unicode Version: 11.0.0) +Unicode Properties (from Unicode Version: 12.0.0) 15: ASCII_Hex_Digit 16: Adlam @@ -56,203 +56,207 @@ Unicode Properties (from Unicode Version: 11.0.0) 68: Duployan 69: Egyptian_Hieroglyphs 70: Elbasan - 71: Emoji - 72: Emoji_Component - 73: Emoji_Modifier - 74: Emoji_Modifier_Base - 75: Emoji_Presentation - 76: Ethiopic - 77: Extended_Pictographic - 78: Extender - 79: Georgian - 80: Glagolitic - 81: Gothic - 82: Grantha - 83: Grapheme_Base - 84: Grapheme_Extend - 85: Grapheme_Link - 86: Greek - 87: Gujarati - 88: Gunjala_Gondi - 89: Gurmukhi - 90: Han - 91: Hangul - 92: Hanifi_Rohingya - 93: Hanunoo - 94: Hatran - 95: Hebrew - 96: Hex_Digit - 97: Hiragana - 98: Hyphen - 99: IDS_Binary_Operator -100: IDS_Trinary_Operator -101: ID_Continue -102: ID_Start -103: Ideographic -104: Imperial_Aramaic -105: Inherited -106: Inscriptional_Pahlavi -107: Inscriptional_Parthian -108: Javanese -109: Join_Control -110: Kaithi -111: Kannada -112: Katakana -113: Kayah_Li -114: Kharoshthi -115: Khmer -116: Khojki -117: Khudawadi -118: L -119: LC -120: Lao -121: Latin -122: Lepcha -123: Limbu -124: Linear_A -125: Linear_B -126: Lisu -127: Ll -128: Lm -129: Lo -130: Logical_Order_Exception -131: Lowercase -132: Lt -133: Lu -134: Lycian -135: Lydian -136: M -137: Mahajani -138: Makasar -139: Malayalam -140: Mandaic -141: Manichaean -142: Marchen -143: Masaram_Gondi -144: Math -145: Mc -146: Me -147: Medefaidrin -148: Meetei_Mayek -149: Mende_Kikakui -150: Meroitic_Cursive -151: Meroitic_Hieroglyphs -152: Miao -153: Mn -154: Modi -155: Mongolian -156: Mro -157: Multani -158: 
Myanmar -159: N -160: Nabataean -161: Nd -162: New_Tai_Lue -163: Newa -164: Nko -165: Nl -166: No -167: Noncharacter_Code_Point -168: Nushu -169: Ogham -170: Ol_Chiki -171: Old_Hungarian -172: Old_Italic -173: Old_North_Arabian -174: Old_Permic -175: Old_Persian -176: Old_Sogdian -177: Old_South_Arabian -178: Old_Turkic -179: Oriya -180: Osage -181: Osmanya -182: Other_Alphabetic -183: Other_Default_Ignorable_Code_Point -184: Other_Grapheme_Extend -185: Other_ID_Continue -186: Other_ID_Start -187: Other_Lowercase -188: Other_Math -189: Other_Uppercase -190: P -191: Pahawh_Hmong -192: Palmyrene -193: Pattern_Syntax -194: Pattern_White_Space -195: Pau_Cin_Hau -196: Pc -197: Pd -198: Pe -199: Pf -200: Phags_Pa -201: Phoenician -202: Pi -203: Po -204: Prepended_Concatenation_Mark -205: Ps -206: Psalter_Pahlavi -207: Quotation_Mark -208: Radical -209: Regional_Indicator -210: Rejang -211: Runic -212: S -213: Samaritan -214: Saurashtra -215: Sc -216: Sentence_Terminal -217: Sharada -218: Shavian -219: Siddham -220: SignWriting -221: Sinhala -222: Sk -223: Sm -224: So -225: Soft_Dotted -226: Sogdian -227: Sora_Sompeng -228: Soyombo -229: Sundanese -230: Syloti_Nagri -231: Syriac -232: Tagalog -233: Tagbanwa -234: Tai_Le -235: Tai_Tham -236: Tai_Viet -237: Takri -238: Tamil -239: Tangut -240: Telugu -241: Terminal_Punctuation -242: Thaana -243: Thai -244: Tibetan -245: Tifinagh -246: Tirhuta -247: Ugaritic -248: Unified_Ideograph -249: Unknown -250: Uppercase -251: Vai -252: Variation_Selector -253: Warang_Citi -254: White_Space -255: XID_Continue -256: XID_Start -257: Yi -258: Z -259: Zanabazar_Square -260: Zl -261: Zp -262: Zs + 71: Elymaic + 72: Emoji + 73: Emoji_Component + 74: Emoji_Modifier + 75: Emoji_Modifier_Base + 76: Emoji_Presentation + 77: Ethiopic + 78: Extended_Pictographic + 79: Extender + 80: Georgian + 81: Glagolitic + 82: Gothic + 83: Grantha + 84: Grapheme_Base + 85: Grapheme_Extend + 86: Grapheme_Link + 87: Greek + 88: Gujarati + 89: Gunjala_Gondi + 
90: Gurmukhi + 91: Han + 92: Hangul + 93: Hanifi_Rohingya + 94: Hanunoo + 95: Hatran + 96: Hebrew + 97: Hex_Digit + 98: Hiragana + 99: Hyphen +100: IDS_Binary_Operator +101: IDS_Trinary_Operator +102: ID_Continue +103: ID_Start +104: Ideographic +105: Imperial_Aramaic +106: Inherited +107: Inscriptional_Pahlavi +108: Inscriptional_Parthian +109: Javanese +110: Join_Control +111: Kaithi +112: Kannada +113: Katakana +114: Kayah_Li +115: Kharoshthi +116: Khmer +117: Khojki +118: Khudawadi +119: L +120: LC +121: Lao +122: Latin +123: Lepcha +124: Limbu +125: Linear_A +126: Linear_B +127: Lisu +128: Ll +129: Lm +130: Lo +131: Logical_Order_Exception +132: Lowercase +133: Lt +134: Lu +135: Lycian +136: Lydian +137: M +138: Mahajani +139: Makasar +140: Malayalam +141: Mandaic +142: Manichaean +143: Marchen +144: Masaram_Gondi +145: Math +146: Mc +147: Me +148: Medefaidrin +149: Meetei_Mayek +150: Mende_Kikakui +151: Meroitic_Cursive +152: Meroitic_Hieroglyphs +153: Miao +154: Mn +155: Modi +156: Mongolian +157: Mro +158: Multani +159: Myanmar +160: N +161: Nabataean +162: Nandinagari +163: Nd +164: New_Tai_Lue +165: Newa +166: Nko +167: Nl +168: No +169: Noncharacter_Code_Point +170: Nushu +171: Nyiakeng_Puachue_Hmong +172: Ogham +173: Ol_Chiki +174: Old_Hungarian +175: Old_Italic +176: Old_North_Arabian +177: Old_Permic +178: Old_Persian +179: Old_Sogdian +180: Old_South_Arabian +181: Old_Turkic +182: Oriya +183: Osage +184: Osmanya +185: Other_Alphabetic +186: Other_Default_Ignorable_Code_Point +187: Other_Grapheme_Extend +188: Other_ID_Continue +189: Other_ID_Start +190: Other_Lowercase +191: Other_Math +192: Other_Uppercase +193: P +194: Pahawh_Hmong +195: Palmyrene +196: Pattern_Syntax +197: Pattern_White_Space +198: Pau_Cin_Hau +199: Pc +200: Pd +201: Pe +202: Pf +203: Phags_Pa +204: Phoenician +205: Pi +206: Po +207: Prepended_Concatenation_Mark +208: Ps +209: Psalter_Pahlavi +210: Quotation_Mark +211: Radical +212: Regional_Indicator +213: Rejang +214: Runic +215: 
S +216: Samaritan +217: Saurashtra +218: Sc +219: Sentence_Terminal +220: Sharada +221: Shavian +222: Siddham +223: SignWriting +224: Sinhala +225: Sk +226: Sm +227: So +228: Soft_Dotted +229: Sogdian +230: Sora_Sompeng +231: Soyombo +232: Sundanese +233: Syloti_Nagri +234: Syriac +235: Tagalog +236: Tagbanwa +237: Tai_Le +238: Tai_Tham +239: Tai_Viet +240: Takri +241: Tamil +242: Tangut +243: Telugu +244: Terminal_Punctuation +245: Thaana +246: Thai +247: Tibetan +248: Tifinagh +249: Tirhuta +250: Ugaritic +251: Unified_Ideograph +252: Unknown +253: Uppercase +254: Vai +255: Variation_Selector +256: Wancho +257: Warang_Citi +258: White_Space +259: XID_Continue +260: XID_Start +261: Yi +262: Z +263: Zanabazar_Square +264: Zl +265: Zp +266: Zs 16: Adlm 42: Aghb 15: AHex 21: Arab -104: Armi +105: Armi 22: Armn 24: Avst 25: Bali @@ -270,24 +274,24 @@ Unicode Properties (from Unicode Version: 11.0.0) 45: Cakm 38: Cans 39: Cari -119: Cased_Letter +120: Cased_Letter 52: Cher 40: CI -198: Close_Punctuation -136: Combining_Mark -196: Connector_Punctuation +201: Close_Punctuation +137: Combining_Mark +199: Connector_Punctuation 43: Control 56: Copt 59: Cprt -215: Currency_Symbol +218: Currency_Symbol 47: CWCF 48: CWCM 49: CWL 50: CWT 51: CWU 60: Cyrl -197: Dash_Punctuation -161: Decimal_Number +200: Dash_Punctuation +163: Decimal_Number 63: Dep 65: Deva 62: DI @@ -297,475 +301,488 @@ Unicode Properties (from Unicode Version: 11.0.0) 68: Dupl 69: Egyp 70: Elba -146: Enclosing_Mark - 76: Ethi - 78: Ext -199: Final_Punctuation + 71: Elym +147: Enclosing_Mark + 77: Ethi + 79: Ext +202: Final_Punctuation 44: Format - 79: Geor - 80: Glag - 88: Gong -143: Gonm - 81: Goth - 82: Gran - 83: Gr_Base - 86: Grek - 84: Gr_Ext - 85: Gr_Link - 87: Gujr - 89: Guru - 91: Hang - 90: Hani - 93: Hano - 94: Hatr - 95: Hebr - 96: Hex - 97: Hira + 80: Geor + 81: Glag + 89: Gong +144: Gonm + 82: Goth + 83: Gran + 84: Gr_Base + 87: Grek + 85: Gr_Ext + 86: Gr_Link + 88: Gujr + 90: Guru + 92: Hang + 
91: Hani + 94: Hano + 95: Hatr + 96: Hebr + 97: Hex + 98: Hira 19: Hluw -191: Hmng -171: Hung -101: IDC -103: Ideo -102: IDS - 99: IDSB -100: IDST -202: Initial_Punctuation -172: Ital -108: Java -109: Join_C -113: Kali -112: Kana -114: Khar -115: Khmr -116: Khoj -111: Knda -110: Kthi -235: Lana -120: Laoo -121: Latn -122: Lepc -118: Letter -165: Letter_Number -123: Limb -124: Lina -125: Linb -260: Line_Separator -130: LOE -127: Lowercase_Letter -134: Lyci -135: Lydi -137: Mahj -138: Maka -140: Mand -141: Mani -142: Marc -136: Mark -223: Math_Symbol -147: Medf -149: Mend -150: Merc -151: Mero -139: Mlym -128: Modifier_Letter -222: Modifier_Symbol -155: Mong -156: Mroo -148: Mtei -157: Mult -158: Mymr -173: Narb -160: Nbat -167: NChar -164: Nkoo -153: Nonspacing_Mark -168: Nshu -159: Number -182: OAlpha -183: ODI -169: Ogam -184: OGr_Ext -185: OIDC -186: OIDS -170: Olck -187: OLower -188: OMath -205: Open_Punctuation -178: Orkh -179: Orya -180: Osge -181: Osma +194: Hmng +171: Hmnp +174: Hung +102: IDC +104: Ideo +103: IDS +100: IDSB +101: IDST +205: Initial_Punctuation +175: Ital +109: Java +110: Join_C +114: Kali +113: Kana +115: Khar +116: Khmr +117: Khoj +112: Knda +111: Kthi +238: Lana +121: Laoo +122: Latn +123: Lepc +119: Letter +167: Letter_Number +124: Limb +125: Lina +126: Linb +264: Line_Separator +131: LOE +128: Lowercase_Letter +135: Lyci +136: Lydi +138: Mahj +139: Maka +141: Mand +142: Mani +143: Marc +137: Mark +226: Math_Symbol +148: Medf +150: Mend +151: Merc +152: Mero +140: Mlym +129: Modifier_Letter +225: Modifier_Symbol +156: Mong +157: Mroo +149: Mtei +158: Mult +159: Mymr +162: Nand +176: Narb +161: Nbat +169: NChar +166: Nkoo +154: Nonspacing_Mark +170: Nshu +160: Number +185: OAlpha +186: ODI +172: Ogam +187: OGr_Ext +188: OIDC +189: OIDS +173: Olck +190: OLower +191: OMath +208: Open_Punctuation +181: Orkh +182: Orya +183: Osge +184: Osma 37: Other -129: Other_Letter -166: Other_Number -203: Other_Punctuation -224: Other_Symbol -189: OUpper 
-192: Palm -261: Paragraph_Separator -193: Pat_Syn -194: Pat_WS -195: Pauc -204: PCM -174: Perm -200: Phag -106: Phli -206: Phlp -201: Phnx -152: Plrd +130: Other_Letter +168: Other_Number +206: Other_Punctuation +227: Other_Symbol +192: OUpper +195: Palm +265: Paragraph_Separator +196: Pat_Syn +197: Pat_WS +198: Pauc +207: PCM +177: Perm +203: Phag +107: Phli +209: Phlp +204: Phnx +153: Plrd 54: Private_Use -107: Prti -190: Punctuation +108: Prti +193: Punctuation 56: Qaac -105: Qaai -207: QMark -209: RI -210: Rjng - 92: Rohg -211: Runr -213: Samr -177: Sarb -214: Saur -225: SD -258: Separator -220: Sgnw -218: Shaw -217: Shrd -219: Sidd -117: Sind -221: Sinh -226: Sogd -176: Sogo -227: Sora -228: Soyo -262: Space_Separator -145: Spacing_Mark -216: STerm -229: Sund +106: Qaai +210: QMark +212: RI +213: Rjng + 93: Rohg +214: Runr +216: Samr +180: Sarb +217: Saur +228: SD +262: Separator +223: Sgnw +221: Shaw +220: Shrd +222: Sidd +118: Sind +224: Sinh +229: Sogd +179: Sogo +230: Sora +231: Soyo +266: Space_Separator +146: Spacing_Mark +219: STerm +232: Sund 57: Surrogate -230: Sylo -212: Symbol -231: Syrc -233: Tagb -237: Takr -234: Tale -162: Talu -238: Taml -239: Tang -236: Tavt -240: Telu -241: Term -245: Tfng -232: Tglg -242: Thaa -244: Tibt -246: Tirh -132: Titlecase_Letter -247: Ugar -248: UIdeo +233: Sylo +215: Symbol +234: Syrc +236: Tagb +240: Takr +237: Tale +164: Talu +241: Taml +242: Tang +239: Tavt +243: Telu +244: Term +248: Tfng +235: Tglg +245: Thaa +247: Tibt +249: Tirh +133: Titlecase_Letter +250: Ugar +251: UIdeo 53: Unassigned -133: Uppercase_Letter -251: Vaii -252: VS -253: Wara -254: WSpace -255: XIDC -256: XIDS -175: Xpeo +134: Uppercase_Letter +254: Vaii +255: VS +257: Wara +256: Wcho +258: WSpace +259: XIDC +260: XIDS +178: Xpeo 58: Xsux -257: Yiii -259: Zanb -105: Zinh +261: Yiii +263: Zanb +106: Zinh 55: Zyyy -249: Zzzz -263: In_Basic_Latin -264: In_Latin_1_Supplement -265: In_Latin_Extended_A -266: In_Latin_Extended_B -267: 
In_IPA_Extensions -268: In_Spacing_Modifier_Letters -269: In_Combining_Diacritical_Marks -270: In_Greek_and_Coptic -271: In_Cyrillic -272: In_Cyrillic_Supplement -273: In_Armenian -274: In_Hebrew -275: In_Arabic -276: In_Syriac -277: In_Arabic_Supplement -278: In_Thaana -279: In_NKo -280: In_Samaritan -281: In_Mandaic -282: In_Syriac_Supplement -283: In_Arabic_Extended_A -284: In_Devanagari -285: In_Bengali -286: In_Gurmukhi -287: In_Gujarati -288: In_Oriya -289: In_Tamil -290: In_Telugu -291: In_Kannada -292: In_Malayalam -293: In_Sinhala -294: In_Thai -295: In_Lao -296: In_Tibetan -297: In_Myanmar -298: In_Georgian -299: In_Hangul_Jamo -300: In_Ethiopic -301: In_Ethiopic_Supplement -302: In_Cherokee -303: In_Unified_Canadian_Aboriginal_Syllabics -304: In_Ogham -305: In_Runic -306: In_Tagalog -307: In_Hanunoo -308: In_Buhid -309: In_Tagbanwa -310: In_Khmer -311: In_Mongolian -312: In_Unified_Canadian_Aboriginal_Syllabics_Extended -313: In_Limbu -314: In_Tai_Le -315: In_New_Tai_Lue -316: In_Khmer_Symbols -317: In_Buginese -318: In_Tai_Tham -319: In_Combining_Diacritical_Marks_Extended -320: In_Balinese -321: In_Sundanese -322: In_Batak -323: In_Lepcha -324: In_Ol_Chiki -325: In_Cyrillic_Extended_C -326: In_Georgian_Extended -327: In_Sundanese_Supplement -328: In_Vedic_Extensions -329: In_Phonetic_Extensions -330: In_Phonetic_Extensions_Supplement -331: In_Combining_Diacritical_Marks_Supplement -332: In_Latin_Extended_Additional -333: In_Greek_Extended -334: In_General_Punctuation -335: In_Superscripts_and_Subscripts -336: In_Currency_Symbols -337: In_Combining_Diacritical_Marks_for_Symbols -338: In_Letterlike_Symbols -339: In_Number_Forms -340: In_Arrows -341: In_Mathematical_Operators -342: In_Miscellaneous_Technical -343: In_Control_Pictures -344: In_Optical_Character_Recognition -345: In_Enclosed_Alphanumerics -346: In_Box_Drawing -347: In_Block_Elements -348: In_Geometric_Shapes -349: In_Miscellaneous_Symbols -350: In_Dingbats -351: 
In_Miscellaneous_Mathematical_Symbols_A -352: In_Supplemental_Arrows_A -353: In_Braille_Patterns -354: In_Supplemental_Arrows_B -355: In_Miscellaneous_Mathematical_Symbols_B -356: In_Supplemental_Mathematical_Operators -357: In_Miscellaneous_Symbols_and_Arrows -358: In_Glagolitic -359: In_Latin_Extended_C -360: In_Coptic -361: In_Georgian_Supplement -362: In_Tifinagh -363: In_Ethiopic_Extended -364: In_Cyrillic_Extended_A -365: In_Supplemental_Punctuation -366: In_CJK_Radicals_Supplement -367: In_Kangxi_Radicals -368: In_Ideographic_Description_Characters -369: In_CJK_Symbols_and_Punctuation -370: In_Hiragana -371: In_Katakana -372: In_Bopomofo -373: In_Hangul_Compatibility_Jamo -374: In_Kanbun -375: In_Bopomofo_Extended -376: In_CJK_Strokes -377: In_Katakana_Phonetic_Extensions -378: In_Enclosed_CJK_Letters_and_Months -379: In_CJK_Compatibility -380: In_CJK_Unified_Ideographs_Extension_A -381: In_Yijing_Hexagram_Symbols -382: In_CJK_Unified_Ideographs -383: In_Yi_Syllables -384: In_Yi_Radicals -385: In_Lisu -386: In_Vai -387: In_Cyrillic_Extended_B -388: In_Bamum -389: In_Modifier_Tone_Letters -390: In_Latin_Extended_D -391: In_Syloti_Nagri -392: In_Common_Indic_Number_Forms -393: In_Phags_pa -394: In_Saurashtra -395: In_Devanagari_Extended -396: In_Kayah_Li -397: In_Rejang -398: In_Hangul_Jamo_Extended_A -399: In_Javanese -400: In_Myanmar_Extended_B -401: In_Cham -402: In_Myanmar_Extended_A -403: In_Tai_Viet -404: In_Meetei_Mayek_Extensions -405: In_Ethiopic_Extended_A -406: In_Latin_Extended_E -407: In_Cherokee_Supplement -408: In_Meetei_Mayek -409: In_Hangul_Syllables -410: In_Hangul_Jamo_Extended_B -411: In_High_Surrogates -412: In_High_Private_Use_Surrogates -413: In_Low_Surrogates -414: In_Private_Use_Area -415: In_CJK_Compatibility_Ideographs -416: In_Alphabetic_Presentation_Forms -417: In_Arabic_Presentation_Forms_A -418: In_Variation_Selectors -419: In_Vertical_Forms -420: In_Combining_Half_Marks -421: In_CJK_Compatibility_Forms -422: 
In_Small_Form_Variants -423: In_Arabic_Presentation_Forms_B -424: In_Halfwidth_and_Fullwidth_Forms -425: In_Specials -426: In_Linear_B_Syllabary -427: In_Linear_B_Ideograms -428: In_Aegean_Numbers -429: In_Ancient_Greek_Numbers -430: In_Ancient_Symbols -431: In_Phaistos_Disc -432: In_Lycian -433: In_Carian -434: In_Coptic_Epact_Numbers -435: In_Old_Italic -436: In_Gothic -437: In_Old_Permic -438: In_Ugaritic -439: In_Old_Persian -440: In_Deseret -441: In_Shavian -442: In_Osmanya -443: In_Osage -444: In_Elbasan -445: In_Caucasian_Albanian -446: In_Linear_A -447: In_Cypriot_Syllabary -448: In_Imperial_Aramaic -449: In_Palmyrene -450: In_Nabataean -451: In_Hatran -452: In_Phoenician -453: In_Lydian -454: In_Meroitic_Hieroglyphs -455: In_Meroitic_Cursive -456: In_Kharoshthi -457: In_Old_South_Arabian -458: In_Old_North_Arabian -459: In_Manichaean -460: In_Avestan -461: In_Inscriptional_Parthian -462: In_Inscriptional_Pahlavi -463: In_Psalter_Pahlavi -464: In_Old_Turkic -465: In_Old_Hungarian -466: In_Hanifi_Rohingya -467: In_Rumi_Numeral_Symbols -468: In_Old_Sogdian -469: In_Sogdian -470: In_Brahmi -471: In_Kaithi -472: In_Sora_Sompeng -473: In_Chakma -474: In_Mahajani -475: In_Sharada -476: In_Sinhala_Archaic_Numbers -477: In_Khojki -478: In_Multani -479: In_Khudawadi -480: In_Grantha -481: In_Newa -482: In_Tirhuta -483: In_Siddham -484: In_Modi -485: In_Mongolian_Supplement -486: In_Takri -487: In_Ahom -488: In_Dogra -489: In_Warang_Citi -490: In_Zanabazar_Square -491: In_Soyombo -492: In_Pau_Cin_Hau -493: In_Bhaiksuki -494: In_Marchen -495: In_Masaram_Gondi -496: In_Gunjala_Gondi -497: In_Makasar -498: In_Cuneiform -499: In_Cuneiform_Numbers_and_Punctuation -500: In_Early_Dynastic_Cuneiform -501: In_Egyptian_Hieroglyphs -502: In_Anatolian_Hieroglyphs -503: In_Bamum_Supplement -504: In_Mro -505: In_Bassa_Vah -506: In_Pahawh_Hmong -507: In_Medefaidrin -508: In_Miao -509: In_Ideographic_Symbols_and_Punctuation -510: In_Tangut -511: In_Tangut_Components -512: 
In_Kana_Supplement -513: In_Kana_Extended_A -514: In_Nushu -515: In_Duployan -516: In_Shorthand_Format_Controls -517: In_Byzantine_Musical_Symbols -518: In_Musical_Symbols -519: In_Ancient_Greek_Musical_Notation -520: In_Mayan_Numerals -521: In_Tai_Xuan_Jing_Symbols -522: In_Counting_Rod_Numerals -523: In_Mathematical_Alphanumeric_Symbols -524: In_Sutton_SignWriting -525: In_Glagolitic_Supplement -526: In_Mende_Kikakui -527: In_Adlam -528: In_Indic_Siyaq_Numbers -529: In_Arabic_Mathematical_Alphabetic_Symbols -530: In_Mahjong_Tiles -531: In_Domino_Tiles -532: In_Playing_Cards -533: In_Enclosed_Alphanumeric_Supplement -534: In_Enclosed_Ideographic_Supplement -535: In_Miscellaneous_Symbols_and_Pictographs -536: In_Emoticons -537: In_Ornamental_Dingbats -538: In_Transport_and_Map_Symbols -539: In_Alchemical_Symbols -540: In_Geometric_Shapes_Extended -541: In_Supplemental_Arrows_C -542: In_Supplemental_Symbols_and_Pictographs -543: In_Chess_Symbols -544: In_CJK_Unified_Ideographs_Extension_B -545: In_CJK_Unified_Ideographs_Extension_C -546: In_CJK_Unified_Ideographs_Extension_D -547: In_CJK_Unified_Ideographs_Extension_E -548: In_CJK_Unified_Ideographs_Extension_F -549: In_CJK_Compatibility_Ideographs_Supplement -550: In_Tags -551: In_Variation_Selectors_Supplement -552: In_Supplementary_Private_Use_Area_A -553: In_Supplementary_Private_Use_Area_B -554: In_No_Block +252: Zzzz +267: In_Basic_Latin +268: In_Latin_1_Supplement +269: In_Latin_Extended_A +270: In_Latin_Extended_B +271: In_IPA_Extensions +272: In_Spacing_Modifier_Letters +273: In_Combining_Diacritical_Marks +274: In_Greek_and_Coptic +275: In_Cyrillic +276: In_Cyrillic_Supplement +277: In_Armenian +278: In_Hebrew +279: In_Arabic +280: In_Syriac +281: In_Arabic_Supplement +282: In_Thaana +283: In_NKo +284: In_Samaritan +285: In_Mandaic +286: In_Syriac_Supplement +287: In_Arabic_Extended_A +288: In_Devanagari +289: In_Bengali +290: In_Gurmukhi +291: In_Gujarati +292: In_Oriya +293: In_Tamil +294: In_Telugu 
+295: In_Kannada +296: In_Malayalam +297: In_Sinhala +298: In_Thai +299: In_Lao +300: In_Tibetan +301: In_Myanmar +302: In_Georgian +303: In_Hangul_Jamo +304: In_Ethiopic +305: In_Ethiopic_Supplement +306: In_Cherokee +307: In_Unified_Canadian_Aboriginal_Syllabics +308: In_Ogham +309: In_Runic +310: In_Tagalog +311: In_Hanunoo +312: In_Buhid +313: In_Tagbanwa +314: In_Khmer +315: In_Mongolian +316: In_Unified_Canadian_Aboriginal_Syllabics_Extended +317: In_Limbu +318: In_Tai_Le +319: In_New_Tai_Lue +320: In_Khmer_Symbols +321: In_Buginese +322: In_Tai_Tham +323: In_Combining_Diacritical_Marks_Extended +324: In_Balinese +325: In_Sundanese +326: In_Batak +327: In_Lepcha +328: In_Ol_Chiki +329: In_Cyrillic_Extended_C +330: In_Georgian_Extended +331: In_Sundanese_Supplement +332: In_Vedic_Extensions +333: In_Phonetic_Extensions +334: In_Phonetic_Extensions_Supplement +335: In_Combining_Diacritical_Marks_Supplement +336: In_Latin_Extended_Additional +337: In_Greek_Extended +338: In_General_Punctuation +339: In_Superscripts_and_Subscripts +340: In_Currency_Symbols +341: In_Combining_Diacritical_Marks_for_Symbols +342: In_Letterlike_Symbols +343: In_Number_Forms +344: In_Arrows +345: In_Mathematical_Operators +346: In_Miscellaneous_Technical +347: In_Control_Pictures +348: In_Optical_Character_Recognition +349: In_Enclosed_Alphanumerics +350: In_Box_Drawing +351: In_Block_Elements +352: In_Geometric_Shapes +353: In_Miscellaneous_Symbols +354: In_Dingbats +355: In_Miscellaneous_Mathematical_Symbols_A +356: In_Supplemental_Arrows_A +357: In_Braille_Patterns +358: In_Supplemental_Arrows_B +359: In_Miscellaneous_Mathematical_Symbols_B +360: In_Supplemental_Mathematical_Operators +361: In_Miscellaneous_Symbols_and_Arrows +362: In_Glagolitic +363: In_Latin_Extended_C +364: In_Coptic +365: In_Georgian_Supplement +366: In_Tifinagh +367: In_Ethiopic_Extended +368: In_Cyrillic_Extended_A +369: In_Supplemental_Punctuation +370: In_CJK_Radicals_Supplement +371: In_Kangxi_Radicals 
+372: In_Ideographic_Description_Characters +373: In_CJK_Symbols_and_Punctuation +374: In_Hiragana +375: In_Katakana +376: In_Bopomofo +377: In_Hangul_Compatibility_Jamo +378: In_Kanbun +379: In_Bopomofo_Extended +380: In_CJK_Strokes +381: In_Katakana_Phonetic_Extensions +382: In_Enclosed_CJK_Letters_and_Months +383: In_CJK_Compatibility +384: In_CJK_Unified_Ideographs_Extension_A +385: In_Yijing_Hexagram_Symbols +386: In_CJK_Unified_Ideographs +387: In_Yi_Syllables +388: In_Yi_Radicals +389: In_Lisu +390: In_Vai +391: In_Cyrillic_Extended_B +392: In_Bamum +393: In_Modifier_Tone_Letters +394: In_Latin_Extended_D +395: In_Syloti_Nagri +396: In_Common_Indic_Number_Forms +397: In_Phags_pa +398: In_Saurashtra +399: In_Devanagari_Extended +400: In_Kayah_Li +401: In_Rejang +402: In_Hangul_Jamo_Extended_A +403: In_Javanese +404: In_Myanmar_Extended_B +405: In_Cham +406: In_Myanmar_Extended_A +407: In_Tai_Viet +408: In_Meetei_Mayek_Extensions +409: In_Ethiopic_Extended_A +410: In_Latin_Extended_E +411: In_Cherokee_Supplement +412: In_Meetei_Mayek +413: In_Hangul_Syllables +414: In_Hangul_Jamo_Extended_B +415: In_High_Surrogates +416: In_High_Private_Use_Surrogates +417: In_Low_Surrogates +418: In_Private_Use_Area +419: In_CJK_Compatibility_Ideographs +420: In_Alphabetic_Presentation_Forms +421: In_Arabic_Presentation_Forms_A +422: In_Variation_Selectors +423: In_Vertical_Forms +424: In_Combining_Half_Marks +425: In_CJK_Compatibility_Forms +426: In_Small_Form_Variants +427: In_Arabic_Presentation_Forms_B +428: In_Halfwidth_and_Fullwidth_Forms +429: In_Specials +430: In_Linear_B_Syllabary +431: In_Linear_B_Ideograms +432: In_Aegean_Numbers +433: In_Ancient_Greek_Numbers +434: In_Ancient_Symbols +435: In_Phaistos_Disc +436: In_Lycian +437: In_Carian +438: In_Coptic_Epact_Numbers +439: In_Old_Italic +440: In_Gothic +441: In_Old_Permic +442: In_Ugaritic +443: In_Old_Persian +444: In_Deseret +445: In_Shavian +446: In_Osmanya +447: In_Osage +448: In_Elbasan +449: 
In_Caucasian_Albanian +450: In_Linear_A +451: In_Cypriot_Syllabary +452: In_Imperial_Aramaic +453: In_Palmyrene +454: In_Nabataean +455: In_Hatran +456: In_Phoenician +457: In_Lydian +458: In_Meroitic_Hieroglyphs +459: In_Meroitic_Cursive +460: In_Kharoshthi +461: In_Old_South_Arabian +462: In_Old_North_Arabian +463: In_Manichaean +464: In_Avestan +465: In_Inscriptional_Parthian +466: In_Inscriptional_Pahlavi +467: In_Psalter_Pahlavi +468: In_Old_Turkic +469: In_Old_Hungarian +470: In_Hanifi_Rohingya +471: In_Rumi_Numeral_Symbols +472: In_Old_Sogdian +473: In_Sogdian +474: In_Elymaic +475: In_Brahmi +476: In_Kaithi +477: In_Sora_Sompeng +478: In_Chakma +479: In_Mahajani +480: In_Sharada +481: In_Sinhala_Archaic_Numbers +482: In_Khojki +483: In_Multani +484: In_Khudawadi +485: In_Grantha +486: In_Newa +487: In_Tirhuta +488: In_Siddham +489: In_Modi +490: In_Mongolian_Supplement +491: In_Takri +492: In_Ahom +493: In_Dogra +494: In_Warang_Citi +495: In_Nandinagari +496: In_Zanabazar_Square +497: In_Soyombo +498: In_Pau_Cin_Hau +499: In_Bhaiksuki +500: In_Marchen +501: In_Masaram_Gondi +502: In_Gunjala_Gondi +503: In_Makasar +504: In_Tamil_Supplement +505: In_Cuneiform +506: In_Cuneiform_Numbers_and_Punctuation +507: In_Early_Dynastic_Cuneiform +508: In_Egyptian_Hieroglyphs +509: In_Egyptian_Hieroglyph_Format_Controls +510: In_Anatolian_Hieroglyphs +511: In_Bamum_Supplement +512: In_Mro +513: In_Bassa_Vah +514: In_Pahawh_Hmong +515: In_Medefaidrin +516: In_Miao +517: In_Ideographic_Symbols_and_Punctuation +518: In_Tangut +519: In_Tangut_Components +520: In_Kana_Supplement +521: In_Kana_Extended_A +522: In_Small_Kana_Extension +523: In_Nushu +524: In_Duployan +525: In_Shorthand_Format_Controls +526: In_Byzantine_Musical_Symbols +527: In_Musical_Symbols +528: In_Ancient_Greek_Musical_Notation +529: In_Mayan_Numerals +530: In_Tai_Xuan_Jing_Symbols +531: In_Counting_Rod_Numerals +532: In_Mathematical_Alphanumeric_Symbols +533: In_Sutton_SignWriting +534: 
In_Glagolitic_Supplement +535: In_Nyiakeng_Puachue_Hmong +536: In_Wancho +537: In_Mende_Kikakui +538: In_Adlam +539: In_Indic_Siyaq_Numbers +540: In_Ottoman_Siyaq_Numbers +541: In_Arabic_Mathematical_Alphabetic_Symbols +542: In_Mahjong_Tiles +543: In_Domino_Tiles +544: In_Playing_Cards +545: In_Enclosed_Alphanumeric_Supplement +546: In_Enclosed_Ideographic_Supplement +547: In_Miscellaneous_Symbols_and_Pictographs +548: In_Emoticons +549: In_Ornamental_Dingbats +550: In_Transport_and_Map_Symbols +551: In_Alchemical_Symbols +552: In_Geometric_Shapes_Extended +553: In_Supplemental_Arrows_C +554: In_Supplemental_Symbols_and_Pictographs +555: In_Chess_Symbols +556: In_Symbols_and_Pictographs_Extended_A +557: In_CJK_Unified_Ideographs_Extension_B +558: In_CJK_Unified_Ideographs_Extension_C +559: In_CJK_Unified_Ideographs_Extension_D +560: In_CJK_Unified_Ideographs_Extension_E +561: In_CJK_Unified_Ideographs_Extension_F +562: In_CJK_Compatibility_Ideographs_Supplement +563: In_Tags +564: In_Variation_Selectors_Supplement +565: In_Supplementary_Private_Use_Area_A +566: In_Supplementary_Private_Use_Area_B +567: In_No_Block diff --git a/oniguruma/index.html b/oniguruma/index.html index f55084e08..782e8dd33 100644 --- a/oniguruma/index.html +++ b/oniguruma/index.html @@ -8,7 +8,7 @@

Oniguruma

(Japanese)

-(c) K.Kosako, updated at: 2018/08/31 +(c) K.Kosako, updated at: 2019/05/14

@@ -16,6 +16,8 @@
What's new
    +
  • 2019/05/07: Version 6.9.2 released.
  • +
  • 2018/12/11: Version 6.9.1 released.
  • 2018/09/03: Version 6.9.0 released.
  • 2018/04/17: Version 6.8.2 released.
  • 2018/03/19: Version 6.8.1 released.
  • diff --git a/oniguruma/windows/testc.c b/oniguruma/windows/testc.c new file mode 100644 index 000000000..b657682a6 --- /dev/null +++ b/oniguruma/windows/testc.c @@ -0,0 +1,865 @@ +/* + * This program was generated by testconv.rb. + */ +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif +#include + +#ifdef POSIX_TEST +#include "onigposix.h" +#else +#include "oniguruma.h" +#endif + +#include + +#define SLEN(s) strlen(s) + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +#ifndef POSIX_TEST +static OnigRegion* region; +#endif + +static void xx(char* pattern, char* str, int from, int to, int mem, int not) +{ + int r; + +#ifdef POSIX_TEST + regex_t reg; + char buf[200]; + regmatch_t pmatch[25]; + + r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + r = regexec(®, str, reg.re_nsub + 1, pmatch, 0); + if (r != 0 && r != REG_NOMATCH) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + if (r == REG_NOMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo); + nfail++; + } + } + } + regfree(®); + +#else + regex_t* reg; + OnigErrorInfo einfo; + + r = onig_new(®, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_SJIS, ONIG_SYNTAX_DEFAULT, &einfo); + if (r) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + 
onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)), + (UChar* )str, (UChar* )(str + SLEN(str)), + region, ONIG_OPTION_NONE); + if (r < ONIG_MISMATCH) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (region->beg[mem] == from && region->end[mem] == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, region->beg[mem], region->end[mem]); + nfail++; + } + } + } + onig_free(reg); +#endif +} + +static void x2(char* pattern, char* str, int from, int to) +{ + xx(pattern, str, from, to, 0, 0); +} + +static void x3(char* pattern, char* str, int from, int to, int mem) +{ + xx(pattern, str, from, to, mem, 0); +} + +static void n(char* pattern, char* str) +{ + xx(pattern, str, 0, 0, 0, 1); +} + +extern int main(int argc, char* argv[]) +{ +#ifndef POSIX_TEST + static OnigEncoding use_encs[1]; + + use_encs[0] = ONIG_ENCODING_SJIS; + onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); +#endif + + err_file = stdout; + +#ifdef POSIX_TEST + reg_set_encoding(REG_POSIX_ENCODING_SJIS); +#else + region = onig_region_new(); +#endif + + x2("", "", 0, 0); + x2("^", "", 0, 0); + x2("$", "", 0, 0); + x2("\\G", "", 0, 0); + x2("\\A", "", 0, 0); + x2("\\Z", "", 0, 0); + x2("\\z", "", 0, 0); + x2("^$", "", 0, 0); + x2("\\ca", "\001", 0, 1); + x2("\\C-b", "\002", 0, 1); + x2("\\c\\\\", "\034", 0, 1); + x2("q[\\c\\\\]", "q\034", 0, 2); + x2("", "a", 0, 0); + 
x2("a", "a", 0, 1); + x2("\\x61", "a", 0, 1); + x2("aa", "aa", 0, 2); + x2("aaa", "aaa", 0, 3); + x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); + x2("ab", "ab", 0, 2); + x2("b", "ab", 1, 2); + x2("bc", "abc", 1, 3); + x2("(?i:#RET#)", "#INS##RET#", 5, 10); + x2("\\17", "\017", 0, 1); + x2("\\x1f", "\x1f", 0, 1); + x2("a(?#....\\\\JJJJ)b", "ab", 0, 2); + x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); + x2(".", "a", 0, 1); + n(".", ""); + x2("..", "ab", 0, 2); + x2("\\w", "e", 0, 1); + n("\\W", "e"); + x2("\\s", " ", 0, 1); + x2("\\S", "b", 0, 1); + x2("\\d", "4", 0, 1); + n("\\D", "4"); + x2("\\b", "z ", 0, 0); + x2("\\b", " z", 1, 1); + x2("\\B", "zz ", 1, 1); + x2("\\B", "z ", 2, 2); + x2("\\B", " z", 0, 0); + x2("[ab]", "b", 0, 1); + n("[ab]", "c"); + x2("[a-z]", "t", 0, 1); + n("[^a]", "a"); + x2("[^a]", "\n", 0, 1); + x2("[]]", "]", 0, 1); + n("[^]]", "]"); + x2("[\\^]+", "0^^1", 1, 3); + x2("[b-]", "b", 0, 1); + x2("[b-]", "-", 0, 1); + x2("[\\w]", "z", 0, 1); + n("[\\w]", " "); + x2("[\\W]", "b$", 1, 2); + x2("[\\d]", "5", 0, 1); + n("[\\d]", "e"); + x2("[\\D]", "t", 0, 1); + n("[\\D]", "3"); + x2("[\\s]", " ", 0, 1); + n("[\\s]", "a"); + x2("[\\S]", "b", 0, 1); + n("[\\S]", " "); + x2("[\\w\\d]", "2", 0, 1); + n("[\\w\\d]", " "); + x2("[[:upper:]]", "B", 0, 1); + x2("[*[:xdigit:]+]", "+", 0, 1); + x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); + x2("[*[:xdigit:]+]", "-@^+", 3, 4); + n("[[:upper]]", "A"); + x2("[[:upper]]", ":", 0, 1); + x2("[\\044-\\047]", "\046", 0, 1); + x2("[\\x5a-\\x5c]", "\x5b", 0, 1); + x2("[\\x6A-\\x6D]", "\x6c", 0, 1); + n("[\\x6A-\\x6D]", "\x6E"); + n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); + x2("[\\[]", "[", 0, 1); + x2("[\\]]", "]", 0, 1); + x2("[&]", "&", 0, 1); + x2("[[ab]]", "b", 0, 1); + x2("[[ab]c]", "c", 0, 1); + n("[[^a]]", "a"); + n("[^[a]]", "a"); + x2("[[ab]&&bc]", "b", 0, 1); + n("[[ab]&&bc]", "a"); + n("[[ab]&&bc]", "c"); + 
x2("[a-z&&b-y&&c-x]", "w", 0, 1); + n("[^a-z&&b-y&&c-x]", "w"); + x2("[[^a&&a]&&a-z]", "b", 0, 1); + n("[[^a&&a]&&a-z]", "a"); + x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); + n("[[^a-z&&bcdef]&&[^c-g]]", "c"); + x2("[^[^abc]&&[^cde]]", "c", 0, 1); + x2("[^[^abc]&&[^cde]]", "e", 0, 1); + n("[^[^abc]&&[^cde]]", "f"); + x2("[a-&&-a]", "-", 0, 1); + n("[a\\-&&\\-a]", "&"); + n("\\wabc", " abc"); + x2("a\\Wbc", "a bc", 0, 4); + x2("a.b.c", "aabbc", 0, 5); + x2(".\\wb\\W..c", "abb bcc", 0, 7); + x2("\\s\\wzzz", " zzzz", 0, 5); + x2("aa.b", "aabb", 0, 4); + n(".a", "ab"); + x2(".a", "aa", 0, 2); + x2("^a", "a", 0, 1); + x2("^a$", "a", 0, 1); + x2("^\\w$", "a", 0, 1); + n("^\\w$", " "); + x2("^\\wab$", "zab", 0, 3); + x2("^\\wabcdef$", "zabcdef", 0, 7); + x2("^\\w...def$", "zabcdef", 0, 7); + x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); + x2("\\A\\Z", "", 0, 0); + x2("\\Axyz", "xyz", 0, 3); + x2("xyz\\Z", "xyz", 0, 3); + x2("xyz\\z", "xyz", 0, 3); + x2("a\\Z", "a", 0, 1); + x2("\\Gaz", "az", 0, 2); + n("\\Gz", "bza"); + n("az\\G", "az"); + n("az\\A", "az"); + n("a\\Az", "az"); + x2("\\^\\$", "^$", 0, 2); + x2("^x?y", "xy", 0, 2); + x2("^(x?y)", "xy", 0, 2); + x2("\\w", "_", 0, 1); + n("\\W", "_"); + x2("(?=z)z", "z", 0, 1); + n("(?=z).", "a"); + x2("(?!z)a", "a", 0, 1); + n("(?!z)a", "z"); + x2("(?i:a)", "a", 0, 1); + x2("(?i:a)", "A", 0, 1); + x2("(?i:A)", "a", 0, 1); + n("(?i:A)", "b"); + x2("(?i:[A-Z])", "a", 0, 1); + x2("(?i:[f-m])", "H", 0, 1); + x2("(?i:[f-m])", "h", 0, 1); + n("(?i:[f-m])", "e"); + x2("(?i:[A-c])", "D", 0, 1); + n("(?i:[^a-z])", "A"); + n("(?i:[^a-z])", "a"); + x2("(?i:[!-k])", "Z", 0, 1); + x2("(?i:[!-k])", "7", 0, 1); + x2("(?i:[T-}])", "b", 0, 1); + x2("(?i:[T-}])", "{", 0, 1); + x2("(?i:\\?a)", "?A", 0, 2); + x2("(?i:\\*A)", "*a", 0, 2); + n(".", "\n"); + x2("(?m:.)", "\n", 0, 1); + x2("(?m:a.)", "a\n", 0, 2); + x2("(?m:.b)", "a\nb", 1, 3); + x2(".*abc", "dddabdd\nddabc", 8, 13); + x2("(?m:.*abc)", "dddabddabc", 0, 10); + n("(?i)(?-i)a", "A"); + 
n("(?i)(?-i:a)", "A"); + x2("a?", "", 0, 0); + x2("a?", "b", 0, 0); + x2("a?", "a", 0, 1); + x2("a*", "", 0, 0); + x2("a*", "a", 0, 1); + x2("a*", "aaa", 0, 3); + x2("a*", "baaaa", 0, 0); + n("a+", ""); + x2("a+", "a", 0, 1); + x2("a+", "aaaa", 0, 4); + x2("a+", "aabbb", 0, 2); + x2("a+", "baaaa", 1, 5); + x2(".?", "", 0, 0); + x2(".?", "f", 0, 1); + x2(".?", "\n", 0, 0); + x2(".*", "", 0, 0); + x2(".*", "abcde", 0, 5); + x2(".+", "z", 0, 1); + x2(".+", "zdswer\n", 0, 6); + x2("(.*)a\\1f", "babfbac", 0, 4); + x2("(.*)a\\1f", "bacbabf", 3, 7); + x2("((.*)a\\2f)", "bacbabf", 3, 7); + x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); + x2("a|b", "a", 0, 1); + x2("a|b", "b", 0, 1); + x2("|a", "a", 0, 0); + x2("(|a)", "a", 0, 0); + x2("ab|bc", "ab", 0, 2); + x2("ab|bc", "bc", 0, 2); + x2("z(?:ab|bc)", "zbc", 0, 3); + x2("a(?:ab|bc)c", "aabc", 0, 4); + x2("ab|(?:ac|az)", "az", 0, 2); + x2("a|b|c", "dc", 1, 2); + x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); + n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); + x2("a|^z", "ba", 1, 2); + x2("a|^z", "za", 0, 1); + x2("a|\\Gz", "bza", 2, 3); + x2("a|\\Gz", "za", 0, 1); + x2("a|\\Az", "bza", 2, 3); + x2("a|\\Az", "za", 0, 1); + x2("a|b\\Z", "ba", 1, 2); + x2("a|b\\Z", "b", 0, 1); + x2("a|b\\z", "ba", 1, 2); + x2("a|b\\z", "b", 0, 1); + x2("\\w|\\s", " ", 0, 1); + n("\\w|\\w", " "); + x2("\\w|%", "%", 0, 1); + x2("\\w|[&$]", "&", 0, 1); + x2("[b-d]|[^e-z]", "a", 0, 1); + x2("(?:a|[c-f])|bz", "dz", 0, 1); + x2("(?:a|[c-f])|bz", "bz", 0, 2); + x2("abc|(?=zz)..f", "zzf", 0, 3); + x2("abc|(?!zz)..f", "abf", 0, 3); + x2("(?=za)..a|(?=zz)..a", "zza", 0, 3); + n("(?>a|abd)c", "abdc"); + x2("(?>abd|a)c", "abdc", 0, 4); + x2("a?|b", "a", 0, 1); + x2("a?|b", "b", 0, 0); + x2("a?|b", "", 0, 0); + x2("a*|b", "aa", 0, 2); + x2("a*|b*", "ba", 0, 0); + x2("a*|b*", "ab", 0, 1); + x2("a+|b*", "", 0, 0); + x2("a+|b*", "bbb", 0, 3); + x2("a+|b*", "abbb", 0, 1); + n("a+|b+", ""); + x2("(a|b)?", "b", 0, 1); + x2("(a|b)*", "ba", 0, 
2); + x2("(a|b)+", "bab", 0, 3); + x2("(ab|ca)+", "caabbc", 0, 4); + x2("(ab|ca)+", "aabca", 1, 5); + x2("(ab|ca)+", "abzca", 0, 2); + x2("(a|bab)+", "ababa", 0, 5); + x2("(a|bab)+", "ba", 1, 2); + x2("(a|bab)+", "baaaba", 1, 4); + x2("(?:a|b)(?:a|b)", "ab", 0, 2); + x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); + x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); + x2("(?:a+|b+){2}", "aaabbb", 0, 6); + x2("h{0,}", "hhhh", 0, 4); + x2("(?:a+|b+){1,2}", "aaabbb", 0, 6); + n("ax{2}*a", "0axxxa1"); + n("a.{0,2}a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXXa0"); + x2("^a{2,}?a$", "aaa", 0, 3); + x2("^[a-z]{2,}?$", "aaa", 0, 3); + x2("(?:a+|\\Ab*)cc", "cc", 0, 2); + n("(?:a+|\\Ab*)cc", "abcc"); + x2("(?:^a+|b+)*c", "aabbbabc", 6, 8); + x2("(?:^a+|b+)*c", "aabbbbc", 0, 7); + x2("a|(?i)c", "C", 0, 1); + x2("(?i)c|a", "C", 0, 1); + x2("(?i)c|a", "A", 0, 1); + x2("(?i:c)|a", "C", 0, 1); + n("(?i:c)|a", "A"); + x2("[abc]?", "abc", 0, 1); + x2("[abc]*", "abc", 0, 3); + x2("[^abc]*", "abc", 0, 0); + n("[^abc]+", "abc"); + x2("a?\?", "aaa", 0, 0); + x2("ba?\?b", "bab", 0, 3); + x2("a*?", "aaa", 0, 0); + x2("ba*?", "baa", 0, 1); + x2("ba*?b", "baab", 0, 4); + x2("a+?", "aaa", 0, 1); + x2("ba+?", "baa", 0, 2); + x2("ba+?b", "baab", 0, 4); + x2("(?:a?)?\?", "a", 0, 0); + x2("(?:a?\?)?", "a", 0, 0); + x2("(?:a?)+?", "aaa", 0, 1); + x2("(?:a+)?\?", "aaa", 0, 0); + x2("(?:a+)?\?b", "aaab", 0, 4); + x2("(?:ab)?{2}", "", 0, 0); + x2("(?:ab)?{2}", "ababa", 0, 4); + x2("(?:ab)*{0}", "ababa", 0, 0); + x2("(?:ab){3,}", "abababab", 0, 8); + n("(?:ab){3,}", "abab"); + x2("(?:ab){2,4}", "ababab", 0, 6); + x2("(?:ab){2,4}", "ababababab", 0, 8); + x2("(?:ab){2,4}?", "ababababab", 0, 4); + x2("(?:ab){,}", "ab{,}", 0, 5); + x2("(?:abc)+?{2}", "abcabcabc", 0, 6); + x2("(?:X*)(?i:xa)", "XXXa", 0, 4); + x2("(d+)([^abc]z)", "dddz", 0, 4); + x2("([^abc]*)([^abc]z)", "dddz", 0, 4); + x2("(\\w+)(\\wz)", "dddz", 0, 4); + x3("(a)", "a", 0, 1, 1); + x3("(ab)", "ab", 0, 2, 1); + x2("((ab))", "ab", 
0, 2); + x3("((ab))", "ab", 0, 2, 1); + x3("((ab))", "ab", 0, 2, 2); + x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); + x3("(ab)(cd)", "abcd", 0, 2, 1); + x3("(ab)(cd)", "abcd", 2, 4, 2); + x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); + x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); + x2("(^a)", "a", 0, 1); + x3("(a)|(a)", "ba", 1, 2, 1); + x3("(^a)|(a)", "ba", 1, 2, 2); + x3("(a?)", "aaa", 0, 1, 1); + x3("(a*)", "aaa", 0, 3, 1); + x3("(a*)", "", 0, 0, 1); + x3("(a+)", "aaaaaaa", 0, 7, 1); + x3("(a+|b*)", "bbbaa", 0, 3, 1); + x3("(a+|b?)", "bbbaa", 0, 1, 1); + x3("(abc)?", "abc", 0, 3, 1); + x3("(abc)*", "abc", 0, 3, 1); + x3("(abc)+", "abc", 0, 3, 1); + x3("(xyz|abc)+", "abc", 0, 3, 1); + x3("([xyz][abc]|abc)+", "abc", 0, 3, 1); + x3("((?i:abc))", "AbC", 0, 3, 1); + x2("(abc)(?i:\\1)", "abcABC", 0, 6); + x3("((?m:a.c))", "a\nc", 0, 3, 1); + x3("((?=az)a)", "azb", 0, 1, 1); + x3("abc|(.abd)", "zabd", 0, 4, 1); + x2("(?:abc)|(ABC)", "abc", 0, 3); + x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); + x3("a*(.)", "aaaaz", 4, 5, 1); + x3("a*?(.)", "aaaaz", 0, 1, 1); + x3("a*?(c)", "aaaac", 4, 5, 1); + x3("[bcd]a*(.)", "caaaaz", 5, 6, 1); + x3("(\\Abb)cc", "bbcc", 0, 2, 1); + n("(\\Abb)cc", "zbbcc"); + x3("(^bb)cc", "bbcc", 0, 2, 1); + n("(^bb)cc", "zbbcc"); + x3("cc(bb$)", "ccbb", 2, 4, 1); + n("cc(bb$)", "ccbbb"); + n("(\\1)", ""); + n("\\1(a)", "aa"); + n("(a(b)\\1)\\2+", "ababb"); + n("(?:(?:\\1|z)(a))+$", "zaa"); + x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); + x2("(a)(?=\\1)", "aa", 0, 1); + n("(a)$|\\1", "az"); + x2("(a)\\1", "aa", 0, 2); + n("(a)\\1", "ab"); + x2("(a?)\\1", "aa", 0, 2); + x2("(a?\?)\\1", "aa", 0, 0); + x2("(a*)\\1", "aaaaa", 0, 4); + x3("(a*)\\1", "aaaaa", 0, 2, 1); + x2("a(b*)\\1", "abbbb", 0, 5); + x2("a(b*)\\1", "ab", 0, 1); + x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); + x2("(a*)(b*)\\2", "aaabbbb", 0, 7); + x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); + x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); + 
x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); + x2("([a-d])\\1", "cc", 0, 2); + x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); + n("(\\w\\d\\s)\\1", "f5 f5"); + x2("(who|[a-c]{3})\\1", "whowho", 0, 6); + x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); + x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6); + x2("(^a)\\1", "aa", 0, 2); + n("(^a)\\1", "baa"); + n("(a$)\\1", "aa"); + n("(ab\\Z)\\1", "ab"); + x2("(a*\\Z)\\1", "a", 1, 1); + x2(".(a*\\Z)\\1", "ba", 1, 2); + x3("(.(abc)\\2)", "zabcabc", 0, 7, 1); + x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); + x2("((?i:az))\\1", "AzAz", 0, 4); + n("((?i:az))\\1", "Azaz"); + x2("(?<=a)b", "ab", 1, 2); + n("(?<=a)b", "bb"); + x2("(?<=a|b)b", "bb", 1, 2); + x2("(?<=a|bc)b", "bcb", 2, 3); + x2("(?<=a|bc)b", "ab", 1, 2); + x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); + x2("(a)\\g<1>", "aa", 0, 2); + x2("(?a)", "a", 0, 1); + x2("(?ab)\\g", "abab", 0, 4); + x2("(?.zv.)\\k", "azvbazvb", 0, 8); + x2("(?<=\\g)|-\\zEND (?XyZ)", "XyZ", 3, 3); + x2("(?|a\\g)+", "", 0, 0); + x2("(?|\\(\\g\\))+$", "()(())", 0, 6); + x3("\\g(?.){0}", "X", 0, 1, 1); + x2("\\g(abc|df(?.YZ){2,8}){0}", "XYZ", 0, 3); + x2("\\A(?(a\\g)|)\\z", "aaaa", 0, 4); + x2("(?|\\g\\g)\\z|\\zEND (?a|(b)\\g)", "bbbbabba", 0, 8); + x2("(?\\w+\\sx)a+\\k", " fg xaaaaaaaafg x", 2, 18); + x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); + x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); + x2("((?\\d)|(?\\w))(\\k|\\k)", "ff", 0, 2); + x2("(?:(?)|(?efg))\\k", "", 0, 0); + x2("(?:(?abc)|(?efg))\\k", "abcefgefg", 3, 9); + n("(?:(?abc)|(?efg))\\k", "abcefg"); + x2("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "a-pyumpyum", 2, 10); + x3("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); + 
x3("(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$", "aaa", 0, 3, 16); + x2("(?a|\\(\\g\\))", "a", 0, 1); + x2("(?a|\\(\\g\\))", "((((((a))))))", 0, 13); + x3("(?a|\\(\\g\\))", "((((((((a))))))))", 0, 17, 1); + x2("\\g|\\zEND(?.*abc$)", "abcxxxabc", 0, 9); + x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3); + x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); + x2("\\A(?:\\g|\\g|\\zEND (?a|c\\gc)(?b|d\\gd))$", "cdcbcdc", 0, 7); + x2("\\A(?|a\\g)\\z|\\zEND (?\\g)", "aaaa", 0, 4); + x2("(?(a|b\\gc){3,5})", "baaaaca", 1, 5); + x2("(?(a|b\\gc){3,5})", "baaaacaaaaa", 0, 10); + x2("(?\\(([^\\(\\)]++|\\g)*+\\))", "((a))", 0, 5); + x2("()*\\1", "", 0, 0); + x2("(?:()|())*\\1\\2", "", 0, 0); + x3("(?:\\1a|())*", "a", 0, 0, 1); + x2("x((.)*)*x", "0x1x2x3", 1, 6); + x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); + x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); + x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); + x2("\\xED\\xF2", "\xed\xf2", 0, 2); + x2("", "あ", 0, 0); + x2("あ", "あ", 0, 2); + n("い", "あ"); + x2("うう", "うう", 0, 4); + x2("あいう", "あいう", 0, 6); + x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 70); + x2("あ", "いあ", 2, 4); + x2("いう", "あいう", 2, 6); + x2("\\xca\\xb8", "\xca\xb8", 0, 2); + x2(".", "あ", 0, 2); + x2("..", "かき", 0, 4); + x2("\\w", "お", 0, 2); + n("\\W", "あ"); + x2("[\\W]", "う$", 2, 3); + x2("\\S", "そ", 0, 2); + x2("\\S", "漢", 0, 2); + x2("\\b", "気 ", 0, 0); + x2("\\b", " ほ", 1, 1); + x2("\\B", "せそ ", 2, 2); + x2("\\B", "う ", 3, 3); + x2("\\B", " い", 0, 0); + x2("[たち]", "ち", 0, 2); + n("[なに]", "ぬ"); + x2("[う-お]", "え", 0, 2); + n("[^け]", "け"); + x2("[\\w]", "ね", 0, 2); + n("[\\d]", "ふ"); + x2("[\\D]", "は", 0, 2); + n("[\\s]", "く"); + x2("[\\S]", "へ", 0, 2); + x2("[\\w\\d]", "よ", 0, 2); + x2("[\\w\\d]", " よ", 3, 5); + n("\\w鬼車", " 鬼車"); + x2("鬼\\W車", "鬼 車", 0, 5); + x2("あ.い.う", "ああいいう", 0, 10); + x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 13); + x2("\\s\\wこここ", " ここここ", 0, 9); + x2("ああ.け", "ああけけ", 0, 8); + n(".い", "いえ"); + 
x2(".お", "おお", 0, 4); + x2("^あ", "あ", 0, 2); + x2("^む$", "む", 0, 2); + x2("^\\w$", "に", 0, 2); + x2("^\\wかきくけこ$", "zかきくけこ", 0, 11); + x2("^\\w...うえお$", "zあいううえお", 0, 13); + x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 12); + x2("\\Aたちつ", "たちつ", 0, 6); + x2("むめも\\Z", "むめも", 0, 6); + x2("かきく\\z", "かきく", 0, 6); + x2("かきく\\Z", "かきく\n", 0, 6); + x2("\\Gぽぴ", "ぽぴ", 0, 4); + n("\\Gえ", "うえお"); + n("とて\\G", "とて"); + n("まみ\\A", "まみ"); + n("ま\\Aみ", "まみ"); + x2("(?=せ)せ", "せ", 0, 2); + n("(?=う).", "い"); + x2("(?!う)か", "か", 0, 2); + n("(?!と)あ", "と"); + x2("(?i:あ)", "あ", 0, 2); + x2("(?i:ぶべ)", "ぶべ", 0, 4); + n("(?i:い)", "う"); + x2("(?m:よ.)", "よ\n", 0, 3); + x2("(?m:.め)", "ま\nめ", 2, 5); + x2("あ?", "", 0, 0); + x2("変?", "化", 0, 0); + x2("変?", "変", 0, 2); + x2("量*", "", 0, 0); + x2("量*", "量", 0, 2); + x2("子*", "子子子", 0, 6); + x2("馬*", "鹿馬馬馬馬", 0, 0); + n("山+", ""); + x2("河+", "河", 0, 2); + x2("時+", "時時時時", 0, 8); + x2("え+", "ええううう", 0, 4); + x2("う+", "おうううう", 2, 10); + x2(".?", "た", 0, 2); + x2(".*", "ぱぴぷぺ", 0, 8); + x2(".+", "ろ", 0, 2); + x2(".+", "いうえか\n", 0, 8); + x2("あ|い", "あ", 0, 2); + x2("あ|い", "い", 0, 2); + x2("あい|いう", "あい", 0, 4); + x2("あい|いう", "いう", 0, 4); + x2("を(?:かき|きく)", "をかき", 0, 6); + x2("を(?:かき|きく)け", "をきくけ", 0, 8); + x2("あい|(?:あう|あを)", "あを", 0, 4); + x2("あ|い|う", "えう", 2, 4); + x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 6); + n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ"); + x2("あ|^わ", "ぶあ", 2, 4); + x2("あ|^を", "をあ", 0, 2); + x2("鬼|\\G車", "け車鬼", 4, 6); + x2("鬼|\\G車", "車鬼", 0, 2); + x2("鬼|\\A車", "b車鬼", 3, 5); + x2("鬼|\\A車", "車", 0, 2); + x2("鬼|車\\Z", "車鬼", 2, 4); + x2("鬼|車\\Z", "車", 0, 2); + x2("鬼|車\\Z", "車\n", 0, 2); + x2("鬼|車\\z", "車鬼", 2, 4); + x2("鬼|車\\z", "車", 0, 2); + x2("\\w|\\s", "お", 0, 2); + x2("\\w|%", "%お", 0, 1); + x2("\\w|[&$]", "う&", 0, 2); + x2("[い-け]", "う", 0, 2); + x2("[い-け]|[^か-こ]", "あ", 0, 2); + x2("[い-け]|[^か-こ]", "か", 0, 2); + x2("[^あ]", "\n", 0, 1); + x2("(?:あ|[う-き])|いを", "うを", 0, 2); + x2("(?:あ|[う-き])|いを", "いを", 0, 4); + x2("あいう|(?=けけ)..ほ", 
"けけほ", 0, 6); + x2("あいう|(?!けけ)..ほ", "あいほ", 0, 6); + x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 6); + x2("(?<=あ|いう)い", "いうい", 4, 6); + n("(?>あ|あいえ)う", "あいえう"); + x2("(?>あいえ|あ)う", "あいえう", 0, 8); + x2("あ?|い", "あ", 0, 2); + x2("あ?|い", "い", 0, 0); + x2("あ?|い", "", 0, 0); + x2("あ*|い", "ああ", 0, 4); + x2("あ*|い*", "いあ", 0, 0); + x2("あ*|い*", "あい", 0, 2); + x2("[aあ]*|い*", "aあいいい", 0, 3); + x2("あ+|い*", "", 0, 0); + x2("あ+|い*", "いいい", 0, 6); + x2("あ+|い*", "あいいい", 0, 2); + x2("あ+|い*", "aあいいい", 0, 0); + n("あ+|い+", ""); + x2("(あ|い)?", "い", 0, 2); + x2("(あ|い)*", "いあ", 0, 4); + x2("(あ|い)+", "いあい", 0, 6); + x2("(あい|うあ)+", "うああいうえ", 0, 8); + x2("(あい|うえ)+", "うああいうえ", 4, 12); + x2("(あい|うあ)+", "ああいうあ", 2, 10); + x2("(あい|うあ)+", "あいをうあ", 0, 4); + x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 10); + x2("(あ|いあい)+", "あいあいあ", 0, 10); + x2("(あ|いあい)+", "いあ", 2, 4); + x2("(あ|いあい)+", "いあああいあ", 2, 8); + x2("(?:あ|い)(?:あ|い)", "あい", 0, 4); + x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 6); + x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 12); + x2("(?:あ+|い+){2}", "あああいいい", 0, 12); + x2("(?:あ+|い+){1,2}", "あああいいい", 0, 12); + x2("(?:あ+|\\Aい*)うう", "うう", 0, 4); + n("(?:あ+|\\Aい*)うう", "あいうう"); + x2("(?:^あ+|い+)*う", "ああいいいあいう", 12, 16); + x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 14); + x2("う{0,}", "うううう", 0, 8); + x2("あ|(?i)c", "C", 0, 1); + x2("(?i)c|あ", "C", 0, 1); + x2("(?i:あ)|a", "a", 0, 1); + n("(?i:あ)|a", "A"); + x2("[あいう]?", "あいう", 0, 2); + x2("[あいう]*", "あいう", 0, 6); + x2("[^あいう]*", "あいう", 0, 0); + n("[^あいう]+", "あいう"); + x2("あ?\?", "あああ", 0, 0); + x2("いあ?\?い", "いあい", 0, 6); + x2("あ*?", "あああ", 0, 0); + x2("いあ*?", "いああ", 0, 2); + x2("いあ*?い", "いああい", 0, 8); + x2("あ+?", "あああ", 0, 2); + x2("いあ+?", "いああ", 0, 4); + x2("いあ+?い", "いああい", 0, 8); + x2("(?:天?)?\?", "天", 0, 0); + x2("(?:天?\?)?", "天", 0, 0); + x2("(?:夢?)+?", "夢夢夢", 0, 2); + x2("(?:風+)?\?", "風風風", 0, 0); + x2("(?:雪+)?\?霜", "雪雪雪霜", 0, 8); + x2("(?:あい)?{2}", "", 0, 0); + x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 8); + x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0); + x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 16); + n("(?:鬼車){3,}", 
"鬼車鬼車"); + x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 12); + x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 16); + x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 8); + x2("(?:鬼車){,}", "鬼車{,}", 0, 7); + x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 12); + x3("(火)", "火", 0, 2, 1); + x3("(火水)", "火水", 0, 4, 1); + x2("((時間))", "時間", 0, 4); + x3("((風水))", "風水", 0, 4, 1); + x3("((昨日))", "昨日", 0, 4, 2); + x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 4, 20); + x3("(あい)(うえ)", "あいうえ", 0, 4, 1); + x3("(あい)(うえ)", "あいうえ", 4, 8, 2); + x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 6, 12, 3); + x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 6, 12, 4); + x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 10, 18, 2); + x2("(^あ)", "あ", 0, 2); + x3("(あ)|(あ)", "いあ", 2, 4, 1); + x3("(^あ)|(あ)", "いあ", 2, 4, 2); + x3("(あ?)", "あああ", 0, 2, 1); + x3("(ま*)", "ままま", 0, 6, 1); + x3("(と*)", "", 0, 0, 1); + x3("(る+)", "るるるるるるる", 0, 14, 1); + x3("(ふ+|へ*)", "ふふふへへ", 0, 6, 1); + x3("(あ+|い?)", "いいいああ", 0, 2, 1); + x3("(あいう)?", "あいう", 0, 6, 1); + x3("(あいう)*", "あいう", 0, 6, 1); + x3("(あいう)+", "あいう", 0, 6, 1); + x3("(さしす|あいう)+", "あいう", 0, 6, 1); + x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 6, 1); + x3("((?i:あいう))", "あいう", 0, 6, 1); + x3("((?m:あ.う))", "あ\nう", 0, 5, 1); + x3("((?=あん)あ)", "あんい", 0, 2, 1); + x3("あいう|(.あいえ)", "んあいえ", 0, 8, 1); + x3("あ*(.)", "ああああん", 8, 10, 1); + x3("あ*?(.)", "ああああん", 0, 2, 1); + x3("あ*?(ん)", "ああああん", 8, 10, 1); + x3("[いうえ]あ*(.)", "えああああん", 10, 12, 1); + x3("(\\Aいい)うう", "いいうう", 0, 4, 1); + n("(\\Aいい)うう", "んいいうう"); + x3("(^いい)うう", "いいうう", 0, 4, 1); + n("(^いい)うう", "んいいうう"); + x3("ろろ(るる$)", "ろろるる", 4, 8, 1); + n("ろろ(るる$)", "ろろるるる"); + x2("(無)\\1", "無無", 0, 4); + n("(無)\\1", "無武"); + x2("(空?)\\1", "空空", 0, 4); + x2("(空?\?)\\1", "空空", 0, 0); + x2("(空*)\\1", "空空空空空", 0, 8); + x3("(空*)\\1", "空空空空空", 0, 4, 1); + x2("あ(い*)\\1", "あいいいい", 0, 10); + x2("あ(い*)\\1", "あい", 0, 2); + x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 20); + x2("(あ*)(い*)\\2", "あああいいいい", 0, 14); + x3("(あ*)(い*)\\2", "あああいいいい", 6, 10, 2); + x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 16); + 
x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 6, 7); + x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 12); + x2("([き-け])\\1", "くく", 0, 4); + x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 8); + n("(\\w\\d\\s)\\1", "あ5 あ5"); + x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 8); + x2("...(誰?|[あ-う]{3})\\1", "あaあ誰?誰?", 0, 13); + x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 12); + x2("(^こ)\\1", "ここ", 0, 4); + n("(^む)\\1", "めむむ"); + n("(あ$)\\1", "ああ"); + n("(あい\\Z)\\1", "あい"); + x2("(あ*\\Z)\\1", "あ", 2, 2); + x2(".(あ*\\Z)\\1", "いあ", 2, 4); + x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 13, 1); + x3("(.(..\\d.)\\2)", "あ12341234", 0, 10, 1); + x2("((?i:あvず))\\1", "あvずあvず", 0, 10); + x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 14); + x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 26); + x2("[[ひふ]]", "ふ", 0, 2); + x2("[[いおう]か]", "か", 0, 2); + n("[[^あ]]", "あ"); + n("[^[あ]]", "あ"); + x2("[^[^あ]]", "あ", 0, 2); + x2("[[かきく]&&きく]", "く", 0, 2); + n("[[かきく]&&きく]", "か"); + n("[[かきく]&&きく]", "け"); + x2("[あ-ん&&い-を&&う-ゑ]", "ゑ", 0, 2); + n("[^あ-ん&&い-を&&う-ゑ]", "ゑ"); + x2("[[^あ&&あ]&&あ-ん]", "い", 0, 2); + n("[[^あ&&あ]&&あ-ん]", "あ"); + x2("[[^あ-ん&&いうえお]&&[^う-か]]", "き", 0, 2); + n("[[^あ-ん&&いうえお]&&[^う-か]]", "い"); + x2("[^[^あいう]&&[^うえお]]", "う", 0, 2); + x2("[^[^あいう]&&[^うえお]]", "え", 0, 2); + n("[^[^あいう]&&[^うえお]]", "か"); + x2("[あ-&&-あ]", "-", 0, 1); + x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "え", 0, 2); + x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1); + x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1); + n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2"); + x2("aバージョンのダウンロード<\\/b>", "aバージョンのダウンロード", 0, 32); + x2(".バージョンのダウンロード<\\/b>", "aバージョンのダウンロード", 0, 32); + fprintf(stdout, + "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, onig_version()); + +#ifndef POSIX_TEST + onig_region_free(region, 1); + onig_end(); +#endif + + return ((nfail == 0 && nerror == 0) ? 0 : -1); +}