+upd: Oniguruma update

This commit is contained in:
rkotten 2023-09-26 14:37:20 +02:00
parent 8d558072e6
commit 562cd8afbb
23 changed files with 5023 additions and 5081 deletions

View File

@ -238,7 +238,6 @@
<ClCompile Include="oniguruma\src\regenc.c" />
<ClCompile Include="oniguruma\src\regerror.c" />
<ClCompile Include="oniguruma\src\regexec.c" />
<ClCompile Include="oniguruma\src\regext.c" />
<ClCompile Include="oniguruma\src\reggnu.c" />
<ClCompile Include="oniguruma\src\regparse.c" />
<ClCompile Include="oniguruma\src\regsyntax.c" />

View File

@ -183,9 +183,6 @@
<ClCompile Include="src\ChangeHistory.cxx">
<Filter>src</Filter>
</ClCompile>
<ClCompile Include="oniguruma\src\regext.c">
<Filter>oniguruma\src</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="include\ILexer.h">

View File

@ -4,7 +4,6 @@ ltmain.sh
stamp-h1
configure
config.status
config.h
config.h.in
onig-config
libtool

View File

@ -1,4 +1,4 @@
README 2018/04/05
README 2021/06/23
Oniguruma ---- (C) K.Kosako
@ -146,7 +146,6 @@ Source Files
regcomp.c compiling and optimization functions
regenc.c character encodings framework.
regerror.c error message function
regext.c extended API functions. (deluxe version API)
regexec.c search and match functions
regparse.c parsing functions.
regsyntax.c pattern syntax functions and built-in syntax definitions.

View File

@ -1,6 +1,4 @@
[![Build Status](https://travis-ci.org/kkos/oniguruma.svg?branch=master)](https://travis-ci.org/kkos/oniguruma)
[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/oniguruma.svg)](https://oss-fuzz-build-logs.storage.googleapis.com/index.html#oniguruma)
[![TrustInSoft CI](https://ci.trust-in-soft.com/projects/kkos/oniguruma.svg?branch=master)](https://ci.trust-in-soft.com/projects/kkos/oniguruma)
Oniguruma
=========
@ -37,7 +35,7 @@ When using configure script, if you have the POSIX API enabled in an earlier ver
Master branch
-------------
* Update Unicode version 15.0.0
* Update Unicode version 15.1.0
* NEW API: ONIG_OPTION_MATCH_WHOLE_STRING
* Fixed: (?I) option was not enabled for character classes (Issue #264).
* Changed specification to check for incorrect POSIX bracket (Issue #253).

View File

@ -1,71 +0,0 @@
CHANGES_7.0 2021/06/23
The changes from 6.9 to 7.0 are shown below.
* Already changed
+ Defined the OnigPos type (== ptrdiff_t).
+ Changed the return value of onig_search(), onig_search_with_param(), onig_match() and onig_match_with_param() from int to OnigPos.
+ Changed the argument value of onig_regset_search(), onig_regset_search_with_param() and onig_error_code_to_str() from int to OnigPos.
+ POSIX API: Changed the regoff_t type from int to OnigPos
+ Move the definition of OnigEncodingType structure from oniguruma.h to regenc.h
+ Removed macros from oniguruma.h
ONIGENC_IS_MBC_WORD_ASCII()
ONIGENC_MBC_CASE_FOLD()
ONIGENC_LEFT_ADJUST_CHAR_HEAD()
ONIGENC_IS_VALID_MBC_STRING()
ONIGENC_APPLY_ALL_CASE_FOLD()
ONIGENC_GET_CASE_FOLD_CODES_BY_STR()
ONIGENC_STEP_BACK()
ONIGENC_MBC_ENC_LEN()
ONIGENC_MBC_MAXLEN_DIST()
ONIGENC_PROPERTY_NAME_TO_CTYPE()
ONIGENC_GET_CTYPE_CODE_RANGE()
+ change macro to function in oniguruma.h
onig_enc_len() (macro) -> onig_enc_len() (function)
ONIGENC_IS_UNDEF() -> onigenc_is_undef()
ONIGENC_IS_SINGLEBYTE() -> onigenc_is_singlebyte()
ONIGENC_IS_MBC_HEAD() -> onigenc_is_mbc_head()
ONIGENC_IS_MBC_ASCII() -> onigenc_is_mbc_ascii()
ONIGENC_IS_CODE_ASCII() -> onigenc_is_code_ascii()
ONIGENC_IS_MBC_WORD() -> onigenc_is_mbc_word()
ONIGENC_NAME() -> onigenc_name()
ONIGENC_IS_ALLOWED_REVERSE_MATCH() -> onigenc_is_allowed_reverse_match()
ONIGENC_MBC_MAXLEN() -> onigenc_mbc_maxlen()
ONIGENC_MBC_MINLEN() -> onigenc_mbc_minlen()
ONIGENC_IS_MBC_NEWLINE() -> onigenc_is_mbc_newline()
ONIGENC_MBC_TO_CODE() -> onigenc_mbc_to_code()
ONIGENC_CODE_TO_MBCLEN() -> onigenc_code_to_mbclen()
ONIGENC_CODE_TO_MBC() -> onigenc_code_to_mbc()
ONIGENC_IS_CODE_CTYPE() -> onigenc_is_code_ctype()
+ Removed deprecated functions
onig_init()
onig_new_deluxe()
onig_get_case_fold_flag()
onig_get_default_case_fold_flag()
onig_set_default_case_fold_flag()
* Will change
+ Increase the size of the ONIG_SYN_OP/OP2_XXX flag area.
+ Improve the API of syntax-op.
(Eliminate the distinction between OP and OP2)
+ update LTVERSION
* To Be Determined
+ Remove UChar from the oniguruma.h
+ Disable Capture History (?@..) by default. (USE_CAPTURE_HISTORY)
+ Disable Callout (*name..) (?{...}) by default. (USE_CALLOUT)
//END

View File

@ -1,4 +1,4 @@
Unicode Properties (Unicode Version: 15.0.0, Emoji: 15.0)
Unicode Properties (Unicode Version: 15.1.0, Emoji: 15.1)
ASCII_Hex_Digit
Adlam
@ -90,10 +90,14 @@ Hiragana
Hyphen
IDS_Binary_Operator
IDS_Trinary_Operator
IDS_Unary_Operator
ID_Compat_Math_Continue
ID_Compat_Math_Start
ID_Continue
ID_Start
Ideographic
Imperial_Aramaic
InCB
Inherited
Inscriptional_Pahlavi
Inscriptional_Parthian
@ -354,6 +358,7 @@ Ideo
IDS
IDSB
IDST
IDSU
Initial_Punctuation
Ital
Java
@ -831,6 +836,7 @@ In_CJK_Unified_Ideographs_Extension_C
In_CJK_Unified_Ideographs_Extension_D
In_CJK_Unified_Ideographs_Extension_E
In_CJK_Unified_Ideographs_Extension_F
In_CJK_Unified_Ideographs_Extension_I
In_CJK_Compatibility_Ideographs_Supplement
In_CJK_Unified_Ideographs_Extension_G
In_CJK_Unified_Ideographs_Extension_H

View File

@ -46,7 +46,6 @@
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
#include "../oniguruma/src/oniguruma.h" // Oniguruma - Regular Expression Engine
typedef int OnigPos;
// ---------------------------------------------------------------
#define UCharPtr(pchar) reinterpret_cast<OnigUChar*>(pchar)

View File

@ -42,7 +42,7 @@ headers = $(posixheaders) $(onigheaders)
onigobjs = $(BUILD_DIR)/reggnu.obj $(BUILD_DIR)/regerror.obj $(BUILD_DIR)/regparse.obj $(BUILD_DIR)/regext.obj $(BUILD_DIR)/regcomp.obj \
$(BUILD_DIR)/regexec.obj $(BUILD_DIR)/regenc.obj $(BUILD_DIR)/regsyntax.obj $(BUILD_DIR)/regtrav.obj \
$(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj $(BUILD_DIR)/onig_init.obj
$(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj
libobjs = $(onigobjs) $(posixobjs)
jp_objs = $(BUILD_DIR)/euc_jp.obj $(BUILD_DIR)/sjis.obj
@ -68,7 +68,7 @@ encobjs = $(BUILD_DIR)/ascii.obj $(BUILD_DIR)/utf8.obj \
$(BUILD_DIR)/unicode_unfold_key.obj $(BUILD_DIR)/unicode_fold1_key.obj \
$(BUILD_DIR)/unicode_fold2_key.obj $(BUILD_DIR)/unicode_fold3_key.obj # $(BUILD_DIR)/koi8.obj
onigsources = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regext.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \
onigsources = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \
$(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regtrav.c $(ONIG_DIR)/regversion.c $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/st.c
posixsources = $(ONIG_DIR)/regposix.c $(ONIG_DIR)/regposerr.c
libsources = $(posixsources) $(onigsources)
@ -101,7 +101,6 @@ $(dllname): $(libobjs) $(encobjs)
$(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS)
$(BUILD_DIR)/regparse.obj: $(ONIG_DIR)/regparse.c $(onigheaders) $(BUILD_DIR)/config.h
$(BUILD_DIR)/regext.obj: $(ONIG_DIR)/regext.c $(onigheaders) $(BUILD_DIR)/config.h
$(BUILD_DIR)/regtrav.obj: $(ONIG_DIR)/regtrav.c $(onigheaders) $(BUILD_DIR)/config.h
$(BUILD_DIR)/regcomp.obj: $(ONIG_DIR)/regcomp.c $(onigheaders) $(BUILD_DIR)/config.h
$(BUILD_DIR)/regexec.obj: $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
@ -113,7 +112,6 @@ $(BUILD_DIR)/regversion.obj: $(ONIG_DIR)/regversion.c $(ONIG_DIR)/oniguruma.h $(
$(BUILD_DIR)/regposix.obj: $(ONIG_DIR)/regposix.c $(posixheaders) $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h
$(BUILD_DIR)/regposerr.obj: $(ONIG_DIR)/regposerr.c $(posixheaders) $(BUILD_DIR)/config.h
$(BUILD_DIR)/st.obj: $(ONIG_DIR)/st.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/st.h
$(BUILD_DIR)/onig_init.obj: $(ONIG_DIR)/onig_init.c $(ONIG_DIR)/oniguruma.h
$(BUILD_DIR)/ascii.obj: $(ONIG_DIR)/ascii.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
$(BUILD_DIR)/unicode.obj: $(ONIG_DIR)/unicode.c $(ONIG_DIR)/unicode_fold_data.c $(ONIG_DIR)/unicode_property_data.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h

View File

@ -179,12 +179,15 @@ def merge_dic(to_dic, from_dic):
to_dic.update(from_dic)
def merge_props(to_props, from_props):
common = list(set(to_props) & set(from_props))
def merge_props(to_dic, from_dic):
to_keys = to_dic.keys()
from_keys = from_dic.keys()
common = list(set(to_keys) & set(from_keys))
if len(common) != 0:
print("merge_props: collision: %s" % sorted(common), file=sys.stderr)
to_props.extend(from_props)
for k in from_keys:
to_dic[k] = True
def add_range_into_dic(dic, name, start, end):
d = dic.get(name, None)
@ -235,7 +238,6 @@ def parse_properties(path, klass, prop_prefix = None, version_reg = None):
with open(path, 'r') as f:
dic = { }
prop = None
props = []
for line in f:
s = line.strip()
if len(s) == 0:
@ -262,10 +264,9 @@ def parse_properties(path, klass, prop_prefix = None, version_reg = None):
elif PR_TOTAL_REG.match(s) is not None:
KDIC[prop] = klass
props.append(prop)
normalize_ranges_in_dic(dic)
return (dic, props, version_match)
return (dic, version_match)
def parse_property_aliases(path):
a = { }
@ -414,10 +415,10 @@ def entry_and_print_prop_and_index(name, index):
print_prop_and_index(nname, index)
def parse_and_merge_properties(path, klass, prop_prefix = None, version_reg = None):
dic, props, ver_m = parse_properties(path, klass, prop_prefix, version_reg)
dic, ver_m = parse_properties(path, klass, prop_prefix, version_reg)
merge_dic(DIC, dic)
merge_props(PROPS, props)
return dic, props, ver_m
merge_props(PROPS, dic)
return dic, ver_m
### main ###
@ -472,26 +473,26 @@ with open('UnicodeData.txt', 'r') as f:
DIC = dic
add_primitive_props(assigned)
PROPS = DIC.keys()
PROPS = list_sub(PROPS, POSIX_LIST)
PROPS = DIC.fromkeys(DIC, True)
PROPS = {k: v for k, v in PROPS.items() if k not in POSIX_LIST}
_, _, ver_m = parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property', None, UNICODE_VERSION_REG)
_, ver_m = parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property', None, UNICODE_VERSION_REG)
if ver_m is not None:
VERSION_INFO[0] = int(ver_m.group(1))
VERSION_INFO[1] = int(ver_m.group(2))
VERSION_INFO[2] = int(ver_m.group(3))
dic, props, _ = parse_and_merge_properties('Scripts.txt', 'Script')
dic, _ = parse_and_merge_properties('Scripts.txt', 'Script')
DIC['Unknown'] = inverse_ranges(add_ranges_in_dic(dic))
parse_and_merge_properties('PropList.txt', 'Binary Property')
_, _, ver_m = parse_and_merge_properties('emoji-data.txt', 'Emoji Property', None, EMOJI_VERSION_REG)
_, ver_m = parse_and_merge_properties('emoji-data.txt', 'Emoji Property', None, EMOJI_VERSION_REG)
if ver_m is not None:
EMOJI_VERSION_INFO[0] = int(ver_m.group(1))
EMOJI_VERSION_INFO[1] = int(ver_m.group(2))
PROPS.append('Unknown')
PROPS['Unknown'] = True
KDIC['Unknown'] = 'Script'
ALIASES = parse_property_aliases('PropertyAliases.txt')
@ -502,18 +503,18 @@ dic, BLOCKS = parse_blocks('Blocks.txt')
merge_dic(DIC, dic)
if INCLUDE_GRAPHEME_CLUSTER_DATA:
dic, props, _ = parse_properties('GraphemeBreakProperty.txt',
'GraphemeBreak Property',
GRAPHEME_CLUSTER_BREAK_NAME_PREFIX)
dic, _ = parse_properties('GraphemeBreakProperty.txt',
'GraphemeBreak Property',
GRAPHEME_CLUSTER_BREAK_NAME_PREFIX)
merge_dic(DIC, dic)
merge_props(PROPS, props)
merge_props(PROPS, dic)
#prop = GRAPHEME_CLUSTER_BREAK_NAME_PREFIX + 'Other'
#DIC[prop] = inverse_ranges(add_ranges_in_dic(dic))
#PROPS.append(prop)
#PROPS[prop] = True
#KDIC[prop] = 'GraphemeBreak Property'
add_posix_props(DIC)
PROPS = sorted(PROPS)
PROP_LIST = sorted(PROPS.keys())
s = '''%{
@ -534,7 +535,7 @@ for prop in POSIX_LIST:
print('')
if not(POSIX_ONLY):
for prop in PROPS:
for prop in PROP_LIST:
klass = KDIC.get(prop, None)
if klass is None:
n = len(prop)
@ -558,7 +559,7 @@ for prop in POSIX_LIST:
print(" CR_%s," % prop)
if not(POSIX_ONLY):
for prop in PROPS:
for prop in PROP_LIST:
print(" CR_%s," % prop)
for prop in BLOCKS:
@ -594,7 +595,7 @@ for prop in POSIX_LIST:
entry_and_print_prop_and_index(prop, index)
if not(POSIX_ONLY):
for prop in PROPS:
for prop in PROP_LIST:
index += 1
entry_and_print_prop_and_index(prop, index)

View File

@ -4,7 +4,7 @@
onigposix.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2020 K.Kosako
* Copyright (c) 2002-2021 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -74,7 +74,7 @@ extern "C" {
#define REG_POSIX_ENCODING_UTF16_LE 5
typedef int onig_posix_regoff_t;
typedef ptrdiff_t onig_posix_regoff_t;
typedef struct {
onig_posix_regoff_t rm_so;

View File

@ -80,6 +80,7 @@ extern "C" {
#define UChar OnigUChar
#endif
typedef int OnigPos;
typedef unsigned int OnigCodePoint;
typedef unsigned char OnigUChar;
typedef unsigned int OnigCtype;
@ -262,7 +263,7 @@ typedef enum {
#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
#define onig_enc_len(enc,p,end) ONIGENC_MBC_ENC_LEN(enc,p)
//#define onig_enc_len(enc,p,end) ONIGENC_MBC_ENC_LEN(enc,p)
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)

View File

@ -2,7 +2,7 @@
regenc.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2020 K.Kosako
* Copyright (c) 2002-2021 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

View File

@ -4,7 +4,7 @@
regenc.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2020 K.Kosako
* Copyright (c) 2002-2021 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

View File

@ -35,7 +35,7 @@
#include "regint.h"
extern UChar*
onig_error_code_to_format(int code)
onig_error_code_to_format(OnigPos code)
{
char *p;
@ -270,7 +270,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
extern int
onig_is_error_code_needs_param(int code)
onig_is_error_code_needs_param(OnigPos code)
{
switch (code) {
case ONIGERR_UNDEFINED_NAME_REFERENCE:
@ -290,7 +290,7 @@ onig_is_error_code_needs_param(int code)
#define MAX_ERROR_PAR_LEN 30
extern int ONIG_VARIADIC_FUNC_ATTR
onig_error_code_to_str(UChar* s, int code, ...)
onig_error_code_to_str(UChar* s, OnigPos code, ...)
{
UChar *p, *q;
OnigErrorInfo* einfo;

View File

@ -1,202 +0,0 @@
/**********************************************************************
regext.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#if 0
/*
 * Widen every byte of [s, end) into a 4-byte big-endian unit:
 * three zero bytes followed by the source byte.
 * The caller must supply room for 4 * (end - s) bytes at conv.
 */
static void
conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++) {
    dst[0] = '\0';
    dst[1] = '\0';
    dst[2] = '\0';
    dst[3] = *src;   /* payload goes in the least significant (last) byte */
    dst += 4;
  }
}
/*
 * Widen every byte of [s, end) into a 4-byte little-endian unit:
 * the source byte followed by three zero bytes.
 * The caller must supply room for 4 * (end - s) bytes at conv.
 */
static void
conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++) {
    dst[0] = *src;   /* payload goes in the least significant (first) byte */
    dst[1] = '\0';
    dst[2] = '\0';
    dst[3] = '\0';
    dst += 4;
  }
}
/*
 * Widen every byte of [s, end) into a 2-byte big-endian unit:
 * one zero byte followed by the source byte.
 * The caller must supply room for 2 * (end - s) bytes at conv.
 */
static void
conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++) {
    dst[0] = '\0';
    dst[1] = *src;
    dst += 2;
  }
}
/*
 * Widen every byte of [s, end) into a 2-byte little-endian unit:
 * the source byte followed by one zero byte.
 * The caller must supply room for 2 * (end - s) bytes at conv.
 */
static void
conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++) {
    dst[0] = *src;
    dst[1] = '\0';
    dst += 2;
  }
}
/*
 * Copy [s, end) to conv with the byte order of every 4-byte group reversed.
 * The loop reads src[0..3] whenever src < end and does not check that a full
 * group remains, so a range whose length is not a multiple of 4 is read
 * (and written) past its end.
 */
static void
conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src += 4) {
    dst[0] = src[3];
    dst[1] = src[2];
    dst[2] = src[1];
    dst[3] = src[0];
    dst += 4;
  }
}
/*
 * Copy [s, end) to conv with the two bytes of every 2-byte group exchanged.
 * The loop reads src[0..1] whenever src < end and does not check that a full
 * group remains, so an odd-length range is read (and written) one byte past
 * its end.
 */
static void
conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src += 2) {
    dst[0] = src[1];
    dst[1] = src[0];
    dst += 2;
  }
}
/*
 * Convert the byte range [s, end) from encoding `from` to encoding `to`.
 *
 * Combinations handled below:
 *   - ASCII or ISO-8859-1 -> UTF-16BE / UTF-16LE / UTF-32BE / UTF-32LE
 *     (each byte is zero-extended to a 2- or 4-byte unit)
 *   - UTF-16LE <-> UTF-16BE and UTF-32LE <-> UTF-32BE (byte swap)
 *
 * On success a buffer obtained from xmalloc() is stored in *conv, *conv_end
 * is set to one past its last byte, and 0 is returned.
 * Any other combination returns ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION
 * without writing to *conv or *conv_end.
 *
 * NOTE(review): len is an int, so (end - s) is narrowed by the cast and
 * len * 2 / len * 4 are computed in int -- confirm input sizes are bounded.
 * NOTE(review): CHECK_NULL_RETURN_MEMERR is defined elsewhere; presumably it
 * returns a memory error code when the allocation failed -- verify.
 */
static int
conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
UChar** conv, UChar** conv_end)
{
int len = (int )(end - s);
/* ---- 16-bit targets ---- */
if (to == ONIG_ENCODING_UTF16_BE) {
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
/* one input byte becomes two output bytes */
*conv = (UChar* )xmalloc(len * 2);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + (len * 2);
conv_ext0be(s, end, *conv);
return 0;
}
else if (from == ONIG_ENCODING_UTF16_LE) {
/* shared byte-swap path; also entered by goto from the UTF-16LE target branch */
swap16:
*conv = (UChar* )xmalloc(len);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + len;
conv_swap2bytes(s, end, *conv);
return 0;
}
}
else if (to == ONIG_ENCODING_UTF16_LE) {
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
*conv = (UChar* )xmalloc(len * 2);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + (len * 2);
conv_ext0le(s, end, *conv);
return 0;
}
else if (from == ONIG_ENCODING_UTF16_BE) {
/* BE -> LE is the same swap as LE -> BE */
goto swap16;
}
}
/* ---- 32-bit targets (a separate if-chain from the 16-bit one above) ---- */
if (to == ONIG_ENCODING_UTF32_BE) {
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
/* one input byte becomes four output bytes */
*conv = (UChar* )xmalloc(len * 4);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + (len * 4);
conv_ext0be32(s, end, *conv);
return 0;
}
else if (from == ONIG_ENCODING_UTF32_LE) {
/* shared byte-swap path; also entered by goto from the UTF-32LE target branch */
swap32:
*conv = (UChar* )xmalloc(len);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + len;
conv_swap4bytes(s, end, *conv);
return 0;
}
}
else if (to == ONIG_ENCODING_UTF32_LE) {
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
*conv = (UChar* )xmalloc(len * 4);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + (len * 4);
conv_ext0le32(s, end, *conv);
return 0;
}
else if (from == ONIG_ENCODING_UTF32_BE) {
/* BE -> LE is the same swap as LE -> BE */
goto swap32;
}
}
/* no branch above matched: this from/to pair is not supported */
return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
}
#endif
/*
 * Allocate and compile a regex from a pattern plus an OnigCompileInfo.
 *
 * reg          receives the newly allocated regex on success, NULL on
 *              allocation/initialisation/compile failure.
 * pattern      start of the pattern bytes.
 * pattern_end  one past the last pattern byte.
 * ci           compile settings; pattern_enc must equal target_enc.
 * einfo        optional error info; its `par` field is cleared up front.
 *
 * Returns 0 on success. Returns
 * ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION (leaving *reg untouched) when
 * the pattern and target encodings differ, ONIGERR_MEMORY when the regex
 * object cannot be allocated, or the non-zero result of onig_reg_init() /
 * onig_compile().
 */
extern int
onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
                OnigCompileInfo* ci, OnigErrorInfo* einfo)
{
  int status;

  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;

  /* No transcoding is performed: differing encodings are rejected outright. */
  if (ci->pattern_enc != ci->target_enc)
    return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;

  *reg = (regex_t* )xmalloc(sizeof(regex_t));
  if (IS_NULL(*reg))
    return ONIGERR_MEMORY;

  status = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc,
                         ci->syntax);
  if (status == 0)
    status = onig_compile(*reg, pattern, pattern_end, einfo);

  if (status != 0) {
    /* Initialisation or compilation failed: release the half-built regex. */
    onig_free(*reg);
    *reg = NULL;
  }

  return status;
}

View File

@ -288,6 +288,10 @@ typedef unsigned __int64 uint64_t;
#endif
#endif /* _WIN32 */
typedef size_t OnigSize;
#define INFINITE_SIZE ~((OnigSize )0)
#if SIZEOF_VOIDP == SIZEOF_LONG
typedef unsigned long hash_data_type;
#elif SIZEOF_VOIDP == SIZEOF_LONG_LONG
@ -927,7 +931,7 @@ struct re_pattern_buffer {
unsigned char map[CHAR_MAP_SIZE]; /* used as BMH skip or char-map */
int map_offset;
OnigLen dist_min; /* min-distance of exact or map */
OnigLen dist_max; /* max-distance of exact or map */
OnigSize dist_max; /* max-distance of exact or map */
RegexExt* extp;
};
@ -939,7 +943,7 @@ struct re_pattern_buffer {
extern void onig_add_end_call(void (*func)(void));
extern void onig_warning(const char* s);
extern UChar* onig_error_code_to_format P_((int code));
extern UChar* onig_error_code_to_format P_((OnigPos code));
extern void ONIG_VARIADIC_FUNC_ATTR onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc));

View File

@ -66,7 +66,7 @@ typedef struct {
} O2PERR;
static int
onig2posix_error_code(int code)
onig2posix_error_code(OnigPos code)
{
static const O2PERR o2p[] = {
{ ONIG_MISMATCH, REG_NOMATCH },
@ -148,11 +148,13 @@ onig2posix_error_code(int code)
};
int i;
int icode;
if (code >= 0) return 0;
icode = (int )code;
for (i = 0; i < (int )(sizeof(o2p) / sizeof(o2p[0])); i++) {
if (code == o2p[i].onig_err)
if (icode == o2p[i].onig_err)
return o2p[i].posix_err;
}
@ -198,6 +200,7 @@ onig_posix_regexec(onig_posix_regex_t* reg, const char* str, size_t nmatch,
onig_posix_regmatch_t pmatch[], int posix_options)
{
int r, i, len;
OnigPos pos;
UChar* end;
onig_posix_regmatch_t* pm;
OnigOptionType options;
@ -222,22 +225,22 @@ onig_posix_regexec(onig_posix_regex_t* reg, const char* str, size_t nmatch,
ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
end = (UChar* )(str + len);
r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
(OnigRegion* )pm, options);
pos = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
(OnigRegion* )pm, options);
if (r >= 0) {
if (pos >= 0) {
r = 0; /* Match */
if (pm != pmatch && pm != NULL) {
xmemcpy(pmatch, pm, sizeof(onig_posix_regmatch_t) * nmatch);
}
}
else if (r == ONIG_MISMATCH) {
else if (pos == ONIG_MISMATCH) {
r = REG_NOMATCH;
for (i = 0; i < (int )nmatch; i++)
pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
}
else {
r = onig2posix_error_code(r);
r = onig2posix_error_code(pos);
}
if (pm != pmatch && pm != NULL)

View File

@ -25,7 +25,7 @@
* SUCH DAMAGE.
*/
#define GRAPHEME_BREAK_PROPERTY_VERSION 150000
#define GRAPHEME_BREAK_PROPERTY_VERSION 150100
/*
CR

View File

@ -28,7 +28,7 @@
#include "regenc.h"
#define UNICODE_CASEFOLD_VERSION 150000
#define UNICODE_CASEFOLD_VERSION 150100
OnigCodePoint OnigUnicodeFolds1[] = {

File diff suppressed because it is too large Load Diff

View File

@ -68,7 +68,7 @@ CR_NEWLINE[] = { 1,
/* PROPERTY: 'Alpha': POSIX [[:Alpha:]] */
static const OnigCodePoint
CR_Alpha[] = { 732,
CR_Alpha[] = { 733,
0x0041, 0x005a,
0x0061, 0x007a,
0x00aa, 0x00aa,
@ -798,6 +798,7 @@ CR_Alpha[] = { 732,
0x2b740, 0x2b81d,
0x2b820, 0x2cea1,
0x2ceb0, 0x2ebe0,
0x2ebf0, 0x2ee5d,
0x2f800, 0x2fa1d,
0x30000, 0x3134a,
0x31350, 0x323af,
@ -894,7 +895,7 @@ CR_Digit[] = { 64,
/* PROPERTY: 'Graph': POSIX [[:Graph:]] */
static const OnigCodePoint
CR_Graph[] = { 712,
CR_Graph[] = { 713,
0x0021, 0x007e,
0x00a1, 0x0377,
0x037a, 0x037f,
@ -1169,14 +1170,14 @@ CR_Graph[] = { 712,
0x2e80, 0x2e99,
0x2e9b, 0x2ef3,
0x2f00, 0x2fd5,
0x2ff0, 0x2ffb,
0x2ff0, 0x2fff,
0x3001, 0x303f,
0x3041, 0x3096,
0x3099, 0x30ff,
0x3105, 0x312f,
0x3131, 0x318e,
0x3190, 0x31e3,
0x31f0, 0x321e,
0x31ef, 0x321e,
0x3220, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -1599,6 +1600,7 @@ CR_Graph[] = { 712,
0x2b740, 0x2b81d,
0x2b820, 0x2cea1,
0x2ceb0, 0x2ebe0,
0x2ebf0, 0x2ee5d,
0x2f800, 0x2fa1d,
0x30000, 0x3134a,
0x31350, 0x323af,
@ -2559,14 +2561,13 @@ CR_Print[] = { 709,
0x2e80, 0x2e99,
0x2e9b, 0x2ef3,
0x2f00, 0x2fd5,
0x2ff0, 0x2ffb,
0x3000, 0x303f,
0x2ff0, 0x303f,
0x3041, 0x3096,
0x3099, 0x30ff,
0x3105, 0x312f,
0x3131, 0x318e,
0x3190, 0x31e3,
0x31f0, 0x321e,
0x31ef, 0x321e,
0x3220, 0xa48c,
0xa490, 0xa4c6,
0xa4d0, 0xa62b,
@ -2989,6 +2990,7 @@ CR_Print[] = { 709,
0x2b740, 0x2b81d,
0x2b820, 0x2cea1,
0x2ceb0, 0x2ebe0,
0x2ebf0, 0x2ee5d,
0x2f800, 0x2fa1d,
0x30000, 0x3134a,
0x31350, 0x323af,
@ -3001,7 +3003,7 @@ CR_Print[] = { 709,
/* PROPERTY: 'PosixPunct': POSIX [[:punct:]] */
static const OnigCodePoint
CR_PosixPunct[] = { 338,
CR_PosixPunct[] = { 339,
0x0021, 0x002f,
0x003a, 0x0040,
0x005b, 0x0060,
@ -3144,7 +3146,7 @@ CR_PosixPunct[] = { 338,
0x2e80, 0x2e99,
0x2e9b, 0x2ef3,
0x2f00, 0x2fd5,
0x2ff0, 0x2ffb,
0x2ff0, 0x2fff,
0x3001, 0x3004,
0x3008, 0x3020,
0x3030, 0x3030,
@ -3156,6 +3158,7 @@ CR_PosixPunct[] = { 338,
0x3190, 0x3191,
0x3196, 0x319f,
0x31c0, 0x31e3,
0x31ef, 0x31ef,
0x3200, 0x321e,
0x322a, 0x3247,
0x3250, 0x3250,
@ -4023,7 +4026,7 @@ CR_XDigit[] = { 3,
/* PROPERTY: 'Word': POSIX [[:Word:]] */
static const OnigCodePoint
CR_Word[] = { 770,
CR_Word[] = { 771,
0x0030, 0x0039,
0x0041, 0x005a,
0x005f, 0x005f,
@ -4790,6 +4793,7 @@ CR_Word[] = { 770,
0x2b740, 0x2b81d,
0x2b820, 0x2cea1,
0x2ceb0, 0x2ebe0,
0x2ebf0, 0x2ee5d,
0x2f800, 0x2fa1d,
0x30000, 0x3134a,
0x31350, 0x323af,
@ -4798,7 +4802,7 @@ CR_Word[] = { 770,
/* PROPERTY: 'Alnum': POSIX [[:Alnum:]] */
static const OnigCodePoint
CR_Alnum[] = { 772,
CR_Alnum[] = { 773,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a,
@ -5568,6 +5572,7 @@ CR_Alnum[] = { 772,
0x2b740, 0x2b81d,
0x2b820, 0x2cea1,
0x2ceb0, 0x2ebe0,
0x2ebf0, 0x2ee5d,
0x2f800, 0x2fa1d,
0x30000, 0x3134a,
0x31350, 0x323af,

View File

@ -25,7 +25,7 @@
* SUCH DAMAGE.
*/
#define WORD_BREAK_PROPERTY_VERSION 150000
#define WORD_BREAK_PROPERTY_VERSION 150100
/*
ALetter
@ -48,7 +48,7 @@ WSegSpace
ZWJ
*/
static int WB_RANGE_NUM = 1053;
static int WB_RANGE_NUM = 1052;
static WB_RANGE_TYPE WB_RANGES[] = {
{0x00000a, 0x00000a, WB_LF },
{0x00000b, 0x00000c, WB_Newline },
@ -105,7 +105,7 @@ static WB_RANGE_TYPE WB_RANGES[] = {
{0x0005ef, 0x0005f2, WB_Hebrew_Letter },
{0x0005f3, 0x0005f3, WB_ALetter },
{0x0005f4, 0x0005f4, WB_MidLetter },
{0x000600, 0x000605, WB_Format },
{0x000600, 0x000605, WB_Numeric },
{0x00060c, 0x00060d, WB_MidNum },
{0x000610, 0x00061a, WB_Extend },
{0x00061c, 0x00061c, WB_Format },
@ -119,7 +119,7 @@ static WB_RANGE_TYPE WB_RANGES[] = {
{0x000671, 0x0006d3, WB_ALetter },
{0x0006d5, 0x0006d5, WB_ALetter },
{0x0006d6, 0x0006dc, WB_Extend },
{0x0006dd, 0x0006dd, WB_Format },
{0x0006dd, 0x0006dd, WB_Numeric },
{0x0006df, 0x0006e4, WB_Extend },
{0x0006e5, 0x0006e6, WB_ALetter },
{0x0006e7, 0x0006e8, WB_Extend },
@ -128,8 +128,7 @@ static WB_RANGE_TYPE WB_RANGES[] = {
{0x0006f0, 0x0006f9, WB_Numeric },
{0x0006fa, 0x0006fc, WB_ALetter },
{0x0006ff, 0x0006ff, WB_ALetter },
{0x00070f, 0x00070f, WB_Format },
{0x000710, 0x000710, WB_ALetter },
{0x00070f, 0x000710, WB_ALetter },
{0x000711, 0x000711, WB_Extend },
{0x000712, 0x00072f, WB_ALetter },
{0x000730, 0x00074a, WB_Extend },
@ -156,11 +155,11 @@ static WB_RANGE_TYPE WB_RANGES[] = {
{0x000860, 0x00086a, WB_ALetter },
{0x000870, 0x000887, WB_ALetter },
{0x000889, 0x00088e, WB_ALetter },
{0x000890, 0x000891, WB_Format },
{0x000890, 0x000891, WB_Numeric },
{0x000898, 0x00089f, WB_Extend },
{0x0008a0, 0x0008c9, WB_ALetter },
{0x0008ca, 0x0008e1, WB_Extend },
{0x0008e2, 0x0008e2, WB_Format },
{0x0008e2, 0x0008e2, WB_Numeric },
{0x0008e3, 0x000903, WB_Extend },
{0x000904, 0x000939, WB_ALetter },
{0x00093a, 0x00093c, WB_Extend },
@ -779,9 +778,9 @@ static WB_RANGE_TYPE WB_RANGES[] = {
{0x01107f, 0x011082, WB_Extend },
{0x011083, 0x0110af, WB_ALetter },
{0x0110b0, 0x0110ba, WB_Extend },
{0x0110bd, 0x0110bd, WB_Format },
{0x0110bd, 0x0110bd, WB_Numeric },
{0x0110c2, 0x0110c2, WB_Extend },
{0x0110cd, 0x0110cd, WB_Format },
{0x0110cd, 0x0110cd, WB_Numeric },
{0x0110d0, 0x0110e8, WB_ALetter },
{0x0110f0, 0x0110f9, WB_Numeric },
{0x011100, 0x011102, WB_Extend },