diff --git a/src/Notepad3.vcxproj b/src/Notepad3.vcxproj index a367fe696..5225601fd 100644 --- a/src/Notepad3.vcxproj +++ b/src/Notepad3.vcxproj @@ -375,6 +375,7 @@ + diff --git a/src/Notepad3.vcxproj.filters b/src/Notepad3.vcxproj.filters index 37058cea1..a9bc645e0 100644 --- a/src/Notepad3.vcxproj.filters +++ b/src/Notepad3.vcxproj.filters @@ -411,6 +411,9 @@ Source Files\uchardet\LangModels + + Source Files\uchardet\LangModels + diff --git a/uchardet/uchardet/script/BuildLangModelLogs/LangBelarusianModel.log b/uchardet/uchardet/script/BuildLangModelLogs/LangBelarusianModel.log new file mode 100644 index 000000000..80b8609b8 --- /dev/null +++ b/uchardet/uchardet/script/BuildLangModelLogs/LangBelarusianModel.log @@ -0,0 +1,186 @@ += Logs of language model for Belarusian (be) = + +- Generated by BuildLangModel.py +- Started: 2019-03-05 18:30:17.964464 +- Maximum depth: 4 +- Max number of pages: 100 + +== Parsed pages == + +Галоўная_старонка (revision 3314810) +1386 (revision 3318522) +1812 (revision 3317760) +1837 (revision 3317732) +1925 (revision 3316369) +1956 (revision 3316332) +1959 (revision 3316329) +2019 (revision 3333637) +4 сакавіка (revision 1761191) +Helaeomyia petrolei (revision 3312800) +TUT.BY (revision 3189969) +XX стагоддзе (revision 3006438) +Іван Пятровіч Паўлаў (revision 3330827) +Інструкцыя па транслітарацыі (revision 3285076) +Антрапалогія (revision 3095342) +Асфальт (revision 2594585) +Аэрапорт Віцебск (revision 3106296) +Аўстрыя (revision 3306502) +Баравуха (revision 3332684) +Беларуская Вікіпедыя (revision 3330925) +Беларуская мова (revision 3321859) +Беларусь (revision 3320908) +Беласток (revision 3082237) +Вялікае княства Літоўскае (revision 3259013) +Вікіпедыя (revision 3333246) +Віцебск (revision 3328544) +Віцебская вобласць (revision 3328232) +Віцебскі раён (revision 3238295) +Віцьба (revision 3194807) +Горад (revision 2627422) +Гравітацыя (альбом) (revision 3333104) +Давыд Львовіч Глінскі (revision 3326607) +Еўрапейскі маршрут E95 
(revision 2686641) +Жарэс Іванавіч Алфёраў (revision 3332006) +Заходняя Дзвіна (revision 3169818) +Знакі паштовай аплаты Украіны (1999) (revision 3225969) +Зігмунд Фрэйд (revision 3200632) +КА Піянер-4 (revision 2394843) +Лучоса (revision 3251826) +Лявон Вольскі (revision 3333010) +Літаратура (revision 1786497) +Магістраль М3 (revision 3116154) +Магістраль М8 (revision 3107563) +Манстэра прывабная (revision 3332338) +Масква (revision 3227608) +Мастацтва (revision 3294147) +Медыцына (revision 1782135) +Месяц, спадарожнік Зямлі (revision 3186245) +Мінск (revision 3229030) +Наваполацк (revision 3260791) +Нафта (revision 3327706) +Неўралогія (revision 3193067) +Нобелеўская прэмія па фізіцы (revision 3052696) +Ніва (1956) (revision 3315439) +Орша (revision 3248596) +Пагранічны канфлікт паміж Індыяй і Пакістанам (2019) (revision 3333643) +Панядзелак (revision 1526755) +Полацк (revision 3329956) +Поль Марыа (revision 3281894) +Порт (revision 2674776) +Псіхааналіз (revision 3260302) +Псіхааналітык (revision 3260302) +Псіхалогія (revision 2232890) +Псіхіятрыя (revision 1919915) +Расія (revision 3239593) +Рэч Паспалітая (revision 3266116) +Санкт-Пецярбург (revision 2857455) +Сацыялогія (revision 3130424) +Славенская Вікіпедыя (revision 2760315) +Справа БелТА (revision 3333271) +Суперкубак Беларусі па футболе 2019 (revision 3333334) +Сыраежка шэрая (revision 3332345) +Універсальны каардынаваны час (revision 2713688) +ФК БАТЭ (revision 3333087) +ФК Дынама Брэст (revision 3332229) +Футбол (revision 3161765) +Чыкага (revision 3312149) +Энцыклапедыя (revision 3048519) +Ягайла (revision 3248985) +Яўхім Храптовіч (revision 3008180) +12 сакавіка (revision 3110377) +1345 (revision 3318571) +1360-я (revision 2963824) +1370-я (revision 3318540) +1380-я (revision 3066609) +1383 (revision 3318525) +1384 (revision 3318524) +1385 (revision 3318523) +1387 (revision 3318521) +1388 (revision 3318520) +1389 (revision 3318519) +1390-я (revision 3318516) +1400-я (revision 3005229) +1428 
(revision 3318470) +1456 (revision 3318437) +23.10 (revision 2463644) +24 лютага (revision 3100846) +24 чэрвеня (revision 2866534) + +== End of Parsed pages == + +- Wikipedia parsing ended at: 2019-03-05 18:36:38.571630 + +64 characters appeared 650592 times. + +First 61 characters: +[ 0] Char а: 15.592106881117504 % +[ 1] Char н: 6.625350450051645 % +[ 2] Char і: 5.712796960306921 % +[ 3] Char р: 5.026345236338596 % +[ 4] Char с: 4.852196153656977 % +[ 5] Char ы: 4.281024051940387 % +[ 6] Char к: 4.109795386355811 % +[ 7] Char л: 4.034633072647681 % +[ 8] Char е: 3.7853216762579316 % +[ 9] Char т: 3.5224841375239784 % +[10] Char я: 3.404437804338203 % +[11] Char д: 3.239664797599725 % +[12] Char о: 3.2385888544587087 % +[13] Char в: 3.172802616693719 % +[14] Char м: 2.900435295853623 % +[15] Char у: 2.899205646549604 % +[16] Char п: 2.7653275785745906 % +[17] Char з: 2.334028085190104 % +[18] Char ц: 2.276849392553244 % +[19] Char г: 2.130521125375043 % +[20] Char ў: 2.0449067925827555 % +[21] Char б: 1.5492044169003 % +[22] Char ч: 1.3281749545029757 % +[23] Char э: 1.3109598642467168 % +[24] Char й: 1.2388716737986325 % +[25] Char х: 1.0653374157690227 % +[26] Char ь: 1.0553465151738723 % +[27] Char ш: 0.8255558014854164 % +[28] Char ж: 0.5312084993359893 % +[29] Char ю: 0.4706482711130786 % +[30] Char ф: 0.4598888397029167 % +[31] Char i: 0.2782081550341842 % +[32] Char ё: 0.27574885642614727 % +[33] Char и: 0.14233190694014067 % +[34] Char e: 0.13833554670208056 % +[35] Char a: 0.13664477890905513 % +[36] Char s: 0.111436968176676 % +[37] Char o: 0.10344424770055581 % +[38] Char n: 0.10298312921154887 % +[39] Char t: 0.09744970734346564 % +[40] Char r: 0.09714229501746102 % +[41] Char x: 0.07516231370813044 % +[42] Char l: 0.07485490138212582 % +[43] Char u: 0.0667084747430033 % +[44] Char c: 0.06363435148295707 % +[45] Char v: 0.053336038561802177 % +[46] Char m: 0.04857114750873051 % +[47] Char d: 0.04764891053071664 % +[48] Char b: 0.04518961192267965 % 
+[49] Char p: 0.041808076336628794 % +[50] Char k: 0.033815355860508586 % +[51] Char g: 0.03243200039348778 % +[52] Char w: 0.031202351089469286 % +[53] Char y: 0.0301264079484531 % +[54] Char h: 0.029511583296443857 % +[55] Char z: 0.020750332005312084 % +[56] Char f: 0.018905858049284345 % +[57] Char j: 0.010605725247159511 % +[58] Char ъ: 0.002305592445034676 % +[59] Char щ: 0.0019981801190300527 % +[60] Char q: 0.001844473956027741 % + +The first 61 characters have an accumulated ratio of 0.9999815552604403. + +1419 sequences found. + +First 512 (typical positive ratio): 0.9748335015136226 +Next 512 (512-1024): 0.023605173837262638 +Rest: 0.0015613246491147821 + +- Processing end: 2019-03-05 18:36:38.805955 diff --git a/uchardet/uchardet/script/charsets/windows-1251.py b/uchardet/uchardet/script/charsets/windows-1251.py new file mode 100644 index 000000000..c08c785f3 --- /dev/null +++ b/uchardet/uchardet/script/charsets/windows-1251.py @@ -0,0 +1,77 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# ##### BEGIN LICENSE BLOCK ##### +# Version: MPL 1.1/GPL 2.0/LGPL 2.1 +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved.
+# +# Contributor(s): +# Jehan +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 2 or later (the "GPL"), or +# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +# in which case the provisions of the GPL or the LGPL are applicable instead +# of those above. If you wish to allow use of your version of this file only +# under the terms of either the GPL or the LGPL, and not to allow others to +# use your version of this file under the terms of the MPL, indicate your +# decision by deleting the provisions above and replace them with the notice +# and other provisions required by the GPL or the LGPL. If you do not delete +# the provisions above, a recipient may use your version of this file under +# the terms of any one of the MPL, the GPL or the LGPL. +# +# ##### END LICENSE BLOCK ##### + +from codepoints import * + +# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# Python 3.6: needs tmp renaming Python/Lib/encodings/cp1251.py as cp1252.py ??? +# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +name = 'WINDOWS-1251' +aliases = ['CP-1251', 'cswindows1251'] + +language = \ +{ + # Languages with complete coverage. + # Windows-1251 is the Windows code page for Cyrillic-script languages.
+ 'complete': [ 'be', 'mk', 'ru', 'sr' ], + 'incomplete': [] +} + +# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF # +charmap = \ +[ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X + SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X + SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X + SYM,LET,SYM,LET,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,LET,LET,LET, # 8X + LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,LET,SYM,LET,LET,LET,LET, # 9X + CTR,LET,LET,LET,SYM,LET,SYM,SYM,LET,SYM,LET,SYM,SYM,CTR,SYM,LET, # AX + SYM,SYM,LET,LET,LET,LET,SYM,SYM,LET,SYM,LET,SYM,LET,LET,LET,LET, # BX + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # FX +] diff --git a/uchardet/uchardet/script/langs/be.py b/uchardet/uchardet/script/langs/be.py new file mode 100644 index 000000000..8528c1785 --- /dev/null +++ b/uchardet/uchardet/script/langs/be.py @@ -0,0 +1,69 @@ +#!/bin/python3 +# -*- coding: utf-8 -*- + +# ##### BEGIN LICENSE BLOCK ##### +# Version: MPL 1.1/GPL 2.0/LGPL 2.1 +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Jehan +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 2 or later (the "GPL"), or +# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +# in which case the provisions of the GPL or the LGPL are applicable instead +# of those above. If you wish to allow use of your version of this file only +# under the terms of either the GPL or the LGPL, and not to allow others to +# use your version of this file under the terms of the MPL, indicate your +# decision by deleting the provisions above and replace them with the notice +# and other provisions required by the GPL or the LGPL. If you do not delete +# the provisions above, a recipient may use your version of this file under +# the terms of any one of the MPL, the GPL or the LGPL. +# +# ##### END LICENSE BLOCK ##### + +import re + +## Mandatory Properties ## + +# The human name for the language, in English. +name = 'Belarusian' +# Use 2-letter ISO 639-1 if possible, 3-letter ISO code otherwise, +# or use another catalog as a last resort. +code = 'be' +# ASCII characters also occur in Belarusian text. +use_ascii = True +# The charsets we want to support and create data for. +charsets = ['WINDOWS-1251'] + +## Optional Properties ## + +# Alphabet characters. +# If use_ascii=True, there is no need to add any ASCII characters.
+# If case_mapping=True, there is no need to add several cases of a same +# character (provided Python algorithms know the right cases). +alphabet = ['б', 'в', 'г', 'д', 'ж', 'з', 'и', 'й', 'к', 'л', 'м', 'н', 'т', 'ф', 'ц', 'ч', 'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я'] +# The start page. Though optional, it is advised to choose one yourself. +start_pages = ['Галоўная_старонка'] +# give possibility to select another code for the Wikipedia URL. +wikipedia_code = code +# 'a' and 'A' will be considered the same character, and so on. +# This uses Python algorithm to determine upper/lower-case of a given +# character. +case_mapping = True diff --git a/uchardet/uchardet/src/LangModels/LangBelarusianModel.cpp b/uchardet/uchardet/src/LangModels/LangBelarusianModel.cpp new file mode 100644 index 000000000..6ab310172 --- /dev/null +++ b/uchardet/uchardet/src/LangModels/LangBelarusianModel.cpp @@ -0,0 +1,228 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved.
+ * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "../nsSBCharSetProber.h" + +/********* Language model for: Belarusian *********/ + +/** + * Generated by BuildLangModel.py + * On: 2019-03-05 18:36:38.571630 + **/ + +/* Character Mapping Table: + * ILL: illegal character. + * CTR: control character specific to the charset. + * RET: carriage/return. + * SYM: symbol (punctuation) that does not belong to word. + * NUM: 0 - 9. + * + * Other characters are ordered by probabilities + * (0 is the most common character in the language). + * + * Orders are generic to a language. So the codepoint with order X in + * CHARSET1 maps to the same character as the codepoint with the same + * order X in CHARSET2 for the same language. + * As such, it is possible to get missing order. For instance the + * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 + * even though they are both used for French. Same for the euro sign. 
+ */ +static const unsigned char Windows_1251_CharToOrderMap[] = +{ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 35, 48, 44, 47, 34, 56, 51, 54, 31, 57, 50, 42, 46, 38, 37, /* 4X */ + 49, 60, 40, 36, 39, 43, 45, 52, 41, 53, 55,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 35, 48, 44, 47, 34, 56, 51, 54, 31, 57, 50, 42, 46, 38, 37, /* 6X */ + 49, 60, 40, 36, 39, 43, 45, 52, 41, 53, 55,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 64,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM, 66,SYM, 67, 68, 69, 70, /* 8X */ + 71,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 72,SYM, 73, 74, 75, 76, /* 9X */ + CTR, 20, 20, 77,SYM, 62,SYM,SYM, 32,SYM, 63,SYM,SYM,CTR,SYM, 61, /* AX */ + SYM,SYM, 2, 2, 62, 78,SYM,SYM, 32,SYM, 63,SYM, 79, 80, 81, 61, /* BX */ + 0, 21, 13, 19, 11, 8, 28, 17, 33, 24, 6, 7, 14, 1, 12, 16, /* CX */ + 3, 4, 9, 15, 30, 25, 18, 22, 27, 59, 58, 5, 26, 23, 29, 10, /* DX */ + 0, 21, 13, 19, 11, 8, 28, 17, 33, 24, 6, 7, 14, 1, 12, 16, /* EX */ + 3, 4, 9, 15, 30, 25, 18, 22, 27, 59, 58, 5, 26, 23, 29, 10, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + + +/* Model Table: + * Total sequences: 1419 + * First 512 sequences: 0.9748335015136226 + * Next 512 sequences (512-1024): 0.023605173837262638 + * Rest: 0.0015613246491147821 + * Negative sequences: TODO + */ +static const PRUint8 BelarusianLangModel[] = +{ + 3,3,3,3,3,0,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,0,3,3,3, + 3,1,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, + 3,3,3,2,3,3,3,2,3,3,3,3,3,2,1,3,2,2,3,3,0,2,3,2,0,2,3,3,2,3, + 3,2,3,3,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,3,3,3,0,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,1,3,3,0,3,3,3, + 3,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
3,3,2,2,3,3,3,3,3,3,1,3,3,3,3,3,2,2,3,3,0,3,3,3,0,3,2,3,3,1, + 2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,3,1,0,2,2,3,0,3,3,2,0,2, + 3,2,3,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0, + 2,3,3,3,3,0,3,3,3,3,3,3,1,3,3,2,3,3,3,3,3,3,3,1,3,3,0,3,3,3, + 2,1,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 3,3,3,3,3,1,2,3,3,3,0,1,3,3,3,3,2,2,3,2,0,1,0,2,1,2,1,2,0,1, + 1,2,2,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0, + 3,2,3,2,2,3,3,2,3,2,3,2,3,2,2,3,1,2,2,2,0,1,2,2,0,1,3,1,2,3, + 2,2,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,0,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,3,3,2,3,2,0,3,3,2, + 2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 3,3,2,3,2,3,3,3,3,1,1,1,3,3,2,3,2,2,2,2,0,2,3,3,0,2,2,1,0,1, + 2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,1,3,3,0,3,3,3,3,3,3,2,3,3,1,3,3,3,3,3,3,3,1,3,3,0,3,3,3, + 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 3,3,1,3,3,3,3,3,2,2,1,3,3,3,3,3,3,3,2,2,0,3,3,3,0,2,1,2,3,1, + 1,0,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,2,3,3,0,3,3,3,3,2,3,1,3,3,1,3,3,3,3,3,3,3,1,3,3,0,3,3,2, + 2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 3,2,3,2,2,3,3,2,3,2,3,2,3,1,2,3,2,1,0,1,0,0,1,2,0,1,1,1,0,2, + 0,2,3,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,2,3,2,3,2,2,3,3,1,2,2,1,3,1,2,0,1,0,1,1,1, + 2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,2,3,3,0,3,3,3,3,2,3,2,3,3,1,3,3,3,3,2,3,3,2,3,3,0,3,3,3, + 2,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,3,3,3,3,1,3,1,1,3,2,1,2,1,0,1,2,2,0,1,1,2,0,0, + 0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,3,3,3,1,3,3,3,3,3,3,1,1,0,3,0,3,2,2,0,0,3,2,1,2, + 0,1,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,1,1,3,3,2,3,3,3,0,3,3,2,3,0,0,3,1,0,2,0,3,0,0,3,1,0,3, + 
0,1,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, + 3,3,3,3,2,1,2,3,3,1,1,2,3,2,3,3,1,1,0,2,0,1,2,3,0,1,0,1,0,1, + 0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,3,2,3,3,0,3,3,2,3,2,3,0,3,2,0,3,3,3,3,0,2,3,2,0,2,0,3,3,0, + 1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,3,3,2,3,2,3,2,2,3,1,1,2,1,0,0,2,2,0,2,0,2,2,2, + 1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0, + 3,3,0,0,0,3,3,2,2,1,0,0,3,3,2,3,1,1,1,0,0,1,2,3,0,0,0,0,1,1, + 0,0,1,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,2,3,3,0,3,3,2,3,2,3,3,3,3,0,3,3,3,3,3,3,3,1,3,3,0,2,2,1, + 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,0,1,3,0,2,3,0,3,0,3,1,2,3,1,2,2,3,2,0,3,2,0,0,1,0,3,0,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,0,2,2,2,3,0,0,3,3,2,3,2,0,2,1,0,1,1,1,0,2,1,0,0,1, + 0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,1,1,3,0,3,1,2,3,2,2,1,3,3,0,2,2,3,2,0,3,2,1,0,1,0,3,1,2, + 2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,2,2,3,3,3,2,3,0,0,3,2,3,3,2,0,2,1,0,0,3,3,0,0,0,2,0,1, + 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,1,1,2,3,2,1,2,0,0,2,3,2,1,3,2,1,2,1,0,2,2,3,0,0,0,0,2,0, + 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,2,1,3,2,0,2,2,2,3,0,3,0,2,2,0,0,3,3,0,0,3,3,0,0,2,0,2,1,2, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, + 3,2,3,3,2,1,1,2,3,3,2,0,3,0,0,3,0,0,2,1,0,0,0,1,0,0,0,0,0,1, + 1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,1,2,0,0,1,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,1,0, + 0,3,0,0,2,3,3,2,3,2,2,2,2,2,3,2,2,2,2,2,2,2,1,0,1,2,1,1,0,0,1, + 0,3,0,3,3,0,2,3,0,2,0,2,0,3,3,0,2,2,1,2,3,0,0,1,3,1,0,1,2,2, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,0,2,2,0,2,2,2,2,2,2,2,2,2,1,2,2,2,2,0,2,2,1,2,2,0,2,0,1, + 1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,2,2,2,2,3,2,3,2,2,2,2,1,2,3,2,2,2,2,2,1,1,1,2,2,0,0,0, + 0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,2,1,2,0,3,2,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,2,2,2,2,1,3,1,0,2,2,2,1,1,1,3,2,2,0,0,2,2,2,1,1,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,0,1,1,2,2,3,2,3,1,2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,2,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,0,0,2,2,2,2,2,2,1,0,1,2,2,1,1,2,1,0,1,2,0,1,1,1,2,1,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,3,2,2,2,1,2,2,0,0,2,2,1,2,0,2,1,2,0,2,2,2,1,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,0,0,3,3,2,2,2,2,2,0,2,2,2,2,2,2,2,1,2,2,1,2,0,2,0,1,0,0,0, + 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,0,0,0,1,0,0,0,0,0,2,1,1,1,3,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,0,0,2,2,2,2,2,2,1,0,2,2,2,1,0,2,1,1,2,1,1,1,1,0,1,1,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,0,2,2,3,1,2,3,2,1,2,0,2,1,2,2,2,2,1,2,0,0,1,1,1,0,0,0,0, + 0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,2,0,0,2,2,1,2,0,2,2,0,2,2,1,0,0,1,0,0,2,0,0,2,2,2,0,1,0,0,0, + 0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,0,0,2,2,0,2,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,2,3,2,2,1,1,0,0,1,2,1,0,2,1,2,1,0,0,0,2,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,0,0,2,2,1,2,1,1,2,0,1,2,0,1,0,1,1,0,0,1,1,1,0,2,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,2,2,1,2,3,0,2,0,2,2,1,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 
0,2,0,0,2,2,2,2,1,1,2,0,2,2,0,0,1,1,0,1,0,1,1,1,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,0,0,2,2,1,2,1,0,2,0,1,2,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,2,2,1,2,2,0,2,0,2,2,0,0,1,0,1,1,0,1,0,1,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,2,2,2,2,2,0,0,0,1,0,0,1,0,0,0,1,1,0,2,2,0,1,0,0,0,0,0, + 0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,0,1,1,2,1,2,2,1,0,1,0,2,0,1,2,1,1,1,1,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,2,2,0,2,1,2,1,0,1,2,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,2,2,0,1,2,0,1,0,0,1,1,0,1,0,2,1,0,1,0,2,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,0,0,2,2,0,2,0,2,2,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,1,2,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0, + 0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, +}; + + +const SequenceModel Windows_1251BelarusianModel = +{ + Windows_1251_CharToOrderMap, + BelarusianLangModel, + 61, + (float)0.9748335015136226, + PR_TRUE, + "WINDOWS-1251" +}; \ No newline at end of file diff --git a/uchardet/uchardet/src/nsSBCSGroupProber.cpp b/uchardet/uchardet/src/nsSBCSGroupProber.cpp index ab6a8659b..59da8f93e 100644 --- a/uchardet/uchardet/src/nsSBCSGroupProber.cpp +++ b/uchardet/uchardet/src/nsSBCSGroupProber.cpp @@ -1,4 +1,4 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; 
c-basic-offset: 2 -*- */ +п»ї/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * @@ -192,6 +192,7 @@ nsSBCSGroupProber::nsSBCSGroupProber() mProbers[99] = new nsSingleByteCharSetProber(&Windows_1252SwedishModel); mProbers[100] = new nsSingleByteCharSetProber(&Windows_1252AfricaansModel); + mProbers[101] = new nsSingleByteCharSetProber(&Windows_1251BelarusianModel); Reset(); } diff --git a/uchardet/uchardet/src/nsSBCSGroupProber.h b/uchardet/uchardet/src/nsSBCSGroupProber.h index 55c1ee474..44ceff878 100644 --- a/uchardet/uchardet/src/nsSBCSGroupProber.h +++ b/uchardet/uchardet/src/nsSBCSGroupProber.h @@ -1,4 +1,4 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +п»ї/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * @@ -40,7 +40,7 @@ #define nsSBCSGroupProber_h__ -#define NUM_OF_SBCS_PROBERS 101 +#define NUM_OF_SBCS_PROBERS 102 class nsCharSetProber; class nsSBCSGroupProber: public nsCharSetProber { diff --git a/uchardet/uchardet/src/nsSBCharSetProber.h b/uchardet/uchardet/src/nsSBCharSetProber.h index e21b1b2db..fd34b82fa 100644 --- a/uchardet/uchardet/src/nsSBCharSetProber.h +++ b/uchardet/uchardet/src/nsSBCharSetProber.h @@ -1,4 +1,4 @@ -/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +п»ї/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * @@ -253,6 +253,7 @@ extern const SequenceModel Iso_8859_15SwedishModel; extern const SequenceModel Windows_1252SwedishModel; extern const SequenceModel Windows_1252AfricaansModel; +extern const SequenceModel Windows_1251BelarusianModel; #endif /* nsSingleByteCharSetProber_h__ */