+ add: Add Belarusian trained charset prober to UCHARDET

This commit is contained in:
Rainer Kottenhoff 2019-03-05 18:48:41 +01:00
parent 8e0aa7690d
commit 5b47a2f2f9
9 changed files with 570 additions and 4 deletions

View File

@ -375,6 +375,7 @@
<ClCompile Include="..\uchardet\uchardet\src\JpCntx.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\LangModels\LangAfricaansModel.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\LangModels\LangArabicModel.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\LangModels\LangBelarusianModel.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\LangModels\LangBulgarianModel.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\LangModels\LangCroatianModel.cpp" />
<ClCompile Include="..\uchardet\uchardet\src\LangModels\LangCzechModel.cpp" />

View File

@ -411,6 +411,9 @@
<ClCompile Include="..\uchardet\uchardet\src\LangModels\LangAfricaansModel.cpp">
<Filter>Source Files\uchardet\LangModels</Filter>
</ClCompile>
<ClCompile Include="..\uchardet\uchardet\src\LangModels\LangBelarusianModel.cpp">
<Filter>Source Files\uchardet\LangModels</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Dialogs.h">

View File

@ -0,0 +1,186 @@
= Logs of language model for Belarusian (be) =
- Generated by BuildLangModel.py
- Started: 2019-03-05 18:30:17.964464
- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
Галоўная_старонка (revision 3314810)
1386 (revision 3318522)
1812 (revision 3317760)
1837 (revision 3317732)
1925 (revision 3316369)
1956 (revision 3316332)
1959 (revision 3316329)
2019 (revision 3333637)
4 сакавіка (revision 1761191)
Helaeomyia petrolei (revision 3312800)
TUT.BY (revision 3189969)
XX стагоддзе (revision 3006438)
Іван Пятровіч Паўлаў (revision 3330827)
Інструкцыя па транслітарацыі (revision 3285076)
Антрапалогія (revision 3095342)
Асфальт (revision 2594585)
Аэрапорт Віцебск (revision 3106296)
Аўстрыя (revision 3306502)
Баравуха (revision 3332684)
Беларуская Вікіпедыя (revision 3330925)
Беларуская мова (revision 3321859)
Беларусь (revision 3320908)
Беласток (revision 3082237)
Вялікае княства Літоўскае (revision 3259013)
Вікіпедыя (revision 3333246)
Віцебск (revision 3328544)
Віцебская вобласць (revision 3328232)
Віцебскі раён (revision 3238295)
Віцьба (revision 3194807)
Горад (revision 2627422)
Гравітацыя (альбом) (revision 3333104)
Давыд Львовіч Глінскі (revision 3326607)
Еўрапейскі маршрут E95 (revision 2686641)
Жарэс Іванавіч Алфёраў (revision 3332006)
Заходняя Дзвіна (revision 3169818)
Знакі паштовай аплаты Украіны (1999) (revision 3225969)
Зігмунд Фрэйд (revision 3200632)
КА Піянер-4 (revision 2394843)
Лучоса (revision 3251826)
Лявон Вольскі (revision 3333010)
Літаратура (revision 1786497)
Магістраль М3 (revision 3116154)
Магістраль М8 (revision 3107563)
Манстэра прывабная (revision 3332338)
Масква (revision 3227608)
Мастацтва (revision 3294147)
Медыцына (revision 1782135)
Месяц, спадарожнік Зямлі (revision 3186245)
Мінск (revision 3229030)
Наваполацк (revision 3260791)
Нафта (revision 3327706)
Неўралогія (revision 3193067)
Нобелеўская прэмія па фізіцы (revision 3052696)
Ніва (1956) (revision 3315439)
Орша (revision 3248596)
Пагранічны канфлікт паміж Індыяй і Пакістанам (2019) (revision 3333643)
Панядзелак (revision 1526755)
Полацк (revision 3329956)
Поль Марыа (revision 3281894)
Порт (revision 2674776)
Псіхааналіз (revision 3260302)
Псіхааналітык (revision 3260302)
Псіхалогія (revision 2232890)
Псіхіятрыя (revision 1919915)
Расія (revision 3239593)
Рэч Паспалітая (revision 3266116)
Санкт-Пецярбург (revision 2857455)
Сацыялогія (revision 3130424)
Славенская Вікіпедыя (revision 2760315)
Справа БелТА (revision 3333271)
Суперкубак Беларусі па футболе 2019 (revision 3333334)
Сыраежка шэрая (revision 3332345)
Універсальны каардынаваны час (revision 2713688)
ФК БАТЭ (revision 3333087)
ФК Дынама Брэст (revision 3332229)
Футбол (revision 3161765)
Чыкага (revision 3312149)
Энцыклапедыя (revision 3048519)
Ягайла (revision 3248985)
Яўхім Храптовіч (revision 3008180)
12 сакавіка (revision 3110377)
1345 (revision 3318571)
1360-я (revision 2963824)
1370-я (revision 3318540)
1380-я (revision 3066609)
1383 (revision 3318525)
1384 (revision 3318524)
1385 (revision 3318523)
1387 (revision 3318521)
1388 (revision 3318520)
1389 (revision 3318519)
1390-я (revision 3318516)
1400-я (revision 3005229)
1428 (revision 3318470)
1456 (revision 3318437)
23.10 (revision 2463644)
24 лютага (revision 3100846)
24 чэрвеня (revision 2866534)
== End of Parsed pages ==
- Wikipedia parsing ended at: 2019-03-05 18:36:38.571630
64 characters appeared 650592 times.
First 61 characters:
[ 0] Char а: 15.592106881117504 %
[ 1] Char н: 6.625350450051645 %
[ 2] Char і: 5.712796960306921 %
[ 3] Char р: 5.026345236338596 %
[ 4] Char с: 4.852196153656977 %
[ 5] Char ы: 4.281024051940387 %
[ 6] Char к: 4.109795386355811 %
[ 7] Char л: 4.034633072647681 %
[ 8] Char е: 3.7853216762579316 %
[ 9] Char т: 3.5224841375239784 %
[10] Char я: 3.404437804338203 %
[11] Char д: 3.239664797599725 %
[12] Char о: 3.2385888544587087 %
[13] Char в: 3.172802616693719 %
[14] Char м: 2.900435295853623 %
[15] Char у: 2.899205646549604 %
[16] Char п: 2.7653275785745906 %
[17] Char з: 2.334028085190104 %
[18] Char ц: 2.276849392553244 %
[19] Char г: 2.130521125375043 %
[20] Char ў: 2.0449067925827555 %
[21] Char б: 1.5492044169003 %
[22] Char ч: 1.3281749545029757 %
[23] Char э: 1.3109598642467168 %
[24] Char й: 1.2388716737986325 %
[25] Char х: 1.0653374157690227 %
[26] Char ь: 1.0553465151738723 %
[27] Char ш: 0.8255558014854164 %
[28] Char ж: 0.5312084993359893 %
[29] Char ю: 0.4706482711130786 %
[30] Char ф: 0.4598888397029167 %
[31] Char i: 0.2782081550341842 %
[32] Char ё: 0.27574885642614727 %
[33] Char и: 0.14233190694014067 %
[34] Char e: 0.13833554670208056 %
[35] Char a: 0.13664477890905513 %
[36] Char s: 0.111436968176676 %
[37] Char o: 0.10344424770055581 %
[38] Char n: 0.10298312921154887 %
[39] Char t: 0.09744970734346564 %
[40] Char r: 0.09714229501746102 %
[41] Char x: 0.07516231370813044 %
[42] Char l: 0.07485490138212582 %
[43] Char u: 0.0667084747430033 %
[44] Char c: 0.06363435148295707 %
[45] Char v: 0.053336038561802177 %
[46] Char m: 0.04857114750873051 %
[47] Char d: 0.04764891053071664 %
[48] Char b: 0.04518961192267965 %
[49] Char p: 0.041808076336628794 %
[50] Char k: 0.033815355860508586 %
[51] Char g: 0.03243200039348778 %
[52] Char w: 0.031202351089469286 %
[53] Char y: 0.0301264079484531 %
[54] Char h: 0.029511583296443857 %
[55] Char z: 0.020750332005312084 %
[56] Char f: 0.018905858049284345 %
[57] Char j: 0.010605725247159511 %
[58] Char ъ: 0.002305592445034676 %
[59] Char щ: 0.0019981801190300527 %
[60] Char q: 0.001844473956027741 %
The first 61 characters have an accumulated ratio of 0.9999815552604403.
1419 sequences found.
First 512 (typical positive ratio): 0.9748335015136226
Next 512 (512-1024): 0.023605173837262638
Rest: 0.0015613246491147821
- Processing end: 2019-03-05 18:36:38.805955

View File

@ -0,0 +1,77 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ##### BEGIN LICENSE BLOCK #####
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Jehan <jehan@girinstud.io>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ##### END LICENSE BLOCK #####
from codepoints import *
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Python 3.6: needs tmp renaming Python/Lib/encodings/cp1251.py as cp1252.py ???
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Canonical charset name and the alternative spellings it is also known by.
name = 'WINDOWS-1251'
aliases = ['CP-1251', 'cswindows1251']
# Language codes grouped by how well this charset covers their alphabet.
language = \
{
# Languages with complete coverage.
# WINDOWS-1251 is a Cyrillic code page: Belarusian, Macedonian, Russian, Serbian.
'complete': [ 'be', 'mk', 'ru', 'sr' ],
'incomplete': []
}
# Classification of each of the 256 byte values of the charset, 16 per row.
# The trailing comment on every row gives the high nibble of the byte; the
# header line below gives the low nibble.
# NOTE(review): CTR/RET/SYM/NUM/LET/ILL are not defined in this file --
# presumably they come from the `codepoints` star import; confirm.
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
charmap = \
[
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
SYM,LET,SYM,LET,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,LET,LET,LET, # 8X
LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,LET,SYM,LET,LET,LET,LET, # 9X
CTR,LET,LET,LET,SYM,LET,SYM,SYM,LET,SYM,LET,SYM,SYM,CTR,SYM,LET, # AX
SYM,SYM,LET,LET,LET,LET,SYM,SYM,LET,SYM,LET,SYM,LET,LET,LET,LET, # BX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # FX
]

View File

@ -0,0 +1,69 @@
#!/bin/python3
# -*- coding: utf-8 -*-
# ##### BEGIN LICENSE BLOCK #####
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Jehan <jehan@girinstud.io>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ##### END LICENSE BLOCK #####
import re
## Mandatory Properties ##
# The human name for the language, in English.
name = 'Belarusian'
# Use 2-letter ISO 639-1 if possible, 3-letter ISO code otherwise,
# or use another catalog as a last resort.
code = 'be'
# ASCII characters are also used in Belarusian.
use_ascii = True
# The charsets we want to support and create data for.
charsets = ['WINDOWS-1251']
## Optional Properties ##
# Alphabet characters.
# If use_ascii=True, there is no need to add any ASCII characters.
# If case_mapping=True, there is no need to add several cases of a same
# character (provided Python algorithms know the right cases).
# NOTE(review): 'и', 'щ' and 'ъ' are Russian letters that are not part of the
# Belarusian alphabet, while 'а', 'е', 'ё', 'і', 'о', 'п', 'р', 'с', 'у', 'ў'
# and 'х' are missing from this list. Confirm whether this is intentional; a
# change here requires regenerating the language model.
alphabet = ['б', 'в', 'г', 'д', 'ж', 'з', 'и', 'й', 'к', 'л', 'м', 'н', 'т', 'ф', 'ц', 'ч', 'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я']
# The start page. Though optional, it is advised to choose one yourself.
start_pages = ['Галоўная_старонка']
# give possibility to select another code for the Wikipedia URL.
wikipedia_code = code
# 'a' and 'A' will be considered the same character, and so on.
# This uses Python algorithm to determine upper/lower-case of a given
# character.
case_mapping = True

View File

@ -0,0 +1,228 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
/********* Language model for: Belarusian *********/
/**
* Generated by BuildLangModel.py
* On: 2019-03-05 18:36:38.571630
**/
/* Character Mapping Table:
* Maps each of the 256 byte values of the charset either to one of the
* special classes below or to the frequency order of the letter it encodes.
*
* ILL: illegal character.
* CTR: control character specific to the charset.
* RET: carriage return or line feed (bytes 0x0D and 0x0A).
* SYM: symbol (punctuation) that does not belong to word.
* NUM: 0 - 9.
*
* Other characters are ordered by probabilities
* (0 is the most common character in the language).
* Upper-case and lower-case forms of a letter share the same order
* (compare rows 4X/6X and CX/EX).
*
* Orders are generic to a language. So the codepoint with order X in
* CHARSET1 maps to the same character as the codepoint with the same
* order X in CHARSET2 for the same language.
* As such, an order may be missing from a given charset. For instance the
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
static const unsigned char Windows_1251_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 35, 48, 44, 47, 34, 56, 51, 54, 31, 57, 50, 42, 46, 38, 37, /* 4X */
49, 60, 40, 36, 39, 43, 45, 52, 41, 53, 55,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 35, 48, 44, 47, 34, 56, 51, 54, 31, 57, 50, 42, 46, 38, 37, /* 6X */
49, 60, 40, 36, 39, 43, 45, 52, 41, 53, 55,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 64,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM, 66,SYM, 67, 68, 69, 70, /* 8X */
71,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 72,SYM, 73, 74, 75, 76, /* 9X */
CTR, 20, 20, 77,SYM, 62,SYM,SYM, 32,SYM, 63,SYM,SYM,CTR,SYM, 61, /* AX */
SYM,SYM, 2, 2, 62, 78,SYM,SYM, 32,SYM, 63,SYM, 79, 80, 81, 61, /* BX */
0, 21, 13, 19, 11, 8, 28, 17, 33, 24, 6, 7, 14, 1, 12, 16, /* CX */
3, 4, 9, 15, 30, 25, 18, 22, 27, 59, 58, 5, 26, 23, 29, 10, /* DX */
0, 21, 13, 19, 11, 8, 28, 17, 33, 24, 6, 7, 14, 1, 12, 16, /* EX */
3, 4, 9, 15, 30, 25, 18, 22, 27, 59, 58, 5, 26, 23, 29, 10, /* FX */
};
/* Column key for the table above (low nibble of the byte value): */
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
/* Model Table:
* Total sequences: 1419
* First 512 sequences: 0.9748335015136226
* Next 512 sequences (512-1024): 0.023605173837262638
* Rest: 0.0015613246491147821
* Negative sequences: TODO
*
* Layout: the table is flattened; every two source lines below hold
* 30 + 31 = 61 values, and there are 61 such pairs (61 x 61 entries).
* NOTE(review): the values 0-3 are presumably frequency categories for a
* pair of character orders as consumed by nsSingleByteCharSetProber --
* confirm against nsSBCharSetProber.h.
*/
static const PRUint8 BelarusianLangModel[] =
{
3,3,3,3,3,0,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,0,3,3,3,
3,1,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,2,3,3,3,2,3,3,3,3,3,2,1,3,2,2,3,3,0,2,3,2,0,2,3,3,2,3,
3,2,3,3,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,0,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,1,3,3,0,3,3,3,
3,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,2,3,3,3,3,3,3,1,3,3,3,3,3,2,2,3,3,0,3,3,3,0,3,2,3,3,1,
2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,3,1,0,2,2,3,0,3,3,2,0,2,
3,2,3,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,
2,3,3,3,3,0,3,3,3,3,3,3,1,3,3,2,3,3,3,3,3,3,3,1,3,3,0,3,3,3,
2,1,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,1,2,3,3,3,0,1,3,3,3,3,2,2,3,2,0,1,0,2,1,2,1,2,0,1,
1,2,2,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,
3,2,3,2,2,3,3,2,3,2,3,2,3,2,2,3,1,2,2,2,0,1,2,2,0,1,3,1,2,3,
2,2,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,0,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,3,3,2,3,2,0,3,3,2,
2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,2,3,2,3,3,3,3,1,1,1,3,3,2,3,2,2,2,2,0,2,3,3,0,2,2,1,0,1,
2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,1,3,3,0,3,3,3,3,3,3,2,3,3,1,3,3,3,3,3,3,3,1,3,3,0,3,3,3,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,1,3,3,3,3,3,2,2,1,3,3,3,3,3,3,3,2,2,0,3,3,3,0,2,1,2,3,1,
1,0,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,2,3,3,0,3,3,3,3,2,3,1,3,3,1,3,3,3,3,3,3,3,1,3,3,0,3,3,2,
2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,2,3,2,2,3,3,2,3,2,3,2,3,1,2,3,2,1,0,1,0,0,1,2,0,1,1,1,0,2,
0,2,3,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,3,3,3,3,3,2,3,2,3,2,2,3,3,1,2,2,1,3,1,2,0,1,0,1,1,1,
2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,3,3,0,3,3,3,3,2,3,2,3,3,1,3,3,3,3,2,3,3,2,3,3,0,3,3,3,
2,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,3,3,1,3,1,1,3,2,1,2,1,0,1,2,2,0,1,1,2,0,0,
0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,3,3,1,3,3,3,3,3,3,1,1,0,3,0,3,2,2,0,0,3,2,1,2,
0,1,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,1,1,3,3,2,3,3,3,0,3,3,2,3,0,0,3,1,0,2,0,3,0,0,3,1,0,3,
0,1,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
3,3,3,3,2,1,2,3,3,1,1,2,3,2,3,3,1,1,0,2,0,1,2,3,0,1,0,1,0,1,
0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,3,2,3,3,0,3,3,2,3,2,3,0,3,2,0,3,3,3,3,0,2,3,2,0,2,0,3,3,0,
1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,2,3,2,3,2,2,3,1,1,2,1,0,0,2,2,0,2,0,2,2,2,
1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,
3,3,0,0,0,3,3,2,2,1,0,0,3,3,2,3,1,1,1,0,0,1,2,3,0,0,0,0,1,1,
0,0,1,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,3,3,0,3,3,2,3,2,3,3,3,3,0,3,3,3,3,3,3,3,1,3,3,0,2,2,1,
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,1,3,0,2,3,0,3,0,3,1,2,3,1,2,2,3,2,0,3,2,0,0,1,0,3,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,0,2,2,2,3,0,0,3,3,2,3,2,0,2,1,0,1,1,1,0,2,1,0,0,1,
0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,1,1,3,0,3,1,2,3,2,2,1,3,3,0,2,2,3,2,0,3,2,1,0,1,0,3,1,2,
2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,0,2,2,3,3,3,2,3,0,0,3,2,3,3,2,0,2,1,0,0,3,3,0,0,0,2,0,1,
0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,1,1,2,3,2,1,2,0,0,2,3,2,1,3,2,1,2,1,0,2,2,3,0,0,0,0,2,0,
0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,2,1,3,2,0,2,2,2,3,0,3,0,2,2,0,0,3,3,0,0,3,3,0,0,2,0,2,1,2,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,2,3,3,2,1,1,2,3,3,2,0,3,0,0,3,0,0,2,1,0,0,0,1,0,0,0,0,0,1,
1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,1,2,0,0,1,0,1,1,0,0,1,1,0,1,0,1,1,1,0,1,0,1,1,0,0,1,0,
0,3,0,0,2,3,3,2,3,2,2,2,2,2,3,2,2,2,2,2,2,2,1,0,1,2,1,1,0,0,1,
0,3,0,3,3,0,2,3,0,2,0,2,0,3,3,0,2,2,1,2,3,0,0,1,3,1,0,1,2,2,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,0,2,2,0,2,2,2,2,2,2,2,2,2,1,2,2,2,2,0,2,2,1,2,2,0,2,0,1,
1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,2,2,2,3,2,3,2,2,2,2,1,2,3,2,2,2,2,2,1,1,1,2,2,0,0,0,
0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,1,2,0,3,2,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,2,2,2,1,3,1,0,2,2,2,1,1,1,3,2,2,0,0,2,2,2,1,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,0,0,1,1,2,2,3,2,3,1,2,2,2,2,2,2,2,2,2,2,2,1,1,1,2,2,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,0,2,2,2,2,2,2,1,0,1,2,2,1,1,2,1,0,1,2,0,1,1,1,2,1,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,3,2,2,2,1,2,2,0,0,2,2,1,2,0,2,1,2,0,2,2,2,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,0,3,3,2,2,2,2,2,0,2,2,2,2,2,2,2,1,2,2,1,2,0,2,0,1,0,0,0,
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,0,0,1,0,0,0,0,0,2,1,1,1,3,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,0,2,2,2,2,2,2,1,0,2,2,2,1,0,2,1,1,2,1,1,1,1,0,1,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,0,0,2,2,3,1,2,3,2,1,2,0,2,1,2,2,2,2,1,2,0,0,1,1,1,0,0,0,0,
0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,2,0,0,2,2,1,2,0,2,2,0,2,2,1,0,0,1,0,0,2,0,0,2,2,2,0,1,0,0,0,
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,0,2,2,0,2,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,3,2,2,1,1,0,0,1,2,1,0,2,1,2,1,0,0,0,2,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,0,2,2,1,2,1,1,2,0,1,2,0,1,0,1,1,0,0,1,1,1,0,2,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,2,1,2,3,0,2,0,2,2,1,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,2,0,0,2,2,2,2,1,1,2,0,2,2,0,0,1,1,0,1,0,1,1,1,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,0,2,2,1,2,1,0,2,0,1,2,1,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,2,1,2,2,0,2,0,2,2,0,0,1,0,1,1,0,1,0,1,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,2,2,2,2,0,0,0,1,0,0,1,0,0,0,1,1,0,2,2,0,1,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,0,0,1,1,2,1,2,2,1,0,1,0,2,0,1,2,1,1,1,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,2,0,2,1,2,1,0,1,2,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,2,0,1,2,0,1,0,0,1,1,0,1,0,2,1,0,1,0,2,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,2,0,2,0,2,2,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,1,2,1,0,1,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,
0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
};
/* Sequence model that ties the WINDOWS-1251 byte-to-order map to the
 * Belarusian sequence table and names the charset it detects.
 * NOTE(review): field meanings are inferred from the values; SequenceModel
 * itself is declared in nsSBCharSetProber.h -- confirm there. */
const SequenceModel Windows_1251BelarusianModel =
{
Windows_1251_CharToOrderMap, /* byte value -> character order / class */
BelarusianLangModel, /* flattened sequence table */
61, /* presumably the number of character orders covered by the table */
(float)0.9748335015136226, /* equals the ratio reported for the first 512 sequences */
PR_TRUE, /* NOTE(review): flag semantics not visible here -- confirm */
"WINDOWS-1251" /* charset name */
};

View File

@ -1,4 +1,4 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@ -192,6 +192,7 @@ nsSBCSGroupProber::nsSBCSGroupProber()
mProbers[99] = new nsSingleByteCharSetProber(&Windows_1252SwedishModel);
mProbers[100] = new nsSingleByteCharSetProber(&Windows_1252AfricaansModel);
mProbers[101] = new nsSingleByteCharSetProber(&Windows_1251BelarusianModel);
Reset();
}

View File

@ -1,4 +1,4 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@ -40,7 +40,7 @@
#define nsSBCSGroupProber_h__
#define NUM_OF_SBCS_PROBERS 101
#define NUM_OF_SBCS_PROBERS 102
class nsCharSetProber;
class nsSBCSGroupProber: public nsCharSetProber {

View File

@ -1,4 +1,4 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@ -253,6 +253,7 @@ extern const SequenceModel Iso_8859_15SwedishModel;
extern const SequenceModel Windows_1252SwedishModel;
extern const SequenceModel Windows_1252AfricaansModel;
extern const SequenceModel Windows_1251BelarusianModel;
#endif /* nsSingleByteCharSetProber_h__ */