mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-14 21:09:05 +08:00
Merge pull request #1890 from RaiKoHoff/DevNewFeatures
Fix/Change UCHARDET: Confidence calculation for Single Byte Character Set (SBCS)
This commit is contained in:
commit
bb79f1d669
@ -1,4 +1,4 @@
|
||||
[Notepad3]
|
||||
[Notepad3]
|
||||
;Notepad3.ini=%USERPROFILE%\Notepad3.ini
|
||||
;Notepad3.ini=%APPDATA%\Rizonesoft\Notepad3\Notepad3.ini
|
||||
[Settings]
|
||||
@ -49,7 +49,7 @@ SettingsVersion=4
|
||||
;UndoTransactionTimeout=0
|
||||
;AdministrationTool.exe=
|
||||
;DevDebugMode=0
|
||||
;AnalyzeReliableConfidenceLevel=66
|
||||
;AnalyzeReliableConfidenceLevel=70
|
||||
;LexerSQLNumberSignAsComment=1
|
||||
;ExitOnESCSkipLevel=2
|
||||
[Statusbar Settings]
|
||||
|
||||
@ -1 +1 @@
|
||||
2708
|
||||
2709
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
<assemblyIdentity
|
||||
name="Notepad3"
|
||||
processorArchitecture="*"
|
||||
version="5.20.116.2708"
|
||||
version="5.20.116.2709"
|
||||
type="win32"
|
||||
/>
|
||||
<description>Notepad3 BETA</description>
|
||||
|
||||
@ -780,7 +780,7 @@ void LoadSettings()
|
||||
Settings2.NoCutLineOnEmptySelection = IniSectionGetBool(Settings2_Section, L"NoCutLineOnEmptySelection", Defaults2.NoCutLineOnEmptySelection);
|
||||
|
||||
|
||||
int const iARCLdef = 66;
|
||||
int const iARCLdef = 70;
|
||||
Defaults2.AnalyzeReliableConfidenceLevel = (float)iARCLdef / 100.0f;
|
||||
int const iARCLset = clampi(IniSectionGetInt(Settings2_Section, L"AnalyzeReliableConfidenceLevel", iARCLdef), 0, 100);
|
||||
Settings2.AnalyzeReliableConfidenceLevel = (float)iARCLset / 100.0f;
|
||||
|
||||
@ -904,8 +904,8 @@ static void _SetEncodingTitleInfo(const char* encodingUCD, cpi_enc_t encUCD, flo
|
||||
const char* ukn = (!encodingUCD || (encodingUCD[0] == '\0')) ? "<unknown>" : encodingUCD;
|
||||
StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), (encUCD == CPI_ASCII_7BIT) ? "ASCII" : ukn);
|
||||
}
|
||||
float const ucd_conf_perc = ucd_confidence * 100.0f;
|
||||
StringCchPrintfA(tmpBuf, 128, "' Conf=%.0f%%", ucd_conf_perc);
|
||||
int const ucd_conf_perc = float2int(ucd_confidence * 100.0f);
|
||||
StringCchPrintfA(tmpBuf, ARRAYSIZE(tmpBuf), "' Conf=%i%%", ucd_conf_perc);
|
||||
StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), tmpBuf);
|
||||
|
||||
//~StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), " || CED='");
|
||||
@ -920,15 +920,17 @@ static void _SetEncodingTitleInfo(const char* encodingUCD, cpi_enc_t encUCD, flo
|
||||
//~if ((encCED >= 0) || (encCED == CPI_ASCII_7BIT)) {
|
||||
//~ bool const ced_reliable = (ced_confidence >= Settings2.ReliableCEDConfidenceMapping);
|
||||
//~ bool const ced_not_reliable = (ced_confidence <= Settings2.UnReliableCEDConfidenceMapping);
|
||||
//~ StringCchPrintfA(tmpBuf, 128, "' Conf=%.0f%% [%s])", ced_confidence * 100.0f,
|
||||
//~ StringCchPrintfA(tmpBuf, ARRAYSIZE(tmpBuf), "' Conf=%.0f%% [%s])", ced_confidence * 100.0f,
|
||||
//~ ced_reliable ? "reliable" : (ced_not_reliable ? "NOT reliable" : "???"));
|
||||
//~ StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), tmpBuf);
|
||||
//~}
|
||||
//~else {
|
||||
//~ StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), "'");
|
||||
//~}
|
||||
|
||||
StringCchPrintfA(tmpBuf, ARRAYSIZE(tmpBuf), ucd_confidence >= Settings2.AnalyzeReliableConfidenceLevel ? " (reliable)" : " (NOT reliable)");
|
||||
|
||||
int const relThreshold = float2int(Settings2.AnalyzeReliableConfidenceLevel * 100.0f);
|
||||
const char* rel_fmt = (ucd_conf_perc >= relThreshold) ? " (reliable (%i%%))" : " (NOT reliable(%i%%))";
|
||||
StringCchPrintfA(tmpBuf, ARRAYSIZE(tmpBuf), rel_fmt, relThreshold);
|
||||
StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), tmpBuf);
|
||||
|
||||
::MultiByteToWideChar(CP_UTF7, 0, chEncodingInfo, -1, wchEncodingInfo, ARRAYSIZE(wchEncodingInfo));
|
||||
@ -1327,7 +1329,9 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData,
|
||||
}
|
||||
}
|
||||
|
||||
encDetRes.bIsAnalysisReliable = (confidence >= Settings2.AnalyzeReliableConfidenceLevel);
|
||||
int const iConfidence = float2int(confidence * 100.0f);
|
||||
int const iReliableThreshold = float2int(Settings2.AnalyzeReliableConfidenceLevel * 100.0f);
|
||||
encDetRes.bIsAnalysisReliable = (iConfidence >= iReliableThreshold);
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// --- choose best encoding guess ----
|
||||
|
||||
@ -9843,10 +9843,13 @@ bool FileRevert(LPCWSTR szFileName, bool bIgnoreCmdLnEnc)
|
||||
bool bPreserveView = true;
|
||||
DOCVIEWPOS_T const docView = EditGetCurrentDocView(Globals.hwndEdit);
|
||||
|
||||
Encoding_SrcWeak(CPI_NONE);
|
||||
if (bIgnoreCmdLnEnc) {
|
||||
Encoding_Forced(CPI_NONE); // ignore history too
|
||||
Encoding_Forced(CPI_NONE); // ignore history too
|
||||
}
|
||||
else if (Encoding_HasChanged(Encoding_Current(CPI_GET))) {
|
||||
Encoding_SrcWeak(Encoding_Current(CPI_GET));
|
||||
}
|
||||
Encoding_SrcWeak(Encoding_Current(CPI_GET));
|
||||
|
||||
WCHAR tchFileName2[MAX_PATH] = { L'\0' };
|
||||
StringCchCopyW(tchFileName2, COUNTOF(tchFileName2), szFileName);
|
||||
|
||||
@ -9,7 +9,7 @@
|
||||
#define VERSION_MAJOR 5
|
||||
#define VERSION_MINOR 20
|
||||
#define VERSION_REV 116
|
||||
#define VERSION_BUILD 2708
|
||||
#define VERSION_BUILD 2709
|
||||
#define SCINTILLA_VER 423
|
||||
#define ONIGURUMA_REGEX_VER 6.9.4
|
||||
#define UCHARDET_VER 2018.09.27
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
* vim: et sw=2 ts=2 fdm=marker
|
||||
*/
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
@ -46,20 +46,18 @@
|
||||
//#include "LangModels/GB2312Freq.tab"
|
||||
#include "LangModels/GB18030Freq.tab"
|
||||
|
||||
#define SURE_YES 0.99f
|
||||
#define SURE_NO 0.01f
|
||||
|
||||
//return confidence based on received data
|
||||
float CharDistributionAnalysis::GetConfidence()
|
||||
{
|
||||
//if we didn't receive any character in our consideration range, or the
|
||||
// number of frequent characters is below the minimum threshold, return
|
||||
// negative answer
|
||||
if (mTotalChars <= 0 || mFreqChars <= mDataThreshold)
|
||||
if ((mTotalChars <= 0) || (mFreqChars < mDataThreshold))
|
||||
return SURE_NO;
|
||||
|
||||
if (mTotalChars != mFreqChars) {
|
||||
float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);
|
||||
if (mTotalChars > mFreqChars)
|
||||
{
|
||||
float r = (float)mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);
|
||||
|
||||
if (r < SURE_YES)
|
||||
return r;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
* vim: et sw=2 ts=2 fdm=marker
|
||||
*/
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
@ -42,10 +42,6 @@
|
||||
|
||||
#include "nscore.h"
|
||||
|
||||
#define ENOUGH_DATA_THRESHOLD 4096
|
||||
|
||||
#define MINIMUM_DATA_THRESHOLD 4
|
||||
|
||||
class CharDistributionAnalysis
|
||||
{
|
||||
public:
|
||||
@ -92,7 +88,7 @@ public:
|
||||
|
||||
//It is not necessary to receive all data to draw a conclusion. For charset detection,
|
||||
// a certain amount of data is enough
|
||||
PRBool GotEnoughData() {return mTotalChars > ENOUGH_DATA_THRESHOLD;};
|
||||
PRBool GotEnoughData() { return (mTotalChars >= ENOUGH_DATA_THRESHOLD); };
|
||||
|
||||
protected:
|
||||
//we do not handle character base on its original encoding string, but
|
||||
|
||||
@ -50,7 +50,6 @@ typedef enum {
|
||||
eNotMe = 2 //Negative answer
|
||||
} nsProbingState;
|
||||
|
||||
#define SHORTCUT_THRESHOLD (float)0.95
|
||||
|
||||
class nsCharSetProber {
|
||||
public:
|
||||
|
||||
@ -53,7 +53,7 @@ public:
|
||||
const char* GetCharSetName() {return mDetectedCharset;};
|
||||
nsProbingState GetState(void) {return mState;};
|
||||
void Reset(void);
|
||||
float GetConfidence(void){return (float)0.99;};
|
||||
float GetConfidence(void){return SURE_YES;};
|
||||
void SetOpion() {};
|
||||
|
||||
protected:
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
@ -89,8 +89,6 @@ nsProbingState nsGB18030Prober::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
|
||||
float nsGB18030Prober::GetConfidence(void)
|
||||
{
|
||||
float distribCf = mDistributionAnalyser.GetConfidence();
|
||||
|
||||
return (float)distribCf;
|
||||
return mDistributionAnalyser.GetConfidence();
|
||||
}
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
|
||||
// Minimum Visual vs Logical model score difference.
|
||||
// If the difference is below this, don't rely at all on the model score distance.
|
||||
#define MIN_MODEL_DISTANCE (0.01)
|
||||
#define MIN_MODEL_DISTANCE (0.01f)
|
||||
|
||||
#define VISUAL_HEBREW_NAME ("ISO-8859-8")
|
||||
#define LOGICAL_HEBREW_NAME ("WINDOWS-1255")
|
||||
|
||||
@ -149,23 +149,22 @@ nsProbingState nsLatin1Prober::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
float nsLatin1Prober::GetConfidence(void)
|
||||
{
|
||||
if (mState == eNotMe)
|
||||
return 0.01f;
|
||||
return SURE_NO;
|
||||
|
||||
float confidence;
|
||||
PRUint32 total = 0;
|
||||
for (PRInt32 i = 0; i < FREQ_CAT_NUM; i++)
|
||||
for (PRInt32 i = 0; i < FREQ_CAT_NUM; i++) {
|
||||
total += mFreqCounter[i];
|
||||
|
||||
if(!total)
|
||||
confidence = 0.0f;
|
||||
else
|
||||
{
|
||||
confidence = mFreqCounter[3]*1.0f / total;
|
||||
confidence -= mFreqCounter[1]*20.0f/total;
|
||||
}
|
||||
|
||||
if (confidence < 0.0f)
|
||||
confidence = 0.0f;
|
||||
float confidence = 0.0f;
|
||||
|
||||
if (total)
|
||||
{
|
||||
confidence = (float)mFreqCounter[3] / (float)total;
|
||||
confidence -= (float)mFreqCounter[1] * 20.0f / (float)total;
|
||||
}
|
||||
|
||||
if (confidence < 0.0f) { confidence = 0.0f; }
|
||||
|
||||
// lower the confidence of latin1 so that other, more accurate detectors
|
||||
// can take priority.
|
||||
|
||||
@ -59,10 +59,8 @@ const char *ProberName[] =
|
||||
#endif
|
||||
|
||||
nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
|
||||
: mNumOfProbers(MAX_NUM_OF_MBCS_PROBERS), mBestGuess(-1), mActiveNum(0)
|
||||
{
|
||||
for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++) {
|
||||
mProbers[i] = nsnull;
|
||||
}
|
||||
PRUint32 i = 0;
|
||||
mProbers[i++] = new nsUTF8Prober();
|
||||
if (aLanguageFilter & NS_FILTER_JAPANESE)
|
||||
@ -84,14 +82,19 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
|
||||
mProbers[i++] = new nsBig5Prober(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL);
|
||||
mProbers[i++] = new nsEUCTWProber(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL);
|
||||
}
|
||||
|
||||
mNumOfProbers = i;
|
||||
|
||||
for (; i < MAX_NUM_OF_MBCS_PROBERS; ++i) { mProbers[i] = nsnull; }
|
||||
|
||||
Reset();
|
||||
}
|
||||
|
||||
nsMBCSGroupProber::~nsMBCSGroupProber()
|
||||
{
|
||||
for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
|
||||
for (PRUint32 i = 0; i < MAX_NUM_OF_MBCS_PROBERS; ++i)
|
||||
{
|
||||
delete mProbers[i];
|
||||
if (mProbers[i]) { delete mProbers[i]; }
|
||||
}
|
||||
}
|
||||
|
||||
@ -100,8 +103,8 @@ const char* nsMBCSGroupProber::GetCharSetName()
|
||||
if (mBestGuess == -1)
|
||||
{
|
||||
GetConfidence();
|
||||
if (mBestGuess == -1)
|
||||
mBestGuess = 0;
|
||||
|
||||
if (mBestGuess == -1) { mBestGuess = 0; }
|
||||
}
|
||||
return mProbers[mBestGuess]->GetCharSetName();
|
||||
}
|
||||
@ -109,7 +112,7 @@ const char* nsMBCSGroupProber::GetCharSetName()
|
||||
void nsMBCSGroupProber::Reset(void)
|
||||
{
|
||||
mActiveNum = 0;
|
||||
for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
|
||||
for (PRUint32 i = 0; i < MAX_NUM_OF_MBCS_PROBERS; i++)
|
||||
{
|
||||
if (mProbers[i])
|
||||
{
|
||||
@ -144,7 +147,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
{
|
||||
if (--keepNext == 0)
|
||||
{
|
||||
for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
|
||||
for (PRUint32 i = 0; i < mNumOfProbers; i++)
|
||||
{
|
||||
if (!mIsActive[i])
|
||||
continue;
|
||||
@ -161,7 +164,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
}
|
||||
|
||||
if (keepNext) {
|
||||
for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
|
||||
for (PRUint32 i = 0; i < mNumOfProbers; i++)
|
||||
{
|
||||
if (!mIsActive[i])
|
||||
continue;
|
||||
@ -179,23 +182,22 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
return mState;
|
||||
}
|
||||
|
||||
float nsMBCSGroupProber::GetConfidence(void)
|
||||
float nsMBCSGroupProber::GetConfidence()
|
||||
{
|
||||
PRUint32 i;
|
||||
float bestConf = 0.0, cf;
|
||||
float bestConf = 0.0f;
|
||||
|
||||
switch (mState)
|
||||
{
|
||||
case eFoundIt:
|
||||
return (float)0.99;
|
||||
return SURE_YES;
|
||||
case eNotMe:
|
||||
return (float)0.01;
|
||||
return SURE_NO;
|
||||
default:
|
||||
for (i = 0; i < NUM_OF_PROBERS; i++)
|
||||
for (PRUint32 i = 0; i < mNumOfProbers; i++)
|
||||
{
|
||||
if (!mIsActive[i])
|
||||
continue;
|
||||
cf = mProbers[i]->GetConfidence();
|
||||
float const cf = mProbers[i]->GetConfidence();
|
||||
if (bestConf < cf)
|
||||
{
|
||||
bestConf = cf;
|
||||
@ -209,17 +211,14 @@ float nsMBCSGroupProber::GetConfidence(void)
|
||||
#ifdef DEBUG_chardet
|
||||
void nsMBCSGroupProber::DumpStatus()
|
||||
{
|
||||
PRUint32 i;
|
||||
float cf;
|
||||
|
||||
GetConfidence();
|
||||
for (i = 0; i < NUM_OF_PROBERS; i++)
|
||||
for (PRUint32 i = 0; i < mNumOfProbers; i++)
|
||||
{
|
||||
if (!mIsActive[i])
|
||||
printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
|
||||
else
|
||||
{
|
||||
cf = mProbers[i]->GetConfidence();
|
||||
float const cf = mProbers[i]->GetConfidence();
|
||||
printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]);
|
||||
}
|
||||
}
|
||||
@ -229,7 +228,7 @@ void nsMBCSGroupProber::DumpStatus()
|
||||
#ifdef DEBUG_jgmyers
|
||||
void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], PRUint32 &offset)
|
||||
{
|
||||
for (PRUint32 i = 0; i < NUM_OF_PROBERS; ++i) {
|
||||
for (PRUint32 i = 0; i < mNumOfProbers; ++i) {
|
||||
states[offset].name = ProberName[i];
|
||||
states[offset].isActive = mIsActive[i];
|
||||
states[offset].confidence = mIsActive[i] ? mProbers[i]->GetConfidence() : 0.0;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
* vim: et sw=2 ts=2 fdm=marker
|
||||
*/
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
@ -50,7 +50,7 @@
|
||||
#include "nsBig5Prober.h"
|
||||
#include "nsEUCTWProber.h"
|
||||
|
||||
#define NUM_OF_PROBERS 7
|
||||
#define MAX_NUM_OF_MBCS_PROBERS 7
|
||||
|
||||
class nsMBCSGroupProber: public nsCharSetProber {
|
||||
public:
|
||||
@ -73,9 +73,10 @@ public:
|
||||
|
||||
protected:
|
||||
nsProbingState mState;
|
||||
nsCharSetProber* mProbers[NUM_OF_PROBERS];
|
||||
PRBool mIsActive[NUM_OF_PROBERS];
|
||||
PRInt32 mBestGuess;
|
||||
nsCharSetProber* mProbers[MAX_NUM_OF_MBCS_PROBERS];
|
||||
PRBool mIsActive[MAX_NUM_OF_MBCS_PROBERS];
|
||||
PRUint32 mNumOfProbers;
|
||||
PRInt32 mBestGuess;
|
||||
PRUint32 mActiveNum;
|
||||
PRUint32 mKeepNext;
|
||||
};
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
* vim: et sw=2 ts=2 fdm=marker
|
||||
*/
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
@ -46,173 +46,183 @@
|
||||
|
||||
#include "nsHebrewProber.h"
|
||||
|
||||
|
||||
nsSBCSGroupProber::nsSBCSGroupProber()
|
||||
: mNumOfProbers(MAX_NUM_OF_SBCS_PROBERS), mBestGuess(-1), mActiveNum(0)
|
||||
{
|
||||
mProbers[0] = new nsSingleByteCharSetProber(&Win1251RussianModel);
|
||||
mProbers[1] = new nsSingleByteCharSetProber(&Koi8rRussianModel);
|
||||
mProbers[2] = new nsSingleByteCharSetProber(&Latin5RussianModel);
|
||||
mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicRussianModel);
|
||||
mProbers[4] = new nsSingleByteCharSetProber(&Ibm866RussianModel);
|
||||
mProbers[5] = new nsSingleByteCharSetProber(&Ibm855RussianModel);
|
||||
PRUint32 i = 0;
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Win1251RussianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Koi8rRussianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Latin5RussianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&MacCyrillicRussianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Ibm866RussianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Ibm855RussianModel);
|
||||
|
||||
mProbers[6] = new nsSingleByteCharSetProber(&Iso_8859_7GreekModel);
|
||||
mProbers[7] = new nsSingleByteCharSetProber(&Windows_1253GreekModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_7GreekModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1253GreekModel);
|
||||
|
||||
mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
|
||||
mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
|
||||
|
||||
nsHebrewProber *hebprober = new nsHebrewProber();
|
||||
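// Notice: The Hebrew prober and its logical and visual model probers occupy three consecutive
|
||||
// slots (heb, heb+1, heb+2 - formerly hard-coded as 10,11,12); any change must be reflected in the code below as well.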
|
||||
mProbers[10] = hebprober;
|
||||
mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
|
||||
mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
|
||||
PRUint32 const heb = i;
|
||||
mProbers[i++] = hebprober;
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
|
||||
// Tell the Hebrew prober about the logical and visual probers
|
||||
if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
|
||||
if (mProbers[heb] && mProbers[heb+1] && mProbers[heb+2]) // all are not null
|
||||
{
|
||||
hebprober->SetModelProbers(mProbers[11], mProbers[12]);
|
||||
hebprober->SetModelProbers(mProbers[heb+1], mProbers[heb+2]);
|
||||
}
|
||||
else // One or more is null. avoid any Hebrew probing, null them all
|
||||
{
|
||||
for (PRUint32 i = 10; i <= 12; ++i)
|
||||
for (PRUint32 j = heb + 2; j >= heb; --j)
|
||||
{
|
||||
delete mProbers[i];
|
||||
mProbers[i] = 0;
|
||||
delete mProbers[j];
|
||||
mProbers[j] = nsnull;
|
||||
}
|
||||
}
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Tis_620ThaiModel);
|
||||
|
||||
mProbers[13] = new nsSingleByteCharSetProber(&Tis_620ThaiModel);
|
||||
mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_11ThaiModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252AfricaansModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1AfricaansModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9AfricaansModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15AfricaansModel);
|
||||
|
||||
mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
|
||||
mProbers[16] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
|
||||
mProbers[17] = new nsSingleByteCharSetProber(&Windows_1252FrenchModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1GermanModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252GermanModel);
|
||||
|
||||
mProbers[18] = new nsSingleByteCharSetProber(&Iso_8859_1SpanishModel);
|
||||
mProbers[19] = new nsSingleByteCharSetProber(&Iso_8859_15SpanishModel);
|
||||
mProbers[20] = new nsSingleByteCharSetProber(&Windows_1252SpanishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252FrenchModel);
|
||||
|
||||
mProbers[21] = new nsSingleByteCharSetProber(&Iso_8859_2HungarianModel);
|
||||
mProbers[22] = new nsSingleByteCharSetProber(&Windows_1250HungarianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1SpanishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15SpanishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252SpanishModel);
|
||||
|
||||
mProbers[23] = new nsSingleByteCharSetProber(&Iso_8859_1GermanModel);
|
||||
mProbers[24] = new nsSingleByteCharSetProber(&Windows_1252GermanModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1PortugueseModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9PortugueseModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15PortugueseModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252PortugueseModel);
|
||||
|
||||
mProbers[25] = new nsSingleByteCharSetProber(&Iso_8859_3EsperantoModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2HungarianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250HungarianModel);
|
||||
|
||||
mProbers[26] = new nsSingleByteCharSetProber(&Iso_8859_3TurkishModel);
|
||||
mProbers[27] = new nsSingleByteCharSetProber(&Iso_8859_9TurkishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_3EsperantoModel);
|
||||
|
||||
mProbers[28] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel);
|
||||
mProbers[29] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_3TurkishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9TurkishModel);
|
||||
|
||||
mProbers[30] = new nsSingleByteCharSetProber(&VisciiVietnameseModel);
|
||||
mProbers[31] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15DanishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1DanishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252DanishModel);
|
||||
|
||||
mProbers[32] = new nsSingleByteCharSetProber(&Iso_8859_15DanishModel);
|
||||
mProbers[33] = new nsSingleByteCharSetProber(&Iso_8859_1DanishModel);
|
||||
mProbers[34] = new nsSingleByteCharSetProber(&Windows_1252DanishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13LithuanianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_10LithuanianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_4LithuanianModel);
|
||||
|
||||
mProbers[35] = new nsSingleByteCharSetProber(&Iso_8859_13LithuanianModel);
|
||||
mProbers[36] = new nsSingleByteCharSetProber(&Iso_8859_10LithuanianModel);
|
||||
mProbers[37] = new nsSingleByteCharSetProber(&Iso_8859_4LithuanianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13LatvianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_10LatvianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_4LatvianModel);
|
||||
|
||||
mProbers[38] = new nsSingleByteCharSetProber(&Iso_8859_13LatvianModel);
|
||||
mProbers[39] = new nsSingleByteCharSetProber(&Iso_8859_10LatvianModel);
|
||||
mProbers[40] = new nsSingleByteCharSetProber(&Iso_8859_4LatvianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_3MalteseModel);
|
||||
|
||||
mProbers[41] = new nsSingleByteCharSetProber(&Iso_8859_1PortugueseModel);
|
||||
mProbers[42] = new nsSingleByteCharSetProber(&Iso_8859_9PortugueseModel);
|
||||
mProbers[43] = new nsSingleByteCharSetProber(&Iso_8859_15PortugueseModel);
|
||||
mProbers[44] = new nsSingleByteCharSetProber(&Windows_1252PortugueseModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250CzechModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2CzechModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&MaccentraleuropeCzechModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852CzechModel);
|
||||
|
||||
mProbers[45] = new nsSingleByteCharSetProber(&Iso_8859_3MalteseModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250SlovakModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2SlovakModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&MaccentraleuropeSlovakModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852SlovakModel);
|
||||
|
||||
mProbers[46] = new nsSingleByteCharSetProber(&Windows_1250CzechModel);
|
||||
mProbers[47] = new nsSingleByteCharSetProber(&Iso_8859_2CzechModel);
|
||||
mProbers[48] = new nsSingleByteCharSetProber(&MaccentraleuropeCzechModel);
|
||||
mProbers[49] = new nsSingleByteCharSetProber(&Ibm852CzechModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250PolishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2PolishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13PolishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_16PolishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&MaccentraleuropePolishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852PolishModel);
|
||||
|
||||
mProbers[50] = new nsSingleByteCharSetProber(&Windows_1250SlovakModel);
|
||||
mProbers[51] = new nsSingleByteCharSetProber(&Iso_8859_2SlovakModel);
|
||||
mProbers[52] = new nsSingleByteCharSetProber(&MaccentraleuropeSlovakModel);
|
||||
mProbers[53] = new nsSingleByteCharSetProber(&Ibm852SlovakModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1FinnishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_4FinnishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9FinnishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13FinnishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15FinnishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252FinnishModel);
|
||||
|
||||
mProbers[54] = new nsSingleByteCharSetProber(&Windows_1250PolishModel);
|
||||
mProbers[55] = new nsSingleByteCharSetProber(&Iso_8859_2PolishModel);
|
||||
mProbers[56] = new nsSingleByteCharSetProber(&Iso_8859_13PolishModel);
|
||||
mProbers[57] = new nsSingleByteCharSetProber(&Iso_8859_16PolishModel);
|
||||
mProbers[58] = new nsSingleByteCharSetProber(&MaccentraleuropePolishModel);
|
||||
mProbers[59] = new nsSingleByteCharSetProber(&Ibm852PolishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1ItalianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_3ItalianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9ItalianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15ItalianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252ItalianModel);
|
||||
|
||||
mProbers[60] = new nsSingleByteCharSetProber(&Iso_8859_1FinnishModel);
|
||||
mProbers[61] = new nsSingleByteCharSetProber(&Iso_8859_4FinnishModel);
|
||||
mProbers[62] = new nsSingleByteCharSetProber(&Iso_8859_9FinnishModel);
|
||||
mProbers[63] = new nsSingleByteCharSetProber(&Iso_8859_13FinnishModel);
|
||||
mProbers[64] = new nsSingleByteCharSetProber(&Iso_8859_15FinnishModel);
|
||||
mProbers[65] = new nsSingleByteCharSetProber(&Windows_1252FinnishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250CroatianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2CroatianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13CroatianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_16CroatianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&MaccentraleuropeCroatianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852CroatianModel);
|
||||
|
||||
mProbers[66] = new nsSingleByteCharSetProber(&Iso_8859_1ItalianModel);
|
||||
mProbers[67] = new nsSingleByteCharSetProber(&Iso_8859_3ItalianModel);
|
||||
mProbers[68] = new nsSingleByteCharSetProber(&Iso_8859_9ItalianModel);
|
||||
mProbers[69] = new nsSingleByteCharSetProber(&Iso_8859_15ItalianModel);
|
||||
mProbers[70] = new nsSingleByteCharSetProber(&Windows_1252ItalianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252EstonianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1257EstonianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_4EstonianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13EstonianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15EstonianModel);
|
||||
|
||||
mProbers[71] = new nsSingleByteCharSetProber(&Windows_1250CroatianModel);
|
||||
mProbers[72] = new nsSingleByteCharSetProber(&Iso_8859_2CroatianModel);
|
||||
mProbers[73] = new nsSingleByteCharSetProber(&Iso_8859_13CroatianModel);
|
||||
mProbers[74] = new nsSingleByteCharSetProber(&Iso_8859_16CroatianModel);
|
||||
mProbers[75] = new nsSingleByteCharSetProber(&MaccentraleuropeCroatianModel);
|
||||
mProbers[76] = new nsSingleByteCharSetProber(&Ibm852CroatianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1IrishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9IrishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15IrishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252IrishModel);
|
||||
|
||||
mProbers[77] = new nsSingleByteCharSetProber(&Windows_1252EstonianModel);
|
||||
mProbers[78] = new nsSingleByteCharSetProber(&Windows_1257EstonianModel);
|
||||
mProbers[79] = new nsSingleByteCharSetProber(&Iso_8859_4EstonianModel);
|
||||
mProbers[80] = new nsSingleByteCharSetProber(&Iso_8859_13EstonianModel);
|
||||
mProbers[81] = new nsSingleByteCharSetProber(&Iso_8859_15EstonianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250RomanianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2RomanianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_16RomanianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852RomanianModel);
|
||||
|
||||
mProbers[82] = new nsSingleByteCharSetProber(&Iso_8859_1IrishModel);
|
||||
mProbers[83] = new nsSingleByteCharSetProber(&Iso_8859_9IrishModel);
|
||||
mProbers[84] = new nsSingleByteCharSetProber(&Iso_8859_15IrishModel);
|
||||
mProbers[85] = new nsSingleByteCharSetProber(&Windows_1252IrishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250SloveneModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2SloveneModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_16SloveneModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&MaccentraleuropeSloveneModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852SloveneModel);
|
||||
|
||||
mProbers[86] = new nsSingleByteCharSetProber(&Windows_1250RomanianModel);
|
||||
mProbers[87] = new nsSingleByteCharSetProber(&Iso_8859_2RomanianModel);
|
||||
mProbers[88] = new nsSingleByteCharSetProber(&Iso_8859_16RomanianModel);
|
||||
mProbers[89] = new nsSingleByteCharSetProber(&Ibm852RomanianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1SwedishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_4SwedishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9SwedishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15SwedishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252SwedishModel);
|
||||
|
||||
mProbers[90] = new nsSingleByteCharSetProber(&Windows_1250SloveneModel);
|
||||
mProbers[91] = new nsSingleByteCharSetProber(&Iso_8859_2SloveneModel);
|
||||
mProbers[92] = new nsSingleByteCharSetProber(&Iso_8859_16SloveneModel);
|
||||
mProbers[93] = new nsSingleByteCharSetProber(&MaccentraleuropeSloveneModel);
|
||||
mProbers[94] = new nsSingleByteCharSetProber(&Ibm852SloveneModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1251BelarusianModel);
|
||||
|
||||
mProbers[95] = new nsSingleByteCharSetProber(&Iso_8859_1SwedishModel);
|
||||
mProbers[96] = new nsSingleByteCharSetProber(&Iso_8859_4SwedishModel);
|
||||
mProbers[97] = new nsSingleByteCharSetProber(&Iso_8859_9SwedishModel);
|
||||
mProbers[98] = new nsSingleByteCharSetProber(&Iso_8859_15SwedishModel);
|
||||
mProbers[99] = new nsSingleByteCharSetProber(&Windows_1252SwedishModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252NederlandsModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1NederlandsModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9NederlandsModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15NederlandsModel);
|
||||
|
||||
mProbers[100] = new nsSingleByteCharSetProber(&Windows_1252AfricaansModel);
|
||||
mProbers[101] = new nsSingleByteCharSetProber(&Iso_8859_1AfricaansModel);
|
||||
mProbers[102] = new nsSingleByteCharSetProber(&Iso_8859_9AfricaansModel);
|
||||
mProbers[103] = new nsSingleByteCharSetProber(&Iso_8859_15AfricaansModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel);
|
||||
|
||||
mProbers[104] = new nsSingleByteCharSetProber(&Windows_1251BelarusianModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&VisciiVietnameseModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
|
||||
|
||||
|
||||
//mProbers[i++] = new nsSingleByteCharSetProber(&Tis_620ThaiModel);
|
||||
mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_11ThaiModel);
|
||||
|
||||
mNumOfProbers = i;
|
||||
|
||||
for (; i < MAX_NUM_OF_SBCS_PROBERS; ++i) { mProbers[i] = nsnull; }
|
||||
|
||||
mProbers[105] = new nsSingleByteCharSetProber(&Windows_1252NederlandsModel);
|
||||
mProbers[106] = new nsSingleByteCharSetProber(&Iso_8859_1NederlandsModel);
|
||||
mProbers[107] = new nsSingleByteCharSetProber(&Iso_8859_9NederlandsModel);
|
||||
mProbers[108] = new nsSingleByteCharSetProber(&Iso_8859_15NederlandsModel);
|
||||
|
||||
Reset();
|
||||
}
|
||||
|
||||
nsSBCSGroupProber::~nsSBCSGroupProber()
|
||||
{
|
||||
for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
|
||||
for (PRUint32 i = 0; i < MAX_NUM_OF_SBCS_PROBERS; i++)
|
||||
{
|
||||
delete mProbers[i];
|
||||
if (mProbers[i]) { delete mProbers[i]; }
|
||||
}
|
||||
}
|
||||
|
||||
@ -234,7 +244,7 @@ const char* nsSBCSGroupProber::GetCharSetName()
|
||||
void nsSBCSGroupProber::Reset(void)
|
||||
{
|
||||
mActiveNum = 0;
|
||||
for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
|
||||
for (PRUint32 i = 0; i < MAX_NUM_OF_SBCS_PROBERS; ++i)
|
||||
{
|
||||
if (mProbers[i]) // not null
|
||||
{
|
||||
@ -242,8 +252,9 @@ void nsSBCSGroupProber::Reset(void)
|
||||
mIsActive[i] = PR_TRUE;
|
||||
++mActiveNum;
|
||||
}
|
||||
else
|
||||
else {
|
||||
mIsActive[i] = PR_FALSE;
|
||||
}
|
||||
}
|
||||
mBestGuess = -1;
|
||||
mState = eDetecting;
|
||||
@ -269,7 +280,7 @@ nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
if (newLen1 == 0)
|
||||
goto done; // Nothing to see here, move on.
|
||||
|
||||
for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
|
||||
for (i = 0; i < mNumOfProbers; i++)
|
||||
{
|
||||
if (!mIsActive[i])
|
||||
continue;
|
||||
@ -300,21 +311,20 @@ done:
|
||||
|
||||
float nsSBCSGroupProber::GetConfidence(void)
|
||||
{
|
||||
PRUint32 i;
|
||||
float bestConf = 0.0, cf;
|
||||
float bestConf = 0.0f;
|
||||
|
||||
switch (mState)
|
||||
{
|
||||
case eFoundIt:
|
||||
return (float)0.99; //sure yes
|
||||
return SURE_YES;
|
||||
case eNotMe:
|
||||
return (float)0.01; //sure no
|
||||
return SURE_NO;
|
||||
default:
|
||||
for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
|
||||
for (PRUint32 i = 0; i < mNumOfProbers; i++)
|
||||
{
|
||||
if (!mIsActive[i])
|
||||
continue;
|
||||
cf = mProbers[i]->GetConfidence();
|
||||
float const cf = mProbers[i]->GetConfidence();
|
||||
if (bestConf < cf)
|
||||
{
|
||||
bestConf = cf;
|
||||
@ -333,7 +343,7 @@ void nsSBCSGroupProber::DumpStatus()
|
||||
|
||||
cf = GetConfidence();
|
||||
printf(" SBCS Group Prober --------begin status \r\n");
|
||||
for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
|
||||
for (i = 0; i < mNumOfProbers; i++)
|
||||
{
|
||||
if (!mIsActive[i])
|
||||
printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
* vim: et sw=2 ts=2 fdm=marker
|
||||
*/
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
@ -42,10 +42,11 @@
|
||||
#define nsSBCSGroupProber_h__
|
||||
|
||||
|
||||
#define NUM_OF_SBCS_PROBERS 109
|
||||
#define MAX_NUM_OF_SBCS_PROBERS 109
|
||||
|
||||
class nsCharSetProber;
|
||||
class nsSBCSGroupProber: public nsCharSetProber {
|
||||
|
||||
class nsSBCSGroupProber : public nsCharSetProber {
|
||||
public:
|
||||
nsSBCSGroupProber();
|
||||
virtual ~nsSBCSGroupProber();
|
||||
@ -62,8 +63,9 @@ public:
|
||||
|
||||
protected:
|
||||
nsProbingState mState;
|
||||
nsCharSetProber* mProbers[NUM_OF_SBCS_PROBERS];
|
||||
PRBool mIsActive[NUM_OF_SBCS_PROBERS];
|
||||
nsCharSetProber* mProbers[MAX_NUM_OF_SBCS_PROBERS];
|
||||
PRBool mIsActive[MAX_NUM_OF_SBCS_PROBERS];
|
||||
PRUint32 mNumOfProbers;
|
||||
PRInt32 mBestGuess;
|
||||
PRUint32 mActiveNum;
|
||||
};
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
* vim: et sw=2 ts=2 fdm=marker
|
||||
*/
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
@ -82,7 +82,7 @@ nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32
|
||||
if (mState == eDetecting)
|
||||
if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
|
||||
{
|
||||
float cf = GetConfidence();
|
||||
float const cf = GetConfidence();
|
||||
if (cf > POSITIVE_SHORTCUT_THRESHOLD)
|
||||
mState = eFoundIt;
|
||||
else if (cf < NEGATIVE_SHORTCUT_THRESHOLD)
|
||||
@ -112,12 +112,22 @@ float nsSingleByteCharSetProber::GetConfidence(void)
|
||||
if (mTotalSeqs > 0)
|
||||
if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 )
|
||||
return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar;
|
||||
return (float)0.01;
|
||||
return SURE_NO;
|
||||
#else //POSITIVE_APPROACH
|
||||
float r;
|
||||
|
||||
if (mTotalSeqs > 0) {
|
||||
r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
|
||||
#define ffactor(m,d) (((d) > 0) ? ((float)(m)/(float)(d)) : 1.0f)
|
||||
|
||||
PRUint32 const txtChar = (mTotalChar > mCtrlChar) ? (mTotalChar - mCtrlChar) : (mTotalSeqs << 1);
|
||||
|
||||
if ((txtChar > 0) && (mTotalSeqs > 0))
|
||||
{
|
||||
PRUint32 const goodSeqCnt = mSeqCounters[POSITIVE_CAT] + (mSeqCounters[PROBABLE_CAT] >> 1);
|
||||
|
||||
float r = mModel->mTypicalPositiveRatio;
|
||||
|
||||
// negative sequence correction factor
|
||||
r *= ffactor(goodSeqCnt, mTotalSeqs + (mSeqCounters[NEGATIVE_CAT] << 4));
|
||||
|
||||
/* Multiply by a ratio of positive sequences per characters.
|
||||
* This would help in particular to distinguish close winners.
|
||||
* Indeed if you add a letter, you'd expect the positive sequence count
|
||||
@ -126,18 +136,21 @@ float nsSingleByteCharSetProber::GetConfidence(void)
|
||||
* character). This could make the difference between very closely related
|
||||
* charsets used for the same language.
|
||||
*/
|
||||
r = r * mSeqCounters[POSITIVE_CAT] / mTotalChar;
|
||||
//r = r * (mSeqCounters[POSITIVE_CAT] + (float) mSeqCounters[PROBABLE_CAT] / 4) / mTotalChar;
|
||||
r *= ffactor(goodSeqCnt + mSeqCounters[NEUTRAL_CAT], txtChar);
|
||||
|
||||
/* The more control characters (proportionally to the size of the text), the
|
||||
* less confident we become in the current charset.
|
||||
*/
|
||||
r = r * (mTotalChar - mCtrlChar) / mTotalChar;
|
||||
r = r*mFreqChar/mTotalChar;
|
||||
if (r >= (float)1.00)
|
||||
r = (float)0.99;
|
||||
r *= ffactor(txtChar, mTotalChar);
|
||||
|
||||
// normalizing
|
||||
r *= ffactor(mFreqChar, mTotalChar);
|
||||
|
||||
if (r >= 1.00f) { r = SURE_YES; }
|
||||
|
||||
return r;
|
||||
}
|
||||
return (float)0.01;
|
||||
return SURE_NO;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
* vim: et sw=2 ts=2 fdm=marker
|
||||
*/
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
@ -55,9 +55,9 @@
|
||||
/* Numbers 0-9. */
|
||||
#define NUM 251
|
||||
|
||||
#define SB_ENOUGH_REL_THRESHOLD 1024
|
||||
#define POSITIVE_SHORTCUT_THRESHOLD (float)0.95
|
||||
#define NEGATIVE_SHORTCUT_THRESHOLD (float)0.05
|
||||
#define SB_ENOUGH_REL_THRESHOLD min(512, ENOUGH_DATA_THRESHOLD)
|
||||
#define POSITIVE_SHORTCUT_THRESHOLD SHORTCUT_THRESHOLD
|
||||
#define NEGATIVE_SHORTCUT_THRESHOLD (0.05f)
|
||||
#define SYMBOL_CAT_ORDER 250
|
||||
#define NUMBER_OF_SEQ_CAT 4
|
||||
#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT-1)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
* vim: et sw=2 ts=2 fdm=marker
|
||||
*/
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
@ -71,19 +71,19 @@ nsProbingState nsUTF8Prober::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
return mState;
|
||||
}
|
||||
|
||||
#define ONE_CHAR_PROB (float)0.50
|
||||
#define ONE_CHAR_PROB (0.50f)
|
||||
|
||||
float nsUTF8Prober::GetConfidence(void)
|
||||
{
|
||||
float unlike = (float)0.99;
|
||||
float unlike = SURE_YES;
|
||||
|
||||
if (mNumOfMBChar < 6)
|
||||
{
|
||||
for (PRUint32 i = 0; i < mNumOfMBChar; i++)
|
||||
unlike *= ONE_CHAR_PROB;
|
||||
return (float)1.0 - unlike;
|
||||
return (1.0f - unlike);
|
||||
}
|
||||
else
|
||||
return (float)0.99;
|
||||
return SURE_YES;
|
||||
}
|
||||
|
||||
|
||||
@ -107,8 +107,6 @@ nsUniversalDetector::Reset()
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
#define SHORTCUT_THRESHOLD (float)0.95
|
||||
#define MINIMUM_THRESHOLD (float)0.20
|
||||
|
||||
nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
{
|
||||
@ -243,7 +241,7 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
PRUint32 i;
|
||||
for (i = 0; i < aLen; i++)
|
||||
{
|
||||
//other than 0xa0, if every othe character is ascii, the page is ascii
|
||||
//other than 0xa0, if every other character is ascii, the page is ascii
|
||||
if (aBuf[i] & '\x80' && aBuf[i] != '\xA0') //Since many Ascii only page contains NBSP
|
||||
{
|
||||
//we got a non-ascii byte (high-byte)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
@ -45,9 +45,23 @@ typedef unsigned short PRUint16;
|
||||
typedef signed char PRInt8;
|
||||
typedef unsigned char PRUint8;
|
||||
|
||||
#define nsnull nullptr
|
||||
|
||||
#define PR_FALSE false
|
||||
#define PR_TRUE true
|
||||
#define nsnull 0
|
||||
|
||||
#define MINIMUM_DATA_THRESHOLD 4
|
||||
#define ENOUGH_DATA_THRESHOLD 1024
|
||||
|
||||
#define SURE_YES (0.99f)
|
||||
#define SURE_NO (0.01f)
|
||||
|
||||
#define SHORTCUT_THRESHOLD (0.95f)
|
||||
#define MINIMUM_THRESHOLD (0.20f)
|
||||
|
||||
#ifndef min
|
||||
#define min(x,y) (((x) < (y)) ? (x) : (y))
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef strdup
|
||||
|
||||
Loading…
Reference in New Issue
Block a user