diff --git a/Build/Notepad3.ini b/Build/Notepad3.ini
index 9495ec706..a34227077 100644
--- a/Build/Notepad3.ini
+++ b/Build/Notepad3.ini
@@ -1,4 +1,4 @@
-[Notepad3]
+[Notepad3]
;Notepad3.ini=%USERPROFILE%\Notepad3.ini
;Notepad3.ini=%APPDATA%\Rizonesoft\Notepad3\Notepad3.ini
[Settings]
@@ -49,7 +49,7 @@ SettingsVersion=4
;UndoTransactionTimeout=0
;AdministrationTool.exe=
;DevDebugMode=0
-;AnalyzeReliableConfidenceLevel=66
+;AnalyzeReliableConfidenceLevel=70
;LexerSQLNumberSignAsComment=1
;ExitOnESCSkipLevel=2
[Statusbar Settings]
diff --git a/Versions/build.txt b/Versions/build.txt
index e31b25996..a80fc0bca 100644
--- a/Versions/build.txt
+++ b/Versions/build.txt
@@ -1 +1 @@
-2708
+2709
diff --git a/res/Notepad3.exe.manifest.conf b/res/Notepad3.exe.manifest.conf
index fb5912c65..fe37c9301 100644
--- a/res/Notepad3.exe.manifest.conf
+++ b/res/Notepad3.exe.manifest.conf
@@ -3,7 +3,7 @@
Notepad3 BETA
diff --git a/src/Config/Config.cpp b/src/Config/Config.cpp
index 2ef2a8bb6..1054d0485 100644
--- a/src/Config/Config.cpp
+++ b/src/Config/Config.cpp
@@ -780,7 +780,7 @@ void LoadSettings()
Settings2.NoCutLineOnEmptySelection = IniSectionGetBool(Settings2_Section, L"NoCutLineOnEmptySelection", Defaults2.NoCutLineOnEmptySelection);
- int const iARCLdef = 66;
+ int const iARCLdef = 70;
Defaults2.AnalyzeReliableConfidenceLevel = (float)iARCLdef / 100.0f;
int const iARCLset = clampi(IniSectionGetInt(Settings2_Section, L"AnalyzeReliableConfidenceLevel", iARCLdef), 0, 100);
Settings2.AnalyzeReliableConfidenceLevel = (float)iARCLset / 100.0f;
diff --git a/src/EncodingDetection.cpp b/src/EncodingDetection.cpp
index 5514065d0..a80870a7e 100644
--- a/src/EncodingDetection.cpp
+++ b/src/EncodingDetection.cpp
@@ -904,8 +904,8 @@ static void _SetEncodingTitleInfo(const char* encodingUCD, cpi_enc_t encUCD, flo
const char* ukn = (!encodingUCD || (encodingUCD[0] == '\0')) ? "" : encodingUCD;
StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), (encUCD == CPI_ASCII_7BIT) ? "ASCII" : ukn);
}
- float const ucd_conf_perc = ucd_confidence * 100.0f;
- StringCchPrintfA(tmpBuf, 128, "' Conf=%.0f%%", ucd_conf_perc);
+ int const ucd_conf_perc = float2int(ucd_confidence * 100.0f);
+ StringCchPrintfA(tmpBuf, ARRAYSIZE(tmpBuf), "' Conf=%i%%", ucd_conf_perc);
StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), tmpBuf);
//~StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), " || CED='");
@@ -920,15 +920,17 @@ static void _SetEncodingTitleInfo(const char* encodingUCD, cpi_enc_t encUCD, flo
//~if ((encCED >= 0) || (encCED == CPI_ASCII_7BIT)) {
//~ bool const ced_reliable = (ced_confidence >= Settings2.ReliableCEDConfidenceMapping);
//~ bool const ced_not_reliable = (ced_confidence <= Settings2.UnReliableCEDConfidenceMapping);
- //~ StringCchPrintfA(tmpBuf, 128, "' Conf=%.0f%% [%s])", ced_confidence * 100.0f,
+ //~ StringCchPrintfA(tmpBuf, ARRAYSIZE(tmpBuf), "' Conf=%.0f%% [%s])", ced_confidence * 100.0f,
//~ ced_reliable ? "reliable" : (ced_not_reliable ? "NOT reliable" : "???"));
//~ StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), tmpBuf);
//~}
//~else {
//~ StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), "'");
//~}
-
- StringCchPrintfA(tmpBuf, ARRAYSIZE(tmpBuf), ucd_confidence >= Settings2.AnalyzeReliableConfidenceLevel ? " (reliable)" : " (NOT reliable)");
+
+ int const relThreshold = float2int(Settings2.AnalyzeReliableConfidenceLevel * 100.0f); // threshold as integer percent, same scale as ucd_conf_perc
+ const char* rel_fmt = (ucd_conf_perc >= relThreshold) ? " (reliable (%i%%))" : " (NOT reliable (%i%%))"; // both variants print the threshold, identically spaced
+ StringCchPrintfA(tmpBuf, ARRAYSIZE(tmpBuf), rel_fmt, relThreshold);
StringCchCatA(chEncodingInfo, ARRAYSIZE(chEncodingInfo), tmpBuf);
::MultiByteToWideChar(CP_UTF7, 0, chEncodingInfo, -1, wchEncodingInfo, ARRAYSIZE(wchEncodingInfo));
@@ -1327,7 +1329,9 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(LPWSTR pszFile, const char* lpData,
}
}
- encDetRes.bIsAnalysisReliable = (confidence >= Settings2.AnalyzeReliableConfidenceLevel);
+ int const iConfidence = float2int(confidence * 100.0f);
+ int const iReliableThreshold = float2int(Settings2.AnalyzeReliableConfidenceLevel * 100.0f);
+ encDetRes.bIsAnalysisReliable = (iConfidence >= iReliableThreshold);
// --------------------------------------------------------------------------
// --- choose best encoding guess ----
diff --git a/src/Notepad3.c b/src/Notepad3.c
index 8a52fd45e..ee4731816 100644
--- a/src/Notepad3.c
+++ b/src/Notepad3.c
@@ -9843,10 +9843,13 @@ bool FileRevert(LPCWSTR szFileName, bool bIgnoreCmdLnEnc)
bool bPreserveView = true;
DOCVIEWPOS_T const docView = EditGetCurrentDocView(Globals.hwndEdit);
+ Encoding_SrcWeak(CPI_NONE);
if (bIgnoreCmdLnEnc) {
- Encoding_Forced(CPI_NONE); // ignore history too
+ Encoding_Forced(CPI_NONE); // ignore history too
+ }
+ else if (Encoding_HasChanged(Encoding_Current(CPI_GET))) {
+ Encoding_SrcWeak(Encoding_Current(CPI_GET));
}
- Encoding_SrcWeak(Encoding_Current(CPI_GET));
WCHAR tchFileName2[MAX_PATH] = { L'\0' };
StringCchCopyW(tchFileName2, COUNTOF(tchFileName2), szFileName);
diff --git a/src/VersionEx.h b/src/VersionEx.h
index fceb7fc17..7fc589b7f 100644
--- a/src/VersionEx.h
+++ b/src/VersionEx.h
@@ -9,7 +9,7 @@
#define VERSION_MAJOR 5
#define VERSION_MINOR 20
#define VERSION_REV 116
-#define VERSION_BUILD 2708
+#define VERSION_BUILD 2709
#define SCINTILLA_VER 423
#define ONIGURUMA_REGEX_VER 6.9.4
#define UCHARDET_VER 2018.09.27
diff --git a/uchardet/uchardet/src/CharDistribution.cpp b/uchardet/uchardet/src/CharDistribution.cpp
index e3339bc72..860d79516 100644
--- a/uchardet/uchardet/src/CharDistribution.cpp
+++ b/uchardet/uchardet/src/CharDistribution.cpp
@@ -1,4 +1,4 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: et sw=2 ts=2 fdm=marker
*/
/* ***** BEGIN LICENSE BLOCK *****
@@ -46,20 +46,18 @@
//#include "LangModels/GB2312Freq.tab"
#include "LangModels/GB18030Freq.tab"
-#define SURE_YES 0.99f
-#define SURE_NO 0.01f
-
//return confidence base on received data
float CharDistributionAnalysis::GetConfidence()
{
//if we didn't receive any character in our consideration range, or the
// number of frequent characters is below the minimum threshold, return
// negative answer
- if (mTotalChars <= 0 || mFreqChars <= mDataThreshold)
+ if ((mTotalChars <= 0) || (mFreqChars < mDataThreshold))
return SURE_NO;
- if (mTotalChars != mFreqChars) {
- float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);
+ if (mTotalChars > mFreqChars)
+ {
+ float r = (float)mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);
if (r < SURE_YES)
return r;
diff --git a/uchardet/uchardet/src/CharDistribution.h b/uchardet/uchardet/src/CharDistribution.h
index bcb60b975..def7cef63 100644
--- a/uchardet/uchardet/src/CharDistribution.h
+++ b/uchardet/uchardet/src/CharDistribution.h
@@ -1,4 +1,4 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: et sw=2 ts=2 fdm=marker
*/
/* ***** BEGIN LICENSE BLOCK *****
@@ -42,10 +42,6 @@
#include "nscore.h"
-#define ENOUGH_DATA_THRESHOLD 4096
-
-#define MINIMUM_DATA_THRESHOLD 4
-
class CharDistributionAnalysis
{
public:
@@ -92,7 +88,7 @@ public:
//It is not necessary to receive all data to draw conclusion. For charset detection,
// certain amount of data is enough
- PRBool GotEnoughData() {return mTotalChars > ENOUGH_DATA_THRESHOLD;};
+ PRBool GotEnoughData() { return (mTotalChars >= ENOUGH_DATA_THRESHOLD); };
protected:
//we do not handle character base on its original encoding string, but
diff --git a/uchardet/uchardet/src/nsCharSetProber.h b/uchardet/uchardet/src/nsCharSetProber.h
index 27bb16a33..e7e08677d 100644
--- a/uchardet/uchardet/src/nsCharSetProber.h
+++ b/uchardet/uchardet/src/nsCharSetProber.h
@@ -50,7 +50,6 @@ typedef enum {
eNotMe = 2 //Negative answer
} nsProbingState;
-#define SHORTCUT_THRESHOLD (float)0.95
class nsCharSetProber {
public:
diff --git a/uchardet/uchardet/src/nsEscCharsetProber.h b/uchardet/uchardet/src/nsEscCharsetProber.h
index 56b3828f6..719052313 100644
--- a/uchardet/uchardet/src/nsEscCharsetProber.h
+++ b/uchardet/uchardet/src/nsEscCharsetProber.h
@@ -53,7 +53,7 @@ public:
const char* GetCharSetName() {return mDetectedCharset;};
nsProbingState GetState(void) {return mState;};
void Reset(void);
- float GetConfidence(void){return (float)0.99;};
+ float GetConfidence(void){return SURE_YES;};
void SetOpion() {};
protected:
diff --git a/uchardet/uchardet/src/nsGB18030Prober.cpp b/uchardet/uchardet/src/nsGB18030Prober.cpp
index f6d5c5b76..760aef6d0 100644
--- a/uchardet/uchardet/src/nsGB18030Prober.cpp
+++ b/uchardet/uchardet/src/nsGB18030Prober.cpp
@@ -1,4 +1,4 @@
-/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@@ -89,8 +89,6 @@ nsProbingState nsGB18030Prober::HandleData(const char* aBuf, PRUint32 aLen)
float nsGB18030Prober::GetConfidence(void)
{
- float distribCf = mDistributionAnalyser.GetConfidence();
-
- return (float)distribCf;
+ return mDistributionAnalyser.GetConfidence();
}
diff --git a/uchardet/uchardet/src/nsHebrewProber.cpp b/uchardet/uchardet/src/nsHebrewProber.cpp
index 9becb821e..6a039c094 100644
--- a/uchardet/uchardet/src/nsHebrewProber.cpp
+++ b/uchardet/uchardet/src/nsHebrewProber.cpp
@@ -58,7 +58,7 @@
// Minimum Visual vs Logical model score difference.
// If the difference is below this, don't rely at all on the model score distance.
-#define MIN_MODEL_DISTANCE (0.01)
+#define MIN_MODEL_DISTANCE (0.01f)
#define VISUAL_HEBREW_NAME ("ISO-8859-8")
#define LOGICAL_HEBREW_NAME ("WINDOWS-1255")
diff --git a/uchardet/uchardet/src/nsLatin1Prober.cpp b/uchardet/uchardet/src/nsLatin1Prober.cpp
index 9dc76a789..c1cc7d8f3 100644
--- a/uchardet/uchardet/src/nsLatin1Prober.cpp
+++ b/uchardet/uchardet/src/nsLatin1Prober.cpp
@@ -149,23 +149,22 @@ nsProbingState nsLatin1Prober::HandleData(const char* aBuf, PRUint32 aLen)
float nsLatin1Prober::GetConfidence(void)
{
if (mState == eNotMe)
- return 0.01f;
+ return SURE_NO;
- float confidence;
PRUint32 total = 0;
- for (PRInt32 i = 0; i < FREQ_CAT_NUM; i++)
+ for (PRInt32 i = 0; i < FREQ_CAT_NUM; i++) {
total += mFreqCounter[i];
-
- if(!total)
- confidence = 0.0f;
- else
- {
- confidence = mFreqCounter[3]*1.0f / total;
- confidence -= mFreqCounter[1]*20.0f/total;
}
- if (confidence < 0.0f)
- confidence = 0.0f;
+ float confidence = 0.0f;
+
+ if (total)
+ {
+ confidence = (float)mFreqCounter[3] / (float)total;
+ confidence -= (float)mFreqCounter[1] * 20.0f / (float)total;
+ }
+
+ if (confidence < 0.0f) { confidence = 0.0f; }
// lower the confidence of latin1 so that other more accurate detector
// can take priority.
diff --git a/uchardet/uchardet/src/nsMBCSGroupProber.cpp b/uchardet/uchardet/src/nsMBCSGroupProber.cpp
index 1e18f4dd6..c1051b5c9 100644
--- a/uchardet/uchardet/src/nsMBCSGroupProber.cpp
+++ b/uchardet/uchardet/src/nsMBCSGroupProber.cpp
@@ -59,10 +59,8 @@ const char *ProberName[] =
#endif
nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
+ : mNumOfProbers(MAX_NUM_OF_MBCS_PROBERS), mBestGuess(-1), mActiveNum(0)
{
- for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++) {
- mProbers[i] = nsnull;
- }
PRUint32 i = 0;
mProbers[i++] = new nsUTF8Prober();
if (aLanguageFilter & NS_FILTER_JAPANESE)
@@ -84,14 +82,19 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
mProbers[i++] = new nsBig5Prober(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL);
mProbers[i++] = new nsEUCTWProber(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL);
}
+
+ mNumOfProbers = i;
+
+ for (; i < MAX_NUM_OF_MBCS_PROBERS; ++i) { mProbers[i] = nsnull; }
+
Reset();
}
nsMBCSGroupProber::~nsMBCSGroupProber()
{
- for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
+ for (PRUint32 i = 0; i < MAX_NUM_OF_MBCS_PROBERS; ++i)
{
- delete mProbers[i];
+ if (mProbers[i]) { delete mProbers[i]; }
}
}
@@ -100,8 +103,8 @@ const char* nsMBCSGroupProber::GetCharSetName()
if (mBestGuess == -1)
{
GetConfidence();
- if (mBestGuess == -1)
- mBestGuess = 0;
+
+ if (mBestGuess == -1) { mBestGuess = 0; }
}
return mProbers[mBestGuess]->GetCharSetName();
}
@@ -109,7 +112,7 @@ const char* nsMBCSGroupProber::GetCharSetName()
void nsMBCSGroupProber::Reset(void)
{
mActiveNum = 0;
- for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
+ for (PRUint32 i = 0; i < MAX_NUM_OF_MBCS_PROBERS; i++)
{
if (mProbers[i])
{
@@ -144,7 +147,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
{
if (--keepNext == 0)
{
- for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
+ for (PRUint32 i = 0; i < mNumOfProbers; i++)
{
if (!mIsActive[i])
continue;
@@ -161,7 +164,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
}
if (keepNext) {
- for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
+ for (PRUint32 i = 0; i < mNumOfProbers; i++)
{
if (!mIsActive[i])
continue;
@@ -179,23 +182,22 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
return mState;
}
-float nsMBCSGroupProber::GetConfidence(void)
+float nsMBCSGroupProber::GetConfidence()
{
- PRUint32 i;
- float bestConf = 0.0, cf;
+ float bestConf = 0.0f;
switch (mState)
{
case eFoundIt:
- return (float)0.99;
+ return SURE_YES;
case eNotMe:
- return (float)0.01;
+ return SURE_NO;
default:
- for (i = 0; i < NUM_OF_PROBERS; i++)
+ for (PRUint32 i = 0; i < mNumOfProbers; i++)
{
if (!mIsActive[i])
continue;
- cf = mProbers[i]->GetConfidence();
+ float const cf = mProbers[i]->GetConfidence();
if (bestConf < cf)
{
bestConf = cf;
@@ -209,17 +211,14 @@ float nsMBCSGroupProber::GetConfidence(void)
#ifdef DEBUG_chardet
void nsMBCSGroupProber::DumpStatus()
{
- PRUint32 i;
- float cf;
-
GetConfidence();
- for (i = 0; i < NUM_OF_PROBERS; i++)
+ for (PRUint32 i = 0; i < mNumOfProbers; i++)
{
if (!mIsActive[i])
printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
else
{
- cf = mProbers[i]->GetConfidence();
+ float const cf = mProbers[i]->GetConfidence();
printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]);
}
}
@@ -229,7 +228,7 @@ void nsMBCSGroupProber::DumpStatus()
#ifdef DEBUG_jgmyers
void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], PRUint32 &offset)
{
- for (PRUint32 i = 0; i < NUM_OF_PROBERS; ++i) {
+ for (PRUint32 i = 0; i < mNumOfProbers; ++i) {
states[offset].name = ProberName[i];
states[offset].isActive = mIsActive[i];
states[offset].confidence = mIsActive[i] ? mProbers[i]->GetConfidence() : 0.0;
diff --git a/uchardet/uchardet/src/nsMBCSGroupProber.h b/uchardet/uchardet/src/nsMBCSGroupProber.h
index 42b8a6e41..df159e24d 100644
--- a/uchardet/uchardet/src/nsMBCSGroupProber.h
+++ b/uchardet/uchardet/src/nsMBCSGroupProber.h
@@ -1,4 +1,4 @@
-/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: et sw=2 ts=2 fdm=marker
*/
/* ***** BEGIN LICENSE BLOCK *****
@@ -50,7 +50,7 @@
#include "nsBig5Prober.h"
#include "nsEUCTWProber.h"
-#define NUM_OF_PROBERS 7
+#define MAX_NUM_OF_MBCS_PROBERS 7
class nsMBCSGroupProber: public nsCharSetProber {
public:
@@ -73,9 +73,10 @@ public:
protected:
nsProbingState mState;
- nsCharSetProber* mProbers[NUM_OF_PROBERS];
- PRBool mIsActive[NUM_OF_PROBERS];
- PRInt32 mBestGuess;
+ nsCharSetProber* mProbers[MAX_NUM_OF_MBCS_PROBERS];
+ PRBool mIsActive[MAX_NUM_OF_MBCS_PROBERS];
+ PRUint32 mNumOfProbers;
+ PRInt32 mBestGuess;
PRUint32 mActiveNum;
PRUint32 mKeepNext;
};
diff --git a/uchardet/uchardet/src/nsSBCSGroupProber.cpp b/uchardet/uchardet/src/nsSBCSGroupProber.cpp
index 8fb778ce2..71aed6310 100644
--- a/uchardet/uchardet/src/nsSBCSGroupProber.cpp
+++ b/uchardet/uchardet/src/nsSBCSGroupProber.cpp
@@ -1,4 +1,4 @@
-/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: et sw=2 ts=2 fdm=marker
*/
/* ***** BEGIN LICENSE BLOCK *****
@@ -46,173 +46,183 @@
#include "nsHebrewProber.h"
+
nsSBCSGroupProber::nsSBCSGroupProber()
+ : mNumOfProbers(MAX_NUM_OF_SBCS_PROBERS), mBestGuess(-1), mActiveNum(0)
{
- mProbers[0] = new nsSingleByteCharSetProber(&Win1251RussianModel);
- mProbers[1] = new nsSingleByteCharSetProber(&Koi8rRussianModel);
- mProbers[2] = new nsSingleByteCharSetProber(&Latin5RussianModel);
- mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicRussianModel);
- mProbers[4] = new nsSingleByteCharSetProber(&Ibm866RussianModel);
- mProbers[5] = new nsSingleByteCharSetProber(&Ibm855RussianModel);
+ PRUint32 i = 0;
+ mProbers[i++] = new nsSingleByteCharSetProber(&Win1251RussianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Koi8rRussianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Latin5RussianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&MacCyrillicRussianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Ibm866RussianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Ibm855RussianModel);
- mProbers[6] = new nsSingleByteCharSetProber(&Iso_8859_7GreekModel);
- mProbers[7] = new nsSingleByteCharSetProber(&Windows_1253GreekModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_7GreekModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1253GreekModel);
- mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
- mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
nsHebrewProber *hebprober = new nsHebrewProber();
// Notice: Any change in these indexes - 10,11,12 must be reflected
// in the code below as well.
- mProbers[10] = hebprober;
- mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
- mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
+ PRUint32 const heb = i;
+ mProbers[i++] = hebprober;
+ mProbers[i++] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
+ mProbers[i++] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
// Tell the Hebrew prober about the logical and visual probers
- if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
+ if (mProbers[heb] && mProbers[heb+1] && mProbers[heb+2]) // all are not null
{
- hebprober->SetModelProbers(mProbers[11], mProbers[12]);
+ hebprober->SetModelProbers(mProbers[heb+1], mProbers[heb+2]);
}
else // One or more is null. avoid any Hebrew probing, null them all
{
- for (PRUint32 i = 10; i <= 12; ++i)
+ for (PRUint32 j = heb + 3; j-- > heb; ) // visits heb+2, heb+1, heb; unsigned-safe: cannot wrap even if heb == 0
{
- delete mProbers[i];
- mProbers[i] = 0;
+ delete mProbers[j];
+ mProbers[j] = nsnull;
}
}
+ mProbers[i++] = new nsSingleByteCharSetProber(&Tis_620ThaiModel);
- mProbers[13] = new nsSingleByteCharSetProber(&Tis_620ThaiModel);
- mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_11ThaiModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252AfricaansModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1AfricaansModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9AfricaansModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15AfricaansModel);
- mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
- mProbers[16] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
- mProbers[17] = new nsSingleByteCharSetProber(&Windows_1252FrenchModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1GermanModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252GermanModel);
- mProbers[18] = new nsSingleByteCharSetProber(&Iso_8859_1SpanishModel);
- mProbers[19] = new nsSingleByteCharSetProber(&Iso_8859_15SpanishModel);
- mProbers[20] = new nsSingleByteCharSetProber(&Windows_1252SpanishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252FrenchModel);
- mProbers[21] = new nsSingleByteCharSetProber(&Iso_8859_2HungarianModel);
- mProbers[22] = new nsSingleByteCharSetProber(&Windows_1250HungarianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1SpanishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15SpanishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252SpanishModel);
- mProbers[23] = new nsSingleByteCharSetProber(&Iso_8859_1GermanModel);
- mProbers[24] = new nsSingleByteCharSetProber(&Windows_1252GermanModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1PortugueseModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9PortugueseModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15PortugueseModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252PortugueseModel);
- mProbers[25] = new nsSingleByteCharSetProber(&Iso_8859_3EsperantoModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2HungarianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250HungarianModel);
- mProbers[26] = new nsSingleByteCharSetProber(&Iso_8859_3TurkishModel);
- mProbers[27] = new nsSingleByteCharSetProber(&Iso_8859_9TurkishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_3EsperantoModel);
- mProbers[28] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel);
- mProbers[29] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_3TurkishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9TurkishModel);
- mProbers[30] = new nsSingleByteCharSetProber(&VisciiVietnameseModel);
- mProbers[31] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15DanishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1DanishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252DanishModel);
- mProbers[32] = new nsSingleByteCharSetProber(&Iso_8859_15DanishModel);
- mProbers[33] = new nsSingleByteCharSetProber(&Iso_8859_1DanishModel);
- mProbers[34] = new nsSingleByteCharSetProber(&Windows_1252DanishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13LithuanianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_10LithuanianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_4LithuanianModel);
- mProbers[35] = new nsSingleByteCharSetProber(&Iso_8859_13LithuanianModel);
- mProbers[36] = new nsSingleByteCharSetProber(&Iso_8859_10LithuanianModel);
- mProbers[37] = new nsSingleByteCharSetProber(&Iso_8859_4LithuanianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13LatvianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_10LatvianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_4LatvianModel);
- mProbers[38] = new nsSingleByteCharSetProber(&Iso_8859_13LatvianModel);
- mProbers[39] = new nsSingleByteCharSetProber(&Iso_8859_10LatvianModel);
- mProbers[40] = new nsSingleByteCharSetProber(&Iso_8859_4LatvianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_3MalteseModel);
- mProbers[41] = new nsSingleByteCharSetProber(&Iso_8859_1PortugueseModel);
- mProbers[42] = new nsSingleByteCharSetProber(&Iso_8859_9PortugueseModel);
- mProbers[43] = new nsSingleByteCharSetProber(&Iso_8859_15PortugueseModel);
- mProbers[44] = new nsSingleByteCharSetProber(&Windows_1252PortugueseModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250CzechModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2CzechModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&MaccentraleuropeCzechModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852CzechModel);
- mProbers[45] = new nsSingleByteCharSetProber(&Iso_8859_3MalteseModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250SlovakModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2SlovakModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&MaccentraleuropeSlovakModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852SlovakModel);
- mProbers[46] = new nsSingleByteCharSetProber(&Windows_1250CzechModel);
- mProbers[47] = new nsSingleByteCharSetProber(&Iso_8859_2CzechModel);
- mProbers[48] = new nsSingleByteCharSetProber(&MaccentraleuropeCzechModel);
- mProbers[49] = new nsSingleByteCharSetProber(&Ibm852CzechModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250PolishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2PolishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13PolishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_16PolishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&MaccentraleuropePolishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852PolishModel);
- mProbers[50] = new nsSingleByteCharSetProber(&Windows_1250SlovakModel);
- mProbers[51] = new nsSingleByteCharSetProber(&Iso_8859_2SlovakModel);
- mProbers[52] = new nsSingleByteCharSetProber(&MaccentraleuropeSlovakModel);
- mProbers[53] = new nsSingleByteCharSetProber(&Ibm852SlovakModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1FinnishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_4FinnishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9FinnishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13FinnishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15FinnishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252FinnishModel);
- mProbers[54] = new nsSingleByteCharSetProber(&Windows_1250PolishModel);
- mProbers[55] = new nsSingleByteCharSetProber(&Iso_8859_2PolishModel);
- mProbers[56] = new nsSingleByteCharSetProber(&Iso_8859_13PolishModel);
- mProbers[57] = new nsSingleByteCharSetProber(&Iso_8859_16PolishModel);
- mProbers[58] = new nsSingleByteCharSetProber(&MaccentraleuropePolishModel);
- mProbers[59] = new nsSingleByteCharSetProber(&Ibm852PolishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1ItalianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_3ItalianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9ItalianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15ItalianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252ItalianModel);
- mProbers[60] = new nsSingleByteCharSetProber(&Iso_8859_1FinnishModel);
- mProbers[61] = new nsSingleByteCharSetProber(&Iso_8859_4FinnishModel);
- mProbers[62] = new nsSingleByteCharSetProber(&Iso_8859_9FinnishModel);
- mProbers[63] = new nsSingleByteCharSetProber(&Iso_8859_13FinnishModel);
- mProbers[64] = new nsSingleByteCharSetProber(&Iso_8859_15FinnishModel);
- mProbers[65] = new nsSingleByteCharSetProber(&Windows_1252FinnishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250CroatianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2CroatianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13CroatianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_16CroatianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&MaccentraleuropeCroatianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852CroatianModel);
- mProbers[66] = new nsSingleByteCharSetProber(&Iso_8859_1ItalianModel);
- mProbers[67] = new nsSingleByteCharSetProber(&Iso_8859_3ItalianModel);
- mProbers[68] = new nsSingleByteCharSetProber(&Iso_8859_9ItalianModel);
- mProbers[69] = new nsSingleByteCharSetProber(&Iso_8859_15ItalianModel);
- mProbers[70] = new nsSingleByteCharSetProber(&Windows_1252ItalianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252EstonianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1257EstonianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_4EstonianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_13EstonianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15EstonianModel);
- mProbers[71] = new nsSingleByteCharSetProber(&Windows_1250CroatianModel);
- mProbers[72] = new nsSingleByteCharSetProber(&Iso_8859_2CroatianModel);
- mProbers[73] = new nsSingleByteCharSetProber(&Iso_8859_13CroatianModel);
- mProbers[74] = new nsSingleByteCharSetProber(&Iso_8859_16CroatianModel);
- mProbers[75] = new nsSingleByteCharSetProber(&MaccentraleuropeCroatianModel);
- mProbers[76] = new nsSingleByteCharSetProber(&Ibm852CroatianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1IrishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9IrishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15IrishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252IrishModel);
- mProbers[77] = new nsSingleByteCharSetProber(&Windows_1252EstonianModel);
- mProbers[78] = new nsSingleByteCharSetProber(&Windows_1257EstonianModel);
- mProbers[79] = new nsSingleByteCharSetProber(&Iso_8859_4EstonianModel);
- mProbers[80] = new nsSingleByteCharSetProber(&Iso_8859_13EstonianModel);
- mProbers[81] = new nsSingleByteCharSetProber(&Iso_8859_15EstonianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250RomanianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2RomanianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_16RomanianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852RomanianModel);
- mProbers[82] = new nsSingleByteCharSetProber(&Iso_8859_1IrishModel);
- mProbers[83] = new nsSingleByteCharSetProber(&Iso_8859_9IrishModel);
- mProbers[84] = new nsSingleByteCharSetProber(&Iso_8859_15IrishModel);
- mProbers[85] = new nsSingleByteCharSetProber(&Windows_1252IrishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1250SloveneModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_2SloveneModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_16SloveneModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&MaccentraleuropeSloveneModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Ibm852SloveneModel);
- mProbers[86] = new nsSingleByteCharSetProber(&Windows_1250RomanianModel);
- mProbers[87] = new nsSingleByteCharSetProber(&Iso_8859_2RomanianModel);
- mProbers[88] = new nsSingleByteCharSetProber(&Iso_8859_16RomanianModel);
- mProbers[89] = new nsSingleByteCharSetProber(&Ibm852RomanianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1SwedishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_4SwedishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9SwedishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15SwedishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252SwedishModel);
- mProbers[90] = new nsSingleByteCharSetProber(&Windows_1250SloveneModel);
- mProbers[91] = new nsSingleByteCharSetProber(&Iso_8859_2SloveneModel);
- mProbers[92] = new nsSingleByteCharSetProber(&Iso_8859_16SloveneModel);
- mProbers[93] = new nsSingleByteCharSetProber(&MaccentraleuropeSloveneModel);
- mProbers[94] = new nsSingleByteCharSetProber(&Ibm852SloveneModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1251BelarusianModel);
- mProbers[95] = new nsSingleByteCharSetProber(&Iso_8859_1SwedishModel);
- mProbers[96] = new nsSingleByteCharSetProber(&Iso_8859_4SwedishModel);
- mProbers[97] = new nsSingleByteCharSetProber(&Iso_8859_9SwedishModel);
- mProbers[98] = new nsSingleByteCharSetProber(&Iso_8859_15SwedishModel);
- mProbers[99] = new nsSingleByteCharSetProber(&Windows_1252SwedishModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1252NederlandsModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_1NederlandsModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_9NederlandsModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_15NederlandsModel);
- mProbers[100] = new nsSingleByteCharSetProber(&Windows_1252AfricaansModel);
- mProbers[101] = new nsSingleByteCharSetProber(&Iso_8859_1AfricaansModel);
- mProbers[102] = new nsSingleByteCharSetProber(&Iso_8859_9AfricaansModel);
- mProbers[103] = new nsSingleByteCharSetProber(&Iso_8859_15AfricaansModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel);
- mProbers[104] = new nsSingleByteCharSetProber(&Windows_1251BelarusianModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&VisciiVietnameseModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
+
+
+ //mProbers[i++] = new nsSingleByteCharSetProber(&Tis_620ThaiModel);
+ mProbers[i++] = new nsSingleByteCharSetProber(&Iso_8859_11ThaiModel);
+
+ mNumOfProbers = i;
+
+ for (; i < MAX_NUM_OF_SBCS_PROBERS; ++i) { mProbers[i] = nsnull; }
- mProbers[105] = new nsSingleByteCharSetProber(&Windows_1252NederlandsModel);
- mProbers[106] = new nsSingleByteCharSetProber(&Iso_8859_1NederlandsModel);
- mProbers[107] = new nsSingleByteCharSetProber(&Iso_8859_9NederlandsModel);
- mProbers[108] = new nsSingleByteCharSetProber(&Iso_8859_15NederlandsModel);
-
Reset();
}
nsSBCSGroupProber::~nsSBCSGroupProber()
{
- for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
+ for (PRUint32 i = 0; i < MAX_NUM_OF_SBCS_PROBERS; i++)
{
- delete mProbers[i];
+ if (mProbers[i]) { delete mProbers[i]; }
}
}
@@ -234,7 +244,7 @@ const char* nsSBCSGroupProber::GetCharSetName()
void nsSBCSGroupProber::Reset(void)
{
mActiveNum = 0;
- for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
+ for (PRUint32 i = 0; i < MAX_NUM_OF_SBCS_PROBERS; ++i)
{
if (mProbers[i]) // not null
{
@@ -242,8 +252,9 @@ void nsSBCSGroupProber::Reset(void)
mIsActive[i] = PR_TRUE;
++mActiveNum;
}
- else
+ else {
mIsActive[i] = PR_FALSE;
+ }
}
mBestGuess = -1;
mState = eDetecting;
@@ -269,7 +280,7 @@ nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
if (newLen1 == 0)
goto done; // Nothing to see here, move on.
- for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
+ for (i = 0; i < mNumOfProbers; i++)
{
if (!mIsActive[i])
continue;
@@ -300,21 +311,20 @@ done:
float nsSBCSGroupProber::GetConfidence(void)
{
- PRUint32 i;
- float bestConf = 0.0, cf;
+ float bestConf = 0.0f;
switch (mState)
{
case eFoundIt:
- return (float)0.99; //sure yes
+ return SURE_YES;
case eNotMe:
- return (float)0.01; //sure no
+ return SURE_NO;
default:
- for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
+ for (PRUint32 i = 0; i < mNumOfProbers; i++)
{
if (!mIsActive[i])
continue;
- cf = mProbers[i]->GetConfidence();
+ float const cf = mProbers[i]->GetConfidence();
if (bestConf < cf)
{
bestConf = cf;
@@ -333,7 +343,7 @@ void nsSBCSGroupProber::DumpStatus()
cf = GetConfidence();
printf(" SBCS Group Prober --------begin status \r\n");
- for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
+ for (i = 0; i < mNumOfProbers; i++)
{
if (!mIsActive[i])
printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
diff --git a/uchardet/uchardet/src/nsSBCSGroupProber.h b/uchardet/uchardet/src/nsSBCSGroupProber.h
index 2b1b78a90..d71cbb8eb 100644
--- a/uchardet/uchardet/src/nsSBCSGroupProber.h
+++ b/uchardet/uchardet/src/nsSBCSGroupProber.h
@@ -1,4 +1,4 @@
-/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: et sw=2 ts=2 fdm=marker
*/
/* ***** BEGIN LICENSE BLOCK *****
@@ -42,10 +42,11 @@
#define nsSBCSGroupProber_h__
-#define NUM_OF_SBCS_PROBERS 109
+#define MAX_NUM_OF_SBCS_PROBERS 109
class nsCharSetProber;
-class nsSBCSGroupProber: public nsCharSetProber {
+
+class nsSBCSGroupProber : public nsCharSetProber {
public:
nsSBCSGroupProber();
virtual ~nsSBCSGroupProber();
@@ -62,8 +63,9 @@ public:
protected:
nsProbingState mState;
- nsCharSetProber* mProbers[NUM_OF_SBCS_PROBERS];
- PRBool mIsActive[NUM_OF_SBCS_PROBERS];
+ nsCharSetProber* mProbers[MAX_NUM_OF_SBCS_PROBERS];
+ PRBool mIsActive[MAX_NUM_OF_SBCS_PROBERS];
+ PRUint32 mNumOfProbers;
PRInt32 mBestGuess;
PRUint32 mActiveNum;
};
diff --git a/uchardet/uchardet/src/nsSBCharSetProber.cpp b/uchardet/uchardet/src/nsSBCharSetProber.cpp
index d2d31168b..88d85bf26 100644
--- a/uchardet/uchardet/src/nsSBCharSetProber.cpp
+++ b/uchardet/uchardet/src/nsSBCharSetProber.cpp
@@ -1,4 +1,4 @@
-/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: et sw=2 ts=2 fdm=marker
*/
/* ***** BEGIN LICENSE BLOCK *****
@@ -82,7 +82,7 @@ nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32
if (mState == eDetecting)
if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
{
- float cf = GetConfidence();
+ float const cf = GetConfidence();
if (cf > POSITIVE_SHORTCUT_THRESHOLD)
mState = eFoundIt;
else if (cf < NEGATIVE_SHORTCUT_THRESHOLD)
@@ -112,12 +112,22 @@ float nsSingleByteCharSetProber::GetConfidence(void)
if (mTotalSeqs > 0)
if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 )
return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar;
- return (float)0.01;
+ return SURE_NO;
#else //POSITIVE_APPROACH
- float r;
- if (mTotalSeqs > 0) {
- r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
+ #define ffactor(m,d) (((d) > 0) ? ((float)(m)/(float)(d)) : 1.0f)
+
+ PRUint32 const txtChar = (mTotalChar > mCtrlChar) ? (mTotalChar - mCtrlChar) : (mTotalSeqs << 1);
+
+ if ((txtChar > 0) && (mTotalSeqs > 0))
+ {
+ PRUint32 const goodSeqCnt = mSeqCounters[POSITIVE_CAT] + (mSeqCounters[PROBABLE_CAT] >> 1);
+
+ float r = mModel->mTypicalPositiveRatio;
+
+ // negative sequence correction factor
+ r *= ffactor(goodSeqCnt, mTotalSeqs + (mSeqCounters[NEGATIVE_CAT] << 4));
+
/* Multiply by a ratio of positive sequences per characters.
* This would help in particular to distinguish close winners.
* Indeed if you add a letter, you'd expect the positive sequence count
@@ -126,18 +136,21 @@ float nsSingleByteCharSetProber::GetConfidence(void)
* character). This could make the difference between very closely related
* charsets used for the same language.
*/
- r = r * mSeqCounters[POSITIVE_CAT] / mTotalChar;
- //r = r * (mSeqCounters[POSITIVE_CAT] + (float) mSeqCounters[PROBABLE_CAT] / 4) / mTotalChar;
+ r *= ffactor(goodSeqCnt + mSeqCounters[NEUTRAL_CAT], txtChar);
+
/* The more control characters (proportionnaly to the size of the text), the
* less confident we become in the current charset.
*/
- r = r * (mTotalChar - mCtrlChar) / mTotalChar;
- r = r*mFreqChar/mTotalChar;
- if (r >= (float)1.00)
- r = (float)0.99;
+ r *= ffactor(txtChar, mTotalChar);
+
+ // normalizing
+ r *= ffactor(mFreqChar, mTotalChar);
+
+ if (r >= 1.00f) { r = SURE_YES; }
+
return r;
}
- return (float)0.01;
+ return SURE_NO;
#endif
}
diff --git a/uchardet/uchardet/src/nsSBCharSetProber.h b/uchardet/uchardet/src/nsSBCharSetProber.h
index 5097d2aea..bb15037fb 100644
--- a/uchardet/uchardet/src/nsSBCharSetProber.h
+++ b/uchardet/uchardet/src/nsSBCharSetProber.h
@@ -1,4 +1,4 @@
-/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: et sw=2 ts=2 fdm=marker
*/
/* ***** BEGIN LICENSE BLOCK *****
@@ -55,9 +55,9 @@
/* Numbers 0-9. */
#define NUM 251
-#define SB_ENOUGH_REL_THRESHOLD 1024
-#define POSITIVE_SHORTCUT_THRESHOLD (float)0.95
-#define NEGATIVE_SHORTCUT_THRESHOLD (float)0.05
+#define SB_ENOUGH_REL_THRESHOLD min(512, ENOUGH_DATA_THRESHOLD)
+#define POSITIVE_SHORTCUT_THRESHOLD SHORTCUT_THRESHOLD
+#define NEGATIVE_SHORTCUT_THRESHOLD (0.05f)
#define SYMBOL_CAT_ORDER 250
#define NUMBER_OF_SEQ_CAT 4
#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT-1)
diff --git a/uchardet/uchardet/src/nsUTF8Prober.cpp b/uchardet/uchardet/src/nsUTF8Prober.cpp
index 937fcc9bc..612c01cb6 100644
--- a/uchardet/uchardet/src/nsUTF8Prober.cpp
+++ b/uchardet/uchardet/src/nsUTF8Prober.cpp
@@ -1,4 +1,4 @@
-/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: et sw=2 ts=2 fdm=marker
*/
/* ***** BEGIN LICENSE BLOCK *****
@@ -71,19 +71,19 @@ nsProbingState nsUTF8Prober::HandleData(const char* aBuf, PRUint32 aLen)
return mState;
}
-#define ONE_CHAR_PROB (float)0.50
+#define ONE_CHAR_PROB (0.50f)
float nsUTF8Prober::GetConfidence(void)
{
- float unlike = (float)0.99;
+ float unlike = SURE_YES;
if (mNumOfMBChar < 6)
{
for (PRUint32 i = 0; i < mNumOfMBChar; i++)
unlike *= ONE_CHAR_PROB;
- return (float)1.0 - unlike;
+ return (1.0f - unlike);
}
else
- return (float)0.99;
+ return SURE_YES;
}
diff --git a/uchardet/uchardet/src/nsUniversalDetector.cpp b/uchardet/uchardet/src/nsUniversalDetector.cpp
index 44247ece7..e97190fe0 100644
--- a/uchardet/uchardet/src/nsUniversalDetector.cpp
+++ b/uchardet/uchardet/src/nsUniversalDetector.cpp
@@ -107,8 +107,6 @@ nsUniversalDetector::Reset()
}
//---------------------------------------------------------------------
-#define SHORTCUT_THRESHOLD (float)0.95
-#define MINIMUM_THRESHOLD (float)0.20
nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
{
@@ -243,7 +241,7 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
PRUint32 i;
for (i = 0; i < aLen; i++)
{
- //other than 0xa0, if every othe character is ascii, the page is ascii
+ //other than 0xa0, if every other character is ascii, the page is ascii
if (aBuf[i] & '\x80' && aBuf[i] != '\xA0') //Since many Ascii only page contains NBSP
{
//we got a non-ascii byte (high-byte)
diff --git a/uchardet/uchardet/src/nscore.h b/uchardet/uchardet/src/nscore.h
index f367448c7..f506d6376 100644
--- a/uchardet/uchardet/src/nscore.h
+++ b/uchardet/uchardet/src/nscore.h
@@ -1,4 +1,4 @@
-/* ***** BEGIN LICENSE BLOCK *****
+/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
@@ -45,9 +45,23 @@ typedef unsigned short PRUint16;
typedef signed char PRInt8;
typedef unsigned char PRUint8;
+#define nsnull nullptr
+
#define PR_FALSE false
#define PR_TRUE true
-#define nsnull 0
+
+#define MINIMUM_DATA_THRESHOLD 4
+#define ENOUGH_DATA_THRESHOLD 1024
+
+#define SURE_YES (0.99f)
+#define SURE_NO (0.01f)
+
+#define SHORTCUT_THRESHOLD (0.95f)
+#define MINIMUM_THRESHOLD (0.20f)
+
+#ifndef min
+#define min(x,y) (((x) < (y)) ? (x) : (y))
+#endif
#ifdef _MSC_VER
#ifdef strdup