diff --git a/Versions/build.txt b/Versions/build.txt
index 00750edc0..d00491fd7 100644
--- a/Versions/build.txt
+++ b/Versions/build.txt
@@ -1 +1 @@
-3
+1
diff --git a/Versions/day.txt b/Versions/day.txt
index 5478c714f..9346fabb2 100644
--- a/Versions/day.txt
+++ b/Versions/day.txt
@@ -1 +1 @@
-313
+314
diff --git a/res/Notepad3.exe.manifest.conf b/res/Notepad3.exe.manifest.conf
index 12ee03daf..e193c4ffb 100644
--- a/res/Notepad3.exe.manifest.conf
+++ b/res/Notepad3.exe.manifest.conf
@@ -3,7 +3,7 @@
Notepad3 RC3
diff --git a/sciXlexers/LexCSV.cxx b/sciXlexers/LexCSV.cxx
index f6009729c..21efaf7a7 100644
--- a/sciXlexers/LexCSV.cxx
+++ b/sciXlexers/LexCSV.cxx
@@ -36,6 +36,11 @@ using namespace Scintilla;
namespace {
// Use an unnamed namespace to protect the functions and classes from name conflicts
+ enum delim : unsigned int { eComma = 0, eSemic, eTab, ePipe, eMax }; // delimiter candidates; eMax = number of candidates and the "not a delimiter" marker (no 'static': a storage class needs a declarator)
+ static constexpr int DelimList[eMax] = { ',', ';', '\t', '|' }; // delimiter characters, indexed by 'delim'; constexpr so the constexpr IsDelimiter() may read it
+
+ // =================================================================================
+
struct OptionsCSV {
bool fold;
bool foldCompact;
@@ -195,12 +200,19 @@ constexpr bool IsDoubleQuoteChar(const int ch) noexcept
}
// ----------------------------------------------------------------------------
-constexpr bool IsDelimiter(const int ch) noexcept
+constexpr unsigned int IsDelimiter(const int ch) noexcept
{
- return ((ch == ',') || (ch == ';') || (ch == '\t'));
+  // Yields the position of 'ch' inside DelimList, or eMax when 'ch' is not listed there.
+  // Position 0 (eComma) is a valid hit: compare the result with eMax, do not test it as a bool.
+  unsigned int pos = 0;
+  while ((pos < eMax) && (DelimList[pos] != ch)) { ++pos; }
+  return pos;
}
// ----------------------------------------------------------------------------
+
+// ----------------------------------------------------------------------------
+
constexpr int GetStateByColumn(const int col) noexcept
{
switch (col % 10) {
@@ -231,17 +243,106 @@ constexpr int GetStateByColumn(const int col) noexcept
}
// ----------------------------------------------------------------------------
-// ----------------------------------------------------------------------------
void SCI_METHOD LexerCSV::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument* pAccess)
{
Accessor styler(pAccess, nullptr);
- StyleContext sc(startPos, length, initStyle, styler);
- int csvColumn = 0;
+ // 2 passes: 1st pass: smart delimiter detection, 2nd pass: do styling
+
+ Sci_PositionU delimCount[eMax] = { 0 };
+ Sci_PositionU countPerPrevLine[eMax] = { 0 };
+
+ //Sci_PositionU totalCount[eMax] = { 0 };
+ //Sci_PositionU lineCount[eMax] = { 0 };
+
+ Sci_PositionU smartDelimVote[eMax] = { 0 };
+ Sci_PositionU columnAvg = 0;
+
+ // 1st PASS:
+
bool isInSQString = false;
bool isInDQString = false;
+ StyleContext cnt(startPos, length, initStyle, styler);
+ for (; cnt.More(); cnt.Forward())
+ {
+ // reset column infos
+ if (cnt.atLineStart)
+ {
+ isInSQString = false;
+ isInDQString = false;
+
+ for (unsigned int i = 0; i < eMax; ++i)
+ {
+ unsigned int const dlm = delimCount[i];
+ if (dlm > 0) {
+ smartDelimVote[i] += 1;
+
+ if ((dlm == countPerPrevLine[i])) {
+ smartDelimVote[i] += dlm; // bonus for column number
+ }
+
+ // e.g. delim=TAB, all columns decimal numbers with comma(,) as decimal-point => comma wins over TAB
+ if (dlm == columnAvg) {
+ smartDelimVote[i] += dlm; // correction for #delimiter = (#columns - 1);
+ }
+ columnAvg = (columnAvg == 0) ? dlm : (columnAvg + dlm - 1) >> 1;
+
+ }
+ countPerPrevLine[i] = dlm;
+ delimCount[i] = 0;
+
+ //totalCount[i] += dlm;
+ //++lineCount[i];
+ }
+ } // cnt.atLineStart
+
+ if (IsSingleQuoteChar(cnt.ch)) {
+ if (!isInDQString) {
+ isInSQString = !isInSQString; // toggle
+ }
+ }
+ else if (IsDoubleQuoteChar(cnt.ch)) {
+ if (!isInSQString) {
+ isInDQString = !isInDQString; // toggle
+ }
+ }
+ else if (!isInSQString && !isInDQString)
+ {
+ unsigned int i = IsDelimiter(cnt.ch);
+ if (i < eMax) {
+ ++delimCount[i];
+ }
+ }
+ }
+ cnt.Complete();
+
+ // --------------------------
+ // smart delimiter selection
+ // --------------------------
+ int delim = DelimList[0];
+ int maxVote = smartDelimVote[0];
+ for (unsigned int i = 1; i < eMax; ++i)
+ {
+ if (maxVote < smartDelimVote[i]) {
+ delim = DelimList[i];
+ maxVote = smartDelimVote[i];
+ }
+ }
+ // --------------------------
+
+ int const delimiter = delim;
+
+ // ------------------------------------------------------------------------------
+ // 2nd PASS
+ // ------------------------------------------------------------------------------
+
+ int csvColumn = 0;
+ isInSQString = false;
+ isInDQString = false;
+
+ StyleContext sc(startPos, length, initStyle, styler);
for (; sc.More(); sc.Forward())
{
// reset context infos
@@ -262,7 +363,7 @@ void SCI_METHOD LexerCSV::Lex(Sci_PositionU startPos, Sci_Position length, int i
isInDQString = !isInDQString; // toggle
}
}
- else if (IsDelimiter(sc.ch)) {
+ else if (delimiter == sc.ch) {
if (!isInSQString && !isInDQString) {
sc.SetState(GetStateByColumn(++csvColumn));
}
diff --git a/src/VersionEx.h b/src/VersionEx.h
index 025a0b310..39686b93e 100644
--- a/src/VersionEx.h
+++ b/src/VersionEx.h
@@ -8,8 +8,8 @@
#define SAPPNAME "Notepad3"
#define VERSION_MAJOR 5
#define VERSION_MINOR 20
-#define VERSION_REV 313
-#define VERSION_BUILD 3
+#define VERSION_REV 314
+#define VERSION_BUILD 1
#define SCINTILLA_VER 432
#define ONIGURUMA_REGEX_VER 6.9.4
#define UCHARDET_VER 2018.09.27
diff --git a/test/test_files/StyleLexers/styleLexCSV/SampleCSVFile_inconsistent.csv b/test/test_files/StyleLexers/styleLexCSV/SampleCSVFile_inconsistent.csv
new file mode 100644
index 000000000..fb17298a9
--- /dev/null
+++ b/test/test_files/StyleLexers/styleLexCSV/SampleCSVFile_inconsistent.csv
@@ -0,0 +1,7 @@
+# Headline and comment
+100,00;20000;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00
+100,00;200,00;300,00;400,00;500,00;600,00;700,00;80000;900,00;1000,00;1100,00
+100,00;200,00;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00
+100,00;20000;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00
+100,00;200,00;300,00;400,00;500,00;600,00;70000;800,00;900,00;1000,00;1100,00
+100,00;200,00;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00