diff --git a/Versions/build.txt b/Versions/build.txt index 00750edc0..d00491fd7 100644 --- a/Versions/build.txt +++ b/Versions/build.txt @@ -1 +1 @@ -3 +1 diff --git a/Versions/day.txt b/Versions/day.txt index 5478c714f..9346fabb2 100644 --- a/Versions/day.txt +++ b/Versions/day.txt @@ -1 +1 @@ -313 +314 diff --git a/res/Notepad3.exe.manifest.conf b/res/Notepad3.exe.manifest.conf index 12ee03daf..e193c4ffb 100644 --- a/res/Notepad3.exe.manifest.conf +++ b/res/Notepad3.exe.manifest.conf @@ -3,7 +3,7 @@ Notepad3 RC3 diff --git a/sciXlexers/LexCSV.cxx b/sciXlexers/LexCSV.cxx index f6009729c..21efaf7a7 100644 --- a/sciXlexers/LexCSV.cxx +++ b/sciXlexers/LexCSV.cxx @@ -36,6 +36,11 @@ using namespace Scintilla; namespace { // Use an unnamed namespace to protect the functions and classes from name conflicts + static enum delim : unsigned int { eComma = 0, eSemic, eTab, ePipe, eMax }; + static int const DelimList[eMax] = { ',', ';', '\t', '|' }; + + // ================================================================================= + struct OptionsCSV { bool fold; bool foldCompact; @@ -195,12 +200,19 @@ constexpr bool IsDoubleQuoteChar(const int ch) noexcept } // ---------------------------------------------------------------------------- -constexpr bool IsDelimiter(const int ch) noexcept +constexpr unsigned int IsDelimiter(const int ch) noexcept { - return ((ch == ',') || (ch == ';') || (ch == '\t')); + for (unsigned int i = 0; i < eMax; ++i) + { + if (DelimList[i] == ch) { return i; } + } + return eMax; } // ---------------------------------------------------------------------------- + +// ---------------------------------------------------------------------------- + constexpr int GetStateByColumn(const int col) noexcept { switch (col % 10) { @@ -231,17 +243,106 @@ constexpr int GetStateByColumn(const int col) noexcept } // ---------------------------------------------------------------------------- -// ---------------------------------------------------------------------------- void SCI_METHOD LexerCSV::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument* pAccess) { Accessor styler(pAccess, nullptr); - StyleContext sc(startPos, length, initStyle, styler); - int csvColumn = 0; + // 2 passes: 1st pass: smart delimiter detection, 2nd pass: do styling + + Sci_PositionU delimCount[eMax] = { 0 }; + Sci_PositionU countPerPrevLine[eMax] = { 0 }; + + //Sci_PositionU totalCount[eMax] = { 0 }; + //Sci_PositionU lineCount[eMax] = { 0 }; + + Sci_PositionU smartDelimVote[eMax] = { 0 }; + Sci_PositionU columnAvg = 0; + + // 1st PASS: + bool isInSQString = false; bool isInDQString = false; + StyleContext cnt(startPos, length, initStyle, styler); + for (; cnt.More(); cnt.Forward()) + { + // reset column infos + if (cnt.atLineStart) + { + isInSQString = false; + isInDQString = false; + + for (unsigned int i = 0; i < eMax; ++i) + { + unsigned int const dlm = delimCount[i]; + if (dlm > 0) { + smartDelimVote[i] += 1; + + if ((dlm == countPerPrevLine[i])) { + smartDelimVote[i] += dlm; // bonus for column number + } + + // e.g. delim=TAB, all columns decimal numbers with comma(,) as decimal-point => comma wins over TAB + if (dlm == columnAvg) { + smartDelimVote[i] += dlm; // correction for #delimiter = (#columns - 1); + } + columnAvg = (columnAvg == 0) ? dlm : (columnAvg + dlm - 1) >> 1; + + } + countPerPrevLine[i] = dlm; + delimCount[i] = 0; + + //totalCount[i] += dlm; + //++lineCount[i]; + } + } // cnt.atLineStart + + if (IsSingleQuoteChar(cnt.ch)) { + if (!isInDQString) { + isInSQString = !isInSQString; // toggle + } + } + else if (IsDoubleQuoteChar(cnt.ch)) { + if (!isInSQString) { + isInDQString = !isInDQString; // toggle + } + } + else if (!isInSQString && !isInDQString) + { + unsigned int i = IsDelimiter(cnt.ch); + if (i < eMax) { + ++delimCount[i]; + } + } + } + cnt.Complete(); + + // -------------------------- + // smar delimiter selection + // -------------------------- + int delim = DelimList[0]; + int maxVote = smartDelimVote[0]; + for (unsigned int i = 1; i < eMax; ++i) + { + if (maxVote < smartDelimVote[i]) { + delim = DelimList[i]; + maxVote = smartDelimVote[i]; + } + } + // -------------------------- + + int const delimiter = delim; + + // ------------------------------------------------------------------------------ + // 2nd PASS + // ------------------------------------------------------------------------------ + + int csvColumn = 0; + isInSQString = false; + isInDQString = false; + + StyleContext sc(startPos, length, initStyle, styler); for (; sc.More(); sc.Forward()) { // reset context infos @@ -262,7 +363,7 @@ void SCI_METHOD LexerCSV::Lex(Sci_PositionU startPos, Sci_Position length, int i isInDQString = !isInDQString; // toggle } } - else if (IsDelimiter(sc.ch)) { + else if (delimiter == sc.ch) { if (!isInSQString && !isInDQString) { sc.SetState(GetStateByColumn(++csvColumn)); } diff --git a/src/VersionEx.h b/src/VersionEx.h index 025a0b310..39686b93e 100644 --- a/src/VersionEx.h +++ b/src/VersionEx.h @@ -8,8 +8,8 @@ #define SAPPNAME "Notepad3" #define VERSION_MAJOR 5 #define VERSION_MINOR 20 -#define VERSION_REV 313 -#define VERSION_BUILD 3 +#define VERSION_REV 314 +#define VERSION_BUILD 1 #define SCINTILLA_VER 432 #define ONIGURUMA_REGEX_VER 6.9.4 #define UCHARDET_VER 2018.09.27 diff --git a/test/test_files/StyleLexers/styleLexCSV/SampleCSVFile_inconsistent.csv b/test/test_files/StyleLexers/styleLexCSV/SampleCSVFile_inconsistent.csv new file mode 100644 index 000000000..fb17298a9 --- /dev/null +++ b/test/test_files/StyleLexers/styleLexCSV/SampleCSVFile_inconsistent.csv @@ -0,0 +1,7 @@ +# Headline and comment +100,00;20000;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00 +100,00;200,00;300,00;400,00;500,00;600,00;700,00;80000;900,00;1000,00;1100,00 +100,00;200,00;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00 +100,00;20000;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00 +100,00;200,00;300,00;400,00;500,00;600,00;70000;800,00;900,00;1000,00;1100,00 +100,00;200,00;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00