Merge pull request #2109 from RaiKoHoff/RC2_DEV

Fix: smarter delimiter detection for CSV Rainbow Lexer
This commit is contained in:
Rainer Kottenhoff 2020-03-14 15:01:09 +01:00 committed by GitHub
commit 6b1b29fa9d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 119 additions and 11 deletions

View File

@ -1 +1 @@
3
1

View File

@ -1 +1 @@
313
314

View File

@ -3,7 +3,7 @@
<assemblyIdentity
name="Notepad3"
processorArchitecture="*"
version="5.20.313.3"
version="5.20.314.1"
type="win32"
/>
<description>Notepad3 RC3</description>

View File

@ -36,6 +36,11 @@ using namespace Scintilla;
namespace {
// Use an unnamed namespace to protect the functions and classes from name conflicts
// Candidate CSV column delimiters. The enumerator values double as indices
// into DelimList and into the per-delimiter counter arrays; eMax is the
// number of candidates and also serves as the "not a delimiter" sentinel.
// Note: no storage-class specifier here - 'static' on a bare enum declaration
// (without a declarator) is ill-formed in standard C++; the enclosing unnamed
// namespace already gives internal linkage.
enum delim : unsigned int { eComma = 0, eSemic, eTab, ePipe, eMax };

// Character code of each candidate delimiter, indexed by 'delim'.
// constexpr so the table can be read from constexpr functions.
constexpr int DelimList[eMax] = { ',', ';', '\t', '|' };
// =================================================================================
struct OptionsCSV {
bool fold;
bool foldCompact;
@ -195,12 +200,19 @@ constexpr bool IsDoubleQuoteChar(const int ch) noexcept
}
// ----------------------------------------------------------------------------
constexpr bool IsDelimiter(const int ch) noexcept
constexpr unsigned int IsDelimiter(const int ch) noexcept
{
return ((ch == ',') || (ch == ';') || (ch == '\t'));
for (unsigned int i = 0; i < eMax; ++i)
{
if (DelimList[i] == ch) { return i; }
}
return eMax;
}
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
constexpr int GetStateByColumn(const int col) noexcept
{
switch (col % 10) {
@ -231,17 +243,106 @@ constexpr int GetStateByColumn(const int col) noexcept
}
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
void SCI_METHOD LexerCSV::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument* pAccess)
{
Accessor styler(pAccess, nullptr);
StyleContext sc(startPos, length, initStyle, styler);
int csvColumn = 0;
// 2 passes: 1st pass: smart delimiter detection, 2nd pass: do styling
Sci_PositionU delimCount[eMax] = { 0 };
Sci_PositionU countPerPrevLine[eMax] = { 0 };
//Sci_PositionU totalCount[eMax] = { 0 };
//Sci_PositionU lineCount[eMax] = { 0 };
Sci_PositionU smartDelimVote[eMax] = { 0 };
Sci_PositionU columnAvg = 0;
// 1st PASS:
bool isInSQString = false;
bool isInDQString = false;
StyleContext cnt(startPos, length, initStyle, styler);
for (; cnt.More(); cnt.Forward())
{
// reset column infos
if (cnt.atLineStart)
{
isInSQString = false;
isInDQString = false;
for (unsigned int i = 0; i < eMax; ++i)
{
unsigned int const dlm = delimCount[i];
if (dlm > 0) {
smartDelimVote[i] += 1;
if ((dlm == countPerPrevLine[i])) {
smartDelimVote[i] += dlm; // bonus for column number
}
// e.g. delim=TAB, all columns decimal numbers with comma(,) as decimal-point => comma wins over TAB
if (dlm == columnAvg) {
smartDelimVote[i] += dlm; // correction for #delimiter = (#columns - 1);
}
columnAvg = (columnAvg == 0) ? dlm : (columnAvg + dlm - 1) >> 1;
}
countPerPrevLine[i] = dlm;
delimCount[i] = 0;
//totalCount[i] += dlm;
//++lineCount[i];
}
} // cnt.atLineStart
if (IsSingleQuoteChar(cnt.ch)) {
if (!isInDQString) {
isInSQString = !isInSQString; // toggle
}
}
else if (IsDoubleQuoteChar(cnt.ch)) {
if (!isInSQString) {
isInDQString = !isInDQString; // toggle
}
}
else if (!isInSQString && !isInDQString)
{
unsigned int i = IsDelimiter(cnt.ch);
if (i < eMax) {
++delimCount[i];
}
}
}
cnt.Complete();
// --------------------------
// smart delimiter selection
// --------------------------
int delim = DelimList[0];
int maxVote = smartDelimVote[0];
for (unsigned int i = 1; i < eMax; ++i)
{
if (maxVote < smartDelimVote[i]) {
delim = DelimList[i];
maxVote = smartDelimVote[i];
}
}
// --------------------------
int const delimiter = delim;
// ------------------------------------------------------------------------------
// 2nd PASS
// ------------------------------------------------------------------------------
int csvColumn = 0;
isInSQString = false;
isInDQString = false;
StyleContext sc(startPos, length, initStyle, styler);
for (; sc.More(); sc.Forward())
{
// reset context infos
@ -262,7 +363,7 @@ void SCI_METHOD LexerCSV::Lex(Sci_PositionU startPos, Sci_Position length, int i
isInDQString = !isInDQString; // toggle
}
}
else if (IsDelimiter(sc.ch)) {
else if (delimiter == sc.ch) {
if (!isInSQString && !isInDQString) {
sc.SetState(GetStateByColumn(++csvColumn));
}

View File

@ -8,8 +8,8 @@
#define SAPPNAME "Notepad3"
#define VERSION_MAJOR 5
#define VERSION_MINOR 20
#define VERSION_REV 313
#define VERSION_BUILD 3
#define VERSION_REV 314
#define VERSION_BUILD 1
#define SCINTILLA_VER 432
#define ONIGURUMA_REGEX_VER 6.9.4
#define UCHARDET_VER 2018.09.27

View File

@ -0,0 +1,7 @@
# Headline and comment
100,00;20000;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00
100,00;200,00;300,00;400,00;500,00;600,00;700,00;80000;900,00;1000,00;1100,00
100,00;200,00;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00
100,00;20000;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00
100,00;200,00;300,00;400,00;500,00;600,00;70000;800,00;900,00;1000,00;1100,00
100,00;200,00;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00
1 # Headline and comment
2 100,00;20000;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00
3 100,00;200,00;300,00;400,00;500,00;600,00;700,00;80000;900,00;1000,00;1100,00
4 100,00;200,00;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00
5 100,00;20000;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00
6 100,00;200,00;300,00;400,00;500,00;600,00;70000;800,00;900,00;1000,00;1100,00
7 100,00;200,00;300,00;400,00;500,00;600,00;700,00;800,00;900,00;1000,00;1100,00