Merge pull request #4004 from RaiKoHoff/Dev_Master

Rainbow CSV Lexer: try to fix the styling of unpaired quotes
This commit is contained in:
Rainer Kottenhoff 2022-02-26 11:03:04 +01:00 committed by GitHub
commit ea14452b9e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 78 additions and 63 deletions

View File

@ -226,8 +226,49 @@ constexpr unsigned int IsDelimiter(const int ch) noexcept
// ----------------------------------------------------------------------------
// Counts how often the current character (sc.ch) occurs again on the rest of
// the current line, i.e. at the positions following sc.currentPos.
//   sc     - style context positioned on the character to look for
//   endPos - exclusive upper bound; positions >= endPos are never inspected
// Returns the number of further occurrences; the character at sc.currentPos
// itself is not part of the count. Scanning stops at the first line break.
// NOTE(review): 'constexpr' only has an effect if StyleContext::GetRelative()
// is itself usable in constant expressions - confirm against StyleContext.h.
constexpr Sci_PositionU CountCharOccTillLineEnd(StyleContext& sc, const Sci_PositionU endPos)
{
    Sci_PositionU count = 0;
    // Begin right behind the current character and check the bound before
    // every read, so the character located at endPos is never looked at.
    for (Sci_Position i = 1; (sc.currentPos + i) < endPos; ++i)
    {
        const int ch = sc.GetRelative(i);
        if (IsLineBreak(ch))
        {
            break; // end of line reached, nothing more to count
        }
        if (ch == sc.ch)
        {
            ++count;
        }
    }
    return count;
}
// ----------------------------------------------------------------------------
// Updates the quote tracking flags for the character at sc.ch.
//   sc           - style context positioned on the character to examine
//   isInSQString - in/out: true while inside a single-quoted section
//   isInDQString - in/out: true while inside a double-quoted section
//   endPos       - forwarded to CountCharOccTillLineEnd() as scan limit
// Returns true if sc.ch is a single or double quote character (whether or not
// a flag was toggled), false for every other character.
static inline bool HandleQuoteContext(StyleContext& sc, bool& isInSQString, bool& isInDQString, const Sci_PositionU endPos)
{
    const bool singleQuote = IsSingleQuoteChar(sc.ch);
    if (!singleQuote && !IsDoubleQuoteChar(sc.ch))
    {
        return false; // not a quote character at all
    }

    // Pick the flag that belongs to this quote kind and remember the state of the other kind.
    bool& ownFlag = singleQuote ? isInSQString : isInDQString;
    const bool otherKindOpen = singleQuote ? isInDQString : isInSQString;

    // An already open section is closed by this quote (count fixed to 1);
    // otherwise the number of further quotes of this kind on the line decides
    // whether a consistent closing partner exists.
    Sci_PositionU pending = 1;
    if (!ownFlag)
    {
        pending = CountCharOccTillLineEnd(sc, endPos);
    }

    const bool oddCount = ((pending & 1u) != 0);
    if (oddCount && !otherKindOpen)
    {
        ownFlag = !ownFlag; // toggle
    }
    return true;
}
// ----------------------------------------------------------------------------
constexpr int GetStateByColumn(const int col) noexcept
{
switch (col % 10)
@ -267,6 +308,7 @@ void SCI_METHOD LexerCSV::Lex(Sci_PositionU startPos, Sci_Position length, int i
// 2 passes: 1st pass: smart delimiter detection, 2nd pass: do styling
Sci_PositionU endPos = startPos + length;
Sci_PositionU delimCount[eMax] = { 0 };
Sci_PositionU countPerPrevLine[eMax] = { 0 };
@ -281,11 +323,11 @@ void SCI_METHOD LexerCSV::Lex(Sci_PositionU startPos, Sci_Position length, int i
bool isInSQString = false;
bool isInDQString = false;
StyleContext cnt(startPos, length, initStyle, styler);
for (; cnt.More(); cnt.Forward())
StyleContext sc(startPos, length, initStyle, styler);
for (; sc.More(); sc.Forward())
{
// reset column infos
if (cnt.atLineStart)
if (sc.atLineStart)
{
isInSQString = false;
isInDQString = false;
@ -316,32 +358,18 @@ void SCI_METHOD LexerCSV::Lex(Sci_PositionU startPos, Sci_Position length, int i
//totalCount[i] += dlm;
//++lineCount[i];
}
} // cnt.atLineStart
} // sc.atLineStart
if (IsSingleQuoteChar(cnt.ch))
if (!HandleQuoteContext(sc, isInSQString, isInDQString, endPos) && (!isInSQString && !isInDQString))
{
if (!isInDQString)
{
isInSQString = !isInSQString; // toggle
}
}
else if (IsDoubleQuoteChar(cnt.ch))
{
if (!isInSQString)
{
isInDQString = !isInDQString; // toggle
}
}
else if (!isInSQString && !isInDQString)
{
unsigned int i = IsDelimiter(cnt.ch);
unsigned int i = IsDelimiter(sc.ch);
if (i < eMax)
{
++delimCount[i];
}
}
}
cnt.Complete();
sc.Complete();
// --------------------------
// smart delimiter selection
@ -368,42 +396,29 @@ void SCI_METHOD LexerCSV::Lex(Sci_PositionU startPos, Sci_Position length, int i
isInSQString = false;
isInDQString = false;
StyleContext sc(startPos, length, initStyle, styler);
for (; sc.More(); sc.Forward())
StyleContext sc2(startPos, length, initStyle, styler);
for (; sc2.More(); sc2.Forward())
{
// reset context infos
if (sc.atLineStart)
if (sc2.atLineStart)
{
csvColumn = 0;
isInSQString = false;
isInDQString = false;
sc.SetState(GetStateByColumn(csvColumn));
sc2.SetState(GetStateByColumn(csvColumn));
}
if (IsSingleQuoteChar(sc.ch))
{
if (!isInDQString)
{
isInSQString = !isInSQString; // toggle
}
}
else if (IsDoubleQuoteChar(sc.ch))
{
if (!isInSQString)
{
isInDQString = !isInDQString; // toggle
}
}
else if (delimiter == sc.ch)
if (!HandleQuoteContext(sc2, isInSQString, isInDQString, endPos) && (delimiter == sc2.ch))
{
if (!isInSQString && !isInDQString)
{
sc.SetState(GetStateByColumn(++csvColumn));
sc2.SetState(GetStateByColumn(++csvColumn));
}
}
}
sc.Complete();
sc2.Complete();
}
// ----------------------------------------------------------------------------

View File

@ -51,20 +51,12 @@
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
//#include "CharacterSet.h"
#include "CharSetX.h"
#include "LexerModule.h"
using namespace Lexilla;
namespace {
// Reports whether ch ends a line: line feed, carriage return, or '\0'.
// '\0' is included because sc.GetRelative(i) returns '\0' if out of range.
constexpr bool IsNewline(const int ch) {
	switch (ch) {
	case '\n':
	case '\r':
	case '\0':
		return true;
	default:
		return false;
	}
}
}
// True if can follow ch down to the end with possibly trailing whitespace
static bool FollowToLineEnd(const int ch, const int state, const Sci_PositionU endPos, StyleContext &sc) {
Sci_Position i = 0;
@ -73,7 +65,7 @@ static bool FollowToLineEnd(const int ch, const int state, const Sci_PositionU e
// Skip over whitespace
while (IsASpaceOrTab(sc.GetRelative(i)) && sc.currentPos + i < endPos)
++i;
if (IsNewline(sc.GetRelative(i)) || sc.currentPos + i == endPos) {
if (IsLineBreak(sc.GetRelative(i)) || sc.currentPos + i == endPos) {
sc.Forward(i);
sc.ChangeState(state);
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
@ -131,7 +123,7 @@ static void SetStateAndZoom(const int state, const Sci_Position length, const in
static bool HasPrevLineContent(StyleContext &sc) {
Sci_Position i = 0;
// Go back to the previous newline
while ((--i + (Sci_Position)sc.currentPos) >= 0 && !IsNewline(sc.GetRelative(i)))
while ((--i + (Sci_Position)sc.currentPos) >= 0 && !IsLineBreak(sc.GetRelative(i)))
;
while ((--i + (Sci_Position)sc.currentPos) >= 0) {
const int ch = sc.GetRelative(i);
@ -151,7 +143,7 @@ static bool IsCompleteStyleRegion(StyleContext &sc, const char *token) {
bool found = false;
const size_t start = strlen(token);
Sci_Position i = static_cast<Sci_Position>(start);
while (!IsNewline(sc.GetRelative(i))) {
while (!IsLineBreak(sc.GetRelative(i))) {
// make sure an empty pair of single-char tokens doesn't match
// with a longer token: {*}{*} != {**}
if (sc.GetRelative(i) == *token && sc.GetRelative(i - 1) != *token) {
@ -174,7 +166,7 @@ static bool IsValidHrule(const Sci_PositionU endPos, StyleContext &sc) {
// hit a terminating character
else if (!IsASpaceOrTab(ch) || (sc.currentPos + i) == endPos) {
// Are we a valid HRULE
if ((IsNewline(ch) || (sc.currentPos + i) == endPos) &&
if ((IsLineBreak(ch) || (sc.currentPos + i) == endPos) &&
count >= 3 && !HasPrevLineContent(sc)) {
sc.SetState(SCE_MARKDOWN_HRULE);
sc.Forward(i);
@ -264,37 +256,37 @@ static void ColorizeMarkdownDoc(Sci_PositionU startPos, Sci_Position length, int
*/
// Strong
else if (sc.state == SCE_MARKDOWN_STRONG1) {
if ((sc.Match("**") && sc.chPrev != ' ') || IsNewline(sc.GetRelative(2))) {
if ((sc.Match("**") && sc.chPrev != ' ') || IsLineBreak(sc.GetRelative(2))) {
sc.Forward(2);
sc.SetState(SCE_MARKDOWN_DEFAULT);
}
}
else if (sc.state == SCE_MARKDOWN_STRONG2) {
if ((sc.Match("__") && sc.chPrev != ' ') || IsNewline(sc.GetRelative(2))) {
if ((sc.Match("__") && sc.chPrev != ' ') || IsLineBreak(sc.GetRelative(2))) {
sc.Forward(2);
sc.SetState(SCE_MARKDOWN_DEFAULT);
}
}
// Emphasis
else if (sc.state == SCE_MARKDOWN_EM1) {
if ((sc.ch == '*' && sc.chPrev != ' ') || IsNewline(sc.chNext))
if ((sc.ch == '*' && sc.chPrev != ' ') || IsLineBreak(sc.chNext))
sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
}
else if (sc.state == SCE_MARKDOWN_EM2) {
if ((sc.ch == '_' && sc.chPrev != ' ') || IsNewline(sc.chNext))
if ((sc.ch == '_' && sc.chPrev != ' ') || IsLineBreak(sc.chNext))
sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
}
else if (sc.state == SCE_MARKDOWN_CODEBK) {
if (sc.atLineStart && sc.Match("~~~")) {
Sci_Position i = 1;
while (!IsNewline(sc.GetRelative(i)) && sc.currentPos + i < endPos)
while (!IsLineBreak(sc.GetRelative(i)) && sc.currentPos + i < endPos)
i++;
sc.Forward(i);
sc.SetState(SCE_MARKDOWN_DEFAULT);
}
}
else if (sc.state == SCE_MARKDOWN_STRIKEOUT) {
if ((sc.Match("~~") && sc.chPrev != ' ') || IsNewline(sc.GetRelative(2))) {
if ((sc.Match("~~") && sc.chPrev != ' ') || IsLineBreak(sc.GetRelative(2))) {
sc.Forward(2);
sc.SetState(SCE_MARKDOWN_DEFAULT);
}
@ -480,7 +472,7 @@ static void ColorizeMarkdownDoc(Sci_PositionU startPos, Sci_Position length, int
sc.Forward();
}
// Beginning of line
else if (IsNewline(sc.ch)) {
else if (IsLineBreak(sc.ch)) {
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
}
}

View File

@ -0,0 +1,8 @@
Name,ObjectType,Office,Mobile,DDI
Alice Beetle,User,Office1,,6491234567
Adrian Smith,User,Office3,64271234567,6441234567
Harriet O'Donoghue,User,Office2,6421234567,6471234567
"Harr|et O'Donoghue",User,Office2,6421234567,6471234567
Harriet O'Do,nogh'ue,User,Office2,6421234567,6471234567
Harriet O'Donoghue;User,User,Office2,6421234567,6471234567
Judith Cole,User,Office1,,6491234568
Can't render this file because it has a wrong number of fields in line 6.