mirror of
https://github.com/rizonesoft/Notepad3.git
synced 2026-06-14 21:09:05 +08:00
389 lines
9.6 KiB
C++
389 lines
9.6 KiB
C++
// encoding: UTF-8
|
|
// Scintilla source code edit control
|
|
/** @file LexCSV.cxx
|
|
** Rainbow clouring for CSV files
|
|
** Written by RaiKoHoff
|
|
**/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <stdarg.h>
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
|
|
#include <string>
|
|
#include <map>
|
|
#include <algorithm>
|
|
|
|
#include "ILexer.h"
|
|
#include "Scintilla.h"
|
|
#include "SciXLexer.h"
|
|
|
|
#include "StringCopy.h"
|
|
#include "WordList.h"
|
|
#include "LexAccessor.h"
|
|
#include "Accessor.h"
|
|
#include "StyleContext.h"
|
|
#include "CharSetX.h"
|
|
#include "LexerModule.h"
|
|
#include "OptionSet.h"
|
|
#include "DefaultLexer.h"
|
|
|
|
|
|
using namespace Scintilla;
|
|
|
|
namespace {
|
|
// Use an unnamed namespace to protect the functions and classes from name conflicts
|
|
|
|
enum delim : unsigned int { eComma = 0, eSemic, eTab, ePipe, eMax };
|
|
static int const DelimList[eMax] = { ',', ';', '\t', '|' };
|
|
|
|
// =================================================================================
|
|
|
|
struct OptionsCSV {
|
|
bool fold;
|
|
bool foldCompact;
|
|
|
|
OptionsCSV() {
|
|
fold = true;
|
|
foldCompact = true;
|
|
}
|
|
};
|
|
|
|
static const char* const csvWordLists[] = {
|
|
nullptr
|
|
};
|
|
|
|
struct OptionSetCSV : public OptionSet<OptionsCSV> {
|
|
OptionSetCSV() {
|
|
|
|
DefineProperty("fold", &OptionsCSV::fold, "FOLD COMMENT");
|
|
DefineProperty("fold.compact", &OptionsCSV::foldCompact, "FOLDCOMPACT COMMENT");
|
|
|
|
DefineWordListSets(csvWordLists);
|
|
}
|
|
};
|
|
|
|
LexicalClass lexicalClasses[] = {
|
|
// Lexer CSV SCLEX_CSV SCE_CSV_:
|
|
0, "SCE_CSV_DEFAULT", "default", "Default",
|
|
1, "SCE_CSV_COLUMN_0", "col_0", "Column 0",
|
|
2, "SCE_CSV_COLUMN_1", "col_1", "Column 1",
|
|
3, "SCE_CSV_COLUMN_2", "col_2", "Column 2",
|
|
4, "SCE_CSV_COLUMN_3", "col_3", "Column 3",
|
|
5, "SCE_CSV_COLUMN_4", "col_4", "Column 4",
|
|
6, "SCE_CSV_COLUMN_5", "col_5", "Column 5",
|
|
7, "SCE_CSV_COLUMN_6", "col_6", "Column 6",
|
|
8, "SCE_CSV_COLUMN_7", "col_7", "Column 7",
|
|
9, "SCE_CSV_COLUMN_8", "col_8", "Column 8",
|
|
10, "SCE_CSV_COLUMN_9", "col_9", "Column 9",
|
|
};
|
|
|
|
|
|
} // end of namespace
|
|
|
|
class LexerCSV : public DefaultLexer {
|
|
|
|
WordList keywords;
|
|
|
|
OptionsCSV options;
|
|
OptionSetCSV osCSV;
|
|
|
|
public:
|
|
LexerCSV()
|
|
: DefaultLexer("CSV", SCLEX_CSV, lexicalClasses, ELEMENTS(lexicalClasses))
|
|
{ }
|
|
|
|
virtual ~LexerCSV() { }
|
|
|
|
void SCI_METHOD Release() override {
|
|
delete this;
|
|
}
|
|
|
|
int SCI_METHOD Version() const override {
|
|
return lvRelease5;
|
|
}
|
|
|
|
const char* SCI_METHOD PropertyNames() override {
|
|
return osCSV.PropertyNames();
|
|
}
|
|
|
|
int SCI_METHOD PropertyType(const char* name) override {
|
|
return osCSV.PropertyType(name);
|
|
}
|
|
|
|
const char* SCI_METHOD PropertyGet(const char* name) override {
|
|
return osCSV.PropertyGet(name);
|
|
}
|
|
|
|
const char* SCI_METHOD DescribeProperty(const char* name) override {
|
|
return osCSV.DescribeProperty(name);
|
|
}
|
|
|
|
|
|
const char* SCI_METHOD DescribeWordListSets() override {
|
|
return osCSV.DescribeWordListSets();
|
|
}
|
|
|
|
void* SCI_METHOD PrivateCall(int, void*) override {
|
|
return nullptr;
|
|
}
|
|
|
|
int SCI_METHOD LineEndTypesSupported() override {
|
|
return SC_LINE_END_TYPE_UNICODE;
|
|
}
|
|
|
|
int SCI_METHOD PrimaryStyleFromStyle(int style) override {
|
|
return style;
|
|
}
|
|
|
|
static ILexer5* LexerFactoryCSV() {
|
|
return new LexerCSV();
|
|
}
|
|
|
|
// --------------------------------------------------------------------------
|
|
|
|
Sci_Position SCI_METHOD PropertySet(const char* key, const char* val) override;
|
|
Sci_Position SCI_METHOD WordListSet(int n, const char* wl) override;
|
|
void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument* pAccess) override;
|
|
void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument* pAccess) override;
|
|
|
|
};
|
|
|
|
|
|
Sci_Position SCI_METHOD LexerCSV::PropertySet(const char* key, const char* val) {
|
|
if (osCSV.PropertySet(&options, key, val)) {
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
|
|
Sci_Position SCI_METHOD LexerCSV::WordListSet(int n, const char* wl)
|
|
{
|
|
WordList* wordListN = nullptr;
|
|
|
|
switch (n) {
|
|
case 0:
|
|
wordListN = &keywords;
|
|
break;
|
|
}
|
|
|
|
Sci_Position firstModification = -1;
|
|
|
|
if (wordListN) {
|
|
WordList wlNew;
|
|
wlNew.Set(wl);
|
|
if (*wordListN != wlNew) {
|
|
wordListN->Set(wl);
|
|
firstModification = 0;
|
|
}
|
|
}
|
|
return firstModification;
|
|
}
|
|
// ----------------------------------------------------------------------------
|
|
|
|
constexpr int abs_i(const int i) noexcept { return ((i < 0) ? (0 - i) : (0 + i)); }
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
constexpr bool IsSingleQuoteChar(const int ch) noexcept
|
|
{
|
|
return (ch == '\'');
|
|
}
|
|
// ----------------------------------------------------------------------------
|
|
|
|
constexpr bool IsDoubleQuoteChar(const int ch) noexcept
|
|
{
|
|
return (ch == '"');
|
|
}
|
|
// ----------------------------------------------------------------------------
|
|
|
|
constexpr unsigned int IsDelimiter(const int ch) noexcept
|
|
{
|
|
for (unsigned int i = 0; i < eMax; ++i)
|
|
{
|
|
if (DelimList[i] == ch) { return i; }
|
|
}
|
|
return eMax;
|
|
}
|
|
// ----------------------------------------------------------------------------
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
constexpr int GetStateByColumn(const int col) noexcept
|
|
{
|
|
switch (col % 10) {
|
|
case 0:
|
|
return SCE_CSV_COLUMN_0;
|
|
case 1:
|
|
return SCE_CSV_COLUMN_1;
|
|
case 2:
|
|
return SCE_CSV_COLUMN_2;
|
|
case 3:
|
|
return SCE_CSV_COLUMN_3;
|
|
case 4:
|
|
return SCE_CSV_COLUMN_4;
|
|
case 5:
|
|
return SCE_CSV_COLUMN_5;
|
|
case 6:
|
|
return SCE_CSV_COLUMN_6;
|
|
case 7:
|
|
return SCE_CSV_COLUMN_7;
|
|
case 8:
|
|
return SCE_CSV_COLUMN_8;
|
|
case 9:
|
|
return SCE_CSV_COLUMN_9;
|
|
default:
|
|
return SCE_CSV_COLUMN_0;
|
|
}
|
|
return SCE_CSV_COLUMN_0;
|
|
}
|
|
// ----------------------------------------------------------------------------
|
|
|
|
|
|
void SCI_METHOD LexerCSV::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument* pAccess)
|
|
{
|
|
Accessor styler(pAccess, nullptr);
|
|
|
|
// 2 passes: 1st pass: smart delimiter detection, 2nd pass: do styling
|
|
|
|
Sci_PositionU delimCount[eMax] = { 0 };
|
|
Sci_PositionU countPerPrevLine[eMax] = { 0 };
|
|
|
|
//Sci_PositionU totalCount[eMax] = { 0 };
|
|
//Sci_PositionU lineCount[eMax] = { 0 };
|
|
|
|
Sci_PositionU smartDelimVote[eMax] = { 0 };
|
|
Sci_PositionU columnAvg = 0;
|
|
|
|
// 1st PASS:
|
|
|
|
bool isInSQString = false;
|
|
bool isInDQString = false;
|
|
|
|
StyleContext cnt(startPos, length, initStyle, styler);
|
|
for (; cnt.More(); cnt.Forward())
|
|
{
|
|
// reset column infos
|
|
if (cnt.atLineStart)
|
|
{
|
|
isInSQString = false;
|
|
isInDQString = false;
|
|
|
|
for (unsigned int i = 0; i < eMax; ++i)
|
|
{
|
|
Sci_PositionU const dlm = delimCount[i];
|
|
if (dlm > 0) {
|
|
smartDelimVote[i] += 1;
|
|
|
|
if ((dlm == countPerPrevLine[i])) {
|
|
smartDelimVote[i] += dlm; // bonus for column number
|
|
}
|
|
|
|
// e.g. delim=TAB, all columns decimal numbers with comma(,) as decimal-point => comma wins over TAB
|
|
if (dlm == columnAvg) {
|
|
smartDelimVote[i] += dlm; // correction for #delimiter = (#columns - 1);
|
|
}
|
|
columnAvg = (columnAvg == 0) ? dlm : (columnAvg + dlm - 1) >> 1;
|
|
|
|
}
|
|
countPerPrevLine[i] = dlm;
|
|
delimCount[i] = 0;
|
|
|
|
//totalCount[i] += dlm;
|
|
//++lineCount[i];
|
|
}
|
|
} // cnt.atLineStart
|
|
|
|
if (IsSingleQuoteChar(cnt.ch)) {
|
|
if (!isInDQString) {
|
|
isInSQString = !isInSQString; // toggle
|
|
}
|
|
}
|
|
else if (IsDoubleQuoteChar(cnt.ch)) {
|
|
if (!isInSQString) {
|
|
isInDQString = !isInDQString; // toggle
|
|
}
|
|
}
|
|
else if (!isInSQString && !isInDQString)
|
|
{
|
|
unsigned int i = IsDelimiter(cnt.ch);
|
|
if (i < eMax) {
|
|
++delimCount[i];
|
|
}
|
|
}
|
|
}
|
|
cnt.Complete();
|
|
|
|
// --------------------------
|
|
// smar delimiter selection
|
|
// --------------------------
|
|
int delim = DelimList[0];
|
|
Sci_PositionU maxVote = smartDelimVote[0];
|
|
for (unsigned int i = 1; i < eMax; ++i)
|
|
{
|
|
if (maxVote < smartDelimVote[i]) {
|
|
delim = DelimList[i];
|
|
maxVote = smartDelimVote[i];
|
|
}
|
|
}
|
|
// --------------------------
|
|
|
|
int const delimiter = delim;
|
|
|
|
// ------------------------------------------------------------------------------
|
|
// 2nd PASS
|
|
// ------------------------------------------------------------------------------
|
|
|
|
int csvColumn = 0;
|
|
isInSQString = false;
|
|
isInDQString = false;
|
|
|
|
StyleContext sc(startPos, length, initStyle, styler);
|
|
for (; sc.More(); sc.Forward())
|
|
{
|
|
// reset context infos
|
|
if (sc.atLineStart) {
|
|
csvColumn = 0;
|
|
isInSQString = false;
|
|
isInDQString = false;
|
|
sc.SetState(GetStateByColumn(csvColumn));
|
|
}
|
|
|
|
if (IsSingleQuoteChar(sc.ch)) {
|
|
if (!isInDQString) {
|
|
isInSQString = !isInSQString; // toggle
|
|
}
|
|
}
|
|
else if (IsDoubleQuoteChar(sc.ch)) {
|
|
if (!isInSQString) {
|
|
isInDQString = !isInDQString; // toggle
|
|
}
|
|
}
|
|
else if (delimiter == sc.ch) {
|
|
if (!isInSQString && !isInDQString) {
|
|
sc.SetState(GetStateByColumn(++csvColumn));
|
|
}
|
|
}
|
|
|
|
}
|
|
sc.Complete();
|
|
}
|
|
// ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
void SCI_METHOD LexerCSV::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument* pAccess)
|
|
{
|
|
return;
|
|
}
|
|
// ----------------------------------------------------------------------------
|
|
|
|
LexerModule lmCSV(SCLEX_CSV, LexerCSV::LexerFactoryCSV, "csv", csvWordLists);
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|