+ upd: use Zufuliu's EOL Detection method (with compiler intrinsic optimizations)

This commit is contained in:
RaiKoHoff 2020-02-27 18:17:42 +01:00
parent f577de4c58
commit 330261ca5c
3 changed files with 240 additions and 1 deletions

View File

@ -24,6 +24,11 @@
#include <limits.h>
#include <shellapi.h>
#include <emmintrin.h>
#ifdef __AVX__
#include <immintrin.h>
#endif
#include "Styles.h"
#include "Dialogs.h"
#include "resource.h"
@ -826,6 +831,220 @@ bool EditCopyAppend(HWND hwnd, bool bAppend)
// EditDetectEOLMode() - moved here to handle Unicode files correctly
// by zufuliu (https://github.com/zufuliu/notepad2)
//
//=============================================================================
// https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64
// use __popcnt() or _mm_popcnt_u32() require testing __cpuid():
/*
* int cpuInfo[4];
* __cpuid(cpuInfo, 0x00000001);
* const BOOL cpuPOPCNT = cpuInfo[2] & (1 << 23);
*/
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Bit Twiddling Hacks copyright 1997-2005 Sean Eron Anderson
#if defined(_MSC_VER)
static __forceinline unsigned int bth_popcount(unsigned int v) {
v = v - ((v >> 1) & 0x55555555U);
v = (v & 0x33333333U) + ((v >> 2) & 0x33333333U);
return (((v + (v >> 4)) & 0x0F0F0F0FU) * 0x01010101U) >> 24;
}
#endif
#if !defined(_MSC_VER)
#define __popcnt __builtin_popcount
#elif NP2_USE_AVX2
#define __popcnt _mm_popcnt_u32
#else
//#define np2_popcount __popcnt
#define __popcnt bth_popcount
#endif
// ----------------------------------------------------------------------------
void EditDetectEOLMode(LPCSTR lpData, size_t cbData, EditFileIOStatus* const status)
{
if (!lpData || (cbData == 0)) { return; }
/* '\r' and '\n' is not reused (e.g. as trailing byte in DBCS) by any known encoding,
it's safe to check whole data byte by byte.*/
DocLn lineCountCRLF = 0;
DocLn lineCountCR = 0;
DocLn lineCountLF = 0;
// tools/GenerateTable.py
static const uint8_t eol_table[16] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, // 00 - 0F
};
const uint8_t* ptr = (const uint8_t*)lpData;
// No NULL-terminated requirement for *ptr == '\n'
const uint8_t* const end = ptr + cbData - 1;
#if NP2_USE_AVX2
#define LAST_CR_MASK (1U << (sizeof(__m256i) - 1))
const __m256i vectCR = _mm256_set1_epi8('\r');
const __m256i vectLF = _mm256_set1_epi8('\n');
while (ptr + sizeof(__m256i) <= end) {
// unaligned loading: line starts at random position.
const __m256i chunk = _mm256_loadu_si256((__m256i*)ptr);
uint32_t maskCR = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, vectCR));
uint32_t maskLF = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, vectLF));
ptr += sizeof(__m256i);
if (maskCR) {
if (maskCR & LAST_CR_MASK) {
maskCR &= LAST_CR_MASK - 1;
if (*ptr == '\n') {
// CR+LF across boundary
++ptr;
++lineCountCRLF;
}
else {
// clear highest bit (last CR) to avoid using following code:
// maskCR = (maskCR_LF ^ maskLF) | (maskCR & LAST_CR_MASK);
++lineCountCR;
}
}
// maskCR and maskLF never have some bit set. after shifting maskCR by 1 bit,
// the bits both set in maskCR and maskLF represents CR+LF;
// the bits only set in maskCR or maskLF represents individual CR or LF.
const uint32_t maskCRLF = (maskCR << 1) & maskLF; // CR+LF
const uint32_t maskCR_LF = (maskCR << 1) ^ maskLF;// CR alone or LF alone
maskLF = maskCR_LF & maskLF; // LF alone
maskCR = maskCR_LF ^ maskLF; // CR alone (with one position offset)
if (maskCRLF) {
lineCountCRLF += __popcnt(maskCRLF);
}
if (maskCR) {
lineCountCR += __popcnt(maskCR);
}
}
if (maskLF) {
lineCountLF += __popcnt(maskLF);
}
}
#undef LAST_CR_MASK
// end NP2_USE_AVX2
#elif NP2_USE_SSE2
#define LAST_CR_MASK (1U << (2*sizeof(__m128i) - 1))
const __m128i vectCR = _mm_set1_epi8('\r');
const __m128i vectLF = _mm_set1_epi8('\n');
while (ptr + 2 * sizeof(__m128i) <= end) {
// unaligned loading: line starts at random position.
__m128i chunk = _mm_loadu_si128((__m128i*)ptr);
uint32_t maskCR = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vectCR));
uint32_t maskLF = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vectLF));
chunk = _mm_loadu_si128((__m128i*)(ptr + sizeof(__m128i)));
maskCR |= ((uint32_t)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vectCR))) << sizeof(__m128i);
maskLF |= ((uint32_t)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vectLF))) << sizeof(__m128i);
ptr += 2 * sizeof(__m128i);
if (maskCR) {
if (maskCR & LAST_CR_MASK) {
maskCR &= LAST_CR_MASK - 1;
if (*ptr == '\n') {
// CR+LF across boundary
++ptr;
++lineCountCRLF;
}
else {
// clear highest bit (last CR) to avoid using following code:
// maskCR = (maskCR_LF ^ maskLF) | (maskCR & LAST_CR_MASK);
++lineCountCR;
}
}
// maskCR and maskLF never have some bit set. after shifting maskCR by 1 bit,
// the bits both set in maskCR and maskLF represents CR+LF;
// the bits only set in maskCR or maskLF represents individual CR or LF.
const uint32_t maskCRLF = (maskCR << 1) & maskLF; // CR+LF
const uint32_t maskCR_LF = (maskCR << 1) ^ maskLF;// CR alone or LF alone
maskLF = maskCR_LF & maskLF; // LF alone
maskCR = maskCR_LF ^ maskLF; // CR alone (with one position offset)
if (maskCRLF) {
lineCountCRLF += __popcnt(maskCRLF);
}
if (maskCR) {
lineCountCR += __popcnt(maskCR);
}
}
if (maskLF) {
lineCountLF += __popcnt(maskLF);
}
}
#undef LAST_CR_MASK
// end NP2_USE_SSE2
#endif
do {
// skip to line end
uint8_t ch;
uint8_t type = 0;
while (ptr < end && ((ch = *ptr++) > '\r' || (type = eol_table[ch]) == 0)) {
// nop
}
switch (type) {
case 1: //'\n'
++lineCountLF;
break;
case 2: //'\r'
if (*ptr == '\n') {
++ptr;
++lineCountCRLF;
}
else {
++lineCountCR;
}
break;
}
} while (ptr < end);
if (ptr == end) {
switch (*ptr) {
case '\n':
++lineCountLF;
break;
case '\r':
++lineCountCR;
break;
}
}
// values must kept in same order as SC_EOL_CRLF(0), SC_EOL_CR(1), SC_EOL_LF(2)
DocLn const linesMax = max_ln(max_ln(lineCountCRLF, lineCountCR), lineCountLF);
DocLn linesCount[3] = { 0, 0, 0 };
linesCount[SC_EOL_CRLF] = lineCountCRLF;
linesCount[SC_EOL_CR] = lineCountCR;
linesCount[SC_EOL_LF] = lineCountLF;
int iEOLMode = status->iEOLMode;
if (linesMax != linesCount[iEOLMode])
{
if (linesMax == linesCount[SC_EOL_CRLF]) {
iEOLMode = SC_EOL_CRLF;
}
else if (linesMax == linesCount[SC_EOL_CR]) {
iEOLMode = SC_EOL_CR;
}
else {
iEOLMode = SC_EOL_LF;
}
}
status->iEOLMode = iEOLMode;
status->bInconsistentEOLs = 1 < ((!!lineCountCRLF) + (!!lineCountCR) + (!!lineCountLF));
status->eolCount[SC_EOL_CRLF] = lineCountCRLF;
status->eolCount[SC_EOL_CR] = lineCountCR;
status->eolCount[SC_EOL_LF] = lineCountLF;
}
#if 0
void EditDetectEOLMode(LPCSTR lpData, size_t cbData, EditFileIOStatus* const status)
{
int iEOLMode = Settings.DefaultEOLMode;
@ -889,6 +1108,8 @@ void EditDetectEOLMode(LPCSTR lpData, size_t cbData, EditFileIOStatus* const sta
status->eolCount[SC_EOL_LF] = linesCount[SC_EOL_LF];
}
#endif
//=============================================================================
//

View File

@ -240,7 +240,7 @@
<WarningLevel>Level4</WarningLevel>
<DebugInformationFormat>None</DebugInformationFormat>
<TreatWarningAsError>true</TreatWarningAsError>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
<LanguageStandard>stdcpp17</LanguageStandard>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
@ -318,6 +318,7 @@
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<IntrinsicFunctions>true</IntrinsicFunctions>
<ExceptionHandling>Sync</ExceptionHandling>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<AdditionalDependencies>comctl32.lib;imm32.lib;shlwapi.lib;uxtheme.lib;muiload.lib;dwrite.lib;scintilla.lib;%(AdditionalDependencies)</AdditionalDependencies>

View File

@ -47,6 +47,23 @@
#include "Sci_Position.h"
// Compiler Optimizations
#ifndef NP2_USE_SSE2
// SSE2 enabled by default
#define NP2_USE_SSE2 1
#endif
#if defined(_WIN64) && !defined(NP2_USE_AVX2)
#if defined(__AVX2__)
#define NP2_USE_AVX2 1
#else
#define NP2_USE_AVX2 0
#endif
#else
#define NP2_USE_AVX2 0
#endif // NP2_USE_AVX2
//
// TODO(rkotten):
// SCI_CREATEDOCUMENT (SC_DOCUMENTOPTION_TEXT_LARGE)