+ rev: revert support for enhanced instruction set optimizing options

This commit is contained in:
Rainer Kottenhoff 2020-02-29 02:57:02 +01:00
parent e863e36a3c
commit cf516bb4d7
9 changed files with 42 additions and 480 deletions

View File

@ -1 +1 @@
228
229

View File

@ -3,7 +3,7 @@
<assemblyIdentity
name="Notepad3"
processorArchitecture="*"
version="5.20.228.1"
version="5.20.229.1"
type="win32"
/>
<description>Notepad3 RC2</description>

View File

@ -2154,285 +2154,3 @@ extern "C" bool EditSetDocumentBuffer(const char* lpstrText, DocPosU lenText)
return true;
}
////////////////////////////////////////////////////////////////////////////////
// Feature Detector by Mysticial (Alexander Yee)
// https://github.com/Mysticial/FeatureDetector
////////////////////////////////////////////////////////////////////////////////
// Snapshot of the x86 CPU / OS capabilities of the host.
// The constructor zero-fills the object, so every flag starts out false;
// detect_host() then sets the flags it can confirm via CPUID / XGETBV.
struct cpu_x86 {
// Vendor - derived by detect_host() from the CPUID vendor string
// ("AuthenticAMD" / "GenuineIntel"); both stay false for other vendors.
bool Vendor_AMD;
bool Vendor_Intel;
// OS Features - results of detect_OS_x64(), detect_OS_AVX() and
// detect_OS_AVX512() respectively.
bool OS_x64;
bool OS_AVX;
bool OS_AVX512;
// Misc. hardware features reported by CPUID.
bool HW_MMX;
bool HW_x64;
bool HW_ABM;
bool HW_RDRAND;
bool HW_BMI1;
bool HW_BMI2;
bool HW_ADX;
bool HW_PREFETCHWT1;
bool HW_MPX;
// SIMD: 128-bit
bool HW_SSE;
bool HW_SSE2;
bool HW_SSE3;
bool HW_SSSE3;
bool HW_SSE41;
bool HW_SSE42;
bool HW_SSE4a;
bool HW_AES;
bool HW_SHA;
// SIMD: 256-bit
bool HW_AVX;
bool HW_XOP;
bool HW_FMA3;
bool HW_FMA4;
bool HW_AVX2;
// SIMD: 512-bit
bool HW_AVX512_F;
bool HW_AVX512_PF;
bool HW_AVX512_ER;
bool HW_AVX512_CD;
bool HW_AVX512_VL;
bool HW_AVX512_BW;
bool HW_AVX512_DQ;
bool HW_AVX512_IFMA;
bool HW_AVX512_VBMI;
public:
// Clears all flags (object is zero-filled).
cpu_x86();
// Queries the running machine and fills in the flags above.
void detect_host();
// Executes CPUID for leaf `x` (sub-leaf 0) and stores the four result
// registers in `out`.
static void cpuid(int32_t out[4], int32_t x);
// Returns the 12-character CPUID vendor string as a NUL-terminated
// pointer into a function-local static buffer.
static const char* get_vendor_string();
private:
// Helpers used by detect_host() to fill the OS_* flags.
static bool detect_OS_x64();
static bool detect_OS_AVX();
static bool detect_OS_AVX512();
};
////////////////////////////////////////////////////////////////////////////////
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
#include <intrin.h>
#else
# error "No cpuid intrinsic defined for processor architecture."
#endif
////////////////////////////////////////////////////////////////////////////////
// Runs CPUID for the requested leaf `x`, always with sub-leaf 0, and writes
// the four result registers into `out`.
void cpu_x86::cpuid(int32_t out[4], int32_t x) {
    const int subLeaf = 0;
    __cpuidex(out, x, subLeaf);
}
// Thin wrapper around the _xgetbv intrinsic: reads the extended control
// register selected by `x` and returns its value as a signed 64-bit integer.
__int64 xgetbv(unsigned int x) {
    const auto rawValue = _xgetbv(x);
    return static_cast<__int64>(rawValue);
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Detect 64-bit - Note that this snippet of code for detecting 64-bit has been copied from MSDN.
using LPFN_ISWOW64PROCESS = BOOL(WINAPI*) (HANDLE, PBOOL);
// Reports whether the current process runs under WOW64.
// IsWow64Process is looked up dynamically in kernel32; when the export is
// not available, or the call fails, FALSE is returned (a failure is also
// reported via printf).
BOOL IsWow64()
{
    LPFN_ISWOW64PROCESS const pfnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(
        GetModuleHandle(TEXT("kernel32")), "IsWow64Process");

    // Export not present: nothing to ask, report "not WOW64".
    if (pfnIsWow64Process == NULL)
    {
        return FALSE;
    }

    BOOL runsUnderWow64 = FALSE;
    if (pfnIsWow64Process(GetCurrentProcess(), &runsUnderWow64))
    {
        return runsUnderWow64;
    }

    // The query itself failed: fall back to the 32-bit answer.
    printf("Error Detecting Operating System.\n");
    printf("Defaulting to 32-bit OS.\n\n");
    return FALSE;
}
bool cpu_x86::detect_OS_x64() {
#ifdef _M_X64
return true;
#else
return IsWow64() != 0;
#endif
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Starts with every feature flag cleared by zero-filling the whole object.
cpu_x86::cpu_x86() {
    memset(this, 0, sizeof(cpu_x86));
}
// Returns true when AVX is usable: the CPU reports AVX, the OS uses
// XSAVE/XRSTORE, and the XCR feature mask has both bits of 0x6 set.
// Approach copied from: http://stackoverflow.com/a/22521619/922184
bool cpu_x86::detect_OS_AVX() {
    int regs[4];
    cpuid(regs, 1);

    // CPUID leaf 1, third register: bit 27 = OS uses XSAVE/XRSTORE, bit 28 = CPU AVX.
    const bool osUsesXsave = (regs[2] & (1 << 27)) != 0;
    const bool cpuHasAvx = (regs[2] & (1 << 28)) != 0;
    if (!(osUsesXsave && cpuHasAvx))
    {
        return false;
    }

    // Only safe to read the XCR once XSAVE support is confirmed above.
    const uint64_t enabledMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    return (enabledMask & 0x6) == 0x6;
}
// Returns true when detect_OS_AVX() succeeds and the XCR feature mask
// additionally has all bits of 0xe6 set.
bool cpu_x86::detect_OS_AVX512() {
    // Short-circuit keeps the XCR read behind the AVX check, as before.
    return detect_OS_AVX()
        && ((static_cast<uint64_t>(xgetbv(_XCR_XFEATURE_ENABLED_MASK)) & 0xe6) == 0xe6);
}
// Builds the 12-character CPU vendor string from CPUID leaf 0 and returns a
// pointer to a NUL-terminated, function-local static buffer (overwritten on
// every call).
const char* cpu_x86::get_vendor_string()
{
    static char vendor[13];

    int32_t regs[4];
    cpuid(regs, 0);

    // The text is spread over result registers 1, 3 and 2 - in that order.
    const int sourceOrder[3] = { 1, 3, 2 };
    for (int part = 0; part < 3; ++part)
    {
        memcpy(vendor + 4 * part, &regs[sourceOrder[part]], 4);
    }
    vendor[12] = '\0';
    return vendor;
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
void cpu_x86::detect_host()
{
// OS Features
OS_x64 = detect_OS_x64();
OS_AVX = detect_OS_AVX();
OS_AVX512 = detect_OS_AVX512();
// Vendor
const char* vendor = get_vendor_string();
if (strcmp(vendor, "GenuineIntel") == 0) {
Vendor_Intel = true;
}
else if (strcmp(vendor, "AuthenticAMD") == 0) {
Vendor_AMD = true;
}
int info[4];
cpuid(info, 0);
int nIds = info[0];
cpuid(info, 0x80000000);
uint32_t nExIds = info[0];
// Detect Features
if (nIds >= 0x00000001) {
cpuid(info, 0x00000001);
HW_MMX = (info[3] & ((int)1 << 23)) != 0;
HW_SSE = (info[3] & ((int)1 << 25)) != 0;
HW_SSE2 = (info[3] & ((int)1 << 26)) != 0;
HW_SSE3 = (info[2] & ((int)1 << 0)) != 0;
HW_SSSE3 = (info[2] & ((int)1 << 9)) != 0;
HW_SSE41 = (info[2] & ((int)1 << 19)) != 0;
HW_SSE42 = (info[2] & ((int)1 << 20)) != 0;
HW_AES = (info[2] & ((int)1 << 25)) != 0;
HW_AVX = (info[2] & ((int)1 << 28)) != 0;
HW_FMA3 = (info[2] & ((int)1 << 12)) != 0;
HW_RDRAND = (info[2] & ((int)1 << 30)) != 0;
}
if (nIds >= 0x00000007) {
cpuid(info, 0x00000007);
HW_AVX2 = (info[1] & ((int)1 << 5)) != 0;
HW_BMI1 = (info[1] & ((int)1 << 3)) != 0;
HW_BMI2 = (info[1] & ((int)1 << 8)) != 0;
HW_ADX = (info[1] & ((int)1 << 19)) != 0;
HW_MPX = (info[1] & ((int)1 << 14)) != 0;
HW_SHA = (info[1] & ((int)1 << 29)) != 0;
HW_PREFETCHWT1 = (info[2] & ((int)1 << 0)) != 0;
HW_AVX512_F = (info[1] & ((int)1 << 16)) != 0;
HW_AVX512_CD = (info[1] & ((int)1 << 28)) != 0;
HW_AVX512_PF = (info[1] & ((int)1 << 26)) != 0;
HW_AVX512_ER = (info[1] & ((int)1 << 27)) != 0;
HW_AVX512_VL = (info[1] & ((int)1 << 31)) != 0;
HW_AVX512_BW = (info[1] & ((int)1 << 30)) != 0;
HW_AVX512_DQ = (info[1] & ((int)1 << 17)) != 0;
HW_AVX512_IFMA = (info[1] & ((int)1 << 21)) != 0;
HW_AVX512_VBMI = (info[2] & ((int)1 << 1)) != 0;
}
if (nExIds >= 0x80000001) {
cpuid(info, 0x80000001);
HW_x64 = (info[3] & ((int)1 << 29)) != 0;
HW_ABM = (info[2] & ((int)1 << 5)) != 0;
HW_SSE4a = (info[2] & ((int)1 << 6)) != 0;
HW_FMA4 = (info[2] & ((int)1 << 16)) != 0;
HW_XOP = (info[2] & ((int)1 << 11)) != 0;
}
}
extern "C" bool CanUseCPUFeature(const CPU_OS_FEATURES featToUse)
{
cpu_x86 features;
features.detect_host();
switch (featToUse)
{
case OS_x64:
return (features.OS_x64);
case CPU_SSE2:
return (features.HW_SSE2);
case CPU_OS_AVX2:
#ifdef __AVX__
return (features.HW_AVX2 && features.OS_AVX);
#else
return false;
#endif
case CPU_OS_AVX512:
#ifdef __AVX__
return (features.HW_AVX512_F && features.OS_AVX512);
#else
return false;
#endif
default:
break;
}
return false;
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

View File

@ -136,25 +136,6 @@ bool MRU_MergeSave(LPMRULIST pmru, bool, bool, bool);
// ----------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////////
// Feature Detector by Mysticial (Alexander Yee)
// https://github.com/Mysticial/FeatureDetector
////////////////////////////////////////////////////////////////////////////////
// Features that can be queried through CanUseCPUFeature().
typedef enum
{
OS_x64 = 1,        // operating system is 64-bit
CPU_SSE2 = 2,      // CPU reports SSE2
CPU_OS_AVX2 = 3,   // CPU reports AVX2 and the OS supports AVX (AVX-enabled builds only)
CPU_OS_AVX512 = 4  // CPU reports AVX512-F and the OS supports AVX512 (AVX-enabled builds only)
}
CPU_OS_FEATURES;
// Returns true if `feature` can be used on the current host; false for
// unsupported or unknown features.
bool CanUseCPUFeature(const CPU_OS_FEATURES feature);
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
#ifdef __cplusplus
}
#endif

View File

@ -24,11 +24,6 @@
#include <limits.h>
#include <shellapi.h>
#include <emmintrin.h>
#ifdef __AVX__
#include <immintrin.h>
#endif
#include "Styles.h"
#include "Dialogs.h"
#include "resource.h"
@ -831,26 +826,6 @@ bool EditCopyAppend(HWND hwnd, bool bAppend)
// EditDetectEOLMode() - moved here to handle Unicode files correctly
// by zufuliu (https://github.com/zufuliu/notepad2)
//
//=============================================================================
// https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64
// use __popcnt() or _mm_popcnt_u32() require testing __cpuid():
/*
* int cpuInfo[4];
* __cpuid(cpuInfo, 0x00000001);
* const BOOL cpuPOPCNT = cpuInfo[2] & (1 << 23);
*/
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Bit Twiddling Hacks copyright 1997-2005 Sean Eron Anderson
// Counts the bits set in a 32-bit value using the parallel bit-count from
// "Bit Twiddling Hacks": per-pair sums, then per-nibble sums, then per-byte
// sums, which a multiply finally adds up into the top byte.
static __forceinline unsigned int bth_popcount(unsigned int v) {
    const unsigned int pairSums = v - ((v >> 1) & 0x55555555U);
    const unsigned int nibbleSums = (pairSums & 0x33333333U) + ((pairSums >> 2) & 0x33333333U);
    const unsigned int byteSums = (nibbleSums + (nibbleSums >> 4)) & 0x0F0F0F0FU;
    return (byteSums * 0x01010101U) >> 24;
}
// ----------------------------------------------------------------------------
void EditDetectEOLMode(LPCSTR lpData, size_t cbData, EditFileIOStatus* const status)
{
if (!lpData || (cbData == 0)) { return; }
@ -869,152 +844,46 @@ void EditDetectEOLMode(LPCSTR lpData, size_t cbData, EditFileIOStatus* const sta
const uint8_t* ptr = (const uint8_t*)lpData;
// No NULL-terminated requirement for *ptr == '\n'
const uint8_t* const end = ptr + cbData - 1;
const uint8_t* const end = (const uint8_t*)lpData + cbData - 1;
#ifdef __AVX__
if (Flags.bHas_AVX2_CPU_OS)
{
#define LAST_CR_MASK (1U << (sizeof(__m256i) - 1))
const __m256i vectCR = _mm256_set1_epi8('\r');
const __m256i vectLF = _mm256_set1_epi8('\n');
while (ptr + sizeof(__m256i) <= end) {
// unaligned loading: line starts at random position.
const __m256i chunk = _mm256_loadu_si256((__m256i*)ptr);
uint32_t maskCR = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, vectCR));
uint32_t maskLF = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, vectLF));
ptr += sizeof(__m256i);
if (maskCR) {
if (maskCR & LAST_CR_MASK) {
maskCR &= LAST_CR_MASK - 1;
if (*ptr == '\n') {
// CR+LF across boundary
++ptr;
++lineCountCRLF;
}
else {
// clear highest bit (last CR) to avoid using following code:
// maskCR = (maskCR_LF ^ maskLF) | (maskCR & LAST_CR_MASK);
++lineCountCR;
}
do {
// skip to line end
uint8_t ch;
uint8_t type = 0;
while (ptr < end && ((ch = *ptr++) > '\r' || (type = eol_table[ch]) == 0)) {} // nop
switch (type) {
case 1: //'\n'
++lineCountLF;
break;
case 2: //'\r'
if (*ptr == '\n') {
++ptr;
++lineCountCRLF;
}
// maskCR and maskLF never have some bit set. after shifting maskCR by 1 bit,
// the bits both set in maskCR and maskLF represents CR+LF;
// the bits only set in maskCR or maskLF represents individual CR or LF.
const uint32_t maskCRLF = (maskCR << 1) & maskLF; // CR+LF
const uint32_t maskCR_LF = (maskCR << 1) ^ maskLF;// CR alone or LF alone
maskLF = maskCR_LF & maskLF; // LF alone
maskCR = maskCR_LF ^ maskLF; // CR alone (with one position offset)
if (maskCRLF) {
lineCountCRLF += _mm_popcnt_u32(maskCRLF);
}
if (maskCR) {
lineCountCR += _mm_popcnt_u32(maskCR);
}
}
if (maskLF) {
lineCountLF += _mm_popcnt_u32(maskLF);
}
}
#undef LAST_CR_MASK
}
else
#endif
if (Flags.bHas_SSE2_CPU)
{
#define LAST_CR_MASK (1U << (2*sizeof(__m128i) - 1))
const __m128i vectCR = _mm_set1_epi8('\r');
const __m128i vectLF = _mm_set1_epi8('\n');
while (ptr + 2 * sizeof(__m128i) <= end) {
// unaligned loading: line starts at random position.
__m128i chunk = _mm_loadu_si128((__m128i*)ptr);
uint32_t maskCR = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vectCR));
uint32_t maskLF = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vectLF));
chunk = _mm_loadu_si128((__m128i*)(ptr + sizeof(__m128i)));
maskCR |= ((uint32_t)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vectCR))) << sizeof(__m128i);
maskLF |= ((uint32_t)_mm_movemask_epi8(_mm_cmpeq_epi8(chunk, vectLF))) << sizeof(__m128i);
ptr += 2 * sizeof(__m128i);
if (maskCR) {
if (maskCR & LAST_CR_MASK) {
maskCR &= LAST_CR_MASK - 1;
if (*ptr == '\n') {
// CR+LF across boundary
++ptr;
++lineCountCRLF;
}
else {
// clear highest bit (last CR) to avoid using following code:
// maskCR = (maskCR_LF ^ maskLF) | (maskCR & LAST_CR_MASK);
++lineCountCR;
}
}
// maskCR and maskLF never have some bit set. after shifting maskCR by 1 bit,
// the bits both set in maskCR and maskLF represents CR+LF;
// the bits only set in maskCR or maskLF represents individual CR or LF.
const uint32_t maskCRLF = (maskCR << 1) & maskLF; // CR+LF
const uint32_t maskCR_LF = (maskCR << 1) ^ maskLF;// CR alone or LF alone
maskLF = maskCR_LF & maskLF; // LF alone
maskCR = maskCR_LF ^ maskLF; // CR alone (with one position offset)
if (maskCRLF) {
lineCountCRLF += bth_popcount(maskCRLF);
}
if (maskCR) {
lineCountCR += bth_popcount(maskCR);
}
}
if (maskLF) {
lineCountLF += bth_popcount(maskLF);
}
}
#undef LAST_CR_MASK
}
else { // no intrinsic optimization
do {
// skip to line end
uint8_t ch;
uint8_t type = 0;
while (ptr < end && ((ch = *ptr++) > '\r' || (type = eol_table[ch]) == 0)) {
// nop
}
switch (type) {
case 1: //'\n'
++lineCountLF;
break;
case 2: //'\r'
if (*ptr == '\n') {
++ptr;
++lineCountCRLF;
}
else {
++lineCountCR;
}
break;
}
} while (ptr < end);
if (ptr == end) {
switch (*ptr) {
case '\n':
++lineCountLF;
break;
case '\r':
else {
++lineCountCR;
break;
}
}
break;
}
} while (ptr < end);
if (ptr == end) {
switch (*ptr) {
case '\n':
++lineCountLF;
break;
case '\r':
++lineCountCR;
break;
}
}
// values must kept in same order as SC_EOL_CRLF(0), SC_EOL_CR(1), SC_EOL_LF(2)
DocLn const linesMax = max_ln(max_ln(lineCountCRLF, lineCountCR), lineCountLF);
DocLn linesCount[3] = { 0, 0, 0 };
linesCount[SC_EOL_CRLF] = lineCountCRLF;
linesCount[SC_EOL_CR] = lineCountCR;
linesCount[SC_EOL_LF] = lineCountLF;
linesCount[SC_EOL_CR] = lineCountCR;
linesCount[SC_EOL_LF] = lineCountLF;
int iEOLMode = status->iEOLMode;
if (linesMax != linesCount[iEOLMode])
@ -6365,7 +6234,7 @@ static INT_PTR CALLBACK EditFindReplaceDlgProcW(HWND hwnd,UINT umsg,WPARAM wPara
{
WCHAR* wszFind = s_tchBuf;
WCHAR wszRepl[FNDRPL_BUFFER] = { L'\0' };
GetDlgItemTextW(hwnd, IDC_FINDTEXT, wszFind, COUNTOF(wszFind));
GetDlgItemTextW(hwnd, IDC_FINDTEXT, wszFind, COUNTOF(s_tchBuf));
GetDlgItemTextW(hwnd, IDC_REPLACETEXT, wszRepl, COUNTOF(wszRepl));
SetDlgItemTextW(hwnd, IDC_FINDTEXT, wszRepl);
SetDlgItemTextW(hwnd, IDC_REPLACETEXT, wszFind);
@ -6846,8 +6715,6 @@ static char* _GetReplaceString(HWND hwnd, LPCEDITFINDREPLACE lpefr, int* iRepla
//
bool EditReplace(HWND hwnd, LPCEDITFINDREPLACE lpefr)
{
_BEGIN_UNDO_ACTION_
int iReplaceMsg = SCI_REPLACETARGET;
char* pszReplace = _GetReplaceString(hwnd, lpefr, &iReplaceMsg);
if (!pszReplace) {
@ -6887,6 +6754,8 @@ bool EditReplace(HWND hwnd, LPCEDITFINDREPLACE lpefr)
return EditFindNext(hwnd, lpefr, false, false);
}
_BEGIN_UNDO_ACTION_
DocPos const saveTargetBeg = SciCall_GetTargetStart();
DocPos const saveTargetEnd = SciCall_GetTargetEnd();
@ -6926,8 +6795,6 @@ int EditReplaceAllInRange(HWND hwnd, LPCEDITFINDREPLACE lpefr, DocPos iStartPos,
{
int iCount = 0;
_BEGIN_UNDO_ACTION_
if (iStartPos > iEndPos) { swapos(&iStartPos, &iEndPos); }
char szFind[FNDRPL_BUFFER];
@ -6981,6 +6848,8 @@ int EditReplaceAllInRange(HWND hwnd, LPCEDITFINDREPLACE lpefr, DocPos iStartPos,
DocPos searchStart = iStartPos;
DocPos totalReplLength = 0;
_BEGIN_UNDO_ACTION_
_IGNORE_NOTIFY_CHANGE_;
for (ReplPos_t* pPosPair = (ReplPos_t*)utarray_front(ReplPosUTArray);

View File

@ -676,9 +676,6 @@ static void _InitGlobals()
Flags.bSettingsFileLocked = DefaultFlags.bSettingsFileLocked = false;
Flags.bHas_SSE2_CPU = DefaultFlags.bHas_SSE2_CPU = false;
Flags.bHas_AVX2_CPU_OS = DefaultFlags.bHas_AVX2_CPU_OS = false;
FocusedView.HideNonMatchedLines = false;
FocusedView.CodeFoldingAvailable = false;
FocusedView.ShowCodeFolding = true;
@ -807,9 +804,6 @@ int WINAPI wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE hPrevInstance,
Globals.hPrevInst = hPrevInstance;
Globals.hndlProcessHeap = GetProcessHeap();
Flags.bHas_SSE2_CPU = CanUseCPUFeature(CPU_SSE2);
Flags.bHas_AVX2_CPU_OS = CanUseCPUFeature(CPU_OS_AVX2);
WCHAR wchAppDir[2 * MAX_PATH + 4] = { L'\0' };
GetModuleFileName(NULL,wchAppDir,COUNTOF(wchAppDir));
PathCchRemoveFileSpec(wchAppDir, COUNTOF(wchAppDir));

View File

@ -240,7 +240,6 @@
<WarningLevel>Level4</WarningLevel>
<DebugInformationFormat>None</DebugInformationFormat>
<TreatWarningAsError>true</TreatWarningAsError>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
<LanguageStandard>stdcpp17</LanguageStandard>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
@ -250,6 +249,8 @@
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<IntrinsicFunctions>true</IntrinsicFunctions>
<ExceptionHandling>Sync</ExceptionHandling>
<EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
<BufferSecurityCheck>false</BufferSecurityCheck>
</ClCompile>
<Link>
<AdditionalDependencies>comctl32.lib;imm32.lib;shlwapi.lib;uxtheme.lib;muiload.lib;dwrite.lib;scintilla.lib;%(AdditionalDependencies)</AdditionalDependencies>
@ -318,7 +319,8 @@
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<IntrinsicFunctions>true</IntrinsicFunctions>
<ExceptionHandling>Sync</ExceptionHandling>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
<BufferSecurityCheck>false</BufferSecurityCheck>
</ClCompile>
<Link>
<AdditionalDependencies>comctl32.lib;imm32.lib;shlwapi.lib;uxtheme.lib;muiload.lib;dwrite.lib;scintilla.lib;%(AdditionalDependencies)</AdditionalDependencies>

View File

@ -507,8 +507,6 @@ typedef struct _flags_t
bool bSearchPathIfRelative;
bool bSettingsFileLocked;
bool bHas_SSE2_CPU;
bool bHas_AVX2_CPU_OS;
} FLAGS_T, *PFLAGS_T;

View File

@ -8,7 +8,7 @@
#define SAPPNAME "Notepad3"
#define VERSION_MAJOR 5
#define VERSION_MINOR 20
#define VERSION_REV 228
#define VERSION_REV 229
#define VERSION_BUILD 1
#define SCINTILLA_VER 430
#define ONIGURUMA_REGEX_VER 6.9.4
@ -16,4 +16,4 @@
#define TINYEXPR_VER 2018.05.11
#define UTHASH_VER 2.1.0
#define VERSION_PATCH RC2
#define VERSION_COMMIT_ID t7820-rk
#define VERSION_COMMIT_ID nebukadn