Notepad3/src/sort.h
Rainer Kottenhoff fbf83129a5 + fix: file save problem after switching encoding to GB2312 (52936)
+ sty: code styling reformat (AStyle: OTBS)
+ fix: file encoding analysis with 7-bit ASCII only
2020-11-25 12:47:14 +01:00

3444 lines
89 KiB
C

/* Copyright (c) 2010-2019 Christopher Swenson. */
/* Copyright (c) 2012 Vojtech Fried. */
/* Copyright (c) 2012 Google Inc. All Rights Reserved. */
/* https://github.com/swenson/sort */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#ifndef SORT_NAME
#error "Must declare SORT_NAME"
#endif
#ifndef SORT_TYPE
#error "Must declare SORT_TYPE"
#endif
#ifndef SORT_CMP
#define SORT_CMP(x, y) ((x) < (y) ? -1 : ((y) < (x) ? 1 : 0))
#endif
#ifdef __cplusplus
#ifndef SORT_SAFE_CPY
#define SORT_SAFE_CPY 0
#endif
#else
#undef SORT_SAFE_CPY
#define SORT_SAFE_CPY 0
#endif
#ifndef TIM_SORT_STACK_SIZE
#define TIM_SORT_STACK_SIZE 128
#endif
#ifndef TIM_SORT_MIN_GALLOP
#define TIM_SORT_MIN_GALLOP 7
#endif
#ifndef SORT_SWAP
#define SORT_SWAP(x,y) {SORT_TYPE _sort_swap_temp = (x); (x) = (y); (y) = _sort_swap_temp;}
#endif
/* Common, type-agnostic functions and constants that we don't want to declare twice. */
#ifndef SORT_COMMON_H
#define SORT_COMMON_H
#ifndef MAX
#define MAX(x,y) (((x) > (y) ? (x) : (y)))
#endif
#ifndef MIN
#define MIN(x,y) (((x) < (y) ? (x) : (y)))
#endif
static int compute_minrun(const uint64_t);
/* From http://oeis.org/classic/A102549 */
static const uint64_t shell_gaps[48] = {1, 4, 10, 23, 57, 132, 301, 701, 1750, 4376, 10941, 27353, 68383, 170958, 427396, 1068491, 2671228, 6678071, 16695178, 41737946, 104344866, 260862166, 652155416, 1630388541, 4075971353LL, 10189928383LL, 25474820958LL, 63687052396LL, 159217630991LL, 398044077478LL, 995110193696LL, 2487775484241LL, 6219438710603LL, 15548596776508LL, 38871491941271LL, 97178729853178LL, 242946824632946LL, 607367061582366LL, 1518417653955916LL, 3796044134889791LL, 9490110337224478LL, 23725275843061196LL, 59313189607652991LL, 148282974019132478LL, 370707435047831196LL, 926768587619577991LL, 2316921469048944978LL, 5792303672622362446LL};
#ifndef CLZ
/* clang-only */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#if __has_builtin(__builtin_clzll) || (defined(__GNUC__) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3)))
#define CLZ __builtin_clzll
#else
static int clzll(uint64_t);
/* adapted from Hacker's Delight */
/* Portable count-leading-zeros for 64-bit values (used when no compiler
   builtin is available).  Returns 64 for x == 0, otherwise the number of
   zero bits above the most significant set bit. */
static int clzll(uint64_t x)
{
    int zeros = 0;
    int width;

    if (x == 0) {
        return 64;
    }

    /* Halving search: whenever the top `width` bits are all clear, account
       for them and shift the remaining bits up to the top. */
    for (width = 32; width > 0; width >>= 1) {
        if ((x >> (64 - width)) == 0) {
            zeros += width;
            x <<= width;
        }
    }

    return zeros;
}
#define CLZ clzll
#endif
#endif
/* Reduce `size` to its six most significant bits and round up by one if any
   of the discarded low bits were set.  Sizes below 64 are returned as-is. */
static __inline int compute_minrun(const uint64_t size)
{
    /* position (1-based) of the highest set bit */
    const int significant_bits = 64 - CLZ(size);
    /* number of low bits to discard so that at most six bits remain */
    const int shift = MAX(significant_bits, 6) - 6;
    const uint64_t discarded = size & ((1ULL << shift) - 1);
    const int minrun = (int)(size >> shift);

    return discarded ? minrun + 1 : minrun;
}
/* Block size used by the in-place merge sort: 2 for fewer than 16 elements,
   otherwise the power of two 2^((2 * k) / 3) with k = 62 - CLZ(len). */
static __inline size_t rbnd(size_t len)
{
    if (len >= 16) {
        const int k = 62 - CLZ(len);
        const int exponent = (2 * k) / 3;
        return 1ULL << exponent;
    }

    return 2;
}
#endif /* SORT_COMMON_H */
#define SORT_CONCAT(x, y) x ## _ ## y
#define SORT_MAKE_STR1(x, y) SORT_CONCAT(x,y)
#define SORT_MAKE_STR(x) SORT_MAKE_STR1(SORT_NAME,x)
#ifndef SMALL_SORT_BND
#define SMALL_SORT_BND 16
#endif
#ifndef SMALL_SORT
#define SMALL_SORT BITONIC_SORT
/*#define SMALL_SORT BINARY_INSERTION_SORT*/
#endif
#define SORT_TYPE_CPY SORT_MAKE_STR(sort_type_cpy)
#define SORT_TYPE_MOVE SORT_MAKE_STR(sort_type_move)
#define SORT_NEW_BUFFER SORT_MAKE_STR(sort_new_buffer)
#define SORT_DELETE_BUFFER SORT_MAKE_STR(sort_delete_buffer)
#define BITONIC_SORT SORT_MAKE_STR(bitonic_sort)
#define BINARY_INSERTION_FIND SORT_MAKE_STR(binary_insertion_find)
#define BINARY_INSERTION_SORT_START SORT_MAKE_STR(binary_insertion_sort_start)
#define BINARY_INSERTION_SORT SORT_MAKE_STR(binary_insertion_sort)
#define REVERSE_ELEMENTS SORT_MAKE_STR(reverse_elements)
#define COUNT_RUN SORT_MAKE_STR(count_run)
#define CHECK_INVARIANT SORT_MAKE_STR(check_invariant)
#define TIM_SORT SORT_MAKE_STR(tim_sort)
#define TIM_SORT_GALLOP SORT_MAKE_STR(tim_sort_gallop)
#define TIM_SORT_RESIZE SORT_MAKE_STR(tim_sort_resize)
#define TIM_SORT_MERGE SORT_MAKE_STR(tim_sort_merge)
#define TIM_SORT_MERGE_LEFT SORT_MAKE_STR(tim_sort_merge_left)
#define TIM_SORT_MERGE_RIGHT SORT_MAKE_STR(tim_sort_merge_right)
#define TIM_SORT_COLLAPSE SORT_MAKE_STR(tim_sort_collapse)
#define HEAP_SORT SORT_MAKE_STR(heap_sort)
#define MEDIAN SORT_MAKE_STR(median)
#define QUICK_SORT SORT_MAKE_STR(quick_sort)
#define MERGE_SORT SORT_MAKE_STR(merge_sort)
#define MERGE_SORT_RECURSIVE SORT_MAKE_STR(merge_sort_recursive)
#define MERGE_SORT_IN_PLACE SORT_MAKE_STR(merge_sort_in_place)
#define MERGE_SORT_IN_PLACE_RMERGE SORT_MAKE_STR(merge_sort_in_place_rmerge)
#define MERGE_SORT_IN_PLACE_BACKMERGE SORT_MAKE_STR(merge_sort_in_place_backmerge)
#define MERGE_SORT_IN_PLACE_FRONTMERGE SORT_MAKE_STR(merge_sort_in_place_frontmerge)
#define MERGE_SORT_IN_PLACE_ASWAP SORT_MAKE_STR(merge_sort_in_place_aswap)
#define SELECTION_SORT SORT_MAKE_STR(selection_sort)
#define SHELL_SORT SORT_MAKE_STR(shell_sort)
#define QUICK_SORT_PARTITION SORT_MAKE_STR(quick_sort_partition)
#define QUICK_SORT_RECURSIVE SORT_MAKE_STR(quick_sort_recursive)
#define HEAP_SIFT_DOWN SORT_MAKE_STR(heap_sift_down)
#define HEAPIFY SORT_MAKE_STR(heapify)
#define TIM_SORT_RUN_T SORT_MAKE_STR(tim_sort_run_t)
#define TEMP_STORAGE_T SORT_MAKE_STR(temp_storage_t)
#define PUSH_NEXT SORT_MAKE_STR(push_next)
#define GRAIL_SWAP1 SORT_MAKE_STR(grail_swap1)
#define REC_STABLE_SORT SORT_MAKE_STR(rec_stable_sort)
#define GRAIL_REC_MERGE SORT_MAKE_STR(grail_rec_merge)
#define GRAIL_SORT_DYN_BUFFER SORT_MAKE_STR(grail_sort_dyn_buffer)
#define GRAIL_SORT_FIXED_BUFFER SORT_MAKE_STR(grail_sort_fixed_buffer)
#define GRAIL_COMMON_SORT SORT_MAKE_STR(grail_common_sort)
#define GRAIL_SORT SORT_MAKE_STR(grail_sort)
#define GRAIL_COMBINE_BLOCKS SORT_MAKE_STR(grail_combine_blocks)
#define GRAIL_LAZY_STABLE_SORT SORT_MAKE_STR(grail_lazy_stable_sort)
#define GRAIL_MERGE_WITHOUT_BUFFER SORT_MAKE_STR(grail_merge_without_buffer)
#define GRAIL_ROTATE SORT_MAKE_STR(grail_rotate)
#define GRAIL_BIN_SEARCH_LEFT SORT_MAKE_STR(grail_bin_search_left)
#define GRAIL_BUILD_BLOCKS SORT_MAKE_STR(grail_build_blocks)
#define GRAIL_FIND_KEYS SORT_MAKE_STR(grail_find_keys)
#define GRAIL_MERGE_BUFFERS_LEFT_WITH_X_BUF SORT_MAKE_STR(grail_merge_buffers_left_with_x_buf)
#define GRAIL_BIN_SEARCH_RIGHT SORT_MAKE_STR(grail_bin_search_right)
#define GRAIL_MERGE_BUFFERS_LEFT SORT_MAKE_STR(grail_merge_buffers_left)
#define GRAIL_SMART_MERGE_WITH_X_BUF SORT_MAKE_STR(grail_smart_merge_with_x_buf)
#define GRAIL_MERGE_LEFT_WITH_X_BUF SORT_MAKE_STR(grail_merge_left_with_x_buf)
#define GRAIL_SMART_MERGE_WITHOUT_BUFFER SORT_MAKE_STR(grail_smart_merge_without_buffer)
#define GRAIL_SMART_MERGE_WITH_BUFFER SORT_MAKE_STR(grail_smart_merge_with_buffer)
#define GRAIL_MERGE_RIGHT SORT_MAKE_STR(grail_merge_right)
#define GRAIL_MERGE_LEFT SORT_MAKE_STR(grail_merge_left)
#define GRAIL_SWAP_N SORT_MAKE_STR(grail_swap_n)
#define SQRT_SORT SORT_MAKE_STR(sqrt_sort)
#define SQRT_SORT_BUILD_BLOCKS SORT_MAKE_STR(sqrt_sort_build_blocks)
#define SQRT_SORT_MERGE_BUFFERS_LEFT_WITH_X_BUF SORT_MAKE_STR(sqrt_sort_merge_buffers_left_with_x_buf)
#define SQRT_SORT_MERGE_DOWN SORT_MAKE_STR(sqrt_sort_merge_down)
#define SQRT_SORT_MERGE_LEFT_WITH_X_BUF SORT_MAKE_STR(sqrt_sort_merge_left_with_x_buf)
#define SQRT_SORT_MERGE_RIGHT SORT_MAKE_STR(sqrt_sort_merge_right)
#define SQRT_SORT_SWAP_N SORT_MAKE_STR(sqrt_sort_swap_n)
#define SQRT_SORT_SWAP_1 SORT_MAKE_STR(sqrt_sort_swap_1)
#define SQRT_SORT_SMART_MERGE_WITH_X_BUF SORT_MAKE_STR(sqrt_sort_smart_merge_with_x_buf)
#define SQRT_SORT_SORT_INS SORT_MAKE_STR(sqrt_sort_sort_ins)
#define SQRT_SORT_COMBINE_BLOCKS SORT_MAKE_STR(sqrt_sort_combine_blocks)
#define SQRT_SORT_COMMON_SORT SORT_MAKE_STR(sqrt_sort_common_sort)
#define BUBBLE_SORT SORT_MAKE_STR(bubble_sort)
#ifndef MAX
#define MAX(x,y) (((x) > (y) ? (x) : (y)))
#endif
#ifndef MIN
#define MIN(x,y) (((x) < (y) ? (x) : (y)))
#endif
#ifndef SORT_CSWAP
#define SORT_CSWAP(x, y) { if(SORT_CMP((x),(y)) > 0) {SORT_SWAP((x),(y));}}
#endif
/* Descriptor of one run: a start position plus an element count.
   NOTE(review): presumably one entry of the timsort run stack; the code that
   consumes it lies outside this part of the file - confirm there. */
typedef struct {
size_t start;   /* presumably the index of the run's first element */
size_t length;  /* presumably the number of elements in the run */
} TIM_SORT_RUN_T;
void SHELL_SORT(SORT_TYPE *dst, const size_t size);
void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size);
void HEAP_SORT(SORT_TYPE *dst, const size_t size);
void QUICK_SORT(SORT_TYPE *dst, const size_t size);
void MERGE_SORT(SORT_TYPE *dst, const size_t size);
void MERGE_SORT_IN_PLACE(SORT_TYPE *dst, const size_t size);
void SELECTION_SORT(SORT_TYPE *dst, const size_t size);
void TIM_SORT(SORT_TYPE *dst, const size_t size);
void BUBBLE_SORT(SORT_TYPE *dst, const size_t size);
void BITONIC_SORT(SORT_TYPE *dst, const size_t size);
void REC_STABLE_SORT(SORT_TYPE *dst, const size_t size);
void GRAIL_SORT_DYN_BUFFER(SORT_TYPE *dst, const size_t size);
void GRAIL_SORT_FIXED_BUFFER(SORT_TYPE *dst, const size_t size);
void GRAIL_SORT(SORT_TYPE *dst, const size_t size);
void SQRT_SORT(SORT_TYPE *dst, const size_t size);
/* The full implementation of a bitonic sort is not here. Since we only want to use
sorting networks for small length lists we create optimal sorting networks for
lists of length <= 16 and call out to BINARY_INSERTION_SORT for anything larger
than 16.
Optimal sorting networks for small length lists.
Taken from https://pages.ripco.net/~jgamble/nw.html */
#define BITONIC_SORT_2 SORT_MAKE_STR(bitonic_sort_2)
/* Sorting network for exactly 2 elements: a single compare-and-swap
   leaves dst[0] and dst[1] in order. */
static __inline void BITONIC_SORT_2(SORT_TYPE *dst)
{
SORT_CSWAP(dst[0], dst[1]);
}
#define BITONIC_SORT_3 SORT_MAKE_STR(bitonic_sort_3)
/* Sorting network for exactly 3 elements (dst[0..2]): 3 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_3(SORT_TYPE *dst)
{
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[0], dst[1]);
}
#define BITONIC_SORT_4 SORT_MAKE_STR(bitonic_sort_4)
/* Sorting network for exactly 4 elements (dst[0..3]): 5 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_4(SORT_TYPE *dst)
{
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[1], dst[2]);
}
#define BITONIC_SORT_5 SORT_MAKE_STR(bitonic_sort_5)
/* Sorting network for exactly 5 elements (dst[0..4]): 9 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_5(SORT_TYPE *dst)
{
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[2], dst[4]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[0], dst[3]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[1], dst[2]);
}
#define BITONIC_SORT_6 SORT_MAKE_STR(bitonic_sort_6)
/* Sorting network for exactly 6 elements (dst[0..5]): 12 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_6(SORT_TYPE *dst)
{
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[4], dst[5]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[3], dst[5]);
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[2], dst[5]);
SORT_CSWAP(dst[0], dst[3]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[2], dst[4]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[2], dst[3]);
}
#define BITONIC_SORT_7 SORT_MAKE_STR(bitonic_sort_7)
/* Sorting network for exactly 7 elements (dst[0..6]): 16 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_7(SORT_TYPE *dst)
{
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[3], dst[5]);
SORT_CSWAP(dst[4], dst[6]);
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[4], dst[5]);
SORT_CSWAP(dst[2], dst[6]);
SORT_CSWAP(dst[0], dst[4]);
SORT_CSWAP(dst[1], dst[5]);
SORT_CSWAP(dst[0], dst[3]);
SORT_CSWAP(dst[2], dst[5]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[2], dst[4]);
SORT_CSWAP(dst[2], dst[3]);
}
#define BITONIC_SORT_8 SORT_MAKE_STR(bitonic_sort_8)
/* Sorting network for exactly 8 elements (dst[0..7]): 19 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_8(SORT_TYPE *dst)
{
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[4], dst[5]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[4], dst[6]);
SORT_CSWAP(dst[5], dst[7]);
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[0], dst[4]);
SORT_CSWAP(dst[3], dst[7]);
SORT_CSWAP(dst[1], dst[5]);
SORT_CSWAP(dst[2], dst[6]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[3], dst[6]);
SORT_CSWAP(dst[2], dst[4]);
SORT_CSWAP(dst[3], dst[5]);
SORT_CSWAP(dst[3], dst[4]);
}
#define BITONIC_SORT_9 SORT_MAKE_STR(bitonic_sort_9)
/* Sorting network for exactly 9 elements (dst[0..8]): 25 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_9(SORT_TYPE *dst)
{
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[4], dst[5]);
SORT_CSWAP(dst[7], dst[8]);
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[2], dst[5]);
SORT_CSWAP(dst[0], dst[3]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[5], dst[8]);
SORT_CSWAP(dst[3], dst[6]);
SORT_CSWAP(dst[4], dst[7]);
SORT_CSWAP(dst[2], dst[5]);
SORT_CSWAP(dst[0], dst[3]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[5], dst[7]);
SORT_CSWAP(dst[2], dst[6]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[4], dst[6]);
SORT_CSWAP(dst[2], dst[4]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[2], dst[3]);
}
#define BITONIC_SORT_10 SORT_MAKE_STR(bitonic_sort_10)
/* Sorting network for exactly 10 elements (dst[0..9]): 29 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_10(SORT_TYPE *dst)
{
SORT_CSWAP(dst[4], dst[9]);
SORT_CSWAP(dst[3], dst[8]);
SORT_CSWAP(dst[2], dst[7]);
SORT_CSWAP(dst[1], dst[6]);
SORT_CSWAP(dst[0], dst[5]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[6], dst[9]);
SORT_CSWAP(dst[0], dst[3]);
SORT_CSWAP(dst[5], dst[8]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[3], dst[6]);
SORT_CSWAP(dst[7], dst[9]);
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[2], dst[4]);
SORT_CSWAP(dst[5], dst[7]);
SORT_CSWAP(dst[8], dst[9]);
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[4], dst[6]);
SORT_CSWAP(dst[7], dst[8]);
SORT_CSWAP(dst[3], dst[5]);
SORT_CSWAP(dst[2], dst[5]);
SORT_CSWAP(dst[6], dst[8]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[4], dst[7]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[4], dst[5]);
}
#define BITONIC_SORT_11 SORT_MAKE_STR(bitonic_sort_11)
/* Sorting network for exactly 11 elements (dst[0..10]): 35 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_11(SORT_TYPE *dst)
{
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[4], dst[5]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[8], dst[9]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[5], dst[7]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[4], dst[6]);
SORT_CSWAP(dst[8], dst[10]);
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[9], dst[10]);
SORT_CSWAP(dst[0], dst[4]);
SORT_CSWAP(dst[3], dst[7]);
SORT_CSWAP(dst[1], dst[5]);
SORT_CSWAP(dst[6], dst[10]);
SORT_CSWAP(dst[4], dst[8]);
SORT_CSWAP(dst[5], dst[9]);
SORT_CSWAP(dst[2], dst[6]);
SORT_CSWAP(dst[0], dst[4]);
SORT_CSWAP(dst[3], dst[8]);
SORT_CSWAP(dst[1], dst[5]);
SORT_CSWAP(dst[6], dst[10]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[8], dst[9]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[7], dst[10]);
SORT_CSWAP(dst[3], dst[5]);
SORT_CSWAP(dst[6], dst[8]);
SORT_CSWAP(dst[2], dst[4]);
SORT_CSWAP(dst[7], dst[9]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[7], dst[8]);
}
#define BITONIC_SORT_12 SORT_MAKE_STR(bitonic_sort_12)
/* Sorting network for exactly 12 elements (dst[0..11]): 39 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_12(SORT_TYPE *dst)
{
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[4], dst[5]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[8], dst[9]);
SORT_CSWAP(dst[10], dst[11]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[5], dst[7]);
SORT_CSWAP(dst[9], dst[11]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[4], dst[6]);
SORT_CSWAP(dst[8], dst[10]);
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[9], dst[10]);
SORT_CSWAP(dst[0], dst[4]);
SORT_CSWAP(dst[7], dst[11]);
SORT_CSWAP(dst[1], dst[5]);
SORT_CSWAP(dst[6], dst[10]);
SORT_CSWAP(dst[3], dst[7]);
SORT_CSWAP(dst[4], dst[8]);
SORT_CSWAP(dst[5], dst[9]);
SORT_CSWAP(dst[2], dst[6]);
SORT_CSWAP(dst[0], dst[4]);
SORT_CSWAP(dst[7], dst[11]);
SORT_CSWAP(dst[3], dst[8]);
SORT_CSWAP(dst[1], dst[5]);
SORT_CSWAP(dst[6], dst[10]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[8], dst[9]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[7], dst[10]);
SORT_CSWAP(dst[3], dst[5]);
SORT_CSWAP(dst[6], dst[8]);
SORT_CSWAP(dst[2], dst[4]);
SORT_CSWAP(dst[7], dst[9]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[7], dst[8]);
}
#define BITONIC_SORT_13 SORT_MAKE_STR(bitonic_sort_13)
/* Sorting network for exactly 13 elements (dst[0..12]): 45 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_13(SORT_TYPE *dst)
{
SORT_CSWAP(dst[1], dst[7]);
SORT_CSWAP(dst[9], dst[11]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[5], dst[8]);
SORT_CSWAP(dst[0], dst[12]);
SORT_CSWAP(dst[2], dst[6]);
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[4], dst[6]);
SORT_CSWAP(dst[8], dst[11]);
SORT_CSWAP(dst[7], dst[12]);
SORT_CSWAP(dst[5], dst[9]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[3], dst[7]);
SORT_CSWAP(dst[10], dst[11]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[6], dst[12]);
SORT_CSWAP(dst[7], dst[8]);
SORT_CSWAP(dst[11], dst[12]);
SORT_CSWAP(dst[4], dst[9]);
SORT_CSWAP(dst[6], dst[10]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[8], dst[9]);
SORT_CSWAP(dst[10], dst[11]);
SORT_CSWAP(dst[1], dst[7]);
SORT_CSWAP(dst[2], dst[6]);
SORT_CSWAP(dst[9], dst[11]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[4], dst[7]);
SORT_CSWAP(dst[8], dst[10]);
SORT_CSWAP(dst[0], dst[5]);
SORT_CSWAP(dst[2], dst[5]);
SORT_CSWAP(dst[6], dst[8]);
SORT_CSWAP(dst[9], dst[10]);
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[3], dst[5]);
SORT_CSWAP(dst[7], dst[8]);
SORT_CSWAP(dst[4], dst[6]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[4], dst[5]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[8], dst[9]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[5], dst[6]);
}
#define BITONIC_SORT_14 SORT_MAKE_STR(bitonic_sort_14)
/* Sorting network for exactly 14 elements (dst[0..13]): 51 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_14(SORT_TYPE *dst)
{
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[4], dst[5]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[8], dst[9]);
SORT_CSWAP(dst[10], dst[11]);
SORT_CSWAP(dst[12], dst[13]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[4], dst[6]);
SORT_CSWAP(dst[8], dst[10]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[5], dst[7]);
SORT_CSWAP(dst[9], dst[11]);
SORT_CSWAP(dst[0], dst[4]);
SORT_CSWAP(dst[8], dst[12]);
SORT_CSWAP(dst[1], dst[5]);
SORT_CSWAP(dst[9], dst[13]);
SORT_CSWAP(dst[2], dst[6]);
SORT_CSWAP(dst[3], dst[7]);
SORT_CSWAP(dst[0], dst[8]);
SORT_CSWAP(dst[1], dst[9]);
SORT_CSWAP(dst[2], dst[10]);
SORT_CSWAP(dst[3], dst[11]);
SORT_CSWAP(dst[4], dst[12]);
SORT_CSWAP(dst[5], dst[13]);
SORT_CSWAP(dst[5], dst[10]);
SORT_CSWAP(dst[6], dst[9]);
SORT_CSWAP(dst[3], dst[12]);
SORT_CSWAP(dst[7], dst[11]);
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[4], dst[8]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[7], dst[13]);
SORT_CSWAP(dst[2], dst[8]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[9], dst[10]);
SORT_CSWAP(dst[2], dst[4]);
SORT_CSWAP(dst[11], dst[13]);
SORT_CSWAP(dst[3], dst[8]);
SORT_CSWAP(dst[7], dst[12]);
SORT_CSWAP(dst[6], dst[8]);
SORT_CSWAP(dst[10], dst[12]);
SORT_CSWAP(dst[3], dst[5]);
SORT_CSWAP(dst[7], dst[9]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[7], dst[8]);
SORT_CSWAP(dst[9], dst[10]);
SORT_CSWAP(dst[11], dst[12]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[8], dst[9]);
}
#define BITONIC_SORT_15 SORT_MAKE_STR(bitonic_sort_15)
/* Sorting network for exactly 15 elements (dst[0..14]): 56 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_15(SORT_TYPE *dst)
{
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[4], dst[5]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[8], dst[9]);
SORT_CSWAP(dst[10], dst[11]);
SORT_CSWAP(dst[12], dst[13]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[4], dst[6]);
SORT_CSWAP(dst[8], dst[10]);
SORT_CSWAP(dst[12], dst[14]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[5], dst[7]);
SORT_CSWAP(dst[9], dst[11]);
SORT_CSWAP(dst[0], dst[4]);
SORT_CSWAP(dst[8], dst[12]);
SORT_CSWAP(dst[1], dst[5]);
SORT_CSWAP(dst[9], dst[13]);
SORT_CSWAP(dst[2], dst[6]);
SORT_CSWAP(dst[10], dst[14]);
SORT_CSWAP(dst[3], dst[7]);
SORT_CSWAP(dst[0], dst[8]);
SORT_CSWAP(dst[1], dst[9]);
SORT_CSWAP(dst[2], dst[10]);
SORT_CSWAP(dst[3], dst[11]);
SORT_CSWAP(dst[4], dst[12]);
SORT_CSWAP(dst[5], dst[13]);
SORT_CSWAP(dst[6], dst[14]);
SORT_CSWAP(dst[5], dst[10]);
SORT_CSWAP(dst[6], dst[9]);
SORT_CSWAP(dst[3], dst[12]);
SORT_CSWAP(dst[13], dst[14]);
SORT_CSWAP(dst[7], dst[11]);
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[4], dst[8]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[7], dst[13]);
SORT_CSWAP(dst[2], dst[8]);
SORT_CSWAP(dst[11], dst[14]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[9], dst[10]);
SORT_CSWAP(dst[2], dst[4]);
SORT_CSWAP(dst[11], dst[13]);
SORT_CSWAP(dst[3], dst[8]);
SORT_CSWAP(dst[7], dst[12]);
SORT_CSWAP(dst[6], dst[8]);
SORT_CSWAP(dst[10], dst[12]);
SORT_CSWAP(dst[3], dst[5]);
SORT_CSWAP(dst[7], dst[9]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[7], dst[8]);
SORT_CSWAP(dst[9], dst[10]);
SORT_CSWAP(dst[11], dst[12]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[8], dst[9]);
}
#define BITONIC_SORT_16 SORT_MAKE_STR(bitonic_sort_16)
/* Sorting network for exactly 16 elements (dst[0..15]): 60 compare-and-swap
   steps.  The order of the steps is part of the network; do not reorder. */
static __inline void BITONIC_SORT_16(SORT_TYPE *dst)
{
SORT_CSWAP(dst[0], dst[1]);
SORT_CSWAP(dst[2], dst[3]);
SORT_CSWAP(dst[4], dst[5]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[8], dst[9]);
SORT_CSWAP(dst[10], dst[11]);
SORT_CSWAP(dst[12], dst[13]);
SORT_CSWAP(dst[14], dst[15]);
SORT_CSWAP(dst[0], dst[2]);
SORT_CSWAP(dst[4], dst[6]);
SORT_CSWAP(dst[8], dst[10]);
SORT_CSWAP(dst[12], dst[14]);
SORT_CSWAP(dst[1], dst[3]);
SORT_CSWAP(dst[5], dst[7]);
SORT_CSWAP(dst[9], dst[11]);
SORT_CSWAP(dst[13], dst[15]);
SORT_CSWAP(dst[0], dst[4]);
SORT_CSWAP(dst[8], dst[12]);
SORT_CSWAP(dst[1], dst[5]);
SORT_CSWAP(dst[9], dst[13]);
SORT_CSWAP(dst[2], dst[6]);
SORT_CSWAP(dst[10], dst[14]);
SORT_CSWAP(dst[3], dst[7]);
SORT_CSWAP(dst[11], dst[15]);
SORT_CSWAP(dst[0], dst[8]);
SORT_CSWAP(dst[1], dst[9]);
SORT_CSWAP(dst[2], dst[10]);
SORT_CSWAP(dst[3], dst[11]);
SORT_CSWAP(dst[4], dst[12]);
SORT_CSWAP(dst[5], dst[13]);
SORT_CSWAP(dst[6], dst[14]);
SORT_CSWAP(dst[7], dst[15]);
SORT_CSWAP(dst[5], dst[10]);
SORT_CSWAP(dst[6], dst[9]);
SORT_CSWAP(dst[3], dst[12]);
SORT_CSWAP(dst[13], dst[14]);
SORT_CSWAP(dst[7], dst[11]);
SORT_CSWAP(dst[1], dst[2]);
SORT_CSWAP(dst[4], dst[8]);
SORT_CSWAP(dst[1], dst[4]);
SORT_CSWAP(dst[7], dst[13]);
SORT_CSWAP(dst[2], dst[8]);
SORT_CSWAP(dst[11], dst[14]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[9], dst[10]);
SORT_CSWAP(dst[2], dst[4]);
SORT_CSWAP(dst[11], dst[13]);
SORT_CSWAP(dst[3], dst[8]);
SORT_CSWAP(dst[7], dst[12]);
SORT_CSWAP(dst[6], dst[8]);
SORT_CSWAP(dst[10], dst[12]);
SORT_CSWAP(dst[3], dst[5]);
SORT_CSWAP(dst[7], dst[9]);
SORT_CSWAP(dst[3], dst[4]);
SORT_CSWAP(dst[5], dst[6]);
SORT_CSWAP(dst[7], dst[8]);
SORT_CSWAP(dst[9], dst[10]);
SORT_CSWAP(dst[11], dst[12]);
SORT_CSWAP(dst[6], dst[7]);
SORT_CSWAP(dst[8], dst[9]);
}
/* Small-array sort: dispatches to the fixed sorting network matching `size`
   (2..16 elements).  Arrays of 0 or 1 elements are left untouched, and
   anything longer than 16 is handed to BINARY_INSERTION_SORT. */
void BITONIC_SORT(SORT_TYPE *dst, const size_t size)
{
    if (size < 2) {
        return;
    }

    if (size > 16) {
        BINARY_INSERTION_SORT(dst, size);
        return;
    }

    switch (size) {
    case 2:  BITONIC_SORT_2(dst);  return;
    case 3:  BITONIC_SORT_3(dst);  return;
    case 4:  BITONIC_SORT_4(dst);  return;
    case 5:  BITONIC_SORT_5(dst);  return;
    case 6:  BITONIC_SORT_6(dst);  return;
    case 7:  BITONIC_SORT_7(dst);  return;
    case 8:  BITONIC_SORT_8(dst);  return;
    case 9:  BITONIC_SORT_9(dst);  return;
    case 10: BITONIC_SORT_10(dst); return;
    case 11: BITONIC_SORT_11(dst); return;
    case 12: BITONIC_SORT_12(dst); return;
    case 13: BITONIC_SORT_13(dst); return;
    case 14: BITONIC_SORT_14(dst); return;
    case 15: BITONIC_SORT_15(dst); return;
    case 16: BITONIC_SORT_16(dst); return;
    default:
        return; /* unreachable: every size in 2..16 is handled above */
    }
}
#if SORT_SAFE_CPY
/* Element-wise copy of `size` elements from src to dst using SORT_TYPE
   assignment, walking from the first element to the last. */
void SORT_TYPE_CPY(SORT_TYPE *dst, SORT_TYPE *src, const size_t size)
{
    SORT_TYPE *out = dst;
    SORT_TYPE *in = src;
    size_t remaining = size;

    while (remaining > 0) {
        *out = *in;
        ++out;
        ++in;
        --remaining;
    }
}
/* Overlap-safe element-wise move of `size` elements from src to dst.
   Copies front-to-back when dst lies before src and back-to-front when it
   lies after, so overlapping ranges are not clobbered before being read. */
void SORT_TYPE_MOVE(SORT_TYPE *dst, SORT_TYPE *src, const size_t size)
{
    size_t next;

    if (dst == src || size == 0) {
        return; /* nothing to move */
    }

    if (dst < src) {
        SORT_TYPE_CPY(dst, src, size);
        return;
    }

    /* dst > src: assign the highest index first */
    for (next = size; next > 0; --next) {
        dst[next - 1] = src[next - 1];
    }
}
#else
#undef SORT_TYPE_CPY
#define SORT_TYPE_CPY(dst, src, size) memcpy((dst), (src), (size) * sizeof(SORT_TYPE))
#undef SORT_TYPE_MOVE
#define SORT_TYPE_MOVE(dst, src, size) memmove((dst), (src), (size) * sizeof(SORT_TYPE))
#endif
/* Allocate an uninitialised scratch buffer of `size` elements.
   With SORT_SAFE_CPY (C++ only) the buffer comes from new[]; otherwise it is
   malloc'ed and NULL is returned when the allocation fails or when the byte
   count size * sizeof(SORT_TYPE) would not fit in size_t.
   Release the buffer with SORT_DELETE_BUFFER. */
SORT_TYPE* SORT_NEW_BUFFER(size_t size)
{
#if SORT_SAFE_CPY
    return new SORT_TYPE[size];
#else
    /* Refuse requests whose byte count overflows: malloc would otherwise
       hand back a buffer far smaller than the caller expects. */
    if (size > SIZE_MAX / sizeof(SORT_TYPE)) {
        return NULL;
    }
    return (SORT_TYPE*)malloc(size * sizeof(SORT_TYPE));
#endif
}
/* Release a buffer obtained from SORT_NEW_BUFFER, using the deallocator
   that matches how it was allocated (free for malloc, delete[] for new[]). */
void SORT_DELETE_BUFFER(SORT_TYPE* pointer)
{
#if !SORT_SAFE_CPY
    free(pointer);
#else
    delete[] pointer;
#endif
}
/* Shell sort implementation based on Wikipedia article
   http://en.wikipedia.org/wiki/Shell_sort

   Sorts dst[0..size-1] in place with gapped insertion passes, using the
   gaps from the shell_gaps table from the largest gap that does not exceed
   size / 2 down to 1.
*/
void SHELL_SORT(SORT_TYPE *dst, const size_t size)
{
    int inci = (int)(sizeof(shell_gaps) / sizeof(shell_gaps[0])) - 1;
    size_t inc;
    size_t i;

    /* don't bother sorting an array of size 0 or 1 */
    if (size <= 1) {
        return;
    }

    /* Pick the starting gap.  The comparison is done in 64 bits on purpose:
       the table holds uint64_t values, and narrowing them to a 32-bit size_t
       first would turn the large entries into arbitrary small numbers that
       wrongly pass the test.  shell_gaps[0] == 1 always terminates the loop
       because size >= 2 here. */
    while (shell_gaps[inci] > (uint64_t)(size >> 1)) {
        inci--;
    }
    inc = (size_t)shell_gaps[inci]; /* <= size / 2, so it fits in size_t */

    for (;;) {
        /* insertion sort over elements that are `inc` apart */
        for (i = inc; i < size; i++) {
            SORT_TYPE temp = dst[i];
            size_t j = i;

            while ((j >= inc) && (SORT_CMP(dst[j - inc], temp) > 0)) {
                dst[j] = dst[j - inc];
                j -= inc;
            }

            dst[j] = temp;
        }

        if (inc == 1) {
            break; /* the gap-1 pass is a plain insertion sort: done */
        }

        inc = (size_t)shell_gaps[--inci];
    }
}
/* Binary search used by the binary insertion sort.
   Returns the index in the sorted range dst[0..size-1] at which x should be
   inserted.  Elements equal to x are skipped so that x lands after them,
   which keeps the insertion sort stable.  `size` must be at least 1. */
static __inline size_t BINARY_INSERTION_FIND(SORT_TYPE *dst, const SORT_TYPE x,
        const size_t size)
{
    size_t lo = 0;
    size_t hi = size - 1;
    size_t mid = hi >> 1;
    SORT_TYPE probe;

    /* x sorts before the first element, or after the last one */
    if (SORT_CMP(x, dst[0]) < 0) {
        return 0;
    }
    if (SORT_CMP(x, dst[hi]) > 0) {
        return hi;
    }

    probe = dst[mid];

    for (;;) {
        if (SORT_CMP(x, probe) < 0) {
            /* x belongs to the left of mid */
            if (mid - lo <= 1) {
                return mid;
            }
            hi = mid;
        } else {
            /* x >= probe: continue to the right (ties favour the right) */
            if (hi - mid <= 1) {
                return mid + 1;
            }
            lo = mid;
        }

        mid = lo + ((hi - lo) >> 1);
        probe = dst[mid];
    }
}
/* Binary insertion sort for an array whose first `start` entries are already
   sorted (used in timsort).  Each later element is placed into the sorted
   prefix at the position reported by BINARY_INSERTION_FIND.
   `start` must be at least 1, since dst[i - 1] is read for i == start. */
static void BINARY_INSERTION_SORT_START(SORT_TYPE *dst, const size_t start, const size_t size)
{
    size_t next;

    for (next = start; next < size; next++) {
        SORT_TYPE pending;
        size_t slot;
        size_t hole;

        /* already in order relative to its left neighbour: leave it */
        if (SORT_CMP(dst[next - 1], dst[next]) <= 0) {
            continue;
        }

        pending = dst[next];
        slot = BINARY_INSERTION_FIND(dst, pending, next);

        /* open a hole at `slot` by shifting dst[slot..next-1] one to the right */
        for (hole = next; hole > slot; hole--) {
            dst[hole] = dst[hole - 1];
        }

        dst[slot] = pending;
    }
}
/* Binary insertion sort of dst[0..size-1]; a one-element prefix is trivially
   sorted, so the work starts at index 1. */
void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size)
{
    /* arrays of size <= 1 need no sorting */
    if (size > 1) {
        BINARY_INSERTION_SORT_START(dst, 1, size);
    }
}
/* Selection sort.
   For each slot, scans the rest of the array and swaps immediately whenever
   a smaller element is found, so the slot ends up holding the minimum of the
   remaining range. */
void SELECTION_SORT(SORT_TYPE *dst, const size_t size)
{
    size_t slot;

    /* arrays of size <= 1 need no sorting */
    if (size < 2) {
        return;
    }

    for (slot = 0; slot < size; slot++) {
        size_t probe = slot;

        while (++probe < size) {
            if (SORT_CMP(dst[probe], dst[slot]) < 0) {
                SORT_SWAP(dst[slot], dst[probe]);
            }
        }
    }
}
/* In-place mergesort */
/* Swap the `len` elements starting at dst1 with the `len` elements starting
   at dst2, pairwise and front to back.  `len` must be at least 1: the first
   pair is swapped before the count is examined. */
void MERGE_SORT_IN_PLACE_ASWAP(SORT_TYPE * dst1, SORT_TYPE * dst2, size_t len)
{
    size_t k = 0;

    do {
        SORT_SWAP(dst1[k], dst2[k]);
        k++;
    } while (k != len);
}
/* Forward merge step of the in-place merge sort.
   Merges the l1 elements at dst1 with the l2 elements at dst2, writing the
   result front to back into the region that begins l1 slots before dst2
   (dst0 = dst2 - l1).  Every move is a SORT_SWAP, so the previous contents of
   an output slot end up in the input slot that was just consumed instead of
   being overwritten.
   Both l1 and l2 must be at least 1 (the loops test their counters only after
   the first iteration). */
void MERGE_SORT_IN_PLACE_FRONTMERGE(SORT_TYPE *dst1, size_t l1, SORT_TYPE *dst2, size_t l2)
{
SORT_TYPE *dst0 = dst2 - l1;
/* Fast path: the last element of the first run does not exceed the first
   element of the second run, so the first run is simply swapped into place. */
if (SORT_CMP(dst1[l1 - 1], dst2[0]) <= 0) {
MERGE_SORT_IN_PLACE_ASWAP(dst1, dst0, l1);
return;
}
do {
/* take from the first run while its head is strictly smaller */
while (SORT_CMP(*dst2, *dst1) > 0) {
SORT_SWAP(*dst1, *dst0);
dst1++;
dst0++;
if (--l1 == 0) {
/* first run exhausted: return without moving the rest of the second run */
return;
}
}
/* otherwise take the head of the second run */
SORT_SWAP(*dst2, *dst0);
dst2++;
dst0++;
} while (--l2);
/* second run exhausted: move what is left of the first run */
do {
SORT_SWAP(*dst1, *dst0);
dst1++;
dst0++;
} while (--l1);
}
/* Backward merge step of the in-place merge sort.
   dst1 points at the LAST element of a run of l1 elements and dst2 at the
   LAST element of a run of l2 elements; both runs are walked downwards.  The
   merged output is written back to front, ending at dst0 = dst2 + l1.  Every
   move is a SORT_SWAP, so the previous contents of an output slot are
   exchanged into the input slot that was just consumed.
   Both l1 and l2 must be at least 1.
   Returns the number of elements of the first run that were moved as one
   trailing block: l1 on the fast path, the remaining count once the second
   run is exhausted, or 0 when the first run is exhausted first. */
size_t MERGE_SORT_IN_PLACE_BACKMERGE(SORT_TYPE * dst1, size_t l1, SORT_TYPE * dst2, size_t l2)
{
    size_t res;
    SORT_TYPE *dst0 = dst2 + l1;
    /* First (lowest-addressed) element of the first run.  Computed as a
       pointer subtraction: the former index `1 - l1` was evaluated in size_t
       and only worked through unsigned wraparound. */
    SORT_TYPE *run1_first = dst1 - (l1 - 1);

    /* Fast path: the whole first run is >= the end of the second run, so it
       is swapped straight into the last l1 output slots. */
    if (SORT_CMP(*run1_first, dst2[0]) >= 0) {
        MERGE_SORT_IN_PLACE_ASWAP(run1_first, dst0 - (l1 - 1), l1);
        return l1;
    }

    do {
        /* take from the first run while its tail is strictly larger */
        while (SORT_CMP(*dst2, *dst1) < 0) {
            SORT_SWAP(*dst1, *dst0);
            dst1--;
            dst0--;

            if (--l1 == 0) {
                return 0;
            }
        }

        /* otherwise take the tail of the second run */
        SORT_SWAP(*dst2, *dst0);
        dst2--;
        dst0--;
    } while (--l2);

    /* second run exhausted: move what is left of the first run */
    res = l1;

    do {
        SORT_SWAP(*dst1, *dst0);
        dst1--;
        dst0--;
    } while (--l1);

    return res;
}
/* merge dst[p0..p1) by buffer dst[p1..p1+r)
   Block-wise merge used by the in-place merge sort.  dst[0..len) is treated
   as a sequence of blocks of r elements, lp is the index where the second
   run starts, and the r elements starting at dst[len] serve as swap buffer.
   NOTE(review): this appears to assume len and lp are multiples of r and
   that lp >= 1 (dst[lp - 1] is read) - confirm against the callers. */
void MERGE_SORT_IN_PLACE_RMERGE(SORT_TYPE *dst, size_t len, size_t lp, size_t r)
{
size_t i, lq;
int cv;
/* the two runs are already in order across their boundary: nothing to do */
if (SORT_CMP(dst[lp], dst[lp - 1]) >= 0) {
return;
}
lq = lp;
for (i = 0; i < len; i += r) {
/* select smallest dst[p0+n*r] */
size_t q = i, j;
for (j = lp; j <= lq; j += r) {
cv = SORT_CMP(dst[j], dst[q]);
/* equal first elements: decide by the last element of each block */
if (cv == 0) {
cv = SORT_CMP(dst[j + r - 1], dst[q + r - 1]);
}
if (cv < 0) {
q = j;
}
}
if (q != i) {
MERGE_SORT_IN_PLACE_ASWAP(dst + i, dst + q, r); /* swap it with current position */
/* the last candidate block was consumed: extend the candidate range by
   one block unless it is already the final block */
if (q == lq && q < (len - r)) {
lq += r;
}
}
/* current block starts below the end of the previous one: merge them */
if (i != 0 && SORT_CMP(dst[i], dst[i - 1]) < 0) {
MERGE_SORT_IN_PLACE_ASWAP(dst + len, dst + i, r); /* swap current position with buffer */
MERGE_SORT_IN_PLACE_BACKMERGE(dst + (len + r - 1), r, dst + (i - 1),
r); /* buffer :merge: dst[i-r..i) -> dst[i-r..i+r) */
}
/* the write position reached the start of the candidate range: advance it */
if (lp == i) {
lp += r;
}
}
}
/* In-place Merge Sort implementation. (c)2012, Andrey Astrelin, astrelin@tochka.ru
   Sorts dst[0..len-1] without allocating a scratch buffer.  The leading lr
   elements (a multiple of the block size r) are merged bottom-up, using the
   tail of the array as swap space; the remaining len - lr elements are then
   sorted recursively and merged back in.
   Arrays of at most SMALL_SORT_BND elements are delegated to SMALL_SORT. */
void MERGE_SORT_IN_PLACE(SORT_TYPE *dst, const size_t len)
{
/* r: block size; lr: length of the bottom-up part, i.e. one block less than
   the number of whole blocks in len.  Both are computed before the size
   checks below; for tiny len the values are unused. */
size_t r = rbnd(len);
size_t lr = (len / r - 1) * r;
/* NOTE(review): dst - 1 points before the array; it is advanced by 2 before
   its first dereference, so inside the loop dst1 == dst + p - 1. */
SORT_TYPE *dst1 = dst - 1;
size_t p, m, q, q1, p0;
/* don't bother sorting an array of size <= 1 */
if (len <= 1) {
return;
}
if (len <= SMALL_SORT_BND) {
SMALL_SORT(dst, len);
return;
}
for (p = 2; p <= lr; p += 2) {
dst1 += 2;
/* put the pair dst[p-2], dst[p-1] in order */
if (SORT_CMP(dst1[0], dst1[-1]) < 0) {
SORT_SWAP(dst1[0], dst1[-1]);
}
/* p is not a multiple of 4: no larger run is complete yet */
if (p & 2) {
continue;
}
m = len - p;
q = 2;
/* grow q while adjacent runs of length q ending at p are already in order
   across their boundary.
   NOTE(review): 1 - q wraps in size_t and -(int) q narrows q to int; both
   presumably rely on q staying small - confirm. */
while ((p & q) == 0) {
if (SORT_CMP(dst1[1 - q], dst1[-(int) q]) < 0) {
break;
}
q *= 2;
}
if (p & q) {
continue;
}
/* enough room (m elements) behind p to use the tail as merge buffer */
if (q < m) {
p0 = len - q;
MERGE_SORT_IN_PLACE_ASWAP(dst + p - q, dst + p0, q);
for (;;) {
q1 = 2 * q;
if ((q1 > m) || (p & q1)) {
break;
}
p0 = len - q1;
MERGE_SORT_IN_PLACE_FRONTMERGE(dst + (p - q1), q, dst + p0 + q, q);
q = q1;
}
MERGE_SORT_IN_PLACE_BACKMERGE(dst + (len - 1), q, dst1 - q, q);
q *= 2;
}
/* largest halving of q that still fits in the m spare elements */
q1 = q;
while (q1 > m) {
q1 /= 2;
}
/* merge the remaining run pairs block-wise with block size q1 */
while ((q & p) == 0) {
q *= 2;
MERGE_SORT_IN_PLACE_RMERGE(dst + (p - q), q, q / 2, q1);
}
}
/* combine the runs that make up dst[0..lr), one set bit of lr at a time */
q1 = 0;
for (q = r; q < lr; q *= 2) {
if ((lr & q) != 0) {
q1 += q;
if (q1 != q) {
MERGE_SORT_IN_PLACE_RMERGE(dst + (lr - q1), q1, q, r);
}
}
}
/* sort the m trailing elements, swap them to the front, merge them backwards
   with the rest of dst[0..lr), then sort the first m elements again */
m = len - lr;
MERGE_SORT_IN_PLACE(dst + lr, m);
MERGE_SORT_IN_PLACE_ASWAP(dst, dst + lr, m);
m += MERGE_SORT_IN_PLACE_BACKMERGE(dst + (m - 1), m, dst + (lr - 1), lr - m);
MERGE_SORT_IN_PLACE(dst, m);
}
/* Recursive worker of the standard merge sort.
   Sorts dst[0..size-1]; newdst is scratch space with room for at least
   `size` elements.  Both halves are sorted recursively, merged into newdst
   and copied back to dst.  Ranges of at most SMALL_SORT_BND elements are
   sorted with BINARY_INSERTION_SORT. */
void MERGE_SORT_RECURSIVE(SORT_TYPE *newdst, SORT_TYPE *dst, const size_t size)
{
    const size_t half = size / 2;
    size_t lhs;
    size_t rhs;
    size_t filled;

    /* arrays of size <= 1 need no sorting */
    if (size <= 1) {
        return;
    }

    if (size <= SMALL_SORT_BND) {
        BINARY_INSERTION_SORT(dst, size);
        return;
    }

    MERGE_SORT_RECURSIVE(newdst, dst, half);
    MERGE_SORT_RECURSIVE(newdst, dst + half, size - half);

    lhs = 0;
    rhs = half;
    filled = 0;

    /* both halves still have elements: take the smaller head, preferring the
       left half on ties */
    while (lhs < half && rhs < size) {
        if (SORT_CMP(dst[lhs], dst[rhs]) <= 0) {
            newdst[filled++] = dst[lhs++];
        } else {
            newdst[filled++] = dst[rhs++];
        }
    }

    /* drain whichever half is left over */
    while (lhs < half) {
        newdst[filled++] = dst[lhs++];
    }

    while (rhs < size) {
        newdst[filled++] = dst[rhs++];
    }

    SORT_TYPE_CPY(dst, newdst, size);
}
/* Standard merge sort.
   Sorts dst[0..size-1] using a temporary buffer of `size` elements obtained
   from SORT_NEW_BUFFER.  Arrays of at most SMALL_SORT_BND elements are sorted
   with BINARY_INSERTION_SORT and need no buffer.
   If the buffer cannot be allocated the array is sorted with GRAIL_SORT
   instead of dereferencing a NULL pointer.
   NOTE(review): GRAIL_SORT is declared in this file but defined outside this
   view; it is presumably an in-place, stable sort - confirm. */
void MERGE_SORT(SORT_TYPE *dst, const size_t size)
{
    SORT_TYPE *newdst;

    /* don't bother sorting an array of size <= 1 */
    if (size <= 1) {
        return;
    }

    if (size <= SMALL_SORT_BND) {
        BINARY_INSERTION_SORT(dst, size);
        return;
    }

    newdst = SORT_NEW_BUFFER(size);

    if (newdst == NULL) {
        /* out of memory: fall back to a sort that needs no scratch buffer */
        GRAIL_SORT(dst, size);
        return;
    }

    MERGE_SORT_RECURSIVE(newdst, dst, size);
    SORT_DELETE_BUFFER(newdst);
}
/* Partition dst[left..right] around the value found at index `pivot`.
   Elements that compare less than the pivot value are moved to the front and
   the pivot is placed right after them.  Returns the final index of the
   pivot, or SIZE_MAX when every examined element compared equal to it. */
static __inline size_t QUICK_SORT_PARTITION(SORT_TYPE *dst, const size_t left,
        const size_t right, const size_t pivot)
{
    SORT_TYPE pivot_value = dst[pivot];
    size_t store = left;
    size_t scan;
    int seen_difference = 0;

    /* park the pivot at the right end while partitioning */
    SORT_SWAP(dst[pivot], dst[right]);

    for (scan = left; scan < right; scan++) {
        const int order = SORT_CMP(dst[scan], pivot_value);

        /* stays 0 only while every comparison reports equality */
        seen_difference |= order;

        if (order < 0) {
            SORT_SWAP(dst[scan], dst[store]);
            store++;
        }
    }

    /* drop the pivot into its final slot */
    SORT_SWAP(dst[right], dst[store]);

    /* degenerate case: all elements equal */
    return (seen_difference == 0) ? SIZE_MAX : store;
}
/* Based on Knuth vol. 3
static __inline size_t QUICK_SORT_HOARE_PARTITION(SORT_TYPE *dst, const size_t l,
const size_t r, const size_t pivot) {
SORT_TYPE value;
size_t i = l + 1;
size_t j = r;
if (pivot != l) {
SORT_SWAP(dst[pivot], dst[l]);
}
value = dst[l];
while (1) {
while (SORT_CMP(dst[i], value) < 0) {
i++;
}
while (SORT_CMP(value, dst[j]) < 0) {
j--;
}
if (j <= i) {
SORT_SWAP(dst[l], dst[j]);
return j;
}
SORT_SWAP(dst[i], dst[j]);
i++;
j--;
}
return 0;
}
*/
/*
 * Return the median index of the objects at the three indices.
 *
 * dst     - array being inspected (not modified)
 * a, b, c - the three candidate indices
 *
 * Returns whichever of a, b, c addresses the middle value of
 * dst[a], dst[b], dst[c].  Only strict "less than" results of SORT_CMP are
 * used, so ties fall through to the "not less" branches.
 */
static __inline size_t MEDIAN(const SORT_TYPE *dst, const size_t a, const size_t b,
                              const size_t c)
{
    const int AB = SORT_CMP(dst[a], dst[b]) < 0;

    if (AB) {
        /* a < b */
        const int BC = SORT_CMP(dst[b], dst[c]) < 0;

        if (BC) {
            /* a < b < c */
            return b;
        } else {
            /* a < b, c <= b */
            const int AC = SORT_CMP(dst[a], dst[c]) < 0;

            if (AC) {
                /* a < c <= b */
                return c;
            } else {
                /* c <= a < b */
                return a;
            }
        }
    } else {
        /* b <= a */
        /* compare a against c here; comparing against b again would always
         * be false in this branch and `a` could never be picked */
        const int AC = SORT_CMP(dst[a], dst[c]) < 0;

        if (AC) {
            /* b <= a < c */
            return a;
        } else {
            /* b <= a, c <= a */
            const int BC = SORT_CMP(dst[b], dst[c]) < 0;

            if (BC) {
                /* b < c <= a */
                return c;
            } else {
                /* c <= b <= a */
                return b;
            }
        }
    }
}
/*
 * Quick sort worker for the inclusive index range
 * dst[original_left .. original_right].
 *
 * Each round picks a pivot (median of three, refined with the two quarter
 * points), partitions, recurses into the smaller side and loops on the
 * larger one.  Ranges of at most SMALL_SORT_BND elements go to SMALL_SORT;
 * after roughly lg(N) rounds in one invocation the remaining range is handed
 * to HEAP_SORT.
 */
static void QUICK_SORT_RECURSIVE(SORT_TYPE *dst, const size_t original_left,
                                 const size_t original_right)
{
    size_t left = original_left;
    size_t right = original_right;
    size_t pivot;
    size_t new_pivot;
    size_t middle;
    size_t left_len;   /* elements strictly left of the pivot  */
    size_t right_len;  /* elements strictly right of the pivot */
    int loop_count = 0;
    int max_loops;

    /* Nothing to sort.  Leaving here also guarantees CLZ() below never
     * receives 0, which count-leading-zeros primitives may not define. */
    if (right <= left) {
        return;
    }

    max_loops = 64 - CLZ(right - left); /* ~lg N */

    while (1) {
        if (right <= left) {
            return;
        }

        if ((right - left + 1U) <= SMALL_SORT_BND) {
            SMALL_SORT(&dst[left], right - left + 1U);
            return;
        }

        if (++loop_count >= max_loops) {
            /* we have looped too many times; switch to heap sort */
            HEAP_SORT(&dst[left], right - left + 1U);
            return;
        }

        /* median of (left, middle, right), then of that and the quarter points */
        middle = left + ((right - left) >> 1);
        pivot = MEDIAN((const SORT_TYPE *) dst, left, middle, right);
        pivot = MEDIAN((const SORT_TYPE *) dst, left + ((middle - left) >> 1), pivot,
                       middle + ((right - middle) >> 1));
        new_pivot = QUICK_SORT_PARTITION(dst, left, right, pivot);

        /* check for partition all equal */
        if (new_pivot == SIZE_MAX) {
            return;
        }

        /* new_pivot lies in [left, right], so neither subtraction wraps.
         * Comparing "len - 1" values instead would wrap when the pivot
         * lands on an edge and send the recursion into the large side. */
        left_len = new_pivot - left;
        right_len = right - new_pivot;

        /* recurse only on the small part to avoid degenerate stack sizes */
        /* and manually do tail call on the large part */
        if (left_len > right_len) {
            /* left is bigger than right */
            if (right_len > 1U) {
                QUICK_SORT_RECURSIVE(dst, new_pivot + 1U, right);
            }

            /* tail call for left; left_len >= 1 here so new_pivot >= 1 */
            right = new_pivot - 1U;
        } else {
            /* right is at least as big as left */
            if (left_len > 1U) {
                QUICK_SORT_RECURSIVE(dst, left, new_pivot - 1U);
            }

            /* tail call for right */
            left = new_pivot + 1U;
        }
    }
}
/* Quick sort entry point: sorts dst[0..size-1] in place. */
void QUICK_SORT(SORT_TYPE *dst, const size_t size)
{
    /* fewer than two elements: already sorted */
    if (size > 1) {
        QUICK_SORT_RECURSIVE(dst, 0U, size - 1U);
    }
}
/* timsort implementation, based on timsort.txt */

/* Reverse dst[start..end] (both indices inclusive) in place. */
static __inline void REVERSE_ELEMENTS(SORT_TYPE *dst, size_t start, size_t end)
{
    /* swap from both ends until the cursors meet or cross */
    for (; start < end; start++, end--) {
        SORT_SWAP(dst[start], dst[end]);
    }
}
/*
 * Find the run that begins at dst[start] and return its length.
 *
 * A non-decreasing run is left as is; a strictly decreasing run is reversed
 * in place so the result is always ascending.  When only two elements are
 * left they are put in order and 2 is returned.  The scan never steps onto
 * the final element (index size - 1).
 */
static size_t COUNT_RUN(SORT_TYPE *dst, const size_t start, const size_t size)
{
    size_t pos;
    int descending;

    /* a single trailing element */
    if (size - start == 1) {
        return 1;
    }

    /* the last two elements: order them directly */
    if (start >= size - 2) {
        if (SORT_CMP(dst[size - 2], dst[size - 1]) > 0) {
            SORT_SWAP(dst[size - 2], dst[size - 1]);
        }

        return 2;
    }

    /* the first pair decides the direction of the run */
    descending = SORT_CMP(dst[start], dst[start + 1]) > 0;
    pos = start + 2;

    if (descending) {
        while (pos != size - 1 && SORT_CMP(dst[pos - 1], dst[pos]) > 0) {
            pos++;
        }

        /* reverse in-place */
        REVERSE_ELEMENTS(dst, start, pos - 1);
    } else {
        while (pos != size - 1 && SORT_CMP(dst[pos - 1], dst[pos]) <= 0) {
            pos++;
        }
    }

    return pos - start;
}
/*
 * Check the run-length invariants on the top of the run stack.
 *
 * Returns 1 when they hold and 0 when a collapse is required:
 *   - fewer than two runs: always fine
 *   - exactly two runs:    the lower run must be longer than the upper one
 *   - three or more:       with A, B, C the three topmost lengths (C on
 *                          top), both A > B + C and B > C must hold
 */
static int CHECK_INVARIANT(TIM_SORT_RUN_T *stack, const int stack_curr)
{
    size_t third;   /* third run from the top  */
    size_t second;  /* second run from the top */
    size_t top;     /* topmost run             */

    if (stack_curr < 2) {
        return 1;
    }

    second = stack[stack_curr - 2].length;
    top = stack[stack_curr - 1].length;

    if (stack_curr == 2) {
        return (second > top) ? 1 : 0;
    }

    third = stack[stack_curr - 3].length;

    return ((third > second + top) && (second > top)) ? 1 : 0;
}
/* Scratch buffer descriptor for the timsort merges; TIM_SORT_RESIZE grows it on demand. */
typedef struct {
/* capacity of `storage`, counted in SORT_TYPE elements */
size_t alloc;
/* heap block holding the scratch elements; NULL while nothing is allocated */
SORT_TYPE *storage;
} TEMP_STORAGE_T;
/*
 * Make sure store->storage can hold at least new_size elements.
 *
 * An existing buffer that is already large enough is kept untouched.
 * Otherwise the buffer is (re)allocated; if that fails a message is written
 * to stderr and the process exits with status 1.
 */
static void TIM_SORT_RESIZE(TEMP_STORAGE_T *store, const size_t new_size)
{
    SORT_TYPE *grown;

    /* already have a big enough buffer */
    if ((store->storage != NULL) && (store->alloc >= new_size)) {
        return;
    }

    grown = (SORT_TYPE *)realloc(store->storage, new_size * sizeof(SORT_TYPE));

    if (grown == NULL) {
        fprintf(stderr, "Error allocating temporary storage for tim sort: need %lu bytes",
                (unsigned long)(sizeof(SORT_TYPE) * new_size));
        exit(1);
    }

    store->storage = grown;
    store->alloc = new_size;
}
/*
 * Galloping search for `key` in dst[0..size-1], starting next to `anchor`.
 * dst is presumably sorted ascending (the callers pass timsort runs).
 *
 * The offset from anchor is grown as 1, 3, 7, ... (ofs = 2*ofs + 1, or the
 * negated series when moving left) until key is bracketed, then a binary
 * search narrows the bracket.
 *
 * right != 0: returns the first index whose element is greater than key.
 * right == 0: returns the first index whose element is not less than key.
 * The result is 0 when key belongs before dst[0] and `size` when it belongs
 * after dst[size - 1].
 *
 * NOTE(review): offsets are kept in `int`; the casts below would truncate
 * for ranges longer than INT_MAX - confirm callers never exceed that.
 */
static size_t TIM_SORT_GALLOP(SORT_TYPE *dst, const size_t size, const SORT_TYPE key, size_t anchor,
int right)
{
int last_ofs = 0;
int ofs, max_ofs, ofs_sign, cmp;
size_t l, c, r;
cmp = SORT_CMP(key, dst[anchor]);
/* decide the gallop direction from the comparison with the anchor */
if (cmp < 0 || (!right && cmp == 0)) {
/* short cut */
if (anchor == 0) {
return 0;
}
ofs = -1;
ofs_sign = -1;
max_ofs = -(int)anchor; /* ensure anchor+max_ofs is valid idx */
} else {
if (anchor == size - 1) {
return size;
}
ofs = 1;
ofs_sign = 1;
max_ofs = (int)(size - anchor - 1);
}
for (;;) {
/* deal with overflow */
/* ofs has reached or passed max_ofs: clamp to the edge and test the boundary element */
if (max_ofs / ofs <= 1) {
ofs = max_ofs;
if (ofs < 0) {
cmp = SORT_CMP(key, dst[0]);
if ((right && cmp < 0) || (!right && cmp <= 0)) {
return 0;
}
} else {
cmp = SORT_CMP(dst[size - 1], key);
if ((right && cmp <= 0) || (!right && cmp < 0)) {
return size;
}
}
break;
}
c = anchor + ofs;
/* right, 0<ofs: dst[anchor+last_ofs] <= key < dst[anchor+ofs] */
/* left, 0<ofs: dst[anchor+last_ofs] < key <= dst[anchor+ofs] */
/* right, ofs<0: dst[anchor+ofs] <= key < dst[anchor+last_ofs] */
/* left, ofs<0: dst[anchor+ofs] < key <= dst[anchor+last_ofs] */
cmp = SORT_CMP(key, dst[c]);
if (0 < ofs) {
if ((right && cmp < 0) || (!right && cmp <= 0)) {
break;
}
} else {
if ((right && 0 <= cmp) || (!right && 0 < cmp)) {
break;
}
}
last_ofs = ofs;
/* next probe: double the distance and add one step in the gallop direction */
ofs = (ofs << 1) + ofs_sign;
}
/* key in region (l, r) , both l and r have already been compared */
if (ofs < 0) {
l = anchor + ofs;
r = anchor + last_ofs;
} else {
l = anchor + last_ofs;
r = anchor + ofs;
}
/* binary search inside the open interval (l, r) */
while (1 < r - l) {
c = l + ((r - l) >> 1);
cmp = SORT_CMP(key, dst[c]);
if ((right && cmp < 0) || (!right && cmp <= 0)) {
r = c;
} else {
l = c;
}
}
return r;
}
/*
 * Merge two runs front to back.
 *
 * A_src / A    - first run and its length; the merged output is written
 *                starting at this address
 * B_src / B    - second run and its length
 * storage      - scratch space for at least A elements; run A is copied
 *                there first so its original slots can be overwritten
 * min_gallop_p - in/out galloping threshold, updated on return
 *
 * Presumably B_src directly follows run A in memory so the output can
 * spill into B's slots - confirm against the caller.
 * The caller is expected to have trimmed the runs so that the first output
 * element comes from B and the last one from A (see the inline comments).
 */
static void TIM_SORT_MERGE_LEFT(SORT_TYPE *A_src, SORT_TYPE *B_src, const size_t A, const size_t B,
SORT_TYPE* storage, int *min_gallop_p)
{
size_t pdst, pa, pb, k;
int a_count, b_count;
int min_gallop = *min_gallop_p;
SORT_TYPE *dst = A_src;
/* move run A out of the way; from here on A_src refers to the copy */
SORT_TYPE_CPY(storage, dst, A);
A_src = storage;
pdst = pa = pb = 0;
/* first element must in B, otherwise skipped in the caller */
dst[pdst++] = B_src[pb++];
if (B == 1) {
goto copyA;
}
for (;;) {
a_count = b_count = 0;
/* one-at-a-time mode: count consecutive wins of each run */
for (;;) {
if (SORT_CMP(A_src[pa], B_src[pb]) <= 0) {
dst[pdst++] = A_src[pa++];
++a_count;
b_count = 0;
/* No need to check if pa == A because the last element must be in A
* so pb will reach to B first. You can check pa == A-1 and do
* some optimization if you wish.*/
if (min_gallop <= a_count) {
break;
}
} else {
dst[pdst++] = B_src[pb++];
++b_count;
a_count = 0;
if (pb == B) {
goto copyA;
}
if (min_gallop <= b_count) {
break;
}
}
}
/* one run won min_gallop times in a row: switch to galloping mode */
++min_gallop;
for (;;) {
if (min_gallop != 0) {
min_gallop --;
}
/* number of leading A elements that go before B_src[pb] */
k = TIM_SORT_GALLOP(&A_src[pa], A - pa, B_src[pb], 0, 1);
SORT_TYPE_CPY(&dst[pdst], &A_src[pa], k);
pdst += k;
pa += k;
/* now we know the next must be in B */
dst[pdst++] = B_src[pb++];
if (pb == B) {
goto copyA;
}
if (a_count && k < TIM_SORT_MIN_GALLOP) {
++min_gallop;
break;
}
/* number of leading B elements that go before A_src[pa] */
k = TIM_SORT_GALLOP(&B_src[pb], B - pb, A_src[pa], 0, 0);
/* source and destination both live in the original array, hence MOVE */
SORT_TYPE_MOVE(&dst[pdst], &B_src[pb], k);
pdst += k;
pb += k;
if (pb == B) {
goto copyA;
}
dst[pdst++] = A_src[pa++];
if (b_count && k < TIM_SORT_MIN_GALLOP) {
++min_gallop;
break;
}
}
}
copyA:
/* B is exhausted: the rest of the saved A run finishes the output */
SORT_TYPE_CPY(&dst[pdst], &A_src[pa], A - pa);
*min_gallop_p = min_gallop;
return;
}
/*
 * Merge two runs back to front.
 *
 * A_src / A    - first run and its length; the merged output occupies
 *                indices 0 .. A + B - 1 relative to this address
 * B_src / B    - second run and its length
 * storage      - scratch space for at least B elements; run B is copied
 *                there first so its original slots can be overwritten
 * min_gallop_p - in/out galloping threshold, updated on return
 *
 * Presumably B_src directly follows run A in memory - confirm against the
 * caller.  The caller is expected to have trimmed the runs so that the last
 * output element comes from A and the first one from B (see the inline
 * comments).
 *
 * NOTE(review): the cursors pa/pb/pdst are `int`; the casts would truncate
 * for runs longer than INT_MAX - confirm callers never exceed that.
 */
static void TIM_SORT_MERGE_RIGHT(SORT_TYPE *A_src, SORT_TYPE *B_src, const size_t A, const size_t B,
SORT_TYPE* storage, int *min_gallop_p)
{
size_t k;
int pdst, pa, pb, a_count, b_count;
int min_gallop = *min_gallop_p;
SORT_TYPE *dst = A_src;
/* all three cursors start on the last element and walk downwards */
pa = (int)(A - 1);
pb = (int)(B - 1);
pdst = (int)(A + B - 1);
/* move run B out of the way; from here on B_src refers to the copy */
SORT_TYPE_CPY(storage, B_src, B);
B_src = storage;
/* last element must in A, otherwise skipped in the caller */
dst[pdst--] = A_src[pa--];
if (A == 1) {
goto copyB;
}
for (;;) {
a_count = b_count = 0;
/* one-at-a-time mode: count consecutive wins of each run */
for (;;) {
if (SORT_CMP(A_src[pa], B_src[pb]) <= 0) {
dst[pdst--] = B_src[pb--];
++b_count;
a_count = 0;
if (min_gallop <= b_count) {
break;
}
/* No need to check if pb == -1 because the first element must be in B
* so pa will reach to -1 first. You can check pb == 0 and do
* some optimization if you wish.*/
} else {
dst[pdst--] = A_src[pa--];
++a_count;
b_count = 0;
if (pa == -1) {
goto copyB;
}
if (min_gallop <= a_count) {
break;
}
}
}
/* one run won min_gallop times in a row: switch to galloping mode */
++min_gallop;
for (;;) {
if (min_gallop != 0) {
min_gallop --;
}
/* k = index in A of the first element greater than B_src[pb]; A[k..pa] moves to the output tail */
k = TIM_SORT_GALLOP(A_src, pa + 1, B_src[pb], pa, 1);
/* Understand the margin by considering k==0 */
SORT_TYPE_MOVE(&dst[pb + k + 1], &A_src[k], pa + 1 - k);
pdst = pb + (int)k;
pa = (int)(k - 1);
if (pa == -1) {
goto copyB;
}
/* now we know the next must be in B */
dst[pdst--] = B_src[pb--];
/* NOTE(review): pa was just set to k - 1, so pa + 1 - k looks like it is always 0 here - confirm whether the number of moved elements was intended */
if (a_count && pa + 1 - k < TIM_SORT_MIN_GALLOP) {
++min_gallop;
break;
}
/* k = index in the saved B run of the first element not less than A_src[pa]; B[k..pb] is copied to the output tail */
k = TIM_SORT_GALLOP(B_src, pb + 1, A_src[pa], pb, 0);
SORT_TYPE_CPY(&dst[pa + k + 1], &B_src[k], pb + 1 - k);
pdst = pa + (int)k;
pb = (int)(k - 1);
dst[pdst--] = A_src[pa--];
if (pa == -1) {
goto copyB;
}
/* NOTE(review): pb was just set to k - 1, so pb + 1 - k looks like it is always 0 here - confirm whether the number of copied elements was intended */
if (b_count && pb + 1 - k < TIM_SORT_MIN_GALLOP) {
++min_gallop;
break;
}
}
}
copyB:
/* A is exhausted: what is left of the saved B run goes to the front */
SORT_TYPE_CPY(dst, B_src, pb + 1);
*min_gallop_p = min_gallop;
return;
}
/*
 * Merge the two topmost runs of the run stack inside dst.
 *
 * stack / stack_curr - run stack and its current depth; entries
 *                      stack_curr - 2 and stack_curr - 1 are merged
 * store              - scratch buffer, grown to min(A, B) elements
 * min_gallop_p       - in/out galloping threshold
 *
 * The stack itself is not modified; callers update the run lengths.
 */
static void TIM_SORT_MERGE(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const int stack_curr,
                           TEMP_STORAGE_T *store, int* min_gallop_p)
{
    const TIM_SORT_RUN_T *run_a = &stack[stack_curr - 2];
    const TIM_SORT_RUN_T *run_b = &stack[stack_curr - 1];
    size_t a_len = run_a->length;
    size_t b_len = run_b->length;
    size_t a_pos = run_a->start;
    const size_t b_pos = run_b->start;
    size_t skip;

    /* A[skip-1] <= B[0] < A[skip]: the leading part of A is already in place */
    skip = TIM_SORT_GALLOP(&dst[a_pos], a_len, dst[b_pos], 0, 1);
    a_pos += skip;
    a_len -= skip;

    if (a_len == 0) {
        /* all of A precedes B, nothing to merge */
        *min_gallop_p /= 2;
        return;
    }

    /* B[k-1] < A[last] <= B[k]: the trailing part of B is already in place */
    b_len = TIM_SORT_GALLOP(&dst[b_pos], b_len, dst[b_pos - 1], b_len - 1, 0);

    TIM_SORT_RESIZE(store, MIN(a_len, b_len));

    /* the shorter run is the one that gets copied into the scratch buffer */
    if (a_len < b_len) {
        TIM_SORT_MERGE_LEFT(&dst[a_pos], &dst[b_pos], a_len, b_len, store->storage, min_gallop_p);
    } else {
        TIM_SORT_MERGE_RIGHT(&dst[a_pos], &dst[b_pos], a_len, b_len, store->storage, min_gallop_p);
    }
}
/*
 * Merge runs on top of the run stack until the length invariants hold again.
 *
 * dst          - array being sorted
 * stack        - run stack (start/length pairs), updated in place
 * stack_curr   - current number of runs on the stack
 * store        - scratch buffer passed through to TIM_SORT_MERGE
 * size         - total number of elements in dst
 * min_gallop_p - in/out galloping threshold passed through to the merges
 *
 * Returns the new stack depth.
 */
static int TIM_SORT_COLLAPSE(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr,
TEMP_STORAGE_T *store, const size_t size, int* min_gallop_p)
{
while (1) {
/* A, B, C, D: lengths of the four topmost runs, D being the top */
size_t A, B, C, D;
int ABC, BCD, CD;
/* if the stack only has one thing on it, we are done with the collapse */
if (stack_curr <= 1) {
break;
}
/* if this is the last merge, just do it */
if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
TIM_SORT_MERGE(dst, stack, stack_curr, store, min_gallop_p);
stack[0].length += stack[1].length;
stack_curr--;
break;
}
/* check if the invariant is off for a stack of 2 elements */
else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
TIM_SORT_MERGE(dst, stack, stack_curr, store, min_gallop_p);
stack[0].length += stack[1].length;
stack_curr--;
break;
} else if (stack_curr == 2) {
break;
}
B = stack[stack_curr - 3].length;
C = stack[stack_curr - 2].length;
D = stack[stack_curr - 1].length;
/* the fourth run only takes part when the stack is deep enough */
if (stack_curr >= 4) {
A = stack[stack_curr - 4].length;
ABC = (A <= B + C);
} else {
ABC = 0;
}
/* BCD / CD are "invariant violated" flags */
BCD = (B <= C + D) || ABC;
CD = (C <= D);
/* Both invariants are good */
if (!BCD && !CD) {
break;
}
/* left merge */
if (BCD && !CD) {
/* merge runs -3 and -2, then slide the top run down one slot */
TIM_SORT_MERGE(dst, stack, stack_curr - 1, store, min_gallop_p);
stack[stack_curr - 3].length += stack[stack_curr - 2].length;
stack[stack_curr - 2] = stack[stack_curr - 1];
stack_curr--;
} else {
/* right merge */
/* merge the two topmost runs */
TIM_SORT_MERGE(dst, stack, stack_curr, store, min_gallop_p);
stack[stack_curr - 2].length += stack[stack_curr - 1].length;
stack_curr--;
}
}
return stack_curr;
}
/*
 * Detect the next run starting at *curr, extend it to `minrun` elements with
 * binary insertion sort if it is shorter, and push it on the run stack.
 *
 * Returns 1 when more input remains.  When the run reaches the end of the
 * array, all runs on the stack are merged, the scratch buffer is released
 * and 0 is returned.
 */
static __inline int PUSH_NEXT(SORT_TYPE *dst,
                              const size_t size,
                              TEMP_STORAGE_T *store,
                              const size_t minrun,
                              TIM_SORT_RUN_T *run_stack,
                              size_t *stack_curr,
                              size_t *curr,
                              int *min_gallop_p)
{
    const size_t remaining = size - *curr;
    size_t len = COUNT_RUN(dst, *curr, size);
    /* target run length: minrun, capped by what is left of the array */
    const size_t run = (minrun > remaining) ? remaining : minrun;
    TIM_SORT_RUN_T *slot;

    if (run > len) {
        /* natural run too short: grow it by insertion sorting the extra elements */
        BINARY_INSERTION_SORT_START(&dst[*curr], len, run);
        len = run;
    }

    slot = &run_stack[*stack_curr];
    slot->start = *curr;
    slot->length = len;
    (*stack_curr)++;
    *curr += len;

    if (*curr != size) {
        return 1;
    }

    /* finish up: fold the whole stack into a single run */
    while (*stack_curr > 1) {
        TIM_SORT_MERGE(dst, run_stack, (int)*stack_curr, store, min_gallop_p);
        run_stack[*stack_curr - 2].length += run_stack[*stack_curr - 1].length;
        (*stack_curr)--;
    }

    /* free(NULL) is a no-op, so no guard is needed */
    free(store->storage);
    store->storage = NULL;

    return 0;
}
/*
 * Timsort entry point: sorts dst[0..size-1] in place.
 *
 * Arrays with fewer than 64 elements go straight to SMALL_SORT.  Otherwise
 * runs are pushed on a fixed-size stack and merged whenever the stack
 * invariants are violated.
 */
void TIM_SORT(SORT_TYPE *dst, const size_t size)
{
    TIM_SORT_RUN_T run_stack[TIM_SORT_STACK_SIZE];
    TEMP_STORAGE_T scratch;
    size_t minrun;
    size_t stack_curr = 0;
    size_t curr = 0;
    int min_gallop = TIM_SORT_MIN_GALLOP;
    int primed;

    /* don't bother sorting an array of size 1 */
    if (size <= 1) {
        return;
    }

    if (size < 64) {
        SMALL_SORT(dst, size);
        return;
    }

    /* compute the minimum run length */
    minrun = compute_minrun(size);

    /* temporary storage for merges, allocated lazily */
    scratch.alloc = 0;
    scratch.storage = NULL;

    /* start with three runs on the stack (fewer if the input ends first) */
    for (primed = 0; primed < 3; primed++) {
        if (!PUSH_NEXT(dst, size, &scratch, minrun, run_stack, &stack_curr, &curr, &min_gallop)) {
            return;
        }
    }

    for (;;) {
        if (CHECK_INVARIANT(run_stack, (int)stack_curr)) {
            /* invariants hold: take the next run; 0 means the sort is done */
            if (!PUSH_NEXT(dst, size, &scratch, minrun, run_stack, &stack_curr, &curr, &min_gallop)) {
                return;
            }
        } else {
            stack_curr = TIM_SORT_COLLAPSE(dst, run_stack, (int)stack_curr, &scratch, size, &min_gallop);
        }
    }
}
/* heap sort: based on wikipedia */

/*
 * Sift dst[start] down through the heap that ends at index `end`
 * (inclusive).  The children of node r are taken to be 2r and 2r + 1.
 */
static __inline void HEAP_SIFT_DOWN(SORT_TYPE *dst, const size_t start, const size_t end)
{
    size_t parent = start;
    size_t kid;

    for (kid = parent << 1; kid <= end; kid = parent << 1) {
        /* pick the larger of the two children, if a second one exists */
        if ((kid < end) && (SORT_CMP(dst[kid], dst[kid + 1]) < 0)) {
            kid++;
        }

        /* parent not smaller than the larger child: done */
        if (SORT_CMP(dst[parent], dst[kid]) >= 0) {
            return;
        }

        SORT_SWAP(dst[parent], dst[kid]);
        parent = kid;
    }
}
/* Build the heap over dst[0..size-1] by sifting down every node from size/2 to 0. */
static __inline void HEAPIFY(SORT_TYPE *dst, const size_t size)
{
    size_t node = size >> 1;

    do {
        HEAP_SIFT_DOWN(dst, node, size - 1);
    } while (node-- > 0); /* stop right after node 0 has been processed */
}
/* Heap sort entry point: sorts dst[0..size-1] in place. */
void HEAP_SORT(SORT_TYPE *dst, const size_t size)
{
    size_t last;

    /* don't bother sorting an array of size <= 1 */
    if (size <= 1) {
        return;
    }

    HEAPIFY(dst, size);

    /* move the current maximum behind the shrinking heap, then repair the heap */
    for (last = size - 1; last > 0; last--) {
        SORT_SWAP(dst[last], dst[0]);
        HEAP_SIFT_DOWN(dst, 0, last - 1);
    }
}
/********* Sqrt sorting *********************************/
/* */
/* (c) 2014 by Andrey Astrelin */
/* */
/* */
/* Stable sorting that works in O(N*log(N)) worst time */
/* and uses O(sqrt(N)) extra memory */
/* */
/* Define SORT_TYPE and SORT_CMP */
/* and then call SqrtSort() function */
/* */
/*********************************************************/
/* Pointer flavour of SORT_CMP: compares the elements that a and b point to. */
#define SORT_CMP_A(a,b) SORT_CMP(*(a),*(b))
/* Exchange the two elements a and b point to. */
static __inline void SQRT_SORT_SWAP_1(SORT_TYPE *a, SORT_TYPE *b)
{
    SORT_TYPE held = *a;

    *a = *b;
    *b = held;
}
/* Exchange the n elements starting at a with the n elements starting at b. */
static __inline void SQRT_SORT_SWAP_N(SORT_TYPE *a, SORT_TYPE *b, int n)
{
    for (; n != 0; n--) {
        SQRT_SORT_SWAP_1(a, b);
        a++;
        b++;
    }
}
/*
 * Backward merge: arr[0..L1-1] and arr[L1..L1+L2-1] are merged from the top
 * down so that the output ends at index L1 + L2 + M - 1.  On equal elements
 * the one from the second run is written first, i.e. lands further right.
 */
static void SQRT_SORT_MERGE_RIGHT(SORT_TYPE *arr, int L1, int L2, int M)
{
    int out = L1 + L2 + M - 1;  /* write cursor, moving down          */
    int hi = L1 + L2 - 1;       /* read cursor in the second run      */
    int lo = L1 - 1;            /* read cursor in the first run       */

    while (lo >= 0) {
        const int take_first = (hi < L1) || (SORT_CMP_A(arr + lo, arr + hi) > 0);

        if (take_first) {
            arr[out--] = arr[lo--];
        } else {
            arr[out--] = arr[hi--];
        }
    }

    /* first run consumed; leftovers of the second run may already be in place */
    if (hi == out) {
        return;
    }

    for (; hi >= L1; hi--, out--) {
        arr[out] = arr[hi];
    }
}
/* arr[M..-1] - free, arr[0,L1-1]++arr[L1,L1+L2-1] -> arr[M,M+L1+L2-1] */
/* Forward merge by plain assignment; on ties the element of the first run goes first. */
static void SQRT_SORT_MERGE_LEFT_WITH_X_BUF(SORT_TYPE *arr, int L1, int L2, int M)
{
    const int second_end = L1 + L2;
    int first = 0;      /* read cursor in arr[0..L1-1]             */
    int second = L1;    /* read cursor in arr[L1..second_end-1]    */
    int out = M;        /* write cursor                            */

    while (second < second_end) {
        if (first == L1 || SORT_CMP_A(arr + first, arr + second) > 0) {
            arr[out++] = arr[second++];
        } else {
            arr[out++] = arr[first++];
        }
    }

    /* second run consumed; move the rest of the first run unless it already sits in place */
    if (out != first) {
        for (; first < L1; first++, out++) {
            arr[out] = arr[first];
        }
    }
}
/*
 * Merge arr[0..L1-1] with the external run arr2[0..L2-1].
 * Output starts at arr[-L2] and covers arr[-L2 .. L1-1].
 * On ties the element from arr2 is written first.
 */
static void SQRT_SORT_MERGE_DOWN(SORT_TYPE *arr, SORT_TYPE *arr2, int L1, int L2)
{
    int own = 0;       /* read cursor in arr  */
    int ext = 0;       /* read cursor in arr2 */
    int out = -L2;     /* write cursor in arr */

    while (ext < L2) {
        if (own == L1 || SORT_CMP_A(arr + own, arr2 + ext) >= 0) {
            arr[out++] = arr2[ext++];
        } else {
            arr[out++] = arr[own++];
        }
    }

    /* arr2 consumed; move the rest of arr unless it already sits in place */
    if (out != own) {
        for (; own < L1; own++, out++) {
            arr[out] = arr[own];
        }
    }
}
/*
 * Merge the pending fragment arr[0..*alen1-1] (stream type *atype) with the
 * following block arr[*alen1 .. *alen1+len2-1], writing from arr[-lkeys]
 * upwards, until one side is exhausted.
 *
 * On return *alen1 is the length of the unmerged remainder, which ends at
 * index (old *alen1) + len2 - 1.  If the remainder comes from the second
 * block, *atype is flipped.
 */
static void SQRT_SORT_SMART_MERGE_WITH_X_BUF(SORT_TYPE *arr, int *alen1, int *atype, int len2,
        int lkeys)
{
    const int first_end = *alen1;
    const int second_end = first_end + len2;
    const int ftype = 1 - *atype; /* 1 if inverted */
    int out = -lkeys;
    int first = 0;
    int second = first_end;

    while (first < first_end && second < second_end) {
        if (SORT_CMP_A(arr + first, arr + second) - ftype < 0) {
            arr[out++] = arr[first++];
        } else {
            arr[out++] = arr[second++];
        }
    }

    if (first < first_end) {
        /* second block consumed: slide the rest of the fragment up to the end */
        int src = first_end;
        int dst_pos = second_end;

        *alen1 = first_end - first;

        while (first < src) {
            arr[--dst_pos] = arr[--src];
        }
    } else {
        /* fragment consumed: the rest of the second block becomes the new fragment */
        *alen1 = second_end - second;
        *atype = ftype;
    }
}
/*
arr - starting array. arr[-lblock..-1] - buffer (if havebuf).
lblock - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded
keys - arrays of keys, in same order as blocks. key<midkey means stream A
nblock2 are regular blocks from stream A. llast is length of last (irregular) block from stream B, that should go before nblock2 blocks.
llast=0 requires nblock2=0 (no irregular blocks). llast>0, nblock2=0 is possible.

Walks the nblock regular blocks from left to right, keeping a pending
"rest" fragment (prest = start, lrest = length, frest = stream flag).
All output is written lblock positions to the left of where it was read.
*/
static void SQRT_SORT_MERGE_BUFFERS_LEFT_WITH_X_BUF(int *keys, int midkey, SORT_TYPE *arr,
int nblock, int lblock, int nblock2, int llast)
{
int l, prest, lrest, frest, pidx, cidx, fnext;
/* no regular blocks: only the nblock2 blocks and the irregular tail are merged */
if (nblock == 0) {
l = nblock2 * lblock;
SQRT_SORT_MERGE_LEFT_WITH_X_BUF(arr, l, llast, -lblock);
return;
}
/* the first block is the initial pending fragment */
lrest = lblock;
frest = keys[0] < midkey ? 0 : 1;
pidx = lblock;
for (cidx = 1; cidx < nblock; cidx++, pidx += lblock) {
prest = pidx - lrest;
fnext = keys[cidx] < midkey ? 0 : 1;
if (fnext == frest) {
/* same stream as the pending fragment: shift the fragment left, the next block becomes pending */
SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest);
prest = pidx;
lrest = lblock;
} else {
/* different stream: merge; lrest / frest are updated to describe the leftover */
SQRT_SORT_SMART_MERGE_WITH_X_BUF(arr + prest, &lrest, &frest, lblock, lblock);
}
}
prest = pidx - lrest;
if (llast) {
if (frest) {
/* pending fragment has stream flag 1: emit it, then merge the nblock2 blocks with the tail */
SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest);
prest = pidx;
lrest = lblock * nblock2;
frest = 0;
} else {
/* pending fragment has stream flag 0: it is joined with the nblock2 blocks */
lrest += lblock * nblock2;
}
SQRT_SORT_MERGE_LEFT_WITH_X_BUF(arr + prest, lrest, llast, -lblock);
} else {
/* no irregular tail: just emit the pending fragment */
SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest);
}
}
/*
build blocks of length K
input: [-K,-1] elements are buffer
output: first K elements are buffer, blocks 2*K and last subblock sorted

arr - start of the L data elements; the code writes to negative indices,
so the free slots in front of arr are required.
*/
static void SQRT_SORT_BUILD_BLOCKS(SORT_TYPE *arr, int L, int K)
{
int m, u, h, p0, p1, rest, restk, p;
/* pass 1: order each pair of neighbours while moving it two slots to the left */
for (m = 1; m < L; m += 2) {
u = 0;
if (SORT_CMP_A(arr + (m - 1), arr + m) > 0) {
u = 1;
}
arr[m - 3] = arr[m - 1 + u];
arr[m - 2] = arr[m - u];
}
/* odd length: the unpaired last element is moved as well */
if (L % 2) {
arr[L - 3] = arr[L - 1];
}
arr -= 2;
/* merge runs of length h into runs of length 2h, each pass moving the data h slots to the left */
for (h = 2; h < K; h *= 2) {
p0 = 0;
p1 = L - 2 * h;
while (p0 <= p1) {
SQRT_SORT_MERGE_LEFT_WITH_X_BUF(arr + p0, h, h, -h);
p0 += 2 * h;
}
rest = L - p0;
if (rest > h) {
/* a full run followed by a shorter one */
SQRT_SORT_MERGE_LEFT_WITH_X_BUF(arr + p0, h, rest - h, -h);
} else {
/* at most one run left: plain shift */
for (; p0 < L; p0++) {
arr[p0 - h] = arr[p0];
}
}
arr -= h;
}
/* final pass merges runs of length K from the right end, moving the data K slots to the right */
restk = L % (2 * K);
p = L - restk;
if (restk <= K) {
SORT_TYPE_CPY(arr + p + K, arr + p, restk);
} else {
SQRT_SORT_MERGE_RIGHT(arr + p, K, restk - K, K);
}
while (p > 0) {
p -= 2 * K;
SQRT_SORT_MERGE_RIGHT(arr + p, K, K, K);
}
}
/* Insertion sort of arr[0..len-1] using adjacent swaps; equal elements are never swapped. */
static void SQRT_SORT_SORT_INS(SORT_TYPE *arr, int len)
{
    int sorted_end;
    int slot;

    for (sorted_end = 1; sorted_end < len; sorted_end++) {
        /* bubble the new element down until its left neighbour is not greater */
        slot = sorted_end;

        while (slot > 0 && SORT_CMP_A(arr + slot, arr + (slot - 1)) < 0) {
            SQRT_SORT_SWAP_1(arr + (slot - 1), arr + slot);
            slot--;
        }
    }
}
/*
keys are on the left of arr. Blocks of length LL combined. We'll combine them in pairs
LL and nkeys are powers of 2. (2*LL/lblock) keys are guaranteed

arr    - data; the lblock slots in front of arr are used as merge buffer
len    - number of data elements
LL     - length of the already sorted runs that are merged pairwise
lblock - block length used for the block-wise merge
tags   - scratch array of ints that records the original order of the blocks
*/
static void SQRT_SORT_COMBINE_BLOCKS(SORT_TYPE *arr, int len, int LL, int lblock, int *tags)
{
int M, b, NBlk, midkey, lrest, u, i, p, v, kc, nbl2, llast;
SORT_TYPE *arr1;
M = len / (2 * LL);
lrest = len % (2 * LL);
/* a trailing piece of at most LL elements is a single run: excluded from merging */
if (lrest <= LL) {
len -= lrest;
lrest = 0;
}
for (b = 0; b <= M; b++) {
if (b == M && lrest == 0) {
break;
}
arr1 = arr + b * 2 * LL;
NBlk = (b == M ? lrest : 2 * LL) / lblock;
u = NBlk + (b == M ? 1 : 0);
/* tags start out as the identity permutation */
for (i = 0; i <= u; i++) {
tags[i] = i;
}
/* blocks tagged below midkey come from the first run of the pair */
midkey = LL / lblock;
/* selection sort of the blocks by first element; ties are broken by tag */
for (u = 1; u < NBlk; u++) {
p = u - 1;
for (v = u; v < NBlk; v++) {
kc = SORT_CMP_A(arr1 + p * lblock, arr1 + v * lblock);
if (kc > 0 || (kc == 0 && tags[p] > tags[v])) {
p = v;
}
}
if (p != u - 1) {
SQRT_SORT_SWAP_N(arr1 + (u - 1)*lblock, arr1 + p * lblock, lblock);
i = tags[u - 1];
tags[u - 1] = tags[p];
tags[p] = i;
}
}
nbl2 = llast = 0;
/* only the last, partial pair can end in an irregular block */
if (b == M) {
llast = lrest % lblock;
}
if (llast != 0) {
/* count the trailing regular blocks whose first element is greater than the first element of the irregular block */
while (nbl2 < NBlk && SORT_CMP_A(arr1 + NBlk * lblock, arr1 + (NBlk - nbl2 - 1)*lblock) < 0) {
nbl2++;
}
}
SQRT_SORT_MERGE_BUFFERS_LEFT_WITH_X_BUF(tags, midkey, arr1, NBlk - nbl2, lblock, nbl2, llast);
}
/* shift the first len elements lblock slots to the right, back from the buffer area */
for (p = len; --p >= 0;) {
arr[p] = arr[p - lblock];
}
}
/*
 * Core of sqrt sort.
 *
 * arr    - the Len elements to sort
 * extbuf - external buffer, must hold at least `lblock` elements, where
 *          lblock is the smallest power of two with lblock^2 >= Len
 * Tags   - int scratch array passed through to SQRT_SORT_COMBINE_BLOCKS
 */
static void SQRT_SORT_COMMON_SORT(SORT_TYPE *arr, int Len, SORT_TYPE *extbuf, int *Tags)
{
    SORT_TYPE *body;
    int body_len;
    int lblock;
    int run;

    /* tiny inputs: insertion sort */
    if (Len < 16) {
        SQRT_SORT_SORT_INS(arr, Len);
        return;
    }

    for (lblock = 1; lblock * lblock < Len; lblock *= 2) {
        /* find the block length */
    }

    /* move the first lblock elements out and sort them, using arr as their scratch area */
    SORT_TYPE_CPY(extbuf, arr, lblock);
    SQRT_SORT_COMMON_SORT(extbuf, lblock, arr, Tags);

    body = arr + lblock;
    body_len = Len - lblock;

    /* the vacated first lblock slots serve as merge buffer for the remainder */
    SQRT_SORT_BUILD_BLOCKS(body, body_len, lblock);

    for (run = 2 * lblock; run < Len; run *= 2) {
        SQRT_SORT_COMBINE_BLOCKS(body, body_len, run, lblock, Tags);
    }

    /* finally merge the separately sorted head back in */
    SQRT_SORT_MERGE_DOWN(body, extbuf, body_len, lblock);
}
/*
 * Sqrt sort entry point: sorts arr[0..Len-1].
 *
 * Allocates an element buffer of L entries (L = smallest power of two with
 * L*L >= Len) and an int tag array of (Len - 1) / L + 2 entries.  If either
 * allocation fails the function returns without sorting; no memory is
 * leaked in that case.
 *
 * NOTE(review): the worker uses `int` lengths; Len is cast to int below, so
 * lengths above INT_MAX are not supported - confirm callers respect that.
 */
void SQRT_SORT(SORT_TYPE *arr, size_t Len)
{
    int L = 1;
    SORT_TYPE *ExtBuf;
    int *Tags;
    int NK;

    /* nothing to sort, and no reason to allocate */
    if (Len <= 1) {
        return;
    }

    /* 64-bit product: an int multiplication would overflow once L reaches 65536 */
    while ((uint64_t)L * (uint64_t)L < (uint64_t)Len) {
        L *= 2;
    }

    NK = (int)((Len - 1) / L + 2);
    ExtBuf = SORT_NEW_BUFFER(L);

    if (ExtBuf == NULL) {
        return; /* fail */
    }

    Tags = (int*)malloc(NK * sizeof(int));

    if (Tags == NULL) {
        /* release the element buffer obtained above before giving up */
        SORT_DELETE_BUFFER(ExtBuf);
        return; /* fail */
    }

    SQRT_SORT_COMMON_SORT(arr, (int)Len, ExtBuf, Tags);
    free(Tags);
    SORT_DELETE_BUFFER(ExtBuf);
}
/********* Grail sorting *********************************/
/* */
/* (c) 2013 by Andrey Astrelin */
/* */
/* */
/* Stable sorting that works in O(N*log(N)) worst time */
/* and uses O(1) extra memory */
/* */
/* Define SORT_TYPE and SORT_CMP */
/* and then call GrailSort() function */
/* */
/* For sorting with fixed external buffer (512 items) */
/* use GrailSortWithBuffer() */
/* */
/* For sorting with dynamic external buffer (O(sqrt(N)) items) */
/* use GrailSortWithDynBuffer() */
/* */
/* Also classic in-place merge sort is implemented */
/* under the name of RecStableSort() */
/* */
/*********************************************************/
/* Number of items in the fixed-size external buffer variant of grail sort. */
#define GRAIL_EXT_BUFFER_LENGTH 512
/* Exchange the two elements a and b point to. */
static __inline void GRAIL_SWAP1(SORT_TYPE *a, SORT_TYPE *b)
{
    SORT_TYPE held = *a;

    *a = *b;
    *b = held;
}
/* Exchange the n elements starting at a with the n elements starting at b. */
static __inline void GRAIL_SWAP_N(SORT_TYPE *a, SORT_TYPE *b, int n)
{
    for (; n != 0; n--) {
        GRAIL_SWAP1(a, b);
        a++;
        b++;
    }
}
/*
 * Rotate a[0..l1+l2-1] in place so that the block of l1 elements and the
 * following block of l2 elements trade places.  Works by repeatedly
 * swapping the shorter block into position.
 */
static void GRAIL_ROTATE(SORT_TYPE *a, int l1, int l2)
{
    while (l1 != 0 && l2 != 0) {
        if (l1 > l2) {
            /* right block is shorter: swap it with the tail of the left block */
            GRAIL_SWAP_N(a + (l1 - l2), a + l1, l2);
            l1 -= l2;
        } else {
            /* left block is not longer: swap it with the head of the right block */
            GRAIL_SWAP_N(a, a + l1, l1);
            a += l1;
            l2 -= l1;
        }
    }
}
/*
 * Binary search in arr[0..len-1]: returns the smallest index whose element
 * is not less than *key, or len if there is none.
 */
static int GRAIL_BIN_SEARCH_LEFT(SORT_TYPE *arr, int len, SORT_TYPE *key)
{
    int below = -1;   /* last index known to hold an element <  *key */
    int found = len;  /* first index known to hold an element >= *key */
    int mid;

    while (below + 1 < found) {
        mid = below + ((found - below) >> 1);

        if (SORT_CMP_A(arr + mid, key) >= 0) {
            found = mid;
        } else {
            below = mid;
        }
    }

    return found;
}
/*
 * Binary search in arr[0..len-1]: returns the smallest index whose element
 * is greater than *key, or len if there is none.
 */
static int GRAIL_BIN_SEARCH_RIGHT(SORT_TYPE *arr, int len, SORT_TYPE *key)
{
    int below = -1;   /* last index known to hold an element <= *key */
    int found = len;  /* first index known to hold an element >  *key */
    int mid;

    while (below + 1 < found) {
        mid = below + ((found - below) >> 1);

        if (SORT_CMP_A(arr + mid, key) > 0) {
            found = mid;
        } else {
            below = mid;
        }
    }

    return found;
}
/* cost: 2*len+nk^2/2 */
/*
 * Scan arr[0..len-1] for up to nkeys elements that do not compare equal to
 * one another, keep them as a sorted block and finally rotate that block to
 * the front of arr.  Returns the number of keys collected.
 */
static int GRAIL_FIND_KEYS(SORT_TYPE *arr, int len, int nkeys)
{
    int found = 1;    /* first key is always here */
    int keys_at = 0;  /* current start of the key block */
    int scan;

    for (scan = 1; scan < len && found < nkeys; scan++) {
        const int slot = GRAIL_BIN_SEARCH_LEFT(arr + keys_at, found, arr + scan);

        /* an equal element is already among the keys: skip this one */
        if (slot != found && SORT_CMP_A(arr + scan, arr + (keys_at + slot)) == 0) {
            continue;
        }

        /* drag the key block up next to the new key ... */
        GRAIL_ROTATE(arr + keys_at, found, scan - (keys_at + found));
        keys_at = scan - found;
        /* ... and rotate the new key into its sorted slot */
        GRAIL_ROTATE(arr + (keys_at + slot), found - slot, 1);
        found++;
    }

    /* bring the key block back to the start of the array */
    GRAIL_ROTATE(arr, keys_at, found);
    return found;
}
/* cost: min(L1,L2)^2+max(L1,L2) */
/*
 * In-place merge of arr[0..len1-1] with arr[len1..len1+len2-1] using only
 * binary searches and rotations.  The shorter side drives the loop.
 */
static void GRAIL_MERGE_WITHOUT_BUFFER(SORT_TYPE *arr, int len1, int len2)
{
int h;
if (len1 < len2) {
/* left run is shorter: place its elements front to back */
while (len1) {
/* h = number of right-run elements smaller than the current left head */
h = GRAIL_BIN_SEARCH_LEFT(arr + len1, len2, arr);
if (h != 0) {
GRAIL_ROTATE(arr, len1, h);
arr += h;
len2 -= h;
}
if (len2 == 0) {
break;
}
/* skip left elements that are already in place */
do {
arr++;
len1--;
} while (len1 && SORT_CMP_A(arr, arr + len1) <= 0);
}
} else {
/* right run is not longer: place its elements back to front */
while (len2) {
/* h = number of left-run elements not greater than the last right element */
h = GRAIL_BIN_SEARCH_RIGHT(arr, len1, arr + (len1 + len2 - 1));
if (h != len1) {
GRAIL_ROTATE(arr + h, len1 - h, len2);
len1 = h;
}
if (len1 == 0) {
break;
}
/* skip right elements that are already in place */
do {
len2--;
} while (len2 && SORT_CMP_A(arr + len1 - 1, arr + len1 + len2 - 1) <= 0);
}
}
}
/* arr[M..-1] - buffer, arr[0,L1-1]++arr[L1,L1+L2-1] -> arr[M,M+L1+L2-1] */
/* Forward merge by swapping, so the buffer contents are kept (displaced, not overwritten). */
static void GRAIL_MERGE_LEFT(SORT_TYPE *arr, int L1, int L2, int M)
{
    const int second_end = L1 + L2;
    int first = 0;      /* read cursor in arr[0..L1-1]          */
    int second = L1;    /* read cursor in arr[L1..second_end-1] */
    int out = M;        /* write cursor                         */

    while (second < second_end) {
        if (first == L1 || SORT_CMP_A(arr + first, arr + second) > 0) {
            GRAIL_SWAP1(arr + out, arr + second);
            second++;
        } else {
            GRAIL_SWAP1(arr + out, arr + first);
            first++;
        }

        out++;
    }

    /* second run consumed; swap over the rest of the first run unless it already sits in place */
    if (out != first) {
        GRAIL_SWAP_N(arr + out, arr + first, L1 - first);
    }
}
/*
 * Backward merge by swapping: arr[0..L1-1] and arr[L1..L1+L2-1] are merged
 * from the top down so that the output ends at index L1 + L2 + M - 1.
 */
static void GRAIL_MERGE_RIGHT(SORT_TYPE *arr, int L1, int L2, int M)
{
    int out = L1 + L2 + M - 1;  /* write cursor, moving down     */
    int hi = L1 + L2 - 1;       /* read cursor in the second run */
    int lo = L1 - 1;            /* read cursor in the first run  */

    while (lo >= 0) {
        if (hi < L1 || SORT_CMP_A(arr + lo, arr + hi) > 0) {
            GRAIL_SWAP1(arr + out, arr + lo);
            lo--;
        } else {
            GRAIL_SWAP1(arr + out, arr + hi);
            hi--;
        }

        out--;
    }

    /* first run consumed; leftovers of the second run may already be in place */
    if (hi == out) {
        return;
    }

    for (; hi >= L1; hi--, out--) {
        GRAIL_SWAP1(arr + out, arr + hi);
    }
}
/*
 * Swap-based variant of the "smart merge": merges the pending fragment
 * arr[0..*alen1-1] (stream type *atype) with the following block of len2
 * elements, writing from arr[-lkeys] upwards, until one side is exhausted.
 *
 * On return *alen1 is the length of the unmerged remainder, which ends at
 * index (old *alen1) + len2 - 1.  If the remainder comes from the second
 * block, *atype is flipped.
 */
static void GRAIL_SMART_MERGE_WITH_BUFFER(SORT_TYPE *arr, int *alen1, int *atype, int len2,
        int lkeys)
{
    const int first_end = *alen1;
    const int second_end = first_end + len2;
    const int ftype = 1 - *atype; /* 1 if inverted */
    int out = -lkeys;
    int first = 0;
    int second = first_end;

    while (first < first_end && second < second_end) {
        if (SORT_CMP_A(arr + first, arr + second) - ftype < 0) {
            GRAIL_SWAP1(arr + out, arr + first);
            first++;
        } else {
            GRAIL_SWAP1(arr + out, arr + second);
            second++;
        }

        out++;
    }

    if (first < first_end) {
        /* second block consumed: swap the rest of the fragment up to the end */
        int src = first_end;
        int dst_pos = second_end;

        *alen1 = first_end - first;

        while (first < src) {
            --src;
            --dst_pos;
            GRAIL_SWAP1(arr + src, arr + dst_pos);
        }
    } else {
        /* fragment consumed: the rest of the second block becomes the new fragment */
        *alen1 = second_end - second;
        *atype = ftype;
    }
}
/*
 * Buffer-less "smart merge": merges the pending fragment arr[0..*alen1-1]
 * (stream type *atype) with the following block of _len2 elements using
 * binary searches and rotations only.
 *
 * On return *alen1 holds the length of the unmerged remainder; *atype is
 * flipped when that remainder comes from the second block.
 */
static void GRAIL_SMART_MERGE_WITHOUT_BUFFER(SORT_TYPE *arr, int *alen1, int *atype, int _len2)
{
int len1, len2, ftype, h;
/* nothing to merge with */
if (!_len2) {
return;
}
len1 = *alen1;
len2 = _len2;
/* ftype shifts the comparisons by one, which changes how ties are resolved */
ftype = 1 - *atype;
/* work is only needed when the fragment's last element is out of order with the block's first */
if (len1 && SORT_CMP_A(arr + (len1 - 1), arr + len1) - ftype >= 0) {
while (len1) {
/* h = number of block elements that belong before the current fragment head */
h = ftype ? GRAIL_BIN_SEARCH_LEFT(arr + len1, len2, arr) : GRAIL_BIN_SEARCH_RIGHT(arr + len1, len2,
arr);
if (h != 0) {
GRAIL_ROTATE(arr, len1, h);
arr += h;
len2 -= h;
}
if (len2 == 0) {
/* block consumed: the rest of the fragment stays pending, type unchanged */
*alen1 = len1;
return;
}
/* skip fragment elements that are already in place */
do {
arr++;
len1--;
} while (len1 && SORT_CMP_A(arr, arr + len1) - ftype < 0);
}
}
/* fragment consumed (or nothing to do): what is left of the block becomes the pending fragment */
*alen1 = len2;
*atype = ftype;
}
/***** Sort With Extra Buffer *****/
/* arr[M..-1] - free, arr[0,L1-1]++arr[L1,L1+L2-1] -> arr[M,M+L1+L2-1] */
/* Forward merge by plain assignment; on ties the element of the first run goes first. */
static void GRAIL_MERGE_LEFT_WITH_X_BUF(SORT_TYPE *arr, int L1, int L2, int M)
{
    const int second_end = L1 + L2;
    int first = 0;      /* read cursor in arr[0..L1-1]          */
    int second = L1;    /* read cursor in arr[L1..second_end-1] */
    int out = M;        /* write cursor                         */

    while (second < second_end) {
        if (first == L1 || SORT_CMP_A(arr + first, arr + second) > 0) {
            arr[out++] = arr[second++];
        } else {
            arr[out++] = arr[first++];
        }
    }

    /* second run consumed; move the rest of the first run unless it already sits in place */
    if (out != first) {
        for (; first < L1; first++, out++) {
            arr[out] = arr[first];
        }
    }
}
/*
 * Assignment-based "smart merge" for the external-buffer path: merges the
 * pending fragment arr[0..*alen1-1] (stream type *atype) with the following
 * block of len2 elements, writing from arr[-lkeys] upwards, until one side
 * is exhausted.
 *
 * On return *alen1 is the length of the unmerged remainder, which ends at
 * index (old *alen1) + len2 - 1.  If the remainder comes from the second
 * block, *atype is flipped.
 */
static void GRAIL_SMART_MERGE_WITH_X_BUF(SORT_TYPE *arr, int *alen1, int *atype, int len2,
        int lkeys)
{
    const int first_end = *alen1;
    const int second_end = first_end + len2;
    const int ftype = 1 - *atype; /* 1 if inverted */
    int out = -lkeys;
    int first = 0;
    int second = first_end;

    while (first < first_end && second < second_end) {
        if (SORT_CMP_A(arr + first, arr + second) - ftype < 0) {
            arr[out++] = arr[first++];
        } else {
            arr[out++] = arr[second++];
        }
    }

    if (first < first_end) {
        /* second block consumed: slide the rest of the fragment up to the end */
        int src = first_end;
        int dst_pos = second_end;

        *alen1 = first_end - first;

        while (first < src) {
            arr[--dst_pos] = arr[--src];
        }
    } else {
        /* fragment consumed: the rest of the second block becomes the new fragment */
        *alen1 = second_end - second;
        *atype = ftype;
    }
}
/*
arr - starting array. arr[-lblock..-1] - buffer (if havebuf).
lblock - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded
keys - arrays of keys, in same order as blocks. key<midkey means stream A
nblock2 are regular blocks from stream A. llast is length of last (irregular) block from stream B, that should go before nblock2 blocks.
llast=0 requires nblock2=0 (no irregular blocks). llast>0, nblock2=0 is possible.

Walks the nblock regular blocks from left to right, keeping a pending
"rest" fragment (prest = start, lrest = length, frest = stream flag).
All output is written lblock positions to the left of where it was read.
Here the keys are SORT_TYPE elements and are compared with SORT_CMP_A.
*/
static void GRAIL_MERGE_BUFFERS_LEFT_WITH_X_BUF(SORT_TYPE *keys, SORT_TYPE *midkey, SORT_TYPE *arr,
int nblock, int lblock, int nblock2, int llast)
{
int l, prest, lrest, frest, pidx, cidx, fnext;
/* no regular blocks: only the nblock2 blocks and the irregular tail are merged */
if (nblock == 0) {
l = nblock2 * lblock;
GRAIL_MERGE_LEFT_WITH_X_BUF(arr, l, llast, -lblock);
return;
}
/* the first block is the initial pending fragment */
lrest = lblock;
frest = SORT_CMP_A(keys, midkey) < 0 ? 0 : 1;
pidx = lblock;
for (cidx = 1; cidx < nblock; cidx++, pidx += lblock) {
prest = pidx - lrest;
fnext = SORT_CMP_A(keys + cidx, midkey) < 0 ? 0 : 1;
if (fnext == frest) {
/* same stream as the pending fragment: shift the fragment left, the next block becomes pending */
SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest);
prest = pidx;
lrest = lblock;
} else {
/* different stream: merge; lrest / frest are updated to describe the leftover */
GRAIL_SMART_MERGE_WITH_X_BUF(arr + prest, &lrest, &frest, lblock, lblock);
}
}
prest = pidx - lrest;
if (llast) {
if (frest) {
/* pending fragment has stream flag 1: emit it, then merge the nblock2 blocks with the tail */
SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest);
prest = pidx;
lrest = lblock * nblock2;
frest = 0;
} else {
/* pending fragment has stream flag 0: it is joined with the nblock2 blocks */
lrest += lblock * nblock2;
}
GRAIL_MERGE_LEFT_WITH_X_BUF(arr + prest, lrest, llast, -lblock);
} else {
/* no irregular tail: just emit the pending fragment */
SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest);
}
}
/***** End Sort With Extra Buffer *****/
/*
build blocks of length K
input: [-K,-1] elements are buffer
output: first K elements are buffer, blocks 2*K and last subblock sorted

arr     - start of the L data elements; the K slots in front are the buffer
extbuf  - optional external buffer of LExtBuf elements (LExtBuf may be 0)
*/
static void GRAIL_BUILD_BLOCKS(SORT_TYPE *arr, int L, int K, SORT_TYPE *extbuf, int LExtBuf)
{
int m, u, h, p0, p1, rest, restk, p, kbuf;
/* usable external buffer size: min(K, LExtBuf), rounded down to a power of two */
kbuf = K < LExtBuf ? K : LExtBuf;
while (kbuf & (kbuf - 1)) {
kbuf &= kbuf - 1; /* max power of 2 - just in case */
}
if (kbuf) {
/* external buffer available: save the kbuf buffer elements, then merge by plain assignment */
SORT_TYPE_CPY(extbuf, arr - kbuf, kbuf);
/* order each pair of neighbours while moving it two slots to the left */
for (m = 1; m < L; m += 2) {
u = 0;
if (SORT_CMP_A(arr + (m - 1), arr + m) > 0) {
u = 1;
}
arr[m - 3] = arr[m - 1 + u];
arr[m - 2] = arr[m - u];
}
if (L % 2) {
arr[L - 3] = arr[L - 1];
}
arr -= 2;
/* merge runs of length h into 2h while h < kbuf; each pass moves the data h slots to the left */
for (h = 2; h < kbuf; h *= 2) {
p0 = 0;
p1 = L - 2 * h;
while (p0 <= p1) {
GRAIL_MERGE_LEFT_WITH_X_BUF(arr + p0, h, h, -h);
p0 += 2 * h;
}
rest = L - p0;
if (rest > h) {
GRAIL_MERGE_LEFT_WITH_X_BUF(arr + p0, h, rest - h, -h);
} else {
for (; p0 < L; p0++) {
arr[p0 - h] = arr[p0];
}
}
arr -= h;
}
/* put the saved buffer elements back, now behind the data */
SORT_TYPE_CPY(arr + L, extbuf, kbuf);
} else {
/* no external buffer: same pair pass, but with swaps so the buffer elements are kept */
for (m = 1; m < L; m += 2) {
u = 0;
if (SORT_CMP_A(arr + (m - 1), arr + m) > 0) {
u = 1;
}
GRAIL_SWAP1(arr + (m - 3), arr + (m - 1 + u));
GRAIL_SWAP1(arr + (m - 2), arr + (m - u));
}
if (L % 2) {
GRAIL_SWAP1(arr + (L - 1), arr + (L - 3));
}
arr -= 2;
h = 2;
}
/* remaining doubling passes use the swap-based merge; h continues from where the branch above stopped */
for (; h < K; h *= 2) {
p0 = 0;
p1 = L - 2 * h;
while (p0 <= p1) {
GRAIL_MERGE_LEFT(arr + p0, h, h, -h);
p0 += 2 * h;
}
rest = L - p0;
if (rest > h) {
GRAIL_MERGE_LEFT(arr + p0, h, rest - h, -h);
} else {
GRAIL_ROTATE(arr + p0 - h, h, rest);
}
arr -= h;
}
/* final pass merges runs of length K from the right end, moving the data K slots to the right */
restk = L % (2 * K);
p = L - restk;
if (restk <= K) {
GRAIL_ROTATE(arr + p, restk, K);
} else {
GRAIL_MERGE_RIGHT(arr + p, K, restk - K, K);
}
while (p > 0) {
p -= 2 * K;
GRAIL_MERGE_RIGHT(arr + p, K, K, K);
}
}
/*
arr - starting array. arr[-lblock..-1] - buffer (if havebuf).
lblock - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded
keys - arrays of keys, in same order as blocks. key<midkey means stream A
nblock2 are regular blocks from stream A. llast is length of last (irregular) block from stream B, that should go before nblock2 blocks.
llast=0 requires nblock2=0 (no irregular blocks). llast>0, nblock2=0 is possible.

havebuf != 0: merges are swap-based and the output is shifted lblock
positions to the left (into the buffer area).
havebuf == 0: merges are done in place with the buffer-less routines and
nothing is shifted.
*/
static void GRAIL_MERGE_BUFFERS_LEFT(SORT_TYPE *keys, SORT_TYPE *midkey, SORT_TYPE *arr, int nblock,
int lblock, int havebuf, int nblock2, int llast)
{
int l, prest, lrest, frest, pidx, cidx, fnext;
/* no regular blocks: only the nblock2 blocks and the irregular tail are merged */
if (nblock == 0) {
l = nblock2 * lblock;
if (havebuf) {
GRAIL_MERGE_LEFT(arr, l, llast, -lblock);
} else {
GRAIL_MERGE_WITHOUT_BUFFER(arr, l, llast);
}
return;
}
/* the first block is the initial pending fragment */
lrest = lblock;
frest = SORT_CMP_A(keys, midkey) < 0 ? 0 : 1;
pidx = lblock;
for (cidx = 1; cidx < nblock; cidx++, pidx += lblock) {
prest = pidx - lrest;
fnext = SORT_CMP_A(keys + cidx, midkey) < 0 ? 0 : 1;
if (fnext == frest) {
/* same stream as the pending fragment: emit the fragment, the next block becomes pending */
if (havebuf) {
GRAIL_SWAP_N(arr + prest - lblock, arr + prest, lrest);
}
prest = pidx;
lrest = lblock;
} else {
/* different stream: merge; lrest / frest are updated to describe the leftover */
if (havebuf) {
GRAIL_SMART_MERGE_WITH_BUFFER(arr + prest, &lrest, &frest, lblock, lblock);
} else {
GRAIL_SMART_MERGE_WITHOUT_BUFFER(arr + prest, &lrest, &frest, lblock);
}
}
}
prest = pidx - lrest;
if (llast) {
if (frest) {
/* pending fragment has stream flag 1: emit it, then merge the nblock2 blocks with the tail */
if (havebuf) {
GRAIL_SWAP_N(arr + prest - lblock, arr + prest, lrest);
}
prest = pidx;
lrest = lblock * nblock2;
frest = 0;
} else {
/* pending fragment has stream flag 0: it is joined with the nblock2 blocks */
lrest += lblock * nblock2;
}
if (havebuf) {
GRAIL_MERGE_LEFT(arr + prest, lrest, llast, -lblock);
} else {
GRAIL_MERGE_WITHOUT_BUFFER(arr + prest, lrest, llast);
}
} else {
/* no irregular tail: just emit the pending fragment */
if (havebuf) {
GRAIL_SWAP_N(arr + prest, arr + (prest - lblock), lrest);
}
}
}
/*
 Bottom-up merge sort of arr[0..L) that uses only GRAIL_MERGE_WITHOUT_BUFFER,
 i.e. no buffer argument is passed to any merge.
 GRAIL_COMMON_SORT falls back to it when fewer than 4 distinct keys were found.
*/
static void GRAIL_LAZY_STABLE_SORT(SORT_TYPE *arr, int L)
{
    int idx, width, pos, last_full, tail;

    /* Pass 1: put every adjacent pair (0,1), (2,3), ... in order. */
    idx = 1;
    while (idx < L) {
        if (SORT_CMP_A(arr + idx - 1, arr + idx) > 0) {
            GRAIL_SWAP1(arr + (idx - 1), arr + idx);
        }
        idx += 2;
    }

    /* Pass 2: merge sorted runs of length width into runs of length 2*width. */
    for (width = 2; width < L; width *= 2) {
        last_full = L - 2 * width;   /* last start offset of a complete pair of runs */
        for (pos = 0; pos <= last_full; pos += 2 * width) {
            GRAIL_MERGE_WITHOUT_BUFFER(arr + pos, width, width);
        }
        /* A leftover longer than one run is a full run plus a short one. */
        tail = L - pos;
        if (tail > width) {
            GRAIL_MERGE_WITHOUT_BUFFER(arr + pos, width, tail - width);
        }
    }
}
/*
Combine already sorted runs of length LL pairwise into runs of length 2*LL.

keys    - key area; GRAIL_COMMON_SORT passes the start of the whole array, so the
          keys lie to the left of arr.
arr     - start of the data to combine (len elements).
LL      - length of the runs that are combined in pairs.
lblock  - length of the regular blocks each run is cut into.
havebuf - non-zero when arr[-lblock..-1] may be used as internal buffer.
xbuf    - optional external buffer that can hold lblock elements, or NULL.

LL and nkeys are powers of 2. (2*LL/lblock) keys are guaranteed.
*/
static void GRAIL_COMBINE_BLOCKS(SORT_TYPE *keys, SORT_TYPE *arr, int len, int LL, int lblock,
int havebuf, SORT_TYPE *xbuf)
{
int M, b, NBlk, midkey, lrest, u, p, v, kc, nbl2, llast;
SORT_TYPE *arr1;
/* M full groups of 2*LL elements, followed by lrest leftover elements. */
M = len / (2 * LL);
lrest = len % (2 * LL);
/* A leftover of at most one run has no partner to merge with: leave it out. */
if (lrest <= LL) {
len -= lrest;
lrest = 0;
}
/* Save the lblock elements in front of arr so that area can serve as buffer
   (SORT_TYPE_CPY is presumably (dst, src, count) - confirm at its definition). */
if (xbuf) {
SORT_TYPE_CPY(xbuf, arr - lblock, lblock);
}
for (b = 0; b <= M; b++) {
/* Group index M is the leftover group; skip it when there is none. */
if (b == M && lrest == 0) {
break;
}
arr1 = arr + b * 2 * LL;
/* Number of complete lblock-sized blocks in this group. */
NBlk = (b == M ? lrest : 2 * LL) / lblock;
/* Bring the keys for this group into sorted order (one extra key for the
   leftover group). */
SMALL_SORT(keys, NBlk + (b == M ? 1 : 0));
/* Index of the key that belongs to the first block of the second run. */
midkey = LL / lblock;
/* Selection sort of the blocks by their first element; equal first elements
   are ordered by their keys. */
for (u = 1; u < NBlk; u++) {
p = u - 1;
for (v = u; v < NBlk; v++) {
kc = SORT_CMP_A(arr1 + p * lblock, arr1 + v * lblock);
if (kc > 0 || (kc == 0 && SORT_CMP_A(keys + p, keys + v) > 0)) {
p = v;
}
}
if (p != u - 1) {
/* Swap the blocks together with their keys. */
GRAIL_SWAP_N(arr1 + (u - 1)*lblock, arr1 + p * lblock, lblock);
GRAIL_SWAP1(keys + (u - 1), keys + p);
/* If the middle key was one of the two swapped keys, follow it: XOR with
   (u-1)^p maps either index onto the other one. */
if (midkey == u - 1 || midkey == p) {
midkey ^= (u - 1)^p;
}
}
}
nbl2 = llast = 0;
/* Only the leftover group can end in an irregular (shorter) block. */
if (b == M) {
llast = lrest % lblock;
}
if (llast != 0) {
/* Count the trailing regular blocks whose first element is greater than the
   first element of the irregular block. */
while (nbl2 < NBlk && SORT_CMP_A(arr1 + NBlk * lblock, arr1 + (NBlk - nbl2 - 1)*lblock) < 0) {
nbl2++;
}
}
if (xbuf) {
GRAIL_MERGE_BUFFERS_LEFT_WITH_X_BUF(keys, keys + midkey, arr1, NBlk - nbl2, lblock, nbl2, llast);
} else {
GRAIL_MERGE_BUFFERS_LEFT(keys, keys + midkey, arr1, NBlk - nbl2, lblock, havebuf, nbl2, llast);
}
}
if (xbuf) {
/* Move the data from arr[-lblock..len-lblock) to arr[0..len), highest index
   first, then restore the elements saved in xbuf to arr[-lblock..-1]. */
for (p = len; --p >= 0;) {
arr[p] = arr[p - lblock];
}
SORT_TYPE_CPY(arr - lblock, xbuf, lblock);
} else if (havebuf) {
/* Same shift done with swaps, which carries the internal buffer elements back
   to the left of arr. */
while (--len >= 0) {
GRAIL_SWAP1(arr + len, arr + len - lblock);
}
}
}
/*
 Shared driver behind the Grail sort entry points.

 arr     - array to sort in place.
 Len     - number of elements in arr.
 extbuf  - optional external scratch buffer, or NULL.
 LExtBuf - capacity of extbuf in elements (0 when extbuf is NULL).

 The loop bounds below are evaluated in long long so that they cannot overflow
 int for Len above 2^30. The helpers called from here still use int arithmetic,
 so very large arrays remain limited by them.
*/
static void GRAIL_COMMON_SORT(SORT_TYPE *arr, int Len, SORT_TYPE *extbuf, int LExtBuf)
{
    int lblock, nkeys, findkeys, ptr, cbuf, lb, nk;
    int havebuf, chavebuf;
    long long s;

    if (Len <= SMALL_SORT_BND) {
        SMALL_SORT(arr, Len);
        return;
    }

    /* Smallest power of two with lblock * lblock >= Len. The product is formed
       in long long: with int it overflows once lblock reaches 65536. */
    lblock = 1;
    while ((long long)lblock * lblock < Len) {
        lblock *= 2;
    }

    nkeys = (Len - 1) / lblock + 1;
    /* Ask for nkeys keys plus lblock buffer elements; findkeys is how many
       GRAIL_FIND_KEYS reports back. */
    findkeys = GRAIL_FIND_KEYS(arr, Len, nkeys + lblock);
    havebuf = 1;
    if (findkeys < nkeys + lblock) {
        if (findkeys < 4) {
            /* Too few keys for the block scheme. */
            GRAIL_LAZY_STABLE_SORT(arr, Len);
            return;
        }
        /* Not enough for keys and buffer: keep a power-of-two number of keys
           and run without a dedicated internal buffer. */
        nkeys = lblock;
        while (nkeys > findkeys) {
            nkeys /= 2;
        }
        havebuf = 0;
        lblock = 0;
    }

    /* arr[0..ptr) holds the keys (and buffer); the data proper starts at ptr. */
    ptr = lblock + nkeys;
    cbuf = havebuf ? lblock : nkeys;
    if (havebuf) {
        GRAIL_BUILD_BLOCKS(arr + ptr, Len - ptr, cbuf, extbuf, LExtBuf);
    } else {
        GRAIL_BUILD_BLOCKS(arr + ptr, Len - ptr, cbuf, NULL, 0);
    }

    /* 2*cbuf are built */
    /* Double the run length for as long as one run does not cover all the data.
       The bound is checked in long long before cbuf is doubled, so the doubling
       itself can never overflow. */
    while ((long long)(Len - ptr) > 2LL * cbuf) {
        cbuf *= 2;
        lb = lblock;
        chavebuf = havebuf;
        if (!havebuf) {
            if (nkeys > 4 && nkeys / 8 * nkeys >= cbuf) {
                /* Use half of the keys as buffer for this pass. */
                lb = nkeys / 2;
                chavebuf = 1;
            } else {
                /* No buffer for this pass: derive the block length from the
                   number of keys that can be used. */
                nk = 1;
                s = (long long)cbuf * findkeys / 2;
                while (nk < nkeys && s != 0) {
                    nk *= 2;
                    s /= 8;
                }
                lb = (int)(2LL * cbuf / nk);
            }
        }
        /* The external buffer is handed over only if it can hold one block. */
        GRAIL_COMBINE_BLOCKS(arr, arr + ptr, Len - ptr, cbuf, lb, chavebuf,
                             (chavebuf && lb <= LExtBuf) ? extbuf : NULL);
    }

    /* Sort the key/buffer prefix and merge it with the sorted data. */
    SMALL_SORT(arr, ptr);
    GRAIL_MERGE_WITHOUT_BUFFER(arr, ptr, Len - ptr);
}
/*
 Grail sort of arr[0..Len) without any external buffer.
 Len is narrowed to int before it is handed to GRAIL_COMMON_SORT.
*/
void GRAIL_SORT(SORT_TYPE *arr, size_t Len)
{
    const int count = (int)Len;

    GRAIL_COMMON_SORT(arr, count, NULL, 0);
}
/*
 Grail sort of arr[0..Len) using a local array of GRAIL_EXT_BUFFER_LENGTH
 elements as external buffer.
 Len is narrowed to int before it is handed to GRAIL_COMMON_SORT.
*/
void GRAIL_SORT_FIXED_BUFFER(SORT_TYPE *arr, size_t Len)
{
    SORT_TYPE scratch[GRAIL_EXT_BUFFER_LENGTH];
    const int count = (int)Len;

    GRAIL_COMMON_SORT(arr, count, scratch, GRAIL_EXT_BUFFER_LENGTH);
}
/*
 Grail sort of arr[0..Len) with an external buffer obtained from
 SORT_NEW_BUFFER. The buffer length L is the smallest power of two with
 L * L >= Len. If no buffer can be obtained, the sort falls back to
 GRAIL_SORT_FIXED_BUFFER.

 Len is narrowed to int before it is handed to GRAIL_COMMON_SORT.
*/
void GRAIL_SORT_DYN_BUFFER(SORT_TYPE *arr, size_t Len)
{
    int L = 1;
    SORT_TYPE *ExtBuf;

    /* Square and compare in uint64_t: an int product overflows once L reaches
       65536 (Len > 2^30), and comparing int with size_t mixes signedness. */
    while ((uint64_t)L * (uint64_t)L < (uint64_t)Len) {
        L *= 2;
    }

    ExtBuf = SORT_NEW_BUFFER(L);
    if (ExtBuf == NULL) {
        /* No dynamic buffer available: use the fixed-size variant instead. */
        GRAIL_SORT_FIXED_BUFFER(arr, Len);
    } else {
        GRAIL_COMMON_SORT(arr, (int)Len, ExtBuf, L);
        SORT_DELETE_BUFFER(ExtBuf);
    }
}
/****** classic MergeInPlace *************/
/*
 Recursive in-place merge of the two adjacent sorted runs A[0..L1) and
 A[L1..L1+L2).

 A pivot element is picked, both runs are split around it with binary
 searches, the middle pieces are rotated into place and the two remaining
 sub-problems are merged recursively. Runs shorter than 3 elements are
 passed to GRAIL_MERGE_WITHOUT_BUFFER.
*/
static void GRAIL_REC_MERGE(SORT_TYPE *A, int L1, int L2)
{
    /* [lo1, hi1) / [lo2, hi2): positions of the elements equal to the pivot
       within the first / second run (offsets relative to the run start). */
    int pivot, lo1, hi1, lo2, hi2;

    if (L1 < 3 || L2 < 3) {
        GRAIL_MERGE_WITHOUT_BUFFER(A, L1, L2);
        return;
    }

    /* Pivot: middle element of the longer run. */
    pivot = (L1 < L2) ? (L1 + L2 / 2) : (L1 / 2);

    lo1 = GRAIL_BIN_SEARCH_LEFT(A, L1, A + pivot);
    hi1 = lo1;
    if (hi1 < L1 && SORT_CMP_A(A + hi1, A + pivot) == 0) {
        hi1 = GRAIL_BIN_SEARCH_RIGHT(A + lo1, L1 - lo1, A + pivot) + lo1;
    }

    lo2 = GRAIL_BIN_SEARCH_LEFT(A + L1, L2, A + pivot);
    hi2 = lo2;
    if (hi2 < L2 && SORT_CMP_A(A + L1 + hi2, A + pivot) == 0) {
        hi2 = GRAIL_BIN_SEARCH_RIGHT(A + L1 + lo2, L2 - lo2, A + pivot) + lo2;
    }

    if (lo1 != hi1) {
        /* The first run contains elements equal to the pivot. */
        GRAIL_ROTATE(A + lo1, L1 - lo1, lo2);
        if (hi2 != lo2) {
            GRAIL_ROTATE(A + (hi1 + lo2), L1 - hi1, hi2 - lo2);
        }
    } else {
        GRAIL_ROTATE(A + hi1, L1 - hi1, hi2);
    }

    /* Upper part first, then the lower part. */
    GRAIL_REC_MERGE(A + (hi1 + hi2), L1 - hi1, L2 - hi2);
    GRAIL_REC_MERGE(A, lo1, lo2);
}
/*
 Bottom-up merge sort of arr[0..L) built on the recursive in-place merge
 GRAIL_REC_MERGE.

 All index arithmetic is done in int, so L is converted once up front. This
 avoids comparing int with size_t and avoids the unsigned wrap-around of
 L - 2 * h when 2 * h exceeds L. L must be representable as int.
*/
void REC_STABLE_SORT(SORT_TYPE *arr, size_t L)
{
    const int len = (int)L;
    int m, h, p0, p1, rest;

    /* Order every adjacent pair (0,1), (2,3), ... */
    for (m = 1; m < len; m += 2) {
        if (SORT_CMP_A(arr + m - 1, arr + m) > 0) {
            GRAIL_SWAP1(arr + (m - 1), arr + m);
        }
    }

    /* Merge sorted runs of length h into runs of length 2*h. */
    for (h = 2; h < len; h *= 2) {
        p0 = 0;
        p1 = len - 2 * h;   /* last start of a complete pair; negative if none */
        while (p0 <= p1) {
            GRAIL_REC_MERGE(arr + p0, h, h);
            p0 += 2 * h;
        }
        /* A leftover longer than one run is a full run plus a short one. */
        rest = len - p0;
        if (rest > h) {
            GRAIL_REC_MERGE(arr + p0, h, rest - h);
        }
    }
}
/* Bubble sort, following the optimised variant in the Wikipedia article
   https://en.wikipedia.org/wiki/Bubble_sort

   After each pass everything at or beyond the position of the last swap is
   already in its final place, so the next pass stops there. The sort ends
   when a pass performs no swap.
*/
void BUBBLE_SORT(SORT_TYPE *dst, const size_t size)
{
    size_t unsorted, last_swap, pos;

    for (unsorted = size; unsorted != 0U; unsorted = last_swap) {
        last_swap = 0U;
        for (pos = 1U; pos < unsorted; pos++) {
            if (SORT_CMP(dst[pos - 1U], dst[pos]) > 0) {
                SORT_SWAP(dst[pos - 1U], dst[pos]);
                last_swap = pos;
            }
        }
    }
}
/* Cleanup: remove the macros used by this header, including the
   caller-supplied SORT_NAME, SORT_TYPE and SORT_CMP - presumably so that the
   header can be included again with a different configuration.
   A few names (SORT_CONCAT, SORT_MAKE_STR1, SORT_MAKE_STR, TEMP_STORAGE_T,
   TIM_SORT_RUN_T) are listed twice; #undef of a name that is not currently
   defined is ignored, so the repetition is harmless. */
#undef SORT_SAFE_CPY
#undef SORT_TYPE_CPY
#undef SORT_TYPE_MOVE
#undef SORT_NEW_BUFFER
#undef SORT_DELETE_BUFFER
#undef QUICK_SORT
#undef MEDIAN
#undef SORT_CONCAT
#undef SORT_MAKE_STR1
#undef SORT_MAKE_STR
#undef SORT_NAME
#undef SORT_TYPE
#undef SORT_CMP
#undef TEMP_STORAGE_T
#undef TIM_SORT_RUN_T
#undef PUSH_NEXT
#undef SORT_SWAP
#undef SORT_CONCAT
#undef SORT_MAKE_STR1
#undef SORT_MAKE_STR
#undef BINARY_INSERTION_FIND
#undef BINARY_INSERTION_SORT_START
#undef BINARY_INSERTION_SORT
#undef REVERSE_ELEMENTS
#undef COUNT_RUN
#undef TIM_SORT
#undef TIM_SORT_RESIZE
#undef TIM_SORT_COLLAPSE
#undef TIM_SORT_RUN_T
#undef TEMP_STORAGE_T
#undef MERGE_SORT
#undef MERGE_SORT_RECURSIVE
#undef MERGE_SORT_IN_PLACE
#undef MERGE_SORT_IN_PLACE_RMERGE
#undef MERGE_SORT_IN_PLACE_BACKMERGE
#undef MERGE_SORT_IN_PLACE_FRONTMERGE
#undef MERGE_SORT_IN_PLACE_ASWAP
/* Grail sort family */
#undef GRAIL_SWAP1
#undef REC_STABLE_SORT
#undef GRAIL_REC_MERGE
#undef GRAIL_SORT_DYN_BUFFER
#undef GRAIL_SORT_FIXED_BUFFER
#undef GRAIL_COMMON_SORT
#undef GRAIL_SORT
#undef GRAIL_COMBINE_BLOCKS
#undef GRAIL_LAZY_STABLE_SORT
#undef GRAIL_MERGE_WITHOUT_BUFFER
#undef GRAIL_ROTATE
#undef GRAIL_BIN_SEARCH_LEFT
#undef GRAIL_BUILD_BLOCKS
#undef GRAIL_FIND_KEYS
#undef GRAIL_MERGE_BUFFERS_LEFT_WITH_X_BUF
#undef GRAIL_BIN_SEARCH_RIGHT
#undef GRAIL_MERGE_BUFFERS_LEFT
#undef GRAIL_SMART_MERGE_WITH_X_BUF
#undef GRAIL_MERGE_LEFT_WITH_X_BUF
#undef GRAIL_SMART_MERGE_WITHOUT_BUFFER
#undef GRAIL_SMART_MERGE_WITH_BUFFER
#undef GRAIL_MERGE_RIGHT
#undef GRAIL_MERGE_LEFT
#undef GRAIL_SWAP_N
/* Sqrt sort family */
#undef SQRT_SORT
#undef SQRT_SORT_BUILD_BLOCKS
#undef SQRT_SORT_MERGE_BUFFERS_LEFT_WITH_X_BUF
#undef SQRT_SORT_MERGE_DOWN
#undef SQRT_SORT_MERGE_LEFT_WITH_X_BUF
#undef SQRT_SORT_MERGE_RIGHT
#undef SQRT_SORT_SWAP_N
#undef SQRT_SORT_SWAP_1
#undef SQRT_SORT_SMART_MERGE_WITH_X_BUF
#undef SQRT_SORT_SORT_INS
#undef SQRT_SORT_COMBINE_BLOCKS
#undef SQRT_SORT_COMMON_SORT
#undef SORT_CMP_A
#undef BUBBLE_SORT