/* Copyright (c) 2010-2019 Christopher Swenson. */ /* Copyright (c) 2012 Vojtech Fried. */ /* Copyright (c) 2012 Google Inc. All Rights Reserved. */ /* https://github.com/swenson/sort */ #include #include #include #include #ifndef SORT_NAME #error "Must declare SORT_NAME" #endif #ifndef SORT_TYPE #error "Must declare SORT_TYPE" #endif #ifndef SORT_CMP #define SORT_CMP(x, y) ((x) < (y) ? -1 : ((y) < (x) ? 1 : 0)) #endif #ifdef __cplusplus #ifndef SORT_SAFE_CPY #define SORT_SAFE_CPY 0 #endif #else #undef SORT_SAFE_CPY #define SORT_SAFE_CPY 0 #endif #ifndef TIM_SORT_STACK_SIZE #define TIM_SORT_STACK_SIZE 128 #endif #ifndef TIM_SORT_MIN_GALLOP #define TIM_SORT_MIN_GALLOP 7 #endif #ifndef SORT_SWAP #define SORT_SWAP(x,y) {SORT_TYPE _sort_swap_temp = (x); (x) = (y); (y) = _sort_swap_temp;} #endif /* Common, type-agnostic functions and constants that we don't want to declare twice. */ #ifndef SORT_COMMON_H #define SORT_COMMON_H #ifndef MAX #define MAX(x,y) (((x) > (y) ? (x) : (y))) #endif #ifndef MIN #define MIN(x,y) (((x) < (y) ? (x) : (y))) #endif static int compute_minrun(const uint64_t); /* From http://oeis.org/classic/A102549 */ static const uint64_t shell_gaps[48] = {1, 4, 10, 23, 57, 132, 301, 701, 1750, 4376, 10941, 27353, 68383, 170958, 427396, 1068491, 2671228, 6678071, 16695178, 41737946, 104344866, 260862166, 652155416, 1630388541, 4075971353LL, 10189928383LL, 25474820958LL, 63687052396LL, 159217630991LL, 398044077478LL, 995110193696LL, 2487775484241LL, 6219438710603LL, 15548596776508LL, 38871491941271LL, 97178729853178LL, 242946824632946LL, 607367061582366LL, 1518417653955916LL, 3796044134889791LL, 9490110337224478LL, 23725275843061196LL, 59313189607652991LL, 148282974019132478LL, 370707435047831196LL, 926768587619577991LL, 2316921469048944978LL, 5792303672622362446LL}; #ifndef CLZ /* clang-only */ #ifndef __has_builtin #define __has_builtin(x) 0 #endif #if __has_builtin(__builtin_clzll) || (defined(__GNUC__) && ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3))) #define CLZ __builtin_clzll #else static int clzll(uint64_t); /* adapted from Hacker's Delight */ static int clzll(uint64_t x) { int n; if (x == 0) { return 64; } n = 0; if (x <= 0x00000000FFFFFFFFL) { n = n + 32; x = x << 32; } if (x <= 0x0000FFFFFFFFFFFFL) { n = n + 16; x = x << 16; } if (x <= 0x00FFFFFFFFFFFFFFL) { n = n + 8; x = x << 8; } if (x <= 0x0FFFFFFFFFFFFFFFL) { n = n + 4; x = x << 4; } if (x <= 0x3FFFFFFFFFFFFFFFL) { n = n + 2; x = x << 2; } if (x <= 0x7FFFFFFFFFFFFFFFL) { n = n + 1; } return n; } #define CLZ clzll #endif #endif static __inline int compute_minrun(const uint64_t size) { const int top_bit = 64 - CLZ(size); const int shift = MAX(top_bit, 6) - 6; const int minrun = (int)(size >> shift); const uint64_t mask = (1ULL << shift) - 1; if (mask & size) { return minrun + 1; } return minrun; } static __inline size_t rbnd(size_t len) { int k; if (len < 16) { return 2; } k = 62 - CLZ(len); return 1ULL << ((2 * k) / 3); } #endif /* SORT_COMMON_H */ #define SORT_CONCAT(x, y) x ## _ ## y #define SORT_MAKE_STR1(x, y) SORT_CONCAT(x,y) #define SORT_MAKE_STR(x) SORT_MAKE_STR1(SORT_NAME,x) #ifndef SMALL_SORT_BND #define SMALL_SORT_BND 16 #endif #ifndef SMALL_SORT #define SMALL_SORT BITONIC_SORT /*#define SMALL_SORT BINARY_INSERTION_SORT*/ #endif #define SORT_TYPE_CPY SORT_MAKE_STR(sort_type_cpy) #define SORT_TYPE_MOVE SORT_MAKE_STR(sort_type_move) #define SORT_NEW_BUFFER SORT_MAKE_STR(sort_new_buffer) #define SORT_DELETE_BUFFER SORT_MAKE_STR(sort_delete_buffer) #define BITONIC_SORT SORT_MAKE_STR(bitonic_sort) #define BINARY_INSERTION_FIND SORT_MAKE_STR(binary_insertion_find) #define BINARY_INSERTION_SORT_START SORT_MAKE_STR(binary_insertion_sort_start) #define BINARY_INSERTION_SORT SORT_MAKE_STR(binary_insertion_sort) #define REVERSE_ELEMENTS SORT_MAKE_STR(reverse_elements) #define COUNT_RUN SORT_MAKE_STR(count_run) #define CHECK_INVARIANT SORT_MAKE_STR(check_invariant) #define TIM_SORT SORT_MAKE_STR(tim_sort) #define TIM_SORT_GALLOP SORT_MAKE_STR(tim_sort_gallop) #define TIM_SORT_RESIZE SORT_MAKE_STR(tim_sort_resize) #define TIM_SORT_MERGE SORT_MAKE_STR(tim_sort_merge) #define TIM_SORT_MERGE_LEFT SORT_MAKE_STR(tim_sort_merge_left) #define TIM_SORT_MERGE_RIGHT SORT_MAKE_STR(tim_sort_merge_right) #define TIM_SORT_COLLAPSE SORT_MAKE_STR(tim_sort_collapse) #define HEAP_SORT SORT_MAKE_STR(heap_sort) #define MEDIAN SORT_MAKE_STR(median) #define QUICK_SORT SORT_MAKE_STR(quick_sort) #define MERGE_SORT SORT_MAKE_STR(merge_sort) #define MERGE_SORT_RECURSIVE SORT_MAKE_STR(merge_sort_recursive) #define MERGE_SORT_IN_PLACE SORT_MAKE_STR(merge_sort_in_place) #define MERGE_SORT_IN_PLACE_RMERGE SORT_MAKE_STR(merge_sort_in_place_rmerge) #define MERGE_SORT_IN_PLACE_BACKMERGE SORT_MAKE_STR(merge_sort_in_place_backmerge) #define MERGE_SORT_IN_PLACE_FRONTMERGE SORT_MAKE_STR(merge_sort_in_place_frontmerge) #define MERGE_SORT_IN_PLACE_ASWAP SORT_MAKE_STR(merge_sort_in_place_aswap) #define SELECTION_SORT SORT_MAKE_STR(selection_sort) #define SHELL_SORT SORT_MAKE_STR(shell_sort) #define QUICK_SORT_PARTITION SORT_MAKE_STR(quick_sort_partition) #define QUICK_SORT_RECURSIVE SORT_MAKE_STR(quick_sort_recursive) #define HEAP_SIFT_DOWN SORT_MAKE_STR(heap_sift_down) #define HEAPIFY SORT_MAKE_STR(heapify) #define TIM_SORT_RUN_T SORT_MAKE_STR(tim_sort_run_t) #define TEMP_STORAGE_T SORT_MAKE_STR(temp_storage_t) #define PUSH_NEXT SORT_MAKE_STR(push_next) #define GRAIL_SWAP1 SORT_MAKE_STR(grail_swap1) #define REC_STABLE_SORT SORT_MAKE_STR(rec_stable_sort) #define GRAIL_REC_MERGE SORT_MAKE_STR(grail_rec_merge) #define GRAIL_SORT_DYN_BUFFER SORT_MAKE_STR(grail_sort_dyn_buffer) #define GRAIL_SORT_FIXED_BUFFER SORT_MAKE_STR(grail_sort_fixed_buffer) #define GRAIL_COMMON_SORT SORT_MAKE_STR(grail_common_sort) #define GRAIL_SORT SORT_MAKE_STR(grail_sort) #define GRAIL_COMBINE_BLOCKS SORT_MAKE_STR(grail_combine_blocks) #define GRAIL_LAZY_STABLE_SORT SORT_MAKE_STR(grail_lazy_stable_sort) #define GRAIL_MERGE_WITHOUT_BUFFER SORT_MAKE_STR(grail_merge_without_buffer) #define GRAIL_ROTATE SORT_MAKE_STR(grail_rotate) #define GRAIL_BIN_SEARCH_LEFT SORT_MAKE_STR(grail_bin_search_left) #define GRAIL_BUILD_BLOCKS SORT_MAKE_STR(grail_build_blocks) #define GRAIL_FIND_KEYS SORT_MAKE_STR(grail_find_keys) #define GRAIL_MERGE_BUFFERS_LEFT_WITH_X_BUF SORT_MAKE_STR(grail_merge_buffers_left_with_x_buf) #define GRAIL_BIN_SEARCH_RIGHT SORT_MAKE_STR(grail_bin_search_right) #define GRAIL_MERGE_BUFFERS_LEFT SORT_MAKE_STR(grail_merge_buffers_left) #define GRAIL_SMART_MERGE_WITH_X_BUF SORT_MAKE_STR(grail_smart_merge_with_x_buf) #define GRAIL_MERGE_LEFT_WITH_X_BUF SORT_MAKE_STR(grail_merge_left_with_x_buf) #define GRAIL_SMART_MERGE_WITHOUT_BUFFER SORT_MAKE_STR(grail_smart_merge_without_buffer) #define GRAIL_SMART_MERGE_WITH_BUFFER SORT_MAKE_STR(grail_smart_merge_with_buffer) #define GRAIL_MERGE_RIGHT SORT_MAKE_STR(grail_merge_right) #define GRAIL_MERGE_LEFT SORT_MAKE_STR(grail_merge_left) #define GRAIL_SWAP_N SORT_MAKE_STR(grail_swap_n) #define SQRT_SORT SORT_MAKE_STR(sqrt_sort) #define SQRT_SORT_BUILD_BLOCKS SORT_MAKE_STR(sqrt_sort_build_blocks) #define SQRT_SORT_MERGE_BUFFERS_LEFT_WITH_X_BUF SORT_MAKE_STR(sqrt_sort_merge_buffers_left_with_x_buf) #define SQRT_SORT_MERGE_DOWN SORT_MAKE_STR(sqrt_sort_merge_down) #define SQRT_SORT_MERGE_LEFT_WITH_X_BUF SORT_MAKE_STR(sqrt_sort_merge_left_with_x_buf) #define SQRT_SORT_MERGE_RIGHT SORT_MAKE_STR(sqrt_sort_merge_right) #define SQRT_SORT_SWAP_N SORT_MAKE_STR(sqrt_sort_swap_n) #define SQRT_SORT_SWAP_1 SORT_MAKE_STR(sqrt_sort_swap_1) #define SQRT_SORT_SMART_MERGE_WITH_X_BUF SORT_MAKE_STR(sqrt_sort_smart_merge_with_x_buf) #define SQRT_SORT_SORT_INS SORT_MAKE_STR(sqrt_sort_sort_ins) #define SQRT_SORT_COMBINE_BLOCKS SORT_MAKE_STR(sqrt_sort_combine_blocks) #define SQRT_SORT_COMMON_SORT SORT_MAKE_STR(sqrt_sort_common_sort) #define BUBBLE_SORT SORT_MAKE_STR(bubble_sort) #ifndef MAX #define MAX(x,y) (((x) > (y) ? (x) : (y))) #endif #ifndef MIN #define MIN(x,y) (((x) < (y) ? (x) : (y))) #endif #ifndef SORT_CSWAP #define SORT_CSWAP(x, y) { if(SORT_CMP((x),(y)) > 0) {SORT_SWAP((x),(y));}} #endif typedef struct { size_t start; size_t length; } TIM_SORT_RUN_T; void SHELL_SORT(SORT_TYPE *dst, const size_t size); void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size); void HEAP_SORT(SORT_TYPE *dst, const size_t size); void QUICK_SORT(SORT_TYPE *dst, const size_t size); void MERGE_SORT(SORT_TYPE *dst, const size_t size); void MERGE_SORT_IN_PLACE(SORT_TYPE *dst, const size_t size); void SELECTION_SORT(SORT_TYPE *dst, const size_t size); void TIM_SORT(SORT_TYPE *dst, const size_t size); void BUBBLE_SORT(SORT_TYPE *dst, const size_t size); void BITONIC_SORT(SORT_TYPE *dst, const size_t size); void REC_STABLE_SORT(SORT_TYPE *dst, const size_t size); void GRAIL_SORT_DYN_BUFFER(SORT_TYPE *dst, const size_t size); void GRAIL_SORT_FIXED_BUFFER(SORT_TYPE *dst, const size_t size); void GRAIL_SORT(SORT_TYPE *dst, const size_t size); void SQRT_SORT(SORT_TYPE *dst, const size_t size); /* The full implementation of a bitonic sort is not here. Since we only want to use sorting networks for small length lists we create optimal sorting networks for lists of length <= 16 and call out to BINARY_INSERTION_SORT for anything larger than 16. Optimal sorting networks for small length lists. Taken from https://pages.ripco.net/~jgamble/nw.html */ #define BITONIC_SORT_2 SORT_MAKE_STR(bitonic_sort_2) static __inline void BITONIC_SORT_2(SORT_TYPE *dst) { SORT_CSWAP(dst[0], dst[1]); } #define BITONIC_SORT_3 SORT_MAKE_STR(bitonic_sort_3) static __inline void BITONIC_SORT_3(SORT_TYPE *dst) { SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[0], dst[1]); } #define BITONIC_SORT_4 SORT_MAKE_STR(bitonic_sort_4) static __inline void BITONIC_SORT_4(SORT_TYPE *dst) { SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[1], dst[2]); } #define BITONIC_SORT_5 SORT_MAKE_STR(bitonic_sort_5) static __inline void BITONIC_SORT_5(SORT_TYPE *dst) { SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[2], dst[4]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[0], dst[3]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[1], dst[2]); } #define BITONIC_SORT_6 SORT_MAKE_STR(bitonic_sort_6) static __inline void BITONIC_SORT_6(SORT_TYPE *dst) { SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[4], dst[5]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[3], dst[5]); SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[2], dst[5]); SORT_CSWAP(dst[0], dst[3]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[2], dst[4]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[2], dst[3]); } #define BITONIC_SORT_7 SORT_MAKE_STR(bitonic_sort_7) static __inline void BITONIC_SORT_7(SORT_TYPE *dst) { SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[3], dst[5]); SORT_CSWAP(dst[4], dst[6]); SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[4], dst[5]); SORT_CSWAP(dst[2], dst[6]); SORT_CSWAP(dst[0], dst[4]); SORT_CSWAP(dst[1], dst[5]); SORT_CSWAP(dst[0], dst[3]); SORT_CSWAP(dst[2], dst[5]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[2], dst[4]); SORT_CSWAP(dst[2], dst[3]); } #define BITONIC_SORT_8 SORT_MAKE_STR(bitonic_sort_8) static __inline void BITONIC_SORT_8(SORT_TYPE *dst) { SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[4], dst[5]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[4], dst[6]); SORT_CSWAP(dst[5], dst[7]); SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[0], dst[4]); SORT_CSWAP(dst[3], dst[7]); SORT_CSWAP(dst[1], dst[5]); SORT_CSWAP(dst[2], dst[6]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[3], dst[6]); SORT_CSWAP(dst[2], dst[4]); SORT_CSWAP(dst[3], dst[5]); SORT_CSWAP(dst[3], dst[4]); } #define BITONIC_SORT_9 SORT_MAKE_STR(bitonic_sort_9) static __inline void BITONIC_SORT_9(SORT_TYPE *dst) { SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[4], dst[5]); SORT_CSWAP(dst[7], dst[8]); SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[2], dst[5]); SORT_CSWAP(dst[0], dst[3]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[5], dst[8]); SORT_CSWAP(dst[3], dst[6]); SORT_CSWAP(dst[4], dst[7]); SORT_CSWAP(dst[2], dst[5]); SORT_CSWAP(dst[0], dst[3]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[5], dst[7]); SORT_CSWAP(dst[2], dst[6]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[4], dst[6]); SORT_CSWAP(dst[2], dst[4]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[2], dst[3]); } #define BITONIC_SORT_10 SORT_MAKE_STR(bitonic_sort_10) static __inline void BITONIC_SORT_10(SORT_TYPE *dst) { SORT_CSWAP(dst[4], dst[9]); SORT_CSWAP(dst[3], dst[8]); SORT_CSWAP(dst[2], dst[7]); SORT_CSWAP(dst[1], dst[6]); SORT_CSWAP(dst[0], dst[5]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[6], dst[9]); SORT_CSWAP(dst[0], dst[3]); SORT_CSWAP(dst[5], dst[8]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[3], dst[6]); SORT_CSWAP(dst[7], dst[9]); SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[2], dst[4]); SORT_CSWAP(dst[5], dst[7]); SORT_CSWAP(dst[8], dst[9]); SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[4], dst[6]); SORT_CSWAP(dst[7], dst[8]); SORT_CSWAP(dst[3], dst[5]); SORT_CSWAP(dst[2], dst[5]); SORT_CSWAP(dst[6], dst[8]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[4], dst[7]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[4], dst[5]); } #define BITONIC_SORT_11 SORT_MAKE_STR(bitonic_sort_11) static __inline void BITONIC_SORT_11(SORT_TYPE *dst) { SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[4], dst[5]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[8], dst[9]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[5], dst[7]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[4], dst[6]); SORT_CSWAP(dst[8], dst[10]); SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[9], dst[10]); SORT_CSWAP(dst[0], dst[4]); SORT_CSWAP(dst[3], dst[7]); SORT_CSWAP(dst[1], dst[5]); SORT_CSWAP(dst[6], dst[10]); SORT_CSWAP(dst[4], dst[8]); SORT_CSWAP(dst[5], dst[9]); SORT_CSWAP(dst[2], dst[6]); SORT_CSWAP(dst[0], dst[4]); SORT_CSWAP(dst[3], dst[8]); SORT_CSWAP(dst[1], dst[5]); SORT_CSWAP(dst[6], dst[10]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[8], dst[9]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[7], dst[10]); SORT_CSWAP(dst[3], dst[5]); SORT_CSWAP(dst[6], dst[8]); SORT_CSWAP(dst[2], dst[4]); SORT_CSWAP(dst[7], dst[9]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[7], dst[8]); } #define BITONIC_SORT_12 SORT_MAKE_STR(bitonic_sort_12) static __inline void BITONIC_SORT_12(SORT_TYPE *dst) { SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[4], dst[5]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[8], dst[9]); SORT_CSWAP(dst[10], dst[11]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[5], dst[7]); SORT_CSWAP(dst[9], dst[11]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[4], dst[6]); SORT_CSWAP(dst[8], dst[10]); SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[9], dst[10]); SORT_CSWAP(dst[0], dst[4]); SORT_CSWAP(dst[7], dst[11]); SORT_CSWAP(dst[1], dst[5]); SORT_CSWAP(dst[6], dst[10]); SORT_CSWAP(dst[3], dst[7]); SORT_CSWAP(dst[4], dst[8]); SORT_CSWAP(dst[5], dst[9]); SORT_CSWAP(dst[2], dst[6]); SORT_CSWAP(dst[0], dst[4]); SORT_CSWAP(dst[7], dst[11]); SORT_CSWAP(dst[3], dst[8]); SORT_CSWAP(dst[1], dst[5]); SORT_CSWAP(dst[6], dst[10]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[8], dst[9]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[7], dst[10]); SORT_CSWAP(dst[3], dst[5]); SORT_CSWAP(dst[6], dst[8]); SORT_CSWAP(dst[2], dst[4]); SORT_CSWAP(dst[7], dst[9]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[7], dst[8]); } #define BITONIC_SORT_13 SORT_MAKE_STR(bitonic_sort_13) static __inline void BITONIC_SORT_13(SORT_TYPE *dst) { SORT_CSWAP(dst[1], dst[7]); SORT_CSWAP(dst[9], dst[11]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[5], dst[8]); SORT_CSWAP(dst[0], dst[12]); SORT_CSWAP(dst[2], dst[6]); SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[4], dst[6]); SORT_CSWAP(dst[8], dst[11]); SORT_CSWAP(dst[7], dst[12]); SORT_CSWAP(dst[5], dst[9]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[3], dst[7]); SORT_CSWAP(dst[10], dst[11]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[6], dst[12]); SORT_CSWAP(dst[7], dst[8]); SORT_CSWAP(dst[11], dst[12]); SORT_CSWAP(dst[4], dst[9]); SORT_CSWAP(dst[6], dst[10]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[8], dst[9]); SORT_CSWAP(dst[10], dst[11]); SORT_CSWAP(dst[1], dst[7]); SORT_CSWAP(dst[2], dst[6]); SORT_CSWAP(dst[9], dst[11]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[4], dst[7]); SORT_CSWAP(dst[8], dst[10]); SORT_CSWAP(dst[0], dst[5]); SORT_CSWAP(dst[2], dst[5]); SORT_CSWAP(dst[6], dst[8]); SORT_CSWAP(dst[9], dst[10]); SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[3], dst[5]); SORT_CSWAP(dst[7], dst[8]); SORT_CSWAP(dst[4], dst[6]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[4], dst[5]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[8], dst[9]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[5], dst[6]); } #define BITONIC_SORT_14 SORT_MAKE_STR(bitonic_sort_14) static __inline void BITONIC_SORT_14(SORT_TYPE *dst) { SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[4], dst[5]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[8], dst[9]); SORT_CSWAP(dst[10], dst[11]); SORT_CSWAP(dst[12], dst[13]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[4], dst[6]); SORT_CSWAP(dst[8], dst[10]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[5], dst[7]); SORT_CSWAP(dst[9], dst[11]); SORT_CSWAP(dst[0], dst[4]); SORT_CSWAP(dst[8], dst[12]); SORT_CSWAP(dst[1], dst[5]); SORT_CSWAP(dst[9], dst[13]); SORT_CSWAP(dst[2], dst[6]); SORT_CSWAP(dst[3], dst[7]); SORT_CSWAP(dst[0], dst[8]); SORT_CSWAP(dst[1], dst[9]); SORT_CSWAP(dst[2], dst[10]); SORT_CSWAP(dst[3], dst[11]); SORT_CSWAP(dst[4], dst[12]); SORT_CSWAP(dst[5], dst[13]); SORT_CSWAP(dst[5], dst[10]); SORT_CSWAP(dst[6], dst[9]); SORT_CSWAP(dst[3], dst[12]); SORT_CSWAP(dst[7], dst[11]); SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[4], dst[8]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[7], dst[13]); SORT_CSWAP(dst[2], dst[8]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[9], dst[10]); SORT_CSWAP(dst[2], dst[4]); SORT_CSWAP(dst[11], dst[13]); SORT_CSWAP(dst[3], dst[8]); SORT_CSWAP(dst[7], dst[12]); SORT_CSWAP(dst[6], dst[8]); SORT_CSWAP(dst[10], dst[12]); SORT_CSWAP(dst[3], dst[5]); SORT_CSWAP(dst[7], dst[9]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[7], dst[8]); SORT_CSWAP(dst[9], dst[10]); SORT_CSWAP(dst[11], dst[12]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[8], dst[9]); } #define BITONIC_SORT_15 SORT_MAKE_STR(bitonic_sort_15) static __inline void BITONIC_SORT_15(SORT_TYPE *dst) { SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[4], dst[5]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[8], dst[9]); SORT_CSWAP(dst[10], dst[11]); SORT_CSWAP(dst[12], dst[13]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[4], dst[6]); SORT_CSWAP(dst[8], dst[10]); SORT_CSWAP(dst[12], dst[14]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[5], dst[7]); SORT_CSWAP(dst[9], dst[11]); SORT_CSWAP(dst[0], dst[4]); SORT_CSWAP(dst[8], dst[12]); SORT_CSWAP(dst[1], dst[5]); SORT_CSWAP(dst[9], dst[13]); SORT_CSWAP(dst[2], dst[6]); SORT_CSWAP(dst[10], dst[14]); SORT_CSWAP(dst[3], dst[7]); SORT_CSWAP(dst[0], dst[8]); SORT_CSWAP(dst[1], dst[9]); SORT_CSWAP(dst[2], dst[10]); SORT_CSWAP(dst[3], dst[11]); SORT_CSWAP(dst[4], dst[12]); SORT_CSWAP(dst[5], dst[13]); SORT_CSWAP(dst[6], dst[14]); SORT_CSWAP(dst[5], dst[10]); SORT_CSWAP(dst[6], dst[9]); SORT_CSWAP(dst[3], dst[12]); SORT_CSWAP(dst[13], dst[14]); SORT_CSWAP(dst[7], dst[11]); SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[4], dst[8]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[7], dst[13]); SORT_CSWAP(dst[2], dst[8]); SORT_CSWAP(dst[11], dst[14]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[9], dst[10]); SORT_CSWAP(dst[2], dst[4]); SORT_CSWAP(dst[11], dst[13]); SORT_CSWAP(dst[3], dst[8]); SORT_CSWAP(dst[7], dst[12]); SORT_CSWAP(dst[6], dst[8]); SORT_CSWAP(dst[10], dst[12]); SORT_CSWAP(dst[3], dst[5]); SORT_CSWAP(dst[7], dst[9]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[7], dst[8]); SORT_CSWAP(dst[9], dst[10]); SORT_CSWAP(dst[11], dst[12]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[8], dst[9]); } #define BITONIC_SORT_16 SORT_MAKE_STR(bitonic_sort_16) static __inline void BITONIC_SORT_16(SORT_TYPE *dst) { SORT_CSWAP(dst[0], dst[1]); SORT_CSWAP(dst[2], dst[3]); SORT_CSWAP(dst[4], dst[5]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[8], dst[9]); SORT_CSWAP(dst[10], dst[11]); SORT_CSWAP(dst[12], dst[13]); SORT_CSWAP(dst[14], dst[15]); SORT_CSWAP(dst[0], dst[2]); SORT_CSWAP(dst[4], dst[6]); SORT_CSWAP(dst[8], dst[10]); SORT_CSWAP(dst[12], dst[14]); SORT_CSWAP(dst[1], dst[3]); SORT_CSWAP(dst[5], dst[7]); SORT_CSWAP(dst[9], dst[11]); SORT_CSWAP(dst[13], dst[15]); SORT_CSWAP(dst[0], dst[4]); SORT_CSWAP(dst[8], dst[12]); SORT_CSWAP(dst[1], dst[5]); SORT_CSWAP(dst[9], dst[13]); SORT_CSWAP(dst[2], dst[6]); SORT_CSWAP(dst[10], dst[14]); SORT_CSWAP(dst[3], dst[7]); SORT_CSWAP(dst[11], dst[15]); SORT_CSWAP(dst[0], dst[8]); SORT_CSWAP(dst[1], dst[9]); SORT_CSWAP(dst[2], dst[10]); SORT_CSWAP(dst[3], dst[11]); SORT_CSWAP(dst[4], dst[12]); SORT_CSWAP(dst[5], dst[13]); SORT_CSWAP(dst[6], dst[14]); SORT_CSWAP(dst[7], dst[15]); SORT_CSWAP(dst[5], dst[10]); SORT_CSWAP(dst[6], dst[9]); SORT_CSWAP(dst[3], dst[12]); SORT_CSWAP(dst[13], dst[14]); SORT_CSWAP(dst[7], dst[11]); SORT_CSWAP(dst[1], dst[2]); SORT_CSWAP(dst[4], dst[8]); SORT_CSWAP(dst[1], dst[4]); SORT_CSWAP(dst[7], dst[13]); SORT_CSWAP(dst[2], dst[8]); SORT_CSWAP(dst[11], dst[14]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[9], dst[10]); SORT_CSWAP(dst[2], dst[4]); SORT_CSWAP(dst[11], dst[13]); SORT_CSWAP(dst[3], dst[8]); SORT_CSWAP(dst[7], dst[12]); SORT_CSWAP(dst[6], dst[8]); SORT_CSWAP(dst[10], dst[12]); SORT_CSWAP(dst[3], dst[5]); SORT_CSWAP(dst[7], dst[9]); SORT_CSWAP(dst[3], dst[4]); SORT_CSWAP(dst[5], dst[6]); SORT_CSWAP(dst[7], dst[8]); SORT_CSWAP(dst[9], dst[10]); SORT_CSWAP(dst[11], dst[12]); SORT_CSWAP(dst[6], dst[7]); SORT_CSWAP(dst[8], dst[9]); } void BITONIC_SORT(SORT_TYPE *dst, const size_t size) { switch (size) { case 0: case 1: break; case 2: BITONIC_SORT_2(dst); break; case 3: BITONIC_SORT_3(dst); break; case 4: BITONIC_SORT_4(dst); break; case 5: BITONIC_SORT_5(dst); break; case 6: BITONIC_SORT_6(dst); break; case 7: BITONIC_SORT_7(dst); break; case 8: BITONIC_SORT_8(dst); break; case 9: BITONIC_SORT_9(dst); break; case 10: BITONIC_SORT_10(dst); break; case 11: BITONIC_SORT_11(dst); break; case 12: BITONIC_SORT_12(dst); break; case 13: BITONIC_SORT_13(dst); break; case 14: BITONIC_SORT_14(dst); break; case 15: BITONIC_SORT_15(dst); break; case 16: BITONIC_SORT_16(dst); break; default: BINARY_INSERTION_SORT(dst, size); } } #if SORT_SAFE_CPY void SORT_TYPE_CPY(SORT_TYPE *dst, SORT_TYPE *src, const size_t size) { size_t i = 0; for (; i < size; ++i) { dst[i] = src[i]; } } void SORT_TYPE_MOVE(SORT_TYPE *dst, SORT_TYPE *src, const size_t size) { size_t i; if (dst < src) { SORT_TYPE_CPY(dst, src, size); } else if (dst != src && size > 0) { for (i = size - 1; i > 0; --i) { dst[i] = src[i]; } *dst = *src; } } #else #undef SORT_TYPE_CPY #define SORT_TYPE_CPY(dst, src, size) memcpy((dst), (src), (size) * sizeof(SORT_TYPE)) #undef SORT_TYPE_MOVE #define SORT_TYPE_MOVE(dst, src, size) memmove((dst), (src), (size) * sizeof(SORT_TYPE)) #endif SORT_TYPE* SORT_NEW_BUFFER(size_t size) { #if SORT_SAFE_CPY return new SORT_TYPE[size]; #else return (SORT_TYPE*)malloc(size * sizeof(SORT_TYPE)); #endif } void SORT_DELETE_BUFFER(SORT_TYPE* pointer) { #if SORT_SAFE_CPY delete[] pointer; #else free(pointer); #endif } /* Shell sort implementation based on Wikipedia article http://en.wikipedia.org/wiki/Shell_sort */ void SHELL_SORT(SORT_TYPE *dst, const size_t size) { /* don't bother sorting an array of size 0 or 1 */ /* TODO: binary search to find first gap? */ int inci = 47; size_t inc = shell_gaps[inci]; size_t i; if (size <= 1) { return; } while (inc > (size >> 1)) { inc = shell_gaps[--inci]; } while (1) { for (i = inc; i < size; i++) { SORT_TYPE temp = dst[i]; size_t j = i; while ((j >= inc) && (SORT_CMP(dst[j - inc], temp) > 0)) { dst[j] = dst[j - inc]; j -= inc; } dst[j] = temp; } if (inc == 1) { break; } inc = shell_gaps[--inci]; } } /* Function used to do a binary search for binary insertion sort */ static __inline size_t BINARY_INSERTION_FIND(SORT_TYPE *dst, const SORT_TYPE x, const size_t size) { size_t l, c, r; SORT_TYPE cx; l = 0; r = size - 1; c = r >> 1; /* check for out of bounds at the beginning. */ if (SORT_CMP(x, dst[0]) < 0) { return 0; } else if (SORT_CMP(x, dst[r]) > 0) { return r; } cx = dst[c]; while (1) { const int val = SORT_CMP(x, cx); if (val < 0) { if (c - l <= 1) { return c; } r = c; } else { /* allow = for stability. The binary search favors the right. */ if (r - c <= 1) { return c + 1; } l = c; } c = l + ((r - l) >> 1); cx = dst[c]; } } /* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */ static void BINARY_INSERTION_SORT_START(SORT_TYPE *dst, const size_t start, const size_t size) { size_t i; for (i = start; i < size; i++) { size_t j; SORT_TYPE x; size_t location; /* If this entry is already correct, just move along */ if (SORT_CMP(dst[i - 1], dst[i]) <= 0) { continue; } /* Else we need to find the right place, shift everything over, and squeeze in */ x = dst[i]; location = BINARY_INSERTION_FIND(dst, x, i); for (j = i - 1; j >= location; j--) { dst[j + 1] = dst[j]; if (j == 0) { /* check edge case because j is unsigned */ break; } } dst[location] = x; } } /* Binary insertion sort */ void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size) { /* don't bother sorting an array of size <= 1 */ if (size <= 1) { return; } BINARY_INSERTION_SORT_START(dst, 1, size); } /* Selection sort */ void SELECTION_SORT(SORT_TYPE *dst, const size_t size) { size_t i, j; /* don't bother sorting an array of size <= 1 */ if (size <= 1) { return; } for (i = 0; i < size; i++) { for (j = i + 1; j < size; j++) { if (SORT_CMP(dst[j], dst[i]) < 0) { SORT_SWAP(dst[i], dst[j]); } } } } /* In-place mergesort */ void MERGE_SORT_IN_PLACE_ASWAP(SORT_TYPE * dst1, SORT_TYPE * dst2, size_t len) { do { SORT_SWAP(*dst1, *dst2); dst1++; dst2++; } while (--len); } void MERGE_SORT_IN_PLACE_FRONTMERGE(SORT_TYPE *dst1, size_t l1, SORT_TYPE *dst2, size_t l2) { SORT_TYPE *dst0 = dst2 - l1; if (SORT_CMP(dst1[l1 - 1], dst2[0]) <= 0) { MERGE_SORT_IN_PLACE_ASWAP(dst1, dst0, l1); return; } do { while (SORT_CMP(*dst2, *dst1) > 0) { SORT_SWAP(*dst1, *dst0); dst1++; dst0++; if (--l1 == 0) { return; } } SORT_SWAP(*dst2, *dst0); dst2++; dst0++; } while (--l2); do { SORT_SWAP(*dst1, *dst0); dst1++; dst0++; } while (--l1); } size_t MERGE_SORT_IN_PLACE_BACKMERGE(SORT_TYPE * dst1, size_t l1, SORT_TYPE * dst2, size_t l2) { size_t res; SORT_TYPE *dst0 = dst2 + l1; if (SORT_CMP(dst1[1 - l1], dst2[0]) >= 0) { MERGE_SORT_IN_PLACE_ASWAP(dst1 - l1 + 1, dst0 - l1 + 1, l1); return l1; } do { while (SORT_CMP(*dst2, *dst1) < 0) { SORT_SWAP(*dst1, *dst0); dst1--; dst0--; if (--l1 == 0) { return 0; } } SORT_SWAP(*dst2, *dst0); dst2--; dst0--; } while (--l2); res = l1; do { SORT_SWAP(*dst1, *dst0); dst1--; dst0--; } while (--l1); return res; } /* merge dst[p0..p1) by buffer dst[p1..p1+r) */ void MERGE_SORT_IN_PLACE_RMERGE(SORT_TYPE *dst, size_t len, size_t lp, size_t r) { size_t i, lq; int cv; if (SORT_CMP(dst[lp], dst[lp - 1]) >= 0) { return; } lq = lp; for (i = 0; i < len; i += r) { /* select smallest dst[p0+n*r] */ size_t q = i, j; for (j = lp; j <= lq; j += r) { cv = SORT_CMP(dst[j], dst[q]); if (cv == 0) { cv = SORT_CMP(dst[j + r - 1], dst[q + r - 1]); } if (cv < 0) { q = j; } } if (q != i) { MERGE_SORT_IN_PLACE_ASWAP(dst + i, dst + q, r); /* swap it with current position */ if (q == lq && q < (len - r)) { lq += r; } } if (i != 0 && SORT_CMP(dst[i], dst[i - 1]) < 0) { MERGE_SORT_IN_PLACE_ASWAP(dst + len, dst + i, r); /* swap current position with buffer */ MERGE_SORT_IN_PLACE_BACKMERGE(dst + (len + r - 1), r, dst + (i - 1), r); /* buffer :merge: dst[i-r..i) -> dst[i-r..i+r) */ } if (lp == i) { lp += r; } } } /* In-place Merge Sort implementation. (c)2012, Andrey Astrelin, astrelin@tochka.ru */ void MERGE_SORT_IN_PLACE(SORT_TYPE *dst, const size_t len) { /* don't bother sorting an array of size <= 1 */ size_t r = rbnd(len); size_t lr = (len / r - 1) * r; SORT_TYPE *dst1 = dst - 1; size_t p, m, q, q1, p0; if (len <= 1) { return; } if (len <= SMALL_SORT_BND) { SMALL_SORT(dst, len); return; } for (p = 2; p <= lr; p += 2) { dst1 += 2; if (SORT_CMP(dst1[0], dst1[-1]) < 0) { SORT_SWAP(dst1[0], dst1[-1]); } if (p & 2) { continue; } m = len - p; q = 2; while ((p & q) == 0) { if (SORT_CMP(dst1[1 - q], dst1[-(int) q]) < 0) { break; } q *= 2; } if (p & q) { continue; } if (q < m) { p0 = len - q; MERGE_SORT_IN_PLACE_ASWAP(dst + p - q, dst + p0, q); for (;;) { q1 = 2 * q; if ((q1 > m) || (p & q1)) { break; } p0 = len - q1; MERGE_SORT_IN_PLACE_FRONTMERGE(dst + (p - q1), q, dst + p0 + q, q); q = q1; } MERGE_SORT_IN_PLACE_BACKMERGE(dst + (len - 1), q, dst1 - q, q); q *= 2; } q1 = q; while (q1 > m) { q1 /= 2; } while ((q & p) == 0) { q *= 2; MERGE_SORT_IN_PLACE_RMERGE(dst + (p - q), q, q / 2, q1); } } q1 = 0; for (q = r; q < lr; q *= 2) { if ((lr & q) != 0) { q1 += q; if (q1 != q) { MERGE_SORT_IN_PLACE_RMERGE(dst + (lr - q1), q1, q, r); } } } m = len - lr; MERGE_SORT_IN_PLACE(dst + lr, m); MERGE_SORT_IN_PLACE_ASWAP(dst, dst + lr, m); m += MERGE_SORT_IN_PLACE_BACKMERGE(dst + (m - 1), m, dst + (lr - 1), lr - m); MERGE_SORT_IN_PLACE(dst, m); } /* Standard merge sort */ void MERGE_SORT_RECURSIVE(SORT_TYPE *newdst, SORT_TYPE *dst, const size_t size) { const size_t middle = size / 2; size_t out = 0; size_t i = 0; size_t j = middle; /* don't bother sorting an array of size <= 1 */ if (size <= 1) { return; } if (size <= SMALL_SORT_BND) { BINARY_INSERTION_SORT(dst, size); return; } MERGE_SORT_RECURSIVE(newdst, dst, middle); MERGE_SORT_RECURSIVE(newdst, &dst[middle], size - middle); while (out != size) { if (i < middle) { if (j < size) { if (SORT_CMP(dst[i], dst[j]) <= 0) { newdst[out] = dst[i++]; } else { newdst[out] = dst[j++]; } } else { newdst[out] = dst[i++]; } } else { newdst[out] = dst[j++]; } out++; } SORT_TYPE_CPY(dst, newdst, size); } /* Standard merge sort */ void MERGE_SORT(SORT_TYPE *dst, const size_t size) { SORT_TYPE *newdst; /* don't bother sorting an array of size <= 1 */ if (size <= 1) { return; } if (size <= SMALL_SORT_BND) { BINARY_INSERTION_SORT(dst, size); return; } newdst = SORT_NEW_BUFFER(size); MERGE_SORT_RECURSIVE(newdst, dst, size); SORT_DELETE_BUFFER(newdst); } static __inline size_t QUICK_SORT_PARTITION(SORT_TYPE *dst, const size_t left, const size_t right, const size_t pivot) { SORT_TYPE value = dst[pivot]; size_t index = left; size_t i; int not_all_same = 0; /* move the pivot to the right */ SORT_SWAP(dst[pivot], dst[right]); for (i = left; i < right; i++) { int cmp = SORT_CMP(dst[i], value); /* check if everything is all the same */ not_all_same |= cmp; if (cmp < 0) { SORT_SWAP(dst[i], dst[index]); index++; } } SORT_SWAP(dst[right], dst[index]); /* avoid degenerate case */ if (not_all_same == 0) { return SIZE_MAX; } return index; } /* Based on Knuth vol. 3 static __inline size_t QUICK_SORT_HOARE_PARTITION(SORT_TYPE *dst, const size_t l, const size_t r, const size_t pivot) { SORT_TYPE value; size_t i = l + 1; size_t j = r; if (pivot != l) { SORT_SWAP(dst[pivot], dst[l]); } value = dst[l]; while (1) { while (SORT_CMP(dst[i], value) < 0) { i++; } while (SORT_CMP(value, dst[j]) < 0) { j--; } if (j <= i) { SORT_SWAP(dst[l], dst[j]); return j; } SORT_SWAP(dst[i], dst[j]); i++; j--; } return 0; } */ /* Return the median index of the objects at the three indices. */ static __inline size_t MEDIAN(const SORT_TYPE *dst, const size_t a, const size_t b, const size_t c) { const int AB = SORT_CMP(dst[a], dst[b]) < 0; if (AB) { /* a < b */ const int BC = SORT_CMP(dst[b], dst[c]) < 0; if (BC) { /* a < b < c */ return b; } else { /* a < b, c < b */ const int AC = SORT_CMP(dst[a], dst[c]) < 0; if (AC) { /* a < c < b */ return c; } else { /* c < a < b */ return a; } } } else { /* b < a */ const int AC = SORT_CMP(dst[a], dst[b]) < 0; if (AC) { /* b < a < c */ return a; } else { /* b < a, c < a */ const int BC = SORT_CMP(dst[b], dst[c]) < 0; if (BC) { /* b < c < a */ return c; } else { /* c < b < a */ return b; } } } } static void QUICK_SORT_RECURSIVE(SORT_TYPE *dst, const size_t original_left, const size_t original_right) { size_t left; size_t right; size_t pivot; size_t new_pivot; size_t middle; int loop_count = 0; const int max_loops = 64 - CLZ(original_right - original_left); /* ~lg N */ left = original_left; right = original_right; while (1) { if (right <= left) { return; } if ((right - left + 1U) <= SMALL_SORT_BND) { SMALL_SORT(&dst[left], right - left + 1U); return; } if (++loop_count >= max_loops) { /* we have recursed / looped too many times; switch to heap sort */ HEAP_SORT(&dst[left], right - left + 1U); return; } /* median of 5 */ middle = left + ((right - left) >> 1); pivot = MEDIAN((const SORT_TYPE *) dst, left, middle, right); pivot = MEDIAN((const SORT_TYPE *) dst, left + ((middle - left) >> 1), pivot, middle + ((right - middle) >> 1)); new_pivot = QUICK_SORT_PARTITION(dst, left, right, pivot); /* check for partition all equal */ if (new_pivot == SIZE_MAX) { return; } /* recurse only on the small part to avoid degenerate stack sizes */ /* and manually do tail call on the large part */ if (new_pivot - 1U - left > right - new_pivot - 1U) { /* left is bigger than right */ QUICK_SORT_RECURSIVE(dst, new_pivot + 1U, right); /* tail call for left */ right = new_pivot - 1U; } else { /* right is bigger than left */ QUICK_SORT_RECURSIVE(dst, left, new_pivot - 1U); /* tail call for right */ left = new_pivot + 1U; } } } void QUICK_SORT(SORT_TYPE *dst, const size_t size) { /* don't bother sorting an array of size 1 */ if (size <= 1) { return; } QUICK_SORT_RECURSIVE(dst, 0U, size - 1U); } /* timsort implementation, based on timsort.txt */ static __inline void REVERSE_ELEMENTS(SORT_TYPE *dst, size_t start, size_t end) { while (1) { if (start >= end) { return; } SORT_SWAP(dst[start], dst[end]); start++; end--; } } static size_t COUNT_RUN(SORT_TYPE *dst, const size_t start, const size_t size) { size_t curr; if (size - start == 1) { return 1; } if (start >= size - 2) { if (SORT_CMP(dst[size - 2], dst[size - 1]) > 0) { SORT_SWAP(dst[size - 2], dst[size - 1]); } return 2; } curr = start + 2; if (SORT_CMP(dst[start], dst[start + 1]) <= 0) { /* increasing run */ while (1) { if (curr == size - 1) { break; } if (SORT_CMP(dst[curr - 1], dst[curr]) > 0) { break; } curr++; } return curr - start; } else { /* decreasing run */ while (1) { if (curr == size - 1) { break; } if (SORT_CMP(dst[curr - 1], dst[curr]) <= 0) { break; } curr++; } /* reverse in-place */ REVERSE_ELEMENTS(dst, start, curr - 1); return curr - start; } } static int CHECK_INVARIANT(TIM_SORT_RUN_T *stack, const int stack_curr) { size_t A, B, C; if (stack_curr < 2) { return 1; } if (stack_curr == 2) { const size_t A1 = stack[stack_curr - 2].length; const size_t B1 = stack[stack_curr - 1].length; if (A1 <= B1) { return 0; } return 1; } A = stack[stack_curr - 3].length; B = stack[stack_curr - 2].length; C = stack[stack_curr - 1].length; if ((A <= B + C) || (B <= C)) { return 0; } return 1; } typedef struct { size_t alloc; SORT_TYPE *storage; } TEMP_STORAGE_T; static void TIM_SORT_RESIZE(TEMP_STORAGE_T *store, const size_t new_size) { if ((store->storage == NULL) || (store->alloc < new_size)) { SORT_TYPE *tempstore = (SORT_TYPE *)realloc(store->storage, new_size * sizeof(SORT_TYPE)); if (tempstore == NULL) { fprintf(stderr, "Error allocating temporary storage for tim sort: need %lu bytes", (unsigned long)(sizeof(SORT_TYPE) * new_size)); exit(1); } store->storage = tempstore; store->alloc = new_size; } } static size_t TIM_SORT_GALLOP(SORT_TYPE *dst, const size_t size, const SORT_TYPE key, size_t anchor, int right) { int last_ofs = 0; int ofs, max_ofs, ofs_sign, cmp; size_t l, c, r; cmp = SORT_CMP(key, dst[anchor]); if (cmp < 0 || (!right && cmp == 0)) { /* short cut */ if (anchor == 0) { return 0; } ofs = -1; ofs_sign = -1; max_ofs = -(int)anchor; /* ensure anchor+max_ofs is valid idx */ } else { if (anchor == size - 1) { return size; } ofs = 1; ofs_sign = 1; max_ofs = (int)(size - anchor - 1); } for (;;) { /* deal with overflow */ if (max_ofs / ofs <= 1) { ofs = max_ofs; if (ofs < 0) { cmp = SORT_CMP(key, dst[0]); if ((right && cmp < 0) || (!right && cmp <= 0)) { return 0; } } else { cmp = SORT_CMP(dst[size - 1], key); if ((right && cmp <= 0) || (!right && cmp < 0)) { return size; } } break; } c = anchor + ofs; /* right, 0> 1); cmp = SORT_CMP(key, dst[c]); if ((right && cmp < 0) || (!right && cmp <= 0)) { r = c; } else { l = c; } } return r; } static void TIM_SORT_MERGE_LEFT(SORT_TYPE *A_src, SORT_TYPE *B_src, const size_t A, const size_t B, SORT_TYPE* storage, int *min_gallop_p) { size_t pdst, pa, pb, k; int a_count, b_count; int min_gallop = *min_gallop_p; SORT_TYPE *dst = A_src; SORT_TYPE_CPY(storage, dst, A); A_src = storage; pdst = pa = pb = 0; /* first element must in B, otherwise skipped in the caller */ dst[pdst++] = B_src[pb++]; if (B == 1) { goto copyA; } for (;;) { a_count = b_count = 0; for (;;) { if (SORT_CMP(A_src[pa], B_src[pb]) <= 0) { dst[pdst++] = A_src[pa++]; ++a_count; b_count = 0; /* No need to check if pa == A because the last element must be in A * so pb will reach to B first. You can check pa == A-1 and do * some optimization if you wish.*/ if (min_gallop <= a_count) { break; } } else { dst[pdst++] = B_src[pb++]; ++b_count; a_count = 0; if (pb == B) { goto copyA; } if (min_gallop <= b_count) { break; } } } ++min_gallop; for (;;) { if (min_gallop != 0) { min_gallop --; } k = TIM_SORT_GALLOP(&A_src[pa], A - pa, B_src[pb], 0, 1); SORT_TYPE_CPY(&dst[pdst], &A_src[pa], k); pdst += k; pa += k; /* now we know the next must be in B */ dst[pdst++] = B_src[pb++]; if (pb == B) { goto copyA; } if (a_count && k < TIM_SORT_MIN_GALLOP) { ++min_gallop; break; } k = TIM_SORT_GALLOP(&B_src[pb], B - pb, A_src[pa], 0, 0); SORT_TYPE_MOVE(&dst[pdst], &B_src[pb], k); pdst += k; pb += k; if (pb == B) { goto copyA; } dst[pdst++] = A_src[pa++]; if (b_count && k < TIM_SORT_MIN_GALLOP) { ++min_gallop; break; } } } copyA: SORT_TYPE_CPY(&dst[pdst], &A_src[pa], A - pa); *min_gallop_p = min_gallop; return; } static void TIM_SORT_MERGE_RIGHT(SORT_TYPE *A_src, SORT_TYPE *B_src, const size_t A, const size_t B, SORT_TYPE* storage, int *min_gallop_p) { size_t k; int pdst, pa, pb, a_count, b_count; int min_gallop = *min_gallop_p; SORT_TYPE *dst = A_src; pa = (int)(A - 1); pb = (int)(B - 1); pdst = (int)(A + B - 1); SORT_TYPE_CPY(storage, B_src, B); B_src = storage; /* last element must in A, otherwise skipped in the caller */ dst[pdst--] = A_src[pa--]; if (A == 1) { goto copyB; } for (;;) { a_count = b_count = 0; for (;;) { if (SORT_CMP(A_src[pa], B_src[pb]) <= 0) { dst[pdst--] = B_src[pb--]; ++b_count; a_count = 0; if (min_gallop <= b_count) { break; } /* No need to check if pb == -1 because the first element must be in B * so pa will reach to -1 first. You can check pb == 0 and do * some optimization if you wish.*/ } else { dst[pdst--] = A_src[pa--]; ++a_count; b_count = 0; if (pa == -1) { goto copyB; } if (min_gallop <= a_count) { break; } } } ++min_gallop; for (;;) { if (min_gallop != 0) { min_gallop --; } k = TIM_SORT_GALLOP(A_src, pa + 1, B_src[pb], pa, 1); /* Understand the margin by considering k==0 */ SORT_TYPE_MOVE(&dst[pb + k + 1], &A_src[k], pa + 1 - k); pdst = pb + (int)k; pa = (int)(k - 1); if (pa == -1) { goto copyB; } /* now we know the next must be in B */ dst[pdst--] = B_src[pb--]; if (a_count && pa + 1 - k < TIM_SORT_MIN_GALLOP) { ++min_gallop; break; } k = TIM_SORT_GALLOP(B_src, pb + 1, A_src[pa], pb, 0); SORT_TYPE_CPY(&dst[pa + k + 1], &B_src[k], pb + 1 - k); pdst = pa + (int)k; pb = (int)(k - 1); dst[pdst--] = A_src[pa--]; if (pa == -1) { goto copyB; } if (b_count && pb + 1 - k < TIM_SORT_MIN_GALLOP) { ++min_gallop; break; } } } copyB: SORT_TYPE_CPY(dst, B_src, pb + 1); *min_gallop_p = min_gallop; return; } static void TIM_SORT_MERGE(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const int stack_curr, TEMP_STORAGE_T *store, int* min_gallop_p) { size_t A = stack[stack_curr - 2].length; size_t B = stack[stack_curr - 1].length; size_t A_start = stack[stack_curr - 2].start; size_t B_start = stack[stack_curr - 1].start; SORT_TYPE *storage; size_t k; /* A[k-1] <= B[0] < A[k] */ k = TIM_SORT_GALLOP(&dst[A_start], A, dst[B_start], 0, 1); A_start += k; A -= k; if (A == 0) { *min_gallop_p /= 2; return; } /* B[k-1] < A[A-1] <= B[k] */ k = TIM_SORT_GALLOP(&dst[B_start], B, dst[B_start - 1], B - 1, 0); B = k; TIM_SORT_RESIZE(store, MIN(A, B)); storage = store->storage; if (A < B) { TIM_SORT_MERGE_LEFT(&dst[A_start], &dst[B_start], A, B, storage, min_gallop_p); } else { TIM_SORT_MERGE_RIGHT(&dst[A_start], &dst[B_start], A, B, storage, min_gallop_p); } } static int TIM_SORT_COLLAPSE(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr, TEMP_STORAGE_T *store, const size_t size, int* min_gallop_p) { while (1) { size_t A, B, C, D; int ABC, BCD, CD; /* if the stack only has one thing on it, we are done with the collapse */ if (stack_curr <= 1) { break; } /* if this is the last merge, just do it */ if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) { TIM_SORT_MERGE(dst, stack, stack_curr, store, min_gallop_p); stack[0].length += stack[1].length; stack_curr--; break; } /* check if the invariant is off for a stack of 2 elements */ else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) { TIM_SORT_MERGE(dst, stack, stack_curr, store, min_gallop_p); stack[0].length += stack[1].length; stack_curr--; break; } else if (stack_curr == 2) { break; } B = stack[stack_curr - 3].length; C = stack[stack_curr - 2].length; D = stack[stack_curr - 1].length; if (stack_curr >= 4) { A = stack[stack_curr - 4].length; ABC = (A <= B + C); } else { ABC = 0; } BCD = (B <= C + D) || ABC; CD = (C <= D); /* Both invariants are good */ if (!BCD && !CD) { break; } /* left merge */ if (BCD && !CD) { TIM_SORT_MERGE(dst, stack, stack_curr - 1, store, min_gallop_p); stack[stack_curr - 3].length += stack[stack_curr - 2].length; stack[stack_curr - 2] = stack[stack_curr - 1]; stack_curr--; } else { /* right merge */ TIM_SORT_MERGE(dst, stack, stack_curr, store, min_gallop_p); stack[stack_curr - 2].length += stack[stack_curr - 1].length; stack_curr--; } } return stack_curr; } static __inline int PUSH_NEXT(SORT_TYPE *dst, const size_t size, TEMP_STORAGE_T *store, const size_t minrun, TIM_SORT_RUN_T *run_stack, size_t *stack_curr, size_t *curr, int *min_gallop_p) { size_t len = COUNT_RUN(dst, *curr, size); size_t run = minrun; if (run > size - *curr) { run = size - *curr; } if (run > len) { BINARY_INSERTION_SORT_START(&dst[*curr], len, run); len = run; } run_stack[*stack_curr].start = *curr; run_stack[*stack_curr].length = len; (*stack_curr)++; *curr += len; if (*curr == size) { /* finish up */ while (*stack_curr > 1) { TIM_SORT_MERGE(dst, run_stack, (int)*stack_curr, store, min_gallop_p); run_stack[*stack_curr - 2].length += run_stack[*stack_curr - 1].length; (*stack_curr)--; } if (store->storage != NULL) { free(store->storage); store->storage = NULL; } return 0; } return 1; } void TIM_SORT(SORT_TYPE *dst, const size_t size) { size_t minrun; TEMP_STORAGE_T _store, *store; TIM_SORT_RUN_T run_stack[TIM_SORT_STACK_SIZE]; size_t stack_curr = 0; size_t curr = 0; int min_gallop = TIM_SORT_MIN_GALLOP; /* don't bother sorting an array of size 1 */ if (size <= 1) { return; } if (size < 64) { SMALL_SORT(dst, size); return; } /* compute the minimum run length */ minrun = compute_minrun(size); /* temporary storage for merges */ store = &_store; store->alloc = 0; store->storage = NULL; if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr, &min_gallop)) { return; } if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr, &min_gallop)) { return; } if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr, &min_gallop)) { return; } while (1) { if (!CHECK_INVARIANT(run_stack, (int)stack_curr)) { stack_curr = TIM_SORT_COLLAPSE(dst, run_stack, (int)stack_curr, store, size, &min_gallop); continue; } if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr, &min_gallop)) { return; } } } /* heap sort: based on wikipedia */ static __inline void HEAP_SIFT_DOWN(SORT_TYPE *dst, const size_t start, const size_t end) { size_t root = start; while ((root << 1) <= end) { size_t child = root << 1; if ((child < end) && (SORT_CMP(dst[child], dst[child + 1]) < 0)) { child++; } if (SORT_CMP(dst[root], dst[child]) < 0) { SORT_SWAP(dst[root], dst[child]); root = child; } else { return; } } } static __inline void HEAPIFY(SORT_TYPE *dst, const size_t size) { size_t start = size >> 1; while (1) { HEAP_SIFT_DOWN(dst, start, size - 1); if (start == 0) { break; } start--; } } void HEAP_SORT(SORT_TYPE *dst, const size_t size) { size_t end = size - 1; /* don't bother sorting an array of size <= 1 */ if (size <= 1) { return; } HEAPIFY(dst, size); while (end > 0) { SORT_SWAP(dst[end], dst[0]); HEAP_SIFT_DOWN(dst, 0, end - 1); end--; } } /********* Sqrt sorting *********************************/ /* */ /* (c) 2014 by Andrey Astrelin */ /* */ /* */ /* Stable sorting that works in O(N*log(N)) worst time */ /* and uses O(sqrt(N)) extra memory */ /* */ /* Define SORT_TYPE and SORT_CMP */ /* and then call SqrtSort() function */ /* */ /*********************************************************/ #define SORT_CMP_A(a,b) SORT_CMP(*(a),*(b)) static __inline void SQRT_SORT_SWAP_1(SORT_TYPE *a, SORT_TYPE *b) { SORT_TYPE c = *a; *a++ = *b; *b++ = c; } static __inline void SQRT_SORT_SWAP_N(SORT_TYPE *a, SORT_TYPE *b, int n) { while (n--) { SQRT_SORT_SWAP_1(a++, b++); } } static void SQRT_SORT_MERGE_RIGHT(SORT_TYPE *arr, int L1, int L2, int M) { int p0 = L1 + L2 + M - 1, p2 = L1 + L2 - 1, p1 = L1 - 1; while (p1 >= 0) { if (p2 < L1 || SORT_CMP_A(arr + p1, arr + p2) > 0) { arr[p0--] = arr[p1--]; } else { arr[p0--] = arr[p2--]; } } if (p2 != p0) while (p2 >= L1) { arr[p0--] = arr[p2--]; } } /* arr[M..-1] - free, arr[0,L1-1]++arr[L1,L1+L2-1] -> arr[M,M+L1+L2-1] */ static void SQRT_SORT_MERGE_LEFT_WITH_X_BUF(SORT_TYPE *arr, int L1, int L2, int M) { int p0 = 0, p1 = L1; L2 += L1; while (p1 < L2) { if (p0 == L1 || SORT_CMP_A(arr + p0, arr + p1) > 0) { arr[M++] = arr[p1++]; } else { arr[M++] = arr[p0++]; } } if (M != p0) while (p0 < L1) { arr[M++] = arr[p0++]; } } /* arr[0,L1-1] ++ arr2[0,L2-1] -> arr[-L1,L2-1], arr2 is "before" arr1 */ static void SQRT_SORT_MERGE_DOWN(SORT_TYPE *arr, SORT_TYPE *arr2, int L1, int L2) { int p0 = 0, p1 = 0, M = -L2; while (p1 < L2) { if (p0 == L1 || SORT_CMP_A(arr + p0, arr2 + p1) >= 0) { arr[M++] = arr2[p1++]; } else { arr[M++] = arr[p0++]; } } if (M != p0) while (p0 < L1) { arr[M++] = arr[p0++]; } } static void SQRT_SORT_SMART_MERGE_WITH_X_BUF(SORT_TYPE *arr, int *alen1, int *atype, int len2, int lkeys) { int p0 = -lkeys, p1 = 0, p2 = *alen1, q1 = p2, q2 = p2 + len2; int ftype = 1 - *atype; /* 1 if inverted */ while (p1 < q1 && p2 < q2) { if (SORT_CMP_A(arr + p1, arr + p2) - ftype < 0) { arr[p0++] = arr[p1++]; } else { arr[p0++] = arr[p2++]; } } if (p1 < q1) { *alen1 = q1 - p1; while (p1 < q1) { arr[--q2] = arr[--q1]; } } else { *alen1 = q2 - p2; *atype = ftype; } } /* arr - starting array. arr[-lblock..-1] - buffer (if havebuf). lblock - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded keys - arrays of keys, in same order as blocks. key0, nblock2=0 is possible. */ static void SQRT_SORT_MERGE_BUFFERS_LEFT_WITH_X_BUF(int *keys, int midkey, SORT_TYPE *arr, int nblock, int lblock, int nblock2, int llast) { int l, prest, lrest, frest, pidx, cidx, fnext; if (nblock == 0) { l = nblock2 * lblock; SQRT_SORT_MERGE_LEFT_WITH_X_BUF(arr, l, llast, -lblock); return; } lrest = lblock; frest = keys[0] < midkey ? 0 : 1; pidx = lblock; for (cidx = 1; cidx < nblock; cidx++, pidx += lblock) { prest = pidx - lrest; fnext = keys[cidx] < midkey ? 0 : 1; if (fnext == frest) { SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest); prest = pidx; lrest = lblock; } else { SQRT_SORT_SMART_MERGE_WITH_X_BUF(arr + prest, &lrest, &frest, lblock, lblock); } } prest = pidx - lrest; if (llast) { if (frest) { SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest); prest = pidx; lrest = lblock * nblock2; frest = 0; } else { lrest += lblock * nblock2; } SQRT_SORT_MERGE_LEFT_WITH_X_BUF(arr + prest, lrest, llast, -lblock); } else { SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest); } } /* build blocks of length K input: [-K,-1] elements are buffer output: first K elements are buffer, blocks 2*K and last subblock sorted */ static void SQRT_SORT_BUILD_BLOCKS(SORT_TYPE *arr, int L, int K) { int m, u, h, p0, p1, rest, restk, p; for (m = 1; m < L; m += 2) { u = 0; if (SORT_CMP_A(arr + (m - 1), arr + m) > 0) { u = 1; } arr[m - 3] = arr[m - 1 + u]; arr[m - 2] = arr[m - u]; } if (L % 2) { arr[L - 3] = arr[L - 1]; } arr -= 2; for (h = 2; h < K; h *= 2) { p0 = 0; p1 = L - 2 * h; while (p0 <= p1) { SQRT_SORT_MERGE_LEFT_WITH_X_BUF(arr + p0, h, h, -h); p0 += 2 * h; } rest = L - p0; if (rest > h) { SQRT_SORT_MERGE_LEFT_WITH_X_BUF(arr + p0, h, rest - h, -h); } else { for (; p0 < L; p0++) { arr[p0 - h] = arr[p0]; } } arr -= h; } restk = L % (2 * K); p = L - restk; if (restk <= K) { SORT_TYPE_CPY(arr + p + K, arr + p, restk); } else { SQRT_SORT_MERGE_RIGHT(arr + p, K, restk - K, K); } while (p > 0) { p -= 2 * K; SQRT_SORT_MERGE_RIGHT(arr + p, K, K, K); } } static void SQRT_SORT_SORT_INS(SORT_TYPE *arr, int len) { int i, j; for (i = 1; i < len; i++) { for (j = i - 1; j >= 0 && SORT_CMP_A(arr + (j + 1), arr + j) < 0; j--) { SQRT_SORT_SWAP_1(arr + j, arr + (j + 1)); } } } /* keys are on the left of arr. Blocks of length LL combined. We'll combine them in pairs LL and nkeys are powers of 2. (2*LL/lblock) keys are guarantied */ static void SQRT_SORT_COMBINE_BLOCKS(SORT_TYPE *arr, int len, int LL, int lblock, int *tags) { int M, b, NBlk, midkey, lrest, u, i, p, v, kc, nbl2, llast; SORT_TYPE *arr1; M = len / (2 * LL); lrest = len % (2 * LL); if (lrest <= LL) { len -= lrest; lrest = 0; } for (b = 0; b <= M; b++) { if (b == M && lrest == 0) { break; } arr1 = arr + b * 2 * LL; NBlk = (b == M ? lrest : 2 * LL) / lblock; u = NBlk + (b == M ? 1 : 0); for (i = 0; i <= u; i++) { tags[i] = i; } midkey = LL / lblock; for (u = 1; u < NBlk; u++) { p = u - 1; for (v = u; v < NBlk; v++) { kc = SORT_CMP_A(arr1 + p * lblock, arr1 + v * lblock); if (kc > 0 || (kc == 0 && tags[p] > tags[v])) { p = v; } } if (p != u - 1) { SQRT_SORT_SWAP_N(arr1 + (u - 1)*lblock, arr1 + p * lblock, lblock); i = tags[u - 1]; tags[u - 1] = tags[p]; tags[p] = i; } } nbl2 = llast = 0; if (b == M) { llast = lrest % lblock; } if (llast != 0) { while (nbl2 < NBlk && SORT_CMP_A(arr1 + NBlk * lblock, arr1 + (NBlk - nbl2 - 1)*lblock) < 0) { nbl2++; } } SQRT_SORT_MERGE_BUFFERS_LEFT_WITH_X_BUF(tags, midkey, arr1, NBlk - nbl2, lblock, nbl2, llast); } for (p = len; --p >= 0;) { arr[p] = arr[p - lblock]; } } static void SQRT_SORT_COMMON_SORT(SORT_TYPE *arr, int Len, SORT_TYPE *extbuf, int *Tags) { int lblock, cbuf; if (Len < 16) { SQRT_SORT_SORT_INS(arr, Len); return; } lblock = 1; while (lblock * lblock < Len) { lblock *= 2; } SORT_TYPE_CPY(extbuf, arr, lblock); SQRT_SORT_COMMON_SORT(extbuf, lblock, arr, Tags); SQRT_SORT_BUILD_BLOCKS(arr + lblock, Len - lblock, lblock); cbuf = lblock; while (Len > (cbuf *= 2)) { SQRT_SORT_COMBINE_BLOCKS(arr + lblock, Len - lblock, cbuf, lblock, Tags); } SQRT_SORT_MERGE_DOWN(arr + lblock, extbuf, Len - lblock, lblock); } void SQRT_SORT(SORT_TYPE *arr, size_t Len) { int L = 1; SORT_TYPE *ExtBuf; int *Tags; int NK; while (L * L < Len) { L *= 2; } NK = (int)((Len - 1) / L + 2); ExtBuf = SORT_NEW_BUFFER(L); if (ExtBuf == NULL) { return; /* fail */ } Tags = (int*)malloc(NK * sizeof(int)); if (Tags == NULL) { return; } SQRT_SORT_COMMON_SORT(arr, (int)Len, ExtBuf, Tags); free(Tags); SORT_DELETE_BUFFER(ExtBuf); } /********* Grail sorting *********************************/ /* */ /* (c) 2013 by Andrey Astrelin */ /* */ /* */ /* Stable sorting that works in O(N*log(N)) worst time */ /* and uses O(1) extra memory */ /* */ /* Define SORT_TYPE and SORT_CMP */ /* and then call GrailSort() function */ /* */ /* For sorting with fixed external buffer (512 items) */ /* use GrailSortWithBuffer() */ /* */ /* For sorting with dynamic external buffer (O(sqrt(N)) items) */ /* use GrailSortWithDynBuffer() */ /* */ /* Also classic in-place merge sort is implemented */ /* under the name of RecStableSort() */ /* */ /*********************************************************/ #define GRAIL_EXT_BUFFER_LENGTH 512 static __inline void GRAIL_SWAP1(SORT_TYPE *a, SORT_TYPE *b) { SORT_TYPE c = *a; *a = *b; *b = c; } static __inline void GRAIL_SWAP_N(SORT_TYPE *a, SORT_TYPE *b, int n) { while (n--) { GRAIL_SWAP1(a++, b++); } } static void GRAIL_ROTATE(SORT_TYPE *a, int l1, int l2) { while (l1 && l2) { if (l1 <= l2) { GRAIL_SWAP_N(a, a + l1, l1); a += l1; l2 -= l1; } else { GRAIL_SWAP_N(a + (l1 - l2), a + l1, l2); l1 -= l2; } } } static int GRAIL_BIN_SEARCH_LEFT(SORT_TYPE *arr, int len, SORT_TYPE *key) { int a = -1, b = len, c; while (a < b - 1) { c = a + ((b - a) >> 1); if (SORT_CMP_A(arr + c, key) >= 0) { b = c; } else { a = c; } } return b; } static int GRAIL_BIN_SEARCH_RIGHT(SORT_TYPE *arr, int len, SORT_TYPE *key) { int a = -1, b = len, c; while (a < b - 1) { c = a + ((b - a) >> 1); if (SORT_CMP_A(arr + c, key) > 0) { b = c; } else { a = c; } } return b; } /* cost: 2*len+nk^2/2 */ static int GRAIL_FIND_KEYS(SORT_TYPE *arr, int len, int nkeys) { int h = 1, h0 = 0; /* first key is always here */ int u = 1, r; while (u < len && h < nkeys) { r = GRAIL_BIN_SEARCH_LEFT(arr + h0, h, arr + u); if (r == h || SORT_CMP_A(arr + u, arr + (h0 + r)) != 0) { GRAIL_ROTATE(arr + h0, h, u - (h0 + h)); h0 = u - h; GRAIL_ROTATE(arr + (h0 + r), h - r, 1); h++; } u++; } GRAIL_ROTATE(arr, h0, h); return h; } /* cost: min(L1,L2)^2+max(L1,L2) */ static void GRAIL_MERGE_WITHOUT_BUFFER(SORT_TYPE *arr, int len1, int len2) { int h; if (len1 < len2) { while (len1) { h = GRAIL_BIN_SEARCH_LEFT(arr + len1, len2, arr); if (h != 0) { GRAIL_ROTATE(arr, len1, h); arr += h; len2 -= h; } if (len2 == 0) { break; } do { arr++; len1--; } while (len1 && SORT_CMP_A(arr, arr + len1) <= 0); } } else { while (len2) { h = GRAIL_BIN_SEARCH_RIGHT(arr, len1, arr + (len1 + len2 - 1)); if (h != len1) { GRAIL_ROTATE(arr + h, len1 - h, len2); len1 = h; } if (len1 == 0) { break; } do { len2--; } while (len2 && SORT_CMP_A(arr + len1 - 1, arr + len1 + len2 - 1) <= 0); } } } /* arr[M..-1] - buffer, arr[0,L1-1]++arr[L1,L1+L2-1] -> arr[M,M+L1+L2-1] */ static void GRAIL_MERGE_LEFT(SORT_TYPE *arr, int L1, int L2, int M) { int p0 = 0, p1 = L1; L2 += L1; while (p1 < L2) { if (p0 == L1 || SORT_CMP_A(arr + p0, arr + p1) > 0) { GRAIL_SWAP1(arr + (M++), arr + (p1++)); } else { GRAIL_SWAP1(arr + (M++), arr + (p0++)); } } if (M != p0) { GRAIL_SWAP_N(arr + M, arr + p0, L1 - p0); } } static void GRAIL_MERGE_RIGHT(SORT_TYPE *arr, int L1, int L2, int M) { int p0 = L1 + L2 + M - 1, p2 = L1 + L2 - 1, p1 = L1 - 1; while (p1 >= 0) { if (p2 < L1 || SORT_CMP_A(arr + p1, arr + p2) > 0) { GRAIL_SWAP1(arr + (p0--), arr + (p1--)); } else { GRAIL_SWAP1(arr + (p0--), arr + (p2--)); } } if (p2 != p0) while (p2 >= L1) { GRAIL_SWAP1(arr + (p0--), arr + (p2--)); } } static void GRAIL_SMART_MERGE_WITH_BUFFER(SORT_TYPE *arr, int *alen1, int *atype, int len2, int lkeys) { int p0 = -lkeys, p1 = 0, p2 = *alen1, q1 = p2, q2 = p2 + len2; int ftype = 1 - *atype; /* 1 if inverted */ while (p1 < q1 && p2 < q2) { if (SORT_CMP_A(arr + p1, arr + p2) - ftype < 0) { GRAIL_SWAP1(arr + (p0++), arr + (p1++)); } else { GRAIL_SWAP1(arr + (p0++), arr + (p2++)); } } if (p1 < q1) { *alen1 = q1 - p1; while (p1 < q1) { GRAIL_SWAP1(arr + (--q1), arr + (--q2)); } } else { *alen1 = q2 - p2; *atype = ftype; } } static void GRAIL_SMART_MERGE_WITHOUT_BUFFER(SORT_TYPE *arr, int *alen1, int *atype, int _len2) { int len1, len2, ftype, h; if (!_len2) { return; } len1 = *alen1; len2 = _len2; ftype = 1 - *atype; if (len1 && SORT_CMP_A(arr + (len1 - 1), arr + len1) - ftype >= 0) { while (len1) { h = ftype ? GRAIL_BIN_SEARCH_LEFT(arr + len1, len2, arr) : GRAIL_BIN_SEARCH_RIGHT(arr + len1, len2, arr); if (h != 0) { GRAIL_ROTATE(arr, len1, h); arr += h; len2 -= h; } if (len2 == 0) { *alen1 = len1; return; } do { arr++; len1--; } while (len1 && SORT_CMP_A(arr, arr + len1) - ftype < 0); } } *alen1 = len2; *atype = ftype; } /***** Sort With Extra Buffer *****/ /* arr[M..-1] - free, arr[0,L1-1]++arr[L1,L1+L2-1] -> arr[M,M+L1+L2-1] */ static void GRAIL_MERGE_LEFT_WITH_X_BUF(SORT_TYPE *arr, int L1, int L2, int M) { int p0 = 0, p1 = L1; L2 += L1; while (p1 < L2) { if (p0 == L1 || SORT_CMP_A(arr + p0, arr + p1) > 0) { arr[M++] = arr[p1++]; } else { arr[M++] = arr[p0++]; } } if (M != p0) while (p0 < L1) { arr[M++] = arr[p0++]; } } static void GRAIL_SMART_MERGE_WITH_X_BUF(SORT_TYPE *arr, int *alen1, int *atype, int len2, int lkeys) { int p0 = -lkeys, p1 = 0, p2 = *alen1, q1 = p2, q2 = p2 + len2; int ftype = 1 - *atype; /* 1 if inverted */ while (p1 < q1 && p2 < q2) { if (SORT_CMP_A(arr + p1, arr + p2) - ftype < 0) { arr[p0++] = arr[p1++]; } else { arr[p0++] = arr[p2++]; } } if (p1 < q1) { *alen1 = q1 - p1; while (p1 < q1) { arr[--q2] = arr[--q1]; } } else { *alen1 = q2 - p2; *atype = ftype; } } /* arr - starting array. arr[-lblock..-1] - buffer (if havebuf). lblock - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded keys - arrays of keys, in same order as blocks. key0, nblock2=0 is possible. */ static void GRAIL_MERGE_BUFFERS_LEFT_WITH_X_BUF(SORT_TYPE *keys, SORT_TYPE *midkey, SORT_TYPE *arr, int nblock, int lblock, int nblock2, int llast) { int l, prest, lrest, frest, pidx, cidx, fnext; if (nblock == 0) { l = nblock2 * lblock; GRAIL_MERGE_LEFT_WITH_X_BUF(arr, l, llast, -lblock); return; } lrest = lblock; frest = SORT_CMP_A(keys, midkey) < 0 ? 0 : 1; pidx = lblock; for (cidx = 1; cidx < nblock; cidx++, pidx += lblock) { prest = pidx - lrest; fnext = SORT_CMP_A(keys + cidx, midkey) < 0 ? 0 : 1; if (fnext == frest) { SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest); prest = pidx; lrest = lblock; } else { GRAIL_SMART_MERGE_WITH_X_BUF(arr + prest, &lrest, &frest, lblock, lblock); } } prest = pidx - lrest; if (llast) { if (frest) { SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest); prest = pidx; lrest = lblock * nblock2; frest = 0; } else { lrest += lblock * nblock2; } GRAIL_MERGE_LEFT_WITH_X_BUF(arr + prest, lrest, llast, -lblock); } else { SORT_TYPE_CPY(arr + prest - lblock, arr + prest, lrest); } } /***** End Sort With Extra Buffer *****/ /* build blocks of length K input: [-K,-1] elements are buffer output: first K elements are buffer, blocks 2*K and last subblock sorted */ static void GRAIL_BUILD_BLOCKS(SORT_TYPE *arr, int L, int K, SORT_TYPE *extbuf, int LExtBuf) { int m, u, h, p0, p1, rest, restk, p, kbuf; kbuf = K < LExtBuf ? K : LExtBuf; while (kbuf & (kbuf - 1)) { kbuf &= kbuf - 1; /* max power or 2 - just in case */ } if (kbuf) { SORT_TYPE_CPY(extbuf, arr - kbuf, kbuf); for (m = 1; m < L; m += 2) { u = 0; if (SORT_CMP_A(arr + (m - 1), arr + m) > 0) { u = 1; } arr[m - 3] = arr[m - 1 + u]; arr[m - 2] = arr[m - u]; } if (L % 2) { arr[L - 3] = arr[L - 1]; } arr -= 2; for (h = 2; h < kbuf; h *= 2) { p0 = 0; p1 = L - 2 * h; while (p0 <= p1) { GRAIL_MERGE_LEFT_WITH_X_BUF(arr + p0, h, h, -h); p0 += 2 * h; } rest = L - p0; if (rest > h) { GRAIL_MERGE_LEFT_WITH_X_BUF(arr + p0, h, rest - h, -h); } else { for (; p0 < L; p0++) { arr[p0 - h] = arr[p0]; } } arr -= h; } SORT_TYPE_CPY(arr + L, extbuf, kbuf); } else { for (m = 1; m < L; m += 2) { u = 0; if (SORT_CMP_A(arr + (m - 1), arr + m) > 0) { u = 1; } GRAIL_SWAP1(arr + (m - 3), arr + (m - 1 + u)); GRAIL_SWAP1(arr + (m - 2), arr + (m - u)); } if (L % 2) { GRAIL_SWAP1(arr + (L - 1), arr + (L - 3)); } arr -= 2; h = 2; } for (; h < K; h *= 2) { p0 = 0; p1 = L - 2 * h; while (p0 <= p1) { GRAIL_MERGE_LEFT(arr + p0, h, h, -h); p0 += 2 * h; } rest = L - p0; if (rest > h) { GRAIL_MERGE_LEFT(arr + p0, h, rest - h, -h); } else { GRAIL_ROTATE(arr + p0 - h, h, rest); } arr -= h; } restk = L % (2 * K); p = L - restk; if (restk <= K) { GRAIL_ROTATE(arr + p, restk, K); } else { GRAIL_MERGE_RIGHT(arr + p, K, restk - K, K); } while (p > 0) { p -= 2 * K; GRAIL_MERGE_RIGHT(arr + p, K, K, K); } } /* arr - starting array. arr[-lblock..-1] - buffer (if havebuf). lblock - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded keys - arrays of keys, in same order as blocks. key0, nblock2=0 is possible. */ static void GRAIL_MERGE_BUFFERS_LEFT(SORT_TYPE *keys, SORT_TYPE *midkey, SORT_TYPE *arr, int nblock, int lblock, int havebuf, int nblock2, int llast) { int l, prest, lrest, frest, pidx, cidx, fnext; if (nblock == 0) { l = nblock2 * lblock; if (havebuf) { GRAIL_MERGE_LEFT(arr, l, llast, -lblock); } else { GRAIL_MERGE_WITHOUT_BUFFER(arr, l, llast); } return; } lrest = lblock; frest = SORT_CMP_A(keys, midkey) < 0 ? 0 : 1; pidx = lblock; for (cidx = 1; cidx < nblock; cidx++, pidx += lblock) { prest = pidx - lrest; fnext = SORT_CMP_A(keys + cidx, midkey) < 0 ? 0 : 1; if (fnext == frest) { if (havebuf) { GRAIL_SWAP_N(arr + prest - lblock, arr + prest, lrest); } prest = pidx; lrest = lblock; } else { if (havebuf) { GRAIL_SMART_MERGE_WITH_BUFFER(arr + prest, &lrest, &frest, lblock, lblock); } else { GRAIL_SMART_MERGE_WITHOUT_BUFFER(arr + prest, &lrest, &frest, lblock); } } } prest = pidx - lrest; if (llast) { if (frest) { if (havebuf) { GRAIL_SWAP_N(arr + prest - lblock, arr + prest, lrest); } prest = pidx; lrest = lblock * nblock2; frest = 0; } else { lrest += lblock * nblock2; } if (havebuf) { GRAIL_MERGE_LEFT(arr + prest, lrest, llast, -lblock); } else { GRAIL_MERGE_WITHOUT_BUFFER(arr + prest, lrest, llast); } } else { if (havebuf) { GRAIL_SWAP_N(arr + prest, arr + (prest - lblock), lrest); } } } static void GRAIL_LAZY_STABLE_SORT(SORT_TYPE *arr, int L) { int m, h, p0, p1, rest; for (m = 1; m < L; m += 2) { if (SORT_CMP_A(arr + m - 1, arr + m) > 0) { GRAIL_SWAP1(arr + (m - 1), arr + m); } } for (h = 2; h < L; h *= 2) { p0 = 0; p1 = L - 2 * h; while (p0 <= p1) { GRAIL_MERGE_WITHOUT_BUFFER(arr + p0, h, h); p0 += 2 * h; } rest = L - p0; if (rest > h) { GRAIL_MERGE_WITHOUT_BUFFER(arr + p0, h, rest - h); } } } /* keys are on the left of arr. Blocks of length LL combined. We'll combine them in pairs LL and nkeys are powers of 2. (2*LL/lblock) keys are guarantied */ static void GRAIL_COMBINE_BLOCKS(SORT_TYPE *keys, SORT_TYPE *arr, int len, int LL, int lblock, int havebuf, SORT_TYPE *xbuf) { int M, b, NBlk, midkey, lrest, u, p, v, kc, nbl2, llast; SORT_TYPE *arr1; M = len / (2 * LL); lrest = len % (2 * LL); if (lrest <= LL) { len -= lrest; lrest = 0; } if (xbuf) { SORT_TYPE_CPY(xbuf, arr - lblock, lblock); } for (b = 0; b <= M; b++) { if (b == M && lrest == 0) { break; } arr1 = arr + b * 2 * LL; NBlk = (b == M ? lrest : 2 * LL) / lblock; SMALL_SORT(keys, NBlk + (b == M ? 1 : 0)); midkey = LL / lblock; for (u = 1; u < NBlk; u++) { p = u - 1; for (v = u; v < NBlk; v++) { kc = SORT_CMP_A(arr1 + p * lblock, arr1 + v * lblock); if (kc > 0 || (kc == 0 && SORT_CMP_A(keys + p, keys + v) > 0)) { p = v; } } if (p != u - 1) { GRAIL_SWAP_N(arr1 + (u - 1)*lblock, arr1 + p * lblock, lblock); GRAIL_SWAP1(keys + (u - 1), keys + p); if (midkey == u - 1 || midkey == p) { midkey ^= (u - 1)^p; } } } nbl2 = llast = 0; if (b == M) { llast = lrest % lblock; } if (llast != 0) { while (nbl2 < NBlk && SORT_CMP_A(arr1 + NBlk * lblock, arr1 + (NBlk - nbl2 - 1)*lblock) < 0) { nbl2++; } } if (xbuf) { GRAIL_MERGE_BUFFERS_LEFT_WITH_X_BUF(keys, keys + midkey, arr1, NBlk - nbl2, lblock, nbl2, llast); } else { GRAIL_MERGE_BUFFERS_LEFT(keys, keys + midkey, arr1, NBlk - nbl2, lblock, havebuf, nbl2, llast); } } if (xbuf) { for (p = len; --p >= 0;) { arr[p] = arr[p - lblock]; } SORT_TYPE_CPY(arr - lblock, xbuf, lblock); } else if (havebuf) { while (--len >= 0) { GRAIL_SWAP1(arr + len, arr + len - lblock); } } } static void GRAIL_COMMON_SORT(SORT_TYPE *arr, int Len, SORT_TYPE *extbuf, int LExtBuf) { int lblock, nkeys, findkeys, ptr, cbuf, lb, nk; int havebuf, chavebuf; long long s; if (Len <= SMALL_SORT_BND) { SMALL_SORT(arr, Len); return; } lblock = 1; while (lblock * lblock < Len) { lblock *= 2; } nkeys = (Len - 1) / lblock + 1; findkeys = GRAIL_FIND_KEYS(arr, Len, nkeys + lblock); havebuf = 1; if (findkeys < nkeys + lblock) { if (findkeys < 4) { GRAIL_LAZY_STABLE_SORT(arr, Len); return; } nkeys = lblock; while (nkeys > findkeys) { nkeys /= 2; } havebuf = 0; lblock = 0; } ptr = lblock + nkeys; cbuf = havebuf ? lblock : nkeys; if (havebuf) { GRAIL_BUILD_BLOCKS(arr + ptr, Len - ptr, cbuf, extbuf, LExtBuf); } else { GRAIL_BUILD_BLOCKS(arr + ptr, Len - ptr, cbuf, NULL, 0); } /* 2*cbuf are built */ while (Len - ptr > (cbuf *= 2)) { lb = lblock; chavebuf = havebuf; if (!havebuf) { if (nkeys > 4 && nkeys / 8 * nkeys >= cbuf) { lb = nkeys / 2; chavebuf = 1; } else { nk = 1; s = (long long)cbuf * findkeys / 2; while (nk < nkeys && s != 0) { nk *= 2; s /= 8; } lb = (2 * cbuf) / nk; } } GRAIL_COMBINE_BLOCKS(arr, arr + ptr, Len - ptr, cbuf, lb, chavebuf, chavebuf && lb <= LExtBuf ? extbuf : NULL); } SMALL_SORT(arr, ptr); GRAIL_MERGE_WITHOUT_BUFFER(arr, ptr, Len - ptr); } void GRAIL_SORT(SORT_TYPE *arr, size_t Len) { GRAIL_COMMON_SORT(arr, (int)Len, NULL, 0); } void GRAIL_SORT_FIXED_BUFFER(SORT_TYPE *arr, size_t Len) { SORT_TYPE ExtBuf[GRAIL_EXT_BUFFER_LENGTH]; GRAIL_COMMON_SORT(arr, (int)Len, ExtBuf, GRAIL_EXT_BUFFER_LENGTH); } void GRAIL_SORT_DYN_BUFFER(SORT_TYPE *arr, size_t Len) { int L = 1; SORT_TYPE *ExtBuf; while (L * L < Len) { L *= 2; } ExtBuf = SORT_NEW_BUFFER(L); if (ExtBuf == NULL) { GRAIL_SORT_FIXED_BUFFER(arr, Len); } else { GRAIL_COMMON_SORT(arr, (int)Len, ExtBuf, L); SORT_DELETE_BUFFER(ExtBuf); } } /****** classic MergeInPlace *************/ static void GRAIL_REC_MERGE(SORT_TYPE *A, int L1, int L2) { int K, k1, k2, m1, m2; if (L1 < 3 || L2 < 3) { GRAIL_MERGE_WITHOUT_BUFFER(A, L1, L2); return; } if (L1 < L2) { K = L1 + L2 / 2; } else { K = L1 / 2; } k1 = k2 = GRAIL_BIN_SEARCH_LEFT(A, L1, A + K); if (k2 < L1 && SORT_CMP_A(A + k2, A + K) == 0) { k2 = GRAIL_BIN_SEARCH_RIGHT(A + k1, L1 - k1, A + K) + k1; } m1 = GRAIL_BIN_SEARCH_LEFT(A + L1, L2, A + K); m2 = m1; if (m2 < L2 && SORT_CMP_A(A + L1 + m2, A + K) == 0) { m2 = GRAIL_BIN_SEARCH_RIGHT(A + L1 + m1, L2 - m1, A + K) + m1; } if (k1 == k2) { GRAIL_ROTATE(A + k2, L1 - k2, m2); } else { GRAIL_ROTATE(A + k1, L1 - k1, m1); if (m2 != m1) { GRAIL_ROTATE(A + (k2 + m1), L1 - k2, m2 - m1); } } GRAIL_REC_MERGE(A + (k2 + m2), L1 - k2, L2 - m2); GRAIL_REC_MERGE(A, k1, m1); } void REC_STABLE_SORT(SORT_TYPE *arr, size_t L) { int m, h, p0, p1, rest; for (m = 1; m < L; m += 2) { if (SORT_CMP_A(arr + m - 1, arr + m) > 0) { GRAIL_SWAP1(arr + (m - 1), arr + m); } } for (h = 2; h < L; h *= 2) { p0 = 0; p1 = (int)(L - 2 * h); while (p0 <= p1) { GRAIL_REC_MERGE(arr + p0, h, h); p0 += 2 * h; } rest = (int)(L - p0); if (rest > h) { GRAIL_REC_MERGE(arr + p0, h, rest - h); } } } /* Bubble sort implementation based on Wikipedia article https://en.wikipedia.org/wiki/Bubble_sort */ void BUBBLE_SORT(SORT_TYPE *dst, const size_t size) { size_t n = size; while (n) { size_t i, newn = 0U; for (i = 1U; i < n; ++i) { if (SORT_CMP(dst[i - 1U], dst[i]) > 0) { SORT_SWAP(dst[i - 1U], dst[i]); newn = i; } } n = newn; } } #undef SORT_SAFE_CPY #undef SORT_TYPE_CPY #undef SORT_TYPE_MOVE #undef SORT_NEW_BUFFER #undef SORT_DELETE_BUFFER #undef QUICK_SORT #undef MEDIAN #undef SORT_CONCAT #undef SORT_MAKE_STR1 #undef SORT_MAKE_STR #undef SORT_NAME #undef SORT_TYPE #undef SORT_CMP #undef TEMP_STORAGE_T #undef TIM_SORT_RUN_T #undef PUSH_NEXT #undef SORT_SWAP #undef SORT_CONCAT #undef SORT_MAKE_STR1 #undef SORT_MAKE_STR #undef BINARY_INSERTION_FIND #undef BINARY_INSERTION_SORT_START #undef BINARY_INSERTION_SORT #undef REVERSE_ELEMENTS #undef COUNT_RUN #undef TIM_SORT #undef TIM_SORT_RESIZE #undef TIM_SORT_COLLAPSE #undef TIM_SORT_RUN_T #undef TEMP_STORAGE_T #undef MERGE_SORT #undef MERGE_SORT_RECURSIVE #undef MERGE_SORT_IN_PLACE #undef MERGE_SORT_IN_PLACE_RMERGE #undef MERGE_SORT_IN_PLACE_BACKMERGE #undef MERGE_SORT_IN_PLACE_FRONTMERGE #undef MERGE_SORT_IN_PLACE_ASWAP #undef GRAIL_SWAP1 #undef REC_STABLE_SORT #undef GRAIL_REC_MERGE #undef GRAIL_SORT_DYN_BUFFER #undef GRAIL_SORT_FIXED_BUFFER #undef GRAIL_COMMON_SORT #undef GRAIL_SORT #undef GRAIL_COMBINE_BLOCKS #undef GRAIL_LAZY_STABLE_SORT #undef GRAIL_MERGE_WITHOUT_BUFFER #undef GRAIL_ROTATE #undef GRAIL_BIN_SEARCH_LEFT #undef GRAIL_BUILD_BLOCKS #undef GRAIL_FIND_KEYS #undef GRAIL_MERGE_BUFFERS_LEFT_WITH_X_BUF #undef GRAIL_BIN_SEARCH_RIGHT #undef GRAIL_MERGE_BUFFERS_LEFT #undef GRAIL_SMART_MERGE_WITH_X_BUF #undef GRAIL_MERGE_LEFT_WITH_X_BUF #undef GRAIL_SMART_MERGE_WITHOUT_BUFFER #undef GRAIL_SMART_MERGE_WITH_BUFFER #undef GRAIL_MERGE_RIGHT #undef GRAIL_MERGE_LEFT #undef GRAIL_SWAP_N #undef SQRT_SORT #undef SQRT_SORT_BUILD_BLOCKS #undef SQRT_SORT_MERGE_BUFFERS_LEFT_WITH_X_BUF #undef SQRT_SORT_MERGE_DOWN #undef SQRT_SORT_MERGE_LEFT_WITH_X_BUF #undef SQRT_SORT_MERGE_RIGHT #undef SQRT_SORT_SWAP_N #undef SQRT_SORT_SWAP_1 #undef SQRT_SORT_SMART_MERGE_WITH_X_BUF #undef SQRT_SORT_SORT_INS #undef SQRT_SORT_COMBINE_BLOCKS #undef SQRT_SORT_COMMON_SORT #undef SORT_CMP_A #undef BUBBLE_SORT