#ifndef RADIXSORT_H
#define RADIXSORT_H
/*******************************************************************
* Recommended compiler options: *
* -v -O3 -march=native -funroll-loops *
* 250 million integers in the range [-2147483647,2147483647] 2.42s *
* 100 million integers in the range [-2147483647,2147483647] 0.98s *
*******************************************************************/
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/shm.h>
typedef unsigned int uint32_t;
/*******************************************************************
* Flips all the bits if negative. *
* Flips only the sign bit if positive. *
* Allows for both negative and positive values. *
* Returns one of the two bytes of the integer. *
*******************************************************************/
static uint32_t position(int number, int pass) {
int mask;
if (number <= 0) mask = 0x80000000;
else mask = (number >> 31) | 0x80000000;
uint32_t out = number ^ mask;
return pass == 0 ? out & 0xffff : (out >> 16) & 0xffff;
}
/*******************************************************************
* Creates a histogram using shared memory and fork processes for *
* parallelism. This creates a parallel conquer and divide *
* algorithm, which merges into one histogram to use later on in *
* the code. Note: This will not work in windows, with unistd.h. *
*******************************************************************/
static void histogram(int *array,int size,int pass,uint32_t *hist) {
int i, j;
for (i=0;i<8;i++) {
pid_t pid = fork();
if (pid < 0) _exit(0);
if (pid == 0) {
const int start = (i * size) >> 3;
const int stop = i == 7 ? size : ((i + 1) * size) >> 3;
const int curr = i << 16;
for (j=start;j<stop;++j)
hist[curr + position(array[j], pass)]++;
_exit(0);
}
}
for (i=0;i<8;i++) wait(NULL);
for (i=1;i<8;i++) {
const int pos = i << 16;
for (j=0;j<65536;j++)
hist[j] += hist[pos + j];
}
for (i=1;i<65536;i++)
hist[i] += hist[i-1];
}
/*******************************************************************
* Counting sorts each of the 2 16-bit bytes of each integers. *
* This eliminates the need for buckets, for each pass. *
* Bytes used to eliminate the number of passes down from k (length *
* of the largest integer) down to 2 passes. *
*******************************************************************/
static void radixSort(int *array, int size) {
int i;
const uint32_t arrSize = sizeof(uint32_t) << 19;
int shmMal = shmget(12345, arrSize, IPC_CREAT | 0666);
if (shmMal < 0) return;
uint32_t *hist = (uint32_t *) shmat(shmMal, NULL, 0);
if ((int *) hist == (int *) -1) return;
int *temp
= (int *) malloc(size
* sizeof(int)); if (!temp) return;
histogram(array, size, 0, hist);
for (i=size-1;i>=0;i--)
temp[--hist[position(array[i], 0)]] = array[i];
histogram(temp, size, 1, hist);
for (i=size-1;i>=0;i--)
array[--hist[position(temp[i], 1)]] = temp[i];
shmdt((void *) hist);
shmctl(shmMal, IPC_RMID, 0);
}
#endif