PDA

View Full Version : Radix Sorting



R0b0t1
11-06-2008, 01:49 AM
Yeah, it is supposedly the best (and so far proven the best), although it takes some work to get it to sort negatives (which mine does not). It will, however, also sort doubles and floats (I think Delphi only has doubles, IIRC). Note this is a pretty... Er... Badly coded method...



#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#define ARRAY_SIZE 5
#define radixsort_f(source, temp, N) radix_32(0, 4, N, (int *)source, (int *)temp);
#define radixsort_d(source, temp, N) radix_64(0, 8, N, (uint64_t *)source, (uint64_t *)temp);

void radix_32(int byte, int bytemax, int N, int *source, int *dest)
{
int count[256], index[256], i;

memset(count, 0, sizeof (count));

if(bytemax % 2 != 0) bytemax++;

for(i = 0; i < N; i++)
count[((source[i]) >> (byte * 8)) & 0xFF]++;

index[0] = 0;
for(i = 1; i < 256; i++)
index[i] = index[i - 1] + count[i - 1];

for(i = 0; i < N; i++)
dest[index[((source[i]) >> (byte * 8)) & 0xFF]++] = source[i];

if(byte + 1 <= bytemax) radix_32(byte + 1, bytemax, N, dest, source);
else return;
}

void radix_64(int byte, int bytemax, int N, uint64_t *source, uint64_t *dest)
{
int count[256], index[256], i;

memset(count, 0, sizeof (count));

if(bytemax % 2 != 0) bytemax++;

for(i = 0; i < N; i++)
count[((source[i]) >> (byte * 8)) & 0xFF]++;

index[0] = 0;
for(i = 1; i < 256; i++)
index[i] = index[i - 1] + count[i - 1];

for(i = 0; i < N; i++)
dest[index[((source[i]) >> (byte * 8)) & 0xFF]++] = source[i];

if(byte + 1 <= bytemax) radix_64(byte + 1, bytemax, N, dest, source);
else return;
}

void make_random(int *data, int N)
{
int i;
for(i = 0; i < N; i++) data[i] = abs(rand());
}

int main(void)
{
double data[ARRAY_SIZE] = {0, 2.123, .1337, 100, .42};
double temp[ARRAY_SIZE];
int i;

//make_random((int *)data, ARRAY_SIZE);
radixsort_d(data, temp, ARRAY_SIZE);
for(i = 0; i < ARRAY_SIZE; i++) printf("%f\n", data[i]);
return 0;
}

Wizzup?
11-06-2008, 10:09 AM
I think CountSort is the fastest algo. (Turf Sort = Count Sort), you need to know max range to optimize it though.


#include <stdlib.h>
#include <stdio.h>
#include <time.h>

#ifndef GENERATOR_RANGE
#define GENERATOR_RANGE 1000
#endif

#ifndef GENERATOR_LENGTH
#define GENERATOR_LENGTH 10000000
#endif

#if GENERATOR_RANGE > RAND_MAX
#error Generator range out of bounds
#endif

#ifndef NULL
#define NULL 0x0
#endif

// Core Functions
unsigned int generate_random_list(unsigned int** newlist,unsigned int items,unsigned int range);
unsigned int list_sorted(unsigned int* list,unsigned int items);
int qsort_helper_callback(const void * a, const void * b);
unsigned int quantum_sort(unsigned int* list,unsigned int items,
unsigned int base,unsigned int range);
unsigned int turf_sort(unsigned int* list,unsigned int items,unsigned int range);

int main()
{
// Initialize the Random Number Generator
srand(time(NULL));

printf("Simpel program demonstrating quantum sort\n");
printf("Generating a random list containing %u items\n",GENERATOR_LENGTH);
unsigned int *list_a, *backup;
generate_random_list(&list_a,GENERATOR_LENGTH,GENERATOR_RANGE);
backup = malloc(sizeof(unsigned int)*GENERATOR_LENGTH);
memcpy(backup,list_a,sizeof(unsigned int)*GENERATOR_LENGTH);

// Perform Quick sort
clock_t quickstart = clock();
qsort(list_a,GENERATOR_LENGTH,sizeof(unsigned int),qsort_helper_callback);
clock_t quickstop = clock();
if(!list_sorted(list_a,GENERATOR_LENGTH))
printf("Quicksort list not sorted!\n");
free(list_a);
list_a = backup;
backup = NULL;

// Perform Quantum sort
clock_t quantumstart = clock();
turf_sort(list_a,GENERATOR_LENGTH,GENERATOR_RANGE) ;
//quantum_sort(list_a,GENERATOR_LENGTH,0,GENERATOR_R ANGE);
clock_t quantumstop = clock();
if(!list_sorted(list_a,GENERATOR_LENGTH))
printf("Turf Sort list not sorted!\n");

// Asses performance
printf("Clocks per second: %u\n", CLOCKS_PER_SEC);
printf("Quicksort clocks: %u\nTurf Sort Clocks clocks: %u\n",
quickstop-quickstart,
quantumstop-quantumstart);
free(list_a);
return 0;
}

unsigned int generate_random_list(unsigned int** newlist,unsigned int items,unsigned int range)
{
int i=0;
*newlist = malloc(sizeof(unsigned int)*items);
for(i=0;i<items;i++)
(*newlist)[i] = rand() % range;
return 0;
}

unsigned int list_sorted(unsigned int* list,unsigned int items)
{
int i=0;
for(i=0;i<items-1;i++)
if(list[i]>list[i+1])
return 0;
return 1;
}

int qsort_helper_callback(const void * a, const void * b)
{
return ( *(unsigned int*)a - *(unsigned int*)b );
}

unsigned int quantum_sort(unsigned int* list,unsigned int items,
unsigned int base,unsigned int range)
{
int i=0;
unsigned int* qspace = malloc(range*items*sizeof(unsigned int));
unsigned int* rspace = malloc(range*sizeof(unsigned int));
for(i=0;i<range;i++)
rspace[i]=0;
// Perform quantum alike sorting
for(i=0;i<items;i++)
{
// Compute range index
register unsigned int rx = list[i]-base;
qspace[(rx*items)+rspace[rx]] = list[i];
rspace[rx]++;
}
// List index
unsigned int lx = 0;
// Write back sorted list
for(i=0;i<range;i++)
{
memcpy(list+lx,&(qspace[i*items]),rspace[i]*sizeof(unsigned int));
lx += rspace[i];
}
free(rspace);
free(qspace);
return 0;
}

unsigned int turf_sort(unsigned int* list,unsigned int items,unsigned int range)
{
unsigned int* tmp = malloc(range * sizeof(unsigned int));
unsigned int i = 0;
for(i=0;i<range;i++)
tmp[i] = 0;
for(i=0;i<items;i++)
tmp[list[i]]++;
unsigned int c = 0;
unsigned int j = 0;
for(i=0;i<range;i++)
for(j=0;j<tmp[i];j++)
list[c++] = i;
free(tmp);
return 0;
}


Quicksort clocks: 1157
Turf Sort Clocks clocks: 32

boberman
11-06-2008, 05:01 PM
I think CountSort is the fastest algo. (Turf Sort = Count Sort), you need to know max range to optimize it though.

Quicksort clocks: 1157
Turf Sort Clocks clocks: 32

CountSort and radix sort are both O(n). count sort will be slightly faster, but radix sorting is far more flexible in the long run. (it can be made to handle negatives, doubles, floats, ect and doesn't need to know how big the array is).

The reason you would use QuickSort or Heap sort over radix sorting or count sorting is because they are even more flexible then either radix sorting or count sorting. As well, Quicksort and heap sort have smaller memory footprints then count sorting or radix sorting. and can be more easily optimized for multiple threaded optimizations.

Radix sorting can be made multithreaded using MSD radix sorting, but its a PITA to make it so anything is actually gained from it.

ShowerThoughts
11-06-2008, 05:58 PM
for what would you use it?

boberman
11-06-2008, 06:08 PM
for what would you use it?

Sorting has many applications. From from finding the shortest distance of a path, to sorting database records. Sometimes having something in order will save processing time in the long run.

Another good reason to introduce sorting is because it is a good way to learn big oh notation.

ShowerThoughts
11-06-2008, 06:50 PM
Sorting has many applications. From from finding the shortest distance of a path, to sorting database records. Sometimes having something in order will save processing time in the long run.

Another good reason to introduce sorting is because it is a good way to learn big oh notation.

Okay, thank you :)

R0b0t1
11-07-2008, 12:44 AM
Well, do you need to use the exact largest? 'cause you could just make the largest number 2^32.

To be honest I'd just choose radix sorting. I mean, I've made it, and I'm confident with it. Can a count sort use negatives?


EDIT: No, apparently not, the wrong (higher) max value slows it down :|

EDIT: Besides the need for insane amount of memory for anything over about 10,000.

boberman
11-07-2008, 03:28 AM
Well, do you need to use the exact largest? 'cause you could just make the largest number 2^32.

To be honest I'd just choose radix sorting. I mean, I've made it, and I'm confident with it. Can a count sort use negatives?


EDIT: No, apparently not, the wrong (higher) max value slows it down :|

EDIT: Besides the need for insane amount of memory for anything over about 10,000.

Count sort can do negatives pretty easily actually. just assume that each number is off by the negative amount and Viola, you have a count sort that can do negative numbers.

As for the big number problem. Yep, its a problem :P.

It is generally faster then a radix sort though as it needs only 2 passes to sort the list. Radix sorting can take anywhere from 2-100 depending on implementation. (so in some cases radix sorting can be faster)

Wizzup?
11-07-2008, 07:12 AM
Well, do you need to use the exact largest? 'cause you could just make the largest number 2^32.

To be honest I'd just choose radix sorting. I mean, I've made it, and I'm confident with it. Can a count sort use negatives?


EDIT: No, apparently not, the wrong (higher) max value slows it down :|

EDIT: Besides the need for insane amount of memory for anything over about 10,000.

2^32 will use a massive amount of memory.


unsigned int* tmp = malloc(range * sizeof(unsigned int));

The program will probably stop there, allocating too much memory. Say we have a range 10000 numbers. 10000 * 2^32 * 32
Where the last is sizeof(unsigned int).

Countsort can easily do negative values, too. As long as you know what the range is. If the numbers go from -100 to 200, you just add 100 to all the array indices, make the max index 300 and when you put then all back, do -100. 100 is called the base here.

I made a SortTPAPoints with CountSort. Compare CountSort to QuickSort for TPA's here:
program New;

procedure QuickTPASort(var A: TIntegerArray; var B: TPointArray; iLo, iHi: Integer; SortUp: Boolean);
var
Lo, Hi, Mid, T: Integer;
TP: TPoint;
begin
if (Length(A) <> Length(B)) then Exit;
Lo := iLo;
Hi := iHi;
Mid := A[(Lo + Hi) shr 1];
repeat
if SortUp then
begin
while (A[Lo] < Mid) do Inc(Lo);
while (A[Hi] > Mid) do Dec(Hi);
end else
begin
while (A[Lo] > Mid) do Inc(Lo);
while (A[Hi] < Mid) do Dec(Hi);
end;
if (Lo <= Hi) then
begin
T := A[Lo];
A[Lo] := A[Hi];
A[Hi] := T;
TP := B[Lo];
B[Lo] := B[Hi];
B[Hi] := TP;
Inc(Lo);
Dec(Hi);
end;
until Lo > Hi;
if (Hi > iLo) then QuickTPASort(A, B, iLo, Hi, SortUp);
if (Lo < iHi) then QuickTPASort(A, B, Lo, iHi, SortUp);
end;

procedure SortTPAFrom(var a: TPointArray; const From: TPoint; DistArr: TIntegerArray);
var
i, l: Integer;
begin
l := High(a);
if (l < 0) then Exit;
SetLength(DistArr, l + 1);
QuickTPASort(DistArr, a, 0, l, True);
end;

Function FastTPASort(TPA: TPointArray; Dists: TIntegerArray; maxDist: Integer; CloseFirst: Boolean): TPointArray;

Var
Oc, D: TIntegerArray;
I, H, J: Integer;
ind: array of array of integer;

Begin
SetLength(Oc, maxDist + 1);
H := High(Dists);
For I := 0 To H Do // Get the occurance of each dist, and store it in Oc.
Oc[Dists[I]] := Oc[Dists[I]] + 1;

//For I := 0 To maxDist Do
// WriteLn('Distance ' + IntToStr(I) + ' has been found ' + IntToStr(Oc[I]) + ' times');

SetLength(D, maxDist + 1);
SetLength(ind, maxDist + 1);

For I := 0 To maxDist Do // set the ind length of each ind[distance] to Oc[distance]
SetLength(ind[i], Oc[i] + 1);

For I := 0 To H Do // Performs the actual index sorting
Begin
ind[Dists[I]][D[Dists[I]]] := I; // put an index (of Dists AND TPA) into a distance array.
D[Dists[i]] := D[Dists[i]] + 1; // inc by one, so we won't override previous indexes.
End;

SetLength(Result, H + 1);
H := 0;
If CloseFirst Then
Begin
For I := 0 To maxDist Do // Put back to result
For J := 0 To oC[I] - 1 Do
Begin
Result[H] := TPA[ind[I][J]];
H := H + 1;
//WriteLn('D: ' + IntToStr(Dists[ind[I][J]]) + '; P (' + IntToStr(TPA[ind[I][J]].X) + ', ' + IntToStr(TPA[ind[I][J]].Y) + ')');
End;
End
Else
For I := maxDist Downto 0 Do // Put back to result
For J := 0 To oC[I] - 1 Do
Begin
Result[H] := TPA[ind[I][J]];
H := H + 1;
//WriteLn('D: ' + IntToStr(Dists[ind[I][J]]) + '; P (' + IntToStr(TPA[ind[I][J]].X) + ', ' + IntToStr(TPA[ind[I][J]].Y) + ')');
End;
End;

Const
Wid = 100;
Hei = 100;

Var
F: TPoint;
TPA, T: TPointArray;
D: TIntegerArray;
I, Ti: Integer;

begin
ClearDebug;
F := Point(100, 100);
SetLength(TPA, 100000);
For I := 0 To High(TPA) Do
TPA[I] := Point(Random(Wid), Random(Hei));
SetLength(D, Length(TPA));
For I := 0 To High(TPA) Do
D[I] := Distance(F.X, F.Y, TPA[I].X, TPA[I].Y);

Ti := GetSystemTime;
T := FastTPASort(TPA, D, ceil(sqrt(sqr(Wid) + sqr(Hei))), False);
WriteLn('FastTPASort: ' + IntToStr(GetSystemTime - Ti) + ' ms')

Ti := GetSystemTime;
SortTPAFrom(TPA, F, D);
WriteLn('QuickTPASort: ' + IntToStr(GetSystemTime - Ti) + ' ms')

For I := 0 To High(TPA) - 1 Do
If Distance(F.X, F.Y, T[I].X, T[I].Y) < Distance(F.X, F.Y, T[I + 1].X, T[I + 1].Y) Then
Begin
WriteLn(IntToStr(Distance(F.X, F.Y, T[I].X, T[I].Y)));
WriteLn(IntToStr(Distance(F.X, F.Y, T[I + 1].X, T[I + 1].Y)));
WriteLn('Fail Wizzup! You suck at life');
Break;
End;
end.

R0b0t1
11-07-2008, 09:55 PM
Yeah, I figured out the memory part if you couldn't tell. Anyway, it seems like you'd need to "change" it for almost every set of varied numbers you have... Which I don't really think is that good.

Wizzup?
11-08-2008, 01:24 PM
Yeah, I figured out the memory part if you couldn't tell. Anyway, it seems like you'd need to "change" it for almost every set of varied numbers you have... Which I don't really think is that good.

Not true. If you look at my previous example, you could pass a variable with the Lowest and the Highest value. Make the lowest the base, when you work with negative values. Then put back the result the same way as I did in the example. It's still faster than Radix Sort or Quick Sort.