Velocity Reviews > Speaking of selection algorithms, I had never heard of heapselect, so I wrote one.

# Speaking of selection algorithms, I had never heard of heapselect, so I wrote one.

user923005
Guest
Posts: n/a

 06-06-2007
/*
Tragically, I have bollixed it up badly. It's such a simple
algorithm, but somehow I've gone off, because it does not always
work. Any ideas where I went off?
*/

#include <stdlib.h>
#include <string.h>
#include <errno.h>

/*
We will swap sizeof(long) bytes at a time if possible (if the data is
long aligned),
else a char at a time.
*/
static void swapfunc(char *a, char *b, const size_t e_size, const
int swaptype)
{
if (swaptype <= 1) {
size_t i = (e_size) / sizeof(long);
long *pi = (long *) (void *) a;
long *pj = (long *) (void *) b;
do {
const long t = *pi;
*pi++ = *pj;
*pj++ = t;
} while (--i > 0);
} else {
size_t i = (e_size);
char *pi = (char *) (void *) a;
char *pj = (char *) (void *) b;
do {
const char t = *pi;
*pi++ = *pj;
*pj++ = t;
} while (--i > 0);
}
}
/*
Citation: http://www.ics.uci.edu/~eppstein/161/960125.html

This beautiful algorithm:

heapselect(L,k)
{
heap H = heapify(L)
for (i = 1; i < k; i++) remove min(H)
return min(H)
}

has slowly transmongrified itself into this awful slop:
*/
int heapselect(void *voidbase, const size_t rank, const
size_t nmemb, const size_t e_size, int (*cmp) (const void *, const
void *))
{
char *base = voidbase; /* Base of the array */
char *tmp; /* will hold temporary element after
allocation */
size_t i, /* array position indicator */
i_stride, /* array position indicator, adjusted
for element stride */
j, /* array position indicator */
j_stride, /* array position indicator, adjusted
for element stride */
left, /* array position indicator */
mid, /* array position indicator */
mid_stride, /* array position indicator, adjusted
for element stride */
right; /* array position indicator */
/* swaptype will tell us about the alignment of the data type, for
opimization of exchange */
const unsigned swaptype = ((char *) (base) - (char *) 0) %
sizeof(long) || (e_size) %sizeof(long) ? 2 : (e_size) ==
sizeof(long) ? 0 : 1;

if (e_size == 0 || rank > nmemb) {
return -EDOM;
}
if (nmemb <= 1) {
return 0;
}
left = (rank + 1) / 2;
right = rank - 1;

if (!(tmp = malloc(e_size))) {
return -ENOMEM;
}

while (0 < left) {
left--;
i = left;
i_stride = i * e_size;
memcpy(tmp, base + i_stride, e_size);
while ((j = 2 * i + 1) <= right) {
j_stride = j * e_size;
char *current = base + j_stride;
if (j < right && (*cmp) (current, current + e_size) < 0) {
j++;
j_stride += e_size;
}
if ((*cmp) (base + j_stride, tmp) < 0) {
break;
}
memcpy(base + i_stride, base + j_stride, e_size);
i = j;
i_stride = j_stride;
}
if (i != left) {
memcpy(base + i_stride, tmp, e_size);
}
}

for (mid = right + 1; mid < nmemb; mid++) {
mid_stride = mid * e_size;
if ((*cmp) (base + mid_stride, base) < 0) {

if (swaptype == 0) {
char *p = base + mid_stride;
const long t = *(long *) (void *) p;
*(long *) (void *) p = *(long *) (void *) base;
*(long *) (void *) base = t;
} else
swapfunc(base + mid_stride, base, e_size, swaptype);

i = 0;
i_stride = 0;
memcpy(tmp, base + i_stride, e_size);
while ((j = 2 * i + 1) <= right) {
j_stride = j * e_size;
char *current = base + j_stride;
if (j < right && (*cmp) (current, current + e_size) <
0) {
j++;
j_stride += e_size;
}
if ((*cmp) (base + j_stride, tmp) < 0) {
break;
}
memcpy(base + i_stride, base + j_stride, e_size);
i = j;
i_stride = j_stride;
}
if (i != 0) {
memcpy(base + i_stride, tmp, e_size);
}
}
}

free(tmp);
return 0;
}

#ifdef UNIT_TEST

#include <time.h>
#include <stdio.h>

int arr[100000];

int compare(const void *aa, const void *bb)
{
int a = *(int *) aa;
int b = *(int *) bb;
if (a > b) return 1;
if (a < b) return -1;
return 0;
}

int main(void)
{
size_t index;
size_t rank;
size_t arrlen = sizeof arr / sizeof arr[0];
int position;
srand((unsigned) time(NULL));
for (index = 0; index < arrlen; index++) {
arr[index] = rand();
}
rank = rand() / (RAND_MAX / arrlen + 1);
position = heapselect(arr, rank, arrlen, sizeof arr[0], compare);
if (position < 0)
printf("Error in heapselect = %d\n", position);
else
printf("item (%u) is %d\n", (unsigned) rank, arr[position]);
qsort(arr, arrlen, sizeof arr[0], compare);
printf("sorted item (%u) is %d\n", (unsigned) rank, arr[rank]);
return 0;
}

#endif

user923005
Guest
Posts: n/a

 06-06-2007
Posted to the wrong group, sorry. I meant news:comp.programming but
forgot where I was.

On Jun 5, 10:57 pm, user923005 <(E-Mail Removed)> wrote:
> /*
> Tragically, I have bollixed it up badly. It's such a simple
> algorithm, but somehow I've gone off, because it does not always
> work. Any ideas where I went off?
> */
>
> #include <stdlib.h>
> #include <string.h>
> #include <errno.h>
>
> /*
> We will swap sizeof(long) bytes at a time if possible (if the data is
> long aligned),
> else a char at a time.
> */
> static void swapfunc(char *a, char *b, const size_t e_size, const
> int swaptype)
> {
> if (swaptype <= 1) {
> size_t i = (e_size) / sizeof(long);
> long *pi = (long *) (void *) a;
> long *pj = (long *) (void *) b;
> do {
> const long t = *pi;
> *pi++ = *pj;
> *pj++ = t;
> } while (--i > 0);
> } else {
> size_t i = (e_size);
> char *pi = (char *) (void *) a;
> char *pj = (char *) (void *) b;
> do {
> const char t = *pi;
> *pi++ = *pj;
> *pj++ = t;
> } while (--i > 0);
> }}
>
> /*
> Citation:http://www.ics.uci.edu/~eppstein/161/960125.html
>
> This beautiful algorithm:
>
> heapselect(L,k)
> {
> heap H = heapify(L)
> for (i = 1; i < k; i++) remove min(H)
> return min(H)
> }
>
> has slowly transmongrified itself into this awful slop:
> */
> int heapselect(void *voidbase, const size_t rank, const
> size_t nmemb, const size_t e_size, int (*cmp) (const void *, const
> void *))
> {
> char *base = voidbase; /* Base of the array */
> char *tmp; /* will hold temporary element after
> allocation */
> size_t i, /* array position indicator */
> i_stride, /* array position indicator, adjusted
> for element stride */
> j, /* array position indicator */
> j_stride, /* array position indicator, adjusted
> for element stride */
> left, /* array position indicator */
> mid, /* array position indicator */
> mid_stride, /* array position indicator, adjusted
> for element stride */
> right; /* array position indicator */
> /* swaptype will tell us about the alignment of the data type, for
> opimization of exchange */
> const unsigned swaptype = ((char *) (base) - (char *) 0) %
> sizeof(long) || (e_size) %sizeof(long) ? 2 : (e_size) ==
> sizeof(long) ? 0 : 1;
>
> if (e_size == 0 || rank > nmemb) {
> return -EDOM;
> }
> if (nmemb <= 1) {
> return 0;
> }
> left = (rank + 1) / 2;
> right = rank - 1;
>
> if (!(tmp = malloc(e_size))) {
> return -ENOMEM;
> }
>
> while (0 < left) {
> left--;
> i = left;
> i_stride = i * e_size;
> memcpy(tmp, base + i_stride, e_size);
> while ((j = 2 * i + 1) <= right) {
> j_stride = j * e_size;
> char *current = base + j_stride;
> if (j < right && (*cmp) (current, current + e_size) < 0) {
> j++;
> j_stride += e_size;
> }
> if ((*cmp) (base + j_stride, tmp) < 0) {
> break;
> }
> memcpy(base + i_stride, base + j_stride, e_size);
> i = j;
> i_stride = j_stride;
> }
> if (i != left) {
> memcpy(base + i_stride, tmp, e_size);
> }
> }
>
> for (mid = right + 1; mid < nmemb; mid++) {
> mid_stride = mid * e_size;
> if ((*cmp) (base + mid_stride, base) < 0) {
>
> if (swaptype == 0) {
> char *p = base + mid_stride;
> const long t = *(long *) (void *) p;
> *(long *) (void *) p = *(long *) (void *) base;
> *(long *) (void *) base = t;
> } else
> swapfunc(base + mid_stride, base, e_size, swaptype);
>
> i = 0;
> i_stride = 0;
> memcpy(tmp, base + i_stride, e_size);
> while ((j = 2 * i + 1) <= right) {
> j_stride = j * e_size;
> char *current = base + j_stride;
> if (j < right && (*cmp) (current, current + e_size) <
> 0) {
> j++;
> j_stride += e_size;
> }
> if ((*cmp) (base + j_stride, tmp) < 0) {
> break;
> }
> memcpy(base + i_stride, base + j_stride, e_size);
> i = j;
> i_stride = j_stride;
> }
> if (i != 0) {
> memcpy(base + i_stride, tmp, e_size);
> }
> }
> }
>
> free(tmp);
> return 0;
>
> }
>
> #ifdef UNIT_TEST
>
> #include <time.h>
> #include <stdio.h>
>
> int arr[100000];
>
> int compare(const void *aa, const void *bb)
> {
> int a = *(int *) aa;
> int b = *(int *) bb;
> if (a > b) return 1;
> if (a < b) return -1;
> return 0;
>
> }
>
> int main(void)
> {
> size_t index;
> size_t rank;
> size_t arrlen = sizeof arr / sizeof arr[0];
> int position;
> srand((unsigned) time(NULL));
> for (index = 0; index < arrlen; index++) {
> arr[index] = rand();
> }
> rank = rand() / (RAND_MAX / arrlen + 1);
> position = heapselect(arr, rank, arrlen, sizeof arr[0], compare);
> if (position < 0)
> printf("Error in heapselect = %d\n", position);
> else
> printf("item (%u) is %d\n", (unsigned) rank, arr[position]);
> qsort(arr, arrlen, sizeof arr[0], compare);
> printf("sorted item (%u) is %d\n", (unsigned) rank, arr[rank]);
> return 0;
>
> }
>
> #endif

user923005
Guest
Posts: n/a

 06-06-2007
On Jun 6, 12:09 am, CBFalconer <(E-Mail Removed)> wrote:
> user923005 wrote:
>
> > /*
> > Tragically, I have bollixed it up badly. It's such a simple
> > algorithm, but somehow I've gone off, because it does not always
> > work. Any ideas where I went off?
> > */

>
> ... snip 200 lines of code ...
>
> As you say, bollixed. Tear it up and start over. Use void
> pointers.

The problem with void pointers is that they have no stride.
So you can't move data with them.

Flash Gordon
Guest
Posts: n/a

 06-06-2007
user923005 wrote, On 06/06/07 19:16:
> On Jun 6, 12:09 am, CBFalconer <(E-Mail Removed)> wrote:
>> user923005 wrote:
>>
>>> /*
>>> Tragically, I have bollixed it up badly. It's such a simple
>>> algorithm, but somehow I've gone off, because it does not always
>>> work. Any ideas where I went off?
>>> */

>> ... snip 200 lines of code ...
>>
>> As you say, bollixed. Tear it up and start over. Use void
>> pointers.

>
> The problem with void pointers is that they have no stride.
> So you can't move data with them.

Unless you use memcpy/memmove. Of course, you need to know how much data
to move.
--
Flash Gordon

user923005
Guest
Posts: n/a

 06-06-2007
On Jun 6, 2:13 pm, Flash Gordon <(E-Mail Removed)> wrote:
> user923005 wrote, On 06/06/07 19:16:
>
> > On Jun 6, 12:09 am, CBFalconer <(E-Mail Removed)> wrote:
> >> user923005 wrote:

>
> >>> /*
> >>> Tragically, I have bollixed it up badly. It's such a simple
> >>> algorithm, but somehow I've gone off, because it does not always
> >>> work. Any ideas where I went off?
> >>> */
> >> ... snip 200 lines of code ...

>
> >> As you say, bollixed. Tear it up and start over. Use void
> >> pointers.

>
> > The problem with void pointers is that they have no stride.
> > So you can't move data with them.

>
> Unless you use memcpy/memmove. Of course, you need to know how much data
> to move.

If you look at the user of pointers in my program you will see that
char pointers were chosen for a reason.
The user interfaces are void pointers for the reason of generic
access, but the exchange is optimized for speed.
Of course, I posted to the wrong newsgroup, and so I expect I won't
get much utility on the responses here.