Velocity Reviews

Velocity Reviews (http://www.velocityreviews.com/forums/index.php)
-   C Programming (http://www.velocityreviews.com/forums/f42-c-programming.html)
-   -   simple BST implementation (http://www.velocityreviews.com/forums/t702210-simple-bst-implementation.html)

Mark 10-20-2009 04:14 AM

simple BST implementation
 
Hello

I've implemented a very simple and small binary-search tree library as a
training exercise. I'm posting it here together with a testing program and
expect to receive critics and useful recommendations :-)

It compiles fine with the following GCC options:

-std=c99 -pedantic -W -Wall -Wcast-align -Wcast-qual -Wmissing-prototypes -Wshadow
-Wnested-externs -Wstrict-prototypes -Waggregate-return -O0 -ggdb

I put in various checkpoints tracing messages which help me to better
understand BST and to debug, it can be disabled with -DWITH_TRACE compiler
flag. I'm yet to decide where to carefully put 'asserts'.

/* tree.h */
#ifndef TREE_H
#define TREE_H

#include <stddef.h> /* for size_t */

/* error codes */
enum {
T_ESUCCESS,
T_EINVALID,
T_EBADPTR,
T_EDUP,
T_ENOMEM,
T_EMPTYTREE
};

struct tree_node {
void *data;
size_t size; /* for future to indicate the data size */
struct tree_node *left;
struct tree_node *right;
};

struct tree_node *tree_allocnode(void *data);

int tree_addnode(struct tree_node **root, struct tree_node *node,
int(*cmp_fn)(const void *d1, const void *d2));

void tree_freenode(struct tree_node *node);

struct tree_node *tree_lookup(struct tree_node *root, const void *data,
int(*cmp_fn)(const void *d1, const void *d2));

void tree_freetree(struct tree_node *node, void (*fn)(void *data));

int tree_traverse(struct tree_node *root,
int(*fn)(struct tree_node *node, void *arg), void *arg);

#endif

/* tree.c */
#ifdef WITH_TRACE
#include <stdarg.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"

#ifdef WITH_TRACE
#define TRACE(...) (printf("TRACE: "), printf(__VA_ARGS__), printf("\n"))
#else
#define TRACE(...)
#endif

struct tree_node *tree_allocnode(void *data)
{
struct tree_node *node = NULL;

node = malloc(sizeof *node);
if (node != NULL)
{
if (data != NULL)
{
node->data = data;
node->left = node->right = NULL;
TRACE("node @ %p, data=%s", (void *)node, (char *)data);
}
else
{
free(node);
node = NULL;
}
}

return node;
}

int tree_addnode(struct tree_node **root, struct tree_node *node,
int(*cmp_fn)(const void *d1, const void *d2))
{
int res = T_ESUCCESS;
int d;

if (node != NULL)
{
/* empty tree case */
if (NULL == *root)
{
*root = node;
node->left = node->right = NULL;
TRACE("root @ %p", (void *)*root);
}
else
{
d = cmp_fn(node->data, (*root)->data);
if (d < 0)
{
tree_addnode(&(*root)->left, node, cmp_fn);
TRACE("node(%p)->left=%p", (void *)(*root), (void
*)(*root)->left);
}
else if (d > 0)
{
tree_addnode(&(*root)->right, node, cmp_fn);
TRACE("node(%p)->right=%p", (void *)(*root), (void
*)(*root)->right);
}
else /* d == 0, i.e. duplicate entry */
{
res = T_EDUP;
}
}
}
else
{
res = T_EBADPTR;
}

return res;
}

void tree_freenode(struct tree_node *node)
{
if (node != NULL)
{
if (node->data != NULL)
{
free(node->data);
}
free(node);
}
}

struct tree_node *tree_lookup(struct tree_node *root, const void *data,
int(*cmp_fn)(const void *d1, const void *d2))
{
struct tree_node *node = NULL;
int d;

if (root != NULL)
{
d = cmp_fn(data, root->data);
if (d > 0)
{
return tree_lookup(root->right, data, cmp_fn);
}
else if (d < 0)
{
return tree_lookup(root->left, data, cmp_fn);
}
else
{
node = root;
TRACE("node found %p", (void *)node);
}
}

return node;
}

void tree_freetree(struct tree_node *node, void (*fn)(void *data))
{
if (node != NULL)
{
if (fn != NULL)
{
fn(node->data);
}
TRACE("deleting node(%p)->left=%p", (void *)node, (void
*)node->left);
tree_freetree(node->left, fn);
TRACE("deleting node(%p)->right=%p", (void *)node, (void
*)node->right);
tree_freetree(node->right, fn);

free(node);
}
}

int tree_traverse(struct tree_node *root,
int(*fn)(struct tree_node *root, void *arg), void *arg)
{
int res = T_ESUCCESS;

if (root != NULL)
{
tree_traverse(root->left, fn, arg);
fn(root, arg);
tree_traverse(root->right, fn, arg);
}
else
{
res = T_EMPTYTREE;
}

return res;
}

/* tree-test.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "tree.h"

static int tdata1[] = { 10, 6, 900, 7, 1, 57, 928, 1, 987 };
static char *tdata[] = { "yellow", "apple", "chess", "penguin", "car",
"mixer", "Zoo", "loop", "zebra" };
#define DNUM (sizeof(tdata) / sizeof(tdata[0]))

static int comp(const void *d1, const void *d2)
{
int res;

res = strcmp((const char *)d1, (const char *)d2);
if (res > 0) {
printf("%s > %s\n", (const char *)d1, (const char *)d2);
return 1;
} else if (res < 0) {
printf("%s < %s\n", (const char *)d1, (const char *)d2);
return -1;
} else {
printf("%s = %s\n", (const char *)d1, (const char *)d2);
return 0;
}
}

static int comp1(const void *d1, const void *d2)
{
if (*(const int *)d1 < *(const int *)d2) {
printf("%d < %d\n", *(const int *)d1, *(const int *)d2);
return -1;
}
else if (*(const int *)d1 > *(const int *)d2) {
printf("%d > %d\n", *(const int *)d1, *(const int *)d2);
return 1;
}
else { /* if (*(const int *)d1 == *(const int * )d2) */
printf("%d = %d\n", *(const int *)d1, *(const int *)d2);
return 0;
}
}

static int walk(struct tree_node *node, void *arg)
{
printf(arg, (char *)node->data);
return 0;
}

int main(void)
{
struct tree_node *tptr, *root = NULL;
unsigned int i;

puts("Populating the tree...\n");
for (i = 0; i < DNUM; i++) {
puts("-------------------------------------");
tptr = tree_allocnode(tdata[i]);
if (NULL == tptr)
{
puts("Failed to allocate memory!");
exit(EXIT_FAILURE);
}
tree_addnode(&root, tptr, comp);
}

puts("Traversing the tree...\n");
tree_traverse(root, walk, "data=%s\n");
printf("found node %p\n", (void *)tree_lookup(root, tdata[3], comp));
puts("Destroying the tree...\n");
tree_freetree(root, NULL);
puts("Traversing the tree...\n");
tree_traverse(root, walk, "data=%s\n");

return 0;
}

--
Mark


Seebs 10-20-2009 04:50 AM

Re: simple BST implementation
 
On 2009-10-20, Mark <mark_cruzNOTFORSPAM@hotmail.com> wrote:
> /* error codes */
> enum {
> T_ESUCCESS,


No.

"EFOO" is widely recognized as indicating an *error*.

Success is not an error, normally.

> T_EINVALID,
> T_EBADPTR,
> T_EDUP,
> T_ENOMEM,
> T_EMPTYTREE


It seems to me that you might see whether there's enough standard/common
errno values defined that you could just use errno.

> struct tree_node {
> void *data;
> size_t size; /* for future to indicate the data size */


If you're not using it, don't put it here, IMHO.

> struct tree_node *tree_allocnode(void *data);


I'm not totally sold on this, although I'm not totally unsold on it
either.

> int tree_addnode(struct tree_node **root, struct tree_node *node,
> int(*cmp_fn)(const void *d1, const void *d2));


I'd probably do these two differently.

tree_node *tree_addnode(struct tree_node *root, void *data, int (*cmp_fn))...

But even this is error-prone. In this case, I think it makes more sense
to distinguish betwen the tree and its nodes.

So I recommend:

struct tree {
int (*cmp_fn)(const void *d1, const void *d2);
struct tree_node *root;
}

Then:
struct tree *tree_new(int (*cmp_fn)(const void *d1, const void *d2));

.... and consider
typedef int (*tree_cmp)(const void *d1, const_void *d2);

Some people recommend omitting the parameter names in prototypes, although
I'm currently leaning against it.

So I guess:
typedef int (*tree_cmp)(const void *d1, const_void *d2);
struct tree {
tree_cmp cmp;
struct tree_node *root;
}

then:

struct tree *tree_new(tree_cmp cmp);
int tree_add(struct tree *t, void *data);
struct tree_node *tree_root(struct tree *t);
struct tree_node *tree_lookup(struct tree *t, const void *key);

.... But you will, of course, note that this means you can't grab a subtree
and do a lookup. But that can be addressed:

struct tree_node *tree_node_lookup(struct tree_node *tn, const void *key);

in which case:
struct tree_node *tree_lookup(struct tree *t, const void *key) {
if (!t || !t->root)
return NULL;
return tree_node_lookup(t->root, key);
}

The other corresponding change, then:

int tree_add(struct tree *t, const void *data) {
... do the node allocation in here, don't make the user
do it separately.
}

-s
--
Copyright 2009, all wrongs reversed. Peter Seebach / usenet-nospam@seebs.net
http://www.seebs.net/log/ <-- lawsuits, religion, and funny pictures
http://en.wikipedia.org/wiki/Fair_Game_(Scientology) <-- get educated!

Seebs 10-20-2009 05:08 AM

Re: simple BST implementation
 
On 2009-10-20, Richard Heathfield <rjh@see.sig.invalid> wrote:
> In <slrnhdqgcl.sn4.usenet-nospam@guild.seebs.net>, Seebs wrote:
>
>> On 2009-10-20, Mark <mark_cruzNOTFORSPAM@hotmail.com> wrote:
>>> /* error codes */
>>> enum {
>>> T_ESUCCESS,

>>
>> No.
>>
>> "EFOO" is widely recognized as indicating an *error*.


> No. EFOO is reserved for the implementation. Bad Seebs - no biscuit!


Yeah, too terse, bad me. "T_E*" good, "ESUCCESS" bad, "T_ESUCCESS" still
bad because people will see "T_E*" and assume it's an [E]rorr.

>> It seems to me that you might see whether there's enough
>> standard/common errno values defined that you could just use errno.


> It is very unlikely that the standard ones will suffice. I could only
> find three, EDOM, EILSEQ, and ERANGE. The moment you add "common"
> ones, you make your program less portable.


True.

> He's probably going to be using it in the very next mod, so it's not
> such a big deal. (Yes, okay, he could #if 0 it for now.)


Agreed.

> I would take a pointer to a tree structure (which manages the root and
> any housekeeping information needed by the tree), the data, the size,
> and an optional void * for side-channel information. (Consequently, I
> would add an extra void * to the comparison function.)


Good point about the side-channel. Hmm.

Actually, that's an interesting question. Possibly the function should
take:
* item 1
* item 2
* tree
* aux

Because the tree might be relevant in some cases to the comparison function.
(Not that I can think of them, but...)

>> int tree_add(struct tree *t, const void *data) {
>> ... do the node allocation in here, don't make the user
>> do it separately.


> Um, fair enough to factor it out, but tree_add should call
> tree_allocate - one should not have to call it separately in advance.
> Is that what you meant?


Yes.

At which point, I honestly don't think the user ever has a reason to
call tree_allocate, because the only thing you can usefully do with that
new node, normally, is add it to a tree.

(Interesting question: Imagine a tree comparison function which, for any
two items in a tree, tells you whether the first item comes before, or
after, or is in the same place as the second function. Now reverse it and
try to sort by it...)

-s
--
Copyright 2009, all wrongs reversed. Peter Seebach / usenet-nospam@seebs.net
http://www.seebs.net/log/ <-- lawsuits, religion, and funny pictures
http://en.wikipedia.org/wiki/Fair_Game_(Scientology) <-- get educated!

Nick Keighley 10-20-2009 07:02 AM

Re: simple BST implementation
 
On 20 Oct, 06:16, Richard Heathfield <r...@see.sig.invalid> wrote:
> In <slrnhdqgcl.sn4.usenet-nos...@guild.seebs.net>, Seebs wrote:
> > On 2009-10-20, Mark <mark_cruzNOTFORS...@hotmail.com> wrote:


> >> /* error codes */
> >> enum {
> >> * * T_ESUCCESS,

>
> > No.

>
> > "EFOO" is widely recognized as indicating an *error*.


it's not uncommon (nor very wrong) to lump "success" in with the
"error" codes.
Though I must I admit I prefer to call them "return codes"

<snip>

> > Some people recommend omitting the parameter names in prototypes,
> > although I'm currently leaning against it.

>
> Tough call, arguments on both sides, and I think the last time I
> thought about this I was, like you, edging towards including names.


I tend to omit the names unless putting them in adds useful
information.
I try to arrange it so the header file is as informative as possible.

Retval transmitter_reset (Transmitter*);
double sum (double[], size_t);
void clear_element_in_instance_table (int element_id);

though perhaps the last one is an argument for using a better type.

<snip>

Nick Keighley 10-20-2009 07:04 AM

Re: simple BST implementation
 
On 20 Oct, 06:08, Seebs <usenet-nos...@seebs.net> wrote:
> On 2009-10-20, Richard Heathfield <r...@see.sig.invalid> wrote:
> > In <slrnhdqgcl.sn4.usenet-nos...@guild.seebs.net>, Seebs wrote:


> >> On 2009-10-20, Mark <mark_cruzNOTFORS...@hotmail.com> wrote:
> >>> /* error codes */
> >>> enum {
> >>> * * T_ESUCCESS,

>
> >> No.

>
> >> "EFOO" is widely recognized as indicating an *error*.

>
> > No. EFOO is reserved for the implementation. Bad Seebs - no biscuit!

>
> Yeah, too terse, bad me. *"T_E*" good, "ESUCCESS" bad, "T_ESUCCESS" still
> bad because people will see "T_E*" and assume it's an [E]rorr.


only if they were trying *really* hard not to understand

"There are some ideas so wrong that only a very intelligent person
could believe in them." -- George Orwell


I was searching for a Dan Pop quote along these lines but I thought
George would do in a pinch


Ben Bacarisse 10-20-2009 11:02 AM

Re: simple BST implementation
 
"Mark" <mark_cruzNOTFORSPAM@hotmail.com> writes:

> I've implemented a very simple and small binary-search tree library as
> a training exercise. I'm posting it here together with a testing
> program and expect to receive critics and useful recommendations :-)


Here are a few thoughts:

I don't like interfaces where the library signals what the caller
knows already. Your insert function returns T_EBADPTR if and only if
node == NULL. The caller can test this as easily as the function can.
In fact, the caller can probably test for it more simply than it can
test for all the various return values.

I'd write insert so that it adds data rather a node. I.e. it
allocates the node when it will do the insert. In your design, the
caller will probably have to allocate the node, try to insert it and
then delete it if the insert did not happen.

A more serious issue: don't delete what you don't allocate! Your
freenode frees the data but it shouldn't. I may want to add pointers
to non-'malloc'ed things. Moreover, when I free a node, I may want to
keep the data.

You could consider making the whole tree freeing function a call to a
post-order walk function.

<snip code>
--
Ben.

Nisse Engström 10-20-2009 10:35 PM

Re: simple BST implementation
 
On Tue, 20 Oct 2009 08:02:15 +0000, Richard Heathfield wrote:

> In <09300620-0ab8-4c38-9d3a-90ae210ebbc7@s6g2000vbp.googlegroups.com>,
> Nick Keighley wrote:
>
>> I was searching for a Dan Pop quote along these lines but I thought
>> George would do in a pinch

>
> You may have been thinking of this [from memory, so might be slightly
> wrong]:
>
> "The expression isn't unclear /at all/, and only an expert could
> possibly have any doubts about it."


"The expression isn't unclear *at all* and only an expert could actually
have doubts about it" - Dan Pop 2001-02-06


/Nisse

Chris M. Thomasson 10-21-2009 04:18 AM

Re: simple BST implementation
 

"Ben Bacarisse" <ben.usenet@bsb.me.uk> wrote in message
news:0.0265d0425e9269f7bdb5.20091020120240BST.8763 aagwfj.fsf@bsb.me.uk...
> "Mark" <mark_cruzNOTFORSPAM@hotmail.com> writes:
>
>> I've implemented a very simple and small binary-search tree library as
>> a training exercise. I'm posting it here together with a testing
>> program and expect to receive critics and useful recommendations :-)

>
> Here are a few thoughts:
>
> I don't like interfaces where the library signals what the caller
> knows already. Your insert function returns T_EBADPTR if and only if
> node == NULL. The caller can test this as easily as the function can.
> In fact, the caller can probably test for it more simply than it can
> test for all the various return values.
>
> I'd write insert so that it adds data rather a node. I.e. it
> allocates the node when it will do the insert. In your design, the
> caller will probably have to allocate the node, try to insert it and
> then delete it if the insert did not happen.


[...]

IMHO, intrusive data-structures can be more "efficient" than the more
flexible container based allocation schemes. I basically agree with the
first point you made. The caller can probably allocate the node more
efficiently/simply than the end library function call can... Perhaps the
end-user application has various objects that can be directly linked into a
tree without the need of any auxiliary nodes:
__________________________________________________ _
struct tree
{
struct tree* left;
struct tree* right;
struct tree** plink;
};


struct end_user_object
{
struct end_user_object* next;
struct tree tree1;
char x;
struct tree tree2;
double y;
struct tree tree3;
};


struct end_user_object_region
{
struct end_user_object buffer[1024];
struct end_user_object* flist;
size_t bump;
};


static struct end_user_object_region g_region;

static struct tree* g_trees[3];
__________________________________________________ _




The application should be able to store an `end_user_object' on three
separate trees at the same time without making a single function call to
allocate any memory.




IMVVHO, a generic container library should work on a free standing
implementation that does not have access to `malloc' and friends...

Intrusive data-structures to the rescue!!!

:^o


Nick Keighley 10-21-2009 07:12 AM

Re: simple BST implementation
 
On 20 Oct, 23:35, Nisse Engström <news.NOSPAM.wbars...@luden.se>
wrote:
> On Tue, 20 Oct 2009 08:02:15 +0000, Richard Heathfield wrote:
> > In <09300620-0ab8-4c38-9d3a-90ae210eb...@s6g2000vbp.googlegroups.com>,
> > Nick Keighley wrote:


> >> I was searching for a Dan Pop quote along these lines but I thought
> >> George would do in a pinch

>
> > You may have been thinking of this [from memory, so might be slightly
> > wrong]:

>
> > "The expression isn't unclear /at all/, and only an expert could
> > possibly have any doubts about it."

>
> "The expression isn't unclear *at all* and only an expert could actually
> *have doubts about it" *- Dan Pop 2001-02-06
>
> /Nisse


excellent!

Seebs 10-22-2009 02:47 AM

Re: simple BST implementation
 
On 2009-10-20, Richard Heathfield <rjh@see.sig.invalid> wrote:
> It should just take item 1, item 2, aux. (Good name, aux - I might
> steal that!)


It's hardly original to me, but I've been a big fan ever since I first
saw it used, because it's as close as I've ever seen to self-documenting
but still terse for sideband data.

-s
--
Copyright 2009, all wrongs reversed. Peter Seebach / usenet-nospam@seebs.net
http://www.seebs.net/log/ <-- lawsuits, religion, and funny pictures
http://en.wikipedia.org/wiki/Fair_Game_(Scientology) <-- get educated!


All times are GMT. The time now is 08:11 AM.

Powered by vBulletin®. Copyright ©2000 - 2014, vBulletin Solutions, Inc.
SEO by vBSEO ©2010, Crawlability, Inc.