Velocity Reviews

Velocity Reviews (http://www.velocityreviews.com/forums/index.php)
-   C Programming (http://www.velocityreviews.com/forums/f42-c-programming.html)
-   -   Hash table implementation. (http://www.velocityreviews.com/forums/t752728-hash-table-implementation.html)

lolzy@live.nl 08-11-2011 05:43 PM

Hash table implementation.
 
Hello comp.lang.c,

This is a hash table implementation I wrote, please comment. Thanks in
advance.


# START CODE

/*
* Hash table demo.
* (C) Jori Koolstra - 19:40 11-8-2011
*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>


#define HASH_TABLE_SIZE 100


#define HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(t, n) if
(hash_table_delete_element(t, n) == 0) hash_table_fatal_error(0, n)

#define HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS( t, n, z) if
((z = hash_table_get_string_from_table(t, n)) == NULL)
hash_table_fatal_error(1, n)


struct table_data
{
char *uid;
char *d;
};


typedef struct table_data table_data;



struct hash_table
{
table_data data;
struct hash_table *x;
};


typedef struct hash_table hash_table;



hash_table ** hash_table_create_table(int);
hash_table * hash_table_get_string_from_table(hash_table **, char *);
int hash_table_hash(char *);
void hash_table_add_element(hash_table **, char *, char *);
int hash_table_delete_element(hash_table **, char *);
void hash_table_fatal_error(int, char *);



int main(void)
{
hash_table **table;
hash_table *y;

table = hash_table_create_table(HASH_TABLE_SIZE);

hash_table_add_element(table, "John", "Hello John!");
hash_table_add_element(table, "Kim", "Hello Kim!");
hash_table_add_element(table, "Jori", "Hello Jori!");
hash_table_add_element(table, "Jack", "Hello Jack!");

HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(table, "Kim");

HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS( table, "Jack", y);

printf("%s\n", y->data.d);




return 0;
}



/*
* Print error and quit.
*/
void hash_table_fatal_error(int t, char *el)
{
switch (t)
{
case 0:
printf("ERROR: Could not delete element '%s' from hash table.\n",
el);
break;
case 1:
printf("ERROR: Could not find key '%s' in hash table.\n", el);
break;
}


exit(0);
}



/*
* Create a hash table with size 'size' and return a hash table array.
*/
hash_table ** hash_table_create_table(int size)
{
register int i = 0;
hash_table **x;


x = (hash_table **) calloc(size, sizeof(hash_table *));

for (; i <= size; ++i)
{
x[i] = (hash_table *) calloc(1, sizeof(hash_table));
}

return x;
}



/*
* Returns a specified hash table element from table 't' with key 's',
* or NULL on failure.
*/
hash_table * hash_table_get_string_from_table(hash_table **t, char *s)
{
int id = hash_table_hash(s);
hash_table *q;

if (t[id]->data.uid != NULL && strcmp(t[id]->data.uid, s) != 0)
{
if (t[id]->x != NULL)
{
/* Move trough linked list */
for (q = t[id]->x; ; q = q->x)
{
if (strcmp(q->data.uid, s) == 0)
{
/* Found */
return q;
}


if (q->x == NULL)
{
break;
}
}
}

return NULL;
}

return (t[id]->data.uid == NULL ? NULL : t[id]);
}



/*
* Add 'data' to table 't' with key 'uid'.
*/
void hash_table_add_element(hash_table **t, char *uid, char *data)
{
int hash = hash_table_hash(uid);
hash_table *q = (hash_table *) calloc(1, sizeof(hash_table));
hash_table *c;

q->data.uid = strdup(uid);
q->data.d = strdup(data);
q->x = NULL;


c = t[hash];


if (c->data.uid != NULL)
{
while (c->x != NULL)
{
c = c->x;
}

c->x = q;
}
else
{
t[hash]->data.uid = strdup(uid);
t[hash]->data.d = strdup(data);
}
}



/*
* Delete element identified by 'uid' from hash table 't'.
* Returns: 1 on success, 0 on failure.
*/
int hash_table_delete_element(hash_table **t, char *uid)
{
int id = hash_table_hash(uid);
hash_table *q;


if (t[id]->data.uid == NULL)
{
return 0;
}


if (t[id]->data.uid != NULL && strcmp(t[id]->data.uid, uid) != 0)
{
/* Move trough linked list */
for (q = t[id]; ; q = q->x)
{
if (strcmp(q->x->data.uid, uid) == 0)
{
if (q->x->x != NULL)
{
free(q->x);
q->x = q->x->x;
}
else
{
free(q->x);
q->x = NULL;
}

return 1;
}


if (q->x == NULL)
{
break;
}
}

return 0;
}


if (t[id]->x == NULL)
{
t[id]->data.uid = NULL;
}
else
{
t[id]->data.uid = t[id]->x->data.uid;
t[id]->data.d = t[id]->x->data.d;
t[id]->x = t[id]->x->x;
}

return 1;
}




/***** NOT UNDER MY COPYRIGHT *****/
/*
* UNIX ELF hash.
* Published hash algorithm used in the UNIX ELF format for object
files.
*/
int hash_table_hash(char *data)
{
int h = 0, g;

while (*data)
{
h = (h << 4) + *data++;

if (g = h & 0xF0000000)
{
h ^= g >> 24;
}

h &= ~g;
}

return h % HASH_TABLE_SIZE;
}

# END CODE

Stefan Ram 08-11-2011 06:29 PM

Re: Hash table implementation.
 
lolzy@live.nl writes:
>x = (hash_table **) calloc(size, sizeof(hash_table *));
> for (; i <= size; ++i)
> {
> x[i] = (hash_table *) calloc(1, sizeof(hash_table));


x might be 0 here.


Keith Thompson 08-11-2011 06:49 PM

Re: Hash table implementation.
 
ram@zedat.fu-berlin.de (Stefan Ram) writes:
> lolzy@live.nl writes:
>>x = (hash_table **) calloc(size, sizeof(hash_table *));
>> for (; i <= size; ++i)
>> {
>> x[i] = (hash_table *) calloc(1, sizeof(hash_table));

>
> x might be 0 here.


And if x isn't null, the elements of x might not be null pointers;
there's no guarantee that a null pointer is represented as
all-bits-zero. If you're satisfied with portability only to
systems where null pointers *are* all-bits-zero, you can do that,
but I'd suggest at least documenting it. Or you can use malloc()
and then set each element to NULL in a loop.

--
Keith Thompson (The_Other_Keith) kst-u@mib.org <http://www.ghoti.net/~kst>
Nokia
"We must do something. This is something. Therefore, we must do this."
-- Antony Jay and Jonathan Lynn, "Yes Minister"

Ralf Damaschke 08-11-2011 08:02 PM

Re: Hash table implementation.
 
Keith Thompson <kst-u@mib.org> wrote:

> ram@zedat.fu-berlin.de (Stefan Ram) writes:
>> lolzy@live.nl writes:
>>>x = (hash_table **) calloc(size, sizeof(hash_table *));
>>> for (; i <= size; ++i)
>>> {
>>> x[i] = (hash_table *) calloc(1, sizeof(hash_table));

>>
>> x might be 0 here.

>
> And if x isn't null, the elements of x might not be null pointers;


You are right, at the place marked "here" for a non-null x these
pointers are either null pointers or point to an object
sufficiently large to hold an instance of hash_table.

-- Ralf

Ike Naar 08-11-2011 10:02 PM

Re: Hash table implementation.
 
On 2011-08-11, lolzy@live.nl <lolzy@live.nl> wrote:
> hash_table ** hash_table_create_table(int size)
> {
> register int i = 0;


It's probably a good idea to drop the 'register' qualification
and leave this kind of optimization to the compiler.
I'd prefer to make 'size' and 'i' of type size_t.

> hash_table **x;
>
>
> x = (hash_table **) calloc(size, sizeof(hash_table *));


The cast is unnecessary; the recommended idiom is

x = calloc(size, sizeof *x);

calloc can fail, and return NULL; you should check for that.
It's not necessary to set the elements of the array to all-bits-zero,
because you immediately overwrite each element in the for loop below.
So you might as well allocate the array using

x = malloc(size * sizeof *x);

> for (; i <= size; ++i)
> {
> x[i] = (hash_table *) calloc(1, sizeof(hash_table));
> }


This looks like an off-by-error; x has only 'size' elements,
numbered 0 to size-1. The last iteration of the for loop assigns
to the nonexisting element x[size].
>
> return x;
> }


lolzy@live.nl 08-12-2011 06:55 AM

Re: Hash table implementation.
 
Thanks for all your comments. I still have some questions :) :

* Why do you prefer malloc() over calloc() ?
* Why not cast for C++ compatibility?
* Why shouldn't I use the register keyword for loop variables?

lolzy@live.nl 08-12-2011 08:38 AM

Re: Hash table implementation.
 
I took a look at the code, and I optimized it to:
(Still looking for answers for my question 1 post up)
Thanks in advance.

/*
* Hash table demo.
* (C) Jori Koolstra - 19:40 11-8-2011
*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>


#define HASH_TABLE_SIZE 100


#define HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(t, n) if
(hash_table_delete_element(t, n) == 0) hash_table_fatal_error(0, n)

#define HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS( t, n, z) if
((z = hash_table_get_string_from_table(t, n)) == NULL)
hash_table_fatal_error(1, n)


struct table_data
{
char *uid;
char *d;
};


typedef struct table_data table_data;



struct hash_table
{
table_data data;
struct hash_table *x;
};


typedef struct hash_table hash_table;



hash_table ** hash_table_create_table(int);
hash_table * hash_table_get_string_from_table(hash_table **, char *);
int hash_table_hash(char *);
void hash_table_add_element(hash_table **, char *, char *);
int hash_table_delete_element(hash_table **, char *);
void hash_table_fatal_error(int, char *);
void * hash_table_safe_calloc(size_t, size_t);


int main(void)
{
hash_table **table;
hash_table *y;

table = hash_table_create_table(HASH_TABLE_SIZE);

hash_table_add_element(table, "John", "Hello John!");
hash_table_add_element(table, "Kim", "Hello Kim!");
hash_table_add_element(table, "Jori", "Hello Jori!");
hash_table_add_element(table, "Jack", "Hello Jack!");

HASH_TABLE_DELETE_ELEMENT_CHECK_FOR_ERRORS(table, "Kim");

HASH_TABLE_GET_STRING_FROM_TABLE_CHECK_FOR_ERRORS( table, "Jack", y);

printf("%s\n", y->data.d);




return 0;
}



/*
* Print error and quit.
*/
void hash_table_fatal_error(int t, char *el)
{
switch (t)
{
case 0:
printf("ERROR: Could not delete element '%s' from hash table.\n",
el);
break;
case 1:
printf("ERROR: Could not find key '%s' in hash table.\n", el);
break;
case 2:
printf("ERROR: Could not allocate memory.\n");
break;
}


exit(0);
}



/*
* Create a hash table with size 'size' and return a hash table array.
*/
hash_table ** hash_table_create_table(int size)
{
register int i = 0;
hash_table **x;


x = (hash_table **) hash_table_safe_calloc(size, sizeof(hash_table
*));

for (; i < size; ++i)
{
x[i] = (hash_table *) hash_table_safe_calloc(1, sizeof(hash_table));
}

return x;
}



/*
* Returns a specified hash table element from table 't' with key 's',
* or NULL on failure.
*/
hash_table * hash_table_get_string_from_table(hash_table **t, char *s)
{
hash_table *q;

for (q = t[hash_table_hash(s)]; q; q = q->x)
{
if (q->data.uid && 0 == strcmp(q->data.uid, s))
{
return q;
}
}

return NULL;
}



/*
* Add 'data' to table 't' with key 'uid'.
*/
void hash_table_add_element(hash_table **t, char *uid, char *data)
{
int h = hash_table_hash(uid);
hash_table *q, *c = t[h];


if (c->data.uid != NULL)
{
q = (hash_table *) hash_table_safe_calloc(1, sizeof(hash_table));
q->data.uid = strdup(uid);
q->data.d = strdup(data);

for (; c->x != NULL; c = c->x)

c->x = q;
}
else
{
t[h]->data.uid = strdup(uid);
t[h]->data.d = strdup(data);
}
}



/*
* Delete element identified by 'uid' from hash table 't'.
* Returns: 1 on success, 0 on failure.
*/
int hash_table_delete_element(hash_table **t, char *uid)
{
int h = hash_table_hash(uid);
hash_table *q;


if (t[h]->data.uid != NULL && strcmp(t[h]->data.uid, uid) != 0)
{
/* Move trough linked list */
for (q = t[h]; q; q = q->x)
{
if (strcmp(q->x->data.uid, uid) == 0)
{
free(q->x);
q->x = q->x->x;

return 1;
}
}

return 0;
}


if (t[h]->x == NULL)
{
if (t[h]->data.uid == NULL)
{
return 0;
}
else
{
t[h]->data.uid = NULL;
}
}
else
{
t[h]->data.uid = t[h]->x->data.uid;
t[h]->data.d = t[h]->x->data.d;
t[h]->x = t[h]->x->x;
}

return 1;
}



/*
* Allocate space with error checking.
*/
void * hash_table_safe_calloc(size_t num, size_t size)
{
void *p;

if ((p = calloc(num, size)) == NULL)
{
hash_table_fatal_error(2, "");
}

return p;
}




/***** NOT UNDER MY COPYRIGHT *****/
/*
* UNIX ELF hash.
* Published hash algorithm used in the UNIX ELF format for object
files.
*/
int hash_table_hash(char *data)
{
int h = 0, g;

while (*data)
{
h = (h << 4) + *data++;

if (g = h & 0xF0000000)
{
h ^= g >> 24;
}

h &= ~g;
}

return h % HASH_TABLE_SIZE;
}

Nobody 08-12-2011 08:47 AM

Re: Hash table implementation.
 
On Thu, 11 Aug 2011 23:55:45 -0700, lolzy wrote:

> Thanks for all your comments. I still have some questions :) :
>
> * Why do you prefer malloc() over calloc() ?


calloc() zeros the allocated memory, which is inefficient if you don't
actually need to do so.

> * Why not cast for C++ compatibility?


There are arguments for and against casting. The main argument against is
that casting can prevent a diagnostic being issued if the prototype isn't
in scope.

> * Why shouldn't I use the register keyword for loop variables?


At best, it's a waste of both keystrokes and bytes; the compiler will
probably ignore it when deciding where to store the variable (i.e. the
only effect of a "register" qualifier is likely to be that you get an
error if you attempt to take the variable's address). At worst, the
compiler might actually pay attention to it, and you get less efficient
code than the compiler would have generated without it.

The "register" qualifier originated in an era where compilers performed a
fairly direct translation from source code to object code, without any
significant optimisation. It is somewhere between useless and worse than
useless with a modern compiler.


Michael Angelo Ravera 08-12-2011 09:54 AM

Re: Hash table implementation.
 
One thing that you may want to do is to use the size as more of a suggestion than a firm number. Many hash functions work better when the main size isa prime number. You can pick a prime number that is close to the size requested or pick the first prime that is not less than the size requested. Primacy is not really a requirement when the size of the table gets really large. It would be exceedingly reasonable to keep a list of primes less than 256 and require that the main size have no factors less than 256. This wouldallow you easily to pick a prime number less 64K and give you a "size thatisn't completely stupid" for larger tables. For sizes less than 256, you just scan the prime table upward to find the first prime.

If you don't always use a realy cool hash function, then I agree that you might like to use the "object inspired" approach of storing a function pointer to a hash function.

Ike Naar 08-12-2011 11:13 AM

Re: Hash table implementation.
 
On 2011-08-12, lolzy@live.nl <lolzy@live.nl> wrote:
> * Why do you prefer malloc() over calloc() ?


malloc allocates memory.
calloc allocates memory and initializes it to all-bytes-zero.
In your case, initializing the memory to all-bytes-zero is
wasted effort since you initialize the memory yourself.

> * Why not cast for C++ compatibility?


You'll have to make up your mind if you want to write the program
in C or in C++. They are different languages, and if you try to
write code that is both C and C++ you may end up with something
that is neither decent C nor decent C++.
For instance, in a C++ program you would probably prefer std::vector
over a malloc-ed array.

> * Why shouldn't I use the register keyword for loop variables?


Suppose the compiler honours the keyword, and reserves a register
for the loop variable. That register now cannot be used for more
useful purposes (e.g. as temporary storage for computing subexpressions)
and you may end up with less efficient code.


All times are GMT. The time now is 02:49 PM.

Powered by vBulletin®. Copyright ©2000 - 2014, vBulletin Solutions, Inc.
SEO by vBSEO ©2010, Crawlability, Inc.