#include <stdlib.h>
#include <string.h>
#include "etlib/generic.h"
#include "etlib/xmalloc.h"
#include "etlib/hash.h"

/*
    Mixture of hash table and binary tree.

    First level is a standard hash with the hashpjw function
    from the dragon book. But instead of a linked list in each
    slot I use a binary tree.
    To balance the tree, I take the low-byte of the full hash value
    (before the modulo) as the first char of each key.
    Storing increasing keys does not generate a perfectly balanced
    tree but one that is as good as one generated by random keys.

    usage:
	to define a hash table:
	    hashtable(data-type, table-size) identifier;
	    hashtable(data-type, table-size) id1, id2, id3;

	    data-type should be a simple type, a struct, or
	    a union. the special type hash_no_data is provided
	    when no data is needed.
	    table-size should be a prime.

	to look for an entry:
	    hash_find(table-identifier, string)

	to look for an entry and create a new one if not present:
	    hash_get(table-identifier, string)

	to add an entry
	    hash_add(table-identifier, string)

	    (this is an alias for hash_get(...))

	to get the string associated with an entry:
	    hash_name(table-identifier, hash_get/find(...))

	to get statistics about a hashtable;
	    struct hash_st st;
	    hash_stat(table-identifier, &st);
*/

struct symbol
{
    struct symbol *left;
    struct symbol *right;
    u8 misc[2];		/* contains user struct and name */
};



static inline u32
hash(u8 *str)
{
    u32 h = 0;

    while (*str)
	h = (h << 4) ^ (h >> 28) ^ *str++;

    return h ? h : 1;
}


/*
    Find a symbol. Return 0 if not found.
*/

void *
_hash_find(u8 *id, void *tab, int size, int usize)
{
    struct symbol *s;
    u32 h;
    int i;

    h = hash(id);
    s = ((struct symbol **)tab)[h % size];

    while (s)
    {
	i = (u8)h - s->misc[usize];
	if (i == 0)
	{
	    i = strcmp(id, s->misc + usize + 1);
	    if (i == 0)
		return s->misc;
	}
	s = i < 0 ? s->left : s->right;
    }
    return 0;
}



/*
    Get a symbol. Create if not found.
*/

void *
_hash_get(u8 *id, void *tab, int size, int usize)
{
    struct symbol *s, **ss;
    u32 h;
    int i;
    
    h = hash(id);
    ss = &((struct symbol **)tab)[h % size];

    while ( (s = *ss) )
    {
	i = (u8)h - s->misc[usize];
	if (i == 0)
	{
	    i = strcmp(id, s->misc + usize + 1);
	    if (i == 0)
		return s->misc;
	}
	ss = i < 0 ? &s->left : &s->right;
    }

    *ss = s = malloc(sizeof(*s) + usize + strlen(id));

    s->left = 0;
    s->right = 0;
    memset(s->misc, 0, usize);
    s->misc[usize] = h;
    strcpy(s->misc + usize + 1, id);

    return s->misc;
}

/*
    Delete a symbol.
*/

void
_hash_del(u8 *id, void *tab, int size, int usize)
{
    struct symbol *s, **ss;
    u32 h;
    int i;

    h = hash(id);
    ss = &((struct symbol **)tab)[h % size];

    while ( (s = *ss) )
    {
	i = (u8)h - s->misc[usize];
	if (i == 0)
	{
	    i = strcmp(id, s->misc + usize + 1);
	    if (i == 0)
	    {
		/* found, now remove it */
		if (s->left == 0)
		    *ss = s->right;
		else if (s->right == 0)
		    *ss = s->left;
		else
		{
		    struct symbol *t, **tt;

		    for (tt = &s->right; (t = *tt)->left; tt = &t->left)
			;
		    *tt = t->right;
		    t->left = s->left;
		    t->right = s->right;
		    *ss = t;
		}
		free(s);
		return;
	    }
	}
	ss = i < 0 ? &s->left : &s->right;
    }
}



static void
_stat(int depth, struct symbol *s, struct hash_st *st)
{
    while (s)
    {
	if (st->maxdepth < depth)
	    st->maxdepth = depth;
	st->nelem++;
	st->middepth += depth;
	depth++;
	_stat(depth, s->left, st);
    #if 0
	printf("<%s>\t", s->misc+5);
	if (s->left) printf("<%s>\t", s->left->misc+5); else printf(".\t");
	if (s->right) printf("<%s>\n", s->right->misc+5); else printf(".\n");
    #endif
	s = s->right;
    }
}



void
_hash_stat(void *tab, int size, struct hash_st *st)
{
    struct symbol **s;

    s = (struct symbol **)tab;

    st->nelem = 0;
    st->maxdepth = 0;
    st->middepth = 0;
    st->hashsize = size;

    while (size--)
	_stat(1, *s++, st);

    if (st->nelem)
	st->middepth = (st->middepth * 1000) / st->nelem;
}




#if 0

/* some primes:
    11 23 31 41 53 61 71 83 97
    101 211 307 401 503 601 701 809 907
    1009 2003 3001 4001 5003 6007 7001 8009 9001
    10007 20011 30011 40009 50021 60013 70001 80021 90001
    100003
*/

//hashtable(int, 97) pseudoop;
hashtable(int, 1) pseudoop;

main()
{
    struct hash_st st;
    u8 buf[256];

#if 1
    hash_add(pseudoop, "0");
    hash_add(pseudoop, "5");
    hash_add(pseudoop, "1");
    hash_add(pseudoop, "6");

    hash_stat(pseudoop, &st);	printf("-----------\n");
    hash_del(pseudoop, "5");

#else
    while (gets(buf))
	if (buf[0])
	    hash_add(pseudoop, buf);
#endif
    hash_stat(pseudoop, &st);
    printf("nelem   : %d\n", st.nelem);
    printf("hashsize: %d\n", st.hashsize);
    printf("maxdepth: %d\n", st.maxdepth);
    printf("middepth: %d.%03d\n", st.middepth / 1000, st.middepth % 1000);
}
#endif
