#include <sys/time.h>

#include <err.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "hashes.h"

/* Text-mode input: one NUL-terminated string per item. */
unsigned char **tdata;
/* Binary-mode input: records of datasize bytes stored back to back. */
unsigned char *bdata;

/* Run parameters and state shared by the readers and the benchmark. */
int binary;	/* non-zero when the input is fixed-size binary records */
int datasize;	/* size in bytes of one binary record */
int hashmask;	/* hashsize - 1; ANDed with a hash value to pick a bucket */
int hashsize;	/* number of buckets */
int items;	/* number of items read from stdin */

/* Input readers: fill tdata or bdata from stdin and set items. */
void read_binary_data(void);
void read_text_data(void);

/* Benchmark one hash function and print its row of statistics. */
void dohash(char *name, unsigned int (*hash)(unsigned char *, size_t));

int main(int argc, char **argv);

/*
 * Read stdin line by line into tdata, one heap-allocated string per
 * item, with the trailing newline removed.  On return, items holds the
 * number of strings stored.  Exits via err() if memory runs out.
 *
 * Lines are read through a fixed 1024-byte buffer, so a longer input
 * line is split into several items.
 */
void
read_text_data(void)
{
	char buf[1024], *cp;
	unsigned char **grown;
	size_t nbytes;
	int maxitems;

	items = 0;
	maxitems = 256;
	nbytes = (size_t)maxitems * sizeof(*tdata);
	tdata = malloc(nbytes);
	if (tdata == NULL)
		err(1, "malloc(%zu)", nbytes);

	while (fgets(buf, sizeof(buf), stdin) != NULL) {
		if ((cp = strrchr(buf, '\n')) != NULL)
			*cp = '\0';
		if (items >= maxitems) {
			/* Double the pointer array whenever it fills up. */
			maxitems <<= 1;
			nbytes = (size_t)maxitems * sizeof(*tdata);
			/*
			 * Keep the old pointer until realloc() is known to
			 * have succeeded.
			 */
			grown = realloc(tdata, nbytes);
			if (grown == NULL)
				err(1, "realloc(%zu)", nbytes);
			tdata = grown;
		}

		/*
		 * strdup() can fail; a NULL entry would later be handed to
		 * strlen() by dohash().
		 */
		tdata[items] = (unsigned char *)strdup(buf);
		if (tdata[items] == NULL)
			err(1, "strdup");
		items++;
	}
}

/*
 * Read fixed-size records of datasize bytes from stdin into bdata,
 * stored back to back.  On return, items holds the number of complete
 * records stored.  Exits via err()/errx() on an invalid datasize, a
 * read error or memory exhaustion; an incomplete record at the end of
 * the input is dropped with a warning.
 */
void
read_binary_data(void)
{
	unsigned char *grown, *rec;
	size_t got, nbytes, recsize;
	ssize_t n;
	int maxitems;

	/*
	 * A zero-length read() returns 0, which would look like a complete
	 * record forever; reject non-positive sizes up front.
	 */
	if (datasize <= 0)
		errx(1, "invalid data size %d", datasize);
	recsize = (size_t)datasize;

	items = 0;
	maxitems = 256;
	/* Sizes are computed in size_t so large inputs cannot overflow int. */
	nbytes = (size_t)maxitems * recsize;
	bdata = malloc(nbytes);
	if (bdata == NULL)
		err(1, "malloc(%zu)", nbytes);

	for (;;) {
		/*
		 * read() may return fewer bytes than requested (pipes,
		 * terminals), so keep reading until the record is complete
		 * or the input ends.
		 */
		rec = bdata + (size_t)items * recsize;
		got = 0;
		while (got < recsize) {
			n = read(STDIN_FILENO, rec + got, recsize - got);
			if (n == -1) {
				if (errno == EINTR)
					continue;
				err(1, "read");
			}
			if (n == 0)
				break;
			got += (size_t)n;
		}
		if (got < recsize) {
			if (got != 0)
				warnx("ignoring %zu trailing bytes", got);
			break;
		}

		items++;
		if (items >= maxitems) {
			/*
			 * Fudge so 1M elements don't need 2M slots,
			 * only works nicely for powers-of-two :-)
			 */
			maxitems = (maxitems << 1) + 64;
			nbytes = (size_t)maxitems * recsize;
			grown = realloc(bdata, nbytes);
			if (grown == NULL)
				err(1, "realloc(%zu)", nbytes);
			bdata = grown;
		}
	}
}

/*
 * Benchmark one hash function over the loaded data set and print one
 * row of results.
 *
 * name: label printed in the first column.
 * hash: function under test; called as hash(data, length).
 *
 * Two passes are made over the items.  The first counts how many items
 * land in each of the hashsize buckets (selected with h & hashmask);
 * the second only times the hash calls.  The row shows: buckets used,
 * buckets holding exactly one item, largest bucket, mean and standard
 * deviation of the load over the used buckets, time per item in
 * microseconds, and the sum of squared bucket loads divided by the same
 * sum for a perfectly even distribution (1.0 is ideal).  With no items
 * loaded, the derived columns are printed as zero.
 *
 * Reads the globals binary, bdata, tdata, datasize, items, hashsize and
 * hashmask.  The bucket array is allocated on the first call and reused
 * by later calls.
 */
void
dohash(char *name, unsigned int (*hash)(unsigned char *, size_t))
{
	struct timeval t1, t2, t3;
	static int *buckets = NULL;
	volatile unsigned int sink;
	int collfree, i, max, used;
	double avg, nice, perfect, quot, ratio, score, sq, stddev, sum;
	double usec, var;
	unsigned int acc, h;

	/* A non-positive size would break the modulo and bucket indexing. */
	if (hashsize <= 0)
		errx(1, "invalid hash size %d", hashsize);

	if (buckets == NULL) {
		buckets = malloc((size_t)hashsize * sizeof(*buckets));
		if (buckets == NULL)
			err(1, "malloc(%zu)",
			    (size_t)hashsize * sizeof(*buckets));
	}

	for (i = 0; i < hashsize; i++)
		buckets[i] = 0;

	/*
	 * First run through, gather statistics on the hash function.
	 */
	if (binary) {
		unsigned char *data;

		data = bdata;
		for (i = 0; i < items; i++, data += datasize) {
			h = (*hash)(data, datasize);
			buckets[h & hashmask]++;
		}
	} else {
		for (i = 0; i < items; i++) {
			h = (*hash)(tdata[i], strlen((char *)tdata[i]));
			buckets[h & hashmask]++;
		}
	}

	/*
	 * Second run through, just calculate the time taken for the
	 * hash function.  In text mode the strlen() call is part of the
	 * timed work.  The results are folded into acc and stored to a
	 * volatile afterwards so the calls cannot be optimised away.
	 */
	acc = 0;
	gettimeofday(&t1, NULL);
	if (binary) {
		unsigned char *data;

		data = bdata;
		for (i = 0; i < items; i++, data += datasize)
			acc ^= (*hash)(data, datasize);
	} else {
		for (i = 0; i < items; i++)
			acc ^= (*hash)(tdata[i], strlen((char *)tdata[i]));
	}
	gettimeofday(&t2, NULL);
	timersub(&t2, &t1, &t3);
	sink = acc;
	(void)sink;

	collfree = max = used = 0;
	sum = sq = 0.0;
	for (i = 0; i < hashsize; i++) {
		if (buckets[i] > 0) {
			used++;
			if (buckets[i] == 1)
				collfree++;
			if (buckets[i] > max)
				max = buckets[i];
			sum += (double)buckets[i];
			sq += (double)buckets[i] * (double)buckets[i];
		}
	}

	if (items > 0) {
		/* items > 0 guarantees used > 0 and sq > 0 here. */
		avg = sum / used;
		var = sq / used - avg * avg;
		/* Rounding can push a near-zero variance below zero. */
		if (var < 0.0)
			var = 0.0;
		stddev = sqrt(var);

		/*
		 * Sum of squared loads for the most even split possible:
		 * `ratio' buckets hold quot + 1 items, the rest hold quot.
		 * Done in double so the squares cannot overflow int.
		 */
		quot = items / hashsize;
		ratio = items % hashsize;
		perfect = quot * quot * (hashsize - ratio) +
		    (1 + quot) * (1 + quot) * ratio;
		score = perfect / sq;
		nice = 1.0 / score;

		usec = ((double)t3.tv_sec + (double)t3.tv_usec / 1000000.0) /
		    items * 1000000.0;
	} else {
		/* Nothing was hashed; avoid dividing by zero. */
		avg = stddev = nice = usec = 0.0;
	}

	printf("%-10s %6d %6d %6d %6.2f %6.2f %7.4f us  %8.4f\n",
	    name, used, collfree, max, avg, stddev, usec, nice);
}

/*
 * Parse a command-line argument as a strictly positive int.  `what'
 * names the argument in the error message.  Exits via errx() when the
 * text is not a decimal number, has trailing characters, or is out of
 * range.
 */
static int
parse_positive_int(const char *arg, const char *what)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (end == arg || *end != '\0' || errno == ERANGE ||
	    val < 1 || val > INT_MAX)
		errx(1, "invalid %s: %s", what, arg);
	return ((int)val);
}

/*
 * Usage: prog mode hashsize [datasize]
 *
 * mode:     a first character of 'b' selects binary input; anything
 *           else selects text input (one item per line).
 * hashsize: number of buckets; must be a power of two because buckets
 *           are selected by masking with hashsize - 1.
 * datasize: record size in bytes; required in binary mode.
 *
 * Loads the data set from stdin, then runs every hash function in the
 * table through dohash() and prints one row per function.
 */
int
main(int argc, char **argv)
{
	static const struct {
		char *name;
		unsigned int (*fn)(unsigned char *, size_t);
	} candidates[] = {
		{ "dumb", dumbhash },
		{ "dumbmul", dumbmulhash },
		{ "fnv", fnv },
		{ "lennart", lennart },
		{ "crc", crchash },
		{ "perl", perlhash },
		{ "perlxor", perlxorhash },
		{ "python", pythonhash },
		{ "bernstein", bernstein },
		{ "mouse", mousehash },
		{ "honeyman", honeyman },
		{ "pjw", pjwhash },
		{ "bob", bobhash },
		{ "torek", torekhash },
		{ "byacc", byacchash },
		{ "tcl", tclhash },
		{ "gawk", gawkhash },
		{ "gcc3", gcc3_hash },
		{ "gcc3_2", gcc3_hash2 },
		{ "nemesi", nemhash },
	};
	size_t i;

	if (argc < 3)
		errx(1, "bad usage");
	binary = *argv[1] == 'b';

	hashsize = parse_positive_int(argv[2], "hash size");
	/* A power of two has exactly one bit set. */
	if ((hashsize & (hashsize - 1)) != 0)
		errx(1, "hash size %d is not a power of two", hashsize);
	hashmask = hashsize - 1;

	if (binary) {
		/* argv[3] is only present when argc is at least 4. */
		if (argc < 4)
			errx(1, "binary mode needs a data size argument");
		datasize = parse_positive_int(argv[3], "data size");
		read_binary_data();
	} else
		read_text_data();

	printf("total items:  %d\n", items);

	printf("hash         used col/fr    max    avg  s/dev  hash time  niceness\n");
	for (i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++)
		dohash(candidates[i].name, candidates[i].fn);

	exit(0);
}
