/*
Postman's Sort (R) Version 1.0
Copyright (c) Robert Ramey 1991. All Rights Reserved
*/

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <assert.h>
#include <links.h>
#include "psort.h"
#include "sublist.h"
#include "key.h"
#include "io.h"
#include "os.h"

#define FILE_GUESS 50000
#define RECORD_SIZE_GUESS 80
#define NSIZE 31
	/* default maximum number of digits left of radix point */
	/* don't increase this beyond 127 */
#define MAX_BYTES_PER_PASS 8

/*********************************************************************
The following structure contains one member for each byte on which a
record may be distributed.  That is, if a sublist is to be distribiuted
on the first two letters of an alphabetic key, the first tow members of
the table will be used.  In this example byte_count will equal 2.
**********************************************************************/
typedef struct {
	unsigned int *value;	/* points to sequence value array */
	unsigned int order;	/* number of possible sequence values for this byte */
	unsigned int count;	/* number of sublists to skip to get to next one */
	unsigned int key_index;	/* index of key where this byte is found */
	unsigned int field_index; /* key_index + 1 */
	unsigned int depth;		/* displacement within key where byte is found */
} BYTE_TABLE;

/*********************************************************************
The following data stores the state of sublist arrays and sort keys
**********************************************************************/
typedef struct {
	SUBLIST *sublist;
	unsigned int
		sublist_count,	/* number of sublists at this level */
		byte_count;		/* number of bytes to test on current pass */
	struct {
		unsigned int
			icount, /* number of frames in environment */
			count;	/* number of data frames so far in memory */
		FILE_SIZE
			limit,	/* amount of memory to be cleared to disk at */
						/* at a time */
			size;	/* amount filled in current frame */
	} frame;
	BYTE_TABLE byte_table[MAX_BYTES_PER_PASS];
						/* see above for explanation of BYTE TABLE */
} ENVIRONMENT;

/*********************************************************************
global data
**********************************************************************/
BOOLEAN uflag = FALSE;		/* unique flag value */
RECORD *(*infunc)();	/* pointer to function which gets next record */
MEM_SIZE seg_length = 16;	/* default size of stack segment */
unsigned int max_sublists = 0;
						/* maximum number of sublists / segment */
ENVIRONMENT *env_ptr = (ENVIRONMENT *)NULL;
						/* pointer to current environment */
void
sort();
private void
psort(unsigned int, unsigned int, SUBLIST *);
private unsigned int
plan(ENVIRONMENT *, unsigned int, unsigned int, unsigned long, FILE_SIZE);
private int
dist(SUBLIST *, SUBLIST*);
private void
dsort(SUBLIST *, unsigned int);
private void
nsort(SUBLIST *, unsigned int);
private RECORD *
list_input(SUBLIST *);
private void
fill_fields(RECORD *);
private BOOLEAN
fits(FILE_SIZE, unsigned int);
void *
need(STACK *, size_t);
void *
get_space(STACK *, size_t);
private void
display_out(clock_t);
private clock_t
display_in(unsigned int, unsigned int, long, ENVIRONMENT *);
private void
free_memory(FILE_SIZE);
private void
free_frames(unsigned int);
/*********************************************************************
sort - top level driver for sort engine
**********************************************************************/
void
sort(){
	ENVIRONMENT env;	/* dummy initial environment */
	unsigned int n;
	unsigned long rc;
	FILE_SIZE fsize;
	clock_t time;

	/* miniature version of sort */
	/* for a full explanation see below */

	/* try to guess file size */
	fsize = os_flength(stdin);
	if(fsize <= 0L)
		fsize = FILE_GUESS;

	rc = fsize / (record_size ? record_size : RECORD_SIZE_GUESS);
	n = plan(&env, 0, 0, rc, fsize);
	env.sublist = sublist_allocate(n);
	env.sublist_count = n;

	/* initialize stacks */
	assert(stk_push(d_stack));

	env.frame.limit = (FILE_SIZE) rb_size * n * K;
	env.frame.size = 0;
	env.frame.icount =
	env.frame.count = 1;
	env_ptr = &env;

	sublist_fclose();

	time = display_in(0, 0, 0L, &env);

	dist((SUBLIST *)NULL, env.sublist);

	display_out(time);

	infunc = sublist_input;

	dsort(env.sublist, 0);

	stk_pop(d_stack);
	stk_free(d_stack);

	return;
}
/*************************************************************
psort - distribution sort
**************************************************************/
private
void
psort(key_index, depth, sublist)
unsigned int
	key_index,
	depth;
SUBLIST *sublist;
{
	ENVIRONMENT
		*prev_env,	/* pointer to previous environment */
		env;		/* current set fo environment variables */
	clock_t time;	/* used to record time function started */

	/* use statics to save stack space for 2nd order recurrsive function */
	static unsigned int n;
					/* number of sublists needed by on this pass */
	static long record_count;
					/* number of records in the input sublist */

	record_count  = sublist->pcount + sublist->count;

	/* take care of end points */
	if(record_count == 0)
		return;
	if(record_count == 1){
		sublist_output(sublist, uflag);
		return;
	}

	/* there are no more keys we're done */
	if(key_index >= key_count){
		sublist_output(sublist, uflag);
		return;
	}

	/* if current key is exhausted */
	if(depth >= key[key_index].size){
		/* move on to the next one */
		depth = 0;
		/* if that was the last key we're done */
		if(++key_index >= key_count){
			sublist_output(sublist, uflag);
			return;
		}
	}

	/* figure out how many key bytes to use in distribution */
	/* and fill them into byte table of new environment */
	n = plan(&env, key_index, depth, record_count, sublist->size);

	/* allocate the calculated number of sublists, creating a new */
	/* memory area if necessary */
	env.sublist = sublist_allocate(n);
	env.sublist_count = n;

	/* save state of storage */
	/* try to be sure that memory exists for next pass */
	free_memory(sublist->size);

	assert(stk_push(d_stack));
	env.frame.limit = (FILE_SIZE) rb_size * n * K;
	env.frame.size = 0;
	env.frame.icount =
	env.frame.count = env_ptr->frame.count+1;

	/* close switch to other swap file */
	sublist_fclose();

	/* save pointer to previous environment for end of function */
	prev_env = env_ptr;
	env_ptr = &env;

	time = display_in(key_index, depth, record_count, &env);

	/* distribute the input sublist among the new sublists */
	/* according to the key bytes specified in the byte table*/
	n = dist(sublist, env.sublist);

	if(n = 0){
		/* reuse space used by sublist array */
		*sublist = (env.sublist)[n];
		env.sublist = sublist;
		env.sublist_count = 1;
		sublist_shrink();
		dsort(env.sublist, env.byte_count);
	}
	else{
		/* sort sublists in the proper sequence */
		dsort(env.sublist, 0);
	}

	/* and recover environment of caller */
	sublist_free();
	do{
		assert(stk_pop(d_stack));
	}while(env.frame.count-- > env.frame.icount);
	env_ptr = prev_env;
	display_out(time);
	return;
}
/*********************************************************************
plan - Figure how many sublists should be created for distributing the
current sublist.  Figure how many bytes should be used to determine
where a record will be distributed.
**********************************************************************/
private
unsigned int
plan(env_ptr, key_index, depth, record_count, fsize)
ENVIRONMENT *env_ptr;/* address of environment where new byte table is*/
					/* to be loaded */
unsigned int
	key_index,		/* where the next key starts */
	depth;
unsigned long record_count;
					/* number of records in sublists to be distributed */
FILE_SIZE fsize;	/* total size of sublist records in bytes */
{
	unsigned int i, j, sublist_count;
	BYTE_TABLE *bptr;

	/* initialize accumulators */
	env_ptr->byte_count = 0;
	bptr = env_ptr->byte_table;
	sublist_count = 1;

	/* figure how many bytes we can handle at a time */
	for(;;){

		/* fill byte table entry */
		bptr->value = key[key_index].seq->value;
		bptr->order = key[key_index].seq->order;
		bptr->key_index = key_index;
		bptr->field_index = key_index + 1;
		bptr->depth = depth;

		/* figure how many sublists will be created by */
		/* continuing one more level deep */
		i = sublist_count * bptr->order + 1;

		if(sublist_count > 1){

			/* There is no point in spreading records among too many */
			/* sublists most of which will be empty */
			if(sublist_count > record_count)
				break;

			/* if sublist won't fit into a segment */
			if(i > max_sublists)
				break;

			/* if more sublists won't fit in memory with data */
			if( !fits(fsize, i)){

				/* if blocks written to disk are going to be too small */
				/* we can quit */
				if( !fits((FILE_SIZE)i * rb_size * K, i))
					break;
			}
		}
		sublist_count = i;

		/* increment pointer and counter */
		++bptr;
		++(env_ptr->byte_count);

		/* if this key has been totally checked */
		if(++depth >= key[key_index].size){
			/* if this is the last key */
			if(++key_index == key_count)
				/* we're done */
				break;
			/* there are more keys, go on to the next one */
			depth = 0;
		}
	}

	/* figure increments at each level of distribution */
	i = 1;
	for(j = env_ptr->byte_count; j-- > 0 ;){
		env_ptr->byte_table[j].count = i;
		i = 1 + i * env_ptr->byte_table[j].order;
	}
	return sublist_count;
}
/*********************************************************************
fits - determine whether or not memory exists for the specified areas
**********************************************************************/
private
BOOLEAN
fits(fsize, scount)
FILE_SIZE fsize;
unsigned int scount;
{
	unsigned int blk_count;

	blk_count = stk_unused() + stk_blks(d_stack);

	if(scount * sizeof(SUBLIST) > stk_avl(s_stack))
		if(blk_count < 2)
			return FALSE;
		else
			--blk_count;

	if((FILE_SIZE)blk_count * seg_length * K + stk_avl(d_stack) > fsize)
		return TRUE;
	else
		return FALSE;
}
/*********************************************************************
dist - Distribute input list among sublists created for this level
of key.  This is the heart of the sort.
**********************************************************************/
private
int
dist(input_sublist, sublist)
SUBLIST *input_sublist, *sublist;
{
	register BYTE_TABLE *bptr;
	RECORD *record_address;
	unsigned int i, j, disp, dcount;
	BOOLEAN hflag;	/* flag to hold output for unique output record */
	BOOLEAN oflag;	/* flag to output directly since no more keys */
	int pdisp;

	oflag =
		env_ptr->byte_table[0].key_index == key_count - 1
		?  TRUE : FALSE;

	hflag = FALSE;

	dcount =
	disp = 0;
	pdisp = -1;

	/* for each record in input list */
	while(record_address = (*infunc)(input_sublist)){

		disp = 0;
		bptr = env_ptr->byte_table;

		/* check each byte in key */
		i = env_ptr->byte_count;
		do{
			j =bptr->value[/* key table */
					(record_address->data)[/* record address */
						record_address->field[/* field pointers */
							bptr->field_index/* key field index */
						] + bptr->depth/* displacement in field */
					]
				];
			/* if key is terminated prematurly */
			if(j == 0)
				/* add to sublist in appropriate array */
				break;

			/* accumulate displacement within array of sublists */
			disp += 1 + (j - 1) * (bptr++)->count;
			/* and go on to next byte in key */

		/* continue as long as we can check more of the key */
		}while(--i);

		/* if last key and key value is the minimum possible */
		if(disp == 0 && oflag){
			/* write record out immediatly */
			if(!hflag)
				rec_output(record_address);
			/* if unique flag is set, make sure that was the */
			/* record out */
			hflag = uflag;
			/* free up space used by record */
			if(!rec_memflag(record_address))
				stk_drop(d_stack);
		}
		else{
			if(!rec_memflag(record_address))
				rec_frame(record_address) = env_ptr->frame.count;
			/* link record into appropriate sublist */
			assert(record_address);
			link(record_address, sublist[disp].memory, 0);
			assert(sublist);
			sublist[disp].memory = record_address;
			++(sublist[disp].count);
		}

		if(disp != pdisp){
			++dcount;
			pdisp = disp;
		}
	}
	if(dcount > 1)
		return 0;
	else
		return disp;
}
/**********************************************************
dsort - small function to sort sublists in proper sequence.
***********************************************************/
private
void
dsort(sublist, level)
SUBLIST *sublist;
unsigned level;
{
	unsigned int j;
	BYTE_TABLE *bptr;

	if(level == env_ptr->byte_count){
		bptr = &env_ptr->byte_table[level-1];
		psort(bptr->key_index, bptr->depth+1, sublist);
		return;
	}
	bptr = &env_ptr->byte_table[level];
	if(key[bptr->key_index].key_type == SIGN){
		nsort(sublist, level);
		return;
	}
	if(!key[bptr->key_index].inverted){
		psort(bptr->key_index+1, 0, sublist++);
		for(j = 0; j < bptr->order ; ++j){
			dsort(sublist, level+1);
			sublist += bptr->count;
		}
	}
	else{
		sublist += bptr->count * bptr->order + 1;
		for(j = bptr->order; j > 0; --j){
			sublist -= bptr->count;
			dsort(sublist, level+1);
		}
		psort(bptr->key_index+1, 0, --sublist);
	}
}
/**********************************************************
nsort - small function to sort sublists in proper sequence.
special version of dsort for numeric fields.
***********************************************************/
private
void
nsort(sublist, level)
SUBLIST *sublist;
unsigned int level;
{
	unsigned int key_index;
	int k, j;
	BYTE_TABLE *bptr;

	bptr = &env_ptr->byte_table[level];
	key_index = bptr->key_index;

	k = bptr->count;
	++sublist;
	if(key[key_index].inverted){
		sublist += k * bptr->order;
		k *= -1;
	}
	key[key_index+1].inverted = TRUE;
	key[key_index+2].inverted = TRUE;
	for(j = NSIZE; j > 0; --j){
		dsort(sublist, level + 1);
		sublist += k;
	}
	key[key_index+1].inverted = FALSE;
	key[key_index+2].inverted = FALSE;
	for(; j < NSIZE; ++j){
		dsort(sublist, level + 1);
		sublist += k;
	}
	return;
}
