/* Lexical analyser for the CRUNCH language. */

# include	"crunch.h"
# include	<pfloat.h>

# define	INC_YYTEXT	2048	/* Size increment for yytext[] buffer */

/**********************************************************************/
/*   One  character  push-back. Inside this file ungetchar() is a     */
/*   macro which stores the character in un_got_char; input() hands   */
/*   it back on the next call. A value of 0 cannot be pushed back     */
/*   because input() treats 0 as 'nothing pending'.		      */
/*   The  expansion  is  parenthesised so that it behaves as a single */
/*   expression wherever it is used.				      */
/**********************************************************************/
void	ungetchar PROTO((int));
# define	ungetchar(ch) (un_got_char = (ch))

/**********************************************************************/
/*   Table used for fast character classification. lextab[] holds a   */
/*   bit mask of the X... classes below for each character code.      */
/*   The  macros  cast  their  argument to unsigned char so that a    */
/*   plain  (possibly signed) char with the top bit set indexes the   */
/*   table in the range 0..255 rather than before its start.	      */
/**********************************************************************/
# define	XSYMBOL		0x01	/* [A-Za-z_]		*/
# define	XSPACE		0x02	/* White space.		*/
# define	XDIGIT		0x04	/* Decimal digit.	*/
# define	XHEXDIGIT	0x08	/* Hex digit.		*/
# define	issymbol(ch)	(lextab[(unsigned char) (ch)] & XSYMBOL)
# define	isspace(ch)	(lextab[(unsigned char) (ch)] & XSPACE)
# define	isdigit(ch)	(lextab[(unsigned char) (ch)] & XDIGIT)

/**********************************************************************/
/*   Character  classification  table,  indexed  by  character code.  */
/*   Each  entry  is  the  OR  of  the  XSYMBOL, XSPACE, XDIGIT and   */
/*   XHEXDIGIT bits which apply to that character:		      */
/*	0x02	white space (TAB and SPACE only)		      */
/*	0x0c	decimal digit, which is also a hex digit	      */
/*	0x09	symbol character which is also a hex digit (A-F, a-f) */
/*	0x01	any other symbol character (letters and '_')	      */
/*   Newline  and  carriage return are deliberately not white space;  */
/*   yylex() handles them itself.				      */
/**********************************************************************/
unsigned char lextab[256] = {
/*0x00*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 0x09 is TAB */
	
/*0x10*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	
/*0x20*/0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 0x20 is SPACE */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	
/*0x30*/0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,	/* '0'..'7' */
	0x0c, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* '8', '9', then punctuation */
	
/*0x40*/0x00, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x01,	/* '@', 'A'..'F', 'G' */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,	/* 'H'..'O' */
	
/*0x50*/0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,	/* 'P'..'W' */
	0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01,	/* 'X'..'Z', punctuation, '_' */
	
/*0x60*/0x00, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x01,	/* '`', 'a'..'f', 'g' */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,	/* 'h'..'o' */
	
/*0x70*/0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,	/* 'p'..'w' */
	0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 'x'..'z', punctuation, DEL */
	
	/* Codes 0x80..0xff belong to no class. */
/*0x80*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	
/*0x90*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	
/*0xa0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	
/*0xb0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	
/*0xc0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	
/*0xd0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	
/*0xe0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	
/*0xf0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	
	};
/**********************************************************************/
/*   Token  table.  Each  row gives a token value, the keyword which  */
/*   produces  it in the source language (NULL for operators and      */
/*   punctuation,  which  yylex() recognises directly) and the text   */
/*   which map() returns for that token value.			      */
/*								      */
/*   Ordering  matters:  lookup_keywd() stops at the first row whose  */
/*   src_token  is  NULL,  so every keyword row must come before the  */
/*   first  operator  row.  map()  stops at the first row whose name  */
/*   is NULL, which is the all-zero row at the end.		      */
/**********************************************************************/
struct map {
	int	val;		/* Token value returned by yylex().	*/
	char	*src_token;	/* Token in source language.	*/
	char	*name;		/* Token in CRISP macro language. */
	} opcodes[] = {
	/* Keywords: looked up by lookup_keywd(). */
	K_DEFAULT,	"default",	"default",
	K_LIST,		"list",		"list",
	K_FLOAT,	"float",	"float",
	K_DOUBLE,	"double",	"double",
	K_INT,		"int",		"int",
	K_STRING,	"string",	"string",
	K_DECLARE,	"declare",	"declare",
	K_IF,		"if",		"if",
	K_ELSE,		"else",		"else",
	K_FOR,		"for",		"for",
	K_BREAK,	"break",	"break",
	K_CONTINUE,	"continue",	"continue",
	K_WHILE,	"while",	"while",
	K_SWITCH,	"switch",	"switch",
	K_CASE,		"case",		"",	/* Maps to the empty string. */
	K_RETURN,	"return",	"return",
	K_STRUCT,	"struct",	"struct",
	K_UNION,	"union",	"union",
	K_EXTERN,	"extern",	"extern",
	K_STATIC,	"static",	"static",
	K_CONST,	"const",	"const",
	K_VOLATILE,	"volatile",	"volatile",
	K_LONG,		"long",		"long",
	K_CHAR,		"char",		"char",
	K_SHORT,	"short",	"short",
	K_UNSIGNED,	"unsigned",	"unsigned",
	K_VOID,		"void",		"void",
	K_SIGNED,	"signed",	"signed",
	K_DO,		"do",		"do",
	K_SIZEOF,	"sizeof",	"sizeof",
	K_TYPEDEF,	"typedef",	"typedef",
	K_GLOBAL,	"global",	"global",
	/* Punctuation and operators: no source keyword, used only by map(). */
	OCURLY,		(char *) NULL,		"{",
	CCURLY,		(char *) NULL,		"}",
	OROUND,		(char *) NULL,		"(",
	CROUND,		(char *) NULL,		")",
	COMMA,		(char *) NULL,		",",
	SEMICOLON,	(char *) NULL,		";",
	EQ,    		(char *) NULL,		"=",
	PLUS_EQ,	(char *) NULL,		"+=",
	MINUS_EQ,	(char *) NULL,		"-=",
	MUL_EQ,		(char *) NULL,		"*=",
	DIV_EQ,		(char *) NULL,		"/=",
	MOD_EQ,		(char *) NULL,		"%=",
	AND_EQ,		(char *) NULL,		"&=",
	OR_EQ,		(char *) NULL,		"|=",
	XOR_EQ,		(char *) NULL,		"^=",
	EQ_OP,		(char *) NULL,		"==",
	
	NE_OP,		(char *) NULL,		"!=",
	GE_OP,		(char *) NULL,		">=",
	GT_OP,		(char *) NULL,		">",
	LE_OP,		(char *) NULL,		"<=",
	LT_OP,		(char *) NULL,		"<",
	ARROW,		(char *) NULL,		"->",
	LSHIFT,		(char *) NULL,		"<<",
	RSHIFT,		(char *) NULL,		">>",
	LSHIFT_EQ,	(char *) NULL,		"<<=",
	RSHIFT_EQ,	(char *) NULL,		">>=",
	
	CAND,		(char *) NULL,		"&&",
	COR,		(char *) NULL,		"||",
	PLUS,		(char *) NULL,		"+",
	MINUS,		(char *) NULL,		"-",
	MUL,		(char *) NULL,		"*",
	DIV,		(char *) NULL,		"/",
	MOD,		(char *) NULL,		"%",
	OR,		(char *) NULL,		"|",
	AND,		(char *) NULL,		"&",
	XOR,		(char *) NULL,		"^",
	NOT,		(char *) NULL,		"!",
	COMPLEMENT,	(char *) NULL,		"~",
	PLUS_PLUS,	(char *) NULL,		"++",
	MINUS_MINUS,	(char *) NULL,		"--",
	COLON,		(char *) NULL,		":",
	OSQUARE,	(char *) NULL,		"[",
	CSQUARE,	(char *) NULL,		"]",
	DOT,		(char *) NULL,		".",
	/* End-of-table marker: NULL src_token and NULL name. */
	0, 0, 0
	};

char	*yytext;		/* Text of the current token; allocated	*/
				/* by init_lex() and grown by		*/
				/* get_while() and get_string().	*/
int	yytext_size;		/* Number of bytes allocated to yytext. */
int	yyleng;			/* Number of bytes in string.		*/
char	*current_fn;		/* Current function name for error messages*/

int	un_got_char;		/* Character pushed back by ungetchar(), */
				/* or 0 if nothing is pending.		*/
int	col_no;			/* Column of the last character read by */
				/* input(); reset to 0 by lex_newline(). */
				/* yylex() uses it to spot '#' in	*/
				/* column one.				*/
int	print_line = 1;		/* Set to FALSE if we don't want line in */
				/* error printed. yyerror1() clears it	*/
				/* while it calls yyerror().		*/
int line_no;			/* Current line number, advanced by	*/
				/* lex_newline() and reset by '#' lines. */
FILE	*lex_fp;		/* File pointer for current input file. */
int	warnings;		/* Number of warnings.			*/
int	fatals;			/* Number of fatal errors.		*/
Head_p	hd_filenames;		/* List of filenames included (needed so */
				/* we can put correct file name with each */
				/* symbol defined without duplicated	*/
				/* filename for each symbol).		*/
char	function_name[256];	/* Last function name printed in warning */
				/* or error.				*/
extern int no_warnings;		/* Non-zero suppresses yywarn() output. */
char	*strdup();
static	void get_escaped_character PROTO((char *));

/* Prototypes for the functions defined later in this file. */
char	*get_filename PROTO((char *));
int	get_quoted_string PROTO((int));
void	get_while PROTO((int));
int	lookup_keywd PROTO((char *));
void	get_string PROTO((void));
void	lex_newline PROTO((void));
int	input PROTO((void));
void	seek_to_line PROTO((FILE *, int));
void	free_filenames PROTO((void));

/* yylex() returns its tokens through RETURN(); here it is a plain return. */
# define	RETURN	return

/**********************************************************************/
/*   Reset  the  lexer  before  a  new  source file is read: drop any */
/*   pending  push-back  character,  clear the column and the error   */
/*   and  warning  counts,  forget the last function name reported    */
/*   and  release  the  recorded  filenames.  The  yytext buffer is   */
/*   allocated on the first call only and is kept across files.	      */
/**********************************************************************/
void
init_lex()
{
	/* Filenames recorded for the previous file are no longer needed. */
	free_filenames();

	function_name[0] = 0;
	fatals = 0;
	warnings = 0;
	col_no = 0;
	un_got_char = 0;

	/* A non-zero size means the token buffer already exists. */
	if (yytext_size != 0)
		return;
	yytext_size = 128;
	yytext = (char *) chk_alloc(yytext_size);
}
/**********************************************************************/
/*   Lexer  entry  point:  returns  the  next token from lex_fp, or   */
/*   0 at end of input (input() reports EOF as 0).		      */
/*								      */
/*   White  space,  carriage  returns,  newlines  and  both  styles   */
/*   of   comment   are  skipped.  Token  values  are  left  in       */
/*   yylval:							      */
/*	SYMBOL, STRING	yylval.sval is a strdup() copy of the text.   */
/*	TYPEDEF_NAME	yylval.sval points at yytext itself, so it is */
/*			only valid until the next token is read.      */
/*	INTEGER		yylval.ival (numbers and character constants).*/
/*	FLOAT_CONST	yylval.floatval, filled in by parse_number(). */
/*								      */
/*   A  '#'  in  column  one  introduces  a  line directive which     */
/*   resets line_no and filename. Anything unrecognised is reported   */
/*   through yyerror1() and skipped.				      */
/**********************************************************************/
int
yylex()
{
	register int	ch;
	char	tmpbuf[256];
	
	while (1) {
		ch = input();
		if (isspace(ch))
			continue;

		/***********************************************/
		/*   Check  for  a symbol and save in yytext.  */
		/*   Keep expanding yytext if too big.	       */
		/***********************************************/
		if (issymbol(ch)) {
			int	k;
			symbol_t *sp;
			ungetchar(ch);
			get_while(XSYMBOL | XDIGIT);
			k = lookup_keywd(yytext);
			if (k > 0)
				return k;
			sp = lookup_sym(yytext, TRUE);
			yylval.sval = yytext; /*strdup(yytext);*/
			/* NOTE(review): 5 is presumably the typedef */
			/* storage class -- confirm against SC_MASK. */
			if (sp && 
			   ((sp->s_type & SC_MASK) >> SC_SHIFT) == 5) {
				return TYPEDEF_NAME;
				}
			yylval.sval = strdup(yytext);
			RETURN(SYMBOL);
			}
		if (isdigit(ch) || ch == '.') {
			int ret = parse_number(ch, &yylval.floatval, &yylval.ival);
			switch (ret) {
			  case PARSE_ERR_ELLIPSIS_ERROR:
				yyerror1("Ellipsis has the syntax '...'", (char *) NULL);
				continue;
			  case PARSE_ERR_FLOAT_SYNTAX_ERROR:
				yyerror1("Syntax error reading floating point constant", (char *) NULL);
				continue;
			  case PARSE_ERR_EOF_IN_FLOAT:
				yyerror1("EOF detected reading floating point constant", (char *) NULL);
				continue;
			  case PARSE_INTEGER:
			  	return INTEGER;
			  case PARSE_FLOAT:
			  	return FLOAT_CONST;
			  case PARSE_ELLIPSIS:
				return K_DOTS;
			  case PARSE_DOT:
				return DOT;
			  }
			continue;
			}
		switch (ch) {
			case '\r':
				continue;
			case '\n': 
				lex_newline();
				continue;
			case '(': RETURN(OROUND);
			case ')': RETURN(CROUND);
			case '{': RETURN(OCURLY);
			case '}': RETURN(CCURLY);
			case ',': RETURN(COMMA);
			case ';': RETURN(SEMICOLON);
			case ':': RETURN(COLON);
			case '"':
				get_string();
				yylval.sval = strdup(yytext);
				RETURN(STRING);
			case '=':
				ch = input();
				if (ch == '=')
					RETURN(EQ_OP);
				ungetchar(ch);
				RETURN(EQ);
			case '!':
				ch = input();
				if (ch == '=')
					RETURN(NE_OP);
				ungetchar(ch);
				RETURN(NOT);
				
			case '>':
				ch = input();
				if (ch == '=')
					RETURN(GE_OP);
				if (ch == '>') {
					ch = input();
					if (ch == '=')
						RETURN(RSHIFT_EQ);
					ungetchar(ch);
					RETURN(RSHIFT);
					}
				ungetchar(ch);
				RETURN(GT_OP);
			case '<':
				ch = input();
				if (ch == '=')
					RETURN(LE_OP);
				if (ch == '<') {
					ch = input();
					if (ch == '=')
						RETURN(LSHIFT_EQ);
					ungetchar(ch);
					RETURN(LSHIFT);
					}
				ungetchar(ch);
				RETURN(LT_OP);
			
			case '+':
				ch = input();
				if (ch == '=')
					RETURN(PLUS_EQ);
				if (ch == '+')
					RETURN(PLUS_PLUS);
				ungetchar(ch);
				RETURN(PLUS);
			case '-':
				ch = input();
				if (ch == '=')
					RETURN(MINUS_EQ);
				if (ch == '-')
					RETURN(MINUS_MINUS);
				if (ch == '>')
					RETURN(ARROW);
				ungetchar(ch);
				RETURN(MINUS);
			case '*':
				ch = input();
				if (ch == '=')
					RETURN(MUL_EQ);
				ungetchar(ch);
				RETURN(MUL);
			case '/':
				ch = input();
				if (ch == '=')
					RETURN(DIV_EQ);
				if (ch == '*') {
					while (1) {
						ch = input();
						/***********************************************/
						/*   input()  keeps  returning  0 once the    */
						/*   file  is  exhausted,  so  an unclosed    */
						/*   comment must end the scan here.	       */
						/***********************************************/
						if (ch == 0) {
							yyerror("EOF detected inside comment");
							RETURN(0);
							}
						if (ch == '\n')
							lex_newline();
						if (ch != '*')
							continue;
						ch = input();
						if (ch == '/')
							break;
						/* Re-examine it: it may be a newline or another '*'. */
						ungetchar(ch);
						}
					continue;
					}
				/***********************************************/
				/*   Allow // style comments.		       */
				/***********************************************/
				if (ch == '/') {
					/* Stop at EOF as well as at the end of the line. */
					do
						ch = input();
					while (ch != '\n' && ch != 0);
					if (ch == '\n')
						lex_newline();
					continue;
					}
				ungetchar(ch);
				RETURN(DIV);
			case '%':
				ch = input();
				if (ch == '=')
					RETURN(MOD_EQ);
				ungetchar(ch);
				RETURN(MOD);
			case '^':
				ch = input();
				if (ch == '=')
					RETURN(XOR_EQ);
				ungetchar(ch);
				RETURN(XOR);
			case '&':
				ch = input();
				if (ch == '=')
					RETURN(AND_EQ);
				if (ch == '&')
					RETURN(CAND);
				ungetchar(ch);
				RETURN(AND);
			case '|':
				ch = input();
				if (ch == '=')
					RETURN(OR_EQ);
				if (ch == '|')
					RETURN(COR);
				ungetchar(ch);
				RETURN(OR);
			case '[':
				RETURN(OSQUARE);
			case ']':
				RETURN(CSQUARE);
			case '?':
				RETURN(QUESTION);
			case '~': RETURN(COMPLEMENT);
			case '#': {
				/* Static because 'filename' is left pointing into it. */
				static char	buf[128];
				char	*cp = buf;
				char	*end = buf + sizeof buf - 1;
	
				/* Only a '#' in column one starts a directive. */
				if (col_no != 1)
					break;
				/***********************************************/
				/*   Read  the  rest of the line. Characters  */
				/*   which  do  not  fit  in  buf[]  are      */
				/*   discarded,  and  EOF ends the line as    */
				/*   well as a newline.			       */
				/***********************************************/
				while ((ch = input()) != '\n' && ch != 0) {
					if (cp < end)
						*cp++ = (char) ch;
					}
				*cp = '\0';
				if (ch == '\n')
					lex_newline();
				for (cp = buf; *cp == ' '; cp++)
					;
				/* The first number on the line is the new line number. */
				/* NOTE(review): a directive with no number sets */
				/* line_no to 0 -- confirm this is intended.	 */
				while (*cp && !isdigit((unsigned char) *cp))
					cp++;
				line_no = atoi(cp);
				/* A following "name" becomes the current filename. */
				while (*cp && *cp != '"')
					cp++;
				if (*cp == '"') {
					filename = cp + 1;
					for (cp = filename; *cp && *cp != '"'; cp++)
						;
					*cp = '\0';
					}
				continue;
				}
			case '\'':
				if (get_quoted_string('\'') == FALSE)
					yyerror("Character constant too long or unterminated.");
				yylval.ival = yytext[0];
				RETURN(INTEGER);
			case 0: RETURN(0);
			}
		sprintf(tmpbuf, "Invalid token/character '%c'", ch);
		/* Pass the text as an argument so it is never parsed as a format. */
		yyerror1("%s", tmpbuf);
		}
}
int
get_quoted_string(quote)
int	quote;
{	register unsigned char *cp = (unsigned char *) yytext;
	register int yytchar;

	if (quote == '"')
		*cp++ = '"';
	while (1) {
		if ((yytchar = input()) == 0) {
			*cp = NULL;
			return FALSE;
			}
		if (yytchar == quote) {
			*cp = NULL;
			break;
			}
		if (yytchar == '\n') {
			*cp = NULL;
			return FALSE;
			}
		if (yytchar != '\\') {
			*cp++ = (char) yytchar;
			continue;
			}
		*cp++ = '\\';
		*cp++ = (unsigned char) input();
		}

	for (cp = (unsigned char *) yytext; *cp; cp++) {
		if (*cp != '\\')
			continue;
		get_escaped_character((char *) cp);
		}
		
	return TRUE;
}
/**********************************************************************/
/*   Return  the  value  (0..15)  of the hexadecimal digit 'c', or -1 */
/*   if 'c' is not a hexadecimal digit.				      */
/**********************************************************************/
static int
hex_digit_value(c)
int	c;
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	return -1;
}
/**********************************************************************/
/*   Expand  one  escape sequence in place. 'str' points at the	      */
/*   backslash;  on  return  that  byte  holds the character the      */
/*   sequence  stands  for  and the rest of the string has been	      */
/*   moved down to follow it.					      */
/*								      */
/*   Recognised:  \t  \n  \f  \r  and \x followed by one or two hex   */
/*   digits.  Any other character after the backslash stands for      */
/*   itself,  and  so  does  the 'x' of a \x which has no hex digit   */
/*   after  it.  A  backslash  which is the last character of the     */
/*   string is simply removed.					      */
/**********************************************************************/
static void
get_escaped_character(str)
char	*str;
{	int	byte;
	int	digit;
	char	*charp = str++;		/* Where the result is stored. */
	char	ch = *str;

	/* Never step over the terminating NUL. */
	if (ch != '\0')
		str++;

	switch (ch) {
	  case 't':	byte = '\t'; break;
	  case 'n':	byte = '\n'; break;
	  case 'f':	byte = '\f'; break;
	  case 'r':	byte = '\r'; break;
	  case 'x':
		digit = hex_digit_value(*str);
		if (digit < 0) {
			/* No digits: treat it like any unknown escape. */
			byte = 'x';
			break;
			}
		byte = digit;
		str++;

		/***********************************************/
		/*   Second digit (optional).		       */
		/***********************************************/
		digit = hex_digit_value(*str);
		if (digit >= 0) {
			byte = (byte << 4) + digit;
			str++;
			}
		break;
	  default:
		byte = ch;
		break;
	  }
	*charp++ = (char) byte;
	/***********************************************/
	/*   Source  and  destination overlap, which   */
	/*   strcpy()  does  not  allow,  so move the  */
	/*   tail down a byte at a time.	       */
	/***********************************************/
	while ((*charp++ = *str++) != '\0')
		;
}
/**********************************************************************/
/*   Note  that  a  newline  has  been consumed: move on to the next  */
/*   line and restart the column count.				      */
/**********************************************************************/
void
lex_newline()
{
	line_no += 1;
	col_no = 0;
}
/**********************************************************************/
/*   Return  the  next  input character. A character pushed back by   */
/*   ungetchar()  is  returned first (and does not move the column    */
/*   count);  otherwise  one  is  read  from  lex_fp and col_no is    */
/*   advanced. End of file is reported as 0.			      */
/**********************************************************************/
int
input()
{	int	next;

	if (un_got_char != 0) {
		next = un_got_char;
		un_got_char = 0;
		return next;
		}
	col_no++;
	next = fgetc(lex_fp);
	if (next == EOF)
		return 0;
	return next;
}
/**********************************************************************/
/*   Collect  into  yytext  the  run  of  input characters whose      */
/*   lextab[]  entry  has  any  of the bits in 'class' set. The	      */
/*   first  character  which  does not match is pushed back. yytext   */
/*   is  grown  as  needed  and is NUL terminated; yyleng is set to   */
/*   the number of characters collected.			      */
/**********************************************************************/
void
get_while(class)
int	class;
{	register int ch;

	yyleng = 0;
	while (1) {
		ch = input();
		/* Keep room for this character and the terminator. */
		if (yyleng >= yytext_size - 1) {
			yytext_size += INC_YYTEXT;
			yytext = (char *) chk_realloc(yytext, yytext_size);
			}
		if ((lextab[ch] & class) == 0) {
			ungetchar(ch);
			/* A character, not the NULL pointer, ends the string. */
			yytext[yyleng] = '\0';
			return;
			}
		yytext[yyleng++] = (char) ch;
		}
}
/**********************************************************************/
/*   Read  a  string  constant  into yytext. The opening '"' has      */
/*   already  been  consumed;  reading  stops  at  the closing '"',   */
/*   which  is  not  stored.  Backslash  sequences  are copied	      */
/*   unchanged  (backslash  and  the  character after it), so an      */
/*   escaped quote or newline does not end the string.		      */
/*								      */
/*   An  unescaped  newline  or  the  end of the file ends the string */
/*   with  an  'Unterminated  string  constant'  error.  yytext is    */
/*   grown as needed and always NUL terminated; yyleng is its length. */
/**********************************************************************/
void
get_string()
{	register int ch;

	yyleng = 0;
	while (1) {
		ch = input();
		if (yyleng >= yytext_size - 1) {
			yytext_size += INC_YYTEXT;
			yytext = (char *) chk_realloc(yytext, yytext_size);
			}
		if (ch == '"') {
			yytext[yyleng] = '\0';
			return;
			}
		/***********************************************/
		/*   input()  returns  0  for ever once the    */
		/*   file  is  exhausted,  so EOF must end the */
		/*   string too.			       */
		/***********************************************/
		if (ch == '\n' || ch == 0) {
			yyerror("Unterminated string constant");
			yytext[yyleng] = '\0';
			/* Count the newline only after the error is reported. */
			if (ch == '\n')
				lex_newline();
			return;
			}
		if (ch == '\\') {
			yytext[yyleng++] = '\\';
			ch = input();
			/* EOF after the backslash: report it on the next pass. */
			if (ch == 0)
				continue;
			}
		if (ch == '\n')
			lex_newline();
		yytext[yyleng++] = (char) ch;
		}
}
/**********************************************************************/
/*   Look  'str'  up  among  the keyword rows of opcodes[] (those at  */
/*   the  start  of  the  table  with  a  non-NULL src_token). Returns */
/*   the token value of the matching keyword, or -1 if there is none. */
/**********************************************************************/
int
lookup_keywd(str)
register char *str;
{	register struct map *entry = opcodes;

	while (entry->src_token) {
		/* Compare first characters before paying for strcmp(). */
		if (entry->src_token[0] == str[0] &&
		    strcmp(str, entry->src_token) == 0)
			return entry->val;
		entry++;
		}
	return -1;
}

/**********************************************************************/
/*   Return  the  name  field of the opcodes[] row whose token value  */
/*   is  'word'.  If  there  is  no such row the value is formatted   */
/*   as  "<hex>"  in  a  static  buffer,  which  the  next  such call  */
/*   overwrites.						      */
/**********************************************************************/
char *
map(word)
register int	word;
{	static char buf[32];
	register struct map *entry = opcodes;

	while (entry->name) {
		if (entry->val == word)
			return entry->name;
		entry++;
		}
	sprintf(buf, "<%x>", word);
	return buf;
}
/**********************************************************************/
/*   Function  to  print  the  current  function name if its changed  */
/*   since the last warning/error message.			      */
/*								      */
/*   Does  nothing  when  current_fn is NULL. Otherwise an 'In	      */
/*   function'  line  is  written  to errfp if current_fn is not      */
/*   empty  and  differs  from function_name, and current_fn is then  */
/*   remembered  in  function_name.  The  copy  is  limited  to the   */
/*   size  of  function_name[];  a  longer  name  is  stored	      */
/*   truncated  and  so  is  announced  again  on  every  call.	      */
/**********************************************************************/
void
print_function_name()
{	int	i;
	int	limit = (int) sizeof function_name - 1;

	if (!current_fn)
		return;

	if (current_fn[0] && 
	    strcmp(current_fn, function_name) != 0)
		fprintf(errfp, "%s: In function %s():\n", 
			filename, current_fn);

	/* Bounded copy: current_fn may be longer than function_name[]. */
	for (i = 0; i < limit && current_fn[i] != '\0'; i++)
		function_name[i] = current_fn[i];
	function_name[i] = '\0';
}
/**********************************************************************/
/*   Report  a  fatal  error.  The  enclosing  function  is announced */
/*   first  if  it  has changed, then "file:line: message" is written */
/*   to errfp and the count of fatal errors is increased.	      */
/**********************************************************************/
void
yyerror(str)
char *str;
{
	print_function_name();
	fprintf(errfp, "%s:%d: %s\n", filename, line_no, str);
	fatals++;
}
/**********************************************************************/
/*   Position  'fp'  at  the  start  of  line  'line_no', counting    */
/*   from  zero:  the  file  is  rewound  and  line_no  lines  are    */
/*   skipped.  If  the  file  has fewer lines, fp is left at end of   */
/*   file.							      */
/*								      */
/*   Lines  are  counted  by  their  newline characters, so a line    */
/*   of  any  length  counts  exactly  once (reading with a fixed     */
/*   size buffer would count a long line several times).	      */
/**********************************************************************/
void
seek_to_line(fp, line_no)
FILE	*fp;
int	line_no;
{	int	ch;

	if (fp == NULL)
		return;
	/* As before, a failed seek is ignored and lines are skipped */
	/* from wherever the stream happens to be.		     */
	(void) fseek(fp, 0L, SEEK_SET);
	while (line_no > 0 && (ch = getc(fp)) != EOF) {
		if (ch == '\n')
			line_no--;
		}
}
/**********************************************************************/
/*   Report  a  fatal  error  whose text is built from a printf style */
/*   format  and  one  string  argument  (pass  a NULL argument with  */
/*   a  format  which  has  no  conversions).  The  message  is	      */
/*   formatted  into  a  local  buffer,  truncated  if it would not   */
/*   fit,  and  handed  to  yyerror().  print_line  is  cleared	      */
/*   for the duration of that call and set back to 1 afterwards.      */
/**********************************************************************/
/* VARARGS1 */
void
yyerror1(fmt, arg)
char *fmt;
char *arg;
{	char buf[BUFSIZ];

	print_line = 0;
	/* Bounded: 'arg' may be an identifier of any length. */
	snprintf(buf, sizeof buf, fmt, arg);
	yyerror(buf);
	print_line = 1;
}
/**********************************************************************/
/*   Write  a  warning  for  the  current  line  to  errfp, after     */
/*   announcing  the  enclosing  function  if  it  has  changed.      */
/*   Nothing is written when no_warnings is set.		      */
/**********************************************************************/
void
yywarn(str)
char 	*str;
{
	if (!no_warnings) {
		print_function_name();
		fprintf(errfp, "%s:%d: warning: %s\n", filename, line_no, str);
		}
}
/**********************************************************************/
/*   Write  a  warning  to  errfp  which  is  attributed  to  line    */
/*   'ln'  rather  than  to  the  line  currently  being read. As     */
/*   with yywarn(), nothing is written when no_warnings is set.	      */
/**********************************************************************/
void
yywarn_with_line_no(str, ln)
char 	*str;
int	ln;
{
	if (!no_warnings) {
		print_function_name();
		fprintf(errfp, "%s:%d: warning: %s\n", filename, ln, str);
		}
}
/**********************************************************************/
/*   Return  a  shared  copy  of  the filename 'name'. If an equal    */
/*   string  is  already  on  the hd_filenames list that entry is     */
/*   returned;  otherwise  a  strdup()  copy  is  pushed on to the    */
/*   list  and  returned.  The  list  is  created on first use and    */
/*   emptied by free_filenames().				      */
/*   NOTE(review):  the  copies  come  from  strdup()  but  are	      */
/*   released  with  chk_free()  --  confirm  the  two are	      */
/*   compatible.						      */
/**********************************************************************/
char *
get_filename(name)
register char *name;
{	register List_p node;
	char	*copy;

	if (hd_filenames != NULL) {
		node = ll_first(hd_filenames);
		while (node) {
			if (strcmp(ll_elem(node), name) == 0)
				return ll_elem(node);
			node = ll_next(node);
			}
		}
	else
		hd_filenames = ll_init();

	/* Not seen before: remember a private copy. */
	copy = strdup(name);
	ll_push(hd_filenames, copy);
	return copy;
}
/**********************************************************************/
/*   Release  every  filename  recorded  by  get_filename(): each     */
/*   string  is  freed  and  its  list  entry deleted, then the list  */
/*   itself  is  freed  and  hd_filenames  is  set  back to NULL. A   */
/*   NULL list is left alone.					      */
/**********************************************************************/
void
free_filenames()
{	register List_p node;

	if (hd_filenames != NULL) {
		/* Always take the first entry: deleting it exposes the next. */
		for (node = ll_first(hd_filenames); node != NULL;
		     node = ll_first(hd_filenames)) {
			chk_free((void *) ll_elem(node));
			ll_delete(node);
			}
		ll_free(hd_filenames);
		hd_filenames = NULL;
		}
}
/**********************************************************************/
/*   Define  routine  to unget character. Define it as a function so  */
/*   that the parse_number() function can access it.		      */
/*								      */
/*   The  macro  of  the  same  name, used by the code above, is      */
/*   removed  first  so  that  a  real function can be defined. The   */
/*   character  is  stored  in  un_got_char  and is returned by the   */
/*   next  call  of  input().  Only  one character can be held, and   */
/*   0 cannot be pushed back because input() treats 0 as 'nothing     */
/*   pending'.							      */
/**********************************************************************/
# undef ungetchar
void
ungetchar(ch)
int	ch;
{
	un_got_char = ch;
}

