// ForthCompiler.cpp
//
// FORTH compiler to generate FORTH Byte Code (FBC) from expressions
//   or programs
//
// Copyright (c) 1998--2009 Krishna Myneni and David P. Wallace 
//   Creative Consulting for Research and Education
//
// This software is provided under the terms of the General Public License.
//
// Revisions:
// 	9-12-1998
//	9-15-1998 added SP@, RP@, +!
//      9-16-1998 added -ROT, PICK, ROLL, A@ 
//	9-18-1998 error checking for incomplete structures at end of definition
//	10-6-1998 added ?DUP
//	10-14-1998 fixed COUNT
//	10-19-1998 added 0<, 0=, 0>, TRUE, FALSE, INVERT
//      02-09-1999 added EXECUTE, ' (tick)
//      03-01-1999 added OPEN, LSEEK, CLOSE, READ, WRITE
//      03-02-1999 added IOCTL
//      03-03-1999 added USLEEP
//      03-07-1999 added FILL, CMOVE
//      03-27-1999 added +LOOP, UNLOOP
//      03-31-1999 added CMOVE>, KEY
//      05-06-1999 added FLOOR, FROUND
//      05-24-1999 added FATAN2, LSHIFT, RSHIFT
//      05-27-1999 added ACCEPT
//      05-29-1999 added QUIT, BASE, BINARY, DECIMAL, HEX, U<, U.
//      06-02-1999 added */, */MOD, NUMBER?
//      06-05-1999 added CHAR (ASCII)
//      06-09-1999 function IsInt now calls Forth's NUMBER? 
//      06-16-1999 fixed to allow multiple LEAVEs within single DO-LOOP
//      07-18-1999 added FIND
//      08-24-1999 compiler reports redefinition of words
//      09-06-1999 added use of global ptr pTIB to permit implemetation of TICK, WORD, etc.
//      09-12-1999 added SYSTEM
//      10-2-1999  used precedence byte to determine execution of non-deferred words
//      10-4-1999 added CREATE, VARIABLE, FVARIABLE as intrinsic words
//      10-6-1999 added CONSTANT, FCONSTANT as intrinsic words
//      10-7-1999 added CHDIR
//      10-8-1999 added ERASE, [']
//      10-9-1999 added TIME&DATE, MS, ?, 2@, 2!, BL
//      10-20-1999 moved global input and output stream pointers into
//                   this module from ForthVM.cpp; added >FILE, CONSOLE
//      10-28-1999 added KEY?
//      11-16-1999 added RECURSE
//      12-14-1999 fixed ExtractName for case of null string
//      12-24-1999 added U>, F0=, F0<, S>D, D>F, F>D
//      12-25-1999 added CELLS, CELL+, CHAR+, DFLOATS, DFLOAT+, SFLOATS, SFLOAT+
//      12-27-1999 added BYE
//      1-13-2000  added ?ALLOT
//      1-23-2000  added 0<>, .R, U.R, [CHAR]; removed ASCII
//      1-24-2000  added LITERAL, made '"' and '."' immediate words
//      2-14-2000  added M*, UM*, FM/MOD
//      2-26-2000  added SM/REM, UM/MOD
//      3-1-2000   display VM errors
//      3-5-2000   changed DO, LEAVE, BEGIN, WHILE, REPEAT, UNTIL, AGAIN,
//                   IF, ELSE, THEN, RECURSE, and '(' from compiler directives
//                   to actual words.
//      3-7-2000   ensure control stacks are cleared after VM error.
//      5-17-2000  added DOES>
//      6-11-2000  added CASE, ENDCASE, OF, ENDOF
//      6-15-2000  added ?DO, ABORT"
//      8-08-2000  added default directory search for include files  DPW
//      9-05-2000  added M/, M+, D.
//      11-29-2000 added DABS, DNEGATE, M*/
//      04-22-2001 added D+, D-
//      04-24-2001 added 2>R, 2R>
//      05-13-2001 added .(, D<, D=, D0=
//      05-20-2001 added <#, #, #S, #>, SIGN, HOLD
//      05-30-2001 modified loop code to handle ?DO
//      09-03-2001 added >BODY, IMMEDIATE, NONDEFERRED, POSTPONE; fixed
//                   immediate execution of defined words.
//      12-08-2001 added EVALUATE
//      02-10-2002 made .( a non-deferred word
//      08-01-2002 added \ as a word; added STATE; fixed behavior of 
//                   POSTPONE for non-immediate words; added MS@;
//                   code cleanup in ForthCompiler()
//      09-25-2002 updated include statements and added "using" directives
//                   to resolve std namespace definitions for gcc 3.2
//      09-29-2002 added IMMEDIATE as a regular word; cleaned up logic
//                   for INCLUDE
//      04-11-2003 changed F>S to FROUND>S
//      04-15-2003 added FTRUNC and FTRUNC>S
//      01-31-2004 extended intrinsic wordlist to include INCLUDE,
//                   SOURCE, REFILL, NONDEFERRED, STATE
//      02-09-2004 added ALLOCATE and FREE.
//      03-18-2004 added FSINCOS, FSINH, FCOSH, FTANH, FASINH, FACOSH, 
//                   FATANH, SP!, RP!
//      04-xx-2004 changed intrinsic wordlist specification to
//                   structure array following suggestion of BK.
//      06-19-2004 updated error handling method after call to ForthVM()
//      09-05-2004 added FORTH-SIGNAL, RAISE, SET-ITIMER, and GET-ITIMER 
//      09-19-2005 added D>S
//      03-13-2006 added UTS/MOD dnw
//	03-16-2006 added UTM/ dnw
//	03-19-2006 added PPCTEST, STS/REM  dnw
//	03-21-2006 added DU<  dnw
//	03-28-2006 added UDM*  dnw
//	03-30-2006 added DS*  dnw
//	04-04-2006 renamed PPCTEST as CPUTEST  dnw
//	04-07-2006 added DMAX, DMIN  dnw
//	04-08-2006 added NUMBER_OF_INTRINSIC_WORDS calculation  dnw
//      04-19-2006 added SYSCALL  km
//      05-30-2006 added MOVE  km
//      02-14-2007 added #! to allow executable scripts  km
//      03-14-2008 more efficient compilation of words with opcodes > 255  km
//      03-15-2008 added INCLUDED as intrinsic word  km
//      03-16-2008 added 2CONSTANT and 2VARIABLE as intrinsic words  km
//      03-17-2008 factored code for compiling Forth words into CompileWord(),
//                   to be used by ForthCompiler() and CPP_postpone()  km
//      03-19-2008 removed currently unused variables and arrays in 
//                   ForthCompiler()  km
//      03-21-2008 moved intrinsic Forth word list to a common header file  km
//      04-02-2008 reorganized code in ForthCompiler(): moved some code to
//                   CompileWord(); use call to OpsCopyInt() for cleaner code  km
//      07-04-2008 revised ForthCompiler() to advance pointer in TIB to
//                   skip one whitespace character after extraction of token;
//                   changes to handling of pTIB made in ForthVM.cpp also  km
//      07-05-2008 modified IsInt() to remove dependency on C_numberquery();
//                   this change allows use of ForthCompiler.cpp for both
//                   regular and fast versions of kForth; changed a few char*
//                   declarations to const char* to avoid gcc 4.2.x warnings km
//      09-20-2009 added isBaseDigit()  km
//      09-27-2009 fix problem with isBaseDigit()  km
//      10-04-2009 use WordLists from 1.5.x dev tree  km
//      10-12-2009 changed return type for IsForthWord() and isBaseDigit()
//                   to type bool  km
//      11-25-2009 moved ExtractName(), isBaseDigit(), IsFloat(), and IsInt()
//                   to vmc.c  km
//      11-27-2009 moved strupr() to vmc.c
#include <iostream>
#include <fstream>
using std::cout;
using std::endl;
using std::istream;
using std::ostream;
using std::ifstream;
using std::ofstream;
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include "fbc.h"
#include <vector>
#include <stack>
using std::vector;
using std::stack;
#include "ForthCompiler.h"

const int IMMEDIATE   = PRECEDENCE_IMMEDIATE;
const int NONDEFERRED = PRECEDENCE_NON_DEFERRED;

#include "ForthWords.h"

size_t NUMBER_OF_INTRINSIC_WORDS =
   sizeof(ForthWords) / sizeof(ForthWords[0]);


extern bool debug;

// Provided by ForthVM.cpp

extern vector<char*> StringTable;
extern SearchList SearchOrder;
void ClearControlStacks();
void OpsCopyInt (int, int);
void OpsPushInt (int);
void OpsPushDouble (double);
void PrintVM_Error (int);
int ForthVM (vector<byte>*, int**, byte**);
void RemoveLastWord();

extern "C" {

  // Provided by ForthVM.cpp

  int CPP_then();
  int CPP_immediate();
  int CPP_nondeferred();
  int CPP_source();
  int CPP_refill();

  // Provided by vmc.c
  void strupr (char*);
  char* ExtractName(char*, char*);
  int   IsFloat(char*, double*);
  int   IsInt(char*, int*);

}
  
// Provided by ForthVM.cpp
extern "C"  int* GlobalSp;
extern "C"  int* GlobalRp;
extern "C"  int Base;
extern "C"  int State;  // TRUE = compile, FALSE = interpret
extern "C"  char* pTIB; 
extern "C"  char TIB[];  // contains current line of input

// Provided by vm-common.s
extern "C"  int JumpTable[];


// stacks for keeping track of nested control structures

vector<int> ifstack;	// stack for if-then constructs
vector<int> beginstack;	// stack for begin ... constructs
vector<int> whilestack;	// stack for while jump holders
vector<int> dostack;    // stack for do loops
vector<int> querydostack; // stack for conditional do loops
vector<int> leavestack; // stack for leave jumps
vector<int> recursestack; // stack for recursion
vector<int> casestack;  // stack for case jumps
vector<int> ofstack;   // stack for of...endof constructs

int linecount;

// The global input and output streams

istream* pInStream ;
ostream* pOutStream ;

// Global ptr to current opcode vector

vector<byte>* pCurrentOps;

// The word currently being compiled (needs to be global)

WordListEntry NewWord;
//---------------------------------------------------------------


const char* C_ErrorMessages[] =
{
	"",
	"",
	"End of definition with no beginning",
	"End of string",	 
        "Not allowed inside colon definition",
	"Error opening file",
	"Incomplete IF...THEN structure",
	"Incomplete BEGIN structure",
	"Unknown word",
	"No matching DO",
	"Incomplete DO loop",
	"Incomplete CASE structure",
	"VM returned error"
};
//---------------------------------------------------------------

bool IsForthWord (char* name, WordListEntry* pE)
{
// Locate and Return a copy of the dictionary entry
//   with the specified name.  Return True if found,
//   False otherwise. A copy of the entry is returned
//   in *pE.

    return( SearchOrder.LocateWord (name, pE) );
}
//---------------------------------------------------------------

void OutputForthByteCode (vector<byte>* pFBC)
{
// Output opcode vector to an output stream for use in
//   debugging the compiler.

    int i, n = pFBC->size();
    byte* bp = (byte*) &(*pFBC)[0]; // ->begin();

    *pOutStream << "\nOpcodes:\n";
    for (i = 0; i < n; i++)
    {
        *pOutStream << ((int) *bp) << ' ';
        if (((i + 1) % 8) == 0) *pOutStream << '\n';
        ++bp;
    }
    *pOutStream << '\n';
    return;
}
//---------------------------------------------------------------

void SetForthInputStream (istream& SourceStream)
{
  // Set the input stream for the Forth Compiler and Virtual Machine

  pInStream = &SourceStream;
}
//--------------------------------------------------------------

void SetForthOutputStream (ostream& OutStream)
{
  // Set the output stream for the Forth Compiler and Virtual Machine

  pOutStream = &OutStream;
}
//---------------------------------------------------------------

void CompileWord (WordListEntry d)
{
  // Compile a word into the current opcode vector

  byte* bp;
  int wc = (d.WordCode >> 8) ? OP_CALLADDR : d.WordCode;
  pCurrentOps->push_back(wc);
  switch (wc) 
    {
    case OP_CALLADDR:
      bp = (byte*) d.Cfa;
      OpsPushInt(*((int*)(bp+1)));
      break;
	  
    case OP_DEFINITION:
      OpsPushInt((int) d.Cfa);
      break;

    case OP_ADDR:
      OpsPushInt((int) d.Pfa);
      break;

    case OP_IVAL:
      OpsPushInt(*((int*)d.Pfa));			
      break;

    case OP_FVAL:
      OpsPushDouble(*((double*) d.Pfa));
      break;

    default:
      ;
    }
}
//----------------------------------------------------------------

int ForthCompiler (vector<byte>* pOpCodes, int* pLc)
{
// The FORTH Compiler
//
// Reads and compiles the source statements from the input stream
//   into a vector of byte codes.
//
// Return value:
//
//  0   no error
//  other --- see ForthCompiler.h

  int ecode = 0;
  char WordToken[256];
  double fval;
  int i, j, ival, *sp;
  vector<byte>::iterator ib1, ib2;
  WordListEntry d;
  byte opval, *ip, *tp;

  if (debug) cout << ">Compiler Sp: " << GlobalSp << " Rp: " << GlobalRp << endl;

  ip = (byte *) &ival;

  linecount = *pLc;
  pCurrentOps = pOpCodes;

  while (TRUE)
    {
      // Read each line and parse

      pInStream->getline(TIB, 255);
      if (debug) (*pOutStream) << TIB << endl;

      if (pInStream->fail())
	{
	  if (State)
	    {
	      ecode = E_C_ENDOFSTREAM;  // reached end of stream before end of definition
	      break;
	    }
	  break;    // end of stream reached
	}
      ++linecount;
      pTIB = TIB;
      while (*pTIB && (pTIB < (TIB + 255)))
	{
	  if (*pTIB == ' ' || *pTIB == '\t')
	    ++pTIB;

	  else
	   {
	      pTIB = ExtractName (pTIB, WordToken);
	      if (*pTIB == ' ' || *pTIB == '\t') ++pTIB; // go past next ws char
	      strupr(WordToken);

	      if (IsForthWord(WordToken, &d))
		{
		  CompileWord(d);		  

		  if (d.WordCode == OP_UNLOOP)
		    {
		      if (dostack.empty())
			{
			  ecode = E_C_NODO;
			  goto endcompile;
			}
		    }
		  else if (d.WordCode == OP_LOOP || d.WordCode == OP_PLUSLOOP)
		    {
		      if (dostack.empty())
			{
			  ecode = E_C_NODO;
			  goto endcompile;
			}
		      i = dostack[dostack.size() - 1];
		      if (leavestack.size())
			{
			  do
			    {
			      j = leavestack[leavestack.size() - 1];
			      if (j > i)
				{
				  ival = pOpCodes->size() - j + 1;
				  OpsCopyInt(j, ival); // write relative jump count
				  leavestack.pop_back();
				}
			    } while ((j > i) && (leavestack.size())) ;
			}
		      dostack.pop_back();
		      if (querydostack.size())
			{
			  j = querydostack[querydostack.size() - 1];
			  if (j >= i)
			    {
			      CPP_then();
			      querydostack.pop_back();
			    }
			}
		    }
		  else
		    {
		      ;
		    }

		  int execution_method = EXECUTE_NONE;

		  switch (d.Precedence)
		    {
		      case IMMEDIATE:
			execution_method = EXECUTE_CURRENT_ONLY;
			break;
		      case NONDEFERRED:
			if (State)
			  NewWord.Precedence |= NONDEFERRED ;
			else
			  execution_method = EXECUTE_UP_TO;
			break;
		      case (NONDEFERRED + IMMEDIATE):
			execution_method = State ? EXECUTE_CURRENT_ONLY :
			  EXECUTE_UP_TO;
			break;
		      default:
			;
		    }

		  vector<byte> SingleOp;
		  
		  switch (execution_method)
		    {
		    case EXECUTE_UP_TO:
		      // Execute the opcode vector immediately up to and
		      //   including the current opcode

		      pOpCodes->push_back(OP_RET);
		      if (debug) OutputForthByteCode (pOpCodes);
		      ecode = ForthVM (pOpCodes, &sp, &tp);
		      pOpCodes->erase(pOpCodes->begin(), pOpCodes->end());
		      if (ecode) goto endcompile; 
		      break;

		    case EXECUTE_CURRENT_ONLY:
		      i = ((d.WordCode == OP_DEFINITION) || (d.WordCode == OP_IVAL) || 
			   (d.WordCode >> 8)) ? 5 : 1;
		      ib1 = pOpCodes->end() - i;
		      for (j = 0; j < i; j++) SingleOp.push_back(*(ib1+j));
		      SingleOp.push_back(OP_RET);
		      pOpCodes->erase(ib1, pOpCodes->end());
		      ecode = ForthVM (&SingleOp, &sp, &tp);
		      SingleOp.erase(SingleOp.begin(), SingleOp.end());
		      if (ecode) goto endcompile; 
		      pOpCodes = pCurrentOps; // may have been redirected
		      break;

		    default:
		      ;
		    }

		}  // end if (IsForthWord())

	      else if (IsInt(WordToken, &ival))
		{
		  pOpCodes->push_back(OP_IVAL);
		  OpsPushInt(ival);
		}
	      else if (IsFloat(WordToken, &fval))
		{
		  pOpCodes->push_back(OP_FVAL);
		  OpsPushDouble(fval);
		}
	      else
		{
		  *pOutStream << endl << WordToken << endl;
		  ecode = E_C_UNKNOWNWORD;  // unknown keyword
		  goto endcompile;
		}
	     }
	} // end while (*pTIB ...)
	
      if ((State == 0) && pOpCodes->size())
	{
	  // Execute the current line in interpretation state
	  pOpCodes->push_back(OP_RET);
	  if (debug) OutputForthByteCode (pOpCodes);
	  ecode = ForthVM (pOpCodes, &sp, &tp);
	  pOpCodes->erase(pOpCodes->begin(), pOpCodes->end());
	  if (ecode) goto endcompile; 
	}

    } // end while (TRUE)

endcompile:
    
  if ((ecode != E_C_NOERROR) && (ecode != E_C_ENDOFSTREAM))
    {
      // A compiler or vm error occurred; reset to interpreter mode and
      //   clear all flow control stacks.

      State = FALSE;
      ClearControlStacks();
    }
  if (debug) 
    {
      *pOutStream << "Error: " << ecode << " State: " << State << endl;
      *pOutStream << "<Compiler Sp: " << GlobalSp << " Rp: " << GlobalRp << endl;
    }
  *pLc = linecount;
  return ecode;
}


