/* [wsrch.c wk 10.9.91] W-Editor Search Operations
 *	Copyright (c) 1991 by Werner Koch (dd9jn)
 * This file is part of the W-Editor.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * The search algorithm is Boyer/Moore pattern matching
 * History:
 *  9.05.92 wk	Fixed a bug in change, added Move2Column()
 * 03.01.93 wk	removed unused vars
 * 11.06.93 wk	added option 'l' and function CompareLine
 * 05.03.95 wk	fixed bug with 'l-' flagged search
 */

#include "wtailor.h"
RCSID("$Id: wsrch.c,v 1.9 1996/05/17 18:49:51 wernerk Exp $")
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <wk/lib.h>
#include <wk/string.h>

#include "w.h"
#include "wcmd.h"
#include "wscreen.h"
#include "wfile.h"


 /* Zortech failed compiling the r.e. - issuing an internal error */
#if (OS2 || UNIX || DOS386 || MSDOS) && !__ZTC__
#define USE_REGEXP 1
#include <wk/regexp.h>
#endif


#if  MAX_LINELEN >= (INT_MAX/2)  /* we need an int for indexing */
    #error MAX_LINELEN too big
#endif

/****** constants *********/
/******* typedefs ********/

/* Search option flags as parsed from the flag characters of a locate or
 * change argument.  The fields are unsigned: whether a plain int bit-field
 * is signed is implementation-defined, and a signed 1-bit field cannot
 * represent the value 1 that is stored here.  All users only test the
 * fields for zero/non-zero.
 */
typedef struct {
    unsigned int back:1;    /* '-': search backward */
    unsigned int any:1;     /* 'a': compare ignoring case ('e' clears it) */
    unsigned int left:1;    /* 'l': left anchored search */
    unsigned int reg:1;     /* 'r': pattern is a regular expression */
} flags_t;

/******* globals **********/
/* set by DoLocate(): 1 if it finished without error, else 0 */
static int lastFoundState ;
#if USE_REGEXP
/* text stored by RegError(); DoLocate() clears it on entry */
static char last_re_error[30];
/* scratch buffer of MAX_LINELEN bytes, allocated by DoLocate() on the */
/* first r.e. search; nothing in this file frees it */
static char *help_re_buffer;
#endif
/******* prototypes *******/
static int DoLocate( const char *pattern, int *changeMode,
		     ushort *oldLen, ushort *newOff, ushort *newLen );
static int SearchArea( int fhd,
		       ulong startLine, ushort firstPos, ushort startPos,
		       ulong endLine,	ushort endPos, int posFlag,
		       const char *pattern , ushort plen, flags_t flg,
		       ulong *retLnr, ushort *retPos );
static int SearchLine( const char *s, ushort slen,
			  const char *p, ushort plen, int any, ushort *dtbl );
static int SearchLineBack( const char *s, ushort slen,
			  const char *p, ushort plen, int any, ushort *dtbl );
#if USE_REGEXP
static int RegLine( const char *s, ushort slen, regexp *re, int any );
#endif
static ushort *MakeDistanceTbl( const char *p, ushort plen, int flgBack );
/******* functions ********/


#if USE_REGEXP
/****************
 * Store the r.e. error text s in last_re_error, from where DoLocate()
 * picks it up and shows it to the user.
 * NOTE(review): presumably this is the error hook called by the regexp
 * package - confirm against <wk/regexp.h>.
 */
void
RegError(const char *s)
{
    const size_t room = DIM(last_re_error);

    mem2str( last_re_error, s, room );
}
#endif





/****************
 * Implementation of the locate command.
 * (the argument string must be well formed)
 * Returns: the error code of DoLocate()
 */

int
Cmd_Locate( cmd_t *cmd )
{
    const char *searchArg = cmd->arg.string;

    /* a plain locate needs none of the change-mode outputs */
    return DoLocate( searchArg, NULL, NULL, NULL, NULL );
}


/****************
 * Return the state of the last locate or change operation:
 * true if the last call to DoLocate() finished without error.
 */

int
GetLastFoundState(void)
{
    return lastFoundState;
}



/****************
 * Locate function: the worker behind the locate and the change command.
 *
 * pattern starts with a delimiter character, followed by the search
 * string, the same delimiter again and then the flag characters.  In
 * change mode the replacement string and one more delimiter come before
 * the flags.  The string must be well formed: the delimiter scans below
 * do not stop at a terminating NUL.
 *
 * If changeMode is not NULL, the special change flag '*' is allowed
 * and *changeMode will return true if replace all is selected.
 * Also if changeMode is not NULL (and the search string is not empty):
 * *oldLen will return the length of the string to be searched for,
 * *newOff will return the offset within pattern of the new string and
 * *newLen will return its length.
 * If *changeMode is initially set to true, the pattern is only parsed
 * and no search is done.
 *
 * Flags: m = search within the mark, l = left anchored,
 *	  r = regular expression (only with USE_REGEXP),
 *	  a = ignore case, e = exact case, - = search backward.
 *
 * On success the file position is moved to the match and lastFoundState
 * is set to 1; on any error lastFoundState is set to 0 and the position
 * saved on entry is restored.
 * Returns: 0 or an error code (ERR_SRCHPAT, ERR_NFOUND, ERR_INVRE,
 *	    ERR_CMDINT)
 */

static int
DoLocate( const char *pattern, int *changeMode,
	  ushort *oldLen, ushort *newOff, ushort *newLen )
{
    int err, i, fhd, parseOnly, flgMark;
    const char *p;
    ushort plen;
    ulong startLine=0, endLine=0, line1, line2, lnr, saveLine;
    ushort firstPos, startPos=0, endPos=0, pos1, pos2, pos, savePos;
    flags_t flg;

  #if USE_REGEXP
    *last_re_error = 0;   /* forget any error text of an earlier search */
  #endif
    err = 0;
    fhd = QryScreenFile();
    saveLine = GetFilePos( fhd, &savePos ); /* restored on error */
    parseOnly = 0;
    flgMark=0;
    memset(&flg,0,sizeof flg);
    flg.any = !GetSetOption( SETOPT_SEARCHCASE ); /* set to default */
    /* measure the search string: everything up to the next delimiter */
    for(plen=0, p=pattern+1; *p != *pattern; p++ )
	plen++;
    if( plen && changeMode ) {
	*oldLen = plen;
	*newOff = p - pattern+1 ; /* new string starts behind 2nd delimiter */
	for(plen=0,p++; *p != *pattern; p++ )
	    plen++;
	*newLen = plen;
	plen = *oldLen; /* from here on plen is the search string length */
	if( *changeMode )
	    parseOnly = 1;
	*changeMode = 0; /* becomes 1 only if the '*' flag is seen */
    }
    pattern++; /* skip the leading delimiter */

    if( !plen )
	err = ERR_SRCHPAT;
    else {
	/* now analyse the flags */
	/* p points to the closing delimiter, the flags follow it */
	while( *++p )
	    if( *p == 'm' || *p == 'M' )
		flgMark=1;
	    else if( *p == 'l' || *p == 'L' )
		flg.left=1;
	  #if USE_REGEXP
	    else if( *p == 'r' || *p == 'R' )
		flg.reg=1;
	  #endif
	    else if( *p == 'a' || *p == 'A' )
		flg.any=1;
	    else if( *p == 'e' || *p == 'E' )
		flg.any=0;
	    else if( *p == '-' )
		flg.back=1;
	    else if( changeMode && *p == '*' )
		*changeMode=1;
	    else
		err = ERR_SRCHPAT; /* shouldn't occur */
	if( !err && flgMark ) {
	    /* from here on flgMark holds the value returned by MarkInfo() */
	    /* (compared with MARKTYPE_xxx below); the mark must be set */
	    /* and must belong to the current file */
	    flgMark = MarkInfo( &i );
	    if( !flgMark || i != fhd )
		err = ERR_NFOUND;
	}


	/* Due to the way change is implemented the flag combination */
	/* "*-" is not allowed; a backward replace would need extended */
	/* positioning with several checks: back by the difference of the */
	/* lengths, and at the start of a line up to the previous line. */
	/* AdjustMark would have to be adapted too, so I rather leave it. */
	if( !err && flg.back && changeMode ) {
	    if( *changeMode )
		err = ERR_SRCHPAT;
	}

	if( !err && !parseOnly ) {
	    /* set up the search area: whole file, starting at the cursor */
	    startLine = GetFilePos( fhd, &firstPos );
	    if( flg.back ) {
		/* firstPos becomes the last column of the first line */
		/* that SearchArea() looks at; presumably chosen so that */
		/* only matches starting left of the cursor are found */
		firstPos += plen-1;
		if( firstPos )
		    firstPos--;
		endLine  = 0;
		if( flg.left ) {
		    startPos = 0;
		    endPos = MAX_LINELEN-1;
		}
		else {
		    startPos = MAX_LINELEN-1;
		    endPos = 0;
		}
	    }
	    else {
		firstPos++; /* start one column right of the cursor */
		endLine  = GetFileTotLines( fhd ) - 1;
		startPos = 0;
		endPos = MAX_LINELEN-1;
	    }
	    if( flgMark ) {
		/* restrict the search area to the marked area */
		line1 = MarkStart( &pos1);
		line2 = MarkEnd( &pos2);
		if( flg.back ) {
		    if( startLine > line2 ) {
			startLine = line2;
			firstPos = pos2;
		    }
		    endLine = line1;
		    if( flgMark == MARKTYPE_BLOCK ) {
			if( flg.left ) {
			    startPos = pos1;
			    endPos   = pos2+1;
			}
			else  {
			    startPos = pos2;
			    endPos   = pos1;
			}
		    }
		    else if( flgMark == MARKTYPE_CHAR ) {
			endPos = flg.left? (pos2+1):pos1;
		    }
		}
		else {
		    if( startLine < line1 ) {
			startLine = line1;
			firstPos = pos1;
		    }
		    endLine = line2;
		    if( flgMark == MARKTYPE_BLOCK ) {
			startPos = pos1;
			endPos	 = pos2+1;
		    }
		    else if( flgMark == MARKTYPE_CHAR ) {
			endPos	 = pos2+1;
		    }
		}
	    }
	}
    }

    if( !err && !parseOnly ) {
      #if USE_REGEXP
	regexp *re;
	if( flg.reg ) {
	    /* compile the r.e. from a NUL terminated copy of the */
	    /* search string (upper cased when ignoring case) */
	    if( !help_re_buffer )
		help_re_buffer = xmalloc( MAX_LINELEN );
	    mem2str( help_re_buffer, pattern, plen+1 );
	    if( flg.any )
		strupr(help_re_buffer);
	    if( !(re=RegComp(help_re_buffer)) ) {
		ShowMessage("r.e.: %s - hit ESCAPE", last_re_error );
		while( GetKeyId(NULL) != K_VK_ESCAPE )
		    ;
		err = ERR_INVRE;
	    }
	}
	else
	    re = NULL;
      #endif
	if( err )
	    ;
	/* for an r.e. search the compiled r.e. is passed in place of */
	/* the pattern; posFlag is false only for a block mark */
	else if( SearchArea( fhd, startLine, firstPos, startPos,
			endLine, endPos, flgMark != MARKTYPE_BLOCK,
		      #if USE_REGEXP
			re ? (const char*)re : pattern,
		      #else
			pattern,
		      #endif
			plen, flg, &lnr, &pos ) ) {
	    /* found at lnr, pos */
	    Move2Pos( fhd, lnr, pos );
	    ResetFileFlag( fhd, WFILE_INCMD);
	}
	else
	    err = ERR_NFOUND;
	if( sigIntPending ) /* an interrupt overrides any other result */
	    err = ERR_CMDINT;
      #if USE_REGEXP
	if( re )
	    RegFree(re);
      #endif
    }

 #if USE_REGEXP
    /* an r.e. error reported while searching counts as not found */
    if( !err && *last_re_error ) {
	ShowMessage("r.e.: %s - hit ESCAPE", last_re_error );
	while( GetKeyId(NULL) != K_VK_ESCAPE )
	    ;
	err = ERR_NFOUND;
    }
 #endif
    if( err ) {
	lastFoundState = 0;
	Move2Pos( fhd, saveLine, savePos ); /* back to where we started */
    }
    else
	lastFoundState = 1;
    return err;
}


/****************
 * Search through the file fhd from line number startLine to line number
 * endLine for pattern, which has a length of plen bytes.  For an r.e.
 * search (flg.reg) pattern is the compiled regexp cast to a char pointer.
 * The search runs towards the top of the file if flg.back is set.
 *
 * firstPos is the column at which the scan of the first line starts in
 * the non-anchored searches; in the left anchored searches it only
 * decides whether the first line is skipped.
 * startPos and endPos bound the columns looked at within a line.
 * In the non-anchored searches: if posFlag is true, startPos and endPos
 * are applied to endLine only and the lines in between are searched in
 * full; if posFlag is false, startPos is applied to every line after
 * the first.
 *
 * If found, the line number and column are returned in *retLnr and
 * *retPos; they are not touched otherwise.
 * The actual position in the file may be changed by this function.
 * The loops end early when SigIntPoll() reports an interrupt.
 * Returns: true when found
 */

static int
SearchArea( int fhd,
	    ulong startLine, ushort firstPos, ushort startPos,
	    ulong endLine,   ushort endPos, int posFlag,
	    const char *pattern , ushort plen, flags_t flg,
	    ulong *retLnr, ushort *retPos )
{
    ushort *dTbl;
    const char *line;
    char *pUpper;
    ushort nbytes, sPos, ePos;
    int ind;

    ind = -1; /** Assume not found ***/
    if( flg.any && !flg.reg ) { /* already done for r.e. search */
	/* work on an upper case copy of the pattern; the cast keeps */
	/* toupper() defined for 8-bit characters, as in SearchLine() */
	pUpper = xmalloc( plen+1 );
	for( nbytes=0; nbytes < plen; nbytes++ )
	    pUpper[nbytes] = toupper((byte)pattern[nbytes]);
	pattern = pUpper;
    }
    else
	pUpper = NULL;
    /* the distance table is only needed by SearchLine[Back]() */
    dTbl = flg.left||flg.reg? NULL
			    : MakeDistanceTbl( pattern, plen, flg.back );

    if( flg.back && (flg.left
			  #if USE_REGEXP
			      || flg.reg
			  #endif
				       )) { /* left anchored backward search */
	sPos = startPos; ePos = endPos;
	if( startLine && firstPos >= sPos )
	    startLine--;
	while( startLine >= endLine && !SigIntPoll() ) {
	    SeekLine( fhd, startLine );
	    line = GetPtr2Line( fhd, &nbytes );
	    if( ePos < nbytes )
		nbytes = ePos+1;
	  #if USE_REGEXP
	    if( flg.reg ) {
		if( sPos < nbytes ) {
		    ind = RegLine( line+sPos, nbytes-sPos,
				   (regexp*)pattern, flg.any);
		    if( ind != -1 ) {
			*retLnr = startLine;
			*retPos = ind + sPos;
			break; /* found: break while loop */
		    }
		}
	    }
	    else
	  #endif
	    if( sPos < nbytes && nbytes-sPos >= plen &&
			    !(flg.any ? memicmp(line+sPos, pattern, plen)
				      : memcmp(line+sPos, pattern, plen)) ) {
		*retLnr = startLine;
		*retPos = sPos;
		ind = 0;
		break; /* found: break while loop */
	    }
	    if( !startLine )
		break; /* line 0 done; startLine is unsigned */
	    startLine--;
	}
    }
    else if( flg.back ) { /* standard backward search */
	sPos = firstPos; ePos = endPos;
	while( startLine >= endLine && !SigIntPoll() ) {
	    SeekLine( fhd, startLine );
	    line = GetPtr2Line( fhd, &nbytes );
	    if( sPos < nbytes )
		nbytes = sPos+1;

	    if( ePos+plen <= nbytes ) {
		ind = SearchLineBack( line+ePos, nbytes-ePos,
				      pattern, plen, flg.any, dTbl );
		if( ind != -1 ) {
		    *retLnr = startLine;
		    *retPos = ind + ePos;
		    break; /* found: break while loop */
		}
	    }
	    if( !startLine )
		break; /* line 0 done; startLine is unsigned */

	    startLine--;
	    if( posFlag )
		if( startLine == endLine ) {
		    sPos = startPos;
		    ePos = endPos;
		}
		else {
		    sPos = MAX_LINELEN-1;
		    ePos = 0;
		}
	    else
		sPos = startPos;
	}
    }
    else if( flg.left ) { /* left anchored forward search */
	sPos = startPos; ePos = endPos;
	if( sPos <= firstPos )
	    startLine++;
	while( startLine <= endLine && !SigIntPoll() ) {
	    SeekLine( fhd, startLine );
	    line = GetPtr2Line( fhd, &nbytes );
	    if( ePos < nbytes )
		nbytes = ePos+1;
	    if( sPos < nbytes && nbytes-sPos >= plen &&
			    !(flg.any ? memicmp(line+sPos, pattern, plen)
				      : memcmp(line+sPos, pattern, plen)) ) {
		*retLnr = startLine;
		*retPos = sPos;
		ind = 0;
		break; /* found: break while loop */
	    }
	    startLine++;
	}
    }
    else { /* standard forward search */
	sPos = firstPos; ePos = endPos;
	while( startLine <= endLine && !SigIntPoll() ) {
	    SeekLine( fhd, startLine );
	    line = GetPtr2Line( fhd, &nbytes );
	    if( ePos < nbytes )
		nbytes = ePos+1;

	    /* A plain pattern needs plen bytes of room.  For an r.e. */
	    /* plen is the length of the expression text and says */
	    /* nothing about the length of a match, so any non-empty */
	    /* rest of the line is tried (as in the backward case). */
	    if( flg.reg ? sPos < nbytes : sPos+plen <= nbytes ) {
		ind =
		   #if USE_REGEXP
		      flg.reg ? RegLine( line+sPos, nbytes-sPos,
					(regexp*)pattern, flg.any) :
		   #endif
		      SearchLine( line+sPos, nbytes-sPos,
				  pattern, plen, flg.any, dTbl );
		if( ind != -1 ) {
		    *retLnr = startLine;
		    *retPos = ind + sPos;
		    break; /* found: break while loop */
		}
	    }

	    startLine++;
	    if( posFlag )
		if( startLine == endLine ) {
		    sPos = startPos;
		    ePos = endPos;
		}
		else {
		    sPos = 0;
		    ePos = MAX_LINELEN-1;
		}
	    else
		sPos = startPos;
	}
    }
    free( dTbl );
    free( pUpper );
    return ind != -1;
}




/****************
 * Search forward in the buffer s of length slen for the pattern p of
 * length plen.
 * dTbl is the distance table of 256 elements as built by
 * MakeDistanceTbl() for a forward search.
 * If any is set the bytes of s are converted to upper case before they
 * are compared; p is compared as it is.
 * Returns: Index within s or -1 if not found
 */

static int
SearchLine( const char *s, ushort slen,
	    const char *p, ushort plen, int any, ushort *dTbl )
{
    const ushort last = (ushort)(plen - 1);  /* index of last pattern byte */
    int end;   /* index in s aligned with the last pattern byte */

    for( end = last; end < slen; ) {
	/* the byte under the end of the window selects the next shift */
	byte anchor = any ? (byte)toupper((byte)s[end]) : (byte)s[end];
	byte cur = anchor;
	int si = end;
	int pi = last;

	/* compare from right to left */
	while( cur == (byte)p[pi] ) {
	    if( !pi )
		return si;
	    pi--;
	    si--;
	    cur = any ? (byte)toupper((byte)s[si]) : (byte)s[si];
	}
	end += dTbl[anchor];
    }
    return -1;
}


/* Search backward in the buffer s of length slen for the pattern p of
 * length plen: the match with the highest index is found first.
 * dTbl is the distance table as built by MakeDistanceTbl() for a
 * backward search.  If any is set the bytes of s are converted to upper
 * case before they are compared; p is compared as it is.
 * Returns: Index within s or -1 if not found
 */
static int
SearchLineBack( const char *s, ushort slen,
		const char *p, ushort plen, int any, ushort *dTbl )
{
    const ushort last = (ushort)(plen - 1);  /* index of last pattern byte */
    int start;	/* index in s aligned with the first pattern byte */

    for( start = slen-1-last; start >= 0; ) {
	/* the byte under the start of the window selects the next shift */
	byte anchor = any ? (byte)toupper((byte)s[start]) : (byte)s[start];
	byte cur = anchor;
	int si = start;
	int pi = 0;

	/* compare from left to right */
	while( cur == (byte)p[pi] ) {
	    if( pi == last )
		return start;
	    pi++;
	    si++;
	    cur = any ? (byte)toupper((byte)s[si]) : (byte)s[si];
	}
	start -= dTbl[anchor];
    }
    return -1;
}



/****************
 * Match the compiled r.e. re against the first slen bytes of s.
 * Unfortunately we do not yet have an r.e. matcher that works on a
 * counted buffer, so the bytes are copied to the static help_re_buffer
 * (allocated by DoLocate()) and the match is done on that copy.
 * If any is set the copy is converted to upper case first.
 * Returns: Index of the match within s or -1 if not found
 */
#if USE_REGEXP
static int
RegLine( const char *s, ushort slen, regexp *re, int any )
{
    xassert( slen < MAX_LINELEN );
    /* The other mem2str() calls in this file pass a size which includes */
    /* the terminating NUL (DIM(buffer), plen+1), so slen+1 is needed */
    /* to get all slen bytes; the assertion above makes sure it fits. */
    mem2str(help_re_buffer, s, slen+1 );
    if( any )
	strupr(help_re_buffer);
    if( RegExec( re, help_re_buffer ) )
	return RegPTR( re ) - help_re_buffer;
    else
	return -1;
}
#endif




/****************
 * Build the distance (shift) table for the pattern p of length plen.
 * The table has 256 entries, one for every byte value.  Bytes that get
 * no entry of their own receive a distance of plen.
 * flgBack selects the table for SearchLineBack() instead of the one for
 * SearchLine().
 * The table is allocated here and must be released with free().
 * Returns: Ptr to the newly allocated table
 */

static ushort *
MakeDistanceTbl( const char *p, ushort plen, int flgBack )
{
    ushort *tbl = xcalloc( 256 , sizeof *tbl );
    int c, idx;

    /* default distance for every byte value */
    for( c = 255; c >= 0; c-- )
	tbl[c] = plen;

    if( flgBack ) {
	/* walk down so that the lowest index (but not 0) wins */
	idx = plen-1;
	while( idx > 0 ) {
	    tbl[(byte)p[idx]] = idx;
	    idx--;
	}
    }
    else {
	/* walk up so that the highest index (but not the last) wins */
	idx = 0;
	while( idx < plen-1 ) {
	    tbl[(byte)p[idx]] = plen-idx-1;
	    idx++;
	}
    }
    return tbl;
}


/****************
 * Implementation of the change command
 * (the argument string must be well formed, see DoLocate())
 * If confirmFlag is set, the pattern is only parsed and the string at
 * the current position is replaced; otherwise the string is searched
 * for first.  With the '*' flag all occurrences are replaced in a loop.
 * A line that would get too long is truncated and the ERR_LTRUNC message
 * is shown.
 * NOTE(review): with the 'r' flag oldlen is the length of the r.e. text
 * as returned by DoLocate(), not the length of the matched text.
 * Returns: error code, or ERR_PSEUDO_CFN to indicate that we are
 * waiting for a confirm change.
 */

int
Cmd_Change( const char *pattern, int confirmFlag )
{
    int err, allFlag, fhd, resetNFound, adjustSum;
    ushort oldlen, newlen, newoff;
    const char *oldstr, *newstr;
    char *p;
    ushort nbytes, pos;
    ulong lastLnr, lnr;

    resetNFound = 0;
    lastLnr = ULONG_MAX;
    adjustSum = 0;
    do { /* use a loop, in case of allFlag is set */
	err = 0;
	allFlag = confirmFlag; /* allFlag set is: parseOnly */
	oldstr = pattern;
	err = DoLocate( oldstr, &allFlag, &oldlen, &newoff, &newlen);
	if( !err && !allFlag && !confirmFlag )
	    err = ERR_PSEUDO_CFN; /* found: let the caller ask the user */
	else if( !err ) {
	    newstr = oldstr + newoff;
	    oldstr++; /* skip the first char ( delimiter ) */
	    fhd = QryScreenFile();
	    p = GetEditBuf( fhd, &nbytes );
	    lnr = GetFilePos( fhd, &pos );
	    if( oldlen == newlen ) { /* just replace */
		/* we don't have to mess around with nbytes */
		memcpy( p+pos, newstr, newlen );
	    }
	    else {
		ushort tail;	    /* bytes behind the insertion point */
		int truncated = 0;

		/* first remove the old string */
		if( nbytes >= pos+oldlen ) {
		    memmove( p+pos, p+pos+oldlen, nbytes - pos - oldlen );
		    nbytes -= oldlen;
		}
		else
		    nbytes = pos ;
		SetEditBuf( fhd, nbytes );
		/* second insert the new string */
		if( newlen ) {
		    if( pos+newlen >= MAX_LINELEN-1 ) {
			newlen = MAX_LINELEN-1-pos;
			truncated = 1;
		    }
		    /* nbytes >= pos always holds after the removal above. */
		    /* The rest of the line must stay within the line */
		    /* limit too, else the memmove would write beyond it */
		    /* (assumes the edit buffer is not larger than that */
		    /* limit - TODO confirm against GetEditBuf()). */
		    tail = nbytes - pos;
		    if( pos+newlen+tail > MAX_LINELEN-1 ) {
			tail = MAX_LINELEN-1-pos-newlen;
			truncated = 1;
		    }
		    if( truncated )
			ShowMessage( GetErrorString(ERR_LTRUNC) );
		    memmove( p+pos+newlen, p+pos, tail );
		    memcpy( p+pos, newstr, newlen );
		    nbytes = pos + newlen + tail;
		    SetEditBuf( fhd, nbytes );
		}
	      #if 0
		if( allFlag && newlen > oldlen ) {
		    if( lastLnr == ULONG_MAX )
			lastLnr = lnr;
		    if( lnr == lastLnr ) {
			adjustSum += newlen - oldlen;
			AdjustBlockMark( fhd, newlen - oldlen );
		    }
		    else {
			if( adjustSum )
			    AdjustBlockMark( fhd, -adjustSum );
			adjustSum = 0;
			lastLnr = lnr;
		    }
		}
	      #endif
	    }
	    resetNFound = 1;
	    /* continue behind the replacement */
	    Move2Column( fhd, pos + newlen);
	}
    } while( !err && !confirmFlag );

   #if 0
    if( adjustSum )
	AdjustBlockMark( fhd, -adjustSum );
   #endif

    /* "not found" after at least one replacement is the normal end */
    /* of a replace all */
    if( err == ERR_NFOUND && resetNFound )
	err = 0;

    return err;
}


/*** bottom of file ***/
