/* bool.c - Main routines.
   Copyright (C) 2000 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA.  */

/* Written by Marc Tardif <intmktg@cam.org>.  */

/* The buffering algorithm in this program can be summarised with the
   following illustration:

   buffer       pos_beg    buf_beg         pos_end   buf_end     lim
     v             v          v               v         v         v
     +------------------------+-----------------------------------+
               bufsalloc                     BUFFER
        +----------+----------+       +-------+         +---------+
          previous   previous          current            current
            save     residue            save              residue

   The buffer address points to the beginning of the memory location
   where the input is manipulated.  This segment is separated in two
   sequential fragments.

   The first fragment stores the relevant data from the previous read
   operation.  The size is kept in the bufsalloc variable which can
   increase if more space is required.  The data consists of two parts:
   - A save buffer containing the context preceding the last character
     parsed in the previous input buffer.  This part starts anywhere
     after the buffer address and stops at pos_beg.  This address is
     where parsing will continue following new input.
   - A residue buffer containing the characters which were not parsed
     from the previous input.  This part starts immediately after the
     save buffer at pos_beg and stops at buf_beg.  Coincidentally, the
     latter address is also where new data is read.

   The second fragment is where the read system call copies at most
   BUFFER bytes from the input.  The address of the buffer end is
   copied into the lim variable after which a NUL byte is appended as
   a sentinel character.

   Manipulation of the input consists of maintaining two pointers:
   - The buf_end address points to the last character parsed.  When
     the current buffer is exhausted and buf_end is not equal to lim,
     the difference constitutes the residue for the next input.
   - When parsing text, the pos_end address is always the same as
     buf_end.  But when parsing HTML, the formatted text is copied
     over the current buffer and the pointer is stored in pos_end.
     When lim is reached, the save buffer is copied from this point
     backwards.

   Note that pos_end and pos_beg are actually the same variable in
   the source code, one becomes the other after filling the input
   buffer.  Same applies to buf_end and buf_beg.
*/

#include <sys/types.h>
#include <locale.h>
#include <ctype.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <limits.h>
#include <stdio.h>
#include "config.h"

#include <errno.h>
#ifndef errno
extern int errno;
#endif

#ifndef HAVE_STRERROR
extern int sys_nerr;
extern char *sys_errlist[];
# define strerror(E) (0 <= (E) && (E) < sys_nerr ? sys_errlist[E] : "Unknown system error")
#endif

#if ! defined HAVE_MEMMOVE && ! defined memmove
# define memmove(d, s, n) bcopy (s, d, n)
#endif

#include "kw.h"
#include "getopt.h"
#include "options.h"

/* Maximum number of bytes requested from each read call in readfile.  */
#define BUFFER 4096
/* Initial size of the area in front of the read area; init stores this
   value in bufsalloc.  */
#define SAVE    512

/* Base of buffer.  Allocated by init, possibly reallocated by readfile
   and released at the end of main.  */
static char *buffer;

/* Size of the leading fragment of buffer, the one holding the save and
   residue data carried over from the previous read.  New input is
   always read at buffer + bufsalloc, and the whole allocation is
   bufsalloc + BUFFER + 1 bytes.  readfile doubles this value when the
   carried-over data does not fit.  */
static size_t bufsalloc;

/* Command-line options.  */
struct options opt;

/* Short option letters for getopt_long; a letter followed by a colon
   (C, D and O) takes an argument.  */
static char const short_options[] = "C:D:FHKLO:Vchiklq";

/* Long option table for getopt_long.  Entries with a NULL flag make
   getopt_long return the given letter, so main handles them exactly
   like the corresponding short option.  "help" is the only entry with
   a flag pointer: getopt_long stores 1 in opt.show_help and returns 0.
   The table ends with an all-zero entry.  */
static struct option long_options[] =
{
  {"count", no_argument, NULL, 'c'},
  {"context", required_argument, NULL, 'C'},
  {"distance", required_argument, NULL, 'D'},
  {"files-with-matches", no_argument, NULL, 'l'},
  {"files-without-match", no_argument, NULL, 'L'},
  {"fixed-string", no_argument, NULL, 'F'},
  {"help", no_argument, &opt.show_help, 1},
  {"ignore-case", no_argument, NULL, 'i'},
  {"no-filename", no_argument, NULL, 'h'},
  {"no-keyword", no_argument, NULL, 'k'},
  {"occurences", required_argument, NULL, 'O'},
  {"quiet", no_argument, NULL, 'q'},
  {"silent", no_argument, NULL, 'q'},
  {"version", no_argument, NULL, 'V'},
  {"with-filename", no_argument, NULL, 'H'},
  {"with-keyword", no_argument, NULL, 'K'},
  {0, 0, 0, 0}
};

/* Print MSG on standard error, prefixed with the program name, and
   terminate the process with exit status 1.  When errno is nonzero
   its textual description is appended to the message.  */
static void
syserr (const char *msg)
{
  const int code = errno;

  if (code == 0)
    fprintf (stderr, "bool: %s\n", msg);
  else
    fprintf (stderr, "bool: %s: %s\n", msg, strerror (code));

  exit (1);
}

/* Print MSG on standard error, prefixed with the program name, and
   terminate the process with exit status 2.  Unlike syserr, errno is
   not consulted.  */
static void
fatal (const char *msg)
{
  enum { FATAL_STATUS = 2 };

  fprintf (stderr, "bool: %s\n", msg);
  exit (FATAL_STATUS);
}

/* Convert the decimal string STR to a non-negative int stored in *OUT.

   Only the characters '0' to '9' are accepted; signs, blanks and any
   other character make the conversion fail.  The value is computed
   from STR itself, so the result does not depend on the global optarg.

   Returns 0 on success.  Returns -1, leaving *OUT untouched, when STR
   is empty, contains a non-digit, or denotes a value greater than
   INT_MAX.  */
static int
ck_atoi (char const *str, int *out)
{
  char const *p;
  int value = 0;

  /* An empty argument is not a number.  */
  if (*str == '\0')
    return -1;

  for (p = str; *p; p++)
    {
      int digit;

      if (*p < '0' || *p > '9')
        return -1;

      digit = *p - '0';

      /* Reject the digit before value * 10 + digit could overflow.  */
      if (value > (INT_MAX - digit) / 10)
        return -1;

      value = value * 10 + digit;
    }

  *out = value;
  return 0;
}

/* Allocate the input buffer and hand the query STR to kw_init.

   The buffer is BUFFER + SAVE + 1 bytes long and bufsalloc is set to
   SAVE once the query has been accepted.  The result of kw_init is
   handled as follows:
     -1      terminate through fatal with "invalid query";
      0      report failure to the caller;
      1      nothing more to do;
     other   turn on opt.with_keyword unless opt.no_keyword is set.

   Returns 1 on success and 0 when the allocation failed or kw_init
   returned 0.  */
static int
init (char *str)
{
  int status;

  buffer = malloc (BUFFER + SAVE + 1);
  if (buffer == NULL)
    return 0;

  status = kw_init (str);

  /* fatal does not return.  */
  if (status == -1)
    fatal ("invalid query");

  if (status == 0)
    return 0;

  if (status != 1 && !opt.no_keyword)
    opt.with_keyword = 1;

  bufsalloc = SAVE;
  return 1;
}

/* Classify FILE by the text following the last '.' in the string.

   The first character of FILE is never examined, so a name that
   merely starts with a dot is not considered to have an extension.
   The search runs over the whole string, directory part included.
   The comparison against the known extensions ignores the case of
   the characters in FILE.

   Returns HTML for "html" and "htm", TEXT for everything else,
   including a null or empty FILE.  */
static enum extension
get_extension (char *file)
{
  static const struct { const char *str; int val; } ext[] =
    {
      { "html", HTML },
      { "htm", HTML },
      { NULL, TEXT }
    };
  const char *dot, *p;
  int i;

  /* Skipping the first character of an empty string would step past
     its terminating NUL.  */
  if (!file || *file == '\0')
    return TEXT;

  dot = NULL;
  for (p = file + 1; *p; p++)
    if (*p == '.')
      dot = p;

  if (!dot)
    return TEXT;

  for (i = 0; ext[i].str; i++)
    {
      const char *left = dot + 1;
      const char *right = ext[i].str;

      /* The terminating NUL takes part in the comparison, so only an
         exact match of the whole extension succeeds.  The cast keeps
         tolower away from negative values of plain char.  */
      while (tolower ((unsigned char) *left) == *right++)
        if (*left++ == '\0')
          return ext[i].val;
    }

  return TEXT;
}

static int
readfile (int fd)
{
  ssize_t bytesread;
  size_t residue, save;
  char *pos, *buf;

  residue = 0;
  buf = pos = buffer + bufsalloc;
  *(buf - 1) = '\0';

  for (;;)
    {
      size_t residue_offset, save_offset;
      char *lim;

      while ((bytesread = read (fd, buf, BUFFER)) < 0
             && errno == EINTR)
        continue;

      if (bytesread <= 0)
        break;

      lim = buf + bytesread;
      *lim = '\0';
      save = kw_exec (&pos, &buf, lim);
      residue = lim - buf;

      residue_offset = buf - buffer;
      save_offset = pos - save - buffer;
      if (bufsalloc < residue + save)
        {
          while (bufsalloc < residue + save)
            bufsalloc *= 2;

          buffer = realloc (buffer, bufsalloc + BUFFER + 1);
          if (!buffer)
            fatal ("memory exhausted");
        }

      buf = buffer + bufsalloc;
      pos = buf - residue;
      lim = pos - save;
      *(lim - 1) = '\0';
      memmove (lim, buffer + save_offset, save);
      memmove (pos, buffer + residue_offset, residue);
    }

  if (!bytesread && residue)
    {
      *buf = '\0';
      kw_exec (&pos, &buf, buf);
    }

  return kw_print ();
}

/* Search one input and return the status reported by readfile.

   A null FILE selects standard input (descriptor 0), treated as TEXT
   and shown as "(standard input)".  Otherwise FILE is opened
   read-only and its type is derived from its name by get_extension.
   opt.filename is set to the name used in the output.

   When FILE cannot be opened, the reason is printed on standard error
   and 0 is returned without searching, so the remaining files are
   still processed.  A failing close terminates the program through
   syserr.  */
static int
openfile (char *file)
{
  enum extension type;
  int desc, status;

  if (!file)
    {
      desc = 0;
      type = TEXT;
      opt.filename = "(standard input)";
    }
  else
    {
      while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
        continue;

      if (desc < 0)
        {
          /* Without this check the invalid descriptor would reach
             read and close, and the failing close would abort the
             whole run with a message that does not name the file.  */
          fprintf (stderr, "bool: %s: %s\n", file, strerror (errno));
          return 0;
        }

      type = get_extension (file);
      opt.filename = file;
    }

  kw_prep (type);
  status = readfile (desc);

  while (close (desc) != 0)
    if (errno != EINTR)
      {
        syserr ("close");
        break;
      }

  return status;
}

/* Print usage information and terminate with exit status STATUS.

   A nonzero STATUS prints a short reminder on standard error; a zero
   STATUS prints the complete help text on standard output.  The help
   text lists only the options present in short_options and
   long_options.  */
static void
usage (int status)
{
  if (status != 0)
    fprintf (stderr, "\
Usage: bool [options] PATTERN [FILE]...\n\
Try `bool --help' for more information.\n\
");
  else
    printf ("\
Usage: bool [OPTION]... PATTERN [FILE] ...\n\
Search for PATTERN in each FILE or standard input.\n\
\n\
Interpretation:\n\
  -F, --fixed-string        PATTERN is a string, not a boolean expression\n\
  -i, --ignore-case         ignore case distinctions\n\
\n\
Output control:\n\
  -H, --with-filename       print the filename for each match\n\
  -h, --no-filename         suppress the prefixing filename on output\n\
  -K, --with-keyword        print the keyword for each match\n\
  -k, --no-keyword          suppress the prefixing keyword on output\n\
  -q, --quiet, --silent     suppress all normal output\n\
  -L, --files-without-match only print FILE names containing no match\n\
  -l, --files-with-matches  only print FILE names containing matches\n\
  -c, --count               only print a count of matches per FILE\n\
\n\
Context control:\n\
  -C, --context=NUM         print NUM characters of output context\n\
  -D, --distance=NUM        NEAR matches are at most NUM words apart\n\
  -O, --occurences=NUM      print NUM lines of context for each keyword\n\
\n\
Miscellaneous:\n\
  -V, --version             print version information and exit\n\
      --help                display this help and exit\n\
");

  exit (status);
}

int
main (int argc, char *argv[])
{
  extern char *optarg;
  extern int optind;
  int c, state;

  state = 0;
  opt.context = DEFAULT_CONTEXT;
  opt.distance = DEFAULT_DISTANCE;
  opt.occurences = DEFAULT_OCCURENCES;

  setlocale (LC_CTYPE, "");

  while ((c = getopt_long (argc, argv, short_options, long_options, NULL))
         != -1)
    switch (c)
      {
      case 'C':
        if (optarg)
          {
            if (ck_atoi (optarg, &opt.context))
              fatal ("invalid context argument");
          }
        break;
      case 'c':
        opt.out_quiet = 1;
        opt.count_matches = 1;
        break;
      case 'D':
        if (optarg)
          {
            if (ck_atoi (optarg, &opt.distance))
              fatal ("invalid distance argument");
          }
        break;
      case 'F':
        opt.fixed_string = 1;
        break;
      case 'H':
        opt.with_filename = 1;
        break;
      case 'h':
        opt.no_filename = 1;
        break;
      case 'i':
        opt.ignore_case = 1;
        break;
      case 'K':
        opt.with_keyword = 1;
        break;
      case 'k':
        opt.no_keyword = 1;
        break;
      case 'L':
        opt.out_quiet = 1;
        opt.list_files = 1;
        break;
      case 'l':
        opt.out_quiet = 1;
        opt.list_files = 2;
        break;
      case 'O':
        if (optarg)
          {
            if (ck_atoi (optarg, &opt.occurences))
              fatal ("invalid occurence argument");
          }
        break;
      case 'q':
        opt.out_quiet = 1;
        break;
      case 'V':
        printf ("bool (GNU) %s\n", VERSION);
        exit (0);
      case 0:
        /* long options */
        break;
      default:
        usage (1);
        break;
      }

  if (opt.show_help)
    usage (0);

  if (optind == argc)
    usage (1);

  if (!init (argv[optind++]))
    fatal ("memory exhausted");

  if (argc - optind > 1 && !opt.no_filename)
    opt.with_filename = 1;

  if (optind < argc)
    {
      state = 0;
      do
        {
          char *file = argv[optind];
          state += openfile (strcmp (file, "-") == 0 ? (char *) NULL : file);
        }
      while (++optind < argc);
    }
  else
    state = openfile ((char *) NULL);

  kw_free ();
  free (buffer);
  return !state;
}

