#! /usr/bin/perl
# $Id: dict2index,v 1.3 2000/05/11 19:30:16 villate Exp $
#
# dict2index - program to create the orca glossary index file with the
# format used by the dict program.
# Copyright (C) Jaime Villate <villate@fe.up.pt>, 2000
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Jaime Villate, Faculdade de Engenharia, Rua dos Bragas, 4050-123 Porto
# Portugal
#
# The ORCA project

=head1 NAME

dict2index - Creates the .index file for a .dict text file

=head1 SYNOPSIS

S<B<dict2index> I<dictfile>>

=head1 DESCRIPTION

This program reads a dict text file and creates the corresponding
index file required by the dict program. The dict text file can be
uncompressed (filename extension dict) or compressed (filename extension
dict.dz).

Every input line that starts with a non-blank character is taken as a
headword; the lines that follow it, up to the next headword, belong to the
same entry. For each entry one line is written to the index:

    headword <TAB> offset <TAB> length

where offset and length are counted from the start of the uncompressed
text and are written in the base-64 notation produced by C<base64()>.
The index lines are piped through C<sort -df>.

=head1 AUTHOR

Jaime Villate E<lt>villate@fe.up.ptE<gt>.

=head1 SEE ALSO

L<dict(1)>, L<dictd(8)>

=cut

use strict;
use warnings;

# Digit alphabet used by base64(): value 0 is 'A', value 63 is '/'.
my @B64 = ('A' .. 'Z', 'a' .. 'z', '0' .. '9', '+', '/');

# base64($n)
#
# Returns the non-negative integer $n written as a base-64 number, most
# significant digit first, using the digits in @B64.  Zero (and anything
# not greater than zero) is returned as the single digit 'A'.
sub base64 {
    my ($n) = @_;

    return 'A' if $n <= 0;

    my $val = '';
    while ($n != 0) {
        $val = $B64[$n % 64] . $val;
        $n   = int($n / 64);
    }
    return $val;
}

# usage()
#
# Prints a one-line usage message on STDERR and terminates the program
# with a non-zero exit status.  Never returns.
sub usage {
    print STDERR "Usage: dict2index <file.dict | file.dict.dz>\n";
    exit 1;
}

# index_name_for($dictname)
#
# Returns the name of the index file that belongs to $dictname: a trailing
# ".dz" is dropped and a trailing ".dict" is replaced by ".index".  Only the
# end of the name is touched, so a ".dict" elsewhere in the path is left
# alone.  When the name has no ".dict" suffix, ".index" is appended instead,
# which guarantees that the index never has the same name as the input.
sub index_name_for {
    my ($dictname) = @_;

    (my $indexname = $dictname) =~ s/\.dz\z//;
    $indexname .= '.index' unless $indexname =~ s/\.dict\z/.index/;
    return $indexname;
}

# write_entry($fh, $keyword, $offset64, $len)
#
# Writes one index line (keyword, base-64 offset, base-64 length, separated
# by tabs) to $fh.  Nothing is written while $keyword is still empty, i.e.
# before the first headword has been seen.  Dies if the write fails.
sub write_entry {
    my ($fh, $keyword, $offset64, $len) = @_;

    return unless length $keyword;

    print {$fh} join("\t", $keyword, $offset64, base64($len)), "\n"
        or die "dict2index: cannot write index entry: $!\n";
    return;
}

# main()
#
# Program entry point: validates the command line, opens the dictionary
# (through zcat when the name ends in ".dz"), and streams the index entries
# into "sort -df", which writes the index file.  Dies with a message if a
# file or a helper program cannot be opened or finishes with an error.
sub main {
    usage() if @ARGV != 1;

    my $dictname = $ARGV[0];
    usage() if $dictname =~ /^-/;

    my $indexname = index_name_for($dictname);

    # The file names are handed over as separate arguments (list-form pipe
    # open), never through a shell, so spaces or shell metacharacters in a
    # name are harmless.
    my $dict;
    if ($dictname =~ /\.dz\z/) {
        open($dict, '-|', 'zcat', $dictname)
            or die "dict2index: cannot run zcat on $dictname: $!\n";
    }
    else {
        open($dict, '<', $dictname)
            or die "dict2index: cannot open $dictname: $!\n";
    }

    # NOTE(review): the order produced by sort -df depends on the current
    # locale; confirm which collation the dict server expects.
    open(my $index, '|-', 'sort', '-df', '-o', $indexname)
        or die "dict2index: cannot run sort for $indexname: $!\n";

    my $keyword        = '';     # headword of the entry being measured
    my $entry_offset64 = 'A';    # base-64 offset of that entry
    my $entry_len      = 0;      # length of that entry so far
    my $offset         = 0;      # offset of the current line in the input

    while (my $line = <$dict>) {
        my $linelen = length $line;

        if ($line =~ /^\S/) {
            # A new headword closes the previous entry.
            write_entry($index, $keyword, $entry_offset64, $entry_len);

            $entry_offset64 = base64($offset);
            $entry_len      = $linelen;

            # chomp, not chop: the last line of the file may lack a newline.
            chomp $line;
            $keyword = $line;
        }
        else {
            $entry_len += $linelen;
        }
        $offset += $linelen;
    }

    # The final entry is closed by end of input rather than by a headword.
    write_entry($index, $keyword, $entry_offset64, $entry_len);

    # Closing a pipe waits for the child; a failure here means sort (or
    # zcat) did not finish successfully.
    close $index
        or die "dict2index: sort failed for $indexname: $! (status $?)\n";
    close $dict
        or die "dict2index: error reading $dictname: $! (status $?)\n";
    return;
}

# Run only when executed as a program, so the subroutines above can be
# loaded with require/do without triggering the command-line processing.
unless (caller) {
    main();
    exit 0;
}

1;