/*
 * Decompiled with CFR 0.152.
 */
package at.ac.tuwien.dbai.pdfwrap;

import at.ac.tuwien.dbai.pdfwrap.analysis.PageProcessor;
import at.ac.tuwien.dbai.pdfwrap.exceptions.DocumentProcessingException;
import at.ac.tuwien.dbai.pdfwrap.model.document.GenericSegment;
import at.ac.tuwien.dbai.pdfwrap.model.document.IXHTMLSegment;
import at.ac.tuwien.dbai.pdfwrap.model.document.Page;
import at.ac.tuwien.dbai.pdfwrap.model.graph.AdjacencyGraph;
import at.ac.tuwien.dbai.pdfwrap.pdfread.PDFObjectExtractor;
import at.ac.tuwien.dbai.pdfwrap.pdfread.PDFPage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.log4j.Logger;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Text;

/**
 * Command-line entry point and static helper API that runs a PDF through a
 * {@link PageProcessor} and serializes the result as an XML document (either
 * XHTML or the "PDFResult" XMIllum-style format).
 *
 * <p>All methods are static; the class holds no state besides its logger.
 */
public class ProcessFile {
    private static final Logger LOG = Logger.getLogger(ProcessFile.class);

    /** Encoding used when no explicit output encoding is requested. */
    public static final String DEFAULT_ENCODING = "UTF-8";

    // Command-line switches recognised by main(String[]).
    public static final String PASSWORD = "-password";
    public static final String ENCODING = "-encoding";
    public static final String CONSOLE = "-console";
    public static final String START_PAGE = "-startPage";
    public static final String END_PAGE = "-endPage";
    public static final String XMILLUM = "-xmillum";
    public static final String NOBORDERS = "-noborders";
    public static final String PROCESS_SPACES = "-spaces";
    public static final String NORULINGLINES = "-norulinglines";

    /**
     * Loads a PDF from memory, extracts its pages and runs each one through
     * the given page processor.
     *
     * @param theFile      raw bytes of the PDF document
     * @param pp           processor applied to every extracted page
     * @param startPage    first page to extract; {@code 0} is treated as {@code 1}
     * @param endPage      last page to extract; {@code 0} is treated as
     *                     {@link Integer#MAX_VALUE}
     * @param encoding     accepted for API compatibility; not used by this method
     * @param password     password used if the document is encrypted;
     *                     {@code null} is treated as the empty password
     * @param adjGraphList if non-null, receives {@code pp.getAdjGraph()} once per
     *                     processed page, in page order
     * @param GUI          when {@code false}, the page list is additionally passed
     *                     through {@link PageProcessor#processDocPages}
     * @return the processed pages
     * @throws DocumentProcessingException if the document cannot be decrypted or
     *                                     an I/O error occurs while reading it
     */
    public static List<Page> processPDF(byte[] theFile, PageProcessor pp, int startPage, int endPage, String encoding, String password, List<AdjacencyGraph<GenericSegment>> adjGraphList, boolean GUI) throws DocumentProcessingException {
        if (password == null) {
            password = "";
        }
        if (startPage == 0) {
            startPage = 1;
        }
        if (endPage == 0) {
            endPage = Integer.MAX_VALUE;
        }
        InputStream inStream = new ByteArrayInputStream(theFile);
        PDDocument document = null;
        try {
            PDFObjectExtractor extractor = new PDFObjectExtractor();
            document = PDDocument.load(inStream);
            if (document.isEncrypted()) {
                try {
                    document.decrypt(password);
                }
                catch (InvalidPasswordException e) {
                    // Compare by content: an empty password means the caller supplied none.
                    if (password.length() > 0) {
                        throw new DocumentProcessingException("Error: The supplied password is incorrect.");
                    }
                    throw new DocumentProcessingException("Error: The document is encrypted.");
                }
                catch (CryptographyException e) {
                    throw new DocumentProcessingException(e);
                }
            }
            extractor.setStartPage(startPage);
            extractor.setEndPage(endPage);
            List<PDFPage> thePages = extractor.findObjects(document);
            List<Page> theResult = new ArrayList<Page>();
            for (PDFPage thePage : thePages) {
                theResult.add(pp.processPage(thePage));
                if (adjGraphList != null) {
                    adjGraphList.add(pp.getAdjGraph());
                }
            }
            if (!GUI) {
                theResult = PageProcessor.processDocPages(theResult, null);
            }
            return theResult;
        }
        catch (IOException e) {
            LOG.error("I/O error while processing PDF document", e);
            throw new DocumentProcessingException(e);
        }
        finally {
            // Always release the document, including on failure paths.
            if (document != null) {
                try {
                    document.close();
                }
                catch (IOException e) {
                    LOG.warn("Could not close PDF document", e);
                }
            }
        }
    }

    /**
     * Convenience wrapper that converts a single processed page into an XML
     * document by delegating to
     * {@link #processResultToXMLDocument(List, boolean, boolean)}.
     *
     * @param resultPage the page to convert
     * @param toXHTML    {@code true} for XHTML output, {@code false} for "PDFResult" XML
     * @param borders    whether to add the table-border stylesheet (XHTML only)
     * @return the generated document
     * @throws DocumentProcessingException if the XML document cannot be set up
     */
    public static Document processResultPageToXMLDocument(Page resultPage, boolean toXHTML, boolean borders) throws DocumentProcessingException {
        List<Page> singlePage = new ArrayList<Page>();
        singlePage.add(resultPage);
        return ProcessFile.processResultToXMLDocument(singlePage, toXHTML, borders);
    }

    /**
     * Builds an XML document from a list of processed pages.
     *
     * <p>In XHTML mode the root element is {@code html}; an optional
     * {@code head/style} element with table-border CSS is added, and each
     * segment is rendered into a {@code body} element. Otherwise the root is
     * {@code PDFResult} and every page becomes a {@code page} element carrying
     * a 1-based {@code page_number} attribute.
     *
     * @param theResult the processed pages, numbered in iteration order
     * @param toXHTML   {@code true} for XHTML output, {@code false} for "PDFResult" XML
     * @param borders   whether to add the table-border stylesheet (XHTML only)
     * @return the generated document
     * @throws DocumentProcessingException if the XML document cannot be set up
     */
    public static Document processResultToXMLDocument(List<Page> theResult, boolean toXHTML, boolean borders) throws DocumentProcessingException {
        Document resultDocument;
        try {
            resultDocument = ProcessFile.setUpXML(toXHTML ? "html" : "PDFResult");
        }
        catch (ParserConfigurationException e) {
            throw new DocumentProcessingException(e);
        }
        Element docElement = resultDocument.getDocumentElement();
        Element bodyElement = null;
        if (toXHTML) {
            if (borders) {
                Element headElement = resultDocument.createElement("head");
                Element styleElement = resultDocument.createElement("style");
                styleElement.setAttribute("type", "text/css");
                Text tableRule = resultDocument.createTextNode("table {border-collapse: collapse;}");
                Text cellRule = resultDocument.createTextNode("td, th {border: 1px solid grey; padding: 2px 4px;}");
                styleElement.appendChild(tableRule);
                styleElement.appendChild(cellRule);
                headElement.appendChild(styleElement);
                docElement.appendChild(headElement);
            }
            bodyElement = resultDocument.createElement("body");
        }

        int pageNo = 0;
        // Iterate as GenericSegment so that non-Page entries are tolerated
        // rather than failing with a ClassCastException.
        for (GenericSegment segment : theResult) {
            if (segment instanceof Page) {
                Page resultPage = (Page)segment;
                ++pageNo;
                if (toXHTML) {
                    resultPage.setPageNo(pageNo);
                    resultPage.addAsXHTML(resultDocument, bodyElement);
                } else {
                    Element pageElement = resultDocument.createElement("page");
                    pageElement.setAttribute("page_number", Integer.toString(pageNo));
                    // NOTE(review): 300.0f is passed through unchanged; presumably a
                    // resolution/scale factor - confirm against Page.addAsXmillum.
                    resultPage.addAsXmillum(resultDocument, pageElement, resultPage, 300.0f);
                    docElement.appendChild(pageElement);
                }
            } else if (toXHTML && segment instanceof IXHTMLSegment) {
                ((IXHTMLSegment)((Object)segment)).addAsXHTML(resultDocument, bodyElement);
            }
        }
        if (toXHTML) {
            docElement.appendChild(bodyElement);
        }
        return resultDocument;
    }

    /**
     * Processes a PDF (see {@link #processPDF}, with no adjacency-graph list
     * and {@code GUI == false}) and converts the result to an XML document.
     *
     * @throws DocumentProcessingException if processing or XML set-up fails
     */
    public static Document processPDFToXMLDocument(byte[] theFile, PageProcessor pp, boolean toXHTML, boolean borders, int startPage, int endPage, String encoding, String password) throws DocumentProcessingException {
        List<Page> pages = ProcessFile.processPDF(theFile, pp, startPage, endPage, encoding, password, null, false);
        return ProcessFile.processResultToXMLDocument(pages, toXHTML, borders);
    }

    /**
     * Processes a PDF and returns the resulting XML document serialized as
     * {@link #DEFAULT_ENCODING} bytes.
     *
     * @throws DocumentProcessingException if processing or serialization fails
     */
    public static byte[] processPDFToByteArray(byte[] theFile, PageProcessor pp, boolean toXHTML, boolean borders, int startPage, int endPage, String encoding, String password) throws DocumentProcessingException {
        Document resultDocument = ProcessFile.processPDFToXMLDocument(theFile, pp, toXHTML, borders, startPage, endPage, encoding, password);
        return ProcessFile.serializeXML(resultDocument);
    }

    /**
     * Command-line entry point; see {@link #usage()} for the accepted options.
     * The first non-option argument is the input PDF, a later one the output
     * file. Without an explicit output file the name is derived from the input.
     *
     * @param args command-line arguments
     * @throws Exception if reading, processing or writing fails
     */
    public static void main(String[] args) throws Exception {
        boolean toConsole = false;
        boolean toXHTML = true;
        boolean borders = true;
        boolean rulingLines = true;
        boolean processSpaces = false;
        String password = "";
        String encoding = DEFAULT_ENCODING;
        String inFile = null;
        String outFile = null;
        int startPage = 1;
        int endPage = Integer.MAX_VALUE;

        for (int i = 0; i < args.length; ++i) {
            String arg = args[i];
            if (arg.equals(PASSWORD)) {
                password = ProcessFile.optionValue(args, ++i);
            } else if (arg.equals(ENCODING)) {
                encoding = ProcessFile.optionValue(args, ++i);
            } else if (arg.equals(START_PAGE)) {
                startPage = ProcessFile.parsePageNumber(arg, ProcessFile.optionValue(args, ++i));
            } else if (arg.equals(END_PAGE)) {
                endPage = ProcessFile.parsePageNumber(arg, ProcessFile.optionValue(args, ++i));
            } else if (arg.equals(CONSOLE)) {
                toConsole = true;
            } else if (arg.equals(NOBORDERS)) {
                borders = false;
            } else if (arg.equals(XMILLUM)) {
                toXHTML = false;
            } else if (arg.equals(NORULINGLINES)) {
                rulingLines = false;
            } else if (arg.equals(PROCESS_SPACES)) {
                // The flag enables space processing (the default is off).
                processSpaces = true;
            } else if (inFile == null) {
                inFile = arg;
            } else {
                outFile = arg;
            }
        }
        if (inFile == null) {
            ProcessFile.usage();
        }
        if (outFile == null) {
            // Replace a 4-character suffix such as ".pdf"; for shorter names just
            // append, so the output file name is never null.
            outFile = inFile.length() > 4
                    ? inFile.substring(0, inFile.length() - 4) + ".txt"
                    : inFile + ".txt";
        }

        byte[] inputDoc = ProcessFile.getBytesFromFile(new File(inFile));
        PageProcessor pp = new PageProcessor();
        pp.setProcessType(5);
        pp.setRulingLines(rulingLines);
        pp.setProcessSpaces(processSpaces);
        Document resultDocument = ProcessFile.processPDFToXMLDocument(inputDoc, pp, toXHTML, borders, startPage, endPage, encoding, password);
        // Diagnostics go to the log so that -console output stays pure XML.
        LOG.debug("resultDocument: " + resultDocument);

        if (toConsole) {
            // System.out is flushed by serializeXML and deliberately left open.
            ProcessFile.serializeXML(resultDocument, new OutputStreamWriter(System.out));
            return;
        }
        OutputStream fileStream = new FileOutputStream(outFile);
        try {
            Writer output = new OutputStreamWriter(fileStream, encoding);
            // Declare the same encoding in the XML prolog as is used for the bytes.
            ProcessFile.serializeXML(resultDocument, output, encoding);
            output.close();
        }
        finally {
            fileStream.close();
        }
    }

    /**
     * Returns {@code args[index]}, or prints the usage text and exits when the
     * option at the end of the command line is missing its value.
     */
    private static String optionValue(String[] args, int index) {
        if (index >= args.length) {
            ProcessFile.usage();
        }
        return args[index];
    }

    /**
     * Parses a page-number option value; prints an error plus the usage text
     * and exits when the value is not an integer.
     */
    private static int parsePageNumber(String option, String value) {
        try {
            return Integer.parseInt(value);
        }
        catch (NumberFormatException e) {
            System.err.println("Invalid value for " + option + ": " + value);
            ProcessFile.usage();
            throw e; // not reached: usage() exits the JVM
        }
    }

    /**
     * Converts a PDF to XHTML bytes using a {@code PageProcessor} constructed
     * with process type {@code 5} and table borders enabled.
     *
     * @throws DocumentProcessingException if processing or serialization fails
     */
    public static byte[] PDFToXHTML(byte[] theFile, int startPage, int endPage, String encoding, String password) throws DocumentProcessingException {
        PageProcessor pp = new PageProcessor(5);
        return ProcessFile.processPDFToByteArray(theFile, pp, true, true, startPage, endPage, encoding, password);
    }

    /**
     * Creates an empty DOM document whose root element is named
     * {@code nodeName} in the namespace {@code at.ac.tuwien.dbai.pdfwrap}.
     *
     * @param nodeName qualified name of the root element
     * @return the new document
     * @throws ParserConfigurationException if no document builder can be created
     */
    protected static Document setUpXML(String nodeName) throws ParserConfigurationException {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        DOMImplementation domImpl = builder.getDOMImplementation();
        return domImpl.createDocument("at.ac.tuwien.dbai.pdfwrap", nodeName, null);
    }

    /**
     * Reads the complete contents of a file into a byte array.
     *
     * @param file the file to read
     * @return the file contents
     * @throws IOException if the file cannot be opened, is larger than
     *                     {@link Integer#MAX_VALUE} bytes, or ends before the
     *                     number of bytes reported by {@link File#length()} was read
     */
    public static byte[] getBytesFromFile(File file) throws IOException {
        long length = file.length();
        if (length > Integer.MAX_VALUE) {
            // A plain (int) cast would silently truncate the buffer size.
            throw new IOException("File is too large to read into memory: " + file.getName());
        }
        InputStream is = new FileInputStream(file);
        try {
            byte[] bytes = new byte[(int)length];
            int offset = 0;
            while (offset < bytes.length) {
                int numRead = is.read(bytes, offset, bytes.length - offset);
                if (numRead < 0) {
                    break;
                }
                offset += numRead;
            }
            if (offset < bytes.length) {
                throw new IOException("Could not completely read file " + file.getName());
            }
            return bytes;
        }
        finally {
            is.close();
        }
    }

    /**
     * Serializes a document to a byte array in {@link #DEFAULT_ENCODING}.
     *
     * @throws DocumentProcessingException if serialization fails
     */
    public static byte[] serializeXML(Document resultDocument) throws DocumentProcessingException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        ProcessFile.serializeXML(resultDocument, (OutputStream)buffer);
        return buffer.toByteArray();
    }

    /**
     * Serializes a document to a stream in {@link #DEFAULT_ENCODING}. The
     * stream is flushed but not closed.
     *
     * @throws DocumentProcessingException if serialization fails
     */
    public static void serializeXML(Document resultDocument, OutputStream outStream) throws DocumentProcessingException {
        try {
            Writer output = new OutputStreamWriter(outStream, DEFAULT_ENCODING);
            ProcessFile.serializeXML(resultDocument, output);
        }
        catch (IOException e) {
            throw new DocumentProcessingException(e);
        }
    }

    /**
     * Serializes a document to a writer, declaring {@link #DEFAULT_ENCODING}
     * in the output format. The writer is flushed but not closed.
     *
     * @throws IOException if serialization fails
     */
    public static void serializeXML(Document resultDocument, Writer output) throws IOException {
        ProcessFile.serializeXML(resultDocument, output, DEFAULT_ENCODING);
    }

    /**
     * Serializes a document (indented) to a writer, declaring the given
     * encoding in the output format. The caller is responsible for the writer
     * actually encoding characters with that charset. The writer is flushed
     * but not closed.
     *
     * @param resultDocument the document to serialize
     * @param output         destination writer
     * @param encoding       encoding name passed to the output format
     * @throws IOException if serialization fails
     */
    public static void serializeXML(Document resultDocument, Writer output, String encoding) throws IOException {
        OutputFormat format = new OutputFormat(resultDocument, encoding, true);
        XMLSerializer serializer = new XMLSerializer(output, format);
        try {
            serializer.serialize(resultDocument);
            output.flush();
        }
        catch (IOException e) {
            LOG.error("Couldn't serialize document: " + e.getMessage(), e);
            throw e;
        }
    }

    /** Prints the command-line help to standard error and exits with status 1. */
    private static void usage() {
        System.err.println("Usage: java at.ac.tuwien.dbai.pdfwrap.ProcessFile [OPTIONS] <PDF file> [Text File]\n"
                + "  -password  <password>        Password to decrypt document\n"
                + "  -encoding  <output encoding> (ISO-8859-1,UTF-16BE,UTF-16LE,...)\n"
                + "  -xmillum                     output XMIllum XML (instead of XHTML)\n"
                + "  -noborders                   do not add table border styling to XHTML output\n"
                + "  -norulinglines               do not process ruling lines\n"
                + "  -spaces                      split low-level segments according to spaces\n"
                + "  -console                     Send text to console instead of file\n"
                + "  -startPage <number>          The first page to start extraction(1 based)\n"
                + "  -endPage <number>            The last page to extract(inclusive)\n"
                + "  <PDF file>                   The PDF document to use\n"
                + "  [Text File]                  The file to write the text to\n");
        System.exit(1);
    }
}

