/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.tools;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Date;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.util.StringUtil;
import org.archive.format.http.HttpHeaders;
import org.archive.format.warc.WARCConstants;
import org.archive.io.warc.WARCRecordInfo;
import org.archive.uid.UUIDGenerator;
import org.archive.util.DateUtils;
import org.archive.util.anvl.ANVLRecord;

public class WARCUtils {
    // Labels of the fields that getWARCInfoContent adds to the ANVL record.
    public static final String SOFTWARE = "software";
    public static final String HTTP_HEADER_FROM = "http-header-from";
    public static final String HTTP_HEADER_USER_AGENT = "http-header-user-agent";
    public static final String HOSTNAME = "hostname";
    public static final String ROBOTS = "robots";
    public static final String OPERATOR = "operator";
    public static final String FORMAT = "format";
    public static final String CONFORMS_TO = "conformsTo";
    public static final String IP = "ip";
    // Supplies the record ID of every record built by docToMetadata.
    public static final UUIDGenerator generator = new UUIDGenerator();
    // Line terminator used when splitting and rebuilding HTTP header blocks.
    public static final String CRLF = "\r\n";
    // Separator written between a header name and its value.
    public static final String COLONSP = ": ";
    // Case-insensitive, whole-name match for Content-Encoding, Content-Length and
    // Transfer-Encoding; fixHttpHeaders tests each header name against it.
    protected static final Pattern PROBLEMATIC_HEADERS = Pattern.compile("(?i)(?:Content-(?:Encoding|Length)|Transfer-Encoding)");
    // Prefix that fixHttpHeaders puts in front of a header line it needs to hide.
    protected static final String X_HIDE_HEADER = "X-Crawler-";

    /**
     * Builds the ANVL field list describing this crawler from its configuration.
     *
     * <p>The record carries the format/conformance strings, the agent name, the
     * assembled user-agent string, the contact e-mail (as both "from" and
     * "operator"), the robots policy and, when they can be resolved, the host
     * name and IP address.
     *
     * @param conf configuration providing the {@code http.agent.*} properties;
     *             missing properties are read as the empty string
     * @return a newly created record holding the fields listed above
     */
    public static final ANVLRecord getWARCInfoContent(Configuration conf) {
        final String agentName = conf.get("http.agent.name", "");
        final String agentEmail = conf.get("http.agent.email", "");
        final String userAgent = WARCUtils.getAgentString(
                agentName,
                conf.get("http.agent.version", ""),
                conf.get("http.agent.description", ""),
                conf.get("http.agent.url", ""),
                agentEmail);

        final ANVLRecord info = new ANVLRecord();
        info.addLabelValue(FORMAT, "WARC File Format 1.0");
        info.addLabelValue(CONFORMS_TO, "http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf");
        info.addLabelValue(SOFTWARE, agentName);
        info.addLabelValue(HTTP_HEADER_USER_AGENT, userAgent);
        info.addLabelValue(HTTP_HEADER_FROM, agentEmail);
        try {
            info.addLabelValue(HOSTNAME, WARCUtils.getHostname(conf));
            info.addLabelValue(IP, WARCUtils.getIPAddress(conf));
        }
        catch (UnknownHostException ignored) {
            // Best effort: when the local host cannot be resolved the remaining
            // host/IP fields are simply left out of the record.
        }
        info.addLabelValue(ROBOTS, "classic");
        info.addLabelValue(OPERATOR, agentEmail);
        return info;
    }

    /**
     * Returns the host name to report for this crawler.
     *
     * @param conf configuration consulted for the {@code http.agent.host} property
     * @return the configured {@code http.agent.host} when it is non-empty,
     *         otherwise the host name of the local machine
     * @throws UnknownHostException if the fallback lookup of the local host fails
     */
    public static final String getHostname(Configuration conf) throws UnknownHostException {
        String configuredHost = conf.get("http.agent.host", "");
        if (!StringUtil.isEmpty(configuredHost)) {
            return configuredHost;
        }
        return InetAddress.getLocalHost().getHostName();
    }

    /**
     * Returns the textual IP address of the local host.
     *
     * @param conf not consulted by this method
     * @return the address reported by {@link InetAddress#getLocalHost()}
     * @throws UnknownHostException if the local host cannot be resolved
     */
    public static final String getIPAddress(Configuration conf) throws UnknownHostException {
        InetAddress localHost = InetAddress.getLocalHost();
        return localHost.getHostAddress();
    }

    /**
     * Serializes a set of HTTP headers into a byte array.
     *
     * @param headers the headers to write out
     * @return the bytes produced by {@code headers.write(...)}
     * @throws IOException if writing the headers fails
     */
    public static final byte[] toByteArray(HttpHeaders headers) throws IOException {
        // Closing an in-memory buffer is a no-op; try-with-resources is used for uniformity only.
        try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
            headers.write((OutputStream)buffer);
            return buffer.toByteArray();
        }
    }

    /**
     * Assembles a user-agent string of the form
     * {@code name/version (description; url; email)}.
     *
     * <p>Every component except {@code name} is optional: a component that is
     * {@code null} or empty is left out together with its separator, so no
     * dangling {@code "/"}, {@code "; "} or empty parentheses are produced.
     * The parenthesised part is omitted when description, URL and e-mail are
     * all absent.
     *
     * @param name        agent name; appended as-is
     * @param version     agent version, may be {@code null} or empty
     * @param description agent description, may be {@code null} or empty
     * @param URL2        agent URL, may be {@code null} or empty
     * @param email       contact e-mail, may be {@code null} or empty
     * @return the assembled agent string
     */
    public static final String getAgentString(String name, String version, String description, String URL2, String email) {
        StringBuilder agent = new StringBuilder();
        agent.append(name);
        if (version != null && version.length() != 0) {
            agent.append('/').append(version);
        }

        // Collect only the details that are actually present so that separators
        // are emitted between real values and never after the last one.
        StringBuilder details = new StringBuilder();
        for (String detail : new String[] {description, URL2, email}) {
            if (detail == null || detail.length() == 0) {
                continue;
            }
            if (details.length() > 0) {
                details.append("; ");
            }
            details.append(detail);
        }

        if (details.length() > 0) {
            agent.append(" (").append(details).append(')');
        }
        return agent.toString();
    }

    /**
     * Converts an indexed document into a WARC {@code metadata} record.
     *
     * <p>The record's URL is the document's {@code id} field and its creation
     * date is the document's {@code tstamp} field. Every value of every field
     * becomes one label/value pair of the record body: {@link Date} values are
     * written as 14-digit dates of the value itself, all other values by their
     * string form.
     *
     * @param doc the document to convert; its {@code id} field is cast to
     *            {@code String} and its {@code tstamp} field to {@code Date}
     * @return a new record with a freshly generated record ID and the fields
     *         of {@code doc} as its content
     * @throws UnsupportedEncodingException if the record body cannot be encoded as UTF-8
     */
    public static final WARCRecordInfo docToMetadata(NutchDocument doc) throws UnsupportedEncodingException {
        WARCRecordInfo record = new WARCRecordInfo();
        record.setType(WARCConstants.WARCRecordType.metadata);
        record.setUrl((String)doc.getFieldValue("id"));
        record.setCreate14DigitDate(DateUtils.get14DigitDate((Date)doc.getFieldValue("tstamp")));
        record.setMimetype("application/warc-fields");
        record.setRecordId(generator.getRecordID());

        ANVLRecord metadata = new ANVLRecord();
        for (String field : doc.getFieldNames()) {
            List<Object> values = doc.getField(field).getValues();
            for (Object value : values) {
                String text;
                if (value instanceof Date) {
                    // Format the field's own date, not the current time.
                    text = DateUtils.get14DigitDate((Date)value);
                } else {
                    // Non-String values (numbers, booleans, ...) are written by their
                    // string form instead of failing on a cast; null is passed through.
                    text = value == null ? null : value.toString();
                }
                metadata.addLabelValue(field, text);
            }
        }

        // Serialize once so the declared length always matches the streamed bytes.
        byte[] body = metadata.getUTF8Bytes();
        record.setContentLength((long)body.length);
        record.setContentStream((InputStream)new ByteArrayInputStream(body));
        return record;
    }

    /**
     * Rewrites a raw, CRLF-separated HTTP header block so that it agrees with
     * a payload of {@code contentLength} bytes.
     *
     * <p>Each line is examined in turn:
     * <ul>
     * <li>A line without a colon is kept if it is the first line, or if it is
     *     directly followed by the {@code CRLF CRLF} that ends the string;
     *     any other colon-less line (including an empty one) is dropped.</li>
     * <li>A line whose name (the text before the first colon) matches
     *     {@link #PROBLEMATIC_HEADERS} is kept but prefixed with
     *     {@link #X_HIDE_HEADER}. The exception is a {@code Content-Length}
     *     whose value parses to exactly {@code contentLength}, which is left
     *     untouched. After a hidden {@code Content-Length} a new
     *     {@code Content-Length} line carrying {@code contentLength} is
     *     inserted.</li>
     * <li>All other lines are copied unchanged.</li>
     * </ul>
     * When the string is rebuilt, CRLFs are appended until the line-terminator
     * counter reaches two.
     *
     * @param headers       the header block, or {@code null}
     * @param contentLength the length to compare against, and write into, the
     *                      {@code Content-Length} header
     * @return {@code null} if {@code headers} is {@code null}; {@code headers}
     *         itself if no line was dropped or rewritten and the last line
     *         examined was recognised as followed by {@code CRLF CRLF};
     *         otherwise the rebuilt header block
     */
    public static final String fixHttpHeaders(String headers, int contentLength) {
        if (headers == null) {
            return null;
        }
        // start:        index of the first character of the line being examined
        // lineEnd:      index just past the last character of that line (before its CRLF)
        // last:         everything before this index has been copied to, or dropped from, the output
        // trailingCrLf: number of CRLFs counted after the most recently examined line
        int start = 0;
        int lineEnd = 0;
        int last = 0;
        int trailingCrLf = 0;
        StringBuilder replace = new StringBuilder();
        while (start < headers.length()) {
            lineEnd = headers.indexOf(CRLF, start);
            trailingCrLf = 1;
            if (lineEnd == -1) {
                // Final line without a terminating CRLF: it runs to the end of the string.
                lineEnd = headers.length();
                trailingCrLf = 0;
            }
            // Locate the first colon within the current line.
            int colonPos = -1;
            for (int i = start; i < lineEnd; ++i) {
                if (headers.charAt(i) != ':') continue;
                colonPos = i;
                break;
            }
            if (colonPos == -1) {
                // No colon: acceptable for the first line (presumably the status line)
                // or for a line immediately followed by the closing CRLF CRLF.
                boolean valid = true;
                if (start != 0) {
                    if (lineEnd + 4 == headers.length() && headers.endsWith("\r\n\r\n")) {
                        trailingCrLf = 2;
                    } else {
                        valid = false;
                    }
                }
                if (!valid) {
                    // Flush the untouched text before this line, then skip the line
                    // (and its CRLF) by moving 'last' past it.
                    if (last < start) {
                        replace.append(headers.substring(last, start));
                    }
                    last = lineEnd + 2 * trailingCrLf;
                }
                start = lineEnd + 2 * trailingCrLf;
                continue;
            }
            String name = headers.substring(start, colonPos);
            if (PROBLEMATIC_HEADERS.matcher(name).matches()) {
                boolean needsFix = true;
                if (name.equalsIgnoreCase("content-length")) {
                    // A Content-Length that already equals contentLength is left alone.
                    String value = headers.substring(colonPos + 1, lineEnd).trim();
                    try {
                        int l = Integer.parseInt(value);
                        if (l == contentLength) {
                            needsFix = false;
                        }
                    }
                    catch (NumberFormatException numberFormatException) {
                        // Value is not an int: keep needsFix == true so the header is replaced.
                    }
                }
                if (needsFix) {
                    // Flush the untouched text before this line.
                    if (last < start) {
                        replace.append(headers.substring(last, start));
                    }
                    last = lineEnd + 2 * trailingCrLf;
                    // Re-emit the original line (with its CRLF, if any) under the hiding prefix.
                    replace.append(X_HIDE_HEADER).append(headers.substring(start, lineEnd + 2 * trailingCrLf));
                    if (trailingCrLf == 0) {
                        // The line had no terminator; add one so following output starts on a new line.
                        replace.append(CRLF);
                        trailingCrLf = 1;
                    }
                    if (name.equalsIgnoreCase("content-length")) {
                        // Supply a Content-Length carrying the given contentLength.
                        replace.append("Content-Length").append(COLONSP).append(contentLength).append(CRLF);
                    }
                }
            }
            start = lineEnd + 2 * trailingCrLf;
        }
        // 'last' only becomes positive when a line was dropped or rewritten. Rebuild the
        // result in that case, or when fewer than two trailing CRLFs were counted.
        if (last > 0 || trailingCrLf != 2) {
            if (last < headers.length()) {
                // Copy the untouched remainder of the input.
                replace.append(headers.substring(last));
            }
            // Pad with CRLFs until two have been accounted for.
            while (trailingCrLf < 2) {
                replace.append(CRLF);
                ++trailingCrLf;
            }
            return replace.toString();
        }
        return headers;
    }
}

