/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.transform.tokenize;

import java.io.Serializable;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysds.common.Types;
import org.apache.sysds.runtime.matrix.data.FrameBlock;
import org.apache.sysds.runtime.transform.tokenize.TokenizerPost;
import org.apache.sysds.runtime.transform.tokenize.TokenizerPre;

/**
 * Two-stage tokenizer that chains a {@link TokenizerPre} step and a
 * {@link TokenizerPost} step.
 *
 * <p>The pre-stage turns an input frame into a list of
 * {@link DocumentToTokens}; the post-stage consumes that list and fills the
 * output frame. Output shape and schema queries are answered by the
 * post-stage alone.
 */
public class Tokenizer
implements Serializable {
    private static final long serialVersionUID = 7155673772374114577L;
    protected static final Log LOG = LogFactory.getLog(Tokenizer.class.getName());

    /** Stage applied to the input frame. */
    private final TokenizerPre tokenizerPre;
    /** Stage that builds the output frame and reports its schema and dimensions. */
    private final TokenizerPost tokenizerPost;

    /**
     * Creates a tokenizer from its two stages.
     *
     * @param tokenizerPre  stage applied to the input frame
     * @param tokenizerPost stage applied to the pre-stage result
     */
    protected Tokenizer(TokenizerPre tokenizerPre, TokenizerPost tokenizerPost) {
        this.tokenizerPre = tokenizerPre;
        this.tokenizerPost = tokenizerPost;
    }

    /**
     * @return the output schema as reported by the post-stage
     */
    public Types.ValueType[] getSchema() {
        return tokenizerPost.getOutSchema();
    }

    /**
     * @param inRows number of rows of the input
     * @return the row count the post-stage reports for {@code inRows}
     */
    public long getNumRows(long inRows) {
        return tokenizerPost.getNumRows(inRows);
    }

    /**
     * @return the column count as reported by the post-stage
     */
    public long getNumCols() {
        return tokenizerPost.getNumCols();
    }

    /**
     * Runs both stages: the pre-stage on {@code in}, then the post-stage on
     * the pre-stage result together with {@code out}.
     *
     * @param in  input frame handed to the pre-stage
     * @param out frame handed to the post-stage
     * @return whatever the post-stage returns
     */
    public FrameBlock tokenize(FrameBlock in, FrameBlock out) {
        List<DocumentToTokens> perDocumentTokens = tokenizerPre.tokenizePre(in);
        FrameBlock result = tokenizerPost.tokenizePost(perDocumentTokens, out);
        return result;
    }

    /**
     * Plain holder pairing a list of keys with a list of tokens.
     */
    static class DocumentToTokens {
        List<Object> keys;
        List<Token> tokens;

        /**
         * @param keys   key values stored as-is (no copy is made)
         * @param tokens tokens stored as-is (no copy is made)
         */
        public DocumentToTokens(List<Object> keys, List<Token> tokens) {
            this.keys = keys;
            this.tokens = tokens;
        }
    }

    /**
     * A token text together with a start and an end index.
     */
    static class Token {
        String textToken;
        long startIndex;
        long endIndex;

        /**
         * Stores the token text and start index; the end index is derived as
         * {@code startIndex + token.length()}.
         *
         * @param token      token text; its length is read, so it must not be {@code null}
         * @param startIndex start index of the token
         */
        public Token(String token, long startIndex) {
            textToken = token;
            this.startIndex = startIndex;
            // int length is widened to long implicitly
            endIndex = startIndex + token.length();
        }
    }
}

