// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "custom_normalizer_config.h"
#include "olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h"
#include "olap/rowset/segment_v2/inverted_index/token_filter/token_filter_factory.h"
#include "olap/rowset/segment_v2/inverted_index/tokenizer/tokenizer_factory.h"

namespace doris::segment_v2::inverted_index {

// Forward declaration so the shared-pointer alias below can be used inside the
// class definition itself (e.g. as the return type of Builder::build()).
class CustomNormalizer;
// Shared-ownership handle to a CustomNormalizer.
using CustomNormalizerPtr = std::shared_ptr<CustomNormalizer>;

// Analyzer that is configured with a list of char-filter factories and a list
// of token-filter factories. Instances are assembled through the nested
// Builder or through build_custom_normalizer().
//
// NOTE(review): the member name `_keyword_tokenizer` suggests the input is
// emitted as a single token before the token filters run -- the pipeline
// itself is defined out of line, confirm against the implementation file.
class CustomNormalizer : public Analyzer {
public:
    // Collects the char-filter and token-filter factories that a
    // CustomNormalizer is constructed from.
    class Builder {
    public:
        Builder() = default;
        ~Builder() = default;

        // Adds a char filter identified by `name` and configured by `params`.
        // Presumably resolves the name to a factory stored in `_char_filters`;
        // defined out of line -- TODO confirm.
        void add_char_filter(const std::string& name, const Settings& params);
        // Adds a token filter identified by `name` and configured by `params`.
        // Presumably resolves the name to a factory stored in `_token_filters`;
        // defined out of line -- TODO confirm.
        void add_token_filter(const std::string& name, const Settings& params);

        // Returns a shared pointer to a CustomNormalizer; presumably built from
        // the filters added so far -- TODO confirm.
        CustomNormalizerPtr build();

    private:
        std::vector<CharFilterFactoryPtr> _char_filters;
        std::vector<TokenFilterFactoryPtr> _token_filters;

        // Lets CustomNormalizer read the collected factory lists directly.
        friend class CustomNormalizer;
    };

    // Constructs a normalizer from `builder`. Marked explicit so that a
    // Builder* is never implicitly converted into a CustomNormalizer.
    // NOTE(review): whether `builder` may be null is decided by the
    // out-of-line definition -- confirm before passing nullptr.
    explicit CustomNormalizer(Builder* builder);
    ~CustomNormalizer() override = default;

    // Always reports true for this analyzer.
    bool isSDocOpt() override { return true; }

    // Token-stream entry points taking a raw lucene reader.
    // NOTE(review): ownership of the returned TokenStream* is not visible
    // from this header -- confirm against the implementation.
    TokenStream* tokenStream(const TCHAR* fieldName, lucene::util::Reader* reader) override;
    TokenStream* reusableTokenStream(const TCHAR* fieldName, lucene::util::Reader* reader) override;

    // Token-stream entry points taking a ReaderPtr.
    TokenStream* tokenStream(const TCHAR* fieldName, const ReaderPtr& reader) override;
    TokenStream* reusableTokenStream(const TCHAR* fieldName, const ReaderPtr& reader) override;

    // Factory that returns a normalizer for the given configuration.
    // Presumably drives a Builder from the entries in `config` -- TODO confirm.
    static CustomNormalizerPtr build_custom_normalizer(const CustomNormalizerConfigPtr& config);

private:
    // Takes a reader and returns a reader; presumably wraps it with the
    // configured char filters -- TODO confirm.
    ReaderPtr init_reader(ReaderPtr reader);
    // Returns a new TokenStreamComponentsPtr; presumably the tokenizer plus
    // the token-filter chain -- TODO confirm.
    TokenStreamComponentsPtr create_components();

    TokenizerFactoryPtr _keyword_tokenizer;
    std::vector<CharFilterFactoryPtr> _char_filters;
    std::vector<TokenFilterFactoryPtr> _token_filters;

    // Presumably the components cached for reusableTokenStream() -- TODO confirm.
    TokenStreamComponentsPtr _reuse_token_stream;
};

} // namespace doris::segment_v2::inverted_index