// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/data-sink.cc
// and modified by Doris

#include "exec/data_sink.h"

#include <gen_cpp/DataSinks_types.h>
#include <gen_cpp/PaloInternalService_types.h>
#include <glog/logging.h>

#include <map>
#include <memory>
#include <ostream>
#include <string>

#include "common/config.h"
#include "runtime/query_context.h"
#include "runtime/query_statistics.h"
#include "vec/sink/async_writer_sink.h"
#include "vec/sink/group_commit_block_sink.h"
#include "vec/sink/multi_cast_data_stream_sink.h"
#include "vec/sink/vdata_stream_sender.h"
#include "vec/sink/vhive_table_sink.h"
#include "vec/sink/viceberg_table_sink.h"
#include "vec/sink/vmemory_scratch_sink.h"
#include "vec/sink/volap_table_sink.h"
#include "vec/sink/volap_table_sink_v2.h"
#include "vec/sink/vresult_file_sink.h"
#include "vec/sink/vresult_sink.h"

namespace doris {
class DescriptorTbl;
class TExpr;

// Constructs a sink whose `_row_desc` is initialized from `desc`, and
// allocates the QueryStatistics object that get_query_statistics_ptr()
// hands out.
DataSink::DataSink(const RowDescriptor& desc)
        : _row_desc(desc) {
    // Allocated eagerly, so the statistics pointer is non-null as soon as
    // the constructor has finished.
    _query_statistics = std::make_shared<QueryStatistics>();
}

// Returns a shared reference to the statistics object created in the
// constructor. The pointer is returned by value, so the caller holds its own
// reference to the same QueryStatistics instance.
std::shared_ptr<QueryStatistics> DataSink::get_query_statistics_ptr() {
    std::shared_ptr<QueryStatistics> stats = _query_statistics;
    return stats;
}

// Creates the data sink selected by `thrift_sink.type` for a plan fragment
// described by TPlanFragmentExecParams and stores it in `*sink`.
//
// Parameters:
//   pool          object pool forwarded to the sinks that take one.
//   thrift_sink   thrift description of the sink. The member that belongs to
//                 `thrift_sink.type` must be set.
//   output_exprs  output expressions forwarded to the sink constructors.
//   params        fragment exec params; supplies `sender_id` and
//                 `destinations` for the stream / result-file sinks.
//   row_desc      row descriptor forwarded to the sink constructors.
//   state         runtime state; its query options select the olap sink
//                 implementation and its query context (if any) receives the
//                 sink's query statistics.
//   sink          out-parameter receiving the created sink. It is
//                 dereferenced unconditionally and therefore must be non-null.
//   desc_tbl      descriptor table, only forwarded to the result file sink.
//
// Returns:
//   - InternalError when the thrift member for the requested type is missing,
//     when the type is not handled here, when MySQL support is not compiled
//     in, or when the JDBC sink is requested while `enable_java_support` is
//     off.
//   - NotSupported for MULTI_CAST_DATA_STREAM_SINK.
//   - The status reported by the GroupCommitBlockSink constructor, or by the
//     created sink's init(), when either of them fails.
//   - OK otherwise. In that case init(thrift_sink) has been called on the
//     sink; prepare() is NOT called by this overload.
Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink,
                                  const std::vector<TExpr>& output_exprs,
                                  const TPlanFragmentExecParams& params,
                                  const RowDescriptor& row_desc, RuntimeState* state,
                                  std::unique_ptr<DataSink>* sink, DescriptorTbl& desc_tbl) {
    switch (thrift_sink.type) {
    case TDataSinkType::DATA_STREAM_SINK: {
        if (!thrift_sink.__isset.stream_sink) {
            return Status::InternalError("Missing data stream sink.");
        }
        // TODO: figure out good buffer size based on size of output row
        *sink = std::make_unique<vectorized::VDataStreamSender>(
                state, pool, params.sender_id, row_desc, thrift_sink.stream_sink,
                params.destinations);
        break;
    }
    case TDataSinkType::RESULT_SINK: {
        if (!thrift_sink.__isset.result_sink) {
            return Status::InternalError("Missing data buffer sink.");
        }

        // NOTE(review): an unset result_sink.type also selects the arrow
        // flight buffer size here -- confirm that this is intended.
        int result_sink_buffer_size_rows = vectorized::RESULT_SINK_BUFFER_SIZE;
        if (!thrift_sink.result_sink.__isset.type ||
            thrift_sink.result_sink.type == TResultSinkType::ARROW_FLIGHT_PROTOCAL) {
            result_sink_buffer_size_rows = config::arrow_flight_result_sink_buffer_size_rows;
        }

        // TODO: figure out good buffer size based on size of output row
        *sink = std::make_unique<vectorized::VResultSink>(
                row_desc, output_exprs, thrift_sink.result_sink, result_sink_buffer_size_rows);
        break;
    }
    case TDataSinkType::RESULT_FILE_SINK: {
        if (!thrift_sink.__isset.result_file_sink) {
            return Status::InternalError("Missing result file sink.");
        }

        // TODO: figure out good buffer size based on size of output row
        if (params.__isset.destinations && !params.destinations.empty()) {
            // Result file sink is not the top sink: it forwards to the
            // fragment's destinations.
            *sink = std::make_unique<vectorized::VResultFileSink>(
                    state, pool, params.sender_id, row_desc, thrift_sink.result_file_sink,
                    params.destinations, output_exprs, desc_tbl);
        } else {
            *sink = std::make_unique<vectorized::VResultFileSink>(row_desc, output_exprs);
        }
        break;
    }
    case TDataSinkType::MEMORY_SCRATCH_SINK: {
        if (!thrift_sink.__isset.memory_scratch_sink) {
            return Status::InternalError("Missing data buffer sink.");
        }

        *sink = std::make_unique<vectorized::MemoryScratchSink>(row_desc, output_exprs);
        break;
    }
    case TDataSinkType::MYSQL_TABLE_SINK: {
#ifdef DORIS_WITH_MYSQL
        if (!thrift_sink.__isset.mysql_table_sink) {
            return Status::InternalError("Missing data buffer sink.");
        }
        *sink = std::make_unique<vectorized::VMysqlTableSink>(row_desc, output_exprs);
        break;
#else
        return Status::InternalError(
                "Don't support MySQL table, you should rebuild Doris with WITH_MYSQL option ON");
#endif
    }
    case TDataSinkType::ODBC_TABLE_SINK: {
        if (!thrift_sink.__isset.odbc_table_sink) {
            return Status::InternalError("Missing data odbc sink.");
        }
        *sink = std::make_unique<vectorized::VOdbcTableSink>(row_desc, output_exprs);
        break;
    }

    case TDataSinkType::JDBC_TABLE_SINK: {
        if (!thrift_sink.__isset.jdbc_table_sink) {
            return Status::InternalError("Missing data jdbc sink.");
        }
        if (!config::enable_java_support) {
            return Status::InternalError(
                    "Jdbc table sink is not enabled, you can change be config "
                    "enable_java_support to true and restart be.");
        }
        *sink = std::make_unique<vectorized::VJdbcTableSink>(row_desc, output_exprs);
        break;
    }

    case TDataSinkType::EXPORT_SINK: {
        // Macro defined elsewhere; presumably it returns an error status,
        // which would make the `break` below unreachable -- confirm.
        RETURN_ERROR_IF_NON_VEC;
        break;
    }
    case TDataSinkType::GROUP_COMMIT_OLAP_TABLE_SINK:
    case TDataSinkType::OLAP_TABLE_SINK: {
        // Checked at runtime rather than with DCHECK: olap_table_sink is read
        // right below, and a DCHECK is compiled out of release builds.
        if (!thrift_sink.__isset.olap_table_sink) {
            return Status::InternalError("Missing olap table sink.");
        }
        if (state->query_options().enable_memtable_on_sink_node &&
            !_has_inverted_index_or_partial_update(thrift_sink.olap_table_sink)) {
            *sink = std::make_unique<vectorized::VOlapTableSinkV2>(pool, row_desc, output_exprs);
        } else {
            *sink = std::make_unique<vectorized::VOlapTableSink>(pool, row_desc, output_exprs);
        }
        break;
    }
    case TDataSinkType::HIVE_TABLE_SINK: {
        if (!thrift_sink.__isset.hive_table_sink) {
            return Status::InternalError("Missing hive table sink.");
        }
        *sink = std::make_unique<vectorized::VHiveTableSink>(pool, row_desc, output_exprs);
        break;
    }
    case TDataSinkType::ICEBERG_TABLE_SINK: {
        if (!thrift_sink.__isset.iceberg_table_sink) {
            return Status::InternalError("Missing iceberg table sink.");
        }
        *sink = std::make_unique<vectorized::VIcebergTableSink>(pool, row_desc, output_exprs);
        break;
    }
    case TDataSinkType::GROUP_COMMIT_BLOCK_SINK: {
        // Same runtime validation as for the olap table sinks above.
        if (!thrift_sink.__isset.olap_table_sink) {
            return Status::InternalError("Missing olap table sink.");
        }
#ifndef NDEBUG
        // Debug builds only: flag the query's memory tracker as a group
        // commit load.
        DCHECK(state->get_query_ctx() != nullptr);
        state->get_query_ctx()->query_mem_tracker->is_group_commit_load = true;
#endif
        // The constructor reports failure through `status`.
        Status status = Status::OK();
        *sink = std::make_unique<vectorized::GroupCommitBlockSink>(pool, row_desc, output_exprs,
                                                                   &status);
        RETURN_IF_ERROR(status);
        break;
    }
    case TDataSinkType::MULTI_CAST_DATA_STREAM_SINK: {
        return Status::NotSupported("MULTI_CAST_DATA_STREAM_SINK only support in pipeline engine");
    }

    default: {
        // Use the thrift-generated name of the type when there is one,
        // otherwise fall back to a generic prefix.
        const char* type_name = "Unknown data sink type ";
        const auto name_it = _TDataSinkType_VALUES_TO_NAMES.find(thrift_sink.type);
        if (name_it != _TDataSinkType_VALUES_TO_NAMES.end()) {
            type_name = name_it->second;
        }
        return Status::InternalError(std::string(type_name) + " not implemented.");
    }
    }

    if (*sink != nullptr) {
        RETURN_IF_ERROR((*sink)->init(thrift_sink));
        if (state->get_query_ctx()) {
            state->get_query_ctx()->register_query_statistics((*sink)->get_query_statistics_ptr());
        }
    }

    return Status::OK();
}

// Creates the data sink selected by `thrift_sink.type` for one instance of a
// pipeline fragment (TPipelineFragmentParams) and stores it in `*sink`.
//
// Parameters:
//   pool             object pool forwarded to the sinks that take one.
//   thrift_sink      thrift description of the sink. The member that belongs
//                    to `thrift_sink.type` must be set.
//   output_exprs     output expressions forwarded to the sink constructors.
//   params           pipeline fragment params; supplies `destinations` and
//                    the per-instance `local_params`.
//   local_param_idx  index into `params.local_params`; the selected entry
//                    supplies `sender_id`.
//   row_desc         row descriptor forwarded to the sink constructors.
//   state            runtime state; its query options select the olap sink
//                    implementation and its query context (if any) receives
//                    the sink's query statistics.
//   sink             out-parameter receiving the created sink. It is
//                    dereferenced unconditionally and therefore must be
//                    non-null.
//   desc_tbl         descriptor table, only forwarded to the result file sink.
//
// Returns:
//   - InternalError when `local_param_idx` is out of range, when the thrift
//     member for the requested type is missing, when the type is not handled
//     here, when MySQL support is not compiled in, or when the JDBC sink is
//     requested while `enable_java_support` is off.
//   - The status reported by the GroupCommitBlockSink constructor, or by the
//     created sink's init() / prepare(), when one of them fails.
//   - OK otherwise. In that case init(thrift_sink) and prepare(state) have
//     both been called on the sink.
Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink,
                                  const std::vector<TExpr>& output_exprs,
                                  const TPipelineFragmentParams& params,
                                  const size_t& local_param_idx, const RowDescriptor& row_desc,
                                  RuntimeState* state, std::unique_ptr<DataSink>* sink,
                                  DescriptorTbl& desc_tbl) {
    // Guard the unchecked operator[] below against an out-of-range index.
    if (local_param_idx >= params.local_params.size()) {
        return Status::InternalError("Invalid local param index " +
                                     std::to_string(local_param_idx) + ", local params size is " +
                                     std::to_string(params.local_params.size()));
    }
    const auto& local_params = params.local_params[local_param_idx];

    switch (thrift_sink.type) {
    case TDataSinkType::DATA_STREAM_SINK: {
        if (!thrift_sink.__isset.stream_sink) {
            return Status::InternalError("Missing data stream sink.");
        }
        // TODO: figure out good buffer size based on size of output row
        *sink = std::make_unique<vectorized::VDataStreamSender>(state, pool, local_params.sender_id,
                                                                row_desc, thrift_sink.stream_sink,
                                                                params.destinations);
        break;
    }
    case TDataSinkType::RESULT_SINK: {
        if (!thrift_sink.__isset.result_sink) {
            return Status::InternalError("Missing data buffer sink.");
        }

        // NOTE(review): an unset result_sink.type also selects the arrow
        // flight buffer size here -- confirm that this is intended.
        int result_sink_buffer_size_rows = vectorized::RESULT_SINK_BUFFER_SIZE;
        if (!thrift_sink.result_sink.__isset.type ||
            thrift_sink.result_sink.type == TResultSinkType::ARROW_FLIGHT_PROTOCAL) {
            result_sink_buffer_size_rows = config::arrow_flight_result_sink_buffer_size_rows;
        }

        // TODO: figure out good buffer size based on size of output row
        *sink = std::make_unique<vectorized::VResultSink>(
                row_desc, output_exprs, thrift_sink.result_sink, result_sink_buffer_size_rows);
        break;
    }
    case TDataSinkType::RESULT_FILE_SINK: {
        if (!thrift_sink.__isset.result_file_sink) {
            return Status::InternalError("Missing result file sink.");
        }

        // TODO: figure out good buffer size based on size of output row
        if (params.__isset.destinations && !params.destinations.empty()) {
            // Result file sink is not the top sink: it forwards to the
            // fragment's destinations.
            *sink = std::make_unique<vectorized::VResultFileSink>(
                    state, pool, local_params.sender_id, row_desc, thrift_sink.result_file_sink,
                    params.destinations, output_exprs, desc_tbl);
        } else {
            *sink = std::make_unique<vectorized::VResultFileSink>(row_desc, output_exprs);
        }
        break;
    }
    case TDataSinkType::MEMORY_SCRATCH_SINK: {
        if (!thrift_sink.__isset.memory_scratch_sink) {
            return Status::InternalError("Missing data buffer sink.");
        }

        *sink = std::make_unique<vectorized::MemoryScratchSink>(row_desc, output_exprs);
        break;
    }
    case TDataSinkType::MYSQL_TABLE_SINK: {
#ifdef DORIS_WITH_MYSQL
        if (!thrift_sink.__isset.mysql_table_sink) {
            return Status::InternalError("Missing data buffer sink.");
        }
        *sink = std::make_unique<vectorized::VMysqlTableSink>(row_desc, output_exprs);
        break;
#else
        return Status::InternalError(
                "Don't support MySQL table, you should rebuild Doris with WITH_MYSQL option ON");
#endif
    }
    case TDataSinkType::ODBC_TABLE_SINK: {
        if (!thrift_sink.__isset.odbc_table_sink) {
            return Status::InternalError("Missing data odbc sink.");
        }
        *sink = std::make_unique<vectorized::VOdbcTableSink>(row_desc, output_exprs);
        break;
    }

    case TDataSinkType::JDBC_TABLE_SINK: {
        if (!thrift_sink.__isset.jdbc_table_sink) {
            return Status::InternalError("Missing data jdbc sink.");
        }
        if (!config::enable_java_support) {
            return Status::InternalError(
                    "Jdbc table sink is not enabled, you can change be config "
                    "enable_java_support to true and restart be.");
        }
        *sink = std::make_unique<vectorized::VJdbcTableSink>(row_desc, output_exprs);
        break;
    }

    case TDataSinkType::EXPORT_SINK: {
        // Macro defined elsewhere; presumably it returns an error status,
        // which would make the `break` below unreachable -- confirm.
        RETURN_ERROR_IF_NON_VEC;
        break;
    }
    case TDataSinkType::GROUP_COMMIT_OLAP_TABLE_SINK:
    case TDataSinkType::OLAP_TABLE_SINK: {
        // Checked at runtime rather than with DCHECK: olap_table_sink is read
        // right below, and a DCHECK is compiled out of release builds.
        if (!thrift_sink.__isset.olap_table_sink) {
            return Status::InternalError("Missing olap table sink.");
        }
        if (state->query_options().enable_memtable_on_sink_node &&
            !_has_inverted_index_or_partial_update(thrift_sink.olap_table_sink)) {
            *sink = std::make_unique<vectorized::VOlapTableSinkV2>(pool, row_desc, output_exprs);
        } else {
            *sink = std::make_unique<vectorized::VOlapTableSink>(pool, row_desc, output_exprs);
        }
        break;
    }
    case TDataSinkType::HIVE_TABLE_SINK: {
        if (!thrift_sink.__isset.hive_table_sink) {
            return Status::InternalError("Missing hive table sink.");
        }
        *sink = std::make_unique<vectorized::VHiveTableSink>(pool, row_desc, output_exprs);
        break;
    }
    case TDataSinkType::ICEBERG_TABLE_SINK: {
        if (!thrift_sink.__isset.iceberg_table_sink) {
            return Status::InternalError("Missing iceberg table sink.");
        }
        *sink = std::make_unique<vectorized::VIcebergTableSink>(pool, row_desc, output_exprs);
        break;
    }
    case TDataSinkType::MULTI_CAST_DATA_STREAM_SINK: {
        // Presence is validated at runtime because the member is read below;
        // the non-empty check stays a debug-only invariant as before.
        if (!thrift_sink.__isset.multi_cast_stream_sink) {
            return Status::InternalError("Missing multi cast data stream sink.");
        }
        DCHECK_GT(thrift_sink.multi_cast_stream_sink.sinks.size(), 0);
        // The streamer is sized by the number of sinks in the thrift
        // description and handed to the sink that wraps it.
        auto multi_cast_data_streamer = std::make_shared<pipeline::MultiCastDataStreamer>(
                row_desc, pool, thrift_sink.multi_cast_stream_sink.sinks.size());
        *sink = std::make_unique<vectorized::MultiCastDataStreamSink>(multi_cast_data_streamer);
        break;
    }
    case TDataSinkType::GROUP_COMMIT_BLOCK_SINK: {
        // Same runtime validation as for the olap table sinks above.
        if (!thrift_sink.__isset.olap_table_sink) {
            return Status::InternalError("Missing olap table sink.");
        }
#ifndef NDEBUG
        // Debug builds only: flag the query's memory tracker as a group
        // commit load.
        DCHECK(state->get_query_ctx() != nullptr);
        state->get_query_ctx()->query_mem_tracker->is_group_commit_load = true;
#endif
        // The constructor reports failure through `status`.
        Status status = Status::OK();
        *sink = std::make_unique<vectorized::GroupCommitBlockSink>(pool, row_desc, output_exprs,
                                                                   &status);
        RETURN_IF_ERROR(status);
        break;
    }

    default: {
        // Use the thrift-generated name of the type when there is one,
        // otherwise fall back to a generic prefix.
        const char* type_name = "Unknown data sink type ";
        const auto name_it = _TDataSinkType_VALUES_TO_NAMES.find(thrift_sink.type);
        if (name_it != _TDataSinkType_VALUES_TO_NAMES.end()) {
            type_name = name_it->second;
        }
        return Status::InternalError(std::string(type_name) + " not implemented.");
    }
    }

    if (*sink != nullptr) {
        // Unlike the TPlanFragmentExecParams overload, this one also prepares
        // the sink before handing it back.
        RETURN_IF_ERROR((*sink)->init(thrift_sink));
        RETURN_IF_ERROR((*sink)->prepare(state));
        if (state->get_query_ctx()) {
            state->get_query_ctx()->register_query_statistics((*sink)->get_query_statistics_ptr());
        }
    }

    return Status::OK();
}

// Base implementation of init(): the thrift description is not inspected and
// success is always reported.
Status DataSink::init([[maybe_unused]] const TDataSink& thrift_sink) {
    return Status::OK();
}

// Base implementation of prepare(): the runtime state is not touched and
// success is always reported.
Status DataSink::prepare([[maybe_unused]] RuntimeState* state) {
    return Status::OK();
}
} // namespace doris
