Clickhouse源码分析-Replicated Database创建流程

前置准备

DDL:

CREATE DATABASE my_replicated_db
ENGINE = Replicated('/clickhouse/databases/my_replicated_db', '{shard}', '{replica}');

这里需要提前启动 1 个 clickhouse 服务端和 1 个 clickhouse-keeper。

 源码分析

断点:

b Interpreters/DDLWorker.cpp:146

b Databases/DatabaseReplicatedWorker.cpp:135

前台线程调用栈:

Clickhouse源码分析-Replicated Database创建流程_第1张图片

后台线程调用栈:

Clickhouse源码分析-Replicated Database创建流程_第2张图片

 可以看到前台线程执行成功后直接返回给客户端:

Clickhouse源码分析-Replicated Database创建流程_第3张图片

主要看一下initializeReplication:

    String active_path = fs::path(database->replica_path) / "active";
    String active_id = toString(ServerUUID::get());

database->replica_path = /clickhouse/databases/my_replicated_db/replicas/s1|r1

active_path = /clickhouse/databases/my_replicated_db/replicas/s1|r1/active

    String log_ptr_str = zookeeper->get(database->replica_path + "/log_ptr");
    UInt32 our_log_ptr = parse(log_ptr_str);
    UInt32 max_log_ptr = parse(zookeeper->get(database->zookeeper_path + "/max_log_ptr"));
    logs_to_keep = parse(zookeeper->get(database->zookeeper_path + "/logs_to_keep"));

log_ptr_str = 0

database->zookeeper_path = /clickhouse/databases/my_replicated_db

max_log_ptr = 21956

logs_to_keep = 1000

    UInt64 digest;
    String digest_str;
    UInt64 local_digest;
    if (zookeeper->tryGet(database->replica_path + "/digest", digest_str))
    {
        digest = parse(digest_str);
        std::lock_guard lock{database->metadata_mutex};
        local_digest = database->tables_metadata_digest;
    }

digest = 0

local_digest = 0

再看一下recoverLostReplica:

这一部分用于构建各个表之间的依赖关系:

    /// Create all needed tables in a proper order

    TablesDependencyGraph tables_dependencies("DatabaseReplicated (" + getDatabaseName() + ")");

    for (const auto & [table_name, create_table_query] : table_name_to_metadata)

    {

        /// Note that table_name could contain a dot inside (e.g. .inner.1234-1234-1234-1234)

        /// And QualifiedTableName::parseFromString doesn't handle this.

        auto qualified_name = QualifiedTableName{.database = getDatabaseName(), .table = table_name};

        auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_table_query);

        tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ast, getContext()->getCurrentDatabase()).dependencies);

    }

执行创建表语句:

    //外层遍历每一个依赖块, 有依赖关系的表放在一个依赖块
    for (const auto & tables_to_create : tables_to_create_by_level)
    {
        for (const auto & table_id : tables_to_create)
        {
                .....

                /// Check larger comment in DatabaseOnDisk::createTableFromAST
                /// TL;DR applySettingsFromQuery will move the settings from engine to query level
                /// making it possible to overcome a backward incompatible change.
                InterpreterSetQuery::applySettingsFromQuery(query_ast, create_query_context);
                LOG_INFO(log, "Executing {}", query_ast->formatForLogging());
                InterpreterCreateQuery(query_ast, create_query_context).execute();

            //内存执行有依赖关系表的创建
        }

        runner.waitForAllToFinishAndRethrowFirstError();
    }

未完待续......

奇怪问题

问题1:关于gdb

不显示源码的问题:

(gdb) l
133 in ./build/./src/Databases/DatabaseReplicatedWorker.cpp
(gdb)
133 in ./build/./src/Databases/DatabaseReplicatedWorker.cpp
(gdb)
133 in ./build/./src/Databases/DatabaseReplicatedWorker.cpp
(gdb)
133 in ./build/./src/Databases/DatabaseReplicatedWorker.cpp

使用 info source 查看当前源文件的信息:

(gdb) set substitute-path ./build/./src /home/yanglw/work/ClickHouse/src
(gdb) info source
Current source file is ./build/./src/Databases/DatabaseReplicatedWorker.cpp
Compilation directory is ./build
Contains 533 lines.
Source language is c++.
Producer is Ubuntu clang version 19.1.7 (++20250114103320+cd708029e0b2-1~exp1~20250114103432.75).
Compiled with DWARF 5 debugging format.
Does not include preprocessor macro info.

解决:使用 set substitute-path 把调试信息中记录的编译路径 ./build/./src 替换为本地源码路径。下面命令中的 /home/user/clickhouse/src 是被调试的 ClickHouse 的源码路径,请替换为你自己的实际路径:

(gdb) set substitute-path ./build/./src /home/user/clickhouse/src

 问题2:关于clickhouse

CREATE TABLE IF NOT EXISTS my_replicated_db.replicated_table
(
    `event_date` Date,
    `event_time` DateTime,
    `user_id` UInt32,
    `event_type` String,
    `value` Float64
)
ENGINE = ReplicatedMergeTree('/clickhouse/databases/my_replicated_db/replicated_table', '{shard}', '{replica}')
PARTITION BY toYYYYMM(event_date)
ORDER BY (event_date, event_time, user_id)
SETTINGS index_granularity = 8192

Query id: 49e0846f-3642-4a1c-a0a5-9ee4faa469ce


Elapsed: 0.003 sec.

Received exception from server (version 25.6.1):
Code: 80. DB::Exception: Received from localhost:9000. DB::Exception: Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. If you really want to specify it explicitly, then you should use some macros to distinguish different shards and replicas. (INCORRECT_QUERY)

关键信息:

DB::Exception: Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments.

可以将 database_replicated_allow_replicated_engine_arguments 设置为 1,这样在当前 session 中就可以显式指定 ZooKeeper 路径来创建复制表:

SET database_replicated_allow_replicated_engine_arguments = 1;

你可能感兴趣的:(Clickhouse源码分析-Replicated Database创建流程)