确保 TiDB 集群已部署 TiCDC 组件(版本需兼容 Flink CDC 3.0.1),并启动同步服务:
# Example: create a TiCDC changefeed. The blackhole sink discards every event,
# so this changefeed only exercises the capture path.
# --pd must be a full http(s) URL, matching the other cdc cli calls in this guide.
cdc cli changefeed create \
    --pd="http://localhost:2379" \
    --sink-uri="blackhole://" \
    --changefeed-id="flink-cdc-demo"
获取 TiDB 集群的 PD(Placement Driver)地址,用于 Flink 连接:
# Look up the PD client URL advertised in the PD config file
grep advertise-client-urls /path/to/tidb-deploy/pd-2379/conf/config.toml
# Sample output: advertise-client-urls = "http://192.168.1.10:2379"
<dependency>
    <groupId>com.ververica</groupId>
    <artifactId>flink-sql-connector-tidb-cdc</artifactId>
    <version>3.0.1</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>8.0.26</version>
</dependency>
将连接器 JAR 包放入 $FLINK_HOME/lib/ 目录后重启 Flink 集群。
-- 配置 checkpoint(每 3 秒)
SET 'execution.checkpointing.interval' = '3s';

-- Flink source table backed by the TiDB CDC connector; it reads mydb.orders.
CREATE TABLE tidb_orders (
    -- Physical columns
    order_id      INT,
    order_date    TIMESTAMP(3),
    customer_name STRING,
    price         DECIMAL(10, 5),
    order_status  BOOLEAN,
    -- Metadata columns (VIRTUAL: read-only, never written back)
    db_name       STRING METADATA FROM 'database_name' VIRTUAL,
    table_name    STRING METADATA FROM 'table_name' VIRTUAL,
    op_ts         TIMESTAMP_LTZ(3) METADATA FROM 'op_ts' VIRTUAL,
    PRIMARY KEY (order_id) NOT ENFORCED
) WITH (
    'connector' = 'tidb-cdc',
    'pd-addresses' = '192.168.1.10:2379',   -- PD address (required)
    'database-name' = 'mydb',
    'table-name' = 'orders',
    'scan.startup.mode' = 'initial',        -- initial (snapshot + incremental) or latest-offset (incremental only)
    'tikv.grpc.timeout_in_ms' = '20000',    -- gRPC timeout in milliseconds
    'tikv.batch_get_concurrency' = '20'     -- batch-get concurrency
);
| 参数名 | 必选 | 默认值 | 类型 | 说明 |
|---|---|---|---|---|
| connector | 是 | 无 | String | 固定为 `tidb-cdc` |
| pd-addresses | 是 | 无 | String | TiDB 集群 PD 地址(格式:`host1:port1,host2:port2`) |
| database-name | 是 | 无 | String | 要监控的数据库名称 |
| table-name | 是 | 无 | String | 要监控的表名称 |
| scan.startup.mode | 否 | initial | String | 启动模式:`initial`(全量+增量)、`latest-offset`(仅增量) |
| tikv.grpc.timeout_in_ms | 否 | 无 | Long | TiKV GRPC 请求超时时间(毫秒) |
-- Connect to TiDB with the MySQL client (run this line from a shell, not inside the SQL session)
mysql -h 127.0.0.1 -P 4000 -u root
-- Create the test database and table; IF NOT EXISTS makes the DDL safe to re-run
CREATE DATABASE IF NOT EXISTS mydb;
USE mydb;
CREATE TABLE IF NOT EXISTS orders (
    order_id      INT PRIMARY KEY,
    order_date    TIMESTAMP(3),
    customer_name VARCHAR(100),
    price         DECIMAL(10, 5),
    order_status  BOOLEAN
);
-- Insert test rows; the explicit column list keeps the statement valid if columns are added or reordered
INSERT INTO orders (order_id, order_date, customer_name, price, order_status) VALUES
    (1, '2023-01-01 10:00:00.000', 'Alice', 100.50, true),
    (2, '2023-01-02 11:00:00.000', 'Bob', 200.75, false);
COMMIT;
-- Query the TiDB CDC table from Flink SQL (the first run triggers the full snapshot scan).
-- Columns are listed explicitly so the output shape does not drift with the table definition.
SELECT
    order_id,
    order_date,
    customer_name,
    price,
    order_status,
    db_name,
    table_name,
    op_ts
FROM tidb_orders;
-- In TiDB, update a row to produce a change event
UPDATE mydb.orders SET price = 150.00 WHERE order_id = 1;
COMMIT;
-- Watch the Flink output: the updated record should appear, with op_ts set to the change time
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.cdc.connectors.tidb.TDBSourceOptions;
import org.apache.flink.cdc.connectors.tidb.TiDBSource;
import org.apache.flink.cdc.connectors.tidb.TiKVChangeEventDeserializationSchema;
import org.apache.flink.cdc.connectors.tidb.TiKVSnapshotEventDeserializationSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.util.Collector;
import org.tikv.kvproto.Cdcpb;
import org.tikv.kvproto.Kvrpcpb;
import java.util.HashMap;
/**
 * Minimal DataStream job that reads the {@code mydb.orders} table through the TiDB CDC
 * source and prints every snapshot and change record to stdout.
 */
public class TiDBSourceExample {
    /**
     * Builds the TiDB source, wires it into a streaming environment and runs the job.
     *
     * @param args unused
     * @throws Exception if the job fails to start or terminates abnormally
     */
    public static void main(String[] args) throws Exception {
        // TiDBSource only hosts the builder; build() hands back a source function,
        // so the variable is typed as SourceFunction rather than TiDBSource<String>.
        SourceFunction<String> source = TiDBSource.<String>builder()
                .database("mydb")
                .tableName("orders")
                .tiConf(TDBSourceOptions.getTiConfiguration("192.168.1.10:2379", new HashMap<>()))
                // Handles rows read during the initial snapshot scan.
                .snapshotEventDeserializer(new TiKVSnapshotEventDeserializationSchema<String>() {
                    @Override
                    public void deserialize(Kvrpcpb.KvPair record, Collector<String> out) {
                        out.collect("Snapshot: " + record.toString());
                    }

                    @Override
                    public TypeInformation<String> getProducedType() {
                        return BasicTypeInfo.STRING_TYPE_INFO;
                    }
                })
                // Handles incremental change events emitted after the snapshot.
                .changeEventDeserializer(new TiKVChangeEventDeserializationSchema<String>() {
                    @Override
                    public void deserialize(Cdcpb.Event.Row record, Collector<String> out) {
                        out.collect("Change: " + record.toString());
                    }

                    @Override
                    public TypeInformation<String> getProducedType() {
                        return BasicTypeInfo.STRING_TYPE_INFO;
                    }
                })
                .build();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Checkpoint every 3000 ms.
        env.enableCheckpointing(3000);
        // Parallelism 1 on the print sink keeps the console output in a single stream.
        env.addSource(source).print().setParallelism(1);
        env.execute("TiDB CDC Example");
    }
}
连接 PD 失败
现象:ERROR: Failed to connect to PD server at 192.168.1.10:2379
排查:检查 Flink 节点到 PD 地址的网络连通性(可用 ping 和 telnet 验证),并确认集群状态:
cdc cli cluster info --pd=http://192.168.1.10:2379
权限不足
ERROR: Access denied for user 'root'@'%'
-- Grant read-only (SELECT) access on mydb.orders to flink_user connecting from any host
GRANT SELECT ON mydb.orders TO 'flink_user'@'%';
大数类型精度丢失
DECIMAL(65, 0) 映射到 Flink 时可能截断(Flink DECIMAL 的最大精度为 38)。
-- 将超高精度 DECIMAL 映射为 STRING
-- Sketch only: the remaining columns and the WITH options are elided
CREATE TABLE tidb_orders (
-- other columns...
big_amount STRING -- replaces DECIMAL(65, 0)
) WITH (...)
TiCDC 同步延迟
- 调整 tikv.grpc.timeout_in_ms:增大超时时间
- 调整 tikv.batch_get_concurrency:提高并发度

性能调优
-- NOTE(review): confirm both keys against the options the TiKV client actually accepts
'tikv.scan_batch_size' = '1000', -- rows fetched per scan
'tikv.scan_concurrency' = '16' -- scan concurrency
高可用配置
-- Several PD endpoints, comma-separated (host1:port1,host2:port2,...)
'pd-addresses' = 'pd1:2379,pd2:2379,pd3:2379'
监控与维护
# List the changefeeds known to the cluster behind this PD endpoint
cdc cli changefeed list --pd=http://pd:2379
# NOTE(review): verify that 'cdc cli gc' and '--older-than' exist in your TiCDC version before relying on this
cdc cli gc --pd=http://pd:2379 --older-than=7d
通过以上步骤,可完成 Flink TiDB CDC 的全流程配置与验证。生产环境中需特别注意 PD 地址配置、大数类型映射及 TiCDC 服务稳定性,以确保数据一致性和系统性能。