Kafka is a distributed stream-processing platform. To get started, download a release, extract it, and start Zookeeper followed by the Kafka broker:
# Download Kafka
wget https://downloads.apache.org/kafka/3.5.1/kafka_2.13-3.5.1.tgz
# Extract the archive
tar -xzf kafka_2.13-3.5.1.tgz
cd kafka_2.13-3.5.1
# Start Zookeeper
bin/zookeeper-server-start.sh config/zookeeper.properties
# Start the Kafka broker
bin/kafka-server-start.sh config/server.properties
# server.properties
broker.id=0
listeners=PLAINTEXT://:9092
log.dirs=/tmp/kafka-logs
zookeeper.connect=localhost:2181
# Create a topic
bin/kafka-topics.sh --create --bootstrap-server localhost:9092 \
  --replication-factor 1 --partitions 3 --topic test-topic
# List topics
bin/kafka-topics.sh --list --bootstrap-server localhost:9092
# Describe a topic
bin/kafka-topics.sh --describe --bootstrap-server localhost:9092 \
  --topic test-topic
# Delete a topic
bin/kafka-topics.sh --delete --bootstrap-server localhost:9092 \
  --topic test-topic
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>3.5.1</version>
</dependency>
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class KafkaProducerExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("key.serializer",
                "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer",
                "org.apache.kafka.common.serialization.StringSerializer");

        Producer<String, String> producer = new KafkaProducer<>(props);
        // Send a message asynchronously; the callback fires once the broker responds
        ProducerRecord<String, String> record =
                new ProducerRecord<>("test-topic", "key", "value");
        producer.send(record, (metadata, exception) -> {
            if (exception == null) {
                System.out.println("Message sent successfully");
            } else {
                exception.printStackTrace();
            }
        });
        producer.close();
    }
}
import java.time.Duration;
import java.util.Arrays;
import java.util.Properties;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

public class KafkaConsumerExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("group.id", "test-group");
        props.put("key.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");

        Consumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Arrays.asList("test-topic"));
        // Poll in a loop; records within a partition arrive in order
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("offset = %d, key = %s, value = %s%n",
                        record.offset(), record.key(), record.value());
            }
        }
    }
}
<dependency>
    <groupId>org.springframework.kafka</groupId>
    <artifactId>spring-kafka</artifactId>
</dependency>
spring:
  kafka:
    bootstrap-servers: localhost:9092
    producer:
      key-serializer: org.apache.kafka.common.serialization.StringSerializer
      value-serializer: org.apache.kafka.common.serialization.StringSerializer
    consumer:
      group-id: test-group
      auto-offset-reset: earliest
      key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
      value-deserializer: org.apache.kafka.common.serialization.StringDeserializer
@Service
public class KafkaProducerService {

    @Autowired
    private KafkaTemplate<String, String> kafkaTemplate;

    public void sendMessage(String topic, String message) {
        // In spring-kafka 3.x, send() returns a CompletableFuture
        kafkaTemplate.send(topic, message)
                .whenComplete((result, ex) -> {
                    if (ex == null) {
                        System.out.println("Message sent successfully");
                    } else {
                        System.out.println("Failed to send message: " + ex.getMessage());
                    }
                });
    }
}
@Service
public class KafkaConsumerService {

    @KafkaListener(topics = "test-topic", groupId = "test-group")
    public void listen(String message) {
        System.out.println("Received message: " + message);
    }
}
import java.util.List;
import java.util.Map;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;

// Custom partitioner (the Partitioner interface also requires close() and configure())
public class CustomPartitioner implements Partitioner {
    @Override
    public int partition(String topic, Object key, byte[] keyBytes,
                         Object value, byte[] valueBytes, Cluster cluster) {
        List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
        int numPartitions = partitions.size();
        // Custom partitioning logic: map the key hash to a non-negative partition index
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
    @Override
    public void close() { }
    @Override
    public void configure(Map<String, ?> configs) { }
}
// Register the partitioner
props.put("partitioner.class", "com.example.CustomPartitioner");
// Manual offset commit
props.put("enable.auto.commit", "false");
consumer.poll(Duration.ofMillis(100));
consumer.commitSync();
// Asynchronous commit
consumer.commitAsync((offsets, exception) -> {
    if (exception != null) {
        System.out.println("Commit failed for offsets: " + offsets);
    }
});
// Enable producer-side compression
props.put("compression.type", "gzip"); // Supported: gzip, snappy, lz4, zstd
Monitoring should cover both producer metrics (e.g. send rate, error rate) and consumer metrics (e.g. consumer lag, fetch rate). Both clients expose these through their metrics registry, as in the sketch below.
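A minimal sketch of reading client-side metrics programmatically, reusing the producer from the earlier example. The metric names shown (record-send-rate, record-error-rate) are standard producer metrics; the filtering logic here is illustrative:
// Dump selected producer metrics; KafkaProducer and KafkaConsumer both expose metrics()
// (requires org.apache.kafka.common.Metric and org.apache.kafka.common.MetricName)
for (Map.Entry<MetricName, ? extends Metric> entry : producer.metrics().entrySet()) {
    MetricName name = entry.getKey();
    if (name.name().equals("record-send-rate") || name.name().equals("record-error-rate")) {
        System.out.println(name.group() + "/" + name.name() + " = " + entry.getValue().metricValue());
    }
}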
# List consumer groups
bin/kafka-consumer-groups.sh --bootstrap-server localhost:9092 --list
# Describe a consumer group
bin/kafka-consumer-groups.sh --bootstrap-server localhost:9092 \
  --describe --group test-group
# Read messages from a topic
bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 \
  --topic test-topic --from-beginning
Reliability configuration
// Ensure reliable delivery
props.put("acks", "all");
props.put("retries", 3);
props.put("max.in.flight.requests.per.connection", 1);
Performance tuning
// Batch sends
props.put("batch.size", 16384);
props.put("linger.ms", 1);
Consumer configuration
// Cap the number of records returned per poll
props.put("max.poll.records", 500);
// Set a reasonable heartbeat interval
props.put("heartbeat.interval.ms", 3000);
Exception handling
try {
    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
    processRecords(records);
    consumer.commitSync();
} catch (Exception e) {
    // Handle the failure (log, retry, or alert)
    handleException(e);
}
Monitoring and alerting
Capacity planning
Security management (see the client security sketch below)
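As a hedged illustration of the security point: Kafka clients authenticate through the standard security.protocol and SASL settings shown below. The mechanism, credentials, and truststore paths are placeholder assumptions and must match the broker's configuration:
// Minimal SASL_SSL client settings (all values are placeholders)
props.put("security.protocol", "SASL_SSL");
props.put("sasl.mechanism", "PLAIN");
props.put("sasl.jaas.config",
        "org.apache.kafka.common.security.plain.PlainLoginModule required " +
        "username=\"alice\" password=\"alice-secret\";");
props.put("ssl.truststore.location", "/etc/kafka/client.truststore.jks");
props.put("ssl.truststore.password", "changeit");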
<dependency>
    <groupId>org.springframework.cloud</groupId>
    <artifactId>spring-cloud-starter-stream-kafka</artifactId>
</dependency>
# application.yml
spring:
  cloud:
    stream:
      kafka:
        binder:
          brokers: localhost:9092
          autoCreateTopics: true
      bindings:
        input:
          destination: input-topic
          contentType: application/json
        output:
          destination: output-topic
          contentType: application/json
@Service
public class MessageService {

    @Autowired
    private MessageChannel output;

    public void sendMessage(Message<?> message) {
        output.send(message);
    }
}

@Service
public class MessageListener {

    @StreamListener("input")
    public void handleMessage(Message<?> message) {
        System.out.println("Received message: " + message.getPayload());
    }
}
# Pull the Zookeeper image
docker pull wurstmeister/zookeeper
# Pull the Kafka image
docker pull wurstmeister/kafka
# Run the Zookeeper container
docker run -d --name zookeeper -p 2181:2181 wurstmeister/zookeeper
# Run the Kafka container
docker run -d --name kafka \
  -p 9092:9092 \
  -e KAFKA_ADVERTISED_HOST_NAME=localhost \
  -e KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 \
  --link zookeeper:zookeeper \
  wurstmeister/kafka
# docker-compose.yml
version: '3'
services:
  zookeeper:
    image: wurstmeister/zookeeper
    container_name: zookeeper
    ports:
      - "2181:2181"
    networks:
      - kafka-network
  kafka1:
    image: wurstmeister/kafka
    container_name: kafka1
    ports:
      - "9092:9092"
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka1:9092
      KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 3
    depends_on:
      - zookeeper
    networks:
      - kafka-network
  kafka2:
    image: wurstmeister/kafka
    container_name: kafka2
    ports:
      - "9093:9092"
    environment:
      KAFKA_BROKER_ID: 2
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka2:9092
      KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 3
    depends_on:
      - zookeeper
    networks:
      - kafka-network
  kafka3:
    image: wurstmeister/kafka
    container_name: kafka3
    ports:
      - "9094:9092"
    environment:
      KAFKA_BROKER_ID: 3
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka3:9092
      KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 3
    depends_on:
      - zookeeper
    networks:
      - kafka-network
  kafka-ui:
    image: provectuslabs/kafka-ui:latest
    container_name: kafka-ui
    ports:
      - "8080:8080"
    environment:
      KAFKA_CLUSTERS_0_NAME: local
      KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka1:9092,kafka2:9092,kafka3:9092
      KAFKA_CLUSTERS_0_ZOOKEEPER: zookeeper:2181
    depends_on:
      - kafka1
      - kafka2
      - kafka3
    networks:
      - kafka-network
networks:
  kafka-network:
    driver: bridge
Q: How do you ensure messages are not lost?
Solution:
Producer side:
// Set acks=all so the leader waits for all in-sync replicas
props.put("acks", "all");
// Allow retries on transient failures
props.put("retries", 3);
// Use synchronous sends, or asynchronous sends with a callback
producer.send(record, (metadata, exception) -> {
    if (exception != null) {
        // Handle the failed send (log, retry, or dead-letter)
    }
});
Broker side:
# Default replication factor for new topics
default.replication.factor=3
# Minimum number of in-sync replicas required for acks=all writes
min.insync.replicas=2
Note that enable.idempotence=true is a producer setting, not a broker one; enable it on the producer to avoid duplicates caused by retries.
Consumer side:
// Disable auto-commit
props.put("enable.auto.commit", "false");
// Commit offsets manually, only after processing succeeds
try {
    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
    processRecords(records);
    consumer.commitSync();
} catch (Exception e) {
    // Handle the failure
}
Q: How do you avoid duplicate message consumption?
Solution:
Enable idempotence on the producer:
// Prevents duplicates introduced by producer retries
props.put("enable.idempotence", "true");
Make the consumer idempotent:
// Use a database unique constraint or a distributed lock for deduplication
public void processMessage(ConsumerRecord<String, String> record) {
    String messageId = record.key();
    if (isMessageProcessed(messageId)) {
        return; // Already processed, skip
    }
    // Process the message
    processBusinessLogic(record.value());
    // Mark the message as processed
    markMessageAsProcessed(messageId);
}
Q: What are the main Kafka performance-tuning options?
Solution:
Producer tuning:
// Batch sends
props.put("batch.size", 16384);
props.put("linger.ms", 1);
// Compress messages
props.put("compression.type", "gzip");
// Send asynchronously
producer.send(record, callback);
Consumer tuning:
// Fetch more records per poll
props.put("max.poll.records", 500);
// Scale out with multiple consumer threads. KafkaConsumer is not thread-safe,
// so each thread must own its own consumer instance within the same group.
ExecutorService executor = Executors.newFixedThreadPool(3);
for (int i = 0; i < 3; i++) {
    executor.submit(() -> {
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Arrays.asList("test-topic"));
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            processRecords(records);
        }
    });
}
Broker tuning:
# Increase the default partition count
num.partitions=8
# Tune log segment size (1 GiB)
log.segment.bytes=1073741824
# Tune the flush policy
log.flush.interval.messages=10000
log.flush.interval.ms=1000
Q: What should you do when a client cannot connect to Kafka?
Solution: Verify that the broker port is reachable from the client, and that advertised.listeners resolves to an address the client can actually route to; a mismatch between the listener the broker advertises and the address the client can reach is the most common cause. A small connectivity probe is sketched below.
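A minimal connectivity probe using the standard AdminClient API; the five-second timeout is an arbitrary choice for the sketch:
// Probe broker connectivity; fails fast if no broker is reachable
// (requires org.apache.kafka.clients.admin.AdminClient, org.apache.kafka.common.Node,
//  java.util.concurrent.TimeUnit)
Properties adminProps = new Properties();
adminProps.put("bootstrap.servers", "localhost:9092");
try (AdminClient admin = AdminClient.create(adminProps)) {
    Collection<Node> nodes = admin.describeCluster().nodes().get(5, TimeUnit.SECONDS);
    System.out.println("Connected; brokers visible: " + nodes.size());
} catch (Exception e) {
    System.err.println("Cannot reach the cluster: " + e.getMessage());
}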
Q: How do you fix unbalanced partitions?
Solution:
Manually reassign partitions:
# Generate a reassignment plan
bin/kafka-reassign-partitions.sh --bootstrap-server localhost:9092 \
  --topics-to-move-json-file reassign.json \
  --broker-list "0,1,2" --generate
# Execute the reassignment
bin/kafka-reassign-partitions.sh --bootstrap-server localhost:9092 \
  --reassignment-json-file reassign.json --execute
Automatic leader balancing:
# Periodically move partition leadership back to the preferred replica
auto.leader.rebalance.enable=true
// Propagate a trace ID through message headers
public class MessageTracer {
    private static final String TRACE_ID = "trace_id";

    public static void addTraceId(ProducerRecord<String, String> record) {
        String traceId = UUID.randomUUID().toString();
        record.headers().add(TRACE_ID, traceId.getBytes(StandardCharsets.UTF_8));
    }

    public static String getTraceId(ConsumerRecord<String, String> record) {
        Header header = record.headers().lastHeader(TRACE_ID);
        return header != null ? new String(header.value(), StandardCharsets.UTF_8) : null;
    }
}
// Producer side
ProducerRecord<String, String> record = new ProducerRecord<>("topic", "key", "value");
MessageTracer.addTraceId(record);
producer.send(record, callback);
// Consumer side
String traceId = MessageTracer.getTraceId(record);
log.info("Processing message with trace ID: {}", traceId);
// Filter messages with a consumer interceptor
public class MessageFilterInterceptor implements ConsumerInterceptor<String, String> {

    @Override
    public ConsumerRecords<String, String> onConsume(ConsumerRecords<String, String> records) {
        // ConsumerRecords is built from a map of partition -> records, not a flat list
        Map<TopicPartition, List<ConsumerRecord<String, String>>> filtered = new HashMap<>();
        for (TopicPartition partition : records.partitions()) {
            List<ConsumerRecord<String, String>> kept = new ArrayList<>();
            for (ConsumerRecord<String, String> record : records.records(partition)) {
                if (shouldProcess(record)) {
                    kept.add(record);
                }
            }
            if (!kept.isEmpty()) {
                filtered.put(partition, kept);
            }
        }
        return new ConsumerRecords<>(filtered);
    }

    private boolean shouldProcess(ConsumerRecord<String, String> record) {
        // Filtering logic goes here
        return true;
    }

    @Override
    public void onCommit(Map<TopicPartition, OffsetAndMetadata> offsets) {
        // Called when offsets are committed
    }

    @Override
    public void close() {
        // Release resources
    }

    @Override
    public void configure(Map<String, ?> configs) {
        // Read interceptor configuration
    }
}
// Register the interceptor
props.put("interceptor.classes", "com.example.MessageFilterInterceptor");
// Transform messages with a consumer interceptor
public class MessageTransformInterceptor implements ConsumerInterceptor<String, String> {

    @Override
    public ConsumerRecords<String, String> onConsume(ConsumerRecords<String, String> records) {
        Map<TopicPartition, List<ConsumerRecord<String, String>>> transformed = new HashMap<>();
        for (TopicPartition partition : records.partitions()) {
            List<ConsumerRecord<String, String>> out = new ArrayList<>();
            for (ConsumerRecord<String, String> record : records.records(partition)) {
                // Rebuild the record with the transformed value, preserving metadata
                out.add(new ConsumerRecord<>(
                        record.topic(), record.partition(), record.offset(),
                        record.timestamp(), record.timestampType(),
                        record.serializedKeySize(), record.serializedValueSize(),
                        record.key(), transform(record.value()),
                        record.headers(), record.leaderEpoch()));
            }
            transformed.put(partition, out);
        }
        return new ConsumerRecords<>(transformed);
    }

    private String transform(String value) {
        // Transformation logic goes here
        return value.toUpperCase();
    }

    // Remaining interceptor methods (onCommit, close, configure) omitted...
}
// Route messages to different topics based on content
public class MessageRouter {
    private final KafkaTemplate<String, String> kafkaTemplate;

    public MessageRouter(KafkaTemplate<String, String> kafkaTemplate) {
        this.kafkaTemplate = kafkaTemplate;
    }

    public void routeMessage(String message) {
        String topic = determineTopic(message);
        kafkaTemplate.send(topic, message);
    }

    private String determineTopic(String message) {
        // Choose the target topic from the message content
        if (message.contains("error")) {
            return "error-topic";
        } else if (message.contains("warning")) {
            return "warning-topic";
        } else {
            return "info-topic";
        }
    }
}
// Aggregate messages with Kafka Streams
public class MessageAggregator {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "message-aggregator");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        StreamsBuilder builder = new StreamsBuilder();
        // Read from the input topic
        KStream<String, String> input = builder.stream("input-topic");
        // Group by key and count
        KTable<String, Long> counts = input
                .groupByKey()
                .count();
        // Write the counts out; the value serde must be Long here, not the String default
        counts.toStream().to("output-topic", Produced.with(Serdes.String(), Serdes.Long()));

        KafkaStreams streams = new KafkaStreams(builder.build(), props);
        streams.start();
        Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
    }
}
Topic design checklist:
Naming conventions
Partition count
Replication factor
Message format
Message size
Message key design (see the topic-creation sketch after this list)
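A hedged sketch tying several checklist items together: creating a topic programmatically with an explicit partition count, replication factor, and size/retention configs. The topic name and config values are illustrative assumptions:
// Create a topic with explicit partitioning, replication, and size limits
// (requires org.apache.kafka.clients.admin.AdminClient and NewTopic)
Properties adminProps = new Properties();
adminProps.put("bootstrap.servers", "localhost:9092");
try (AdminClient admin = AdminClient.create(adminProps)) {
    NewTopic topic = new NewTopic("orders.payments.v1", 6, (short) 3); // name, partitions, replication
    Map<String, String> configs = new HashMap<>();
    configs.put("retention.ms", "604800000");    // keep messages for 7 days
    configs.put("max.message.bytes", "1048576"); // cap message size at 1 MiB
    topic.configs(configs);
    admin.createTopics(Collections.singletonList(topic)).all().get();
}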
Reliability configuration
// Ensure reliable delivery
props.put("acks", "all");
props.put("retries", 3);
props.put("max.in.flight.requests.per.connection", 1);
Performance tuning
// Batch sends
props.put("batch.size", 16384);
props.put("linger.ms", 1);
// Compression
props.put("compression.type", "gzip");
Exception handling
producer.send(record, (metadata, exception) -> {
    if (exception != null) {
        // Log the error
        log.error("Failed to send message", exception);
        // Retry or raise an alert
    }
});
Consumer configuration
// Cap the number of records returned per poll
props.put("max.poll.records", 500);
// Set a reasonable heartbeat interval
props.put("heartbeat.interval.ms", 3000);
// Set a reasonable session timeout
props.put("session.timeout.ms", 30000);
Consumer group management (see the lag-inspection sketch below)
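A minimal sketch of consumer-group management via AdminClient: listing a group's committed offsets and computing per-partition lag against the latest offsets. It assumes the test-group group from the earlier examples:
// Compute per-partition lag for a consumer group
Properties adminProps = new Properties();
adminProps.put("bootstrap.servers", "localhost:9092");
try (AdminClient admin = AdminClient.create(adminProps)) {
    Map<TopicPartition, OffsetAndMetadata> committed =
            admin.listConsumerGroupOffsets("test-group")
                 .partitionsToOffsetAndMetadata().get();
    // Ask for the current end offset of every partition the group has committed to
    Map<TopicPartition, OffsetSpec> latest = new HashMap<>();
    committed.keySet().forEach(tp -> latest.put(tp, OffsetSpec.latest()));
    Map<TopicPartition, ListOffsetsResult.ListOffsetsResultInfo> ends =
            admin.listOffsets(latest).all().get();
    committed.forEach((tp, offset) ->
            System.out.println(tp + " lag = " + (ends.get(tp).offset() - offset.offset())));
}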
Offset management
// Commit offsets manually
try {
    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
    processRecords(records);
    consumer.commitSync();
} catch (Exception e) {
    // Handle the failure
}
Monitoring and alerting
Capacity planning
Security management
Backup and recovery (see the MirrorMaker 2 sketch after this list)
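For the backup and recovery point, Kafka ships MirrorMaker 2 for cross-cluster replication, which is commonly used for backup and disaster recovery. A minimal sketch, assuming a primary and a backup cluster; the cluster aliases and bootstrap addresses are placeholders:
# mm2.properties: replicate every topic from primary to backup
clusters = primary, backup
primary.bootstrap.servers = localhost:9092
backup.bootstrap.servers = backup-host:9092
primary->backup.enabled = true
primary->backup.topics = .*
# Start MirrorMaker 2 with this configuration
bin/connect-mirror-maker.sh mm2.properties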