Apache Kafka 是一个分布式流处理平台,由 LinkedIn 开发并开源,具有高吞吐、低延迟、可水平扩展等特性。它广泛应用于实时数据管道、日志聚合、事件溯源、消息队列等场景。
# Download the Kafka 3.7.0 binary distribution (Scala 2.13 build).
wget https://downloads.apache.org/kafka/3.7.0/kafka_2.13-3.7.0.tgz
# Start ZooKeeper first — required by this (ZooKeeper-based) deployment mode.
bin/zookeeper-server-start.sh config/zookeeper.properties
# Start the Kafka broker (listens on localhost:9092 by default).
bin/kafka-server-start.sh config/server.properties
# Create the demo topic with 3 partitions; replication-factor 1 is the only
# valid choice for a single-broker cluster.
bin/kafka-topics.sh --create --topic demo-topic \
--bootstrap-server localhost:9092 \
--partitions 3 --replication-factor 1
# Console producer: each line typed on stdin is sent as one message.
bin/kafka-console-producer.sh --topic demo-topic --bootstrap-server localhost:9092
# Console consumer: prints messages, reading the topic from the earliest offset.
bin/kafka-console-consumer.sh --topic demo-topic --bootstrap-server localhost:9092 --from-beginning
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>3.7.0</version>
</dependency>
import org.apache.kafka.clients.producer.*;
import java.util.Properties;
/**
 * Minimal Kafka producer example: sends 10 String key/value records to
 * {@code demo-topic} and logs the partition/offset of each acknowledged send.
 */
public class KafkaProducerDemo {
    public static void main(String[] args) {
        // Producer configuration: broker address plus String serializers for key and value.
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // try-with-resources guarantees close() — which also flushes buffered
        // records — runs on every exit path. The original only closed the
        // producer on the happy path and leaked it if send() threw.
        try (Producer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 10; i++) {
                ProducerRecord<String, String> record =
                        new ProducerRecord<>("demo-topic", "key-" + i, "value-" + i);
                // Asynchronous send; the callback runs on the producer's I/O thread
                // once the broker acknowledges (or rejects) the record.
                producer.send(record, (metadata, exception) -> {
                    if (exception == null) {
                        System.out.printf("Sent to partition %d, offset %d%n",
                                metadata.partition(), metadata.offset());
                    } else {
                        exception.printStackTrace();
                    }
                });
            }
        }
    }
}
import org.apache.kafka.clients.consumer.*;
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
/**
 * Minimal Kafka consumer example: polls {@code demo-topic} forever as part of
 * consumer group {@code test-group} and prints every record received.
 */
public class KafkaConsumerDemo {
    public static void main(String[] args) {
        // Consumer configuration: broker address, consumer group id, and
        // String deserializers for key and value.
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("group.id", "test-group");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("auto.offset.reset", "earliest"); // start from the earliest offset when the group has no committed offset
        // try-with-resources closes the consumer on every exit path. The original
        // called subscribe() before the try block, so an exception there would
        // have leaked the consumer (its close() was only reachable afterwards).
        try (Consumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("demo-topic"));
            while (true) {
                // poll() blocks up to 100 ms waiting for new records.
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("Received: partition=%d, offset=%d, key=%s, value=%s%n",
                            record.partition(), record.offset(), record.key(), record.value());
                }
            }
        }
    }
}
性能调优的关键参数包括 batch.size、linger.ms(生产者)和 max.poll.records(消费者)。注意不要让不同应用误用相同的 group.id,否则会导致消费混乱。如需更强的投递保证,可启用 enable.idempotence=true 并结合事务 API。Kafka 是构建实时数据管道的核心工具,适用于日志收集、事件流处理等场景。通过合理设计 Topic 分区和消费者组,结合 Java 客户端 API,可快速实现高可靠的消息系统。对于需要更强事务支持或复杂路由的场景,建议结合 RabbitMQ 或 Pulsar 使用。
技术视角的奇妙联想
“人生到处知何似,应似飞鸿踏雪泥”
若用分布式系统比喻:
雪泥:如同消息队列中的持久化存储(如Kafka的日志保留)
鸿爪:类似系统产生的数据痕迹(offset记录)
飞鸿:恰似流动的实时数据流(stream processing)