Flink in Action (4): Real-Time Sync from Kafka to HBase with Flink

1 Maven Dependencies



<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>flink-kafka-hbase</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <flink.version>1.13.6</flink.version>
        <scala.binary.version>2.11</scala.binary.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.34</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.28</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>23.0</version>
        </dependency>
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.8.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.2</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.4</version>
        </dependency>
        <dependency>
            <groupId>com.jayway.jsonpath</groupId>
            <artifactId>json-path</artifactId>
            <version>2.4.0</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>joda-time</groupId>
            <artifactId>joda-time</artifactId>
            <version>2.9.9</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>

        <!-- Flink -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>1.4.0</version>
        </dependency>

        <!-- HBase / Hadoop -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hbase_${scala.binary.version}</artifactId>
            <version>1.9.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hadoop-compatibility_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.25</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.0</version>
                <configuration>
                    <createDependencyReducedPom>false</createDependencyReducedPom>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <!-- entry point of the fat jar -->
                                    <mainClass>com.cwf.kafka.hbasedemo.KafkaHBaseStreamWriteMain</mainClass>
                                </transformer>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>reference.conf</resource>
                                </transformer>
                            </transformers>
                            <filters>
                                <filter>
                                    <artifact>*:*:*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
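Two things in this build are worth calling out. The shade plugin packages the job and its dependencies into a single fat jar, sets KafkaHBaseStreamWriteMain as the entry point, and merges reference.conf files so Flink's Akka-based components keep working; the jar is built with the usual command:

    mvn clean package

Also note that flink-hbase is pinned at 1.9.3 rather than ${flink.version}: the flink-hbase_2.11 artifact stopped being published in later Flink releases (from 1.12 on, HBase support ships as flink-connector-hbase), so this demo mixes the old connector with Flink 1.13.6.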

2 Java Code

2.1 Kafka Producer

package com.cwf.kafka.hbasedemo;

import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

/**
 * Writes test data to Kafka, simulating a producer.
 */
@Slf4j
public class KafkaUtilsProducer {
    public static final String broker_list = "10.252.92.4:9092";
    public static final String topic = "zhisheng";  // must be the same topic the Flink job consumes

    public static void writeToKafka() throws InterruptedException {
        Properties props = new Properties();
        props.put("bootstrap.servers", broker_list);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        int i = 0;
        while (true) {
            Thread.sleep(100L); // send one record every 100 ms
            // the value is the current timestamp in milliseconds; partition and key are left unset
            ProducerRecord<String, String> record = new ProducerRecord<>(
                    topic, null, null, String.valueOf(System.currentTimeMillis()));
            producer.send(record);
            log.info("record:{}", record);
            if (i % 10 == 0) { // flush every 10 records
                producer.flush();
                log.info("flush");
            }
            i++;
        }
    }

    public static void main(String[] args) throws InterruptedException {
        writeToKafka();
    }
}
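To sanity-check the producer independently of Flink, you can tail the topic with the console consumer that ships with Kafka (a minimal check, assuming the Kafka CLI scripts are on the PATH; the broker address is the one hard-coded above):

    kafka-console-consumer.sh --bootstrap-server 10.252.92.4:9092 --topic zhisheng

Each printed line should be a millisecond timestamp such as 1650012345678.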

2.2 Main Class

package com.cwf.kafka.hbasedemo;

import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.Properties;

@Slf4j
public class KafkaHBaseStreamWriteMain {
    public static String TOPIC = "zhisheng";

    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.addSource(new FlinkKafkaConsumer<>(
                TOPIC,   // this Kafka topic must match the one used by the producer utility above
                new SimpleStringSchema(),
                getKafkaProps()))
                .writeUsingOutputFormat(new HBaseOutputFormat());

        env.execute("Flink HBase connector sink");
    }


    private static Properties getKafkaProps() {
        // Kafka consumer configuration
        Properties props = new Properties();
        props.put("bootstrap.servers", "10.252.92.4:9092");
        // zookeeper.connect is only read by the legacy consumer API and is ignored here; kept from the original setup
        props.put("zookeeper.connect", "10.252.92.4:2181");
        props.put("group.id", "metric-group");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("auto.offset.reset", "latest");
        return props;
    }

    private static class HBaseOutputFormat implements OutputFormat<String> {

        private org.apache.hadoop.conf.Configuration configuration;
        private Connection connection = null;
        private Table table = null;

        @Override
        public void configure(Configuration parameters) {
            // HBase configuration
            configuration = HBaseConfiguration.create();
            configuration.set("hbase.zookeeper.quorum", "10.252.92.4");
            configuration.set("hbase.zookeeper.property.clientPort", "2181");
            configuration.set("hbase.rpc.timeout", "30000");
            configuration.set("hbase.client.operation.timeout", "30000");
            configuration.set("hbase.client.scanner.timeout.period", "30000");
        }

        @Override
        public void open(int taskNumber, int numTasks) throws IOException {
            connection = ConnectionFactory.createConnection(configuration);
            TableName tableName = TableName.valueOf("zhisheng_stream");
            Admin admin = connection.getAdmin();
            if (!admin.tableExists(tableName)) { // create the table if it does not exist yet
                log.info("Table does not exist: {}", tableName);
                admin.createTable(
                        new HTableDescriptor(tableName)
                                .addFamily(new HColumnDescriptor("info_stream")));
            }
            admin.close();
            table = connection.getTable(tableName);
        }

        @Override
        public void writeRecord(String record) throws IOException {
            // the record is a millisecond timestamp string, e.g. "1650012345678";
            // substring(6, 10) slices four digits out of its middle to use as the rowkey
            log.info("rowkey->{}, column->info_stream:{}, value->{}", record.substring(6, 10), record, "cwf_" + record);
            Put put = new Put(Bytes.toBytes(record.substring(6, 10)));
            put.addColumn(Bytes.toBytes("info_stream"), Bytes.toBytes(record), Bytes.toBytes("cwf_" + record));
            table.put(put);
        }

        @Override
        public void close() throws IOException {
            table.close();
            connection.close();
        }
    }
}
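Note that writeUsingOutputFormat comes with no fault-tolerance guarantees of its own: if the job restarts, some records may be written to HBase again. In this demo that is harmless, because a replayed record produces the same rowkey, qualifier, and value, so the duplicate Put simply overwrites the earlier cell. If you still want the Kafka source to rewind consistently on failure, checkpointing can be enabled in main() before wiring the source (a sketch; the 5-second interval and the restart strategy are illustrative values, not part of the original code):

    // requires: org.apache.flink.api.common.restartstrategy.RestartStrategies,
    //           org.apache.flink.api.common.time.Time, java.util.concurrent.TimeUnit
    env.enableCheckpointing(5000); // checkpoint every 5 seconds so Kafka offsets are persisted
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10, TimeUnit.SECONDS)));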

3. Running Locally

Console output:

[Screenshot 1: Flink console output]

HBase console:

This confirms the job runs successfully on the local machine, completing the full pipeline: producer -> Kafka -> consumer (Flink) -> data store (HBase).
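The write path can also be verified directly from the HBase shell (LIMIT just keeps the scan output short):

    hbase shell
    scan 'zhisheng_stream', {LIMIT => 5}

Each row should show columns in the info_stream family whose qualifier is the raw timestamp and whose value is the "cwf_"-prefixed copy.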

4. Packaging and Deployment

[Screenshot 2]

[Screenshot 3]
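For reference, submitting the shaded jar from the command line looks like this (a sketch; the jar name follows the artifactId and version in the POM, and -c names the main class registered with the shade plugin):

    flink run -c com.cwf.kafka.hbasedemo.KafkaHBaseStreamWriteMain target/flink-kafka-hbase-1.0-SNAPSHOT.jar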

Looking up Rowkey=2162 in HBase:

[Screenshot 4: HBase lookup result]
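The same lookup works from the HBase shell ('2162' being the four-digit rowkey slice shown above):

    get 'zhisheng_stream', '2162'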

Success.
