The complete code is below:
**
Note:
1. This program does not need to be packaged into a jar and uploaded to Linux to run.
2. The cluster is configured for high availability (HA).
3. The resources directory must contain Hadoop's four configuration files, plus log4j.properties for viewing logs (a sketch of this layout follows below).
**
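For reference, the resources layout would look roughly like this (the four *-site.xml files are copied from your own cluster; log4j.properties only controls local log output):

src/main/resources/
├── core-site.xml        fs.defaultFS and the HA nameservice (mycluster)
├── hdfs-site.xml        NameNode HA failover settings
├── mapred-site.xml      MapReduce framework settings
├── yarn-site.xml        ResourceManager addresses
└── log4j.properties     console logging for local runs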
package HDFSToHbase.answer_base.mapper;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
@description Reads each line from HDFS and emits it unchanged as the map output key
**/
public class HDFSToHbaseMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Minimal pass-through: each input line becomes the map output key;
        // the reducer is responsible for turning it into an HBase Put
        context.write(value, NullWritable.get());
    }
}
**
**
package HDFSToHbase.answer_base.reducer;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
@description Parses each line and writes it into HBase as a Put
**/
public class HDFSToHbaseReducer extends TableReducer<Text, NullWritable, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // Sketch of the class body, assuming tab-separated lines whose first
        // field is the row key; the job class stored the column family name
        // in the configuration under the key "info"
        String family = context.getConfiguration().get("info");
        String[] fields = key.toString().split("\t");
        Put put = new Put(fields[0].getBytes());
        for (int i = 1; i < fields.length; i++) {
            // qualifiers "c1", "c2", ... are placeholders for the real field names
            put.addColumn(family.getBytes(), ("c" + i).getBytes(), fields[i].getBytes());
        }
        context.write(NullWritable.get(), put);
    }
}
**
**
package HDFSToHbase.answer_base.job;
import HDFSToHbase.answer_base.mapper.HDFSToHbaseMapper;
import HDFSToHbase.answer_base.reducer.HDFSToHbaseReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import util.HadoopUtil;
import java.io.IOException;
/**
@description Migrates data from HDFS into HBase
@author: [email protected]
@create: 2018-12-06 21:36:05
**/
public class HDFSToHbaseJob {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Get a Configuration instance preloaded with the remote cluster settings
        Configuration conf = HadoopUtil.getRemoteHadoopConf();
        // Point at ZooKeeper and HBase, and name the output table
        conf.set("hbase.zookeeper.quorum", "bigdata1:2181,bigdata2:2181,bigdata3:2181");
        conf.set("hbase.rootdir", "hdfs://mycluster:8020/hbase");
        conf.set(TableOutputFormat.OUTPUT_TABLE, args[1]);
        // Stash the column family name (args[2]) under the key "info" so the reducer can read it
        conf.set("info", args[2]);
        // Get a connection
        Connection connection = ConnectionFactory.createConnection(conf);
        // Get the admin client
        Admin admin = connection.getAdmin();
        // If the table already exists, drop it (tableExists avoids the
        // TableNotFoundException that getTableDescriptor throws for a missing table)
        if (admin.tableExists(TableName.valueOf(args[1]))) {
            admin.disableTable(TableName.valueOf(args[1]));   // take the table offline
            admin.deleteTable(TableName.valueOf(args[1]));    // delete the table
        }
        // Create the table and its column family
        HTableDescriptor hbaseTable = new HTableDescriptor(TableName.valueOf(args[1]));
        hbaseTable.addFamily(new HColumnDescriptor(args[2]));
        admin.createTable(hbaseTable);
        // Release the admin and connection
        admin.close();
        connection.close();
        // Get a Job instance and name it
        Job job = Job.getInstance(conf, HDFSToHbaseJob.class.getSimpleName());
        TableMapReduceUtil.addDependencyJars(job);
        // Set the class that carries the job
        job.setJarByClass(HDFSToHbaseJob.class);
        // Set the mapper class and its output key/value types
        job.setMapperClass(HDFSToHbaseMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Set the reducer class and its output value type
        job.setReducerClass(HDFSToHbaseReducer.class);
        job.setOutputValueClass(NullWritable.class);
        // Set the input path and the output format
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setOutputFormatClass(TableOutputFormat.class);
        // Submit the job to the cluster and wait for it to finish
        job.waitForCompletion(true);
    }
}
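For a run, the three program arguments are the HDFS input path, the target table name, and the column family. The values below are illustrative placeholders, not from the original:

args[0] = hdfs://mycluster:8020/data/answer_base
args[1] = answer_base
args[2] = info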
**
**
package util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
/**
@description Builds the Configuration for the remote Hadoop/HBase cluster
@author:
@create: 2018-11-14 19:34:45
**/
public class HadoopUtil {
    /**
     * A minimal sketch of getRemoteHadoopConf(), assuming the standard
     * approach of loading the cluster config files from the classpath.
     */
    public static Configuration getRemoteHadoopConf() {
        // Start from the HBase defaults, then add the four files from resources
        Configuration conf = HBaseConfiguration.create();
        conf.addResource("core-site.xml");
        conf.addResource("hdfs-site.xml");
        conf.addResource("mapred-site.xml");
        conf.addResource("yarn-site.xml");
        return conf;
    }
}
**
pom.xml (key parts):

    <modelVersion>4.0.0</modelVersion>
    <groupId>org.qianfeng</groupId>
    <artifactId>qilap</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-service</artifactId>
            <version>2.3.4</version>
        </dependency>
        <!-- The HBase modules the code imports (hbase-client, and hbase-server
             for the mapreduce classes) belong here too; their versions were
             not shown in the original fragment. -->
    </dependencies>
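Once the job finishes, the migrated rows can be checked from the HBase shell; the table name is whatever was passed as args[1] (the placeholder from the example above is used here):

hbase shell
scan 'answer_base', {LIMIT => 5}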