HBase Batch Import via the Java API

1. Upload the log file (download the attachment) to HDFS using the Hadoop command line: hadoop fs -put input /  — you can confirm the upload afterwards with hadoop fs -ls /input.

[Screenshot 1: the log file uploaded to HDFS]

2. Create the HBase table via the Java API
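The import job in step 3 writes into an existing table named wlan_log with a single column family cf (both names appear in the job configuration and in the reducer below). The original post does not show the creation code, so the following is a minimal sketch using the classic (pre-1.0) HBase client API, matching the API generation of the import job; the class name CreateWlanLogTable is illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateWlanLogTable {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Same ZooKeeper quorum as the import job below
        conf.set("hbase.zookeeper.quorum", "hadoop1");

        HBaseAdmin admin = new HBaseAdmin(conf);
        if (!admin.tableExists("wlan_log")) {
            // Single column family "cf", as used by the reducer's Put calls
            HTableDescriptor descriptor = new HTableDescriptor("wlan_log");
            descriptor.addFamily(new HColumnDescriptor("cf"));
            admin.createTable(descriptor);
        }
        admin.close();
    }
}

3. Batch-import the log data with a MapReduce job

The mapper below builds a row key from the second tab-separated field of each line (presumably a phone number in this WLAN log) joined with the first field, which it parses as an epoch-millisecond timestamp and formats as yyyyMMddHHmmss. The reducer wraps each line in a Put and writes it into wlan_log through TableOutputFormat: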

import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class HbaseBatchImport {

    public static void main(String[] args) throws Exception {
        final Configuration configuration = new Configuration();
        // Point the client at the ZooKeeper quorum
        configuration.set("hbase.zookeeper.quorum", "hadoop1");

        // Target HBase table for TableOutputFormat
        configuration.set(TableOutputFormat.OUTPUT_TABLE, "wlan_log");

        // Raise this value so HBase does not time out and abort during the import
        configuration.set("dfs.socket.timeout", "180000");

        final Job job = new Job(configuration, "HBaseBatchImport");

        job.setMapperClass(BatchImportMapper.class);
        job.setReducerClass(BatchImportReducer.class);
        // Set the map output types; the reduce output types are handled by TableOutputFormat
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        // No output path is set; the output format writes directly into HBase
        job.setOutputFormatClass(TableOutputFormat.class);

        FileInputFormat.setInputPaths(job, "hdfs://hadoop1:9000/input");

        job.waitForCompletion(true);
    }

    static class BatchImportMapper extends
            Mapper<LongWritable, Text, LongWritable, Text> {
        SimpleDateFormat dateformat1 = new SimpleDateFormat("yyyyMMddHHmmss");
        Text v2 = new Text();

        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            final String[] splited = value.toString().split("\t");
            try {
                // Row key = second field + formatted timestamp from the first field
                final Date date = new Date(Long.parseLong(splited[0].trim()));
                final String dateFormat = dateformat1.format(date);
                String rowKey = splited[1] + ":" + dateFormat;
                v2.set(rowKey + "\t" + value.toString());
                context.write(key, v2);
            } catch (NumberFormatException e) {
                // Count malformed records instead of failing the job
                final Counter counter = context.getCounter("BatchImport",
                        "ErrorFormat");
                counter.increment(1L);
                System.out.println("Bad record: " + splited[0] + " " + e.getMessage());
            }
        }
    }

    static class BatchImportReducer extends
            TableReducer<LongWritable, Text, NullWritable> {
        protected void reduce(LongWritable key,
                java.lang.Iterable<Text> values, Context context)
                throws java.io.IOException, InterruptedException {
            for (Text text : values) {
                final String[] splited = text.toString().split("\t");

                // splited[0] is the row key prepended by the mapper
                final Put put = new Put(Bytes.toBytes(splited[0]));
                put.add(Bytes.toBytes("cf"), Bytes.toBytes("date"),
                        Bytes.toBytes(splited[1]));
                // Add the remaining fields with further put.add(...) calls
                context.write(NullWritable.get(), put);
            }
        }
    }

}
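Note: to submit the job, one common approach is to package the classes into a jar and run it with hadoop jar. The HBase client classes must be visible to the map and reduce tasks, for example by adding the HBase jars to HADOOP_CLASSPATH or by calling TableMapReduceUtil.addDependencyJars(job) (from org.apache.hadoop.hbase.mapreduce) before waitForCompletion.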

4. View the import results, for example in the HBase shell with scan 'wlan_log':

[Screenshot 2: rows of wlan_log after the import]
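For a programmatic check, here is a minimal sketch using the same classic HTable/Scan API (the class name VerifyImport and the 10-row limit are illustrative) that prints the first few row keys and their cf:date values:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class VerifyImport {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop1");

        HTable table = new HTable(conf, "wlan_log");
        ResultScanner scanner = table.getScanner(new Scan());
        int printed = 0;
        for (Result result : scanner) {
            // Print the row key and the cf:date column written by the reducer
            System.out.println(Bytes.toString(result.getRow()) + " => "
                    + Bytes.toString(result.getValue(
                            Bytes.toBytes("cf"), Bytes.toBytes("date"))));
            if (++printed >= 10) {
                break;
            }
        }
        scanner.close();
        table.close();
    }
}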

 
