编写mapreduce程序从HBase的一张表中求某一列的方差

表中数据 

编写mapreduce程序从HBase的一张表中求某一列的方差_第1张图片

package com.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * MapReduce job that scans the {@code Info:attention} column of the HBase table
 * {@code data_t} and writes the population variance of that column to
 * {@code /attention/variance}.
 *
 * <p>The mapper emits every parsed value under one constant key, so a single
 * {@code reduce} call receives the whole column and computes the variance.
 */
public class GetAttentionVariance {

    /** Name of the HBase table that is scanned. */
    private static final String TABLE_NAME = "data_t";

    /** Column family holding the column of interest (encoded once, as UTF-8). */
    private static final byte[] FAMILY = Bytes.toBytes("Info");

    /** Qualifier of the column whose variance is computed. */
    private static final byte[] QUALIFIER = Bytes.toBytes("attention");

    /** Output directory of the job; an existing directory is removed before the run. */
    private static final String OUTPUT_DIR = "/attention/variance";

    /**
     * Configures and runs the job, then terminates the JVM with exit code 0 on
     * success or 1 on failure.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        FileSystem fs = FileSystem.get(conf);

        Job job = Job.getInstance(conf);
        job.setJarByClass(GetAttentionVariance.class);

        // Restrict the scan to the single column the job needs.
        Scan scan = new Scan();
        scan.addColumn(FAMILY, QUALIFIER);

        TableMapReduceUtil.initTableMapperJob(
                Bytes.toBytes(TABLE_NAME), // table to read
                scan,                      // scan describing which data to read
                MyMapper.class,            // mapper class
                Text.class,                // mapper output key type
                DoubleWritable.class,      // mapper output value type
                job
        );

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        // FileOutputFormat refuses to write into an existing directory, so clear it first.
        Path outputPath = new Path(OUTPUT_DIR);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean isSuccess = job.waitForCompletion(true);
        if (!isSuccess) {
            // Report the failure and fall through to the non-zero exit code below.
            System.err.println("任务运行错误!");
        }
        System.exit(isSuccess ? 0 : 1);
    }

    /**
     * Emits the numeric value of {@code Info:attention} for every row that has
     * the column, always under the same key.
     */
    public static class MyMapper extends TableMapper<Text, DoubleWritable> {

        private final Text outKey = new Text("attention_variance");
        private final DoubleWritable outValue = new DoubleWritable();

        /**
         * Parses the first returned cell of the column as a double and emits it.
         * Rows without the column are skipped.
         *
         * @param key     row key of the current row
         * @param value   cells returned by the scan for this row
         * @param context job context used to emit the output pair
         * @throws NumberFormatException if the cell content is not a parsable double
         */
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            List<Cell> cells = value.getColumnCells(FAMILY, QUALIFIER);
            if (cells.isEmpty()) {
                return;
            }

            // The value is stored as text, so decode the bytes before parsing.
            byte[] raw = CellUtil.cloneValue(cells.get(0));
            outValue.set(Double.parseDouble(Bytes.toString(raw)));
            context.write(outKey, outValue);
        }
    }

    /**
     * Computes the population variance (sum of squared deviations divided by
     * the number of values) of all values received for a key.
     */
    public static class MyReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {

        private final DoubleWritable outValue = new DoubleWritable();

        /**
         * Folds the values into a running mean and sum of squared deviations
         * (Welford's online algorithm) and writes the resulting variance.
         *
         * @param key     key shared by all values
         * @param values  values to aggregate; the iterable can be traversed only once
         * @param context job context used to emit the result
         */
        @Override
        protected void reduce(Text key, Iterable<DoubleWritable> values, Context context)
                throws IOException, InterruptedException {
            long count = 0;
            double mean = 0.0;
            double squaredDeviationSum = 0.0;

            // Single pass: nothing is buffered, so memory use does not grow with the table size.
            for (DoubleWritable value : values) {
                double x = value.get();
                count++;
                double delta = x - mean;
                mean += delta / count;
                squaredDeviationSum += delta * (x - mean);
            }

            if (count == 0) {
                // No values, no variance; avoids writing NaN from 0 / 0.
                return;
            }

            outValue.set(squaredDeviationSum / count);
            context.write(key, outValue);
        }
    }
}


结果:

【注意】reduce 方法中的 Iterable values 只能遍历一次;如果需要多次遍历(例如先求均值、再求方差),要先用 List 将其中的值保存起来,或者改用只需遍历一次的算法。

 

 

你可能感兴趣的:(大数据)