MapReduce Case Study: Mobile Phone Traffic

How Serialization Works
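
Hadoop ships records between nodes with its own compact serialization framework rather than Java's built-in java.io.Serializable. A custom type used as a MapReduce key or value must implement org.apache.hadoop.io.Writable, defining write(DataOutput) to serialize its fields and readFields(DataInput) to read them back in exactly the same order. A type used as a key must additionally be comparable (WritableComparable) so the framework can sort it during the shuffle. FlowBean below puts this into practice.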

Eclipse shortcuts:

Generate a main method: type main, then press Alt+/

Generate a System.out.println: type sysout, then press Alt+/

FlowBean.java

package com.matrix.flowsum;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

/**
 * FlowBean
 * Author: Matrix
 * Date: 2016-03-10 08:56:04
 * @version 1.0.0
 */
public class FlowBean implements Writable {

    // Why implement Writable?
    // Implementing the Writable interface lets us override the serialization
    // and deserialization methods.

    // Phone number
    private String phoneNB;
    // Upstream traffic
    private long up_flow;
    // Downstream traffic
    private long d_flow;
    // Total traffic
    private long s_flow;

    // No-arg constructor.
    // During deserialization, reflection needs to call a no-arg constructor,
    // so we define one explicitly.
    public FlowBean() {
        super();
    }

    // Parameterized constructor, added for convenient initialization.
    public FlowBean(String phoneNB, long up_flow, long d_flow) {
        super();
        this.phoneNB = phoneNB;
        this.up_flow = up_flow;
        this.d_flow = d_flow;
        this.s_flow = up_flow + d_flow;
    }

    public String getPhoneNB() {
        return phoneNB;
    }

    public void setPhoneNB(String phoneNB) {
        this.phoneNB = phoneNB;
    }

    public long getUp_flow() {
        return up_flow;
    }

    public void setUp_flow(long up_flow) {
        this.up_flow = up_flow;
    }

    public long getD_flow() {
        return d_flow;
    }

    public void setD_flow(long d_flow) {
        this.d_flow = d_flow;
    }

    public long getS_flow() {
        return s_flow;
    }

    public void setS_flow(long s_flow) {
        this.s_flow = s_flow;
    }

    // Serialize this bean's fields into the data output stream.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phoneNB);   // phone number
        out.writeLong(up_flow);  // upstream traffic
        out.writeLong(d_flow);   // downstream traffic
        out.writeLong(s_flow);   // total traffic
    }

    // Deserialize this bean's fields from the data input stream.
    // Fields must be read back in exactly the order they were written.
    @Override
    public void readFields(DataInput in) throws IOException {
        phoneNB = in.readUTF();  // phone number
        up_flow = in.readLong(); // upstream traffic
        d_flow = in.readLong();  // downstream traffic
        s_flow = in.readLong();  // total traffic
    }

    @Override
    public String toString() {
        return "[phone: " + phoneNB + " up: " + up_flow + " down: " + d_flow + " total: " + s_flow + "]";
    }
}
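
Since write() and readFields() must mirror each other, it is worth verifying the round trip locally before submitting a job. A minimal sketch (not part of the original project; the class name, phone number, and byte counts are made-up example values):

package com.matrix.flowsum;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean("13726230503", 2481, 24681);

        // Serialize into an in-memory buffer, as Hadoop would into a stream.
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buf));

        // Deserialize into a fresh instance created via the no-arg constructor.
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));

        // Both lines should print identical field values.
        System.out.println(original);
        System.out.println(copy);
    }
}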

FlowSumMapper.java

package com.matrix.flowsum;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Input: LongWritable (byte offset of the line), Text (the line itself)
 * Output: Text (phone number), FlowBean (custom type)
 *
 * FlowSumMapper
 * Author: Matrix
 * Date: 2016-03-10 08:45:34
 * @version 1.0.0
 */
public class FlowSumMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    // FlowBean is our custom data type. To be shipped between Hadoop nodes it must
    // follow Hadoop's serialization mechanism, i.e. implement the Writable interface.

    // Take one line of the log, extract the fields we need (phone number, upstream
    // traffic, downstream traffic), wrap them as a key/value pair, and emit it.
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Grab one line of input
        String line = value.toString();
        // Split it on the tab character
        String[] field = StringUtils.split(line, "\t");

        // Pull out the fields we need
        // Phone number
        String phoneNB = field[1];
        // Upstream traffic
        long up_flow = Long.parseLong(field[7]);
        // Downstream traffic
        long d_flow = Long.parseLong(field[8]);

        // Wrap the data as a key/value pair and emit it
        System.out.println(phoneNB + "-------->" + new FlowBean(phoneNB, up_flow, d_flow).toString());
        context.write(new Text(phoneNB), new FlowBean(phoneNB, up_flow, d_flow));
    }
}
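
The mapper assumes tab-separated log lines in which index 1 holds the phone number and indexes 7 and 8 hold the upstream and downstream byte counts. A hypothetical line with that shape (the values are illustrative, not taken from the real dataset):

1363157985066	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com	24	27	2481	24681	200

Here field[1] is 13726230503, field[7] is 2481 (upstream), and field[8] is 24681 (downstream).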

FlowSumReducer.java

package com.matrix.flowsum;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowSumReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    // The reduce logic: iterate over the values, accumulate the sums, and emit the result
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context)
            throws IOException, InterruptedException {
        long up_flow_counter = 0;
        long d_flow_counter = 0;

        // Sum the upstream and downstream traffic for this phone number
        for (FlowBean flowBean : values) {
            up_flow_counter += flowBean.getUp_flow();
            d_flow_counter += flowBean.getD_flow();
        }

        // Emit the result
        context.write(key, new FlowBean(key.toString(), up_flow_counter, d_flow_counter));
    }
}
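
Because this reduce logic is a plain sum and the reducer's input and output types both match the map output types (Text, FlowBean), the same class could also be registered as a combiner to pre-aggregate on the map side and cut shuffle traffic. This is an optional tweak, not part of the original job:

job.setCombinerClass(FlowSumReducer.class);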

FlowSumRunner.java

package com.matrix.flowsum;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Job submission, written out in the standard form
 *
 * FlowSumRunner
 * Author: Matrix
 * Date: 2016-03-10 16:24:29
 * @version 1.0.0
 */
public class FlowSumRunner {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://node1:8020");

        FileSystem fs = FileSystem.get(//
                new URI("hdfs://node1:8020"), //
                conf, //
                "root"//
        );

        Job job = Job.getInstance(conf);

        // Set the job name
        job.setJobName("flowsum");

        job.setJarByClass(FlowSumRunner.class);

        job.setMapperClass(FlowSumMapper.class);
        job.setReducerClass(FlowSumReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // job.setNumReduceTasks(3);

        FileInputFormat.setInputPaths(job, new Path("/usr/matrix/input/flowsum.dat"));

        // Check whether the output directory already exists
        Path outdir = new Path("/usr/matrix/output/flowsum");
        if (fs.exists(outdir)) {
            fs.delete(outdir, true);
        }

        // Set the output directory
        // The path must be a directory, and it must not already exist
        FileOutputFormat.setOutputPath(job, outdir);

        boolean f = job.waitForCompletion(true);
        if (f) {
            System.out.println("FlowSum job completed successfully!");
        }
    }
}
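
To run the job on the cluster, package the compiled classes into a jar and submit it with the hadoop command-line tool. The jar file name below is illustrative, not from the original text:

hadoop jar flowsum.jar com.matrix.flowsum.FlowSumRunner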

Run result:

Contents of the processed file:

Custom Sort Implementation

Implement a custom sort over the file contents produced above. Note that SortMR uses FlowBean as the map output key, so the bean must be sortable; see the note after SortMR.java below.

SortMR.java

package com.matrix.SortMR;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.matrix.flowsum.FlowBean;

public class SortMR {

    public static class SortMapper extends Mapper<LongWritable, Text, FlowBean, NullWritable> {

        // Take one line, split out its fields, wrap them in a FlowBean, and emit the bean as the key
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Grab one line of input
            String line = value.toString();
            // Split it into fields
            String[] fields = StringUtils.split(line, "\t");
            // Phone number
            String phoneNB = fields[0];
            // Upstream traffic
            long up_flow = Long.parseLong(fields[1]);
            // Downstream traffic
            long d_flow = Long.parseLong(fields[2]);

            context.write(new FlowBean(phoneNB, up_flow, d_flow), NullWritable.get());
        }

    }

    public static class SortReducer extends Reducer<FlowBean, NullWritable, Text, FlowBean> {

        @Override
        protected void reduce(FlowBean key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {

            System.out.println(key);

            String phoneNB = key.getPhoneNB();
            context.write(new Text(phoneNB), key);
        }

    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://node1:8020");

        try {

            FileSystem fs = FileSystem.get(conf);

            Job job = Job.getInstance(conf);

            job.setJarByClass(SortMR.class);

            job.setJobName("sortMR");

            job.setMapperClass(SortMapper.class);
            job.setReducerClass(SortReducer.class);

            job.setMapOutputKeyClass(FlowBean.class);
            job.setMapOutputValueClass(NullWritable.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(FlowBean.class);

            // Set the input path
            FileInputFormat.setInputPaths(job, "/usr/matrix/input/sort");

            // Set the output path
            Path outer = new Path("/usr/matrix/output/sortMR");

            // Delete the output path if it already exists
            if (fs.exists(outer)) {
                fs.delete(outer, true);
            }

            FileOutputFormat.setOutputPath(job, outer);

            boolean f = job.waitForCompletion(true);

            if (f) {
                System.out.println("SortMR job completed successfully!");
            }

        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
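
One caveat: MapReduce sorts map output keys during the shuffle, so the plain Writable version of FlowBean shown earlier is not enough here; the bean must implement WritableComparable or the job will fail at runtime. A minimal sketch of the change, assuming a descending sort on total traffic (the sort criterion is an assumption, since the original text does not show the modified bean):

import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {

    // ... fields, constructors, getters/setters, write() and readFields() unchanged ...

    // Order keys by total traffic, largest first
    @Override
    public int compareTo(FlowBean other) {
        return Long.compare(other.getS_flow(), this.getS_flow());
    }
}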

Run result:

Contents of the processed file:
