Constructing a custom data type as needed
FlowBean.java
package flow;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/**
 * Use an object to build your own data structure; the class must implement the Writable interface.
 * 1. The class must keep a no-argument constructor.
 * 2. The order in which write() outputs the fields' binary data must match the order in which readFields() reads them back.
 */
public class FlowBean implements Writable {

    private String phone;   // phone number, used by write()/readFields()/toString() and by the reducer
    private int upFlow;
    private int dFlow;
    private int amountFlow;

    public FlowBean() {}

    public FlowBean(String phone, int upFlow, int dFlow) {
        this.phone = phone;                 // phone number
        this.upFlow = upFlow;               // upstream traffic
        this.dFlow = dFlow;                 // downstream traffic
        this.amountFlow = upFlow + dFlow;   // total traffic
    }
    public int getUpFlow() {
        return upFlow;
    }
    public void setUpFlow(int upFlow) {
        this.upFlow = upFlow;
    }
    public int getdFlow() {
        return dFlow;
    }
    public void setdFlow(int dFlow) {
        this.dFlow = dFlow;
    }
    public int getAmountFlow() {
        return amountFlow;
    }
    public void setAmountFlow(int amountFlow) {
        this.amountFlow = amountFlow;
    }
    @Override
    public void write(DataOutput out) throws IOException { // serialization; overridden so the Hadoop cluster can call it
        out.writeInt(upFlow);
        out.writeUTF(phone);
        out.writeInt(dFlow);
        out.writeInt(amountFlow);
    }

    @Override
    public void readFields(DataInput in) throws IOException { // deserialization; overridden so the Hadoop cluster can call it, reading fields in the same order as write()
        this.upFlow = in.readInt();
        this.phone = in.readUTF();
        this.dFlow = in.readInt();
        this.amountFlow = in.readInt();
    }

    @Override
    public String toString() { // the final output format of each record
        return this.phone + "," + this.upFlow + "," + this.dFlow + "," + this.amountFlow;
    }
}
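Because the field order in write() must mirror readFields(), it is worth verifying the round trip locally before submitting a job. The sketch below is not part of the original example: the class name and sample values are made up; it simply serializes a FlowBean into a byte array and reads it back.
FlowBeanRoundTrip.java (illustrative)
package flow;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
/** Hypothetical local check: round-trips a FlowBean through write()/readFields(). */
public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean("13500001234", 100, 200); // sample values, not real data

        // Serialize with write(), exactly as the framework would.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance with readFields(); this relies on the no-arg constructor.
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        // If write() and readFields() disagreed on field order, the copy's fields would be scrambled.
        System.out.println("original: " + original);
        System.out.println("copy    : " + copy);
    }
}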
ProvincePartitioner.java
package flow;
import java.util.HashMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * The MapTask calls this class's getPartition method to decide
 * which reducer each key/value pair emitted by the mapper is sent to.
 */
public class ProvincePartitioner extends Partitioner<Text, FlowBean> {

    static HashMap<String, Integer> codeMap = new HashMap<>();
    static {
        codeMap.put("135", 0);
        codeMap.put("136", 1);
        codeMap.put("137", 2);
        codeMap.put("138", 3);
        codeMap.put("139", 4);
    }

    @Override // by default, partitions are assigned as key.hashCode() % numReduceTasks
    public int getPartition(Text key, FlowBean value, int numPartitions) {
        Integer code = codeMap.get(key.toString().substring(0, 3));
        return code == null ? 5 : code; // unknown prefixes all go to partition 5
    }
}
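To see how the partition assignment works, you can call getPartition directly on a few keys. This is a hypothetical local check, not part of the original job; the phone numbers are made up, and 6 is passed as numPartitions to match job.setNumReduceTasks(6).
ProvincePartitionerDemo.java (illustrative)
package flow;
import org.apache.hadoop.io.Text;
/** Hypothetical local check of ProvincePartitioner; sample phone numbers are made up. */
public class ProvincePartitionerDemo {
    public static void main(String[] args) {
        ProvincePartitioner partitioner = new ProvincePartitioner();
        String[] samplePhones = {"13500001111", "13700002222", "15000003333"};
        for (String phone : samplePhones) {
            // The third argument (numPartitions) is 6, matching job.setNumReduceTasks(6).
            int partition = partitioner.getPartition(new Text(phone), new FlowBean(), 6);
            System.out.println(phone + " -> partition " + partition);
        }
        // Expected: 135... -> 0, 137... -> 2, anything else (e.g. 150...) -> 5.
    }
}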
FlowCountMapper.java
package flow;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split("\t");
        // Assumed input layout per line: phone \t upFlow \t dFlow
        String phone = fields[0];
        int upFlow = Integer.parseInt(fields[1]);
        int dFlow = Integer.parseInt(fields[2]);
        context.write(new Text(phone), new FlowBean(phone, upFlow, dFlow));
    }
}
FlowCountReducer.java
package flow;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    /**
     * key:    one phone number
     * values: the traffic data of all access records produced by that phone number
     */
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context)
            throws IOException, InterruptedException {
        int upSum = 0;
        int dSum = 0;
        for (FlowBean value : values) {
            upSum += value.getUpFlow();
            dSum += value.getdFlow();
        }
        context.write(key, new FlowBean(key.toString(), upSum, dSum));
    }
}
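For intuition, suppose the mapper emitted the records below for one phone number (the values are made-up sample data); the reducer sums the upstream and downstream traffic and writes a single FlowBean for that phone. The sketch just replays the summing loop locally.
FlowCountReduceIllustration.java (illustrative)
package flow;
import java.util.Arrays;
import java.util.List;
/** Hypothetical illustration of the reduce step for one phone number; sample values are made up. */
public class FlowCountReduceIllustration {
    public static void main(String[] args) {
        // Records the framework would group under key "13500001234" (sample data).
        List<FlowBean> values = Arrays.asList(
                new FlowBean("13500001234", 100, 300),
                new FlowBean("13500001234", 50, 200));

        // Same summing logic as FlowCountReducer.reduce().
        int upSum = 0;
        int dSum = 0;
        for (FlowBean value : values) {
            upSum += value.getUpFlow();
            dSum += value.getdFlow();
        }

        // Prints 13500001234,150,500,650 via FlowBean.toString().
        System.out.println(new FlowBean("13500001234", upSum, dSum));
    }
}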
JobSubmitter.java
package flow;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class JobSubmitter {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(JobSubmitter.class);

        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReducer.class);

        job.setPartitionerClass(ProvincePartitioner.class); // if not set, HashPartitioner is used by default
        job.setNumReduceTasks(6);                            // one reduce task per partition index (0-5)

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, new Path("F:\\input"));
        FileOutputFormat.setOutputPath(job, new Path("F:\\province-output"));

        job.waitForCompletion(true);
    }
}
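Because the partitioner returns indices 0 through 5, setNumReduceTasks(6) gives every index its own reduce task, and the job writes six files (part-r-00000 through part-r-00005) in the output directory. The hardcoded F:\ paths only suit a local test run; the following is a minimal sketch, assuming you would rather pass the input and output paths as command-line arguments when submitting the jar to a cluster. JobSubmitterWithArgs is an illustrative name, not part of the original code.
JobSubmitterWithArgs.java (illustrative)
package flow;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/** Hypothetical variant of JobSubmitter that takes the input and output paths from args. */
public class JobSubmitterWithArgs {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(JobSubmitterWithArgs.class);

        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReducer.class);
        job.setPartitionerClass(ProvincePartitioner.class);
        job.setNumReduceTasks(6);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // args[0] = input path, args[1] = output path (the output path must not already exist).
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Exit non-zero if the job fails, so a calling script can detect the failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}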