package com.mr.distinct;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
/**
 * Hadoop writable key describing a person, used to de-duplicate records.
 *
 * <p>A person's identity (ordering, equality and hash code) is defined by
 * {@code name}, {@code sex} and {@code age} only. {@code remark} is serialized
 * along with the other fields but is deliberately excluded from identity, so two
 * records that differ only in their remark are treated as duplicates.
 *
 * <p>Instances are mutable; do not change a field while the object is used as a
 * key in a hash-based collection.
 *
 * @author luobao
 */
public class Person implements WritableComparable<Person> {
    private Text name;
    private Text sex;
    private Text age;
    private Text remark;

    /** Creates a person whose fields are all empty {@link Text} values. */
    public Person() {
        this.name = new Text();
        this.sex = new Text();
        this.age = new Text();
        this.remark = new Text();
    }

    /**
     * Restores all four fields from the stream, in the order name, sex, age,
     * remark. This order must match {@link #write(DataOutput)}.
     *
     * @param paramDataInput stream to read the serialized fields from
     * @throws IOException if reading from the stream fails
     */
    @Override
    public void readFields(DataInput paramDataInput) throws IOException {
        this.name.readFields(paramDataInput);
        this.sex.readFields(paramDataInput);
        this.age.readFields(paramDataInput);
        this.remark.readFields(paramDataInput);
    }

    /**
     * Serializes all four fields to the stream, in the order name, sex, age,
     * remark. This order must match {@link #readFields(DataInput)}.
     *
     * @param paramDataOutput stream to write the serialized fields to
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void write(DataOutput paramDataOutput) throws IOException {
        this.name.write(paramDataOutput);
        this.sex.write(paramDataOutput);
        this.age.write(paramDataOutput);
        this.remark.write(paramDataOutput);
    }

    /**
     * Orders persons by name, then sex, then age.
     *
     * <p>This is where the set of fields taking part in "distinct" is chosen;
     * currently {@code remark} is ignored. The fields are compared one by one
     * rather than as a single concatenated string, because concatenation would
     * make different tuples such as ("ab", "c") and ("a", "bc") compare equal.
     *
     * @param person the person to compare against
     * @return a negative value, zero, or a positive value if this person sorts
     *         before, equal to, or after {@code person}
     */
    @Override
    public int compareTo(Person person) {
        int result = this.name.compareTo(person.getName());
        if (result != 0) {
            return result;
        }
        result = this.sex.compareTo(person.getSex());
        if (result != 0) {
            return result;
        }
        return this.age.compareTo(person.getAge());
    }

    /**
     * Two persons are equal when name, sex and age are equal; {@code remark} is
     * ignored, consistent with {@link #compareTo(Person)}.
     *
     * @param obj the object to compare against
     * @return {@code true} if {@code obj} is a {@code Person} with the same
     *         name, sex and age
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Person)) {
            return false;
        }
        return compareTo((Person) obj) == 0;
    }

    /**
     * Hash code derived from name, sex and age only, consistent with
     * {@link #equals(Object)}.
     *
     * <p>A content-based hash matters for a MapReduce key: the default
     * partitioner routes keys by {@code hashCode()}, so with the inherited
     * identity hash, equal persons could be sent to different reducers and
     * survive de-duplication.
     *
     * @return hash code of the identity fields
     */
    @Override
    public int hashCode() {
        int hash = this.name.hashCode();
        hash = 31 * hash + this.sex.hashCode();
        hash = 31 * hash + this.age.hashCode();
        return hash;
    }

    /** @return the name field (the internal instance, not a copy) */
    public Text getName() {
        return name;
    }

    /** @param name the instance to use as the name field (stored by reference) */
    public void setName(Text name) {
        this.name = name;
    }

    /** @return the sex field (the internal instance, not a copy) */
    public Text getSex() {
        return sex;
    }

    /** @param sex the instance to use as the sex field (stored by reference) */
    public void setSex(Text sex) {
        this.sex = sex;
    }

    /** @return the age field (the internal instance, not a copy) */
    public Text getAge() {
        return age;
    }

    /** @param age the instance to use as the age field (stored by reference) */
    public void setAge(Text age) {
        this.age = age;
    }

    /** @return the remark field (the internal instance, not a copy) */
    public Text getRemark() {
        return remark;
    }

    /** @param remark the instance to use as the remark field (stored by reference) */
    public void setRemark(Text remark) {
        this.remark = remark;
    }

    /** @return a readable representation listing all four fields, including remark */
    @Override
    public String toString() {
        return "Person [name=" + name + ", sex=" + sex + ", age=" + age
                + ", remark=" + remark + "]";
    }
}
package com.mr.distinct;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* @author luobao
*
*/
public class Distinct {
public static class DistinctUserMapper extends
Mapper
Person [name=张三, sex=女, age=20, remark=备注1]
Person [name=张三, sex=男, age=20, remark=备注2]
Person [name=李四, sex=男, age=21, remark=备注3]
Person [name=王五, sex=女, age=20, remark=备注]