二次排序的实现代码

 MapReduce 在 shuffle 阶段只会对 key 排序,不会对 value 排序。
 因此要对 value 排序(二次排序),需要把 value 并入自定义的组合 key,再借助 key 的排序来实现。
 1.自定义key
 
  import org.apache.hadoop.io.WritableComparable;
  import java.io.DataInput;
  import java.io.DataOutput;
  import java.io.IOException;
  /**
   * 自定义组合key
   */
  public class ComboKey implements WritableComparable {
   private int year ;
   private int temp ;
   public int getYear() {
    return year;
   }
   public void setYear(int year) {
    this.year = year;
   }
   public int getTemp() {
    return temp;
   }
   public void setTemp(int temp) {
    this.temp = temp;
   }
   /**
    * 对key进行比较实现
    */
   public int compareTo(ComboKey o) {
    int y0 = o.getYear();
    int t0 = o.getTemp() ;
    //年份相同(升序)
    if(year == y0){
     //气温降序
     return -(temp - t0) ;
    }
    else{
     return year - y0 ;
    }
   }
   /**
    * 串行化过程
    */
   public void write(DataOutput out) throws IOException {
    //年份
    out.writeInt(year);
    //气温
    out.writeInt(temp);
   }
   public void readFields(DataInput in) throws IOException {
    year = in.readInt();
    temp = in.readInt();
   }
  }
 2.自定义分区类,按照年份分区
  /**
   * 自定义分区类
   */
  public class YearPartitioner extends Partitioner {
   public int getPartition(ComboKey key, NullWritable nullWritable, int numPartitions) {
    int year = key.getYear();
    return year % numPartitions;
   }
  }
 3.定义分组对比器
  public class YearGroupComparator extends WritableComparator {
   protected YearGroupComparator() {
    super(ComboKey.class, true);
   }
   public int compare(WritableComparable a, WritableComparable b) {
    ComboKey k1 = (ComboKey)a ;
    ComboKey k2 = (ComboKey)b ;
    return k1.getYear() - k2.getYear() ;
   }
  }
 4.定义Key排序对比器
  package com.it18zhang.hdfs.maxtemp.allsort.secondarysort;
  import org.apache.hadoop.io.WritableComparable;
  import org.apache.hadoop.io.WritableComparator;
  /**
   *ComboKeyComparator
   */
  public class ComboKeyComparator extends WritableComparator {
   protected ComboKeyComparator() {
    super(ComboKey.class, true);
   }
   public int compare(WritableComparable a, WritableComparable b) {
    ComboKey k1 = (ComboKey) a;
    ComboKey k2 = (ComboKey) b;
    return k1.compareTo(k2);
   }
  }
 
 5.编写Mapper
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * WCTextMapper
 */
public class MaxTempMapper extends Mapper{
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] arr = line.split(" ");
        ComboKey keyOut = new ComboKey();
        keyOut.setYear(Integer.parseInt(arr[0]));
        keyOut.setTemp(Integer.parseInt(arr[1]));
        context.write(keyOut,NullWritable.get());
    }
}


 6.编写Reducer
  
  import org.apache.commons.lang.ObjectUtils;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.NullWritable;
  import org.apache.hadoop.mapreduce.Reducer;
  import java.io.IOException;
  /**
   * Reducer
   */
  public class MaxTempReducer extends Reducer{
   /**
    */
   protected void reduce(ComboKey key, Iterable values, Context context) throws IOException, InterruptedException {
    int year = key.getYear();
    int temp = key.getTemp();
    System.out.println("==============>reduce");
    for(NullWritable v : values){
     System.out.println(key.getYear() + " : " + key.getTemp());
    }
    context.write(new IntWritable(year),new IntWritable(temp));
   }
  }
 
 7.App

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.NullWritable;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  /**
   *
   */
  public class MaxTempApp {
   public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS","file:///");
    Job job = Job.getInstance(conf);
    //设置job的各种属性
    job.setJobName("SecondarySortApp");                        //作业名称
    job.setJarByClass(MaxTempApp.class);                 //搜索类
    job.setInputFormatClass(TextInputFormat.class); //设置输入格式
    //添加输入路径
    FileInputFormat.addInputPath(job,new Path(args[0]));
    //设置输出路径
    FileOutputFormat.setOutputPath(job,new Path(args[1]));
    job.setMapperClass(MaxTempMapper.class);             //mapper类
    job.setReducerClass(MaxTempReducer.class);           //reducer类
    //设置Map输出类型
    job.setMapOutputKeyClass(ComboKey.class);            //
    job.setMapOutputValueClass(NullWritable.class);      //
    //设置ReduceOutput类型
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);         //
    //设置分区类
    job.setPartitionerClass(YearPartitioner.class);
    //设置分组对比器
    job.setGroupingComparatorClass(YearGroupComparator.class);
    //设置排序对比器
    job.setSortComparatorClass(ComboKeyComparator.class);
    job.setNumReduceTasks(3);                           //reduce个数
    //
    job.waitForCompletion(true);
   }
  }

你可能感兴趣的:(二次排序的实现代码)