Java开发简单Hadoop程序

阅读更多

pom.xml



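<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.kovansys.test</groupId>
  <version>1.1.0</version>

  <artifactId>hadoop_test</artifactId>

  <packaging>jar</packaging>

  <!-- NOTE: the XML tags were lost in the original post; element and property names below are reconstructed. -->
  <properties>

    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>

    <hadoop.version>3.2.0</hadoop.version>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>

  </properties>
  <build>
    <directory>target</directory>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.8.0</version>
      </plugin>
    </plugins>
  </build>

  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${hadoop.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>${hadoop.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>${hadoop.version}</version>
    </dependency>

  </dependencies>
</project>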

 

    WordCountStarter类

package hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Command-line driver that configures and submits the word-count MapReduce job.
 *
 * <p>Expects exactly two arguments after any generic Hadoop options: the input path and the
 * output path.
 */
public class WordCountStarter {
  /**
   * Builds the job from the command-line arguments, runs it, and exits the JVM.
   *
   * <p>Exit codes: {@code 2} when the argument count is wrong, {@code 0} when the job succeeds,
   * {@code 1} when it fails.
   *
   * @param args generic Hadoop options followed by {@code <in> <out>}
   * @throws Exception if job setup or execution fails
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Let Hadoop parse its generic options into conf; what is left are our own arguments.
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      // The placeholders were missing from the message, leaving the user without any hint.
      System.err.println("Usage: WordCountStarter <in> <out>");
      System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCountStarter.class);
    job.setMapperClass(WordCountMapper.class);
    // The reducer only sums counts, so it is also used as the combiner.
    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Block until the job finishes (printing progress) and map success to the exit code.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

 

   WordCountMapper类

package hadoop;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper {
  private static final LongWritable one = new LongWritable(1);
  private Text word = new Text();

  public void map(Object key, Text value, Mapper.Context context) throws IOException, InterruptedException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      this.word.set(itr.nextToken());
      context.write(this.word, one);
    }
  }
}

 

    WordCountReducer类

    

package hadoop;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer {
  private LongWritable result = new LongWritable();

  public void reduce(Text key, Iterable values, Reducer.Context context) throws IOException, InterruptedException {
    int sum = 0;
    for (LongWritable val : values) {
      sum += val.get();
    }
    this.result.set(sum);
    context.write(key, this.result);
  }
}

 

   #打包

   $mvn clean install

   #将生成的jar包放到服务器上 如/opt/temp/hadoop_test-1.1.0.jar

   #运行MapReduce任务

   bin/hadoop jar /opt/temp/hadoop_test-1.1.0.jar hadoop.WordCountStarter  input output

   #查看结果

   bin/hdfs dfs -cat output/*

   

    

你可能感兴趣的:(hadoop,java,大数据)