Hadoop Custom Data Type Programming Exercise

This exercise implements a custom Hadoop Writable type, KpiWritable, and uses it in a MapReduce job that sums, per phone number, the upstream/downstream packet counts and byte totals recorded in an HTTP traffic log.

Code:

package zidongyi;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class KpiApp {
    static final String INPUT_PATH = "hdfs://192.168.1.100:9000/input/HTTP_20130313143750.dat";
    static final String OUT_PATH = "hdfs://192.168.1.100:9000/output/out02";

    public static void main(String[] args) throws Exception {
        final Job job = new Job(new Configuration(), KpiApp.class.getSimpleName());

        // 1.1 Specify the input file path
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        // Specify the class that parses the input file
        job.setInputFormatClass(TextInputFormat.class);

        // 1.2 Specify the custom Mapper class
        job.setMapperClass(MyMapper.class);
        // Specify the map output types <k2, v2>
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(KpiWritable.class);

        // 1.3 Specify the partitioner class
        job.setPartitionerClass(HashPartitioner.class);
        job.setNumReduceTasks(1);

        // 1.4 TODO sorting, grouping

        // 1.5 TODO (optional) combiner

        // 2.2 Specify the custom Reducer class
        job.setReducerClass(MyReducer.class);
        // Specify the final output types <k3, v3>
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(KpiWritable.class);

        // 2.3 Specify where the output goes
        FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
        // Set the output format class
        job.setOutputFormatClass(TextOutputFormat.class);

        // Submit the job to the JobTracker and wait for completion
        job.waitForCompletion(true);
    }

    static class MyMapper extends Mapper<LongWritable, Text, Text, KpiWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            final String[] splited = value.toString().split("\t");
            final String msisdn = splited[1];
            final Text k2 = new Text(msisdn);
            final KpiWritable v2 = new KpiWritable(splited[6], splited[7], splited[8], splited[9]);
            context.write(k2, v2);
        }
    }

    static class MyReducer extends Reducer<Text, KpiWritable, Text, KpiWritable> {
        /**
         * @param k2  a distinct phone number from the input file
         * @param v2s the collection of that phone number's traffic records across time periods
         */
        @Override
        protected void reduce(Text k2, Iterable<KpiWritable> v2s, Context context)
                throws IOException, InterruptedException {
            long upPackNum = 0L;
            long downPackNum = 0L;
            long upPayLoad = 0L;
            long downPayLoad = 0L;

            for (KpiWritable kpiWritable : v2s) {
                upPackNum += kpiWritable.upPackNum;
                downPackNum += kpiWritable.downPackNum;
                upPayLoad += kpiWritable.upPayLoad;
                downPayLoad += kpiWritable.downPayLoad;
            }

            final KpiWritable v3 = new KpiWritable(upPackNum + "", downPackNum + "", upPayLoad + "", downPayLoad + "");
            context.write(k2, v3);
        }
    }
}

class KpiWritable implements Writable {
    long upPackNum;
    long downPackNum;
    long upPayLoad;
    long downPayLoad;

    // Hadoop needs a no-arg constructor to create instances via reflection
    public KpiWritable() {}

    public KpiWritable(String upPackNum, String downPackNum, String upPayLoad, String downPayLoad) {
        this.upPackNum = Long.parseLong(upPackNum);
        this.downPackNum = Long.parseLong(downPackNum);
        this.upPayLoad = Long.parseLong(upPayLoad);
        this.downPayLoad = Long.parseLong(downPayLoad);
    }

    // Deserialize the fields in exactly the order write() emitted them
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upPackNum = in.readLong();
        this.downPackNum = in.readLong();
        this.upPayLoad = in.readLong();
        this.downPayLoad = in.readLong();
    }

    // Serialize the fields for the shuffle and intermediate storage
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upPackNum);
        out.writeLong(downPackNum);
        out.writeLong(upPayLoad);
        out.writeLong(downPayLoad);
    }

    @Override
    public String toString() {
        return upPackNum + "\t" + downPackNum + "\t" + upPayLoad + "\t" + downPayLoad;
    }
}
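To see the Writable contract in isolation, here is a minimal round-trip sketch (assuming it sits in the same zidongyi package so it can see the package-private class): it serializes a KpiWritable to a byte array and reads it back, which only works because readFields() consumes the fields in the same order write() produced them.

package zidongyi;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class KpiWritableRoundTrip {
    public static void main(String[] args) throws Exception {
        // Serialize with write(DataOutput), as Hadoop does during the shuffle
        KpiWritable original = new KpiWritable("24", "27", "2481", "24681");
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize with readFields(DataInput); field order must mirror write()
        KpiWritable copy = new KpiWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy); // prints: 24	27	2481	24681
    }
}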
 





Upload the data to HDFS (for example: hadoop fs -put HTTP_20130313143750.dat hdfs://192.168.1.100:9000/input/):

 

 

Console output of the run:

15/02/22 00:04:22 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
15/02/22 00:04:22 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
15/02/22 00:04:22 WARN mapred.JobClient: No job jar file set.  User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
15/02/22 00:04:22 INFO input.FileInputFormat: Total input paths to process : 1
15/02/22 00:04:22 WARN snappy.LoadSnappy: Snappy native library not loaded
15/02/22 00:04:23 INFO mapred.JobClient: Running job: job_local1887351217_0001
15/02/22 00:04:23 INFO mapred.LocalJobRunner: Waiting for map tasks
15/02/22 00:04:23 INFO mapred.LocalJobRunner: Starting task: attempt_local1887351217_0001_m_000000_0
15/02/22 00:04:23 INFO mapred.Task:  Using ResourceCalculatorPlugin : null
15/02/22 00:04:23 INFO mapred.MapTask: Processing split: hdfs://192.168.1.100:9000/input/HTTP_20130313143750.dat:0+2214
15/02/22 00:04:23 INFO mapred.MapTask: io.sort.mb = 100
15/02/22 00:04:23 INFO mapred.MapTask: data buffer = 79691776/99614720
15/02/22 00:04:23 INFO mapred.MapTask: record buffer = 262144/327680
15/02/22 00:04:23 INFO mapred.MapTask: Starting flush of map output
15/02/22 00:04:23 INFO mapred.MapTask: Finished spill 0
15/02/22 00:04:23 INFO mapred.Task: Task:attempt_local1887351217_0001_m_000000_0 is done. And is in the process of commiting
15/02/22 00:04:23 INFO mapred.LocalJobRunner:
15/02/22 00:04:23 INFO mapred.Task: Task 'attempt_local1887351217_0001_m_000000_0' done.
15/02/22 00:04:23 INFO mapred.LocalJobRunner: Finishing task: attempt_local1887351217_0001_m_000000_0
15/02/22 00:04:23 INFO mapred.LocalJobRunner: Map task executor complete.
15/02/22 00:04:23 INFO mapred.Task:  Using ResourceCalculatorPlugin : null
15/02/22 00:04:23 INFO mapred.LocalJobRunner:
15/02/22 00:04:23 INFO mapred.Merger: Merging 1 sorted segments
15/02/22 00:04:23 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 1011 bytes
15/02/22 00:04:23 INFO mapred.LocalJobRunner:
15/02/22 00:04:24 INFO mapred.Task: Task:attempt_local1887351217_0001_r_000000_0 is done. And is in the process of commiting
15/02/22 00:04:24 INFO mapred.LocalJobRunner:
15/02/22 00:04:24 INFO mapred.Task: Task attempt_local1887351217_0001_r_000000_0 is allowed to commit now
15/02/22 00:04:24 INFO mapred.JobClient:  map 100% reduce 0%
15/02/22 00:04:24 INFO output.FileOutputCommitter: Saved output of task 'attempt_local1887351217_0001_r_000000_0' to hdfs://192.168.1.100:9000/output/out02
15/02/22 00:04:24 INFO mapred.LocalJobRunner: reduce > reduce
15/02/22 00:04:24 INFO mapred.Task: Task 'attempt_local1887351217_0001_r_000000_0' done.
15/02/22 00:04:25 INFO mapred.JobClient:  map 100% reduce 100%
15/02/22 00:04:25 INFO mapred.JobClient: Job complete: job_local1887351217_0001
15/02/22 00:04:25 INFO mapred.JobClient: Counters: 19
15/02/22 00:04:25 INFO mapred.JobClient:   File Output Format Counters
15/02/22 00:04:25 INFO mapred.JobClient:     Bytes Written=556
15/02/22 00:04:25 INFO mapred.JobClient:   File Input Format Counters
15/02/22 00:04:25 INFO mapred.JobClient:     Bytes Read=2214
15/02/22 00:04:25 INFO mapred.JobClient:   FileSystemCounters
15/02/22 00:04:25 INFO mapred.JobClient:     FILE_BYTES_READ=1365
15/02/22 00:04:25 INFO mapred.JobClient:     HDFS_BYTES_READ=4428
15/02/22 00:04:25 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=141054
15/02/22 00:04:25 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=556
15/02/22 00:04:25 INFO mapred.JobClient:   Map-Reduce Framework
15/02/22 00:04:25 INFO mapred.JobClient:     Map output materialized bytes=1015
15/02/22 00:04:25 INFO mapred.JobClient:     Map input records=22
15/02/22 00:04:25 INFO mapred.JobClient:     Reduce shuffle bytes=0
15/02/22 00:04:25 INFO mapred.JobClient:     Spilled Records=44
15/02/22 00:04:25 INFO mapred.JobClient:     Map output bytes=965
15/02/22 00:04:25 INFO mapred.JobClient:     Total committed heap usage (bytes)=323878912
15/02/22 00:04:25 INFO mapred.JobClient:     Combine input records=0
15/02/22 00:04:25 INFO mapred.JobClient:     SPLIT_RAW_BYTES=120
15/02/22 00:04:25 INFO mapred.JobClient:     Reduce input records=22
15/02/22 00:04:25 INFO mapred.JobClient:     Reduce input groups=21
15/02/22 00:04:25 INFO mapred.JobClient:     Combine output records=0
15/02/22 00:04:25 INFO mapred.JobClient:     Reduce output records=21
15/02/22 00:04:25 INFO mapred.JobClient:     Map output records=22


 

 

The result displayed in HDFS:

(Screenshot: the job's output file under hdfs://192.168.1.100:9000/output/out02)
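As a cross-check against the sample data below: phone number 13560439658 appears in two input records, with traffic fields 18/15/1116/954 and 15/9/918/4938, so its line in the output file (the key, then KpiWritable.toString(), all tab-separated) should read:

13560439658	33	24	2034	5892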


The sample data:

 

The record format is: report timestamp, phone number, AP MAC, AC MAC, visited URL, URL category, number of upstream packets, number of downstream packets, total upstream bytes, total downstream bytes, and HTTP response status.
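As a quick check of the field indices the mapper depends on, here is a small sketch that splits the first sample record below on tabs. It assumes, as the mapper's splited[6]..splited[9] accesses require, that a missing URL category still occupies an empty tab-separated slot in the raw file:

public class FieldIndexCheck {
    public static void main(String[] args) {
        // First record of the sample data below; the empty slot after the URL
        // is the (missing) URL category field
        String line = "1363157985066\t13726230503\t00-FD-07-A4-72-B8:CMCC\t120.196.100.82"
                + "\ti02.c.aliimg.com\t\t24\t27\t2481\t24681\t200";
        String[] splited = line.split("\t");
        System.out.println("msisdn      = " + splited[1]); // 13726230503
        System.out.println("upPackNum   = " + splited[6]); // 24
        System.out.println("downPackNum = " + splited[7]); // 27
        System.out.println("upPayLoad   = " + splited[8]); // 2481
        System.out.println("downPayLoad = " + splited[9]); // 24681
    }
}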

 

 

1363157985066     13726230503  00-FD-07-A4-72-B8:CMCC  120.196.100.82      i02.c.aliimg.com           24    27    2481       24681     200
1363157995052     13826544101  5C-0E-8B-C7-F1-E0:CMCC 120.197.40.4                4     0     264  0     200
1363157991076     13926435656  20-10-7A-28-CC-0A:CMCC  120.196.100.99                    2     4     132  1512       200
1363154400022     13926251106  5C-0E-8B-8B-B1-50:CMCC  120.197.40.4                4     0     240  0     200
1363157993044     18211575961  94-71-AC-CD-E6-18:CMCC-EASY     120.196.100.99      iface.qiyi.com 视频网站       15    12       1527       2106       200
1363157995074     84138413       5C-0E-8B-8C-E8-20:7DaysInn     120.197.40.4  122.72.52.12         20    16    4116       1432       200
1363157993055     13560439658  C4-17-FE-BA-DE-D9:CMCC       120.196.100.99                    18    15    1116 954  200
1363157995033     15920133257  5C-0E-8B-C7-BA-20:CMCC 120.197.40.4  sug.so.360.cn  信息安全       20    20    3156       2936       200
1363157983019     13719199419  68-A1-B7-03-07-B1:CMCC-EASY      120.196.100.82                    4     0     240  0     200
1363157984041     13660577991  5C-0E-8B-92-5C-20:CMCC-EASY      120.197.40.4  s19.cnzz.com  站点统计       24    9       6960       690  200
1363157973098     15013685858  5C-0E-8B-C7-F7-90:CMCC  120.197.40.4  rank.ie.sogou.com  搜索引擎       28    27    3659       3538       200
1363157986029     15989002119  E8-99-C4-4E-93-E0:CMCC-EASY      120.196.100.99      www.umeng.com   站点统计       3       3     1938       180  200
1363157992093     13560439658  C4-17-FE-BA-DE-D9:CMCC       120.196.100.99                    15    9     918  4938       200
1363157986041     13480253104  5C-0E-8B-C7-FC-80:CMCC-EASY     120.197.40.4                3     3     180  180  200
1363157984040     13602846565  5C-0E-8B-8B-B6-00:CMCC  120.197.40.4  2052.flash2-http.qq.com       综合门户       15    12       1938       2910       200
1363157995093     13922314466  00-FD-07-A2-EC-BA:CMCC       120.196.100.82      img.qfc.cn             12    12    3008       3720       200
1363157982040     13502468823  5C-0A-5B-6A-0B-D4:CMCC-EASY    120.196.100.99      y0.ifengimg.com    综合门户       57       102  7335       110349    200
1363157986072     18320173382  84-25-DB-4F-10-1A:CMCC-EASY     120.196.100.99      input.shouji.sogou.com  搜索引擎       21    18    9531       2412       200
1363157990043     13925057413  00-1F-64-E1-E6-9A:CMCC  120.196.100.55      t3.baidu.com   搜索引擎       69    63    11058       48243     200
1363157988072     13760778710  00-FD-07-A4-7B-08:CMCC  120.196.100.82                    2     2     120  120  200
1363157985079     13823070001  20-7C-8F-70-68-1F:CMCC   120.196.100.99                    6     3     360  180  200
1363157985069     13600217502  00-1F-64-E2-E8-B1:CMCC  120.196.100.55                    18    138  1080       186852    200
 


 

 
