大数据学习笔记【连载】

2.MapReduce原理

逻辑上:

1、split

2、map

3、shuffle

4、reduce

四个过程

物理上:

JobTracker节点:JobTracker创建每一个Task(即MapTask和ReduceTask)

并将它们分发到各个TaskTracker服务中去执行。负责调度Job的每一个子任务task运行于TaskTracker上。

TaskTracker节点:运行在多个节点上的slave服务。TaskTracker主动与JobTracker通信,接收作业,并负责直接执行每一个任务。TaskTracker都需要运行在HDFS的DataNode上

 

3.hdfs存储机制

1) client端发送写文件请求,namenode检查文件是否存在,如果已存在,直接返回错误信息,否则,发送给client一些可用的datanode节点
2) client将文件分块,并行存储到不同节点上datanode上,发送完成后,client同时发送信息给namenode和datanode
3) namenode收到client的信息后,发送确认信息给datanode
4) datanode同时收到namenode和client的确认信息后,提交写操作。

4.用MapReduce找出存在公共好友的两个人

数据样本:

A:B,C

B:A,E

C:A,D

D:C

E:B

 

import java.io.IOException;

import java.lang.Iterable;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
//import java.util.Collection;
//import java.util.HashMap;
//import java.util.Map.Entry;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
//import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import org.apache.commons.lang.StringUtils;

 

public class FindUsers extends Configuredimplements Tool  {

   

   static class Map extends Mapper {

       public void map(LongWritable key, Text val, Context context) throwsIOException, InterruptedException {

       

           String line = val.toString();

           String user = line.split(":")[0];

           String[] friends = line.split(":")[1].split(",");

           String joined_friends = StringUtils.join(friends, ",");

           for(String friend : friends) {       

                int result =friend.compareTo(user);

                if(result > 0) {

                    context.write(new Text(user+ "," + friend), new Text(joined_friends));

                }

                else if(result < 0) {

                    context.write(newText(friend + "," + user), new Text(joined_friends));

                }

           }

       }

    }

   

   static class Reduce extends Reducer {

       public void reduce(Text key, Iterable vals, Context context)throws IOException, InterruptedException {

           String string_key = key.toString();

           String[] users = string_key.split(",");

           ArrayList list = new ArrayList();

           

           int i = 0;

           

           String state = "Off";

           for(Text val : vals) {

                i++;

                String[] all_people =val.toString().split(",");

                ArrayListinner_list = new ArrayList();

               

                if(state.equals("Off")) {

                    for(String one_people:all_people) {

                        list.add(one_people);

                        state = "On";

                        continue;

                    }

                   

                }

               

                if(state.equals("On")) {

                    for(String one_people:all_people) {

                       inner_list.add(one_people);

                    }

                }

 

                list.retainAll(inner_list);

           }

           

           if( ! users[0].equals(users[1])) {

                for(String user : users) {

                    if(list.contains(user))list.remove(user);

                }

                if(list.size() >= 1&& i >=2) {

                    context.write(newText(StringUtils.join(users, ",")), newText(StringUtils.join(list,",")));

                }

           }

       }

    }

 

   @Override

   public int run(String[] args) throws Exception {

           

       Job job = Job.getInstance(getConf());

       job.setJarByClass(getClass());

       

       job.setMapperClass(Map.class);

       job.setReducerClass(Reduce.class);

       

       //job.setNumReduceTasks(0);

       job.setMapOutputKeyClass(Text.class);

       job.setOutputValueClass(Text.class);

       job.setOutputKeyClass(Text.class);

       job.setOutputValueClass(Text.class);

       job.setInputFormatClass(TextInputFormat.class);

       job.setOutputFormatClass(TextOutputFormat.class);

       

       FileInputFormat.addInputPath(job, new Path(args[0]));

       FileOutputFormat.setOutputPath(job, new Path(args[1]));

 

       return job.waitForCompletion(true) ? 0 : 1;

    }

 

   

   public static void main(String[] args) throws Exception {

        ToolRunner.run(new FindUsers(), args);

       

    }

}

 

你可能感兴趣的:(大数据学习笔记【连载】)