MapReduce Source Code Walkthrough (Part 1)

 1 /**
 2  * Licensed to the Apache Software Foundation (ASF) under one
 3  * or more contributor license agreements.  See the NOTICE file
 4  * distributed with this work for additional information
 5  * regarding copyright ownership.  The ASF licenses this file
 6  * to you under the Apache License, Version 2.0 (the
 7  * "License"); you may not use this file except in compliance
 8  * with the License.  You may obtain a copy of the License at
 9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 package org.apache.hadoop.mapreduce.lib.input;
20 
21 import org.apache.hadoop.classification.InterfaceAudience;
22 import org.apache.hadoop.classification.InterfaceStability;
23 import org.apache.hadoop.fs.Path;
24 import org.apache.hadoop.io.LongWritable;
25 import org.apache.hadoop.io.Text;
26 import org.apache.hadoop.io.compress.CompressionCodec;
27 import org.apache.hadoop.io.compress.CompressionCodecFactory;
28 import org.apache.hadoop.io.compress.SplittableCompressionCodec;
29 import org.apache.hadoop.mapreduce.InputFormat;
30 import org.apache.hadoop.mapreduce.InputSplit;
31 import org.apache.hadoop.mapreduce.JobContext;
32 import org.apache.hadoop.mapreduce.RecordReader;
33 import org.apache.hadoop.mapreduce.TaskAttemptContext;
34 
35 import com.google.common.base.Charsets;
36 
37 /** An {@link InputFormat} for plain text files.  Files are broken into lines.
38  * Either linefeed or carriage-return are used to signal end of line.  Keys are
39  * the position in the file, and values are the line of text.. */
40 @InterfaceAudience.Public
41 @InterfaceStability.Stable
 42 public class TextInputFormat extends FileInputFormat<LongWritable, Text> {
43 
44   @Override
 45   public RecordReader<LongWritable, Text> 
46     createRecordReader(InputSplit split,
47                        TaskAttemptContext context) {
48     String delimiter = context.getConfiguration().get(
49         "textinputformat.record.delimiter");
50     byte[] recordDelimiterBytes = null;
51     if (null != delimiter)
52       recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
53     return new LineRecordReader(recordDelimiterBytes);
54   }
55 
56   @Override
57   protected boolean isSplitable(JobContext context, Path file) {
58     final CompressionCodec codec =
59       new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
60     if (null == codec) {
61       return true;
62     }
63     return codec instanceof SplittableCompressionCodec;
64   }
65 
66 }
TextInputFormat

Its parent class (TextInputFormat itself parses each line of a file into a key/value pair):

  1 /**
  2  * Licensed to the Apache Software Foundation (ASF) under one
  3  * or more contributor license agreements.  See the NOTICE file
  4  * distributed with this work for additional information
  5  * regarding copyright ownership.  The ASF licenses this file
  6  * to you under the Apache License, Version 2.0 (the
  7  * "License"); you may not use this file except in compliance
  8  * with the License.  You may obtain a copy of the License at
  9  *
 10  *     http://www.apache.org/licenses/LICENSE-2.0
 11  *
 12  * Unless required by applicable law or agreed to in writing, software
 13  * distributed under the License is distributed on an "AS IS" BASIS,
 14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15  * See the License for the specific language governing permissions and
 16  * limitations under the License.
 17  */
 18 
 19 package org.apache.hadoop.mapreduce.lib.input;
 20 
 21 import java.io.IOException;
 22 import java.util.ArrayList;
 23 import java.util.List;
 24 
 25 import org.apache.commons.logging.Log;
 26 import org.apache.commons.logging.LogFactory;
 27 import org.apache.hadoop.classification.InterfaceAudience;
 28 import org.apache.hadoop.classification.InterfaceStability;
 29 import org.apache.hadoop.conf.Configuration;
 30 import org.apache.hadoop.fs.FileStatus;
 31 import org.apache.hadoop.fs.FileSystem;
 32 import org.apache.hadoop.fs.LocatedFileStatus;
 33 import org.apache.hadoop.fs.Path;
 34 import org.apache.hadoop.fs.PathFilter;
 35 import org.apache.hadoop.fs.BlockLocation;
 36 import org.apache.hadoop.fs.RemoteIterator;
 37 import org.apache.hadoop.mapred.LocatedFileStatusFetcher;
 38 import org.apache.hadoop.mapred.SplitLocationInfo;
 39 import org.apache.hadoop.mapreduce.InputFormat;
 40 import org.apache.hadoop.mapreduce.InputSplit;
 41 import org.apache.hadoop.mapreduce.Job;
 42 import org.apache.hadoop.mapreduce.JobContext;
 43 import org.apache.hadoop.mapreduce.Mapper;
 44 import org.apache.hadoop.mapreduce.security.TokenCache;
 45 import org.apache.hadoop.util.ReflectionUtils;
 46 import org.apache.hadoop.util.StringUtils;
 47 
 48 import com.google.common.base.Stopwatch;
 49 import com.google.common.collect.Lists;
 50 
 51 /** 
 52  * A base class for file-based {@link InputFormat}s.
 53  * 
 54  * FileInputFormat is the base class for all file-based
 55  * InputFormats. This provides a generic implementation of
 56  * {@link #getSplits(JobContext)}.
 57  * Subclasses of FileInputFormat can also override the
 58  * {@link #isSplitable(JobContext, Path)} method to ensure input-files are
 59  * not split-up and are processed as a whole by {@link Mapper}s.
 60  */
 61 @InterfaceAudience.Public
 62 @InterfaceStability.Stable
 63 public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
 64   public static final String INPUT_DIR =
 65     "mapreduce.input.fileinputformat.inputdir";
 66   public static final String SPLIT_MAXSIZE =
 67     "mapreduce.input.fileinputformat.split.maxsize";
 68   public static final String SPLIT_MINSIZE =
 69     "mapreduce.input.fileinputformat.split.minsize";
 70   public static final String PATHFILTER_CLASS =
 71     "mapreduce.input.pathFilter.class";
 72   public static final String NUM_INPUT_FILES =
 73     "mapreduce.input.fileinputformat.numinputfiles";
 74   public static final String INPUT_DIR_RECURSIVE =
 75     "mapreduce.input.fileinputformat.input.dir.recursive";
 76   public static final String LIST_STATUS_NUM_THREADS =
 77     "mapreduce.input.fileinputformat.list-status.num-threads";
 78   public static final int DEFAULT_LIST_STATUS_NUM_THREADS = 1;
 79 
 80   private static final Log LOG = LogFactory.getLog(FileInputFormat.class);
 81 
 82   private static final double SPLIT_SLOP = 1.1;   // 10% slop
 83 
 84   @Deprecated
 85   public static enum Counter {
 86     BYTES_READ
 87   }
 88 
 89   private static final PathFilter hiddenFileFilter = new PathFilter(){
 90       public boolean accept(Path p){
 91         String name = p.getName();
 92         return !name.startsWith("_") && !name.startsWith(".");
 93       }
 94     };
 95 
 96   /**
 97    * Proxy PathFilter that accepts a path only if all filters given in the
 98    * constructor do. Used by the listPaths() to apply the built-in
 99    * hiddenFileFilter together with a user provided one (if any).
100    */
101   private static class MultiPathFilter implements PathFilter {
102     private List<PathFilter> filters;
103 
104     public MultiPathFilter(List<PathFilter> filters) {
105       this.filters = filters;
106     }
107 
108     public boolean accept(Path path) {
109       for (PathFilter filter : filters) {
110         if (!filter.accept(path)) {
111           return false;
112         }
113       }
114       return true;
115     }
116   }
117 
118   /**
119    * @param job
120    *          the job to modify
121    * @param inputDirRecursive
122    */
123   public static void setInputDirRecursive(Job job,
124       boolean inputDirRecursive) {
125     job.getConfiguration().setBoolean(INPUT_DIR_RECURSIVE,
126         inputDirRecursive);
127   }
128 
129   /**
130    * @param job
131    *          the job to look at.
132    * @return should the files to be read recursively?
133    */
134   public static boolean getInputDirRecursive(JobContext job) {
135     return job.getConfiguration().getBoolean(INPUT_DIR_RECURSIVE,
136         false);
137   }
138 
139   /**
140    * Get the lower bound on split size imposed by the format.
141    * @return the number of bytes of the minimal split for this format
142    */
143   protected long getFormatMinSplitSize() {
144     return 1;
145   }
146 
147   /**
148    * Is the given filename splitable? Usually, true, but if the file is
149    * stream compressed, it will not be.
150    *
151    * FileInputFormat implementations can override this and return
152    * false to ensure that individual input files are never split-up
153    * so that {@link Mapper}s process entire files.
154    *
155    * @param context the job context
156    * @param filename the file name to check
157    * @return is this file splitable?
158    */
159   protected boolean isSplitable(JobContext context, Path filename) {
160     return true;
161   }
162 
163   /**
164    * Set a PathFilter to be applied to the input paths for the map-reduce job.
165    * @param job the job to modify
166    * @param filter the PathFilter class use for filtering the input paths.
167    */
168   public static void setInputPathFilter(Job job,
169                                         Class<? extends PathFilter> filter) {
170     job.getConfiguration().setClass(PATHFILTER_CLASS, filter,
171                                     PathFilter.class);
172   }
173 
174   /**
175    * Set the minimum input split size
176    * @param job the job to modify
177    * @param size the minimum size
178    */
179   public static void setMinInputSplitSize(Job job,
180                                           long size) {
181     job.getConfiguration().setLong(SPLIT_MINSIZE, size);
182   }
183 
184   /**
185    * Get the minimum split size
186    * @param job the job
187    * @return the minimum number of bytes that can be in a split
188    */
189   public static long getMinSplitSize(JobContext job) {
190     return job.getConfiguration().getLong(SPLIT_MINSIZE, 1L);
191   }
192 
193   /**
194    * Set the maximum split size
195    * @param job the job to modify
196    * @param size the maximum split size
197    */
198   public static void setMaxInputSplitSize(Job job,
199                                           long size) {
200     job.getConfiguration().setLong(SPLIT_MAXSIZE, size);
201   }
202 
203   /**
204    * Get the maximum split size.
205    * @param context the job to look at.
206    * @return the maximum number of bytes a split can include
207    */
208   public static long getMaxSplitSize(JobContext context) {
209     return context.getConfiguration().getLong(SPLIT_MAXSIZE,
210                                               Long.MAX_VALUE);
211   }
212 
213   /**
214    * Get a PathFilter instance of the filter set for the input paths.
215    *
216    * @return the PathFilter instance set for the job, NULL if none has been set.
217    */
218   public static PathFilter getInputPathFilter(JobContext context) {
219     Configuration conf = context.getConfiguration();
220     Class<?> filterClass = conf.getClass(PATHFILTER_CLASS, null,
221         PathFilter.class);
222     return (filterClass != null) ?
223       (PathFilter) ReflectionUtils.newInstance(filterClass, conf) : null;
224   }
225 
226   /** List input directories.
227    * Subclasses may override to, e.g., select only files matching a regular
228    * expression.
229    *
230    * @param job the job to list input paths for
231    * @return array of FileStatus objects
232    * @throws IOException if zero items.
233    */
234   protected List<FileStatus> listStatus(JobContext job
235                                         ) throws IOException {
236     Path[] dirs = getInputPaths(job);
237     if (dirs.length == 0) {
238       throw new IOException("No input paths specified in job");
239     }
240 
241     // get tokens for all the required FileSystems..
242     TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs,
243                                         job.getConfiguration());
244 
245     // Whether we need to recursive look into the directory structure
246     boolean recursive = getInputDirRecursive(job);
247 
248     // creates a MultiPathFilter with the hiddenFileFilter and the
249     // user provided one (if any).
250     List<PathFilter> filters = new ArrayList<PathFilter>();
251     filters.add(hiddenFileFilter);
252     PathFilter jobFilter = getInputPathFilter(job);
253     if (jobFilter != null) {
254       filters.add(jobFilter);
255     }
256     PathFilter inputFilter = new MultiPathFilter(filters);
257 
258     List<FileStatus> result = null;
259 
260     int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS,
261         DEFAULT_LIST_STATUS_NUM_THREADS);
262     Stopwatch sw = new Stopwatch().start();
263     if (numThreads == 1) {
264       result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
265     } else {
266       Iterable<FileStatus> locatedFiles = null;
267       try {
268         LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
269             job.getConfiguration(), dirs, recursive, inputFilter, true);
270         locatedFiles = locatedFileStatusFetcher.getFileStatuses();
271       } catch (InterruptedException e) {
272         throw new IOException("Interrupted while getting file statuses");
273       }
274       result = Lists.newArrayList(locatedFiles);
275     }
276 
277     sw.stop();
278     if (LOG.isDebugEnabled()) {
279       LOG.debug("Time taken to get FileStatuses: " + sw.elapsedMillis());
280     }
281     LOG.info("Total input paths to process : " + result.size());
282     return result;
283   }
284 
285   private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
286       PathFilter inputFilter, boolean recursive) throws IOException {
287     List<FileStatus> result = new ArrayList<FileStatus>();
288     List<IOException> errors = new ArrayList<IOException>();
289     for (int i=0; i < dirs.length; ++i) {
290       Path p = dirs[i];
291       FileSystem fs = p.getFileSystem(job.getConfiguration());
292       FileStatus[] matches = fs.globStatus(p, inputFilter);
293       if (matches == null) {
294         errors.add(new IOException("Input path does not exist: " + p));
295       } else if (matches.length == 0) {
296         errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
297       } else {
298         for (FileStatus globStat: matches) {
299           if (globStat.isDirectory()) {
300             RemoteIterator<LocatedFileStatus> iter =
301                 fs.listLocatedStatus(globStat.getPath());
302             while (iter.hasNext()) {
303               LocatedFileStatus stat = iter.next();
304               if (inputFilter.accept(stat.getPath())) {
305                 if (recursive && stat.isDirectory()) {
306                   addInputPathRecursively(result, fs, stat.getPath(),
307                       inputFilter);
308                 } else {
309                   result.add(stat);
310                 }
311               }
312             }
313           } else {
314             result.add(globStat);
315           }
316         }
317       }
318     }
319 
320     if (!errors.isEmpty()) {
321       throw new InvalidInputException(errors);
322     }
323     return result;
324   }
325 
326   /**
327    * Add files in the input path recursively into the results.
328    * @param result
329    *          The List to store all files.
330    * @param fs
331    *          The FileSystem.
332    * @param path
333    *          The input path.
334    * @param inputFilter
335    *          The input filter that can be used to filter files/dirs.
336    * @throws IOException
337    */
338   protected void addInputPathRecursively(List<FileStatus> result,
339       FileSystem fs, Path path, PathFilter inputFilter)
340       throws IOException {
341     RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
342     while (iter.hasNext()) {
343       LocatedFileStatus stat = iter.next();
344       if (inputFilter.accept(stat.getPath())) {
345         if (stat.isDirectory()) {
346           addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
347         } else {
348           result.add(stat);
349         }
350       }
351     }
352   }
353 
354 
355   /**
356    * A factory that makes the split for this class. It can be overridden
357    * by sub-classes to make sub-types
358    */
359   protected FileSplit makeSplit(Path file, long start, long length,
360                                 String[] hosts) {
361     return new FileSplit(file, start, length, hosts);
362   }
363 
364   /**
365    * A factory that makes the split for this class. It can be overridden
366    * by sub-classes to make sub-types
367    */
368   protected FileSplit makeSplit(Path file, long start, long length,
369                                 String[] hosts, String[] inMemoryHosts) {
370     return new FileSplit(file, start, length, hosts, inMemoryHosts);
371   }
372 
373   /**
374    * Generate the list of files and make them into FileSplits.
375    * @param job the job context
376    * @throws IOException
377    */
378   public List<InputSplit> getSplits(JobContext job) throws IOException {
379     Stopwatch sw = new Stopwatch().start();
380     long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
381     long maxSize = getMaxSplitSize(job);
382 
383     // generate splits
384     List<InputSplit> splits = new ArrayList<InputSplit>();
385     List<FileStatus> files = listStatus(job);
386     for (FileStatus file: files) {
387       Path path = file.getPath();
388       long length = file.getLen();
389       if (length != 0) {
390         BlockLocation[] blkLocations;
391         if (file instanceof LocatedFileStatus) {
392           blkLocations = ((LocatedFileStatus) file).getBlockLocations();
393         } else {
394           FileSystem fs = path.getFileSystem(job.getConfiguration());
395           blkLocations = fs.getFileBlockLocations(file, 0, length);
396         }
397         if (isSplitable(job, path)) {
398           long blockSize = file.getBlockSize();
399           long splitSize = computeSplitSize(blockSize, minSize, maxSize);
400 
401           long bytesRemaining = length;
402           while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
403             int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
404             splits.add(makeSplit(path, length-bytesRemaining, splitSize,
405                         blkLocations[blkIndex].getHosts(),
406                         blkLocations[blkIndex].getCachedHosts()));
407             bytesRemaining -= splitSize;
408           }
409 
410           if (bytesRemaining != 0) {
411             int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
412             splits.add(makeSplit(path, length-bytesRemaining, bytesRemaining,
413                        blkLocations[blkIndex].getHosts(),
414                        blkLocations[blkIndex].getCachedHosts()));
415           }
416         } else { // not splitable
417           splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(),
418                       blkLocations[0].getCachedHosts()));
419         }
420       } else {
421         //Create empty hosts array for zero length files
422         splits.add(makeSplit(path, 0, length, new String[0]));
423       }
424     }
425     // Save the number of input files for metrics/loadgen
426     job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
427     sw.stop();
428     if (LOG.isDebugEnabled()) {
429       LOG.debug("Total # of splits generated by getSplits: " + splits.size()
430           + ", TimeTaken: " + sw.elapsedMillis());
431     }
432     return splits;
433   }
434 
435   protected long computeSplitSize(long blockSize, long minSize,
436                                   long maxSize) {
437     return Math.max(minSize, Math.min(maxSize, blockSize));
438   }
439 
440   protected int getBlockIndex(BlockLocation[] blkLocations,
441                               long offset) {
442     for (int i = 0 ; i < blkLocations.length; i++) {
443       // is the offset inside this block?
444       if ((blkLocations[i].getOffset() <= offset) &&
445           (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())){
446         return i;
447       }
448     }
449     BlockLocation last = blkLocations[blkLocations.length -1];
450     long fileLength = last.getOffset() + last.getLength() -1;
451     throw new IllegalArgumentException("Offset " + offset +
452                                        " is outside of file (0.." +
453                                        fileLength + ")");
454   }
455 
456   /**
457    * Sets the given comma separated paths as the list of inputs
458    * for the map-reduce job.
459    *
460    * @param job the job
461    * @param commaSeparatedPaths Comma separated paths to be set as
462    *        the list of inputs for the map-reduce job.
463    */
464   public static void setInputPaths(Job job,
465                                    String commaSeparatedPaths
466                                    ) throws IOException {
467     setInputPaths(job, StringUtils.stringToPath(
468                         getPathStrings(commaSeparatedPaths)));
469   }
470 
471   /**
472    * Add the given comma separated paths to the list of inputs for
473    * the map-reduce job.
474    *
475    * @param job The job to modify
476    * @param commaSeparatedPaths Comma separated paths to be added to
477    *        the list of inputs for the map-reduce job.
478    */
479   public static void addInputPaths(Job job,
480                                    String commaSeparatedPaths
481                                    ) throws IOException {
482     for (String str : getPathStrings(commaSeparatedPaths)) {
483       addInputPath(job, new Path(str));
484     }
485   }
486 
487   /**
488    * Set the array of {@link Path}s as the list of inputs
489    * for the map-reduce job.
490    *
491    * @param job The job to modify
492    * @param inputPaths the {@link Path}s of the input directories/files
493    *        for the map-reduce job.
494    */
495   public static void setInputPaths(Job job,
496                                    Path... inputPaths) throws IOException {
497     Configuration conf = job.getConfiguration();
498     Path path = inputPaths[0].getFileSystem(conf).makeQualified(inputPaths[0]);
499     StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
500     for(int i = 1; i < inputPaths.length;i++) {
501       str.append(StringUtils.COMMA_STR);
502       path = inputPaths[i].getFileSystem(conf).makeQualified(inputPaths[i]);
503       str.append(StringUtils.escapeString(path.toString()));
504     }
505     conf.set(INPUT_DIR, str.toString());
506   }
507 
508   /**
509    * Add a {@link Path} to the list of inputs for the map-reduce job.
510    *
511    * @param job The {@link Job} to modify
512    * @param path {@link Path} to be added to the list of inputs for
513    *        the map-reduce job.
514    */
515   public static void addInputPath(Job job,
516                                   Path path) throws IOException {
517     Configuration conf = job.getConfiguration();
518     path = path.getFileSystem(conf).makeQualified(path);
519     String dirStr = StringUtils.escapeString(path.toString());
520     String dirs = conf.get(INPUT_DIR);
521     conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr);
522   }
523 
524   // This method escapes commas in the glob pattern of the given paths.
525   private static String[] getPathStrings(String commaSeparatedPaths) {
526     int length = commaSeparatedPaths.length();
527     int curlyOpen = 0;
528     int pathStart = 0;
529     boolean globPattern = false;
530     List<String> pathStrings = new ArrayList<String>();
531 
532     for (int i=0; i<length; i++) {
533       char ch = commaSeparatedPaths.charAt(i);
534       switch(ch) {
535         case '{' : {
536           curlyOpen++;
537           if (!globPattern) {
538             globPattern = true;
539           }
540           break;
541         }
542         case '}' : {
543           curlyOpen--;
544           if (curlyOpen == 0 && globPattern) {
545             globPattern = false;
546           }
547           break;
548         }
549         case ',' : {
550           if (!globPattern) {
551             pathStrings.add(commaSeparatedPaths.substring(pathStart, i));
552             pathStart = i + 1 ;
553           }
554           break;
555         }
556         default:
557           continue; // nothing special to do for this character
558       }
559     }
560     pathStrings.add(commaSeparatedPaths.substring(pathStart, length));
561 
562     return pathStrings.toArray(new String[0]);
563   }
564 
565   /**
566    * Get the list of input {@link Path}s for the map-reduce job.
567    *
568    * @param context The job
569    * @return the list of input {@link Path}s for the map-reduce job.
570    */
571   public static Path[] getInputPaths(JobContext context) {
572     String dirs = context.getConfiguration().get(INPUT_DIR, "");
573     String [] list = StringUtils.split(dirs);
574     Path[] result = new Path[list.length];
575     for (int i = 0; i < list.length; i++) {
576       result[i] = new Path(StringUtils.unEscapeString(list[i]));
577     }
578     return result;
579   }
580 
581 }

FileInputFormat

Its parent class:

  1 /**
  2  * Licensed to the Apache Software Foundation (ASF) under one
  3  * or more contributor license agreements.  See the NOTICE file
  4  * distributed with this work for additional information
  5  * regarding copyright ownership.  The ASF licenses this file
  6  * to you under the Apache License, Version 2.0 (the
  7  * "License"); you may not use this file except in compliance
  8  * with the License.  You may obtain a copy of the License at
  9  *
 10  *     http://www.apache.org/licenses/LICENSE-2.0
 11  *
 12  * Unless required by applicable law or agreed to in writing, software
 13  * distributed under the License is distributed on an "AS IS" BASIS,
 14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15  * See the License for the specific language governing permissions and
 16  * limitations under the License.
 17  */
 18 
 19 package org.apache.hadoop.mapreduce;
 20 
 21 import java.io.IOException;
 22 import java.util.List;
 23 
 24 import org.apache.hadoop.classification.InterfaceAudience;
 25 import org.apache.hadoop.classification.InterfaceStability;
 26 import org.apache.hadoop.fs.FileSystem;
 27 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 28 
 29 /** 
 30  * InputFormat describes the input-specification for a 
 31  * Map-Reduce job. 
 32  * 
 33  * The Map-Reduce framework relies on the InputFormat of the
 34  * job to:
 35  *
 36  *
 37  *   1. Validate the input-specification of the job.
 38  *
 39  *   2. Split-up the input file(s) into logical {@link InputSplit}s, each of
 40  *      which is then assigned to an individual {@link Mapper}.
 41  *
 42  *
 43  *   3. Provide the {@link RecordReader} implementation to be used to glean
 44  *      input records from the logical InputSplit for processing by
 45  *      the {@link Mapper}.
 46  *
 47  *
 48  *
 49  * The default behavior of file-based {@link InputFormat}s, typically
 50  * sub-classes of {@link FileInputFormat}, is to split the
 51  * input into logical {@link InputSplit}s based on the total size, in
 52  * bytes, of the input files. However, the {@link FileSystem} blocksize of
 53  * the input files is treated as an upper bound for input splits. A lower bound
 54  * on the split size can be set via
 55  * <a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.input.fileinputformat.split.minsize">
 56  * mapreduce.input.fileinputformat.split.minsize</a>.
 57  *
 58  * Clearly, logical splits based on input-size is insufficient for many
 59  * applications since record boundaries are to respected. In such cases, the
 60  * application has to also implement a {@link RecordReader} on whom lies the
 61  * responsibility to respect record-boundaries and present a record-oriented
 62  * view of the logical InputSplit to the individual task.
 63  *
 64  * @see InputSplit
 65  * @see RecordReader
 66  * @see FileInputFormat
 67  */
 68 @InterfaceAudience.Public
 69 @InterfaceStability.Stable
 70 public abstract class InputFormat<K, V> {
 71 
 72   /**
 73    * Logically split the set of input files for the job.
 74    *
 75    * Each {@link InputSplit} is then assigned to an individual {@link Mapper}
 76    * for processing.
 77    *
 78    * Note: The split is a logical split of the inputs and the
 79    * input files are not physically split into chunks. For e.g. a split could
 80    * be <input-file-path, start, offset> tuple. The InputFormat
 81    * also creates the {@link RecordReader} to read the {@link InputSplit}.
 82    *
 83    * @param context job configuration.
 84    * @return an array of {@link InputSplit}s for the job.
 85    */
 86   public abstract
 87     List<InputSplit> getSplits(JobContext context
 88                                ) throws IOException, InterruptedException;
 89 
 90   /**
 91    * Create a record reader for a given split. The framework will call
 92    * {@link RecordReader#initialize(InputSplit, TaskAttemptContext)} before
 93    * the split is used.
 94    * @param split the split to be read
 95    * @param context the information about the task
 96    * @return a new record reader
 97    * @throws IOException
 98    * @throws InterruptedException
 99    */
100   public abstract
101     RecordReader<K,V> createRecordReader(InputSplit split,
102                                          TaskAttemptContext context
103                                         ) throws IOException,
104                                                  InterruptedException;
105 
106 }

The InputFormat source
   *
   * Each {@link InputSplit} is then assigned to an individual {@link Mapper}
   * for processing.
   *
   * Note: The split is a logical split of the inputs and the
   * input files are not physically split into chunks. For e.g. a split could
   * be <input-file-path, start, offset> tuple. The InputFormat
   * also creates the {@link RecordReader} to read the {@link InputSplit}.
   *
   * @param context job configuration.
   * @return an array of {@link InputSplit}s for the job.
   */
  public abstract
    List<InputSplit> getSplits(JobContext context
                               ) throws IOException, InterruptedException;

  Meaning: each input file is logically cut into a number of InputSplits (by the getSplits() method), and each split is handled by exactly one mapper task. The sketch below shows the public knobs that influence this computation.
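A minimal driver sketch (the class name, job name and command-line path argument are made up for illustration, not part of Hadoop) showing how a job can tune the split computation through FileInputFormat; raising the minimum split size yields fewer, larger splits and therefore fewer map tasks:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SplitTuningDriver {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "split-tuning-demo");
    FileInputFormat.addInputPath(job, new Path(args[0]));           // input path supplied on the command line
    FileInputFormat.setMinInputSplitSize(job, 256L * 1024 * 1024);  // raise the lower bound: splits of at least 256 MB
    FileInputFormat.setMaxInputSplitSize(job, 512L * 1024 * 1024);  // cap the upper bound at 512 MB
    // ... mapper/reducer/output settings and job.waitForCompletion(true) omitted
  }
}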

 /**
   * Create a record reader for a given split. The framework will call
   * {@link RecordReader#initialize(InputSplit, TaskAttemptContext)} before
   * the split is used.
   * @param split the split to be read
   * @param context the information about the task
   * @return a new record reader
   * @throws IOException
   * @throws InterruptedException
   */
  public abstract 
    RecordReader<K,V> createRecordReader(InputSplit split,
                                         TaskAttemptContext context
                                        ) throws IOException, 
                                                 InterruptedException;

}

  意思是:split本质上是文件内容一部分,由RecordReader来处理文件内容(键值对),进入RecordReader查看,可得该抽象类将data数据拆分成键值对,目的是输入给Mapper

/**
* The record reader breaks the data into key/value pairs for input to the
* {@link Mapper}.
* @param <KEYIN>
* @param <VALUEIN>
*/

public abstract class RecordReader<KEYIN, VALUEIN> implements Closeable {

  /**
   * Called once at initialization.
   * @param split the split that defines the range of records to read
   * @param context the information about the task
   * @throws IOException
   * @throws InterruptedException
   */

  To summarize the source-code analysis so far:

                  a file --- is passed to ---> getSplits() --- which breaks it into ---> InputSplits --- each processed by ---> a RecordReader (created by createRecordReader()) --- which produces ---> map(k1, v1)

Part 1: Splitting the input files

   Question 1: How is a file cut into splits? Look at the subclass's getSplits() method (implemented in FileInputFormat):

 1  /** 
 2    * Generate the list of files and make them into FileSplits.
 3    * @param job the job context
 4    * @throws IOException
 5    */
 6   public List<InputSplit> getSplits(JobContext job) throws IOException {
 7     Stopwatch sw = new Stopwatch().start();
 8     long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
 9     long maxSize = getMaxSplitSize(job);
10 
11     // generate splits
12     List<InputSplit> splits = new ArrayList<InputSplit>();
13     List<FileStatus> files = listStatus(job);
14     for (FileStatus file: files) {
15       Path path = file.getPath();
16       long length = file.getLen();
17       if (length != 0) {
18         BlockLocation[] blkLocations;
19         if (file instanceof LocatedFileStatus) {
20           blkLocations = ((LocatedFileStatus) file).getBlockLocations();
21         } else {
22           FileSystem fs = path.getFileSystem(job.getConfiguration());
23           blkLocations = fs.getFileBlockLocations(file, 0, length);
24         }
25         if (isSplitable(job, path)) {
26           long blockSize = file.getBlockSize();
27           long splitSize = computeSplitSize(blockSize, minSize, maxSize);
28 
29           long bytesRemaining = length;
30           while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
31             int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
32             splits.add(makeSplit(path, length-bytesRemaining, splitSize,
33                         blkLocations[blkIndex].getHosts(),
34                         blkLocations[blkIndex].getCachedHosts()));
35             bytesRemaining -= splitSize;
36           }
37 
38           if (bytesRemaining != 0) {
39             int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
40             splits.add(makeSplit(path, length-bytesRemaining, bytesRemaining,
41                        blkLocations[blkIndex].getHosts(),
42                        blkLocations[blkIndex].getCachedHosts()));
43           }
44         } else { // not splitable
45           splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(),
46                       blkLocations[0].getCachedHosts()));
47         }
48       } else { 
49         //Create empty hosts array for zero length files
50         splits.add(makeSplit(path, 0, length, new String[0]));
51       }
52     }
53     // Save the number of input files for metrics/loadgen
54     job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
55     sw.stop();
56     if (LOG.isDebugEnabled()) {
57       LOG.debug("Total # of splits generated by getSplits: " + splits.size()
58           + ", TimeTaken: " + sw.elapsedMillis());
59     }
60     return splits;
61   }
getSplits
  /** 
   * Generate the list of files and make them into FileSplits.
     (i.e., cut the input files into InputSplits)
   * @param job the job context
   * @throws IOException
   */

(1) Each split is appended to the list via splits.add(...).

[Figure 1]

(2) The element passed to add(...) is built by makeSplit(...), which performs the logical slicing.

[Figure 2]

(3) Internally, makeSplit(...) simply constructs a FileSplit.

[Figure 3]

(4) The parameters of FileSplit mean the following:

[Figure 4]

hosts is the list of nodes holding the block that contains this slice of data. A split says: start at offset start, process length bytes, and this data lives on those block(s). So a split is a logical partition -- the file is never physically cut.

The practical consequence is that creating splits does not read any data from disk; the data is still read later through the normal HDFS read path.
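A small illustration of that point (the path and host names below are invented): a FileSplit only records where to read and which hosts hold the data, and constructing one copies nothing.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class FileSplitDemo {
  public static void main(String[] args) {
    FileSplit split = new FileSplit(
        new Path("hdfs://namenode:8020/data/words.txt"),  // hypothetical input file
        0L,                                               // start: byte offset where this slice begins
        128L * 1024 * 1024,                               // length: how many bytes belong to this slice
        new String[] {"datanode1", "datanode2"});         // hosts that store the underlying block
    System.out.println(split.getPath() + " start=" + split.getStart()
        + " length=" + split.getLength());
  }
}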

 (5) How the code proceeds when the file length is non-zero

if (length != 0) {
        BlockLocation[] blkLocations;
        if (file instanceof LocatedFileStatus) {
          blkLocations = ((LocatedFileStatus) file).getBlockLocations();
        } else {
          FileSystem fs = path.getFileSystem(job.getConfiguration());
          blkLocations = fs.getFileBlockLocations(file, 0, length);
        }
        if (isSplitable(job, path)) { // not every file can be split (e.g. certain compressed or
                                      // encrypted formats); the file's structure decides splitability
          long blockSize = file.getBlockSize();
          long splitSize = computeSplitSize(blockSize, minSize, maxSize);

          long bytesRemaining = length;
          while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
            int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
            splits.add(makeSplit(path, length-bytesRemaining, splitSize, // length-bytesRemaining = start offset of
                       blkLocations[blkIndex].getHosts(),                // the next split (bytes already assigned)
                       blkLocations[blkIndex].getCachedHosts()));
            bytesRemaining -= splitSize;
          }
  1. Suppose the file size is 300 (length = 300, splitSize = 128), so initially bytesRemaining = 300.
  2. First pass: 300/128 ≈ 2.34 > SPLIT_SLOP, so makeSplit(start=0, length=128) is created.
  3. bytesRemaining = 300 - 128 = 172.
  4. Second pass: 172/128 ≈ 1.34 > SPLIT_SLOP, so makeSplit(start=300-172=128, length=128) is created.
  5. bytesRemaining = 172 - 128 = 44; now 44/128 ≈ 0.34 ≤ SPLIT_SLOP, so the while loop exits.
  6. Because bytesRemaining != 0, the tail branch creates makeSplit(start=300-44=256, length=44) -- note the last split is 44 long, not 128 (a small sketch replaying this loop follows below).
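A minimal sketch (assuming length = 300, splitSize = 128 and SPLIT_SLOP = 1.1, in arbitrary units) that replays the loop above and prints each split:

public class SplitLoopDemo {
  public static void main(String[] args) {
    long length = 300, splitSize = 128;
    double SPLIT_SLOP = 1.1;
    long bytesRemaining = length;
    while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
      System.out.println("split(start=" + (length - bytesRemaining) + ", length=" + splitSize + ")");
      bytesRemaining -= splitSize;
    }
    if (bytesRemaining != 0) {  // the tail split may be smaller than splitSize
      System.out.println("split(start=" + (length - bytesRemaining) + ", length=" + bytesRemaining + ")");
    }
    // Prints: split(start=0, length=128), split(start=128, length=128), split(start=256, length=44)
  }
}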

If the file may not be split, the following branch runs instead:

} else { // not splitable
          splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(),
                      blkLocations[0].getCachedHosts()));
        }
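This branch is also the hook for the pattern the FileInputFormat javadoc mentions: a subclass can force whole-file processing by overriding isSplitable(). A minimal sketch of that pattern (the class name is hypothetical, not part of Hadoop):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

public class WholeFileTextInputFormat extends FileInputFormat<LongWritable, Text> {
  @Override
  protected boolean isSplitable(JobContext context, Path file) {
    return false;                       // never split: the whole file becomes one split / one map task
  }
  @Override
  public RecordReader<LongWritable, Text> createRecordReader(InputSplit split,
                                                             TaskAttemptContext context) {
    return new LineRecordReader();      // still read the (single) split line by line
  }
}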

  (6) Analyzing the split/block size

380   long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));  // works out to 1L by default
381   long maxSize = getMaxSplitSize(job);                                      // defaults to Long.MAX_VALUE

[Figure 5]

    Looking at the code behind the minSize variable:

   long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));

  Stepping into getFormatMinSplitSize(): it simply returns 1L.

  /**
   * Get the lower bound on split size imposed by the format.
   * @return the number of bytes of the minimal split for this format
   */
  protected long getFormatMinSplitSize() {
    return 1;
  }

  Stepping into getMinSplitSize(): it returns the minimum split size from the configuration, or 1L when SPLIT_MINSIZE is not set.

  /**
   * Get the minimum split size
   * @param job the job
   * @return the minimum number of bytes that can be in a split
   */
  public static long getMinSplitSize(JobContext job) {
    return job.getConfiguration().getLong(SPLIT_MINSIZE, 1L);
  }

  So by default minSize works out to 1L.

The actual split size is computed at line 399 (shown below). By default both the InputSplit and the HDFS block are 128 MB; in other words, one map task processes one block's worth of data.

397        if (isSplitable(job, path)) {
398          long blockSize = file.getBlockSize();
399          long splitSize = computeSplitSize(blockSize, minSize, maxSize);
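A tiny sketch (not part of the Hadoop source; it just copies the computeSplitSize() formula, Math.max(minSize, Math.min(maxSize, blockSize))) showing how the three values interact -- with the defaults above, the split size collapses to the block size:

public class SplitSizeDemo {
  static long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return Math.max(minSize, Math.min(maxSize, blockSize));
  }
  public static void main(String[] args) {
    long blockSize = 128L * 1024 * 1024;                                        // 128 MB HDFS block
    System.out.println(computeSplitSize(blockSize, 1L, Long.MAX_VALUE));        // defaults: 134217728, one split per block
    System.out.println(computeSplitSize(blockSize, 1L, 64L * 1024 * 1024));     // maxSize wins: 64 MB splits
    System.out.println(computeSplitSize(blockSize, 256L * 1024 * 1024, Long.MAX_VALUE)); // minSize wins: 256 MB splits
  }
}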

     When the InputSplit size differs from the block size, network transfer appears. If a split is larger than a block, one block is not enough to cover the split, so data from another block (possibly on a different node) must be fetched as well. If a split is smaller than a block, the rest of the block may be processed by a different map task, which can again cause network transfer and weakens data locality.

    Because getSplits() walks the file list with a for loop, every input file is split on its own.

    [Figure 6]

 

 (7) How many splits do two 50 MB files, one 200 MB file and one empty file produce?

     The empty file still yields one (empty) split; each 50 MB file yields one split; the 200 MB file yields two splits (128 MB plus a 72 MB tail, since 72/128 ≤ SPLIT_SLOP). Five splits in total, hence five map tasks -- a quick check follows below.
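A quick check of that answer (a standalone sketch, not Hadoop code, assuming blockSize = splitSize = 128 MB and SPLIT_SLOP = 1.1):

public class HowManySplits {
  static int splitsFor(long lengthMB, long splitMB) {
    if (lengthMB == 0) return 1;                 // an empty file still yields one (empty) split
    int n = 0;
    long remaining = lengthMB;
    while ((double) remaining / splitMB > 1.1) { n++; remaining -= splitMB; }
    return remaining != 0 ? n + 1 : n;           // tail split, if any
  }
  public static void main(String[] args) {
    long[] files = {50, 50, 200, 0};
    int total = 0;
    for (long f : files) total += splitsFor(f, 128);
    System.out.println(total);                   // 5 -> five map tasks
  }
}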

 **********************************************************

Part 2: Producing map input via createRecordReader()

 (1) Reading createRecordReader()

/** An {@link InputFormat} for plain text files.  Files are broken into lines.
 * Either linefeed or carriage-return are used to signal end of line.  Keys are
 * the position in the file, and values are the line of text.. */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class TextInputFormat extends FileInputFormat<LongWritable, Text> {

  @Override
  public RecordReader<LongWritable, Text> 
    createRecordReader(InputSplit split,
                       TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get(
        "textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter)
      recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8); // the custom record delimiter must be UTF-8;
                                                                 // parsing with another encoding would fail
    return new LineRecordReader(recordDelimiterBytes);
  }
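Where does textinputformat.record.delimiter come from? It is simply a key on the job Configuration. A minimal driver sketch (class and job names are made up) that sets a custom delimiter before the job runs:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class DelimiterDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("textinputformat.record.delimiter", "\n\n");  // treat blank-line-separated paragraphs as records
    Job job = Job.getInstance(conf, "custom-delimiter-demo");
    job.setInputFormatClass(TextInputFormat.class);
    // ... mapper/reducer, input/output paths, then job.waitForCompletion(true)
  }
}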

LineRecordReader is a subclass of RecordReader:

/**
 * Treats keys as offset in file and value as line.   (key = the byte offset of the line, value = the line's text)
 */
@InterfaceAudience.LimitedPrivate({"MapReduce", "Pig"})
@InterfaceStability.Evolving
public class LineRecordReader extends RecordReader<LongWritable, Text> {
  private static final Log LOG = LogFactory.getLog(LineRecordReader.class);
  public static final String MAX_LINE_LENGTH = 
    "mapreduce.input.linerecordreader.line.maxlength";

  private long start;
  private long pos;
  private long end;

The RecordReader class:

 /**
   * Called once at initialization.
   * @param split the split that defines the range of records to read
   * @param context the information about the task
   * @throws IOException
   * @throws InterruptedException
   */
  public abstract void initialize(InputSplit split,    // initialization; runs exactly once
                                  TaskAttemptContext context
                                  ) throws IOException, InterruptedException;

  /**
   * Read the next key, value pair.
   * @return true if a key/value pair was read
   * @throws IOException
   * @throws InterruptedException
   */
  public abstract     // read the next key/value pair -- this pair becomes the map-side (k1, v1)
  boolean nextKeyValue() throws IOException, InterruptedException;

  /**
   * Get the current key
   * @return the current key or null if there is no current key
   * @throws IOException
   * @throws InterruptedException
   */
  public abstract
  KEYIN getCurrentKey() throws IOException, InterruptedException;
  
  /**
   * Get the current value.
   * @return the object that was read
   * @throws IOException
   * @throws InterruptedException
   */
  public abstract 
  VALUEIN getCurrentValue() throws IOException, InterruptedException;
  
  /**
   * The current progress of the record reader through its data.
   * @return a number between 0.0 and 1.0 that is the fraction of the data read
   * @throws IOException
   * @throws InterruptedException
   */
  public abstract float getProgress() throws IOException, InterruptedException;
  
  /**
   * Close the record reader.
   */
  public abstract void close() throws IOException;
}

    Note that key and value are never assigned anywhere visible above, yet getter methods for both are provided. So key and value are stored as fields of the class: the method bodies assign them, and getCurrentKey()/getCurrentValue() then return them.

    This is the same shape as JDBC's ResultSet: while (rs.next()) { rs.getLong(...); }

              Enumeration works the same way: hasMoreElements() asks whether there is another element, and a separate accessor fetches it -- Hashtable iteration with elements() ultimately comes down to an Enumeration as well.
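To make that analogy concrete, here is the same test-then-fetch shape in plain JDK code (a standalone toy, unrelated to Hadoop):

import java.util.Enumeration;
import java.util.Hashtable;

public class CursorPatternDemo {
  public static void main(String[] args) {
    Hashtable<String, Integer> table = new Hashtable<>();
    table.put("hello", 1);
    table.put("world", 2);
    Enumeration<String> keys = table.keys();
    while (keys.hasMoreElements()) {            // "is there another?"  -- like rr.nextKeyValue()
      String k = keys.nextElement();            // "hand it to me"      -- like rr.getCurrentKey()
      System.out.println(k + " -> " + table.get(k));
    }
  }
}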

    So the calling pattern can be read as:

          while (rr.nextKeyValue()) { key = rr.getCurrentKey(); value = rr.getCurrentValue(); map(key, value, context); }

   The source code confirms this guess -- the key and value types are already fixed there, which is why user code never writes this loop itself. The run() loop in question is quoted below the figure.

          [Figure 7]
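For readers who cannot see the screenshot, the loop being verified is Mapper.run(), quoted here from the Mapper class reproduced in full later in this article:

  public void run(Context context) throws IOException, InterruptedException {
    setup(context);
    try {
      while (context.nextKeyValue()) {
        map(context.getCurrentKey(), context.getCurrentValue(), context);
      }
    } finally {
      cleanup(context);
    }
  }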

         Here SplitLineReader is the underlying line reader.

(2) Initialization.

   [Figure 8]

       Note: split.getStart() is the byte offset at which this split's data begins; it has nothing to do with lines.

    [Figure 9]

    That start offset is assigned to pos, the current position. Now look up the nextKeyValue() method in the LineRecordReader class:

public boolean nextKeyValue() throws IOException {
    if (key == null) {
      key = new LongWritable();
    }
    key.set(pos);
    if (value == null) {
      value = new Text();
    }
    int newSize = 0;
    // We always read one extra line, which lies outside the upper
    // split limit i.e. (end - 1)
    while (getFilePosition() <= end || in.needAdditionalRecordAfterSplit()) {
      if (pos == 0) {
        newSize = skipUtfByteOrderMark();
      } else {
        newSize = in.readLine(value, maxLineLength, maxBytesToConsume(pos));
        pos += newSize;
      }

      if ((newSize == 0) || (newSize < maxLineLength)) {
        break;
      }

      // line too long. try again
      LOG.info("Skipped line of size " + newSize + " at pos " + 
               (pos - newSize));
    }
    if (newSize == 0) {
      key = null;
      value = null;
      return false;
    } else {
      return true;
    }
  }

Take, for example, a file containing:

      hello you
      hello me

     It ends up as a single split. Then:

      1. On the first call to nextKeyValue(): start=0, end=19, pos=0, key=0, value=hello you, newSize=10.
      2. On the second call: key=10, value=hello me, newSize=9 (8 bytes of text plus the newline).

     readLine() reads one line of text from the input stream into value and returns the number of bytes consumed, including the line terminator:

      newSize = in.readLine(value, maxLineLength, maxBytesToConsume(pos));

     In short, key and value are given their values inside nextKeyValue(). A Hadoop-free sketch of this offset bookkeeping follows.
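A minimal sketch (assuming plain '\n' line endings; this is an illustration, not LineRecordReader itself) of that bookkeeping: the key is the byte offset at which each line starts, and pos advances by however many bytes the line consumed:

import java.nio.charset.StandardCharsets;

public class OffsetKeyDemo {
  public static void main(String[] args) {
    byte[] split = "hello you\nhello me\n".getBytes(StandardCharsets.UTF_8);
    long pos = 0;                                  // current byte offset, like LineRecordReader's pos
    int lineStart = 0;
    for (int i = 0; i < split.length; i++) {
      if (split[i] == '\n') {
        long key = pos;                            // key = offset where this line begins
        String value = new String(split, lineStart, i - lineStart, StandardCharsets.UTF_8);
        int newSize = i - lineStart + 1;           // bytes consumed, newline included
        System.out.println("key=" + key + ", value=\"" + value + "\", newSize=" + newSize);
        pos += newSize;
        lineStart = i + 1;
      }
    }
    // Prints: key=0, value="hello you", newSize=10  then  key=10, value="hello me", newSize=9
  }
}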

Part 3: Once key and value are assigned, how do they reach the map() function?

 *****************************************************************

Starting the analysis from the Mapper class:

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapreduce.task.MapContextImpl;

/** 
 * Maps input key/value pairs to a set of intermediate key/value pairs.  
 * 
 * Maps are the individual tasks which transform input records into a
 * intermediate records. The transformed intermediate records need not be of
 * the same type as the input records. A given input pair may map to zero or
 * many output pairs.
 *
 * The Hadoop Map-Reduce framework spawns one map task for each
 * {@link InputSplit} generated by the {@link InputFormat} for the job.
 * Mapper implementations can access the {@link Configuration} for
 * the job via the {@link JobContext#getConfiguration()}.
 *
 * The framework first calls
 * {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by
 * {@link #map(Object, Object, Context)}
 * for each key/value pair in the InputSplit. Finally
 * {@link #cleanup(Context)} is called.
 *
 * All intermediate values associated with a given output key are
 * subsequently grouped by the framework, and passed to a {@link Reducer} to
 * determine the final output. Users can control the sorting and grouping by
 * specifying two key {@link RawComparator} classes.
 *
 * The Mapper outputs are partitioned per
 * Reducer. Users can control which keys (and hence records) go to
 * which Reducer by implementing a custom {@link Partitioner}.
 *
 * Users can optionally specify a combiner, via
 * {@link Job#setCombinerClass(Class)}, to perform local aggregation of the
 * intermediate outputs, which helps to cut down the amount of data transferred
 * from the Mapper to the Reducer.
 *
 * Applications can specify if and how the intermediate
 * outputs are to be compressed and which {@link CompressionCodec}s are to be
 * used via the Configuration.
 *
 * If the job has zero
 * reduces then the output of the Mapper is directly written
 * to the {@link OutputFormat} without sorting by keys.
 *
 * Example:
 *
 * public class TokenCounterMapper 
 *     extends Mapper<Object, Text, Text, IntWritable>{
 *    
 *   private final static IntWritable one = new IntWritable(1);
 *   private Text word = new Text();
 *   
 *   public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
 *     StringTokenizer itr = new StringTokenizer(value.toString());
 *     while (itr.hasMoreTokens()) {
 *       word.set(itr.nextToken());
 *       context.write(word, one);
 *     }
 *   }
 * }
 * 

 * Applications may override the {@link #run(Context)} method to exert
 * greater control on map processing e.g. multi-threaded Mappers
 * etc.
 *
 * @see InputFormat
 * @see JobContext
 * @see Partitioner
 * @see Reducer
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {

  /**
   * The Context passed on to the {@link Mapper} implementations.
   */
  public abstract class Context
    implements MapContext<KEYIN,VALUEIN,KEYOUT,VALUEOUT> {
  }

  /**
   * Called once at the beginning of the task.
   */
  protected void setup(Context context
                       ) throws IOException, InterruptedException {
    // NOTHING
  }

  /**
   * Called once for each key/value pair in the input split. Most applications
   * should override this, but the default is the identity function.
   */
  @SuppressWarnings("unchecked")
  protected void map(KEYIN key, VALUEIN value,
                     Context context) throws IOException, InterruptedException {
    context.write((KEYOUT) key, (VALUEOUT) value);
  }

  /**
   * Called once at the end of the task.
   */
  protected void cleanup(Context context
                         ) throws IOException, InterruptedException {
    // NOTHING
  }

  /**
   * Expert users can override this method for more complete control over the
   * execution of the Mapper.
   * @param context
   * @throws IOException
   */
  public void run(Context context) throws IOException, InterruptedException {
    setup(context);                       // runs once, at the start of the task
    try {
      while (context.nextKeyValue()) {
        map(context.getCurrentKey(), context.getCurrentValue(), context);  // map() runs once per key/value pair;
                                                                           // run() itself is invoked once per InputSplit
      }
    } finally {
      cleanup(context);                   // runs once, at the end of the task
    }
  }
}
Now trace nextKeyValue(). Find its implementations (Ctrl+T in the IDE also jumps to them):

    [Figure 10]

MapContextImpl provides an implementation of the method:

[Figure 11]

Inside MapContextImpl, reader is declared as a RecordReader:

[Figure 12]

 

 We can also approach this through the Context source. Stepping into that class:

[Figure 13]

[Figure 14]

Looking at mapContext: it is assigned in the constructor.

[Figure 15]

Now look at WrappedMapper's nextKeyValue() method. Following its implementation leads to the implementing class:

[Figure 16]

There we can see how reader is wired up:

[Figure 17]

reader is ultimately declared as a RecordReader.

[Figure 18]

Summary:

MapContextImpl's parameterized constructor is called with a RecordReader, which is stored in its reader field (around line 57 of MapContextImpl); nextKeyValue() (around line 80) then delegates to that reader, and the Context wrapper that the Mapper sees forwards its nextKeyValue() call down to MapContextImpl.

 Overall summary: from the source code, how does the data that map() processes get out of the HDFS files? Answer:

1. Start from TextInputFormat, follow it up to its parent FileInputFormat, and then to InputFormat.
   InputFormat declares two methods, getSplits(...) and createRecordReader(...).
   The javadoc tells us that getSplits(...) turns the contents of the whole input file set into individual InputSplits, and each InputSplit corresponds to one mapper task.
   createRecordReader(...) creates an implementation of RecordReader, whose job is to parse an InputSplit into individual <key, value> pairs.
2. FileInputFormat holds the implementation of getSplits(...). From it we learn that:
   (1) by default each split's size equals the block size, which is good for data locality;
   (2) every input file produces an InputSplit -- even an empty file produces one;
   a very large file is cut into several InputSplits of split size each.
3. TextInputFormat holds the implementation of createRecordReader(...), and inside it we find LineRecordReader.
   Next, analyze LineRecordReader.
   In the RecordReader base class, the methods show that key and value exist as fields of the class and how nextKeyValue() is meant to be used.
   In LineRecordReader the focus is nextKeyValue(...), in particular the call newSize = in.readLine(value, maxLineLength, maxBytesToConsume(pos));
   in in.readLine(...), the first argument receives the text of the line that was read, and the return value is the number of bytes read.
   That is how the contents of an InputSplit are turned into individual <key, value> pairs.
4. In the Mapper class we find setup(), cleanup(), map() and run().
   run() loops with while, calling context.nextKeyValue(...).
   Following Context further: the wrapper handed to the Mapper is org.apache.hadoop.mapreduce.lib.map.WrappedMapper.Context, which forwards nextKeyValue(...) to a MapContext; the MapContext implementation is org.apache.hadoop.mapreduce.task.MapContextImpl.
   The constructor of that class receives the RecordReader implementation.

 

 

 

Reposted from: https://www.cnblogs.com/jackchen-Net/p/6405951.html
