先写个java的,近期会对照实现clojure版的 并提供clojure实现中宏的介绍入口类package jvm.storm.starter; import jvm.storm.starter.wordcount.SplitSentence; import jvm.storm.starter.wordcount.WordCount; import jvm.storm.starter.wordcount.WordCountSpout; import backtype.storm.Config; import backtype.storm.StormSubmitter; import backtype.storm.generated.AlreadyAliveException; import backtype.storm.generated.InvalidTopologyException; import backtype.storm.topology.InputDeclarer; import backtype.storm.topology.TopologyBuilder; import backtype.storm.tuple.Fields; /** * @author guiqiangl E-mail:[email protected] * @version 创建时间:2011-11-24 下午04:40:26 * */ public class WordCountStart { public static void main (String[] args){ TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("1", new WordCountSpout(""), 5);//发射器 InputDeclarer fieldsGrouping = builder.setBolt("2", new SplitSentence(), 5); fieldsGrouping.fieldsGrouping("1", new Fields("word")); builder.setBolt("3", new WordCount(), 5) .fieldsGrouping("2", new Fields("word")); Config conf = new Config(); conf.setDebug(false); // 本地模式 // LocalCluster cluster = new LocalCluster(); // cluster.submitTopology("rolling-demo", conf, builder.createTopology()); //远程启动 conf.setNumWorkers(20); conf.setMaxSpoutPending(5000); try { StormSubmitter.submitTopology("rolling-demo", conf, builder.createTopology()); } catch (AlreadyAliveException e) { e.printStackTrace(); } catch (InvalidTopologyException e) { e.printStackTrace(); } //结束 // cluster.killTopology("rolling-demo"); // cluster.shutdown(); } }发射器
package jvm.storm.starter.wordcount;

import java.util.Map;
import java.util.Random;

import org.apache.log4j.Logger;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

/**
 * Spout that emits one randomly chosen word from a fixed list roughly once
 * per second, on a single output field named "word".
 *
 * @author guiqiangl E-mail:[email protected]
 * @version created: 2011-11-24 16:41:34
 */
public class WordCountSpout implements IRichSpout {

    private static final long serialVersionUID = -620768344883063619L;

    public static Logger LOG = Logger.getLogger(WordCountSpout.class);

    /** Fixed pool of candidate words to emit. */
    private static final String[] WORDS =
            new String[] {"nathan", "mike", "jackson", "golda", "bertels"};

    SpoutOutputCollector _collector;

    // Created once in open() rather than allocating a new Random on every
    // nextTuple() call (the original re-created it per tuple).
    private Random rand;

    public WordCountSpout(String string) {
    }

    public void open(@SuppressWarnings("rawtypes") Map conf, TopologyContext context,
            SpoutOutputCollector collector) {
        _collector = collector;
        rand = new Random();
    }

    public void close() {
    }

    /**
     * Emits one random word, then sleeps ~1s to throttle the emission rate.
     */
    public void nextTuple() {
        String word = WORDS[rand.nextInt(WORDS.length)];
        _collector.emit(new Values(word));
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Restore the interrupt status instead of swallowing it, so the
            // worker thread can observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    public void ack(Object msgId) {
    }

    public void fail(Object msgId) {
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    @Override
    public boolean isDistributed() {
        return false;
    }
}
// Sentence splitting:
package jvm.storm.starter.wordcount;

import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
 * Bolt that splits an incoming sentence (first tuple field) on single spaces
 * and emits one tuple per word, anchored to the input tuple.
 *
 * @author guiqiangl E-mail:[email protected]
 * @version created: 2011-11-24 16:48:29
 */
public class SplitSentence implements IRichBolt {

    private static final long serialVersionUID = -424523368294777576L;

    OutputCollector _collector;

    public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context,
            OutputCollector collector) {
        _collector = collector;
    }

    public void execute(Tuple tuple) {
        String sentence = tuple.getString(0);
        String[] tokens = sentence.split(" ");
        for (int i = 0; i < tokens.length; i++) {
            // Anchoring the emitted tuple to the input enables replay on failure.
            _collector.emit(tuple, new Values(tokens[i]));
            // Unanchored alternative: _collector.emit(new Values(tokens[i]));
        }
        _collector.ack(tuple);
    }

    public void cleanup() {
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}
// Counting:
package jvm.storm.starter.wordcount;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;

/**
 * Bolt that keeps a running per-word count and, on every tuple, dumps the full
 * counter table both to stdout and (append mode) to a fixed file.
 *
 * NOTE(review): counts are local to each task instance — with parallelism > 1
 * each task only sees its fields-grouped share of the words.
 *
 * @author guiqiangl E-mail:[email protected]
 * @version created: 2011-11-24 16:56:13
 */
public class WordCount implements IRichBolt {

    private static final long serialVersionUID = -6706714875516091987L;

    /** Running word -> occurrence count for this bolt instance. */
    public Map<String, Integer> counterMap = new HashMap<String, Integer>();

    OutputCollector _collector;

    /** Append-mode sink for the counter dump; null when opening the file failed. */
    BufferedWriter output = null;

    public void prepare(@SuppressWarnings("rawtypes") Map conf, TopologyContext context,
            OutputCollector collector) {
        _collector = collector;
        try {
            output = new BufferedWriter(new FileWriter("/home/hadoop/桌面/wordcount.txt", true));
        } catch (IOException e) {
            // BUG FIX: the original called output.close() here, but output is
            // still null when the FileWriter constructor itself threw — that
            // was a guaranteed NullPointerException. Just record the failure;
            // execute() tolerates a null writer.
            e.printStackTrace();
            output = null;
        }
    }

    public void execute(Tuple tuple) {
        // Increment the count for the incoming word (first tuple field).
        String word = tuple.getString(0);
        Integer count = counterMap.get(word);
        if (count == null) {
            count = 0;
        }
        count++;
        counterMap.put(word, count);

        // Dump the entire counter table (demo-style debug output).
        Iterator<String> iterator = counterMap.keySet().iterator();
        while (iterator.hasNext()) {
            String next = iterator.next();
            String entry = next + ":" + counterMap.get(next) + "  ";
            System.out.print(entry);
            if (output != null) { // skip file output if the writer never opened
                try {
                    output.write(entry);
                    output.flush();
                } catch (IOException e) {
                    e.printStackTrace();
                    // Writer is broken; close it once and stop writing.
                    try {
                        output.close();
                    } catch (IOException e1) {
                        e1.printStackTrace();
                    }
                    output = null;
                }
            }
        }
        System.out.println();
        _collector.ack(tuple);
    }

    public void cleanup() {
        // BUG FIX: the original leaked the writer — close it on shutdown.
        if (output != null) {
            try {
                output.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            output = null;
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}