Flink入门及实战(2)-批处理

1 Java 版本批处理

Flink入门及实战(2)-批处理_第1张图片
Flink入门及实战(2)-批处理_第2张图片


package com.tzb.demo;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;


public class BatchWordCountJava {
    public static void main(String[] args) throws Exception {
        // 获取运行环境
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        String path = "D:\\word";
        String outPath = "D:\\word\\result";

        // 读取本地文件
        DataSource<String> text = env.readTextFile(path);

        AggregateOperator<Tuple2<String, Integer>> counts = text.flatMap(new Tokenizer())
                .groupBy(0)
                .sum(1);

        counts.writeAsCsv(outPath, "\n", " ").setParallelism(1 );

        env.execute("batch word count");

    }

    public static class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            String[] tokens = value.toLowerCase().split("\\W+");
            for (String token : tokens) {
                if (token.length() >= 0) {
                    out.collect(new Tuple2<String, Integer>(token, 1));
                }
            }
        }
    }
}


Flink入门及实战(2)-批处理_第3张图片

2 Scala 版本批处理

package com.tzb.scalademo

import org.apache.flink.api.scala.ExecutionEnvironment

/** Batch word count using the Flink Scala DataSet API.
  *
  * Reads text from the input path, counts the occurrences of every
  * lower-cased word and writes the `word count` pairs to the output path.
  */
object BatchWordCountSize {
    def main(args: Array[String]): Unit = {

        val batchEnv = ExecutionEnvironment.getExecutionEnvironment

        val sourceDir = "D:\\word"
        val resultPath = "D:\\word\\res"

        val lines = batchEnv.readTextFile(sourceDir)

        // Implicits of the Flink Scala API, needed by the transformations below.
        import org.apache.flink.api.scala._

        // Split every line on runs of non-word characters and drop empty tokens.
        val words = lines
          .flatMap(line => line.toLowerCase().split("\\W+"))
          .filter(word => word.nonEmpty)

        // Pair each word with 1, group by the word (field 0) and sum the ones (field 1).
        val wordTotals = words
          .map(word => (word, 1))
          .groupBy(0)
          .sum(1)

        // One record per line, fields separated by a single space; sink parallelism is set to 1.
        val csvSink = wordTotals.writeAsCsv(resultPath, "\n", " ")
        csvSink.setParallelism(1)

        batchEnv.execute("batch word count")

    }
}


Flink入门及实战(2)-批处理_第4张图片

3 流处理和批处理

3.1 流处理 Streaming

  • StreamExecutionEnvironment
  • DataStream

3.2 批处理 Batch

  • ExecutionEnvironment
  • DataSet

你可能感兴趣的:(#,Flink入门及实战,flink,批处理)