【云星数据---Apache Flink实战系列(精品版)】:Apache Flink批处理API详解与编程实战025--DataSet实用API详解025

一、Flink DataSet定制API详解(JAVA版) -002

flatMap

element为粒度,对element进行1:n的转化。

执行程序:

package code.book.batch.dataset.advance.api;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.util.Collector;

import java.util.List;
/**
 * Demonstrates Flink's {@code flatMap} on a batch DataSet: each input element
 * may be transformed into zero or more output elements (1:n).
 */
public class FlatMapFunction001java {
    public static void main(String[] args) throws Exception {
        // 1. Set up the execution environment and prepare the input data.
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> text = env.fromElements("flink vs spark", "buffer vs shuffle");

        // 2. Per element (1:1): upper-case it and append the suffix "--##bigdata##".
        // Note: the type parameters <String, String> are required — with the raw
        // FlatMapFunction type, flatMap(String, Collector) would not override the
        // erased flatMap(Object, Collector) and the @Override would fail to compile.
        DataSet<String> text2 = text.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String s, Collector<String> collector) throws Exception {
                collector.collect(s.toUpperCase() + "--##bigdata##");
            }
        });
        text2.print();

        // 3. Per element: upper-case it, split it on whitespace, and emit the
        // resulting token array as one output element.
        DataSet<String[]> text3 = text.flatMap(new FlatMapFunction<String, String[]>() {
            @Override
            public void flatMap(String s, Collector<String[]> collector) throws Exception {
                collector.collect(s.toUpperCase().split("\\s+"));
            }
        });
        final List<String[]> collect = text3.collect();
        // Print every token — lambda style.
        collect.forEach(arr -> {
            for (String token : arr) {
                System.out.println(token);
            }
        });
        // Print every token — pre-lambda style (same output as above).
        for (String[] arr : collect) {
            for (String token : arr) {
                System.out.println(token);
            }
        }
    }
}

执行结果:

text2.print();
FLINK VS SPARK--##bigdata##
BUFFER VS SHUFFLE--##bigdata##

collect.forEach(arr -> {
for (String token : arr) {System.out.println(token);}});
FLINK
VS
SPARK
BUFFER
VS
SHUFFLE

filter

element为粒度,对element进行过滤操作。将满足过滤条件的element组成新的DataSet

执行程序:

package code.book.batch.dataset.advance.api;

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

/**
 * Demonstrates Flink's {@code filter} on a batch DataSet: elements satisfying
 * the predicate are kept; all others are dropped.
 */
public class FilterFunction001java {
    public static void main(String[] args) throws Exception {
        // 1. Set up the execution environment and prepare the input data.
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Integer> text = env.fromElements(2, 4, 7, 8, 9, 6);

        // 2. Keep only the even elements.
        // Note: the type parameter <Integer> is required — with the raw
        // FilterFunction type, filter(Integer) would not override the erased
        // filter(Object) and the @Override would fail to compile.
        DataSet<Integer> text2 = text.filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer e) throws Exception {
                return e % 2 == 0;
            }
        });
        text2.print();

        // 3. Keep only the elements greater than 5.
        DataSet<Integer> text3 = text.filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer e) throws Exception {
                return e > 5;
            }
        });
        text3.print();
    }
}

执行结果:

text2.print()
2
4
8
6

text3.print()
7
8
9
6

你可能感兴趣的:(bigdata,cloudcomputing,flink)