// Spark 2.0.0 Structured Streaming programming example (network word count)
package com.dt.spark200;
import java.util.Arrays;
import java.util.Iterator;

import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.streaming.StreamingQuery;
import org.apache.spark.sql.streaming.StreamingQueryException;
public class Spark200StructuredStreaming {
public static void main(String[] args) {
SparkSession spark = SparkSession
.builder()
.appName("JavaStructuredNetworkWordCount")
.master("local")
.config("spark.sql.warehouse.dir", "file:///G:/IMFBigDataSpark2016/IMFJavaWorkspace_Spark200/Spark200Demo/spark-warehouse")
.getOrCreate();
// Create DataFrame representing the stream of input lines from connection to localhost:9999
Dataset
.readStream()
.format("socket")
.option("host", "pc")
.option("port", 9999)
.load();
// Split the lines into words
Dataset
.as(Encoders.STRING())
.flatMap(
new FlatMapFunction
@Override
public Iterator
return Arrays.asList(x.split(" ")).iterator();
}
}, Encoders.STRING());
// Generate running word count
Dataset
StreamingQuery query = wordCounts.writeStream()
.outputMode("complete")
.format("console")
.start();
query.awaitTermination();
while(true){}
}
}