spark 2.0.0 Structured Streaming Programming

spark 2.0.0 Structured Streaming Programming

 

spark 2.0.0 Structured Streaming Programming_第1张图片

 

 

spark 2.0.0 Structured Streaming Programming_第2张图片

 

spark 2.0.0 Structured Streaming Programming_第3张图片

spark 2.0.0 Structured Streaming Programming_第4张图片

 

 

 

 

 

 

package com.dt.spark200;

import java.util.Arrays;
import java.util.Iterator;

import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.streaming.StreamingQuery;
import org.apache.spark.sql.streaming.StreamingQueryException;

 

public class Spark200StructuredStreaming {

 public static void main(String[] args) {
  
  SparkSession spark = SparkSession
       .builder()
       .appName("JavaStructuredNetworkWordCount")
       .master("local")
       .config("spark.sql.warehouse.dir", "file:///G:/IMFBigDataSpark2016/IMFJavaWorkspace_Spark200/Spark200Demo/spark-warehouse")
       .getOrCreate();
  
  
  // Create DataFrame representing the stream of input lines from connection to localhost:9999
  Dataset lines = spark
    .readStream()
    .format("socket")
    .option("host", "pc")
    .option("port", 9999)
    .load();

  // Split the lines into words
  Dataset words = lines
      .as(Encoders.STRING())
      .flatMap(
          new FlatMapFunction() {
            @Override
            public Iterator call(String x) {
              return Arrays.asList(x.split(" ")).iterator();
            }
          }, Encoders.STRING());

  // Generate running word count
  Dataset wordCounts = words.groupBy("value").count();
  
  StreamingQuery query = wordCounts.writeStream()
      .outputMode("complete")
      .format("console")
      .start();

    query.awaitTermination();
  while(true){}
  
 }

}

 

你可能感兴趣的:(大数据蘑菇云行动)