Still not resolved. The same code runs fine locally, and it also runs fine when packaged and submitted to the cluster, so the problem is most likely a dependency conflict somewhere in Zeppelin.
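One way to narrow the conflict down from inside Zeppelin is to check which jar the suspect classes are actually loaded from at runtime. A minimal diagnostic sketch for a %flink paragraph (the two class names are just examples taken from the job's imports; substitute whichever class the error message mentions):

%flink
// Print the jar each suspect class is loaded from in the %flink interpreter;
// a path pointing at an unexpected jar usually identifies the conflicting dependency.
Seq(
  classOf[org.apache.parquet.hadoop.ParquetFileReader],
  classOf[org.apache.flink.formats.parquet.ParquetRowInputFormat]
).foreach { c =>
  val src = Option(c.getProtectionDomain.getCodeSource)
    .flatMap(cs => Option(cs.getLocation))
    .map(_.toString)
    .getOrElse("bootstrap/unknown")
  println(s"${c.getName} -> $src")
}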
Scala job code:
%flink.conf
flink.execution.packages org.apache.flink:flink-connector-kafka_2.11:1.11.2,com.alibaba:fastjson:1.2.60,org.apache.flink:flink-parquet_2.11:1.11.2,org.apache.parquet:parquet-hadoop:1.11.1,org.apache.flink:flink-core:1.11.2
flink.execution.jars /wyyt/software/1.11.2-flink/flink-scala-opt-1.0-SNAPSHOT-shade.jar
FINISHED
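Since flink.execution.packages already pulls in flink-parquet and parquet-hadoop, one likely source of the conflict is the fat jar listed in flink.execution.jars bundling its own copies of the same classes. A rough check, assuming the shade jar path from the config above:

%flink
// List parquet-related entries bundled inside the shade jar referenced by flink.execution.jars;
// if the same classes are also pulled in via flink.execution.packages, two copies end up on the classpath.
import java.util.jar.JarFile
import scala.collection.JavaConverters._

val shadeJar = new JarFile("/wyyt/software/1.11.2-flink/flink-scala-opt-1.0-SNAPSHOT-shade.jar")
shadeJar.entries().asScala
  .map(_.getName)
  .filter(n => n.startsWith("org/apache/parquet/") || n.startsWith("org/apache/flink/formats/parquet/"))
  .take(20)
  .foreach(println)
shadeJar.close()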
%flink.conf
flink.execution.packages org.apache.flink:flink-connector-kafka_2.11:1.12.2,com.alibaba:fastjson:1.2.60,org.apache.hadoop:hadoop-client:3.0.0,org.apache.flink:flink-parquet_2.11:1.12.2,org.apache.hadoop:hadoop-common:3.0.0,org.apache.flink:flink-core:1.12.2
FINISHED (outdated)
%flink.conf
flink.execution.packages org.apache.flink:flink-connector-kafka_2.11:1.12.2,com.alibaba:fastjson:1.2.60,org.apache.flink:flink-parquet_2.11:1.12.2,org.apache.flink:flink-core:1.12.2,org.apache.flink:flink-shaded-hadoop-2-uber:2.7.5-9.0
flink.execution.jars /wyyt/software/flink-1.12.2-2.11/flink-1.12.2/flink-hudi-1.0-SNAPSHOT-shade.jar
READY
%flink
val data = benv.fromElements("hello world", "hello flink", "hello hadoop")
data.flatMap(line => line.split("\\s"))
  .map(w => (w, 1))
  .groupBy(0)
  .sum(1)
  .print()
data: org.apache.flink.api.scala.DataSet[String] = org.apache.flink.api.scala.DataSet@709f0986
(flink,1)
(hadoop,1)
(world,1)
(hello,3)
FLINK JOB FINISHED
%flink
READY
import com.sjb.kafka.KafkaOutputFormat
import org.apache.commons.lang3.StringUtils
import org.apache.flink.api.common.io.FileInputFormat
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.api.scala._
import org.apache.flink.core.fs.Path
import org.apache.flink.formats.parquet.ParquetRowInputFormat
import org.apache.flink.types.Row
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, RemoteIterator}
import org.apache.parquet.column.ColumnDescriptor
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.hadoop.metadata.ParquetMetadata
import org.apache.parquet.schema.MessageType
import java.util
import scala.collection.JavaConversions._
benv.setParallelism(1)
val config: Configuration = new Configuration
val namenode: String = "hdfs://bi-524:8020"
config.set("fs.defaultFS", namenode)
//todo make these parameters configurable instead of hard-coded.
// val parameterTool: ParameterTool = ParameterTool.fromArgs(args)
//todo pass in arguments: --table xxx --database xxx --sinktopic xx --prod prod
// String databaseName = parameterTool.getRequired("database")
val databaseName: String = "test"
// String tableName = parameterTool.getRequired("table")
val tableName: String = "fs_plt_assure_orders_bak"
// String sinkTopic = parameterTool.getRequired("sinktopic")
val sinkTopic: String = "cep_test"
// val patternParam: String = parameterTool.get("prod", "")
var path: String = ""
var serverProd: String = ""
serverProd = "dev-ct6-dc-worker01:9092,dev-ct6-dc-worker02:9092,dev-ct6-dc-worker03:9092"
path = "hdfs://bi-524:8020/user/hive/warehouse/" + databaseName + ".db/"
val p: org.apache.hadoop.fs.Path = new org.apache.hadoop.fs.Path(path)
val fs: FileSystem = FileSystem.get(config)
print(fs.exists(p))
if (!fs.exists(p) || !fs.isDirectory(p)) {
}
var locatedFileStatusRemoteIterator: RemoteIterator[LocatedFileStatus] = null
var messageType: MessageType = null
val coStr: util.List[String] = new util.ArrayList[String]
val tableHdfsPathStr: org.apache.hadoop.fs.Path = new org.apache.hadoop.fs.Path(path + tableName + "///")
if (fs.isDirectory(tableHdfsPathStr)) {
locatedFileStatusRemoteIterator = fs.listFiles(tableHdfsPathStr, true)
var flieName: org.apache.hadoop.fs.Path = null
//todo iterate over each split file name, e.g. 000000_0
if (lo