package com.avcdata;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import java.util.List;
public class RDD2DataFrameRelection {
public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName("RDD2DataFrameRelection").setMaster("local");
JavaSparkContext sc = new JavaSparkContext(conf);
SQLContext sqlContext = new SQLContext(sc);
JavaRDD lines = sc.textFile("students.txt");
JavaRDD studentRDD = lines.map((Function) line -> {
String[] lineSplited = line.split(",");
Student stu = new Student();
stu.setId(Integer.valueOf(lineSplited[0]));
stu.setName(lineSplited[1]);
stu.setAge(Integer.valueOf(lineSplited[2]));
return stu;
});
// 使用反射方式将RDD转换为DataFrame
DataFrame studentDF = sqlContext.createDataFrame(studentRDD, Student.class);
studentDF.printSchema();
// 有了DataFrame后就可以注册一个临时表,SQL语句还是查询年龄小于18岁的人
studentDF.registerTempTable("student");
DataFrame teenagerDF = sqlContext.sql("SELECT * FROM student WHERE age <= 18");
JavaRDD teenagerRDD = teenagerDF.toJavaRDD();
JavaRDD teenagerStudentRDD = teenagerRDD.map((Function) row -> {
// 可以直接通过列名了从Row里面来获取数据,这样的好处就是不用担心顺序
int id = row.getAs("id");
int age = row.getAs("age");
String name = row.getAs("name");
Student stu = new Student();
stu.setId(id);
stu.setAge(age);
stu.setName(name);
return stu;
});
List studentList = teenagerStudentRDD.collect();
studentList.forEach(System.out::println);
}
}
Scala version (RDD2DataFrame.scala)
package com.avcdata
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
case class Person(id: Int, name: String, age: Int)
/** Scala counterpart of the Java example: RDD -> DataFrame via a case class,
  * SQL query on a temp table, then rows mapped back to Person instances. */
object RDD2DataFrame {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("RDD2DataFrame").setMaster("local")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    // Parse each CSV line ("id,name,age") of students.txt into a Person.
    val studentRDD = sc.textFile("students.txt").map { line =>
      val fields = line.split(",")
      Person(fields(0).toInt, fields(1), fields(2).toInt)
    }

    // toDF() comes from the SQLContext implicits; Person's fields define the schema.
    import sqlContext.implicits._
    val studentDF = studentRDD.toDF()
    studentDF.registerTempTable("person")

    val teenagerDF = sqlContext.sql("SELECT * FROM person WHERE age < 18")
    teenagerDF.printSchema()

    // Rebuild Person objects from result rows, looking fields up by column name.
    val teenagers = teenagerDF.rdd.map { row =>
      Person(row.getAs("id"), row.getAs("name"), row.getAs("age"))
    }
    teenagers.collect().foreach(println)
  }
}
RDD2DataFrameDynamic.java
package com.avcdata;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import java.util.ArrayList;
import java.util.List;
public class RDD2DataFrameDynamic {
public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName("RDD2DataFrameRelection").setMaster("local");
JavaSparkContext sc = new JavaSparkContext(conf);
SQLContext sqlContext = new SQLContext(sc);
JavaRDD lines = sc.textFile("students.txt");
JavaRDD rows = lines.map((Function) line -> {
String[] lineSplited = line.split(",");
return RowFactory.create(Integer.valueOf(lineSplited[0]), lineSplited[1], Integer.valueOf(lineSplited[2]));
});
// 动态构造元数据,还有一种方式是通过反射的方式来构建出DataFrame,这里我们用的是动态创建元数据
// 有些时候我们一开始不确定有哪些列,而这些列需要从数据库比如MySQL或者配置文件来加载出来
List fields = new ArrayList<>();
fields.add(DataTypes.createStructField("id", DataTypes.IntegerType, true));
fields.add(DataTypes.createStructField("name", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("age", DataTypes.IntegerType, true));
StructType schema = DataTypes.createStructType(fields);
DataFrame studentDF = sqlContext.createDataFrame(rows, schema);
studentDF.registerTempTable("stu");
DataFrame teenagerDF = sqlContext.sql("SELECT * FROM stu WHERE age <= 18");
List teenagerList = teenagerDF.javaRDD().collect();
teenagerList.forEach(System.out::println);
}
}
RDD2DataFrameDynamic.scala
package com.avcdata
import org.apache.spark.sql.{RowFactory, SQLContext}
import org.apache.spark.sql.types.{DataTypes, StructField, StructType}
import org.apache.spark.{SparkConf, SparkContext}
/** Scala counterpart of the dynamic-schema example: build a StructType by
  * hand, attach it to an RDD[Row], query via SQL, and print the result. */
object RDD2DataFramedy {
  def main(args: Array[String]): Unit = {
    // Spark context / SQL context setup.
    val conf = new SparkConf().setAppName("RDD2DataFrame").setMaster("local")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    // Read the raw lines and convert each CSV record ("id,name,age") into a Row.
    val rows = sc.textFile(path = "students.txt").map { line =>
      val cols = line.split(",")
      RowFactory.create(Integer.valueOf(cols(0)), cols(1), Integer.valueOf(cols(2)))
    }

    // Schema built programmatically — the dynamic alternative to reflecting
    // on a case class, for when columns are only known at runtime.
    val schema = StructType(Seq(
      StructField("id", DataTypes.IntegerType, nullable = true),
      StructField("name", DataTypes.StringType, nullable = true),
      StructField("age", DataTypes.IntegerType, nullable = true)))

    val personDF = sqlContext.createDataFrame(rows, schema)
    personDF.registerTempTable(tableName = "stu")

    // Query and print every matching row.
    sqlContext.sql(sqlText = "SELECT * FROM stu WHERE age < 18")
      .rdd.collect().foreach(println)
  }
}
Traits are a fundamental unit of code reuse in Scala. A trait encapsulates method and field definitions, which can then be reused by mixing them into classes. Unlike class inheritance, in which each class must inherit from just one superclass, a class can mix in any number of traits.
版本:WebLogic Server 10.3
说明:%DOMAIN_HOME%:指WebLogic Server 域(Domain)目录
例如我的做测试的域的根目录 DOMAIN_HOME=D:/Weblogic/Middleware/user_projects/domains/base_domain
1.为了保证操作安全,备份%DOMAIN_HOME%/security/DefaultAuthenticatorInit.ldift(原文在此处被截断)
http://crazyjvm.iteye.com/blog/1693757 文中提到相关超时问题,但是又出现了一个问题,我把min和max都设置成了180000,但是仍然出现了以下的异常信息:
Client session timed out, have not heard from server in 154339ms for sessionid 0x13a3f7732340003
在Mysql 众多表中查找一个表名或者字段名的 SQL 语句:
方法一:SELECT table_name, column_name from information_schema.columns WHERE column_name LIKE 'Name';
方法二:SELECT column_name from information_schema.columns(原文在此处被截断)