Implementing Spark SQL database access in Java

1. Create a Java Maven project and add the dependencies

 
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.3.0</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.38</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.3.0</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>

2. Writing data

        SparkConf conf = new SparkConf().setAppName("HelloWorld").setMaster("local");

        JavaSparkContext sc = new JavaSparkContext(conf);
        SQLContext sqlContext = new SQLContext(sc);

        // Data to write
        JavaRDD<String> personData = sc.parallelize(Arrays.asList("java chinese 5", "c++ chinese 6"));
        // Database connection settings
        String url = "jdbc:mysql://localhost:3306/demo";
        Properties connectionProperties = new Properties();
        connectionProperties.put("user", "root");
        connectionProperties.put("password", "123456");
        connectionProperties.put("driver", "com.mysql.jdbc.Driver");
        /**
         * Step 1: build an RDD of Row on top of the raw RDD.
         */
        // Convert the RDD into an RDD of Row; a Row can be thought of as one row of a table
        JavaRDD<Row> personsRDD = personData.map(new Function<String, Row>() {
            @Override
            public Row call(String line) throws Exception {
                String[] splited = line.split(" ");
                return RowFactory.create(splited[0], splited[1], Integer.valueOf(splited[2]));
            }
        });

        /**
         * Step 2: construct the DataFrame metadata dynamically.
         */
        List<StructField> structFields = new ArrayList<>();
        structFields.add(DataTypes.createStructField("search_word", DataTypes.StringType, true));
        structFields.add(DataTypes.createStructField("lang", DataTypes.StringType, true));
        structFields.add(DataTypes.createStructField("hot_index", DataTypes.IntegerType, true));

        // Build the StructType that describes the DataFrame schema
        StructType structType = DataTypes.createStructType(structFields);

        /**
         * Step 3: create the DataFrame from the schema and the Row RDD.
         */
        Dataset<Row> personsDF = sqlContext.createDataFrame(personsRDD, structType);

        /**
         * Step 4: write the data into the person table.
         */
        personsDF.write().mode("append").jdbc(url, "person", connectionProperties);

        sc.close();
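
The snippet above is shown without its imports. A minimal set that this write example relies on, assuming the Spark 2.3.0 and MySQL connector artifacts declared in the pom, would look roughly like this:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

Since the write uses mode("append"), the Spark 2.x JDBC writer creates the person table on the first run if it does not already exist, so no DDL has to be issued by hand.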

3. Reading data

    private static void getTagByDay(SQLContext sqlContext) {
        String url = "jdbc:mysql://192.168.1.87:3306/tnoat_news_bz";
        // Table to query
        String table = "news";
        // Set the database user name (user), password (password) and JDBC driver (driver)
        Properties connectionProperties = new Properties();
        connectionProperties.put("user", "dbuser");
        connectionProperties.put("password", "asdQWE!@#");
        connectionProperties.put("driver", "com.mysql.jdbc.Driver");

        // Read the news table through Spark's JDBC data source
        System.out.println("Reading the news table from the tnoat_news_bz database");

        // Load the whole table and register it as a temporary view
        sqlContext.read().jdbc(url, table, connectionProperties).createOrReplaceTempView("news");
        Dataset<Row> jd = sqlContext.sql("SELECT * FROM news");
        // Show the data
        jd.show();
    }
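
getTagByDay only needs a SQLContext, so it can be driven from a small main method set up the same way as in the write example; a minimal sketch (the app name is just illustrative):

        SparkConf conf = new SparkConf().setAppName("ReadNews").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        SQLContext sqlContext = new SQLContext(sc);

        // Load the news table over JDBC and print its rows
        getTagByDay(sqlContext);

        sc.close();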

Second approach: reading through JdbcRDD

        DbConnection dbConnection = new DbConnection(MYSQL_DRIVER, MYSQL_CONNECTION_URL, MYSQL_USERNAME, MYSQL_PWD);

        // 1. Set up the Spark contexts
        SparkConf conf = new SparkConf().setAppName("SearchWordRecommend").setMaster("local").setSparkHome("/usr/lib/spark/spark-2.1.1-bin-hadoop2.7");
        JavaSparkContext sparkContext = new JavaSparkContext(conf);
        SQLContext sqlContext = new SQLContext(sparkContext);

        // Load data from MySQL; the two ? placeholders are filled with the partition
        // bounds derived from lowerBound=0, upperBound=10 and numPartitions=2
        JdbcRDD<JSONObject> jdbcRDD = new JdbcRDD<>(sparkContext.sc(), dbConnection, "select * from news where 0 >= ? and 0 <= ? ", 0,
                        10, 2, new MapResult(), ClassManifestFactory$.MODULE$.fromClass(JSONObject.class));

        // Convert to JavaRDD
        JavaRDD<JSONObject> javaRDD = JavaRDD.fromRDD(jdbcRDD, ClassManifestFactory$.MODULE$.fromClass(JSONObject.class));

        javaRDD.foreach(new VoidFunction<JSONObject>() {
            @Override
            public void call(JSONObject jsonObject) throws Exception {
                System.out.println(jsonObject);
            }
        });
static class DbConnection extends AbstractFunction0<Connection> implements Serializable {

        private String driverClassName;
        private String connectionUrl;
        private String userName;
        private String password;

        public DbConnection(String driverClassName, String connectionUrl, String userName, String password) {
            this.driverClassName = driverClassName;
            this.connectionUrl = connectionUrl;
            this.userName = userName;
            this.password = password;
        }

        @Override
        public Connection apply() {
            try {
                Class.forName(driverClassName);
            } catch (ClassNotFoundException e) {
                //LOGGER.error("Failed to load driver class", e);
            }

            Properties properties = new Properties();
            properties.setProperty("user", userName);
            properties.setProperty("password", password);

            Connection connection = null;
            try {
                connection = DriverManager.getConnection(connectionUrl, properties);
            } catch (SQLException e) {
                // LOGGER.error("Connection failed", e);
            }

            return connection;
        }
    }

    static class MapResult extends AbstractFunction1<ResultSet, JSONObject> implements Serializable {

        @Override
        public JSONObject apply(ResultSet resultSet) {
            ResultSetMetaData metaData = null;
            JSONObject jsonObj = new JSONObject();
            try {
                metaData = resultSet.getMetaData();
                int columnCount = metaData.getColumnCount();
                // Iterate over every column
                for (int i = 1; i <= columnCount; i++) {
                    String columnName = metaData.getColumnLabel(i);
                    String value = resultSet.getString(columnName);
                    jsonObj.put(columnName, value);
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
            return jsonObj;
        }
    }

}
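
The second approach references MYSQL_DRIVER, MYSQL_CONNECTION_URL, MYSQL_USERNAME and MYSQL_PWD but never shows where they come from. A minimal sketch of how the surrounding class might declare them, together with the imports this variant relies on (the class name and connection values below are placeholders, not part of the original):

import java.io.Serializable;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.Properties;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.rdd.JdbcRDD;
import org.apache.spark.sql.SQLContext;

import com.alibaba.fastjson.JSONObject;

import scala.reflect.ClassManifestFactory$;
import scala.runtime.AbstractFunction0;
import scala.runtime.AbstractFunction1;

public class JdbcRddDemo {

    // Placeholder connection settings; adjust them to your own environment
    private static final String MYSQL_DRIVER = "com.mysql.jdbc.Driver";
    private static final String MYSQL_CONNECTION_URL = "jdbc:mysql://localhost:3306/demo";
    private static final String MYSQL_USERNAME = "root";
    private static final String MYSQL_PWD = "123456";

    // The main-method snippet above, plus the DbConnection and MapResult
    // helper classes, go inside this class
}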



