spark初学(四)- 连接数据库汇总

连接的数据库为mongo:

/**
 * Builds a SparkSession configured to read from MongoDB.
 *
 * <p>Assembles a connection URI of the form
 * {@code mongodb://user:password@host1:port,host2:port,...} from the
 * comma-separated {@code urls} field (every host shares the same {@code port}),
 * then points the Mongo Spark connector's input at {@code database} and the
 * {@code statisticsSiteBak1} collection.
 *
 * @return a SparkSession ready to load documents via {@code MongoSpark.load(...)}
 */
private SparkSession sparkSession() {
        // StringBuilder is sufficient — this is a single-threaded local build,
        // so StringBuffer's synchronization buys nothing.
        StringBuilder mongoUrl = new StringBuilder("mongodb://" + username + ":" + password + "@");
        String[] hosts = urls.split(",");
        for (int i = 0; i < hosts.length; i++) {
            // Join hosts in their configured order, comma-separated.
            if (i > 0) {
                mongoUrl.append(",");
            }
            mongoUrl.append(hosts[i]).append(":").append(port);
        }
        // Unique-per-day app name so daily runs are distinguishable in the UI.
        String appName = "allSiteUpdateTask" + DateUtils.getTodayString();
        SparkSession spark = SparkSession.builder()
            .master(master)
            .appName(appName)
            // The connector expects "<uri>/<database>" as the input URI.
            .config("spark.mongodb.input.uri", mongoUrl.toString() + "/" + database)
            .config("spark.mongodb.input.database", database)
            .config("spark.mongodb.input.collection", "statisticsSiteBak1")
            .getOrCreate();
        return spark;
    }

进行汇总:

// Wrap the session's SparkContext so the MongoDB connector can produce a JavaRDD.
JavaSparkContext jsc = new JavaSparkContext(sparkSession().sparkContext());
            // Load the collection configured on the SparkSession, then apply the
            // aggregation pipeline ("list" is built elsewhere — TODO confirm its stages).
            // Generic parameters restored: MongoSpark.load(jsc) yields JavaMongoRDD<Document>.
            JavaMongoRDD<Document> rdd = MongoSpark.load(jsc);
            JavaMongoRDD<Document> dordd = rdd.withPipeline(list);
            // One (key, six-bucket histogram) pair per document; see the mapToPair class.
            JavaPairRDD<String, Integer[]> aggregatedRdds = dordd.mapToPair(new mapToPair());
            // Element-wise sum of the six bucket counters per key.
            JavaPairRDD<String, Integer[]> reduce = aggregatedRdds.reduceByKey(
                (i1, i2) -> new Integer[]{i1[0] + i2[0], i1[1] + i2[1], i1[2] + i2[2],
                                          i1[3] + i2[3], i1[4] + i2[4], i1[5] + i2[5]});

            // take(10) pulls at most ten aggregated pairs back to the driver.
            List<Tuple2<String, Integer[]>> result = reduce.take(10);
            System.out.println(result.size());

分组代码:

/**
 * Maps a MongoDB document to a pair of (value of field "bs2", bucket counts).
 *
 * <p>The numeric string in field "hi1" is classified into one of six
 * half-open intervals — (-inf,1000], (1000,1200], (1200,1400], (1400,1600],
 * (1600,1800], (1800,2000] — and the matching bucket is set to 1. A missing
 * or empty "hi1", or a value above 2000, leaves all buckets at 0.
 *
 * <p>NOTE: the class name is intentionally kept lowercase because callers
 * instantiate it as {@code new mapToPair()}; renaming would break them.
 */
static class mapToPair implements PairFunction<Document, String, Integer[]> {
        @Override
        public Tuple2<String, Integer[]> call(Document dou) throws Exception {
            // Bucket upper bounds: 1000, 1200, 1400, 1600, 1800, 2000
            Integer[] result = new Integer[]{0, 0, 0, 0, 0, 0};
            String raw = dou.getString("hi1");
            if (StringUtils.isNotEmpty(raw)) {
                // Parse once instead of re-parsing in every branch (the original
                // called Float.parseFloat up to 12 times per document).
                float value = Float.parseFloat(raw);
                // The else-if chain already guarantees the lower bound, so each
                // branch only needs the upper-bound check.
                if (value <= 1000.00f) {
                    result[0] = 1;
                } else if (value <= 1200.00f) {
                    result[1] = 1;
                } else if (value <= 1400.00f) {
                    result[2] = 1;
                } else if (value <= 1600.00f) {
                    result[3] = 1;
                } else if (value <= 1800.00f) {
                    result[4] = 1;
                } else if (value <= 2000.00f) {
                    result[5] = 1;
                }
            }
            return new Tuple2<>(dou.getString("bs2"), result);
        }
    }

你可能感兴趣的:(spark)