livy使用样例_livy提交spark应用

spark-submit的使用shell时时灵活性较低,livy作为spark提交的一种工具,是使用接口或者java客户端的方式提交,可以集成到web应用中

1.客户端提交的方式

核心代码

LivyClient client = new LivyClientBuilder()

.setURI(new URI(livyUrl))

.build();

try {

System.err.printf("Uploading %s to the Spark context...\n", piJar);

client.uploadJar(new File(piJar)).get();

System.err.printf("Running PiJob with %d samples...\n", samples);

double pi = client.submit(new PiJob(samples)).get();

System.out.println("Pi is roughly: " + pi);

} finally {

client.stop(true);

}

2.REST API

1.以最常使用的batches接口作为例子,请求参数

rest 的http

import org.apache.http.HttpEntity;

import org.apache.http.HttpResponse;

import org.apache.http.client.methods.HttpDelete;

import org.apache.http.client.methods.HttpGet;

import org.apache.http.client.methods.HttpPost;

import org.apache.http.entity.StringEntity;

import org.apache.http.impl.client.CloseableHttpClient;

import org.apache.http.impl.client.HttpClients;

import org.apache.http.util.EntityUtils;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

public class HttpUtils {

//post 请求

public String postAccess(String url, Map headers, String data) {

HttpPost post = new HttpPost(url);

if (headers != null && headers.size() > 0) {

headers.forEach((K, V) -> post.addHeader(K, V));

}

try {

StringEntity entity = new StringEntity(data);

entity.setContentEncoding("UTF-8");

entity.setContentType("application/json");

post.setEntity(entity);

HttpResponse response = httpClient.execute(post);

HttpEntity resultEntity = response.getEntity();

result = EntityUtils.toString(resultEntity);

return result;

} catch (Exception e) {

e.printStackTrace();

logger.error("postAccess执行有误" + e.getMessage());

}

return result;

}

}

livy提交spark应用类,异步线程进行状态打印或者也可以状态监控返回web端

import com.alibaba.fastjson.JSON;

import com.alibaba.fastjson.JSONObject;

import com.wanmi.sbc.dw.utils.GsonUtil;

import com.wanmi.sbc.dw.utils.HttpUtils;

import lombok.SneakyThrows;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

import org.springframework.beans.BeanUtils;

import org.springframework.stereotype.Component;

import java.io.IOException;

import java.util.Arrays;

import java.util.HashMap;

import java.util.List;

/**

* @ClassName: com.spark.submit.impl.livy.LivyApp

* @Description: livy提交spark任务

* @Author: 小何

* @Time: 2020/12/15 10:46

* @Version: 1.0

*/

@Component

public class LivyServer {

private static final Logger logger = LoggerFactory.getLogger(LivyServer.class);

private static final List FAIl_STATUS_LIST = Arrays.asList("shutting_down", "error", "dead", "killed");

private final HashMap headers;

private HttpUtils httpUtils;

public LivyServer() {

headers = new HashMap<>();

headers.put("Content-Type", "application/json");

headers.put("X-Requested-By", "admin");

}

/**

* 提交参数

*

* @param livyParam

* @return

*/

@SneakyThrows

public String batchSubmit(LivyParam livyParam) {

this.httpUtils = new HttpUtils();

String livyUri = livyParam.getLivyUri();

LivyParam livyParamCopy = new LivyParam();

BeanUtils.copyProperties(livyParam, livyParamCopy);

livyParamCopy.setLivyUri(null);

String request = GsonUtil.toJsonString(livyParamCopy);

logger.info("任务提交信息{}", request);

String result = httpUtils.postAccess(livyUri + "/batches", headers, request);

if (!GsonUtil.isJson(result)) {

logger.info("任务提交错误:{}", result);

return "error:" + result;

}

if (result == null) {

return "error:" + "livy地址:" + livyUri + "错误,请检查";

}

logger.info("提交返回任务返回信息:{}", result);

JSONObject jsonObject = JSONObject.parseObject(result);

String state = jsonObject.getString("state");

String id = jsonObject.getString("id");

Thread thread = new Thread(() -> {

try {

queryState(livyParam.getLivyUri(), id, state);

} catch (InterruptedException | IOException e) {

logger.error("线程运行出错:{}", e.fillInStackTrace());

}

}, livyParam.getName() + System.currentTimeMillis());

thread.start();

return result;

}

//提交任务执行状态验证

public void queryState(String livyUrl, String batchId, String responseState) throws InterruptedException, IOException {

if (responseState != null && !FAIl_STATUS_LIST.contains(responseState)) {

boolean isRunning = true;

while (isRunning) {

String url = livyUrl + "/batches/" + batchId;

String batchesInfo = httpUtils.getAccess(url, headers);

JSONObject info = JSON.parseObject(batchesInfo);

String id = info.getString("id");

String sta = info.getString("state");

String appId = info.getString("appId");

String appInfo = info.getString("appInfo");

logger.info("livy:sessionId:{},state:{}", id, sta);

if ("success".equals(sta)) {

logger.info("任务{}:执行完成", appId, appInfo);

httpUtils.close();

isRunning = false;

} else if (FAIl_STATUS_LIST.contains(sta) || sta == null) {

logger.error("任务{}执行有误,请检查后重新提交:\n", appId, batchesInfo);

httpUtils.close();

isRunning = false;

} else if ("running".equals(sta) || "idle".equals(sta) || "starting".equals(sta)) {

logger.info("查看任务{},运行状态:\n{}", appId, batchesInfo);

} else {

logger.info("任务{}状态:{},未知,退出任务查看", id, sta);

isRunning = false;

}

Thread.sleep(5000);

}

}

}

}

livy请求参数

@Data

public class LivyParam {

/**

* livy的地址

*/

private String livyUri;

/**

* 要运行的jar包路径

*/

private String file;

/**

* 运行的代理名

*/

private String proxyUser;

/**

* 运行主类

*/

private String className;

/**

* 主类的参数

*/

private List args;

/**

* 需要运行的jar包

*/

private String thirdJarPath;

private List jars;

private List pyFiles;

private List files;

private String driverMemory;

private Integer driverCores;

private String executorMemory;

private Integer executorCores;

private Integer numExecutors;

private List archives;

/**

* 队列

*/

private String queue;

/**

* appName

*/

private String name;

/**

* 其他配置

*/

private Map conf;

}

测试

构建参数

new livyParam = new LivyParam();

livyParam.setLivyUri(sparkSubmitParam.getLivyUri());

livyParam.setClassName(sparkSubmitParam.getClassName());

livyParam.setArgs(sparkSubmitParam.getArgs());

livyParam.setConf(sparkSubmitParam.getConf());

livyParam.setDriverCores(sparkSubmitParam.getDriverCores());

livyParam.setDriverMemory(sparkSubmitParam.getDriverMemory());

livyParam.setArchives(sparkSubmitParam.getArchives());

livyParam.setExecutorCores(sparkSubmitParam.getExecutorCores());

livyParam.setExecutorMemory(sparkSubmitParam.getExecutorMemory());

livyParam.setJars(sparkSubmitParam.getJars());

livyParam.setFile(sparkSubmitParam.getFile());

livyParam.setName(sparkSubmitParam.getName());

livyParam.setQueue(sparkSubmitParam.getQueue());

livyParam.setProxyUser(sparkSubmitParam.getProxyUser());

//发送请求

String result = liveServer.batchSubmit(livyParam);

你可能感兴趣的:(livy使用样例)