1. Find the Sentry authorization method inside the Hive hook and extract the authorization code.
2. Extract the parameters the Sentry authorization method needs from the Spark SQL logical plan.
3. Hook the authorization into Spark SQL's execution via Spark SQL extensions.
Stepping through the hive hook code in the Sentry source, I finally found the method that actually performs the authorization; the code is as follows:
public void auth(HiveOperation stmtOperation, Set<ReadEntity> inputs, Set<WriteEntity> outputs, String user) {
HiveAuthzPrivileges stmtAuthObject = HiveAuthzPrivilegesMap.getHiveAuthzPrivileges(stmtOperation);
List<List<DBModelAuthorizable>> inputHierarchy = new ArrayList<List<DBModelAuthorizable>>();
List<List<DBModelAuthorizable>> outputHierarchy = new ArrayList<List<DBModelAuthorizable>>();
switch (stmtAuthObject.getOperationScope()) {
case SERVER:
// validate server level privileges if applicable, e.g. create UDF, register jar, etc.
List<DBModelAuthorizable> serverHierarchy = new ArrayList<DBModelAuthorizable>();
serverHierarchy.add(hiveAuthzBinding.getAuthServer());
inputHierarchy.add(serverHierarchy);
break;
case DATABASE:
// workaround for database scope statements (create/alter/drop db)
List<DBModelAuthorizable> dbHierarchy = new ArrayList<DBModelAuthorizable>();
getInputHierarchyFromInputs(inputHierarchy, inputs);
getOutputHierarchyFromOutputs(outputHierarchy, outputs);
break;
case TABLE:
// workaround for add partitions
if (partitionURI != null) {
inputHierarchy.add(ImmutableList.of(hiveAuthzBinding.getAuthServer(), partitionURI));
}
if (indexURI != null) {
outputHierarchy.add(ImmutableList.of(hiveAuthzBinding.getAuthServer(), indexURI));
}
getInputHierarchyFromInputs(inputHierarchy, inputs);
for (WriteEntity writeEntity : outputs) {
if (filterWriteEntity(writeEntity)) {
continue;
}
List<DBModelAuthorizable> entityHierarchy = new ArrayList<DBModelAuthorizable>();
entityHierarchy.add(hiveAuthzBinding.getAuthServer());
entityHierarchy.addAll(getAuthzHierarchyFromEntity(writeEntity));
outputHierarchy.add(entityHierarchy);
}
if (currTab != null) {
List<DBModelAuthorizable> externalAuthorizableHierarchy = new ArrayList<DBModelAuthorizable>();
externalAuthorizableHierarchy.add(hiveAuthzBinding.getAuthServer());
externalAuthorizableHierarchy.add(currDB);
externalAuthorizableHierarchy.add(currTab);
inputHierarchy.add(externalAuthorizableHierarchy);
}
if (currOutTab != null) {
List<DBModelAuthorizable> externalAuthorizableHierarchy = new ArrayList<DBModelAuthorizable>();
externalAuthorizableHierarchy.add(hiveAuthzBinding.getAuthServer());
externalAuthorizableHierarchy.add(currOutDB);
externalAuthorizableHierarchy.add(currOutTab);
outputHierarchy.add(externalAuthorizableHierarchy);
}
break;
default:
throw new AuthorizationException("Unknown operation scope type " + stmtAuthObject.getOperationScope().toString());
}
hiveAuthzBinding.authorize(stmtOperation, stmtAuthObject, new Subject(user), inputHierarchy, outputHierarchy);
}
The authorization method takes four parameters:
HiveOperation stmtOperation: the type of SQL operation (CREATE, SELECT, and so on)
Set<ReadEntity> inputs: the tables to be read
Set<WriteEntity> outputs: the tables to be created or written to
String user: the user name
The hiveAuthzBinding field is initialized as follows:
hiveConf = new HiveConf();
// initialization requires both hive-site.xml and sentry-site.xml
hiveConf.addResource(new FileInputStream("xxx/conf/hive-site.xml"));
authzConf = HiveAuthzBindingHook.loadAuthzConf(hiveConf);
hiveAuthzBinding = new HiveAuthzBinding(hiveConf, authzConf);
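To call this from the Spark side, I wrap the binding initialization and the extracted method in one small helper, which the rule further below refers to as SparkAuthHook. A minimal Scala sketch, assuming the extracted Java method lives in a class I call SentryHiveAuthzHook here (that name, its constructor, and the config path are placeholders):

import java.io.FileInputStream
import java.util.{Set => JSet}

import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.ql.hooks.{ReadEntity, WriteEntity}
import org.apache.hadoop.hive.ql.plan.HiveOperation
import org.apache.sentry.binding.hive.HiveAuthzBindingHook
import org.apache.sentry.binding.hive.authz.HiveAuthzBinding

class SparkAuthHook(hiveSitePath: String) {

  // same initialization as above: hive-site.xml plus the sentry-site.xml it points to
  private val hiveConf = new HiveConf()
  hiveConf.addResource(new FileInputStream(hiveSitePath))
  private val authzConf = HiveAuthzBindingHook.loadAuthzConf(hiveConf)
  private val hiveAuthzBinding = new HiveAuthzBinding(hiveConf, authzConf)

  // SentryHiveAuthzHook is a placeholder name for whatever class holds the
  // extracted auth(...) method shown above
  private val hook = new SentryHiveAuthzHook(hiveAuthzBinding)

  // delegate to the extracted method
  def auth(op: HiveOperation,
           inputs: JSet[ReadEntity],
           outputs: JSet[WriteEntity],
           user: String): Unit =
    hook.auth(op, inputs, outputs, user)

  // expose Sentry's record of the missing privileges for logging in the rule
  def getLastQueryPrivilegeErrors = hiveAuthzBinding.getLastQueryPrivilegeErrors
}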
There are many ways to get at the Spark SQL LogicalPlan. Since I had looked at custom Optimizer rules before, the first idea was to define a custom Rule and run the authorization check inside it:
class SentryAuthRule extends Rule[LogicalPlan]{
override def apply(plan: LogicalPlan): LogicalPlan = {
// do something
plan
}
}
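For a quick local test the rule can simply be appended to a session's experimental.extraOptimizations (the same mechanism Kyuubi uses further below); a rough sketch:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("sentry-auth-test")  // the app name is arbitrary
  .enableHiveSupport()
  .getOrCreate()

// every subsequent query now passes through SentryAuthRule during optimization
spark.experimental.extraOptimizations ++= Seq(new SentryAuthRule)

The first parameter the auth method needs is the HiveOperation, which can be derived from the plan's node type: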
def getHiveOperation(plan: LogicalPlan): HiveOperation = {
plan match {
case c: Command => c match {
case _: AlterDatabasePropertiesCommand => HiveOperation.ALTERDATABASE
case p if p.nodeName == "AlterTableAddColumnsCommand" => HiveOperation.ALTERTABLE_ADDCOLS
case _: AlterTableAddPartitionCommand => HiveOperation.ALTERTABLE_ADDPARTS
case p if p.nodeName == "AlterTableChangeColumnCommand" =>
HiveOperation.ALTERTABLE_RENAMECOL
case _: AlterTableDropPartitionCommand => HiveOperation.ALTERTABLE_DROPPARTS
case _: AlterTableRecoverPartitionsCommand => HiveOperation.MSCK
case _: AlterTableRenamePartitionCommand => HiveOperation.ALTERTABLE_RENAMEPART
case a: AlterTableRenameCommand =>
if (!a.isView) HiveOperation.ALTERTABLE_RENAME else HiveOperation.ALTERVIEW_RENAME
case _: AlterTableSetPropertiesCommand
| _: AlterTableUnsetPropertiesCommand => HiveOperation.ALTERTABLE_PROPERTIES
case _: AlterTableSerDePropertiesCommand => HiveOperation.ALTERTABLE_SERDEPROPERTIES
case _: AlterTableSetLocationCommand => HiveOperation.ALTERTABLE_LOCATION
case _: AlterViewAsCommand => HiveOperation.QUERY
// case _: AlterViewAsCommand => HiveOperation.ALTERVIEW_AS
case _: AnalyzeColumnCommand => HiveOperation.QUERY
// case _: AnalyzeTableCommand => HiveOperation.ANALYZE_TABLE
// Hive treats AnalyzeTableCommand as QUERY, so we follow suit.
case _: AnalyzeTableCommand => HiveOperation.QUERY
case p if p.nodeName == "AnalyzePartitionCommand" => HiveOperation.QUERY
case _: CreateDatabaseCommand => HiveOperation.CREATEDATABASE
case _: CreateDataSourceTableAsSelectCommand
| _: CreateHiveTableAsSelectCommand => HiveOperation.CREATETABLE_AS_SELECT
case _: CreateFunctionCommand => HiveOperation.CREATEFUNCTION
case _: CreateTableCommand
| _: CreateDataSourceTableCommand => HiveOperation.CREATETABLE
case _: CreateTableLikeCommand => HiveOperation.CREATETABLE
case _: CreateViewCommand
| _: CacheTableCommand
| _: CreateTempViewUsing => HiveOperation.CREATEVIEW
case p if p.nodeName == "DescribeColumnCommand" => HiveOperation.DESCTABLE
case _: DescribeDatabaseCommand => HiveOperation.DESCDATABASE
case _: DescribeFunctionCommand => HiveOperation.DESCFUNCTION
case _: DescribeTableCommand => HiveOperation.DESCTABLE
case _: DropDatabaseCommand => HiveOperation.DROPDATABASE
// Hive does not check privileges for DROP FUNCTION, which would let an unauthorized
// user drop functions.
// We treat permanent functions like tables for the check.
case d: DropFunctionCommand if !d.isTemp => HiveOperation.DROPTABLE
case d: DropFunctionCommand if d.isTemp => HiveOperation.DROPFUNCTION
case _: DropTableCommand => HiveOperation.DROPTABLE
case e: ExplainCommand => getHiveOperation(e.logicalPlan)
case _: InsertIntoDataSourceCommand => HiveOperation.QUERY
case p if p.nodeName == "InsertIntoDataSourceDirCommand" => HiveOperation.QUERY
//case _: InsertIntoHadoopFsRelationCommand => HiveOperation.CREATETABLE_AS_SELECT
case _: InsertIntoHadoopFsRelationCommand => HiveOperation.QUERY
case p if p.nodeName == "InsertIntoHiveDirCommand" => HiveOperation.QUERY
case p if p.nodeName == "InsertIntoHiveTable" => HiveOperation.QUERY
case _: LoadDataCommand => HiveOperation.LOAD
case p if p.nodeName == "SaveIntoDataSourceCommand" => HiveOperation.QUERY
case s: SetCommand if s.kv.isEmpty || s.kv.get._2.isEmpty => HiveOperation.SHOWCONF
case _: SetDatabaseCommand => HiveOperation.SWITCHDATABASE
case _: ShowCreateTableCommand => HiveOperation.SHOW_CREATETABLE
case _: ShowColumnsCommand => HiveOperation.SHOWCOLUMNS
case _: ShowDatabasesCommand => HiveOperation.SHOWDATABASES
case _: ShowFunctionsCommand => HiveOperation.SHOWFUNCTIONS
case _: ShowPartitionsCommand => HiveOperation.SHOWPARTITIONS
case _: ShowTablesCommand => HiveOperation.SHOWTABLES
case _: ShowTablePropertiesCommand => HiveOperation.SHOW_TBLPROPERTIES
case s: StreamingExplainCommand =>
getHiveOperation(s.queryExecution.optimizedPlan)
case _: TruncateTableCommand => HiveOperation.TRUNCATETABLE
case _: UncacheTableCommand => HiveOperation.DROPVIEW
// Commands that do not need any privilege are treated as EXPLAIN
case _ =>
// AddFileCommand
// AddJarCommand
// ...
HiveOperation.EXPLAIN
}
case _ => HiveOperation.QUERY
}
}
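A quick way to sanity-check the mapping is to parse a statement (without running it) and feed the resulting plan in; a rough sketch, assuming a SparkSession named spark and the method above in scope (the table name is made up):

val plan = spark.sessionState.sqlParser.parsePlan("DROP TABLE IF EXISTS tmp_db.tmp_tbl")
// DropTableCommand maps to HiveOperation.DROPTABLE
println(getHiveOperation(plan))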
Command-type plans have output tables, so their inputs and outputs have to be extracted separately:
plan match {
// RunnableCommand
case cmd: Command => buildCommand(cmd, inputs, outputs)
// Queries
case _ => buildQuery(plan, inputs)
}
private[this] def buildCommand(plan: Command, inputs: util.HashSet[ReadEntity], outputs: util.HashSet[WriteEntity]): Unit = {
def addTableOrViewLevelObjs4Input(table: TableIdentifier): Unit = {
table.database match {
case Some(db) =>
val tbName = table.table
inputs.add(new ReadEntity(new Table(db, tbName)))
case _ =>
}
}
def addTableOrViewLevelObjs4Output(table: TableIdentifier, writeType: WriteEntity.WriteType = WriteEntity.WriteType.DDL_SHARED): Unit = {
table.database match {
case Some(db) =>
val tbName = table.table
outputs.add(new WriteEntity(new Table(db, tbName), writeType))
case _ =>
}
}
def addDbLevelObjs4Input(databaseName: String): Unit = {
// TODO: confirm the correct access type for database-level checks
val database = new Database()
database.setName(databaseName)
inputs.add(new ReadEntity(database))
}
def addDbLevelObjs4Output(databaseName: String, writeType: WriteEntity.WriteType = WriteEntity.WriteType.DDL_SHARED): Unit = {
// TODO: confirm the correct access type for database-level checks
val database = new Database()
database.setName(databaseName)
outputs.add(new WriteEntity(database, writeType))
}
plan match {
case a: AlterDatabasePropertiesCommand =>
addDbLevelObjs4Output(a.databaseName)
case a if a.nodeName == "AlterTableAddColumnsCommand" =>
// TODO: adding columns to a table
addTableOrViewLevelObjs4Input(getFieldVal(a, "table").asInstanceOf[TableIdentifier])
addTableOrViewLevelObjs4Output(getFieldVal(a, "table").asInstanceOf[TableIdentifier])
case a: AlterTableAddPartitionCommand =>
// TODO: adding partitions to a table
addTableOrViewLevelObjs4Input(a.tableName)
addTableOrViewLevelObjs4Output(a.tableName)
case a if a.nodeName == "AlterTableChangeColumnCommand" =>
addTableOrViewLevelObjs4Input(getFieldVal(a, "tableName").asInstanceOf[TableIdentifier])
case a: AlterTableDropPartitionCommand =>
addTableOrViewLevelObjs4Input(a.tableName)
addTableOrViewLevelObjs4Output(a.tableName)
case a: AlterTableRecoverPartitionsCommand =>
addTableOrViewLevelObjs4Input(a.tableName)
addTableOrViewLevelObjs4Output(a.tableName)
case a: AlterTableRenameCommand if !a.isView || a.oldName.database.nonEmpty =>
// rename tables / permanent views
addTableOrViewLevelObjs4Input(a.oldName)
addTableOrViewLevelObjs4Output(a.newName)
case a: AlterTableRenamePartitionCommand =>
addTableOrViewLevelObjs4Input(a.tableName)
addTableOrViewLevelObjs4Output(a.tableName)
case a: AlterTableSerDePropertiesCommand =>
addTableOrViewLevelObjs4Input(a.tableName)
addTableOrViewLevelObjs4Output(a.tableName)
case a: AlterTableSetLocationCommand =>
addTableOrViewLevelObjs4Input(a.tableName)
addTableOrViewLevelObjs4Output(a.tableName)
case a: AlterTableSetPropertiesCommand =>
addTableOrViewLevelObjs4Input(a.tableName)
addTableOrViewLevelObjs4Output(a.tableName)
case a: AlterTableUnsetPropertiesCommand =>
addTableOrViewLevelObjs4Input(a.tableName)
addTableOrViewLevelObjs4Output(a.tableName)
case a: AlterViewAsCommand =>
if (a.name.database.nonEmpty) {
// it's a permanent view
addTableOrViewLevelObjs4Output(a.name)
}
buildQuery(a.query, inputs)
case a: AnalyzeColumnCommand =>
addTableOrViewLevelObjs4Input(a.tableIdent)
addTableOrViewLevelObjs4Output(a.tableIdent)
case a if a.nodeName == "AnalyzePartitionCommand" =>
addTableOrViewLevelObjs4Input(getFieldVal(a, "tableIdent").asInstanceOf[TableIdentifier])
addTableOrViewLevelObjs4Output(getFieldVal(a, "tableIdent").asInstanceOf[TableIdentifier])
case a: AnalyzeTableCommand =>
addTableOrViewLevelObjs4Input(getFieldVal(a, "tableIdent").asInstanceOf[TableIdentifier])
addTableOrViewLevelObjs4Output(getFieldVal(a, "tableIdent").asInstanceOf[TableIdentifier])
case c: CacheTableCommand => c.plan.foreach {
buildQuery(_, inputs)
}
case c: CreateDatabaseCommand => addDbLevelObjs4Output(c.databaseName)
case c: CreateDataSourceTableAsSelectCommand =>
c.table.identifier.database match {
case Some(db) =>
addDbLevelObjs4Output(db)
case _ =>
}
addTableOrViewLevelObjs4Output(c.table.identifier)
buildQuery(c.query, inputs)
case c: CreateDataSourceTableCommand =>
addTableOrViewLevelObjs4Output(c.table.identifier)
case c: CreateFunctionCommand if !c.isTemp =>
addDbLevelObjs4Output(c.databaseName.get)
// TODO: functions are not handled for now
//addFunctionLevelObjs(c.databaseName, c.functionName, outputObjs)
case c: CreateHiveTableAsSelectCommand =>
c.tableDesc.identifier.database match {
case Some(db) =>
addDbLevelObjs4Output(db)
case _ =>
}
addTableOrViewLevelObjs4Output(c.tableDesc.identifier)
buildQuery(c.query, inputs)
case c: CreateTableCommand =>
// creating a table requires a privilege on the target database
addDbLevelObjs4Output(c.table.identifier.database.get, WriteEntity.WriteType.DDL_SHARED)
case c: CreateTableLikeCommand =>
c.targetTable.database match {
case Some(db) =>
addDbLevelObjs4Output(db)
case _ =>
}
// Hive does not check the source table's privileges; we deliberately do not follow
// that, because it would leak metadata
addTableOrViewLevelObjs4Input(c.sourceTable)
case c: CreateViewCommand =>
c.viewType match {
case PersistedView =>
// PersistedView will be tied to a database
c.name.database match {
case Some(db) =>
addDbLevelObjs4Output(db)
case _ =>
}
addTableOrViewLevelObjs4Output(c.name)
case _ =>
}
buildQuery(c.child, inputs)
case d if d.nodeName == "DescribeColumnCommand" =>
addTableOrViewLevelObjs4Input(getFieldVal(d, "table").asInstanceOf[TableIdentifier])
case d: DescribeDatabaseCommand =>
addDbLevelObjs4Input(d.databaseName)
case d: DescribeFunctionCommand =>
// todo
// addFunctionLevelObjs(d.functionName.database, d.functionName.funcName, inputObjs)
case d: DescribeTableCommand => addTableOrViewLevelObjs4Input(d.table)
case d: DropDatabaseCommand =>
// the output object alone is enough for the privilege check; the input is added too
// for consistency with Hive's behaviour, in case of unexpected issues
addDbLevelObjs4Input(d.databaseName)
addDbLevelObjs4Output(d.databaseName)
case d: DropFunctionCommand =>
addDbLevelObjs4Output(d.databaseName.get)
case d: DropTableCommand => addTableOrViewLevelObjs4Input(d.tableName)
case i: InsertIntoDataSourceCommand =>
i.logicalRelation.catalogTable.foreach { table =>
addTableOrViewLevelObjs4Output(table.identifier)
}
buildQuery(i.query, inputs)
case i if i.nodeName == "InsertIntoDataSourceDirCommand" =>
buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputs)
case i: InsertIntoHadoopFsRelationCommand =>
// We could obtain the overwrite mode here, but CTAS into a Hive table with text/ORC
// format, or Parquet with spark.sql.hive.convertMetastoreParquet=false, passes the
// privilege check without requiring the UPDATE privilege on the target table, which
// appears to match Hive's behaviour.
// So the overwrite mode is ignored here for consistency.
i.catalogTable foreach { t =>
addTableOrViewLevelObjs4Output(t.identifier)
}
buildQuery(i.query, inputs)
case i if i.nodeName == "InsertIntoHiveDirCommand" =>
buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputs)
case i if i.nodeName == "InsertIntoHiveTable" =>
addTableOrViewLevelObjs4Output(getFieldVal(i, "table").asInstanceOf[CatalogTable].identifier, WriteEntity.WriteType.INSERT)
buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputs)
case l: LoadDataCommand =>
addTableOrViewLevelObjs4Output(l.table)
case s if s.nodeName == "SaveIntoDataSourceCommand" =>
// TODO
// buildQuery(getFieldVal(s, "query").asInstanceOf[LogicalPlan], outputObjs)
case s: SetDatabaseCommand =>
// switching the current database
addDbLevelObjs4Input(s.databaseName)
// addDbLevelObjs4Output(s.databaseName)
case s: ShowColumnsCommand => addTableOrViewLevelObjs4Input(s.tableName)
case s: ShowCreateTableCommand => addTableOrViewLevelObjs4Input(s.table)
case s: ShowFunctionsCommand => s.db.foreach(addDbLevelObjs4Input(_))
case s: ShowPartitionsCommand => addTableOrViewLevelObjs4Input(s.tableName)
case s: ShowTablePropertiesCommand => addTableOrViewLevelObjs4Input(s.table)
case s: ShowTablesCommand => addDbLevelObjs4Input(s.databaseName.get)
case s: TruncateTableCommand =>
addTableOrViewLevelObjs4Output(s.tableName)
case _ =>
}
}
private[this] def buildQuery(plan: LogicalPlan, inputs: util.HashSet[ReadEntity], projectionList: Seq[NamedExpression] = Nil): Unit = {
def addInput(table: TableIdentifier): Unit = {
table.database match {
case Some(db) =>
val tbName = table.table
inputs.add(new ReadEntity(new Table(db, tbName)))
case _ =>
}
}
plan match {
case p: Project => buildQuery(p.child, inputs, p.projectList)
case h if h.nodeName == "HiveTableRelation" =>
addInput(getFieldVal(h, "tableMeta").asInstanceOf[CatalogTable].identifier)
case m if m.nodeName == "MetastoreRelation" =>
addInput(getFieldVal(m, "catalogTable").asInstanceOf[CatalogTable].identifier)
case c if c.nodeName == "CatalogRelation" =>
addInput(getFieldVal(c, "tableMeta").asInstanceOf[CatalogTable].identifier)
case l: LogicalRelation if l.catalogTable.nonEmpty => addInput(l.catalogTable.get.identifier)
case u: UnresolvedRelation =>
// Normally we should not meet an UnresolvedRelation in an optimized plan.
// Unfortunately, the real world is a place where miracles happen.
// We check the privileges directly without resolving the plan and leave the
// rest to Spark.
addInput(u.tableIdentifier)
case p =>
for (child <- p.children) {
buildQuery(child, inputs, projectionList)
}
}
}
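buildCommand and buildQuery can then sit behind one entry point that returns the (inputs, outputs) pair the rule consumes; the completed apply method below calls it as PrivilegesBuilder.extractReadAndWrite, so this sketch assumes it lives in that same PrivilegesBuilder object:

import java.util

import org.apache.hadoop.hive.ql.hooks.{ReadEntity, WriteEntity}
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}

def extractReadAndWrite(plan: LogicalPlan): (util.HashSet[ReadEntity], util.HashSet[WriteEntity]) = {
  val inputs = new util.HashSet[ReadEntity]()
  val outputs = new util.HashSet[WriteEntity]()
  plan match {
    // commands may both read and write
    case cmd: Command => buildCommand(cmd, inputs, outputs)
    // plain queries only read
    case _ => buildQuery(plan, inputs)
  }
  (inputs, outputs)
}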
The user name comes from the current UGI:
val user = UserGroupInformation.getCurrentUser.getShortUserName
With all the pieces in place, the completed apply method looks like this:
class SentryAuthorizable(sparkAuth: SparkAuthHook) extends Rule[LogicalPlan] with Logging {
// sparkAuth wraps the authorization method extracted in step 1
override def apply(plan: LogicalPlan): LogicalPlan = {
try {
val hiveOperation = PrivilegesBuilder.getHiveOperation(plan)
val (inputs, outputs) = PrivilegesBuilder.extractReadAndWrite(plan)
val user = UserGroupInformation.getCurrentUser.getShortUserName
sparkAuth.auth(hiveOperation, inputs, outputs, user)
} catch {
case e: AuthorizationException =>
val lastQueryPrivilegeErrors = sparkAuth.getLastQueryPrivilegeErrors
val errors = StringUtils.join(lastQueryPrivilegeErrors, ";")
logError(
  s"""${e.getMessage}
     |The required privileges: $errors
     |""".stripMargin)
throw e
case e: Exception =>
// any other failure is swallowed here so that it does not break the query itself
}
plan
}
}
In Spark, user-defined rules can be enabled in two ways:
1. Through the withExtensions method on SparkSession.Builder. withExtensions is a higher-order function that takes a user-supplied function with a SparkSessionExtensions parameter; inside that function you register your rules via the inject* methods of SparkSessionExtensions.
2. Through the Spark configuration spark.sql.extensions: package the function from option 1 as a class and set its fully qualified class name as the value (a sketch follows this list).
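For option 2 the registration class might look like the sketch below; SentryAuthExtension is a name I made up, and the SparkAuthHook construction mirrors the initialization shown earlier (the hive-site path is still a placeholder):

import org.apache.spark.sql.SparkSessionExtensions

class SentryAuthExtension extends (SparkSessionExtensions => Unit) {
  override def apply(extensions: SparkSessionExtensions): Unit = {
    // register the optimizer rule from the previous section
    extensions.injectOptimizerRule { _ =>
      new SentryAuthorizable(new SparkAuthHook("xxx/conf/hive-site.xml"))
    }
  }
}

The same class works for both approaches: pass an instance to SparkSession.builder().withExtensions(...), or set spark.sql.extensions to its fully qualified class name.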
For various reasons we do not use Spark's built-in thrift server but the open-source Kyuubi (GitHub: https://github.com/yaooqinn/spark-authorizer, thanks to the author for the contribution), so the authorization also has to be integrated into Kyuubi. Kyuubi already wires authorization rules into the session through extraOptimizations:
// KYUUBI-99: Add authorizer support after use initial db
AuthzHelper.get.foreach { auth =>
_sparkSession.experimental.extraOptimizations ++= auth.rule
}
Searching through the Kyuubi configuration turned up these two options:
spark.kyuubi.authorization.enabled
spark.kyuubi.authorization.class
Adding them to the startup script and pointing spark.kyuubi.authorization.class at the custom Rule above is enough to turn the authorization check on.
In theory that should have been the end of it, but it was not: when Spark SQL queries a Hive view, the authorization failure asks for privileges on the underlying tables, which does not match reality. For example, a view may exist precisely for data masking; the underlying table holds the unmasked data, so users can only ever be granted the view, never the base table. So the work continues... The next attempt wraps the SQL parser and runs the check at parse time:
class SentryAuthorizerByParser(parser: ParserInterface) extends ParserInterface with Logging {
override def parsePlan(sqlText: String): LogicalPlan = {
val plan = parser.parsePlan(sqlText)
// at this point we already have the LogicalPlan; the authorization works exactly as above
...
plan
}
// the remaining ParserInterface methods are omitted
....
}
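Wiring this wrapper in also goes through SparkSessionExtensions, this time with injectParser, which hands us the parser Spark would otherwise use; a rough sketch (SentryParserExtension is again a made-up name):

import org.apache.spark.sql.SparkSessionExtensions

class SentryParserExtension extends (SparkSessionExtensions => Unit) {
  override def apply(extensions: SparkSessionExtensions): Unit = {
    // wrap the delegate parser with the authorizing one
    extensions.injectParser { (session, delegate) =>
      new SentryAuthorizerByParser(delegate)
    }
  }
}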
Testing showed that this check is invoked multiple times: a view referenced in the query is expanded back into its original SQL and parsed again, so the requirement for privileges on the underlying tables came right back.
Time to keep looking...
A rather crude workaround is to walk the call stack and skip the redundant checks:
val stackTrace = Thread.currentThread().getStackTrace
var checked = true
// stop one frame short of the end because we look at the next frame below
for (index <- 0 until stackTrace.length - 1) {
  val elementStr = stackTrace(index).toString
  // this is the current class: check whether parsePlan was called from
  // SessionCatalog.lookupRelation, i.e. while Spark expands a view
  if (elementStr.indexOf("SentryAuthorizerByParser.parsePlan") != -1) {
    val nextStackStr = stackTrace(index + 1).toString
    if (nextStackStr.indexOf("org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupRelation") != -1) checked = false
  }
}
if (checked) {
  // do auth
}
With that in place, the privilege is checked correctly, exactly once per statement.
In Kyuubi the SparkSession is created through reflection, so spark.sql.extensions cannot be injected directly. The fix goes into the create method of the SparkSessionWithUGI class. The original code is:
_sparkSession = ReflectUtils.newInstance(
classOf[SparkSession].getName,
Seq(classOf[SparkContext]),
Seq(context)).asInstanceOf[SparkSession]
The modified code:
val extensionConfOption = context.getConf.get("spark.sql.extensions", "")
if ("".equals(extensionConfOption)) {
_sparkSession = ReflectUtils.newInstance(
classOf[SparkSession].getName,
Seq(classOf[SparkContext]),
Seq(context)).asInstanceOf[SparkSession]
} else {
val extensionConfClassName = extensionConfOption
val extensions = new SparkSessionExtensions
try {
val extensionConf = ReflectUtils.newInstance(extensionConfClassName, Seq(), Seq())
.asInstanceOf[SparkSessionExtensions => Unit]
extensionConf(extensions)
} catch {
// Ignore the error if we cannot find the class or when the class has the wrong type.
case e@(_: ClassCastException |
_: ClassNotFoundException |
_: NoClassDefFoundError) =>
logger.warn(s"Cannot use $extensionConfClassName to configure session extensions.", e)
}
// new SparkSession(sparkContext, None, None, extensions)
_sparkSession = ReflectUtils.newInstance(
classOf[SparkSession].getName,
Seq(classOf[SparkContext], classOf[Option[Nothing]],
classOf[Option[Nothing]],
classOf[SparkSessionExtensions]),
Seq(context, None, None, extensions)).asInstanceOf[SparkSession]
}
Then add the spark.sql.extensions parameter to the Kyuubi startup script and point it at the authorization extension class above.
Still trying out other approaches...
https://github.com/yaooqinn/spark-authorizer
https://blog.csdn.net/weixin_33744854/article/details/89593471