Thursday, March 28, 2019

Dynamically manage Hive tables in Spark


Method to Drop Table and Files
import org.apache.spark.sql.SparkSession
import scala.sys.process._

object test {
  def main(args: Array[String]): Unit = {
    val databaseName = args(0)
    val tableName = args(1)
    val spark = SparkSession.builder().enableHiveSupport().getOrCreate()
    method1(spark, databaseName, tableName)
  }

  def method1(spark: SparkSession, databaseName: String, tableName: String): Unit = {
    import spark.implicits._
    val tblary = Array("Table1", "Table2", "Table3", "Table4")
    for (tab <- tblary) {
      val tblcheck = spark.catalog.tableExists(databaseName, tab)
      if (tblcheck) {
        // "desc formatted" lists the table metadata; the row whose col_name
        // is "Location" carries the HDFS path of the table's files
        val loc = spark.sql(s"desc formatted $databaseName.$tab")
          .filter($"col_name" === "Location")
          .collect()(0)(1).toString

        // can also keep it as a DataFrame with .select("data_type")

        spark.sql(s"drop table $databaseName.$tab")
        // dropping an external table leaves the files behind, so remove them too
        Seq("hadoop", "fs", "-rm", "-r", "-skipTrash", loc).!
      }
    }
  }
}
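Shelling out to the hadoop CLI works, but the same cleanup can be done in-process through Hadoop's FileSystem API, which avoids spawning a child process and gives you the return status directly. A minimal sketch (the helper name deletePath is mine; Path.getFileSystem and FileSystem.delete are standard Hadoop API):

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession

// Hypothetical helper: remove a table's directory via the FileSystem API
// rather than running "hadoop fs -rm -r -skipTrash" as an external command.
def deletePath(spark: SparkSession, location: String): Boolean = {
  val path = new Path(location)
  val fs = path.getFileSystem(spark.sparkContext.hadoopConfiguration)
  // recursive = true deletes the directory and everything under it;
  // note there is no trash here, the data is removed immediately
  fs.delete(path, true)
}

With this in place, the Seq(...).! line inside method1 could be replaced with deletePath(spark, loc).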


Method to Create Table and Files
  // Recreate the table as external at the given base location.
  // The column types below are placeholders; substitute the real schema.
  def test2(spark: SparkSession, databaseName: String, loc: String): Unit = {
    val createTable = s"""CREATE EXTERNAL TABLE IF NOT EXISTS $databaseName.Table1 (
                         |  col1 STRING,
                         |  col2 STRING,
                         |  col3 STRING,
                         |  col4 STRING
                         |)
                         |LOCATION '$loc/Table1'""".stripMargin
    spark.sql(createTable)
  }
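Putting the two methods together, a quick sanity check confirms the table is back in the metastore after the drop-and-recreate cycle (the database name and warehouse path below are placeholders):

// Placeholder names; adjust to your own database and warehouse path
method1(spark, "mydb", "Table1")
test2(spark, "mydb", "hdfs:///user/hive/warehouse/mydb.db")
spark.catalog.tableExists("mydb", "Table1")   // should return true again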
