You can run SQL queries with Spark on TileDB arrays by loading the array as a DataFrame, registering it as a temporary view, and then querying that view with Spark SQL. The same steps are shown below in Scala, Python, and R:
Scala:
// Create a DataFrame from a TileDB array
val df = spark.read
  .format("io.tiledb.spark")
  .option("uri", "s3://my_bucket/my_array")
  .load()

// Create a temporary view and run SQL
df.createOrReplaceTempView("tiledbArray")
val sql_df = spark.sql("SELECT * FROM tiledbArray")
sql_df.show()
Python:
# Create a DataFrame from a TileDB array
# (wrap the chained calls in parentheses so the multi-line expression is valid Python)
df = (
    spark.read
    .format("io.tiledb.spark")
    .option("uri", "s3://my_bucket/my_array")
    .load()
)

# Create a temporary view and run SQL
df.createOrReplaceTempView("tiledbArray")
sql_df = spark.sql("SELECT * FROM tiledbArray")
sql_df.show()
R:
# Create a DataFrame from a TileDB array
df <- read.df(uri = "s3://my_bucket/my_array", source = "io.tiledb.spark")

# Create a temporary view and run SQL
createOrReplaceTempView(df, "tiledbArray")
sql_df <- sql("SELECT * FROM tiledbArray")
head(sql_df)
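
Because the registered view behaves like any other Spark table, you can run arbitrary Spark SQL against it. The Scala sketch below filters the view on a column named a; that column name is hypothetical and should be replaced with an attribute or dimension from your own array schema.

// A minimal sketch: filter the registered view with standard Spark SQL.
// The column name "a" is hypothetical; substitute an attribute or dimension
// from your array schema.
val filtered_df = spark.sql("SELECT * FROM tiledbArray WHERE a > 10")
filtered_df.show()

// The result is an ordinary DataFrame, so the usual DataFrame API applies.
println(filtered_df.count())

Depending on the connector version, simple predicates like the one above may be pushed down to TileDB so that only the matching cells are read from the array.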