Serverless SQL
Last updated
Last updated
SQL queries use , and can be executed with the TileDB Cloud client as follows.
import tiledb.cloud
tiledb.cloud.login(username="my_username", password="my_password")
# or tiledb.cloud.login(token="my_token")
# res is a Pandas dataframe
res = tiledb.cloud.sql.exec(
query = "select `rows`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `rows`")
library(tilebcloud)
tiledbcloud::login(username="my_username", password="my_password")
# or tiledbcloud::login(api_key="my_token")
# res is a data.frame
res <- execute_sql_query(
"select `cols`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `cols`",
)
show(res)
// Login using a TileDBLogin object
TileDBClient tileDBClient = new TileDBClient(
new TileDBLogin(
null,
null,
"<TILEDB_API_TOKEN>",
true,
true,
true));
// Create an SQL query
SQLParameters sql = new SQLParameters();
sql.setQuery("select `cols`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `cols`");
// Create a TileDBSQL object
TileDBSQL tiledbSQL = new TileDBSQL(tileDBClient, "NAMESPACE", sql);
// Get the resutls back in arrow format
Pair<ArrayList<ValueVector>, Integer> results = tiledbSQL.execArrow();
Supposing that there is an array tiledb://user/array_name
that you have write permissions for, you can run a SQL query that writes the results to that array as follows:
import tiledb.cloud
tiledb.cloud.login(username="my_username", password="my_password")
# or tiledb.cloud.login(token="my_token")
tiledb.cloud.sql.exec(
query = "select `rows`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `rows`",
output_uri = "tiledb://user/array_name")
// Create a TileDB-Client and login using a TileDBLogin object
TileDBClient tileDBClient = new TileDBClient(
new TileDBLogin(
null,
null,
"<TILEDB_API_TOKEN>",
true,
true,
true));
// Create an SQL query
SQLParameters sql = new SQLParameters();
sql.setQuery("select `cols`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `cols`");
// Setting the output URI
sql.setOutputUri("tiledb://user/array_name");
// Create a TileDBSQL object
TileDBSQL tiledbSQL = new TileDBSQL(tileDBClient, "NAMESPACE", sql);
// Run
List<Object> results = tiledbSQL.exec();
You can run any SQL statement, including CTEs.
import tiledb.cloud
tiledb.cloud.login(username="my_username", password="my_password")
# or tiledb.cloud.login(token="my_token")
# INSERT INTO ...
tiledb.cloud.sql.exec(query = "insert into `tiledb://user1/output1` select AVG(a) from `tiledb://user1/myarray1`")
# CTE
tiledb.cloud.sql.exec(query =
"""
WITH t1 AS (select `dim`, `dim2`, AVG(a), AVG(a) OVER (partition by dim2) FROM `tiledb://user1/array1`)
insert into `tiledb://user/array_name`
select * from t1
""")
// Create a TileDB-Client and login using a TileDBLogin object
TileDBClient tileDBClient = new TileDBClient(
new TileDBLogin(
null,
null,
"<TILEDB_API_TOKEN>",
true,
true,
true));
// Create an SQL query
SQLParameters sql = new SQLParameters();
sql.setQuery("insert into `tiledb://user1/output1` select AVG(a) from `tiledb://user1/myarray1`");
// Create a TileDBSQL object
TileDBSQL tiledbSQL = new TileDBSQL(tileDBClient, "NAMESPACE", sql);
// Run
List<Object> results = tiledbSQL.exec();
// CTE
sql.setQuery("WITH t1 AS (select `dim`, `dim2`, AVG(a), AVG(a) OVER (partition by dim2) FROM `tiledb://user1/array1`)insert into `tiledb://user/array_name`select * from t1");
// Run
List<Object> results = tiledbSQL.exec();
If the array does not exist, you will just need to create and pass an array schema that complies with the result schema.
import tiledb.cloud
import numpy as np
import pandas
tiledb.cloud.login(username="my_username", password="my_password")
# or tiledb.cloud.login(token="my_token")
# The output array has a single dimension
dom = tiledb.Domain(tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int32))
# The output array has a single attribute
attr = tiledb.Attr(name="avg_a", dtype=np.float32)
# The output array is sparse
schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=[attr])
# The following will create the output array and write the results to it
tiledb.cloud.sql.exec(
query = "select `rows`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `rows`",
output_schema = schema,
output_uri = "tiledb://user/s3://my_bucket/my_array") # Notice the full path here
We also provide an auxiliary function exec_and_fetch
, which executes a SQL query as above, but then opens the resulting output array so that it is ready for use.
import tiledb.cloud
import pandas
tiledb.cloud.login(username="my_username", password="my_password")
# or tiledb.cloud.login(token="my_token")
A = tiledb.cloud.sql.exec_and_fetch(
query = "select `rows`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `rows`",
output_uri="tiledb://user/array_name")
# `A` is an opened array for reading, ready for use
print(A.df[:])
If you are a member of an organization, then by default the organization is charged for your SQL query. If you would like to charge the SQL query to yourself, you just need to add one extra argument namespace
.
import tiledb.cloud
tiledb.cloud.login(username="my_username", password="my_password")
# or tiledb.cloud.login(token="my_token")
tiledb.cloud.sql.exec(
namespace ="my_username", # Who to charge the query to
query = "select `rows`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `rows`",
output_uri = "tiledb://user/array_name")
// Create a TileDB-Client and login using a TileDBLogin object
TileDBClient tileDBClient = new TileDBClient(
new TileDBLogin(
null,
null,
"<TILEDB_API_TOKEN>",
true,
true,
true));
// Create an SQL query
SQLParameters sql = new SQLParameters();
sql.setQuery("select `cols`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `cols`");
// Create a TileDBSQL object
// Charges will occur according to the namespace parameter provided here
TileDBSQL tiledbSQL = new TileDBSQL(tileDBClient, "NAMESPACE", sql);
// Get the resutls back in arrow format
Pair<ArrayList<ValueVector>, Integer> results = tiledbSQL.execArrow();
Each serverless SQL runs by default in an isolated environment with 2 CPUs and 2 GB of memory. You can choose an alternative runtime environment from the following list:
standard
2 CPUs, 2 GB RAM
large
8 CPUs, 8 GB RAM
Charges are based on the total number of CPUs selected, not on actual use.
To run a serverless SQL in a specific environment, set the resource_class
parameter to the name of the environment.
import tiledb.cloud
tiledb.cloud.login(username="my_username", password="my_password")
# or tiledb.cloud.login(token="my_token")
tiledb.cloud.sql.exec(
resource_class ="large",
query = "select `rows`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `rows`",
output_uri = "tiledb://user/array_name")
// Create a TileDB-Client and login using a TileDBLogin object
TileDBClient tileDBClient = new TileDBClient(
new TileDBLogin(
null,
null,
"<TILEDB_API_TOKEN>",
true,
true,
true));
// Create an SQL query
SQLParameters sql = new SQLParameters();
sql.setQuery("select `cols`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `cols`");
// Setting the resource class
sql.setResourceClass("large");
// Create a TileDBSQL object
TileDBSQL tiledbSQL = new TileDBSQL(tileDBClient, "NAMESPACE", sql);
// Get the resutls back in arrow format
Pair<ArrayList<ValueVector>, Integer> results = tiledbSQL.execArrow();
An asynchronous version of serverless SQL is available. The _async
version returns a future.
import tiledb.cloud
tiledb.cloud.login(username="my_username", password="my_password")
# or tiledb.cloud.login(token="my_token")
# res is a future
res = tiledb.cloud.sql.exec_async(
query = "select `rows`, AVG(a) as avg_a from `tiledb://TileDB-Inc/quickstart_dense` GROUP BY `rows`")
# Call res.get() to block on the results
print(res.get())
It is also possible to use SQL to create a new array.
import tiledb.cloud
tiledb.cloud.login(username="my_username", password="my_password")
# or tiledb.cloud.login(token="my_token")
# Create an array that will be registered in TileDB Cloud and stored in the
# S3 bucket and location specied in the `uri` parameter. The `uri` parameter
# overrides the normal use of the table name `example`.
# The array is automatically registered in TileDB cloud
# under `my_username`/`create_example_1`
tiledb.cloud.sql.exec("""
CREATE TABLE `example` (
dim0 integer DIMENSION=1 lower_bound="0" upper_bound="100" tile_extent="10",
dim1 integer DIMENSION=1 lower_bound="0" upper_bound="100" tile_extent="10",
attr1 varchar(255) filters="GZIP=-1",
attr2 FLOAT filters="GZIP=-1",
attr3 DATETIME filters="GZIP=-1"
) engine=MyTile
array_type='SPARSE'
coordinate_filters="NONE"
offset_filters="POSITIVE_DELTA=128"
uri='tiledb://my_username/s3://my_bucket/create_example_1';
""")
# You can also use `Primary Key` to specify which fields are the dimensions
# This replaces the `DIMENSION=1` parameter
# The array is automatically registered in TileDB cloud
# under `my_username`/`create_example_2`
tiledb.cloud.sql.exec("""
CREATE TABLE `example` (
dim0 integer lower_bound="0" upper_bound="100" tile_extent="10" NOT NULL,
dim1 bigint lower_bound="0" upper_bound="100" tile_extent="10" NOT NULL,
attr1 varchar(255) filters="GZIP=-1",
PRIMARY KEY(dim0, dim1)
) engine=MyTile
array_type='SPARSE'
coordinate_filters="NONE"
offset_filters="POSITIVE_DELTA=128"
uri='tiledb://my_username/s3://my_bucket/create_example_2';
""")
# Lastly you can also use `INDEX()` to set dimensions and allow duplicates
# By default duplicate coordinates for dimensions are not allowed,
# uniqueness is enforced just like a primary key. Using `Index()`
# instead triggers the array to be created allowing duplicates coordinate values.
# The array is automatically registered in TileDB cloud
# under `my_username`/`create_example_3`
tiledb.cloud.sql.exec("""
CREATE TABLE `example` (
dim0 integer lower_bound="0" upper_bound="100" tile_extent="10" NOT NULL,
dim1 varchar(255) NOT NULL,
attr1 varchar(255) filters="GZIP=-1",
INDEX(dim0, dim1)
) engine=MyTile
array_type='SPARSE'
coordinate_filters="NONE"
offset_filters="POSITIVE_DELTA=128"
uri='tiledb://my_username/s3://my_bucket/create_example_3';
""")
// Create a TileDB-Client and login using a TileDBLogin object
TileDBClient tileDBClient = new TileDBClient(
new TileDBLogin(
null,
null,
"<TILEDB_API_TOKEN>",
true,
true,
true));
// Create an SQL query
SQLParameters sql = new SQLParameters();
sql.setQuery("CREATE TABLE `example` (dim0 integer DIMENSION=1 lower_bound=\"0\" upper_bound=\"100\" tile_extent=\"10\",dim1 integer DIMENSION=1 lower_bound=\"0\" upper_bound=\"100\" tile_extent=\"10\",attr1 varchar(255) filters=\"GZIP=-1\",attr2 FLOAT filters=\"GZIP=-1\",attr3 DATETIME filters=\"GZIP=-1\") engine=MyTilearray_type='SPARSE'coordinate_filters=\"NONE\"offset_filters=\"POSITIVE_DELTA=128\"uri='tiledb://my_username/s3://my_bucket/create_example_1';");
// Create a TileDBSQL object
TileDBSQL tiledbSQL = new TileDBSQL(tileDBClient, "NAMESPACE", sql);
// Get the resutls back in arrow format
List<Object> results = tiledbSQL.exec();
See Retry Settings for more information on this topic.