Serverless UDFs
Below we show how to use Python UDFs in TileDB Cloud, with an example that uses numpy to compute the median of random numbers.
Python
R
Java
import tiledb, tiledb.cloud, numpy, random
def mymedian():
    """Return the median of a randomly sized list of random integers.

    Between 1 and 48 values are generated; the value at position ``i``
    (counting from 1) is drawn with ``random.randrange(0, i)``.
    """
    # The upper bound of the range is itself random, so the number of
    # values differs from call to call.
    vals = [random.randrange(0, i) for i in range(1, random.randrange(2, 50))]
    return numpy.median(vals)
# Authenticate first. A token works as well:
#   tiledb.cloud.login(token="my_token")
tiledb.cloud.login(
    username="my_username",
    password="my_password",
)

# Execute mymedian as a UDF and display the value it returned.
result = tiledb.cloud.udf.exec(mymedian)
print(result)
library(tiledbcloud)
# Median of a random-length vector of random integers: the length is drawn
# from 2..50 and the element at position i is drawn from 0..i.
mymedian <- function() {
  n_vals <- sample(2:50, 1)
  draws <- vapply(
    seq_len(n_vals),
    function(upper) sample(0:upper, 1),
    numeric(1)
  )
  median(draws)
}
# Authenticate with TileDB Cloud.
tiledbcloud::login(username = "my_username", password = "my_password")
# or tiledbcloud::login(api_key = "my_token")

# Run mymedian as a UDF and print the value it returns.
res <- tiledbcloud::execute_generic_udf(mymedian)
print(res)
// The Java client is only capable of running existing (already registered) UDFs.
// Authenticate with an API token wrapped in a TileDBLogin object.
TileDBLogin login = new TileDBLogin(null, null, "<TILEDB_API_TOKEN>", true, true, true);
TileDBClient tileDBClient = new TileDBClient(login);

// The TileDBUDF object is what executes UDFs for this client.
TileDBUDF tileDBUDF = new TileDBUDF(tileDBClient, "TileDB-Inc");

// Describe the generic UDF to run by its registered name.
GenericUDF genericUDF = new GenericUDF();
genericUDF.setUdfInfoName("TileDB-Inc/print-udf");

// Run the UDF and print the result. Results can also be returned in Arrow or
// JSON via executeGenericArrow() and executeGenericJSON().
System.out.println(
    tileDBUDF.executeGeneric(genericUDF, null));
The UDF can receive any number of arguments, with keyword arguments supported as well.
Python
Java
import tiledb, tiledb.cloud, numpy, random
def multi_args(arg1, arg2, arg3=None, arg4={}):
    """Print the type and value of each argument; return nothing.

    ``arg4`` keeps its ``{}`` default; the function only reads it, so the
    shared default object is never modified.
    """
    # These will print in the logs of the udf
    print("type(arg1)={}, arg1={}\n".format(type(arg1), arg1))
    print("type(arg2)={}, arg2={}\n".format(type(arg2), arg2))
    print("type(arg3)={}, arg3={}\n".format(type(arg3), arg3))
    print("type(arg4)={}, arg4={}\n".format(type(arg4), arg4))
    return
tiledb.cloud.login(
    username="my_username",
    password="my_password",
)
# Alternatively: tiledb.cloud.login(token="my_token")

# Three positional arguments for the UDF, followed by one keyword argument.
result = tiledb.cloud.udf.exec(
    multi_args, [1, 2, 3], {"dictionary": "arg2_test"}, False, arg4=True
)
print(result)  # None since the function returned nothing

# Fetch the most recent UDF task and show its logs.
last_task = tiledb.cloud.last_udf_task()
print(last_task.logs)
// The Java client is only capable of running existing (already registered) UDFs.
// Authenticate with an API token wrapped in a TileDBLogin object.
TileDBLogin login = new TileDBLogin(null, null, "<TILEDB_API_TOKEN>", true, true, true);
TileDBClient tileDBClient = new TileDBClient(login);

// The TileDBUDF object is what executes UDFs for this client.
TileDBUDF tileDBUDF = new TileDBUDF(tileDBClient, "TileDB-Inc");

// Describe the generic UDF to run by its registered name.
GenericUDF genericUDF = new GenericUDF();
genericUDF.setUdfInfoName("TileDB-Inc/args-udf");

// Collect the arguments to pass, keyed by argument name.
HashMap<String,Object> arguments = new HashMap<>();
arguments.put("arg1", "a1");
arguments.put("arg2", "a2");

// Run the UDF with those arguments and print the result. Results can also be
// returned in Arrow or JSON via executeGenericArrow() and executeGenericJSON().
System.out.println(
    tileDBUDF.executeGeneric(genericUDF, arguments));
An async version of UDFs is available, which returns a future.
Python
R
import tiledb, tiledb.cloud, numpy, random
def mymedian():
    """Return the median of a randomly sized list of random integers.

    Between 1 and 48 values are generated; the value at position ``i``
    (counting from 1) is drawn with ``random.randrange(0, i)``.
    """
    # The upper bound of the range is itself random, so the number of
    # values differs from call to call.
    vals = [random.randrange(0, i) for i in range(1, random.randrange(2, 50))]
    return numpy.median(vals)
tiledb.cloud.login(
    username="my_username",
    password="my_password",
)
# Alternatively: tiledb.cloud.login(token="my_token")

# exec_async hands back a future instead of the result itself.
future = tiledb.cloud.udf.exec_async(mymedian)

# get() blocks until the result is available.
value = future.get()
print(value)
library(tiledbcloud)
# Median of a random-length vector of random integers: the length is drawn
# from 2..50 and the element at position i is drawn from 0..i.
mymedian <- function() {
  n_vals <- sample(2:50, 1)
  draws <- vapply(
    seq_len(n_vals),
    function(upper) sample(0:upper, 1),
    numeric(1)
  )
  median(draws)
}
# Authenticate with TileDB Cloud (R client syntax).
tiledbcloud::login(username = "my_username", password = "my_password")
# or tiledbcloud::login(api_key = "my_token")

# res will be a future
res <- tiledbcloud::delayed_generic_udf(mymedian, args = list())

# call compute(res) to block on the results
print(compute(res))
If you are a member of an organization, then by default the organization is charged for your UDF. If you would like to charge the UDF task to yourself, you just need to add one extra argument:
`namespace`.
Python
R
Java
import tiledb, tiledb.cloud, numpy, random
def mymedian():
    """Return the median of a randomly sized list of random integers.

    Between 1 and 48 values are generated; the value at position ``i``
    (counting from 1) is drawn with ``random.randrange(0, i)``.
    """
    # The upper bound of the range is itself random, so the number of
    # values differs from call to call.
    vals = [random.randrange(0, i) for i in range(1, random.randrange(2, 50))]
    return numpy.median(vals)
tiledb.cloud.login(
    username="my_username",
    password="my_password",
)
# Alternatively: tiledb.cloud.login(token="my_token")

# Passing namespace charges the task to that namespace (here, the user).
result = tiledb.cloud.udf.exec(
    mymedian,
    namespace="my_username",
)
print(result)
library(tiledbcloud)
# Median of a random-length vector of random integers: the length is drawn
# from 2..50 and the element at position i is drawn from 0..i.
mymedian <- function() {
  n_vals <- sample(2:50, 1)
  draws <- vapply(
    seq_len(n_vals),
    function(upper) sample(0:upper, 1),
    numeric(1)
  )
  median(draws)
}
# Authenticate with TileDB Cloud (R client syntax).
tiledbcloud::login(username = "my_username", password = "my_password")
# or tiledbcloud::login(api_key = "my_token")

# Passing namespace charges the task to that namespace (here, the user).
res <- tiledbcloud::execute_generic_udf(mymedian, namespace = "my_username")
print(res)
// The Java client is only capable of running existing (already registered) UDFs.
// Authenticate with an API token wrapped in a TileDBLogin object.
TileDBLogin login = new TileDBLogin(null, null, "<TILEDB_API_TOKEN>", true, true, true);
TileDBClient tileDBClient = new TileDBClient(login);

// Create a TileDBUDF object. The second parameter is the namespace to be charged.
TileDBUDF tileDBUDF = new TileDBUDF(tileDBClient, "TileDB-Inc");

// Describe the generic UDF to run by its registered name.
GenericUDF genericUDF = new GenericUDF();
genericUDF.setUdfInfoName("TileDB-Inc/print-udf");

// Run the UDF and print the result. Results can also be returned in Arrow or
// JSON via executeGenericArrow() and executeGenericJSON().
System.out.println(
    tileDBUDF.executeGeneric(genericUDF, null));
Each UDF runs by default in an isolated environment with 2 CPUs and 2 GB of memory. You can choose an alternative runtime environment from the following list:
Name | Description |
---|---|
standard | 2 CPUs, 2 GB RAM |
large | 8 CPUs, 8 GB RAM |
Charges are based on the total number of CPUs selected, not on the actual use.
To run a UDF in a specific environment, set the `resource_class` parameter to the name of the environment.
Python
R
Java
import tiledb, tiledb.cloud, numpy, random

def mymedian():
    """Return the median of a randomly sized list of random integers.

    Between 1 and 48 values are generated; the value at position ``i``
    (counting from 1) is drawn with ``random.randrange(0, i)``.
    """
    vals = []
    for i in range(1, random.randrange(2, 50)):
        vals.append(random.randrange(0, i))
    return numpy.median(vals)

tiledb.cloud.login(username="my_username", password="my_password")
# or tiledb.cloud.login(token="my_token")

# resource_class selects the runtime environment the UDF runs in.
res = tiledb.cloud.udf.exec(mymedian, resource_class="large")
print(res)
library(tiledbcloud)
# Median of a random-length vector of random integers: the length is drawn
# from 2..50 and the element at position i is drawn from 0..i.
mymedian <- function() {
  n_vals <- sample(2:50, 1)
  draws <- vapply(
    seq_len(n_vals),
    function(upper) sample(0:upper, 1),
    numeric(1)
  )
  median(draws)
}
# Authenticate with TileDB Cloud (R client syntax).
tiledbcloud::login(username = "my_username", password = "my_password")
# or tiledbcloud::login(api_key = "my_token")

# resource_class selects the runtime environment the UDF runs in.
res <- tiledbcloud::execute_generic_udf(mymedian, resource_class = "large")
print(res)
setResourceClass
// Authenticate with an API token wrapped in a TileDBLogin object.
TileDBLogin login = new TileDBLogin(null, null, "<TILEDB_API_TOKEN>", true, true, true);
TileDBClient tileDBClient = new TileDBClient(login);

// The TileDBUDF object is what executes UDFs for this client.
TileDBUDF tileDBUDF = new TileDBUDF(tileDBClient, "TileDB-Inc");

// Describe the generic UDF to run by its registered name, and pick the
// resource class it should run in.
GenericUDF genericUDF = new GenericUDF();
genericUDF.setUdfInfoName("TileDB-Inc/print-udf");
genericUDF.setResourceClass("large");

// Run the UDF and print the result. Results can also be returned in Arrow or
// JSON via executeGenericArrow() and executeGenericJSON().
System.out.println(
    tileDBUDF.executeGeneric(genericUDF, null));
You can register a UDF (similar to arrays) as follows:
Python
R
Java
import tiledb, tiledb.cloud, numpy, random
def mymedian():
    """Return the median of a randomly sized list of random integers.

    Between 1 and 48 values are generated; the value at position ``i``
    (counting from 1) is drawn with ``random.randrange(0, i)``.
    """
    # The upper bound of the range is itself random, so the number of
    # values differs from call to call.
    vals = [random.randrange(0, i) for i in range(1, random.randrange(2, 50))]
    return numpy.median(vals)
tiledb.cloud.login(username="my_username", password="my_password")
# or tiledb.cloud.login(token="my_token")

# Register the function defined above (mymedian) under the name "my_median".
tiledb.cloud.udf.register_generic_udf(mymedian, name="my_median", namespace="my_username")
library(tiledbcloud)
# Median of a random-length vector of random integers: the length is drawn
# from 2..50 and the element at position i is drawn from 0..i.
mymedian <- function() {
  n_vals <- sample(2:50, 1)
  draws <- vapply(
    seq_len(n_vals),
    function(upper) sample(0:upper, 1),
    numeric(1)
  )
  median(draws)
}
# Authenticate with TileDB Cloud (R client syntax).
tiledbcloud::login(username = "my_username", password = "my_password")
# or tiledbcloud::login(api_key = "my_token")

# Register mymedian as a generic UDF named "my_median" in the given namespace.
tiledbcloud::register_udf(
  namespace = "my_namespace",
  name = "my_median",
  type = "generic",
  func = mymedian
)
Currently, registering a UDF is only possible via the Python or R client.
In order to be able to register a UDF you need to set up the default storage path for you and/or your organization.
Last modified 1mo ago