Query Conditions
Query conditions let a read query return only the data that satisfies a given expression. Rather than filtering the results after the query completes, the condition is pushed down to TileDB, which returns only the subset of valid elements that meet it.
// Create a context and open the array for reading
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);
tiledb_array_t* array_read;
tiledb_array_alloc(ctx, "<array_uri>", &array_read);
tiledb_array_open(ctx, array_read, TILEDB_READ);

// Query condition where a1 != NULL
tiledb_query_condition_t* queryCondition1;
tiledb_query_condition_alloc(ctx, &queryCondition1);
tiledb_query_condition_init(ctx, queryCondition1, "a1", NULL, 0, TILEDB_NE);

// Query condition where a2 > 10.5
float conditionVal = 10.5f;
tiledb_query_condition_t* queryCondition2;
tiledb_query_condition_alloc(ctx, &queryCondition2);
tiledb_query_condition_init(ctx, queryCondition2, "a2", &conditionVal, sizeof(float), TILEDB_GT);

// Query condition where (a1 != NULL || a2 > 10.5)
// The combine call allocates the combined condition itself, so it must not
// be allocated beforehand (doing so would leak the first allocation)
tiledb_query_condition_t* queryCondition;
tiledb_query_condition_combine(ctx, queryCondition1, queryCondition2, TILEDB_OR, &queryCondition);

// Slice rows 1, 2 and cols 2, 3, 4
int32_t subarray_ranges[] = {1, 2, 2, 4};
tiledb_subarray_t* subarray;
tiledb_subarray_alloc(ctx, array_read, &subarray);
tiledb_subarray_set_subarray(ctx, subarray, subarray_ranges);

// Allocate buffers for query (2 rows x 3 cols = 6 cells)
int32_t a1_read[6];
uint64_t a1_read_size = sizeof(a1_read);
uint8_t a1_read_validity[6];
uint64_t a1_read_validity_size = sizeof(a1_read_validity);
float a2_read[6];
uint64_t a2_read_size = sizeof(a2_read);

// Build and submit the read query
tiledb_query_t* query_read;
tiledb_query_alloc(ctx, array_read, TILEDB_READ, &query_read);
tiledb_query_set_layout(ctx, query_read, TILEDB_ROW_MAJOR);
tiledb_query_set_data_buffer(ctx, query_read, "a1", a1_read, &a1_read_size);
tiledb_query_set_validity_buffer(ctx, query_read, "a1", a1_read_validity, &a1_read_validity_size);
tiledb_query_set_data_buffer(ctx, query_read, "a2", a2_read, &a2_read_size);
tiledb_query_set_subarray_t(ctx, query_read, subarray);
tiledb_query_set_condition(ctx, query_read, queryCondition);
tiledb_query_submit(ctx, query_read);

// For sparse arrays, a1_read_size will be set to number of bytes read into the buffer
// + Values that don't meet the query condition won't be read, so we can use byte math to get a1_result_num
uint64_t a1_result_num = a1_read_size / sizeof(int32_t);
for (uint64_t i = 0; i < a1_result_num; i++) {
  // Print buffers from sparse array...
}

// For dense arrays, we can use the fill value of an attribute to check the element met our conditions
tiledb_array_schema_t* schema;
tiledb_array_get_schema(ctx, array_read, &schema);
tiledb_attribute_t* attr;
tiledb_array_schema_get_attribute_from_name(ctx, schema, "a2", &attr);
const void* fillValPtr;
uint64_t valSize;
tiledb_attribute_get_fill_value(ctx, attr, &fillValPtr, &valSize);
// Copy the value now so it stays usable after the attribute handle is freed
float fillVal = *(const float*)fillValPtr;
for (uint64_t i = 0; i < a2_read_size / sizeof(float); i++) {
  // A float fill value may be NaN (the TileDB default for float attributes).
  // NaN never compares equal to itself, so treat "both NaN" as a match too.
  int isFill = (a2_read[i] == fillVal) ||
               (a2_read[i] != a2_read[i] && fillVal != fillVal);
  if (!isFill) {
    // Print buffers from dense array...
  }
}

// Free allocated objects (dependents first, context last)
tiledb_attribute_free(&attr);
tiledb_array_schema_free(&schema);
tiledb_query_free(&query_read);
tiledb_subarray_free(&subarray);
tiledb_query_condition_free(&queryCondition);
tiledb_query_condition_free(&queryCondition2);
tiledb_query_condition_free(&queryCondition1);
tiledb_array_close(ctx, array_read);
tiledb_array_free(&array_read);
tiledb_ctx_free(&ctx);
// Create a context and open the array for reading
Context ctx;
Array array_read(ctx, "<array_uri>", TILEDB_READ);

// QueryCondition for attribute values where a1 == nullptr
QueryCondition queryCondition1(ctx);
queryCondition1.init("a1", nullptr, 0, TILEDB_EQ);

// QueryCondition for attribute values where a2 <= 10.5
float qcVal = 10.5f;
QueryCondition queryCondition2(ctx);
queryCondition2.init("a2", &qcVal, sizeof(float), TILEDB_LE);

// QueryCondition for (a1 == nullptr && a2 <= 10.5)
QueryCondition queryCondition = queryCondition1.combine(queryCondition2, TILEDB_AND);

// Slice rows 1, 2 and cols 2, 3, 4
Subarray subarray(ctx, array_read);
subarray.add_range("rows", 1, 2)
    .add_range("cols", 2, 4);

// Allocate buffers for query (2 rows x 3 cols = 6 cells)
std::vector<int32_t> a1_read(6);
std::vector<uint8_t> a1_read_validity(6);
std::vector<float> a2_read(6);

// Build the read query; the combined condition is the one that must be set,
// otherwise only the a2 half of the expression would be applied
Query query_read(ctx, array_read);
query_read.set_layout(TILEDB_ROW_MAJOR)
    .set_subarray(subarray)
    .set_data_buffer("a1", a1_read)
    .set_validity_buffer("a1", a1_read_validity)
    .set_data_buffer("a2", a2_read)
    .set_condition(queryCondition);
query_read.submit();

// For sparse arrays, we can check query result buffers for number of elements read with our query condition
auto buffers = query_read.result_buffer_elements();
uint64_t a1_result_num = buffers["a1"].second;
for (uint64_t i = 0; i < a1_result_num; i++) {
  // Print or consume buffers from sparse array...
}

// For dense arrays, we can use the fill value of an attribute to check the element met our conditions
const void* fillValPtr = nullptr;
uint64_t fillValSize = 0;
auto a2 = array_read.schema().attribute("a2");
a2.get_fill_value(&fillValPtr, &fillValSize);
const float fillVal = *static_cast<const float*>(fillValPtr);
for (size_t i = 0; i < a2_read.size(); i++) {
  // A float fill value may be NaN (the TileDB default for float attributes).
  // NaN never compares equal to itself, so treat "both NaN" as a match too.
  const bool isFill = (a2_read[i] == fillVal) ||
                      (a2_read[i] != a2_read[i] && fillVal != fillVal);
  if (!isFill) {
    // Print or consume buffers from dense array...
  }
}
import tiledb
with tiledb.open(uri, mode="r") as A:
    # Select cells where the value of attribute `foo` is greater than 5
    # and the value of `bar` equals the string "asdf".
    # The condition is passed as a string containing a valid Python
    # Boolean expression. Note that strings are enclosed in quotes (either
    # single or double quotes) whereas attribute names are not. The exception
    # is if the attribute name has any special characters in it, in which
    # case replace `namehere` with `attr("namehere")`.
    q = A.query(cond="foo > 5 and bar == 'asdf'")

    # Or:
    q = A.query(cond="attr('percent.mt') > 10.0")

    # output the results (must happen while the array is still open)
    print(q.df[:])
## Example assumes current array is the standard Palmer Penguins data set

### Via qc creation API
qc <- tiledb_query_condition_init(attr = "bill_length_mm",
                                  value = 52,
                                  dtype = "FLOAT64",
                                  op = "GE")
res <- tiledb_array(uri, query_condition=qc)[]
dim(res)   # 344 -> 18 due to qc

### Via query parser
arr <- tiledb_array(uri)
qc <- parse_query_condition(bill_length_mm > 52, arr)
query_condition(arr) <- qc
res <- arr[]   # re-read the array so `res` reflects the condition just attached
dim(res)   # 344 -> 18 due to qc

### Or piped (for R 4.1.0 or later)
arr |>
  tdb_filter(bill_length_mm > 52) |>
  tdb_collect() |>
  dim()
// Create TileDB context and open the array
try(Context ctx = new Context(),
Array array = new Array(ctx, "<array-uri>", TILEDB_READ)) {
// Slice only rows 1, 2 and cols 2, 3, 4
NativeArray subarray = new NativeArray(ctx, new long[] {1, 2, 2, 4}, Integer.class);
// Prepare the query
Query query = new Query(ctx, array, TILEDB_READ);
// Prepare the vectors that will hold the results
query.setBuffer(
"d1", new NativeArray(ctx, 20, Integer.class));
query.setBuffer(
"d2", new NativeArray(ctx, 20, Integer.class));
query.setBuffer(
"a1", new NativeArray(ctx, 20, Integer.class));
query.setBuffer(
"a2", new NativeArray(ctx, 20, Float.class));
query.setSubarray(subarray)
.setLayout(TILEDB_ROW_MAJOR);
// QueryCondition Equivalent to: a2 > 15.0f AND a1 == null;
QueryCondition con1 = new QueryCondition(ctx, "a2", 15.0f, Float.class, TILEDB_GT);
QueryCondition con2 = new QueryCondition(ctx, "a1", 0, null, TILEDB_EQ);
// Combine the two conditions
QueryCondition con3 = con1.combine(con2, TILEDB_AND);
query.setCondition(con3);
// Submit the query and close the array.
query.submit();
// NOTE: although not recommended (for performance reasons),
// you can get the coordinates even when slicing dense arrays.
// NOTE: The layout could have also been TILEDB_COL_MAJOR or
// TILEDB_GLOBAL_ORDER.
// Get the results in native java arrays
int[] d1 = (int[]) query.getBuffer("d1");
int[] d2 = (int[]) query.getBuffer("d2");
int[] a1 = (int[]) query.getBuffer("a1");
float[] a2 = (float[]) query.getBuffer("a2");
// Close the query
query.close();
}
// TODO
using TileDB.CSharp;
// Create TileDB context
using Context ctx = new Context();

// Prepare the array for reading
using Array array = new Array(ctx, "<array-uri>");
array.Open(QueryType.Read);

// QueryCondition for attribute values where a1 == null
using QueryCondition queryCondition1 =
    // TODO: Actually implement this API.
    QueryCondition.CreateIsNull(ctx, "a1");

// QueryCondition for attribute values where a2 <= 10.5
using QueryCondition queryCondition2 =
    QueryCondition.Create(ctx, "a2", 10.5f, QueryConditionOperatorType.LessThanOrEqual);

// QueryCondition for (a1 == null && a2 <= 10.5)
using QueryCondition queryCondition = queryCondition1 & queryCondition2;

// Slice rows 1, 2 and cols 2, 3, 4
using Subarray subarray = new Subarray(array);
subarray.AddRange("rows", 1, 2);
subarray.AddRange("cols", 2, 4);

// Allocate buffers for query (2 rows x 3 cols = 6 cells)
int[] a1 = new int[6];
byte[] a1Validity = new byte[6];
float[] a2 = new float[6];

using Query query = new Query(ctx, array, QueryType.Read);
query.SetSubarray(subarray);
query.SetLayout(LayoutType.RowMajor);
query.SetDataBuffer("a1", a1);
// The validity buffer must be registered under the same attribute name as its data buffer
query.SetValidityBuffer("a1", a1Validity);
query.SetDataBuffer("a2", a2);
query.SetCondition(queryCondition);
query.Submit();

// For sparse arrays, we can check query result buffers
// for number of elements read with our query condition
ulong a1Num = query.GetResultDataElements("a1");
ulong a2Num = query.GetResultDataElements("a2");
for (ulong i = 0; i < a1Num; i++)
{
    // Print or consume buffers from sparse array...
}

// For dense arrays, we can use the fill value of an
// attribute to check the element met our conditions
float fillVal;
using (ArraySchema schema = array.Schema())
using (Attribute attribute = schema.Attribute("a2"))
{
    fillVal = attribute.FillValue<float>()[0];
}
for (ulong i = 0; i < a2Num; i++)
{
    // A float fill value may be NaN (the TileDB default for float attributes).
    // NaN never compares equal to itself, so treat "both NaN" as a match too.
    bool isFill = a2[i] == fillVal || (float.IsNaN(a2[i]) && float.IsNaN(fillVal));
    if (!isFill)
    {
        // Print or consume buffer from dense array...
    }
}
Last updated