Aggregates
Aggregates can be requested on the data of a query so that the computation is pushed down to TileDB rather than needing to compute the result externally. The currently supported operations can be found in Aggregates.
Here are some examples of using aggregates with TileDB:
// Example: compute count, sum, min, max, null count and mean aggregates in a
// single read query with the TileDB C API. Return codes are ignored here to
// keep the example short.
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);
tiledb_array_t* array;
tiledb_array_alloc(ctx, "<array_uri>", &array);
tiledb_array_open(ctx, array, TILEDB_READ);
// Buffers that will hold the results (one cell per aggregate)
uint64_t count[1];
uint64_t count_size = sizeof(count);
int64_t sum[1];
uint64_t sum_size = sizeof(sum);
int32_t min[1];
uint64_t min_size = sizeof(min);
int32_t max[1];
uint64_t max_size = sizeof(max);
uint64_t null_count[1];
uint64_t null_count_size = sizeof(null_count);
double mean[1];
uint64_t mean_size = sizeof(mean);
// Create query
tiledb_query_t* query;
tiledb_query_alloc(ctx, array, TILEDB_READ, &query);
// Get the default channel from the query; aggregates are applied to it
tiledb_query_channel_t* default_channel;
tiledb_query_get_default_channel(ctx, query, &default_channel);
// Apply count aggregate. The count operation is fetched as a const handle
// and takes no input field; it is not freed below.
const tiledb_channel_operation_t* count_aggregate;
tiledb_aggregate_count_get(ctx, &count_aggregate);
tiledb_channel_apply_aggregate(
    ctx, default_channel, "Count", count_aggregate);
// Apply sum aggregate on "a" attribute
const tiledb_channel_operator_t* operator_sum;
tiledb_channel_operator_sum_get(ctx, &operator_sum);
tiledb_channel_operation_t* sum_a;
tiledb_create_unary_aggregate(ctx, query, operator_sum, "a", &sum_a);
tiledb_channel_apply_aggregate(ctx, default_channel, "SumA", sum_a);
// Apply min aggregate on "b" attribute
const tiledb_channel_operator_t* operator_min;
tiledb_channel_operator_min_get(ctx, &operator_min);
tiledb_channel_operation_t* min_b;
tiledb_create_unary_aggregate(ctx, query, operator_min, "b", &min_b);
tiledb_channel_apply_aggregate(ctx, default_channel, "MinB", min_b);
// Apply max aggregate on "b" attribute
const tiledb_channel_operator_t* operator_max;
tiledb_channel_operator_max_get(ctx, &operator_max);
tiledb_channel_operation_t* max_b;
tiledb_create_unary_aggregate(ctx, query, operator_max, "b", &max_b);
tiledb_channel_apply_aggregate(ctx, default_channel, "MaxB", max_b);
// Apply null count aggregate on "c" attribute
const tiledb_channel_operator_t* operator_nc;
tiledb_channel_operator_null_count_get(ctx, &operator_nc);
tiledb_channel_operation_t* nc_c;
tiledb_create_unary_aggregate(ctx, query, operator_nc, "c", &nc_c);
tiledb_channel_apply_aggregate(ctx, default_channel, "NullCountC", nc_c);
// Apply mean aggregate on "c" attribute
const tiledb_channel_operator_t* operator_mean;
tiledb_channel_operator_mean_get(ctx, &operator_mean);
tiledb_channel_operation_t* mean_c;
tiledb_create_unary_aggregate(ctx, query, operator_mean, "c", &mean_c);
tiledb_channel_apply_aggregate(ctx, default_channel, "MeanC", mean_c);
// Set layout and buffers. Each buffer is registered under the output field
// name that was given to tiledb_channel_apply_aggregate above.
tiledb_query_set_layout(ctx, query, TILEDB_UNORDERED);
tiledb_query_set_data_buffer(ctx, query, "Count", count, &count_size);
tiledb_query_set_data_buffer(ctx, query, "SumA", sum, &sum_size);
tiledb_query_set_data_buffer(ctx, query, "MinB", min, &min_size);
tiledb_query_set_data_buffer(ctx, query, "MaxB", max, &max_size);
tiledb_query_set_data_buffer(ctx, query, "NullCountC", null_count, &null_count_size);
tiledb_query_set_data_buffer(ctx, query, "MeanC", mean, &mean_size);
// Submit query
tiledb_query_submit(ctx, query);
// Close array
tiledb_array_close(ctx, array);
// Print out the results. The 64-bit results are printed at full width so
// that large counts and sums are not truncated to int.
printf("Count has data %llu\n", (unsigned long long)count[0]);
printf("Sum of A has data %lld\n", (long long)sum[0]);
printf("Min of B has data %i\n", (int)min[0]);
printf("Max of B has data %i\n", (int)max[0]);
printf("Null count of C has data %llu\n", (unsigned long long)null_count[0]);
printf("Mean of C has data %f\n", mean[0]);
// Free allocated objects
tiledb_aggregate_free(ctx, &sum_a);
tiledb_aggregate_free(ctx, &min_b);
tiledb_aggregate_free(ctx, &max_b);
tiledb_aggregate_free(ctx, &nc_c);
tiledb_aggregate_free(ctx, &mean_c);
tiledb_query_channel_free(ctx, &default_channel);
tiledb_array_free(&array);
tiledb_query_free(&query);
tiledb_ctx_free(&ctx);
Context ctx;
Array array(ctx, "<array_uri>", TILEDB_READ);
// Vectors that will hold the result (1 cells)
std::vector<uint64_t> count(1);
std::vector<int64_t> sum(1);
std::vector<int32_t> min(1);
std::vector<int32_t> max(1);
std::vector<uint64_t> null_count(1);
std::vector<double> mean(1);
// Create a query
Query query(ctx, array);
// Get the default channel
QueryChannel default_channel = QueryExperimental::get_default_channel(query);
// Apply count aggregate
default_channel.apply_aggregate("Count", CountOperation());
// Apply sum aggregate on "a" attribute
ChannelOperation operation_sum =
QueryExperimental::create_unary_aggregate<SumOperator>(query, "a");
default_channel.apply_aggregate("SumA", operation_sum);
// Apply min/max aggregate on "b" attribute
ChannelOperation operation_min =
QueryExperimental::create_unary_aggregate<MinOperator>(query, "b");
default_channel.apply_aggregate("MinB", operation_min);
ChannelOperation operation_max =
QueryExperimental::create_unary_aggregate<MaxOperator>(query, "b");
default_channel.apply_aggregate("MaxB", operation_max);
// Apply null count/mean aggregate on "c" attribute
ChannelOperation operation_nc =
QueryExperimental::create_unary_aggregate<NullCountOperator>(query, "c");
default_channel.apply_aggregate("NullCountC", operation_nc);
ChannelOperation operation_mean =
QueryExperimental::create_unary_aggregate<MeanOperator>(query, "c");
default_channel.apply_aggregate("MeanC", operation_mean);
// Set layout and buffers.
query.set_layout(TILEDB_UNORDERED)
.set_data_buffer("Count", count)
.set_data_buffer("SumA", sum)
.set_data_buffer("MinB", min)
.set_data_buffer("MaxB", max)
.set_data_buffer("NullCountC", null_count)
.set_data_buffer("MeanC", mean);
// Submit the query and close the array.
query.submit();
array.close();
// Print out the results.
std::cout << "Count: " << count[0] << std::endl;
std::cout << "Sum of A: " << sum[0] << std::endl;
std::cout << "Min of B: " << min[0] << std::endl;
std::cout << "Max of B: " << max[0] << std::endl;
std::cout << "Null count of C: " << null_count[0] << std::endl;
std::cout << "Mean of C: " << mean[0] << std::endl;
import tiledb
with tiledb.open(uri, mode="r") as A:
    # create a query object to run aggregates against
    q = A.query()

    # count the number of records in the array
    print(q.agg("count")[:])

    # Get the maximum value of attribute "a"
    print(q.agg({"a": "max"})[:])

    # Get the maximum, minimum and count value of attribute "a"
    print(q.agg({"a": ["max", "min", "count"]})[:])

    # Get the maximum, minimum of attribute "a" and the minimum of attribute "b"
    print(q.agg({"a": ["max", "min"], "b": ["min"]})[:])

    # Select cells where the attribute value of foo is greater than 5 and
    # bar equals the string 'asdf', then perform a count.
    # Create a QueryCondition by passing a string containing a valid Python
    # Boolean expression. Note that strings are enclosed in quotes (either
    # single or double quotes) whereas attribute names are not. The exception
    # is if the attribute name has any special characters in it, in which
    # case replace `namehere` with `attr("namehere")`.
    q = A.query(cond="foo > 5 and bar == 'asdf'")

    # Or (this alternative replaces the query created just above):
    q = A.query(cond="attr('percent.mt') > 10.0")

    # output the results
    print(q.agg("count")[:])
# Example: compute aggregates over one attribute with the TileDB R API.
# NOTE: `attr` must be set beforehand to a character string naming the
# attribute to aggregate, and `uri` to the array location.

# Open the array
arr <- tiledb_array(uri)
# Create a query
qry <- tiledb_query(arr)
qry <- tiledb_query_set_layout(qry, "UNORDERED")
# Compute "Mean"
# (the last argument is presumably the `nullable` flag -- confirm against the
# TileDB-R reference; R's logical constant is TRUE, not True)
res <- tiledb_query_apply_aggregate(qry, attr, "Mean", TRUE)
cat("Mean for ", attr, " is ", res, "\n")
# Compute NullCount
res <- tiledb_query_apply_aggregate(qry, attr, "NullCount", TRUE)
cat("NullCount for ", attr, " is ", res, "\n")
# Compute Sum
res <- tiledb_query_apply_aggregate(qry, attr, "Sum", TRUE)
cat("Sum for ", attr, " is ", res, "\n")
# Compute Min
res <- tiledb_query_apply_aggregate(qry, attr, "Min", TRUE)
cat("Min for ", attr, " is ", res, "\n")
# Compute Max
res <- tiledb_query_apply_aggregate(qry, attr, "Max", TRUE)
cat("Max for ", attr, " is ", res, "\n")
Last updated