Aggregates

Aggregates can be requested on a query so that the computation is pushed down to TileDB instead of being performed externally on the returned data. The currently supported operations are listed in Aggregates.

Here are some examples of using aggregates with TileDB:

// Example: compute count, sum, min, max, null count and mean aggregates
// over an array in a single read query.
//
// NOTE: the status codes returned by the tiledb_* calls are ignored here to
// keep the example short; real code should check each of them.

// Create the context and open the array for reading
tiledb_ctx_t* ctx = NULL;
tiledb_ctx_alloc(NULL, &ctx);

tiledb_array_t* array = NULL;
tiledb_array_alloc(ctx, "<array_uri>", &array);
tiledb_array_open(ctx, array, TILEDB_READ);

// Buffers that will hold the results (one cell per aggregate).
// Each *_size variable is the buffer size in bytes and is passed by address
// when the buffer is registered on the query.
uint64_t count[1];
uint64_t count_size = sizeof(count);
int64_t sum[1];
uint64_t sum_size = sizeof(sum);
int32_t min[1];
uint64_t min_size = sizeof(min);
int32_t max[1];
uint64_t max_size = sizeof(max);
uint64_t null_count[1];
uint64_t null_count_size = sizeof(null_count);
double mean[1];
uint64_t mean_size = sizeof(mean);

// Create query
tiledb_query_t* query = NULL;
tiledb_query_alloc(ctx, array, TILEDB_READ, &query);

// Get the default channel from the query; all aggregates are applied to it
tiledb_query_channel_t* default_channel = NULL;
tiledb_query_get_default_channel(ctx, query, &default_channel);

// Apply count aggregate.
// The count operation is fetched through a getter as a const handle and,
// unlike the unary aggregates below, is not freed at the end of the example.
const tiledb_channel_operation_t* count_aggregate = NULL;
tiledb_aggregate_count_get(ctx, &count_aggregate);
tiledb_channel_apply_aggregate(
  ctx, default_channel, "Count", count_aggregate);

// Apply sum aggregate on "a" attribute.
// The name given to tiledb_channel_apply_aggregate ("SumA") is the name the
// result buffer is registered under later on.
const tiledb_channel_operator_t* operator_sum = NULL;
tiledb_channel_operator_sum_get(ctx, &operator_sum);
tiledb_channel_operation_t* sum_a = NULL;
tiledb_create_unary_aggregate(ctx, query, operator_sum, "a", &sum_a);
tiledb_channel_apply_aggregate(ctx, default_channel, "SumA", sum_a);

// Apply min aggregate on "b" attribute
const tiledb_channel_operator_t* operator_min = NULL;
tiledb_channel_operator_min_get(ctx, &operator_min);
tiledb_channel_operation_t* min_b = NULL;
tiledb_create_unary_aggregate(ctx, query, operator_min, "b", &min_b);
tiledb_channel_apply_aggregate(ctx, default_channel, "MinB", min_b);

// Apply max aggregate on "b" attribute
const tiledb_channel_operator_t* operator_max = NULL;
tiledb_channel_operator_max_get(ctx, &operator_max);
tiledb_channel_operation_t* max_b = NULL;
tiledb_create_unary_aggregate(ctx, query, operator_max, "b", &max_b);
tiledb_channel_apply_aggregate(ctx, default_channel, "MaxB", max_b);

// Apply null count aggregate on "c" attribute
const tiledb_channel_operator_t* operator_nc = NULL;
tiledb_channel_operator_null_count_get(ctx, &operator_nc);
tiledb_channel_operation_t* nc_c = NULL;
tiledb_create_unary_aggregate(ctx, query, operator_nc, "c", &nc_c);
tiledb_channel_apply_aggregate(ctx, default_channel, "NullCountC", nc_c);

// Apply mean aggregate on "c" attribute
const tiledb_channel_operator_t* operator_mean = NULL;
tiledb_channel_operator_mean_get(ctx, &operator_mean);
tiledb_channel_operation_t* mean_c = NULL;
tiledb_create_unary_aggregate(ctx, query, operator_mean, "c", &mean_c);
tiledb_channel_apply_aggregate(ctx, default_channel, "MeanC", mean_c);

// Set layout and attach one result buffer per aggregate, keyed by the
// output name used when the aggregate was applied to the channel
tiledb_query_set_layout(ctx, query, TILEDB_UNORDERED);
tiledb_query_set_data_buffer(ctx, query, "Count", count, &count_size);
tiledb_query_set_data_buffer(ctx, query, "SumA", sum, &sum_size);
tiledb_query_set_data_buffer(ctx, query, "MinB", min, &min_size);
tiledb_query_set_data_buffer(ctx, query, "MaxB", max, &max_size);
tiledb_query_set_data_buffer(ctx, query, "NullCountC", null_count, &null_count_size);
tiledb_query_set_data_buffer(ctx, query, "MeanC", mean, &mean_size);

// Submit query
tiledb_query_submit(ctx, query);

// Close array
tiledb_array_close(ctx, array);

// Print out the results.
// The 64-bit results are printed with 64-bit conversions; casting them to
// int would silently truncate large counts and sums.
printf("Count has data %llu\n", (unsigned long long)count[0]);
printf("Sum of A has data %lld\n", (long long)sum[0]);
printf("Min of B has data %i\n", (int)min[0]);
printf("Max of B has data %i\n", (int)max[0]);
printf("Null count of C has data %llu\n", (unsigned long long)null_count[0]);
printf("Mean of C has data %f\n", mean[0]);

// Free allocated objects: the unary aggregates first, then the channel,
// the array, the query and finally the context they were created with
tiledb_aggregate_free(ctx, &sum_a);
tiledb_aggregate_free(ctx, &min_b);
tiledb_aggregate_free(ctx, &max_b);
tiledb_aggregate_free(ctx, &nc_c);
tiledb_aggregate_free(ctx, &mean_c);
tiledb_query_channel_free(ctx, &default_channel);
tiledb_array_free(&array);
tiledb_query_free(&query);
tiledb_ctx_free(&ctx);

Last updated