Deleting
TileDB supports deletion using Query Conditions to specify which cells to remove. When a delete query is committed, any cells matching the given condition are ignored on subsequent reads.
#include <tiledb/tiledb.h>
/*
 * Example: create a 1-D sparse array, write six cells, delete the cells whose
 * "a1" value is less than 4 with a DELETE query, then read the array back.
 */
const char* array_name = "deletes_array";

// Create TileDB context.
tiledb_ctx_t* ctx;
tiledb_ctx_alloc(NULL, &ctx);

// Create the array: one int32 dimension "cols" over [1, 6] with tile extent 2,
// and one int32 attribute "a1".
int32_t cols_domain[] = {1, 6};
int32_t tile_extents[] = {2};
tiledb_dimension_t* d;
tiledb_dimension_alloc(
    ctx, "cols", TILEDB_INT32, &cols_domain[0], &tile_extents[0], &d);
tiledb_domain_t* domain;
tiledb_domain_alloc(ctx, &domain);
tiledb_domain_add_dimension(ctx, domain, d);
tiledb_attribute_t* a;
tiledb_attribute_alloc(ctx, "a1", TILEDB_INT32, &a);
tiledb_array_schema_t* array_schema;
tiledb_array_schema_alloc(ctx, TILEDB_SPARSE, &array_schema);
tiledb_array_schema_set_domain(ctx, array_schema, domain);
tiledb_array_schema_add_attribute(ctx, array_schema, a);
tiledb_array_create(ctx, array_name, array_schema);

// Write initial data to the array: cell at coordinate i holds a1 == i.
tiledb_array_t* array;
tiledb_array_alloc(ctx, array_name, &array);
tiledb_array_open(ctx, array, TILEDB_WRITE);
tiledb_query_t* write_query;
tiledb_query_alloc(ctx, array, TILEDB_WRITE, &write_query);
int32_t cols_data[] = {1, 2, 3, 4, 5, 6};
uint64_t cols_size = sizeof(cols_data);
int32_t a1_data[] = {1, 2, 3, 4, 5, 6};
uint64_t a1_size = sizeof(a1_data);
tiledb_query_set_layout(ctx, write_query, TILEDB_UNORDERED);
tiledb_query_set_data_buffer(ctx, write_query, "cols", cols_data, &cols_size);
tiledb_query_set_data_buffer(ctx, write_query, "a1", a1_data, &a1_size);
tiledb_query_submit(ctx, write_query);
tiledb_array_close(ctx, array);

// Delete cells where a1 attribute value is < 4.
// The array is reopened in DELETE mode and the query carries only a condition.
tiledb_array_open(ctx, array, TILEDB_DELETE);
tiledb_query_t* delete_query;
tiledb_query_alloc(ctx, array, TILEDB_DELETE, &delete_query);
tiledb_query_condition_t* qc;
tiledb_query_condition_alloc(ctx, &qc);
int32_t val = 4;
tiledb_query_condition_init(ctx, qc, "a1", &val, sizeof(val), TILEDB_LT);
tiledb_query_set_condition(ctx, delete_query, qc);
tiledb_query_submit(ctx, delete_query);
tiledb_array_close(ctx, array);

// Read data back from the array over the full domain [1, 6].
// The result buffers are zero-initialized, so slots that receive no cell
// still print as 0 below.
tiledb_array_open(ctx, array, TILEDB_READ);
tiledb_query_t* read_query;
tiledb_query_alloc(ctx, array, TILEDB_READ, &read_query);
int32_t cols_data_read[6] = {0, 0, 0, 0, 0, 0};
uint64_t cols_size_read = sizeof(int32_t) * 6;
int32_t a1_data_read[6] = {0, 0, 0, 0, 0, 0};
uint64_t a1_size_read = sizeof(int32_t) * 6;
int32_t range[] = {1, 6};
tiledb_subarray_t* subarray;
tiledb_subarray_alloc(ctx, array, &subarray);
tiledb_subarray_add_range(ctx, subarray, 0, &range[0], &range[1], NULL);
tiledb_query_set_layout(ctx, read_query, TILEDB_UNORDERED);
tiledb_query_set_subarray_t(ctx, read_query, subarray);
tiledb_query_set_data_buffer(
    ctx, read_query, "cols", cols_data_read, &cols_size_read);
tiledb_query_set_data_buffer(
    ctx, read_query, "a1", a1_data_read, &a1_size_read);
tiledb_query_submit(ctx, read_query);
tiledb_array_close(ctx, array);

// Output: 4, 5, 6, 0, 0, 0
for (size_t i = 0; i < 5; i++) {
  printf("%d, ", a1_data_read[i]);
}
printf("%d\n", a1_data_read[5]);

// Clean up allocated objects. Objects that were created from the array
// (queries, subarray) and the query condition are released first, then the
// array and schema objects, and the context last since every other handle
// was allocated through it.
tiledb_query_free(&write_query);
tiledb_query_free(&delete_query);
tiledb_query_free(&read_query);
tiledb_subarray_free(&subarray);
tiledb_query_condition_free(&qc);
tiledb_array_free(&array);
tiledb_array_schema_free(&array_schema);
tiledb_attribute_free(&a);
tiledb_domain_free(&domain);
tiledb_dimension_free(&d);
tiledb_ctx_free(&ctx);
#include <iostream>
#include <tiledb/tiledb>
using namespace tiledb;
std::string array_uri = "deletes_array";
// Create TileDB context.
Context ctx;
// Create the array.
Domain domain(ctx);
ArraySchema schema(ctx, TILEDB_SPARSE);
domain.add_dimension(Dimension::create<int32_t>(ctx, "cols", {{1, 6}}, 2));
schema.set_domain(domain);
schema.add_attribute(Attribute::create<int32_t>(ctx, "a1"));
Array::create(array_uri, schema);
// Write initial data to the array.
Array array(ctx, array_uri, TILEDB_WRITE);
Query write_query(ctx, array, TILEDB_WRITE);
std::vector<int32_t> cols_data = {1, 2, 3, 4, 5, 6};
std::vector<int32_t> a1_data = {1, 2, 3, 4, 5, 6};
write_query.set_layout(TILEDB_UNORDERED)
.set_data_buffer("cols", cols_data)
.set_data_buffer("a1", a1_data);
write_query.submit();
array.close();
// Delete cells where a1 attribute is >= 4
array.open(TILEDB_DELETE);
Query delete_query(ctx, array, TILEDB_DELETE);
QueryCondition qc(ctx);
int32_t val = 4;
qc.init("a1", &val, sizeof(val), TILEDB_GE);
delete_query.set_condition(qc);
delete_query.submit();
array.close();
// Read data from the array.
array.open(TILEDB_READ);
Query read_query(ctx, array, TILEDB_READ);
std::vector<int32_t> cols_data_read(6);
std::vector<int32_t> a1_data_read(6);
Subarray subarray(ctx, array);
subarray.add_range(0, 1, 6);
read_query.set_layout(TILEDB_UNORDERED)
.set_subarray(subarray)
.set_data_buffer("cols", cols_data_read)
.set_data_buffer("a1", a1_data_read);
read_query.submit();
array.close();
// Output: 1, 2, 3, 0, 0, 0
for (size_t i = 0; i < 5; i++) {
std::cout << a1_data_read[i] << ", ";
}
std::cout << a1_data_read[5] << std::endl;
import tiledb, numpy as np
import tempfile
# Create the example array in a fresh temporary directory and show where it is.
path = tempfile.mkdtemp(suffix="example_sparse_coord_delete")
print(path)

# Create a sparse array with one uint32 dimension "x" over [1, 10]
# and one uint32 attribute "ints".
x_dim = tiledb.Dim(name="x", domain=(1, 10), tile=1, dtype=np.uint32)
dom = tiledb.Domain(x_dim)
ints_attr = tiledb.Attr(name="ints", dtype=np.uint32)
schema = tiledb.ArraySchema(domain=dom, attrs=[ints_attr], sparse=True)
tiledb.Array.create(path, schema)

# Generate 10 random integers in [1, 10)
data = np.random.randint(low=1, high=10, size=10)

# Write one value at each coordinate 1..10
coords = np.arange(1, 11)
with tiledb.open(path, mode="w") as A:
    A[coords] = data

# Condition selecting every cell whose x coordinate is less than 5
qc = "x < 5"

# Issue the delete query: open in delete mode ("d") and submit the condition
with tiledb.open(path, mode="d") as A:
    delete_query = A.query(cond=qc)
    delete_query.submit()

# Read data back and print: cells matching the condition should be removed
with tiledb.open(path, mode="r") as A:
    remaining = A[:]
    print(remaining)
## Attach the TileDB R package; it provides every function used below
library(tiledb)

uri <- "deletes_array"

## Create a (sparse) array from a data frame, indexed on 'cols'
DF <- data.frame(cols = 1:6, a1 = 1:6)
fromDataFrame(DF, uri, col_index="cols")

## Create a query condition object corresponding to 'a1 >= 4'
qc <- parse_query_condition(a1 >= 4)

## and apply it as a DELETE query
arr <- tiledb_array(uri)
qry <- tiledb_query(arr, "DELETE")
qry <- tiledb_query_set_condition(qry, qc)
tiledb_query_submit(qry)
tiledb_query_finalize(qry)

## Read back: only the first three rows, where 'a1 < 4', should remain
D2 <- tiledb_array(uri, return_as="data.frame")[]
print(D2)
using System.Linq;
using TileDB.CSharp;
// Example: create a 1-D sparse array, write six cells, delete the cells whose
// "a1" value is >= 4 with a Delete query, then read the array back.
string arrayUri = "deletes_array";

// Create TileDB context.
using var ctx = new Context();

// Create the array: one int dimension "cols" over [1, 6] with tile extent 2,
// and one int attribute "a1". Schema objects are disposed when this block ends.
using (var schema = new ArraySchema(ctx, ArrayType.Sparse))
{
    using var domain = new Domain(ctx);
    using var cols = Dimension.Create(ctx, "cols", 1, 6, 2);
    domain.AddDimension(cols);
    schema.SetDomain(domain);

    using var a1 = Attribute.Create<int>(ctx, "a1");
    schema.AddAttribute(a1);

    Array.Create(ctx, arrayUri, schema);
}

using var array = new Array(ctx, arrayUri);

// Write initial data to the array: cell at coordinate i holds a1 == i.
array.Open(QueryType.Write);
using (var writeQuery = new Query(ctx, array, QueryType.Write))
{
    var colsData = new int[] { 1, 2, 3, 4, 5, 6 };
    var a1Data = new int[] { 1, 2, 3, 4, 5, 6 };

    writeQuery.SetLayout(LayoutType.Unordered);
    writeQuery.SetDataBuffer("cols", colsData);
    writeQuery.SetDataBuffer("a1", a1Data);
    writeQuery.Submit();
}
array.Close();

// Delete cells where a1 attribute is >= 4.
// The array is reopened in Delete mode and the query carries only a condition.
array.Open(QueryType.Delete);
using (var deleteQuery = new Query(ctx, array, QueryType.Delete))
{
    using var condition = QueryCondition.Create(
        ctx, "a1", 4, QueryConditionOperatorType.GreaterThanOrEqual);
    deleteQuery.SetCondition(condition);
    deleteQuery.Submit();
}
array.Close();

// Read data from the array over the full domain [1, 6].
array.Open(QueryType.Read);
using (var readQuery = new Query(ctx, array, QueryType.Read))
{
    var colsDataRead = new int[6];
    var a1DataRead = new int[6];

    using var subarray = new Subarray(array);
    subarray.AddRange(0, 1, 6);

    readQuery.SetLayout(LayoutType.Unordered);
    readQuery.SetSubarray(subarray);
    readQuery.SetDataBuffer("cols", colsDataRead);
    readQuery.SetDataBuffer("a1", a1DataRead);
    readQuery.Submit();

    // Print only as many elements as the query reports for "a1".
    int a1Count = (int)readQuery.GetResultDataElements("a1");
    var returnedValues = a1DataRead.Take(a1Count);
    // Output: 1, 2, 3
    System.Console.WriteLine(string.Join(", ", returnedValues));
}
array.Close();
// Example: create a sparse array with a string dimension "d1" and an Integer
// attribute "a1", write five cells, delete the cells where a1 > 3, then read
// the attribute back. `ctx` and `arrayURISparse` are defined outside this snippet.

// Create the array.
Dimension<Integer> d1 =
    new Dimension<Integer>(ctx, "d1", Datatype.TILEDB_STRING_ASCII, null, null);

// Create the domain and add the dimension to it.
Domain domain = new Domain(ctx);
domain.addDimension(d1);

Attribute a1 = new Attribute(ctx, "a1", Integer.class);

ArraySchema schema = new ArraySchema(ctx, TILEDB_SPARSE);
schema.setTileOrder(TILEDB_ROW_MAJOR);
schema.setCellOrder(TILEDB_ROW_MAJOR);
schema.setDomain(domain);
schema.addAttribute(a1);

Array.create(arrayURISparse, schema);

// Write the array. The offsets split the character data into five
// two-character coordinates: "aa", "bb", "cc", "dd", "ee".
NativeArray d_data = new NativeArray(ctx, "aabbccddee", Datatype.TILEDB_STRING_ASCII);
NativeArray d_off = new NativeArray(ctx, new long[] {0, 2, 4, 6, 8}, Datatype.TILEDB_UINT64);

// Prepare cell buffers. Named a1_data so it does not redeclare the
// Attribute variable `a1` above.
NativeArray a1_data = new NativeArray(ctx, new int[] {1, 2, 3, 4, 5}, Integer.class);

// Create query
Array array = new Array(ctx, arrayURISparse, QueryType.TILEDB_WRITE);
Query query = new Query(array);
query.setLayout(TILEDB_GLOBAL_ORDER);
query.setDataBuffer("d1", d_data);
query.setOffsetsBuffer("d1", d_off);
query.setDataBuffer("a1", a1_data);

// Submit query
query.submit();
query.finalizeQuery();
query.close();
array.close();

// Delete data with the appropriate query condition (a1 > 3).
// `array` and `query` are reassigned rather than redeclared.
array = new Array(ctx, arrayURISparse, QueryType.TILEDB_DELETE);
query = new Query(array, QueryType.TILEDB_DELETE);
QueryCondition deleteQc = new QueryCondition(ctx, "a1", 3, Integer.class, TILEDB_GT);
query.setCondition(deleteQc);
query.submit();

// close resources
query.close();
deleteQc.close();
array.close();

// Check if data was deleted: resubmit until the read query reports completion.
array = new Array(ctx, arrayURISparse, QueryType.TILEDB_READ);
query = new Query(array, QueryType.TILEDB_READ);
query.setDataBuffer("a1", new NativeArray(ctx, 40, Integer.class));

while (query.getQueryStatus() != QueryStatus.TILEDB_COMPLETED) {
  query.submit();
}

int[] a1_buff = (int[]) query.getBuffer("a1");
// a1_buff will be {1, 2, 3};

// Close the read query before its array. deleteQc was already closed above.
query.close();
array.close();
Last updated