Comment on page
Configuration
Configuration objects can be used when creating a TileDB context, or when using consolidation and virtual filesystem (VFS) functionality. See Configuration Parameters for a summary of all TileDB configuration options.
You can create and set configuration objects as follows:
C
C++
Python
R
Java
Go
// Create a configuration object
tiledb_config_t *config;
tiledb_config_alloc(&config, NULL);
// Set a configuration parameter
tiledb_config_set(config, "sm.tile_cache_size", "5000", &error);
// Get a configuration parameter
const char* value;
tiledb_config_get(config, "sm.tile_cache_size", &value, &error);
// Unset a configuration parameter
tiledb_config_unset(config, "sm.tile_cache_size", &error);
// Clean up
tiledb_config_free(&config);
// Create a configuration object
Config config;
// Set a configuration parameter
config["sm.tile_cache_size"] = "5000";
// Get a configuration parameter
std::string = config["sm.tile_cache_size"];
// Unset a configuration parameter
config.unset("sm.tile_cache_size");
# Create a configuration object
config = tiledb.Config()
# Set a configuration parameter
config["sm.tile_cache_size"] = 5000
# Get a configuration parameter
tile_cache_size = config["sm.tile_cache_size"]
# Config objects may also be initialized with a dictionary
config = tiledb.Config({"sm.tile_cache_size": 5000})
# Remove a configuration parameter
# (resets to the default value)
del config["sm.tile_cache_size"]
# Create a configuration object
config <- tiledb_config()
# Set a configuration parameter
config["sm.tile_cache_size"] <- "5000"
# Get a configuration parameter
tile_cache_size <- config["sm.tile_cache_size"]
# Unset a configuration parameter
# (resets it to the default value)
tiledb_config_unset(config, "sm.tile_cache_size")
// Create a configuration object; try-with-resources closes it automatically
try(Config config = new Config()) {
// Set a configuration parameter
config.set("sm.tile_cache_size", "5000");
// Get a configuration parameter
String tile_cache_size = config.get("sm.tile_cache_size");
// Unset a configuration parameter
config.unset("sm.tile_cache_size");
}
// Create a configuration object
config, _ := tiledb.NewConfig()
// Set a configuration parameter
config.Set("sm.tile_cache_size", "5000")
// Get a configuration parameter
smTileCacheSize, _ := config.Get("sm.tile_cache_size")
// Unset a configuration parameter (resets it to the default value)
config.Unset("sm.tile_cache_size")
// Clean up
config.Free()
You can save a configuration object into a text file, as well as load a configuration object from a text file, as follows:
C
C++
Python
R
Java
Go
// Create a TileDB config
tiledb_config_t* config;
tiledb_config_alloc(&config, NULL);
tiledb_config_set(config, "sm.tile_cache_size", "0", &error);
// Save to file
tiledb_config_save_to_file(config, "tiledb_config.txt", &error);
// Load from file
tiledb_config_t* config_load;
tiledb_config_alloc(&config_load, &error);
tiledb_config_load_from_file(config_load, "tiledb_config.txt", &error);
const char* value;
tiledb_config_get(config_load, "sm.tile_cache_size", &value, &error);
// Clean up
tiledb_config_free(&config);
tiledb_config_free(&config_load);
// Save to file
Config config;
// Note: configuration values are set as strings
config["sm.tile_cache_size"] = "0";
config.save_to_file("tiledb_config.txt");
// Load from file
Config config_load("tiledb_config.txt");
std::string tile_cache_size = config_load["sm.tile_cache_size"];
# Save to file
config = tiledb.Config()
config["sm.tile_cache_size"] = 0
config.save("tiledb_config.txt")
# Load from file
config_load = tiledb.Config.load("tiledb_config.txt")
# Save to file
config <- tiledb_config()
config["sm.tile_cache_size"] <- 0
tiledb_config_save(config, "tiledb_config.txt")
# Load from file
config_loaded <- tiledb_config_load("tiledb_config.txt")
tile_cache_size <- config_loaded["sm.tile_cache_size"]
// Save to file; try-with-resources closes each config automatically
try (Config config = new Config()) {
  config.set("sm.tile_cache_size", "0");
  config.saveToFile("tiledb_config.txt");
  // Load from file (close it too, so the native handle is released)
  try (Config config_load = new Config("tiledb_config.txt")) {
    String tile_cache_size = config_load.get("sm.tile_cache_size");
  }
}
// Create a TileDB config
config, _ := tiledb.NewConfig()
// Set a value
config.Set("sm.tile_cache_size", "8")
// Save to file
configFileName := "tiledb_config.txt"
config.SaveToFile(configFileName)
// Load from file
newConfig, _ := tiledb.LoadConfig(configFileName)
// Retrieve value
smTileCacheSize, _ := newConfig.Get("sm.tile_cache_size")
// Clean up
config.Free()
You can also use a configuration iterator as follows:
C
C++
Python
R
Java
Go
// Create a TileDB config
tiledb_config_t* config;
tiledb_config_alloc(&config, NULL);
// Create a TileDB config iterator
// You can use any prefix instead of "vfs.s3."
tiledb_config_iter_t* config_iter;
tiledb_config_iter_alloc(config, "vfs.s3.", &config_iter, &error);
// Print all configuration parameters that start with "vfs.s3"
// Note that the prefix is exluded from the results
printf("\nVFS S3 settings:\n");
int done = 0;
const char *param, *value;
tiledb_config_iter_done(config_iter, &done, &error);
while (!done) {
tiledb_config_iter_here(config_iter, ¶m, &value, &error);
printf("\"%s\" : \"%s\"\n", param, value);
tiledb_config_iter_next(config_iter, &error);
tiledb_config_iter_done(config_iter, &done, &error);
}
// You can reset the iterator as follows
tiledb_config_iter_reset(config, config_iter, NULL, NULL);
// Clean up
tiledb_config_free(&config);
tiledb_config_iter_free(&config_iter);
Config config;
// Print only the S3 settings
// You can use any prefix instead of "vfs.s3."
// Note that the prefix is excluded from the results
std::cout << "\nVFS S3 settings:\n";
for (auto i = config.begin("vfs.s3."); i != config.end(); ++i) {
auto& p = *i;
std::cout << "\"" << p.first << "\" : \"" << p.second << "\"\n";
}
# ... create/open a Config object
# Iterating a Config yields (parameter, value) pairs as strings
for key, value in config.items():
    print(f"'{key}': '{value}'")
# keys may optionally be filtered by passing a prefix to `items()`
for key, value in config.items("vfs.s3."):
    print(f"'{key}': '{value}'")
# R has no native iterator but one can loop over the config elements
# by retrieving the configuration as a vector
cfg <- as.vector(tiledb_config())
# print all non-empty config elements
for (n in names(cfg))
if (cfg[n] != "")
cat(n, ":", cfg[n], "\n")
// Create a configuration object; try-with-resources closes it automatically
try (Config config = new Config()) {
  // Print only the S3 settings
  // You can use any prefix instead of "vfs.s3."
  // Note that the prefix is excluded from the results
  System.out.printf("\nVFS S3 settings:\n");
  for (Map.Entry<String, String> me : config.parameters("vfs.s3.").entrySet()) {
    System.out.printf("\"%s\" : \"%s\"\n", me.getKey(), me.getValue());
  }
}
// Println appends its own newline
fmt.Println("VFS S3 settings:")
// Create a TileDB config
config, _ := tiledb.NewConfig()
// Iterate the configuration, filtered by the "vfs.s3." prefix
iter, _ := config.Iterate("vfs.s3.")
for ; !iter.IsDone(); _ = iter.Next() {
	// Get current param, value from iterator
	param, value, _ := iter.Here()
	fmt.Printf("%s: %s\n", *param, *value)
}
Below we provide a table with all the TileDB configuration parameters, along with their description and default values. See Configuration for information on how to set them.
Parameter | Default Value | Description |
"sm.check_coord_dups" | "true" | This is applicable only if sm.dedup_coords is false . If true , an error will be thrown if there are cells with duplicate coordinates during sparse fragment writes. If false and there are duplicates, the duplicates will be written without errors. |
"sm.check_coord_oob" | "true" | If true , an error will be thrown if there are cells with coordinates lying outside the array domain during sparse fragment writes. |
"sm.check_global_order" | "true" | If true , an error will be thrown if the coordinates are not in the global order. Applicable only to sparse writes in the global order. |
"sm.consolidation.amplification" | "1.0" | The factor by which the size of the dense fragment resulting from consolidating a set of fragments (containing at least one dense fragment) can be amplified. This is important when the union of the non-empty domains of the fragments to be consolidated have a lot of empty cells, which the consolidated fragment will have to fill with the special fill value (since the resulting fragment is dense). |
"sm.consolidation.buffer_size" | "50000000" | The size (in bytes) of the attribute buffers used during consolidation. |
"sm.consolidation.step_max_frags" | "4294967295" | The maximum number of fragments to consolidate in a single step. |
"sm.consolidation.step_min_frags" | "4294967295" | The minimum number of fragments to consolidate in a single step. |
"sm.consolidation.step_size_ratio" | "0" | The size ratio of two (“adjacent”) fragments must be larger than this value to be considered for consolidation in a single step. |
"sm.consolidation.steps" | "4294967295" | The number of consolidation steps to be performed when executing the consolidation algorithm. |
"sm.consolidation.mode" | "fragments" | The consolidation mode, one of fragments (consolidate all fragments), fragment_meta (consolidate only fragment metadata footers to a single file), or array_meta (consolidate array metadata only). |
"sm.vacuum.mode" | "fragments" | The vacuuming mode, one of fragments (remove consolidated fragments), fragment_meta (remove only consolidated fragment metadata), or array_meta (remove consolidated array metadata files). |
"sm.dedup_coords" | "false" | If true , cells with duplicate coordinates will be removed during sparse fragment writes. Note that ties during deduplication are broken arbitrarily. |
"sm.enable_signal_handlers" | "true" | Determines whether or not TileDB will install signal handlers. |
"sm.memory_budget" | "5GB" | The memory budget for tiles of fixed-sized attributes (or offsets for var-sized attributes) to be fetched during reads. This is applicable to dense reads, sparse ordered reads and "legacy" reader modes. |
"sm.memory_budget_var" | "10GB" | The memory budget for tiles of var-sized attributes to be fetched during reads. This is applicable to dense reads and sparse ordered reads and "legacy" reader modes. |
"sm.mem.malloc_trim" | "true" | Should malloc_trim be called on context and query destruction? This might reduce residual memory usage. |
"sm.mem.total_budget" | "10GB" | Memory budget for refactored readers and writers. This includes sparse unordered reads, and dense reads with refactored reader. |
"sm.mem.reader.sparse_global_order.ratio_coords" | 0.5 | Ratio of the budget allocated for coordinates in the sparse global order reader. |
"sm.mem.reader.sparse_global_order.ratio_query_condition" | 0.25 | Ratio of the budget allocated for the query condition in the sparse global order reader. |
"sm.mem.reader.sparse_global_order.ratio_tile_ranges" | 0.1 | Ratio of the budget allocated for tile ranges in the sparse global order reader. |
"sm.mem.reader.sparse_global_order.ratio_array_data" | 0.1 | Ratio of the budget allocated for array data in the sparse global order reader. |
"sm.mem.reader.sparse_unordered_with_dups.ratio_coords" | 0.5 | Ratio of the budget allocated for coordinates in the sparse unordered with duplicates reader. |
"sm.mem.reader.sparse_unordered_with_dups.ratio_query_condition" | 0.25 | Ratio of the budget allocated for the query condition in the sparse unordered with duplicates reader. |
"sm.mem.reader.sparse_unordered_with_dups.ratio_tile_ranges" | 0.1 | Ratio of the budget allocated for tile ranges in the sparse unordered with duplicates reader. |
"sm.mem.reader.sparse_unordered_with_dups.ratio_array_data" | 0.1 | Ratio of the budget allocated for array data in the sparse unordered with duplicates reader. |
"sm.sub_partitioner_memory_budget" | "0" | The memory budget used by the read algorithm to force partition the query range in case sorting is much slower than the partitioning overhead. |
"sm.compute_concurrency_level" | # of cores | Upper-bound on number of threads to allocate for compute-bound tasks. |
"sm.io_concurrency_level" | # of cores | Upper-bound on number of threads to allocate for IO-bound tasks. |
"sm.num_tbb_threads" | TBB automatic | The number of threads allocated for the TBB thread pool. Note: this is a whole-program setting. Usually this should not be modified from the default. See also the documentation for TBB's task_scheduler_init class. When TBB is disabled, this will be used to set the level of concurrency for generic threading where TBB is otherwise used. |
"sm.tile_cache_size" | "10000000" | The tile cache size in bytes. |
"vfs.file.max_parallel_ops" | sm.io_concurrency_level | The maximum number of parallel operations on objects with file:/// URIs. |
"vfs.file.enable_filelocks" | true | If set to false , file locking operations are no-ops in VFS for file:/// URIs. |
"vfs.file.posix_file_permissions" | "644" | Permissions to use for posix file system with file creation. |
"vfs.file.posix_directory_permissions" | "755" | Permissions to use for posix file system with directory creation. |
"vfs.min_batch_gap" | "512000" | The minimum number of bytes between two VFS read batches. |
"vfs.min_batch_size" | "20971520" | The minimum number of bytes in a VFS read operation. |
"vfs.min_parallel_size" | "10485760" | The minimum number of bytes in a parallel VFS operation, except parallel S3 writes, which are controlled by parameter vfs.s3.multipart_part_size |
"vfs.read_ahead_size" | "102400" | The maximum byte size to read-ahead from the backend. |
"vfs.read_ahead_cache_size" | "10485760" | The total maximum size of the read-ahead cache, which is an LRU. |
"vfs.s3.connect_max_tries" | "5" | The maximum tries for a connection. Any long value is acceptable. |
"vfs.s3.connect_scale_factor" | "25" | The scale factor for exponential backoff when connecting to S3. Any long value is acceptable. |
"vfs.s3.connect_timeout_ms" | "3000" | The connection timeout in ms. Any long value is acceptable. |
"vfs.s3.endpoint_override" | "" | The S3 endpoint, if S3 is enabled. |
"vfs.s3.max_parallel_ops" | sm.io_concurrency_level | The maximum number of S3 backend parallel operations. |
"vfs.s3.multipart_part_size" | "5242880" | The part size (in bytes) used in S3 multipart writes. Any uint64_t value is acceptable. Note: vfs.s3.multipart_part_size * vfs.s3.max_parallel_ops bytes will be buffered before issuing multipart uploads in parallel. |
"vfs.s3.proxy_host" | "" | The S3 proxy host. |
"vfs.s3.proxy_password" | "" | The S3 proxy password. |
"vfs.s3.proxy_port" | "0" | The S3 proxy port. |
"vfs.s3.proxy_scheme" | "https" | The S3 proxy scheme. |
"vfs.s3.proxy_username" | "" | The S3 proxy username. |
"vfs.s3.region" | "us-east-1" | The S3 region. |
"vfs.s3.aws_access_key_id" | "" | The AWS access key id ( AWS_ACCESS_KEY_ID ) |
"vfs.s3.aws_secret_access_key" | "" | The AWS access secret ( AWS_SECRET_ACCESS_KEY ) |
"vfs.s3.aws_session_token" | "" | The AWS session token to use |
"vfs.s3.aws_role_arn" | "" | The Amazon Resource Name (ARN) of the role to assume. |
"vfs.s3.aws_external_id" | "" | A unique identifier that might be required when you assume a role in another account |
"vfs.s3.aws_load_frequency" | "" | The duration, in minutes, of the role session |
"vfs.s3.aws_session_name" | "" | An identifier for the assumed role session. |
"vfs.s3.logging_level" | "" | The AWS SDK logging level (OFF, DEBUG, TRACE) |
"vfs.s3.request_timeout_ms" | "3000" | The request timeout in ms. Any long value is acceptable. |
"vfs.s3.scheme" | "https" | The S3 scheme. |
"vfs.s3.use_virtual_addressing" | "true" | Determines whether to use virtual addressing or not. |
"vfs.s3.use_multipart_upload" | "true" | The S3 use of multi-part upload requests ( true or false ), if S3 is enabled. |
"vfs.s3.ca_file" | "" | The path to a cURL-compatible certificate file. |
"vfs.s3.ca_path" | "" | The path to a cURL-compatible certificate directory. |
"vfs.s3.verify_ssl" | "true" | Enable certificate verification for HTTPS connections. |
"vfs.gcs.project_id" | "" | Set the GCS project id. |
"vfs.gcs.multi_part_size" | "5242880" | The part size (in bytes) used in GCS multi part writes. Any uint64_t value is acceptable. Note: vfs.gcs.multi_part_size * vfs.gcs.max_parallel_ops bytes will be buffered before issuing part uploads in parallel. |
"vfs.gcs.max_parallel_ops" | "sm.io_concurrency_level" | The maximum number of GCS backend parallel operations. |
"vfs.gcs.request_timeout_ms" | "3000" | The maximum amount of time to retry network requests to GCS. |
"vfs.gcs.use_multi_part_upload" | "true" | Determines if the GCS backend can use chunked part uploads. |
"vfs.azure.storage_account_name" | "" | Set the Azure Storage Account name. |
"vfs.azure.storage_account_key" | "" | Set the Azure Storage Account key. |
"vfs.azure.blob_endpoint" | "" | Overrides the default Azure Storage Blob endpoint. If empty, the endpoint will be constructed from the storage account name. This should not include an http:// or https:// prefix. |
"vfs.azure.block_list_block_size" | 5242880 | The block size (in bytes) used in Azure blob block list writes. Any uint64_t value is acceptable. Note: vfs.azure.block_list_block_size * vfs.azure.max_parallel_ops bytes will be buffered before issuing block uploads in parallel. |
"vfs.azure.use_https" | "true" | Determines if the blob endpoint should use HTTP or HTTPS. |
"vfs.azure.max_parallel_ops" | sm.io_concurrency_level | The maximum number of Azure backend parallel operations. |
"vfs.azure.use_block_list_upload" | "true" | Determines if the Azure backend can use chunked block uploads. |
"vfs.hdfs.kerb_ticket_cache_path" | "" | Path to the Kerberos ticket cache when connecting to an HDFS cluster. |
"vfs.hdfs.name_node_uri" | "" | Optional namenode URI to use (TileDB will use "default" if not specified). URI must be specified in the format <protocol>://<hostname>:<port> , ex: hdfs://localhost:9000 . If the string starts with a protocol type such as file:// or s3:// this protocol will be used (default hdfs:// ). |
"vfs.hdfs.username" | "" | Username to use when connecting to the HDFS cluster. |
"rest.server_address" | https://api.tiledb.com | URL for REST server to use for remote arrays. |
"rest.server_serialization_format" | "CAPNP" | Serialization format to use for remote array requests (CAPNP or JSON). |
"rest.username" | "" | Username for login to REST server. |
"rest.password" | "" | Password for login to REST server. |
"rest.token" | "" | Authentication token for REST server (used instead of username/password). |
"rest.resubmit_incomplete" | "true" | If true, incomplete queries received from server are automatically resubmitted before returning to user control. |
"rest.ignore_ssl_validation" | "false" | Have curl ignore ssl peer and host validation for REST server. |
"rest.http_compressor" | "none" | Compression used in HTTP requests. |
"rest.creation_access_credentials_name" | no default set | The name of the registered access key to use for creation of the REST server. |
"rest.retry_http_codes" | "503" | CSV list of http status codes to automatically retry a REST request for. |
"rest.retry_count" | "3" | Number of times to retry failed REST requests |
"rest.retry_initial_delay_ms" | "500" | Initial delay in milliseconds to wait until retrying a REST request |
"rest.retry_delay_factor" | "1.25" | The delay factor to exponentially wait until further retries of a failed REST request |
"config.env_var_prefix" | "TILEDB_" | Prefix of environmental variables for reading configuration parameters. |
Last modified 1yr ago