ESDM
Middleware for Earth System Data
Macros | Functions | Variables
esdm-datatypes.c File Reference

This file implements ESDM types, and associated methods. More...

#include <esdm-internal.h>
#include <esdm-grid.h>
#include <esdm.h>
#include <inttypes.h>
#include <smd.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Include dependency graph for esdm-datatypes.c:

Macros

#define _GNU_SOURCE   /* See feature_test_macros(7) */
 
#define DEBUG_ENTER   ESDM_DEBUG_COM_FMT("DATATYPES", "", "")
 
#define DEBUG(fmt, ...)   ESDM_DEBUG_COM_FMT("DATATYPES", fmt, __VA_ARGS__)
 

Functions

esdm_status esdm_container_create (const char *name, int allow_overwrite, esdm_container_t **oc)
 
bool esdm_container_dataset_exists (esdm_container_t *c, char const *name)
 
int esdm_container_dataset_count (esdm_container_t *c)
 
esdm_dataset_t * esdm_container_dataset_from_array (esdm_container_t *c, int i)
 
void esdmI_container_register_dataset (esdm_container_t *c, esdm_dataset_t *dset)
 
void esdmI_container_init (char const *name, esdm_container_t **out_container)
 
esdm_status esdmI_create_dataset_from_metadata (esdm_container_t *c, json_t *json, esdm_dataset_t **out)
 
esdm_status esdm_container_open_md_parse (esdm_container_t *c, char *md, int size)
 
esdm_status esdm_container_open_md_load (esdm_container_t *c, char **out_md, int *out_size)
 
esdm_status esdm_container_open (char const *name, int esdm_mode_flags, esdm_container_t **out_container)
 
void esdmI_datasets_reference_metadata_create (esdm_container_t *c, smd_string_stream_t *s)
 
void esdmI_container_metadata_create (esdm_container_t *c, smd_string_stream_t *s)
 
esdm_status esdm_container_commit (esdm_container_t *c)
 
esdm_status esdm_container_close (esdm_container_t *c)
 
esdm_status esdm_container_delete_attribute (esdm_container_t *c, const char *name)
 
esdm_status esdm_container_link_attribute (esdm_container_t *c, int overwrite, smd_attr_t *attr)
 
esdm_status esdm_container_get_attributes (esdm_container_t *c, smd_attr_t **out_metadata)
 
esdm_status esdmI_container_destroy (esdm_container_t *c)
 
 __attribute__ ((warn_unused_result))
 
int64_t esdm_dataspace_get_dims (esdm_dataspace_t *d)
 
int64_t esdm_dataspace_total_bytes (esdm_dataspace_t *d)
 
int64_t const * esdm_dataspace_get_size (esdm_dataspace_t *d)
 
int64_t const * esdm_dataspace_get_offset (esdm_dataspace_t *d)
 
esdm_status esdmI_fragment_create (esdm_dataset_t *d, esdm_dataspace_t *sspace, void *buf, esdm_fragment_t **out_fragment)
 
esdm_status esdm_fragment_retrieve (esdm_fragment_t *fragment)
 
esdm_status esdm_fragment_load (esdm_fragment_t *fragment)
 
esdm_status esdm_fragment_unload (esdm_fragment_t *fragment)
 
void esdm_fragment_metadata_create (esdm_fragment_t *f, smd_string_stream_t *stream)
 
esdm_status esdm_fragment_commit (esdm_fragment_t *f)
 
esdm_status esdm_container_delete (esdm_container_t *c)
 
esdm_status esdm_dataset_delete (esdm_dataset_t *d)
 
esdm_status esdm_fragment_destroy (esdm_fragment_t *frag)
 
void esdm_dataset_init (esdm_container_t *c, const char *name, esdm_dataspace_t *dspace, esdm_dataset_t **out_dataset)
 
esdm_status esdm_dataset_create (esdm_container_t *c, const char *name, esdm_dataspace_t *dspace, esdm_dataset_t **out_dataset)
 
esdm_status esdm_dataset_set_compression_hint (esdm_dataset_t *dset, scil_user_hints_t const *hints)
 
esdm_status esdm_dataset_open_md_load (esdm_dataset_t *dset, char **out_md, int *out_size)
 
esdm_backend_t * esdmI_get_backend (char const *plugin_id)
 
esdm_status esdmI_create_fragment_from_metadata (esdm_dataset_t *dset, json_t *json, esdm_fragment_t **out_fragment)
 
esdm_status esdm_dataspace_set_stride (esdm_dataspace_t *space, int64_t *stride)
 
esdm_status esdm_dataspace_copyDatalayout (esdm_dataspace_t *space, esdm_dataspace_t *source)
 
esdm_status esdm_dataset_open_md_parse (esdm_dataset_t *d, char *md, int size)
 
esdm_status esdm_dataset_ref (esdm_dataset_t *d)
 
esdm_status esdm_dataset_by_name (esdm_container_t *c, const char *name, int esdm_mode_flags, esdm_dataset_t **out_dataset)
 
esdm_status esdm_dataset_open (esdm_container_t *c, const char *name, int esdm_mode_flags, esdm_dataset_t **out_dataset)
 
void esdmI_dataset_metadata_create (esdm_dataset_t *d, smd_string_stream_t *s)
 
esdm_status esdm_dataset_commit (esdm_dataset_t *d)
 
esdm_status esdm_dataset_update (esdm_dataset_t *dataset)
 
esdm_status esdmI_dataset_fragmentsCoveringRegion (esdm_dataset_t *dataset, esdmI_hypercube_t *region, int64_t *out_count, esdm_fragment_t ***out_fragments, esdmI_hypercubeSet_t **out_uncovered, bool *out_fullyCovered)
 
esdm_fragment_t * esdmI_dataset_createFragment (esdm_dataset_t *dataset, esdm_dataspace_t *memspace, void *buf, bool *out_newFragment)
 
esdm_fragment_t * esdmI_dataset_lookupFragmentForShape (esdm_dataset_t *dataset, esdm_dataspace_t *shape)
 
esdm_status esdm_dataset_close (esdm_dataset_t *dset)
 
esdm_status esdmI_dataset_destroy (esdm_dataset_t *dset)
 
esdm_status esdm_dataset_delete_attribute (esdm_dataset_t *dataset, const char *name)
 
esdm_status esdm_dataset_get_attributes (esdm_dataset_t *dataset, smd_attr_t **out_metadata)
 
esdm_status esdm_dataset_rename (esdm_dataset_t *d, const char *name)
 
esdm_status esdm_dataspace_create_full (int64_t dims, int64_t *sizes, int64_t *offset, esdm_type_t type, esdm_dataspace_t **out_dataspace)
 
esdm_status esdm_dataspace_create (int64_t dims, int64_t *sizes, esdm_type_t type, esdm_dataspace_t **out_dataspace)
 
esdm_status esdmI_dataspace_createFromHypercube (esdmI_hypercube_t *extends, esdm_type_t type, esdm_dataspace_t **out_space)
 
esdm_status esdm_dataspace_copy (esdm_dataspace_t *orig, esdm_dataspace_t **out_dataspace)
 
esdm_status esdmI_dataspace_getExtends (esdm_dataspace_t *space, esdmI_hypercube_t **out_extends)
 
esdm_status esdmI_dataspace_setExtends (esdm_dataspace_t *space, esdmI_hypercube_t *extends)
 
esdm_status esdm_dataspace_subspace (esdm_dataspace_t *dataspace, int64_t dims, int64_t *size, int64_t *offset, esdm_dataspace_t **out_dataspace)
 
esdm_status esdm_dataspace_makeContiguous (esdm_dataspace_t *dataspace, esdm_dataspace_t **out_dataspace)
 
void esdm_dataspace_print (esdm_dataspace_t *d)
 
void esdm_dataspace_serialize (const esdm_dataspace_t *space, smd_string_stream_t *stream)
 
esdm_status esdmI_dataspace_createFromJson (json_t *json, esdm_dataset_t *dataset, esdm_dataspace_t **out_dataspace)
 
void esdm_dataspace_getEffectiveStride (esdm_dataspace_t *space, int64_t *out_stride)
 
int64_t esdm_dataspace_elementOffset (esdm_dataspace_t *space, int64_t *coords)
 
esdm_status esdm_dataspace_fill (esdm_dataspace_t *dataspace, void *data, void *fillElement)
 
void esdm_fragment_print (esdm_fragment_t *f)
 
esdm_status esdm_dataspace_destroy (esdm_dataspace_t *d)
 
uint64_t esdm_dataspace_element_count (esdm_dataspace_t *subspace)
 
esdm_status esdm_dataset_name_dims (esdm_dataset_t *d, char *const *names)
 
esdm_status esdm_dataset_rename_dim (esdm_dataset_t *d, char const *name, int i)
 
void esdm_dataset_set_status_dirty (esdm_dataset_t *d)
 
void esdm_container_set_status_dirty (esdm_container_t *c)
 
esdm_status esdm_dataset_get_name_dims (esdm_dataset_t *d, char const *const **out_names)
 
esdm_status esdm_dataset_link_attribute (esdm_dataset_t *dset, int overwrite, smd_attr_t *attr)
 
esdm_status esdm_dataset_iterator (esdm_container_t *container, esdm_dataset_iterator_t **iter)
 
char const * esdm_dataset_name (esdm_dataset_t *d)
 
int64_t const * esdm_dataset_get_actual_size (esdm_dataset_t *dset)
 
int64_t const * esdm_dataset_get_size (esdm_dataset_t *dset)
 
esdm_status esdm_dataset_update_size (esdm_dataset_t *d, uint64_t *sizes)
 
esdm_type_t esdm_dataset_get_type (esdm_dataset_t *d)
 
esdm_type_t esdm_dataspace_get_type (esdm_dataspace_t *d)
 

Variables

const int64_t kInitialGridSlotCount = 8
 

Detailed Description

This file implements ESDM types, and associated methods.

Function Documentation

◆ esdm_container_close()

esdm_status esdm_container_close ( esdm_container_t *  container)

Close a container object. If it is no longer in use, free it.

Warning: This throws an error if there are any datasets within this container that are still open. Make sure to close all datasets first.

Parameters
[in] container: an existing container object that is no longer needed

◆ esdm_container_commit()

esdm_status esdm_container_commit ( esdm_container_t *  container)

Make container persistent to storage. Enqueue for writing to backends.

Calling container commit may trigger subsequent commits for datasets that are part of the container.

Parameters
[in] container: pointer to an existing container which is to be committed to storage
Returns
status

◆ esdm_container_create()

esdm_status esdm_container_create ( const char *  name,
int  allow_overwrite,
esdm_container_t **  out_container 
)

Create a new container.

  • Allocate process local memory structures.
  • Register with metadata service.
Parameters
[in] name: string to identify the container, must not be empty
[out] out_container: returns a pointer to the new container
Returns
status

◆ esdm_container_open()

esdm_status esdm_container_open ( const char *  name,
int  esdm_mode_flags,
esdm_container_t **  out_container 
)

Open an existing container.

  • Allocate process local memory structures.
  • Register with metadata service.
Parameters
[in] name: string to identify the container, must not be empty
[out] out_container: returns a pointer to the container
Returns
status

◆ esdm_dataset_close()

esdm_status esdm_dataset_close ( esdm_dataset_t *  dataset)

Close a dataset object. If it isn't used anymore, its metadata will be unloaded.

This function is not thread-safe. Only a single master thread must be used to call into ESDM.

Parameters
[in] dataset: an existing dataset object that is no longer needed
Returns
status

◆ esdm_dataset_commit()

esdm_status esdm_dataset_commit ( esdm_dataset_t *  dataset)

Make dataset persistent to storage. Schedule for writing to backends.

Parameters
[in] dataset: pointer to an existing dataset which is to be committed to storage
Returns
status

◆ esdm_dataset_create()

esdm_status esdm_dataset_create ( esdm_container_t *  container,
const char *  name,
esdm_dataspace_t *  dataspace,
esdm_dataset_t **  out_dataset 
)

Create a new dataset.

  • Allocate process local memory structures.
  • Register with metadata service.
Parameters
[in] container: pointer to an existing container to which the new dataset will be linked
[in] name: identifier for the new dataset, must not be empty
[in] dataspace: pointer to an existing dataspace which defines the shape of the data that will be stored within the dataset
[out] out_dataset: returns a pointer to the new dataset
Returns
status

◆ esdm_dataset_open()

esdm_status esdm_dataset_open ( esdm_container_t *  container,
const char *  name,
int  esdm_mode_flags,
esdm_dataset_t **  out_dataset 
)

Open a dataset.

  • Allocate process local memory structures
  • Retrieve metadata
Parameters
[in] container: pointer to an open container that contains the dataset that is to be opened
[in] name: identifier of the dataset within the container, must not be empty
[out] out_dataset: returns a pointer to the opened dataset
Returns
status

◆ esdm_dataspace_copy()

esdm_status esdm_dataspace_copy ( esdm_dataspace_t *  orig,
esdm_dataspace_t **  out_dataspace 
)

Create a copy of a dataspace.

  • Allocate process local memory structures.
Parameters
[in] orig: the dataspace to copy
[out] out_dataspace: pointer to the new dataspace
Returns
status

◆ esdm_dataspace_copyDatalayout()

esdm_status esdm_dataspace_copyDatalayout ( esdm_dataspace_t *  dataspace,
esdm_dataspace_t *  strideSource 
)

Copy the stride information from one dataspace to another.

This is useful when defining a subspace that is supposed to access the same buffer as the enclosing dataspace. A simple esdm_dataspace_subspace() will assume contiguous storage for the subspace, a subsequent call esdm_dataspace_copyDatalayout(subspace, bufferSpace) will provide the subspace with the correct stride values to access its possibly non-contiguous part from the same buffer. Note that it is still necessary to adjust the buffer's address by means of esdm_dataspace_elementOffset() to compute the actual address of the subspace's first element.

The strideSource must have the same dimension count as the dataspace.

Parameters
[in,out] dataspace: the dataspace to update
[in] strideSource: the dataspace that provides the data layout information which is to be copied
Returns
ESDM_SUCCESS

◆ esdm_dataspace_create()

esdm_status esdm_dataspace_create ( int64_t  dims,
int64_t *  sizes,
esdm_type_t  type,
esdm_dataspace_t **  out_dataspace 
)

Create a new dataspace.

  • Allocate process local memory structures.
Parameters
[in] dims: count of dimensions of the new dataspace
[in] sizes: array of the sizes of the different dimensions, the length of this array is dims. Must not be NULL unless dims == 0
[in] type: the datatype for each data point
[out] out_dataspace: pointer to the new dataspace
Returns
status

◆ esdm_dataspace_create_full()

esdm_status esdm_dataspace_create_full ( int64_t  dims,
int64_t *  sizes,
int64_t *  offset,
esdm_type_t  type,
esdm_dataspace_t **  out_dataspace 
)

Create a new dataspace.

  • Allocate process local memory structures.
Parameters
[in] dims: count of dimensions of the new dataspace
[in] sizes: array of the sizes of the different dimensions, the length of this array is dims. Must not be NULL unless dims == 0
[in] offset: array containing the logical coordinates of the first data point in this dataspace
[in] type: the datatype for each data point
[out] out_dataspace: pointer to the new dataspace
Returns
status

◆ esdm_dataspace_destroy()

esdm_status esdm_dataspace_destroy ( esdm_dataspace_t *  dataspace)

Destruct and free a dataspace object.

Parameters
[in] dataspace: an existing dataspace object that is no longer needed
Returns
status

"_destroy" sounds too destructive, this will be renamed to esdm_dataspace_close().

◆ esdm_dataspace_element_count()

uint64_t esdm_dataspace_element_count ( esdm_dataspace_t *  dataspace)

Returns the number of datapoints in the dataspace.

◆ esdm_dataspace_elementOffset()

int64_t esdm_dataspace_elementOffset ( esdm_dataspace_t *  space,
int64_t *  coords 
)

Get the offset in bytes of the element at the given logical position. The resulting offset may be negative if a custom stride has been set that has negative component(s). Otherwise, a contiguous C order multidimensional array is assumed, producing only positive offsets.

Parameters
[in] space: the dataspace to query
[in] coords: an array with the coordinates of the element's logical location
Returns
an offset in bytes

◆ esdm_dataspace_fill()

esdm_status esdm_dataspace_fill ( esdm_dataspace_t *  dataspace,
void *  data,
void *  fillElement 
)

Overwrite a buffer with a fill value.

This function sets all elements in the given data buffer to the value given by *fillElement. The number and offsets of the data elements to set are controlled by the dataspace argument.

Parameters
[in] dataspace: description of the area to overwrite
[in,out] data: pointer to the first element to set
[in] fillElement: pointer to a single element which is used as a prototype.
Returns
status

◆ esdm_dataspace_getEffectiveStride()

void esdm_dataspace_getEffectiveStride ( esdm_dataspace_t *  space,
int64_t *  out_stride 
)

Get the effective stride of a dataspace.

If a stride has been set for the dataspace, that stride is copied to the out_stride array, otherwise the effective stride is calculated and returned in that same array.

Parameters
[in] space: the dataspace to query
[out] out_stride: pointer to an array of size space->dims which will be filled with the components of the stride.

As with esdm_dataspace_set_stride(), the stride is given in terms of fundamental datatype elements and needs to be multiplied with esdm_sizeof(space->type) to get the stride in bytes.

◆ esdm_dataspace_makeContiguous()

esdm_status esdm_dataspace_makeContiguous ( esdm_dataspace_t *  dataspace,
esdm_dataspace_t **  out_dataspace 
)

Define a dataspace that covers the same logical hypercube as the given dataspace, but which uses the standard, contiguous C array element order. The stride of the original dataspace will be ignored totally.

  • Allocates process local memory structures.
Parameters
[in] dataspace: the dataspace that is to be copied
[out] out_dataspace: pointer to the new contiguous dataspace
Returns
ESDM_SUCCESS

◆ esdm_dataspace_serialize()

void esdm_dataspace_serialize ( const esdm_dataspace_t *  dataspace,
smd_string_stream_t *  stream 
)

Serializes dataspace description.

e.g., to store along with fragment

◆ esdm_dataspace_set_stride()

esdm_status esdm_dataspace_set_stride ( esdm_dataspace_t *  dataspace,
int64_t *  stride 
)

Specify a non-standard serialization order for a dataspace.

This can be used to handle FORTRAN arrays, for example, or do some crazy stuff like inverted dimensions, or to skip over holes. Use carefully, or don't use at all. You have been warned.

Parameters
[in,out] dataspace: the dataspace that is to be modified
[in] stride: array with one entry per dimension of the dataspace (the number of entries must match the dimension count of the dataspace); each entry gives the number of elements to skip over when increasing the respective coordinate by one.
Returns
status

Examples:

A C array int array[7][11] does not need a stride, the stride is implicitly assumed to be (11, 1).

To handle a FORTRAN array INTEGER :: array(7, 11), use the following call: esdm_dataspace_set_stride(dataspace, (int64_t[2]){1, 7});

To use only a 3x5 part of an existing C array int array[7][11], starting at (1, 2), use these calls:

esdm_dataspace_t* subspace;
esdm_dataspace_subspace(parent, 2, (int64_t[2]){3, 5}, (int64_t[2]){1, 2}, &subspace);
esdm_dataspace_set_stride(subspace, (int64_t[2]){11, 1});

After this, the 2D coordinates will be mapped to the buffer offsets like this:

(1,2)=0, (1,3)=1, (1,4)=2, (1,5)=3, (1,6)=4,
(2,2)=11, (2,3)=12, (2,4)=13, (2,5)=14, (2,6)=15,
(3,2)=22, (3,3)=23, (3,4)=24, (3,5)=25, (3,6)=26.

◆ esdm_dataspace_subspace()

esdm_status esdm_dataspace_subspace ( esdm_dataspace_t *  dataspace,
int64_t  dims,
int64_t *  size,
int64_t *  offset,
esdm_dataspace_t **  out_dataspace 
)

TODO: remove dims parameter for good

◆ esdm_dataspace_total_bytes()

int64_t esdm_dataspace_total_bytes ( esdm_dataspace_t *  d)

Returns the number of bytes covered by the dataspace.

◆ esdm_fragment_commit()

esdm_status esdm_fragment_commit ( esdm_fragment_t *  fragment)

Make fragment persistent to storage. Schedule for writing to backends.

Parameters
[in] fragment: pointer to an existing fragment which is to be committed to storage
Returns
status

◆ esdm_fragment_destroy()

esdm_status esdm_fragment_destroy ( esdm_fragment_t *  fragment)

Destruct and free a fragment object.

Parameters
[in] fragment: an existing fragment object that is no longer needed
Returns
status

"_destroy" sounds too destructive, this will be renamed to esdm_fragment_close().

◆ esdm_fragment_load()

esdm_status esdm_fragment_load ( esdm_fragment_t *  fragment)

Like esdm_fragment_retrieve(), but more permissive: Does not throw an ESDM_DIRTY_DATA_ERROR, simply ensures that the fragment's data is available in memory.

◆ esdm_fragment_print()

void esdm_fragment_print ( esdm_fragment_t *  fragment)

Serializes fragment for storage.

◆ esdm_fragment_retrieve()

esdm_status esdm_fragment_retrieve ( esdm_fragment_t *  fragment)

Fetch data from disk if possible. Loads fragments that are not loaded, noops on those that are loaded and clean, and errors out on those that are dirty or deleted.

XXX: This should probably be turned into an internal interface.

◆ esdm_fragment_unload()

esdm_status esdm_fragment_unload ( esdm_fragment_t *  fragment)

Ensure that the fragment has no data in memory.

If the fragment is dirty, it is committed, turning it into a persistent fragment. If the fragment is persistent, its buffer is released, turning it into an unloaded fragment. If the fragment is deleted or not loaded, nothing is done and the call succeeds.

◆ esdmI_container_destroy()

esdm_status esdmI_container_destroy ( esdm_container_t *  container)

Destruct and free a container object.

Parameters
[in] container: an existing container object that is no longer needed

"_destroy" sounds too destructive, this will be renamed to esdm_container_close().

◆ esdmI_dataset_destroy()

esdm_status esdmI_dataset_destroy ( esdm_dataset_t *  dataset)

Destruct and free a dataset object.

Parameters
[in] dataset: an existing dataset object that is no longer needed
Returns
status

"_destroy" sounds too destructive, this will be renamed to esdm_dataset_close().

◆ esdmI_dataspace_createFromHypercube()

esdm_status esdmI_dataspace_createFromHypercube ( esdmI_hypercube_t *  extends,
esdm_type_t  type,
esdm_dataspace_t **  out_space 
)

Create a dataspace. Takes the shape in the form of an esdmI_hypercube_t.

Similar to esdm_dataspace_create(), but takes an esdmI_hypercube_t* instead of a pair of offset and size arrays.

Parameters
[in] extends: the logical shape of the dataspace that is to be created
[out] out_space: returns a new dataspace object that needs to be destructed by the caller

◆ esdmI_dataspace_createFromJson()

esdm_status esdmI_dataspace_createFromJson ( json_t *  json,
esdm_dataset_t *  dataset,
esdm_dataspace_t **  out_dataspace 
)

Create a dataspace object from its JSON description (which was produced via a call to esdm_dataspace_serialize()).

Parameters
[in] json: the JSON data describing the dataspace
[in] dataset: the dataset that is to be linked to the dataspace (this provides the datatype)
[out] out_dataspace: will point to a valid dataspace object after a successful return
Returns
ESDM_SUCCESS on success, ESDM_INVALID_DATA_ERROR in case of any inconsistencies in the JSON data

◆ esdmI_dataspace_getExtends()

esdm_status esdmI_dataspace_getExtends ( esdm_dataspace_t *  space,
esdmI_hypercube_t **  out_extends 
)

Get the logical extends covered by a dataspace in the form of an esdmI_hypercube_t.

Parameters
[in] space: the dataspace to query
[out] out_extends: returns a pointer to a hypercube with the extends of the dataspace, the caller is responsible to destroy the returned pointer
Returns
ESDM_SUCCESS

◆ esdmI_dataspace_setExtends()

esdm_status esdmI_dataspace_setExtends ( esdm_dataspace_t *  space,
esdmI_hypercube_t *  extends 
)

Set the logical extends covered by a dataspace in the form of an esdmI_hypercube_t.

Parameters
[in] space: the dataspace to update
[in] extends: a hypercube with the extends of the dataspace
Returns
ESDM_SUCCESS

◆ esdmI_fragment_create()

esdm_status esdmI_fragment_create ( esdm_dataset_t *  d,
esdm_dataspace_t *  sspace,
void *  buf,
esdm_fragment_t **  out_fragment 
)

TODO: there should be a mode to auto-commit on creation?

How does this integrate with the scheduler? On auto-commit, is this merely being pushed to the scheduler for dispatch?