UP | HOME

TEXT back end

Table of Contents

The "file" produced by the text back end is a directory with one file per group.

When the file is open, it is locked by the current process. No other process can read/write the same file. This guarantees that the representation in memory is consistent with the file and avoids re-reading the file before writing. To lock the file, we lock the .lock file which is present in the directory.

The file is written when closed, or when the flush function is called.

1 Template for group-related structures in text back end

/* In-memory image of one group handled by the TEXT back end.
 * This is a generator template: the placeholder-bearing lines are expanded
 * for every attribute/dataset of the group.
 * Field roles, as used by the functions of this file:
 *   - numerical attribute value and its "is set" flag (set by the write function);
 *   - dataset pointer with its rank and dimensions (at most 16 dimensions);
 *   - string attribute pointer and its stored length (strlen + 1 after a write);
 *   - to_flush: set to 1 by the write functions, reset to 0 by the group flush;
 *   - file_name: path of the .txt file backing this group.
 */
typedef struct $group$_s {
  $group_num_dtype_double$ $group_num$;
  $group_dset_dtype$*  $group_dset$;
  uint64_t dims_$group_dset$[16];
  uint64_t len_$group_str$;
  uint32_t rank_$group_dset$;
  uint32_t to_flush;
  bool $group_num$_isSet;
  char*    $group_str$;
  char     file_name[TREXIO_MAX_FILENAME_LENGTH];
} $group$_t;

2 Template for general structure in text back end

Polymorphism of the trexio_t type is handled by ensuring that the corresponding types for all back ends can be safely cast to trexio_t. This is done by making the back-end structs start with a trexio_t parent attribute:

/* Handle of a TREXIO file opened with the TEXT back end.
 * The parent member must stay first: the functions of this file cast
 * between trexio_t* and trexio_text_t*.
 * The group pointer line is a generator template; each group pointer is
 * NULL until the group has been read.
 * lock_file holds the descriptor returned by open() on the lock file.
 */
typedef struct trexio_text_s {
  trexio_t   parent ;
  $group$_t* $group$;
  int        lock_file;
} trexio_text_t;

3 Initialize function (constant part)

/* Tell whether something (file, directory, ...) is reachable at "file_name".
 * Returns true exactly when stat() succeeds on that path. */
bool
trexio_text_file_exists (const char* file_name)
{
  struct stat info;

  return stat(file_name, &info) == 0;
}
/* Check that "file_name" exists and is a directory.
 *
 * Returns:
 *   TREXIO_SUCCESS    - the path exists and is a directory
 *   TREXIO_FILE_ERROR - the path exists but is not a directory
 *   TREXIO_FAILURE    - the path does not exist (stat failed), or the platform
 *                       offers no way to test for a directory
 */
trexio_exit_code
trexio_text_inquire (const char* file_name)
{
  struct stat st;

  if (stat(file_name, &st) != 0) return TREXIO_FAILURE;

  bool is_a_directory = false;
#if defined(S_ISDIR)
  is_a_directory = S_ISDIR(st.st_mode);
#elif defined(S_IFMT) && defined(S_IFDIR)
  /* The file type is an enumerated field, not a set of independent bits:
     it has to be masked with S_IFMT and compared, otherwise other types
     sharing the S_IFDIR bit (e.g. block devices) would be accepted. */
  is_a_directory = ((st.st_mode & S_IFMT) == S_IFDIR);
#else
  printf("Some important macros are missing for directory handling.\n");
  return TREXIO_FAILURE;
#endif

  if (!is_a_directory) return TREXIO_FILE_ERROR;

  return TREXIO_SUCCESS;
}

On non-POSIX systems, the function mkdtemp might not be defined. In that case, we define an alternative one, which is not as safe as the original one.

#if  /* Since glibc 2.19: */ _DEFAULT_SOURCE \
           || /* Glibc 2.19 and earlier: */ _BSD_SOURCE \
           || /* Since glibc 2.10: */ _POSIX_C_SOURCE >= 200809L

/* mkdtemp is defined */
#else

/* Fallback replacement for mkdtemp().
 *
 * "template" must end with the six characters "XXXXXX"; they are overwritten
 * in place with generated characters and the resulting directory is created
 * with owner-only permissions. Returns "template" on success. On failure,
 * returns NULL with errno set (EINVAL for a malformed template, EEXIST when
 * no free name was found, or the errno of mkdir()).
 *
 * The generated names are derived from the process id and a counter: they
 * are unique enough to avoid accidental collisions but are predictable, so
 * this is less safe than a native mkdtemp().
 */
char* mkdtemp(char* template) {
    static const char alphabet[] =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    static unsigned long counter = 0UL;

    const size_t n_alphabet = sizeof(alphabet) - 1;
    const size_t n_suffix   = 6;

    if (template == NULL) {
        errno = EINVAL;
        return NULL;
    }

    const size_t len = strlen(template);
    if (len < n_suffix || strcmp(template + len - n_suffix, "XXXXXX") != 0) {
        errno = EINVAL;
        return NULL;
    }

    char* const suffix = template + len - n_suffix;

    for (int attempt = 0; attempt < 256; ++attempt) {

        /* Unsigned arithmetic: wrap-around is well defined and harmless here */
        unsigned long v = (unsigned long) getpid() * 2654435761UL
                        + (++counter) * 40503UL;

        for (size_t i = 0; i < n_suffix; ++i) {
            suffix[i] = alphabet[v % n_alphabet];
            v /= n_alphabet;
        }

        if (mkdir(template, S_IRWXU) == 0) return template;

        /* Only a name collision is worth another attempt */
        if (errno != EEXIST) return NULL;
    }

    errno = EEXIST;
    return NULL;
}

#endif
/* Initialize the TEXT back end part of a freshly allocated file handle.
 *
 * - Zeroes everything that follows the parent member.
 * - Makes sure the TREXIO directory exists (it is created unless the file
 *   is opened in 'r' mode).
 * - Opens the lock file located in the directory. When the file is opened
 *   in 'r' mode and the directory is not writable (EACCES), an unlinked
 *   temporary file is used as a dummy lock instead.
 *
 * Returns TREXIO_SUCCESS, or:
 *   TREXIO_INVALID_ARG_1 - file is NULL
 *   TREXIO_FILE_ERROR    - the path exists but is not a directory
 *   TREXIO_READONLY      - the directory is missing and the mode is 'r'
 *   TREXIO_LOCK_ERROR    - the lock file path does not fit in the buffer
 *   TREXIO_ERRNO         - a system call failed (see errno)
 */
trexio_exit_code
trexio_text_init (trexio_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_text_t* const f = (trexio_text_t*) file;

  /* Put all pointers to NULL but leave parent untouched */
  memset(&(f->parent)+1,0,sizeof(trexio_text_t)-sizeof(trexio_t));

  /* Check if directory exists */
  trexio_exit_code rc;
  rc = trexio_text_inquire(file->file_name);
  /* TREXIO file exists but is not  a directory */
  if (rc == TREXIO_FILE_ERROR) return rc;
  /* If directory does not exist - create it in write mode */
  if (rc == TREXIO_FAILURE) {

    if (file->mode == 'r') return TREXIO_READONLY;

    int rc_dir = mkdir(file->file_name, 0777);
    if (rc_dir != 0) return TREXIO_ERRNO;

  }

  /* Create the lock file in the directory */
  const char* lock_file_name = "/.lock";

  char file_name[TREXIO_MAX_FILENAME_LENGTH];

  /* snprintf never writes past the buffer and reports the length it needed,
     so a too long path is detected before any overflow can happen. */
  int n_path = snprintf(file_name, sizeof(file_name), "%.*s%s",
                        (int) TREXIO_MAX_FILENAME_LENGTH, file->file_name,
                        lock_file_name);
  if (n_path < 0 || (size_t) n_path >= sizeof(file_name)) {
    return TREXIO_LOCK_ERROR;
  }

  f->lock_file = open(file_name,O_WRONLY|O_CREAT|O_TRUNC, 0644);

  /* open() signals failure with -1 only: 0 is a valid descriptor */
  if (f->lock_file < 0) {

    if (file->mode != 'r') return TREXIO_ERRNO;
    if (errno != EACCES)   return TREXIO_ERRNO;

    /* The directory is read-only and the lock file can't be written.
       Create a dummy temporary file for dummy locking.
    */
    char dirname[TREXIO_MAX_FILENAME_LENGTH] = "/tmp/trexio.XXXXXX";
    if (mkdtemp(dirname) == NULL) return TREXIO_ERRNO;

    n_path = snprintf(file_name, sizeof(file_name), "%s%s", dirname, lock_file_name);
    if (n_path < 0 || (size_t) n_path >= sizeof(file_name)) {
      rmdir(dirname);
      return TREXIO_LOCK_ERROR;
    }

    f->lock_file = open(file_name,O_WRONLY|O_CREAT|O_TRUNC, 0644);
    const int open_errno = errno;

    /* The descriptor stays usable after the name is removed */
    remove(file_name);
    rmdir(dirname);

    if (f->lock_file < 0) {
      errno = open_errno;
      return TREXIO_ERRNO;
    }
  }

  return TREXIO_SUCCESS;
}
/* Take an exclusive (write) lock on the whole lock file, waiting until the
 * lock can be granted.
 * Returns TREXIO_INVALID_ARG_1 if file is NULL, TREXIO_FAILURE if fcntl
 * fails, TREXIO_SUCCESS otherwise. */
trexio_exit_code trexio_text_lock(trexio_t* const file) {
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_text_t* const text_file = (trexio_text_t*) file;

  /* l_start = 0 with l_len = 0 designates the entire file */
  struct flock request = {
    .l_type   = F_WRLCK,
    .l_whence = SEEK_SET,
    .l_start  = 0,
    .l_len    = 0,
    .l_pid    = getpid()
  };

  if (fcntl(text_file->lock_file, F_SETLKW, &request) == -1) {
    return TREXIO_FAILURE;
  }

  return TREXIO_SUCCESS;
}
/* Release the lock held on the lock file and close its descriptor.
 * The results of fcntl and close are not inspected.
 * Returns TREXIO_INVALID_ARG_1 if file is NULL, TREXIO_SUCCESS otherwise. */
trexio_exit_code
trexio_text_unlock (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_text_t* const text_file = (trexio_text_t*) file;

  /* l_start = 0 with l_len = 0 designates the entire file */
  struct flock release = {
    .l_type   = F_UNLCK,
    .l_whence = SEEK_SET,
    .l_start  = 0,
    .l_len    = 0,
    .l_pid    = getpid()
  };

  fcntl(text_file->lock_file, F_SETLK, &release);
  close(text_file->lock_file);

  return TREXIO_SUCCESS;
}

4 Deinitialize function (templated part)

/* Release the memory held by the TEXT back end for every group.
 * Generator template: the call below is expanded once per group and, as the
 * inline comment states, its error handling is inserted by the generator;
 * as written here the value stored in rc is not inspected.
 * Returns TREXIO_INVALID_ARG_1 if file is NULL, TREXIO_SUCCESS otherwise. */
trexio_exit_code
trexio_text_deinit (trexio_t* const file)
{

  if (file  == NULL) return TREXIO_INVALID_ARG_1;

  trexio_exit_code rc;

  /* Error handling for this call is added by the generator */
  rc = trexio_text_free_$group$( (trexio_text_t*) file);

  return TREXIO_SUCCESS;

}

5 Flush function (templated part)

/* Write every modified group back to its .txt file.
 * Generator template: the call below is expanded once per group and, as the
 * inline comment states, its error handling is inserted by the generator;
 * as written here the value stored in rc is not inspected.
 * Returns TREXIO_INVALID_ARG_1 if file is NULL, TREXIO_SUCCESS otherwise. */
trexio_exit_code
trexio_text_flush (trexio_t* const file)
{

  if (file  == NULL) return TREXIO_INVALID_ARG_1;

  trexio_exit_code rc;
  trexio_text_t* f = (trexio_text_t*) file;

  /* Error handling for this call is added by the generator */
  rc = trexio_text_flush_$group$(f);

  return TREXIO_SUCCESS;

}

6 Template for text read a group

/* Return the in-memory image of the group, loading it from its .txt file on
 * first use.
 *
 * If the group is already cached in the handle, the cached pointer is
 * returned. Otherwise a zeroed structure is allocated, the path of the group
 * file is stored in it and, when that file can be opened, its content is
 * parsed keyword by keyword. A missing file is not an error: the empty
 * structure is cached and returned.
 *
 * Returns NULL when file is NULL, when the path does not fit in the buffer,
 * when an allocation fails or when the file is malformed. On a parse or
 * allocation failure everything allocated so far is released through the
 * dedicated clean-up function.
 *
 * The REPEAT markers delimit the sections expanded by the generator.
 */
$group$_t*
trexio_text_read_$group$ (trexio_text_t* const file)
{

  if (file == NULL) return NULL;

  /* If the data structure exists, return it */
  if (file->$group$ != NULL) {
    return file->$group$;
  }

  /* Allocate the data structure */
  $group$_t* $group$ = MALLOC($group$_t);
  if ($group$ == NULL) return NULL;

  memset($group$,0,sizeof($group$_t));

  /* Build the file name */
  const char* $group$_file_name = "/$group$.txt";

  /* snprintf never writes past the buffer and reports the length it needed,
     so a too long path is rejected before any overflow can happen. */
  const int n_path = snprintf($group$->file_name, TREXIO_MAX_FILENAME_LENGTH, "%.*s%s",
                              (int) TREXIO_MAX_FILENAME_LENGTH, file->parent.file_name,
                              $group$_file_name);

  if (n_path < 0 || (size_t) n_path >= (size_t) TREXIO_MAX_FILENAME_LENGTH) {
    FREE($group$);
    return NULL;
  }

  /* If the file exists, read it */
  FILE* f = fopen($group$->file_name,"r");
  if (f != NULL) {

    /* Find size of file to allocate the max size of the string buffer */
    fseek(f, 0L, SEEK_END);
    size_t sz = ftell(f);
    fseek(f, 0L, SEEK_SET);

    sz = (sz < 1024) ? (1024) : (sz);
    char* buffer = CALLOC(sz, char);
    if (buffer == NULL) {
      fclose(f);
      FREE($group$);
      return NULL;
    }

    int rc = 0;

    /* workaround for the case of missing blocks in the file */
    // START REPEAT GROUP_DSET_ALL
    uint64_t size_$group_dset$ = 0;
    // END REPEAT GROUP_DSET_ALL

    while(fscanf(f, "%1023s", buffer) != EOF) {

      if (strcmp(buffer, "EXIT") == 0) {
        break;
      // START REPEAT GROUP_DSET_ALL
      } else if (strcmp(buffer, "rank_$group_dset$") == 0) {

        rc = fscanf(f, "%u", &($group$->rank_$group_dset$));
        if (rc != 1) {
          trexio_text_free_read_$group$(buffer, f, file, $group$);
          return NULL;
        }

        if ($group$->rank_$group_dset$ != 0) size_$group_dset$ = 1UL;

        for (uint32_t i=0; i<$group$->rank_$group_dset$; ++i){

          uint32_t j=0;
          rc = fscanf(f, "%1023s %u", buffer, &j);
          if ((rc != 2) || (strcmp(buffer, "dims_$group_dset$") != 0) || (j!=i)) {
            trexio_text_free_read_$group$(buffer, f, file, $group$);
            return NULL;
          }

          /* A malformed file is reported through the NULL return, like every
             other parse error, instead of aborting the process. */
          rc = fscanf(f, "%" SCNu64 "\n", &($group$->dims_$group_dset$[i]));
          if (rc != 1) {
            trexio_text_free_read_$group$(buffer, f, file, $group$);
            return NULL;
          }

          size_$group_dset$ *= $group$->dims_$group_dset$[i];
        }
      // END REPEAT GROUP_DSET_ALL
      // START REPEAT GROUP_DSET_NUM
      } else if (strcmp(buffer, "$group_dset$") == 0) {

        /* Allocate arrays */
        $group$->$group_dset$ = CALLOC(size_$group_dset$, $group_dset_dtype$);
        if ($group$->$group_dset$ == NULL) {
          trexio_text_free_read_$group$(buffer, f, file, $group$);
          return NULL;
        }

        for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
          rc = fscanf(f, "%$group_dset_format_scanf$", &($group$->$group_dset$[i]));
          if (rc != 1) {
            trexio_text_free_read_$group$(buffer, f, file, $group$);
            return NULL;
          }
        }

      // END REPEAT GROUP_DSET_NUM
      // START REPEAT GROUP_DSET_STR
      } else if (strcmp(buffer, "$group_dset$") == 0) {

        if (size_$group_dset$ != 0) {
          /* Allocate arrays */
          $group$->$group_dset$ = CALLOC(size_$group_dset$, $group_dset_dtype$);
          if ($group$->$group_dset$ == NULL) {
            trexio_text_free_read_$group$(buffer, f, file, $group$);
            return NULL;
          }

          /* WARNING: this tmp array allows to avoid allocation of space for each element of array of string
           * BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
           */
          char* tmp_$group_dset$;
          tmp_$group_dset$ = CALLOC(size_$group_dset$*32, char);
          if (tmp_$group_dset$ == NULL) {
            trexio_text_free_read_$group$(buffer, f, file, $group$);
            return NULL;
          }

          for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
            $group$->$group_dset$[i] = tmp_$group_dset$;
            /* conventional fcanf with "%s" only return the string before the first space character
             * to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]"
             */
            rc = fscanf(f, " %1023[^\n]", buffer);
            if (rc != 1) {
              trexio_text_free_read_$group$(buffer, f, file, $group$);
              return NULL;
            }

            /* Every string may use at most 32 bytes of the shared storage,
               terminator included: longer strings are truncated so that the
               running pointer can never leave the allocation. */
            size_t tmp_$group_dset$_len = strlen(buffer);
            if (tmp_$group_dset$_len > 31) tmp_$group_dset$_len = 31;
            memcpy(tmp_$group_dset$, buffer, tmp_$group_dset$_len);
            tmp_$group_dset$[tmp_$group_dset$_len] = '\0';
            tmp_$group_dset$ += tmp_$group_dset$_len + 1;
          }
        }

      // END REPEAT GROUP_DSET_STR
      // START REPEAT GROUP_NUM
      } else if (strcmp(buffer, "$group_num$_isSet") == 0) {

        unsigned int $group_num$_isSet;
        /* additional parameter $group_num$_isSet is needed to suppress warning when fscanf into bool variable using %u or %d */
        rc = fscanf(f, "%u", &($group_num$_isSet));
        $group$->$group_num$_isSet = (bool) $group_num$_isSet;
        if (rc != 1) {
          trexio_text_free_read_$group$(buffer, f, file, $group$);
          return NULL;
        }

        if ($group$->$group_num$_isSet == true) {

          rc = fscanf(f, "%1023s", buffer);
          if ((rc != 1) || (strcmp(buffer, "$group_num$") != 0)) {
            trexio_text_free_read_$group$(buffer, f, file, $group$);
            return NULL;
          }

          rc = fscanf(f, "%$group_num_format_scanf$", &($group$->$group_num$));
          if (rc != 1) {
            trexio_text_free_read_$group$(buffer, f, file, $group$);
            return NULL;
          }

        }

      // END REPEAT GROUP_NUM
      // START REPEAT GROUP_ATTR_STR
      } else if (strcmp(buffer, "len_$group_str$") == 0) {

        rc = fscanf(f, "%" SCNu64 "", &($group$->len_$group_str$));
        if (rc != 1) {
          trexio_text_free_read_$group$(buffer, f, file, $group$);
          return NULL;
        }

        rc = fscanf(f, "%1023s", buffer);
        if ((rc != 1) || (strcmp(buffer, "$group_str$") != 0)) {
          trexio_text_free_read_$group$(buffer, f, file, $group$);
          return NULL;
        }

        if ($group$->len_$group_str$ != 0) {

          $group$->$group_str$ = CALLOC($group$->len_$group_str$, char);
          if ($group$->$group_str$ == NULL) {
            trexio_text_free_read_$group$(buffer, f, file, $group$);
            return NULL;
          }

          rc = fscanf(f, " %1023[^\n]", buffer);
          if (rc != 1) {
            trexio_text_free_read_$group$(buffer, f, file, $group$);
            return NULL;
          }
          /* Safer string conversion to avoid buffer overflow in fscanf */
          strncpy($group$->$group_str$, buffer, $group$->len_$group_str$);
          /* strncpy does not terminate the destination when the text is at
             least as long as the stored length: force the terminator. */
          $group$->$group_str$[$group$->len_$group_str$ - 1] = '\0';

        }
      // END REPEAT GROUP_ATTR_STR
      } else {
        continue;
      }

    }

    FREE(buffer);
    fclose(f);
    f = NULL;
  }

  file->$group$ = $group$;
  return $group$;
}

7 Template for text has a group

/* Tell whether the .txt file of the group exists in the TREXIO directory.
 *
 * Unless the file is opened in 'r' mode, the group is flushed first so that
 * pending modifications have created the file.
 *
 * Returns:
 *   TREXIO_SUCCESS       - the group file exists
 *   TREXIO_HAS_NOT       - the group file does not exist
 *   TREXIO_INVALID_ARG_1 - file is NULL
 *   TREXIO_FAILURE       - the flush failed or the path does not fit in the buffer
 */
trexio_exit_code
trexio_text_has_$group$ (trexio_t* const file)
{

  if (file  == NULL) return TREXIO_INVALID_ARG_1;

  /* Flush the group to make sure the group.txt file is created */
  if (file->mode != 'r') {
    trexio_exit_code rc = trexio_text_flush_$group$((trexio_text_t*) file);
    if (rc != TREXIO_SUCCESS) return TREXIO_FAILURE;
  }

  /* Build the file name */
  char $group$_full_path[TREXIO_MAX_FILENAME_LENGTH];

  const char* $group$_file_name = "/$group$.txt";

  /* snprintf never writes past the buffer and reports the length it needed,
     so a too long path is rejected before any overflow can happen. */
  const int n_path = snprintf($group$_full_path, sizeof($group$_full_path), "%.*s%s",
                              (int) TREXIO_MAX_FILENAME_LENGTH, file->file_name,
                              $group$_file_name);

  if (n_path < 0 || (size_t) n_path >= sizeof($group$_full_path)) return TREXIO_FAILURE;

  bool file_exists;
  file_exists = trexio_text_file_exists($group$_full_path);

  if (file_exists) {
    return TREXIO_SUCCESS;
  } else {
    return TREXIO_HAS_NOT;
  }
}

8 Template for text flush a group

/* Write the in-memory image of the group to its .txt file.
 *
 * Nothing is written when the group was never loaded or has no pending
 * modification (to_flush == 0). The file is rewritten from scratch:
 * dimensioning variables first, then numerical attributes, string
 * attributes and finally the arrays.
 *
 * Returns:
 *   TREXIO_SUCCESS       - nothing to do, or the file was written
 *   TREXIO_INVALID_ARG_1 - file is NULL, or the group file cannot be opened
 *                          (code kept for backward compatibility)
 *   TREXIO_READONLY      - the file is opened in 'r' mode
 *   TREXIO_FILE_ERROR    - a write error was detected; the group stays
 *                          marked as modified so a later flush can retry
 *
 * The REPEAT markers delimit the sections expanded by the generator.
 */
trexio_exit_code
trexio_text_flush_$group$ (trexio_text_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  if (file->parent.mode == 'r') return TREXIO_READONLY;

  $group$_t* $group$ = file->$group$;
  if ($group$ == NULL) return TREXIO_SUCCESS;

  if ($group$->to_flush == 0) return TREXIO_SUCCESS;

  assert (file->parent.mode == 'w' || file->parent.mode == 'u');

  FILE* f = fopen($group$->file_name, "w");
  if (f == NULL) return TREXIO_INVALID_ARG_1;

  /* Write the dimensioning variables */
  // START REPEAT GROUP_DSET_ALL
  fprintf(f, "rank_$group_dset$ %u\n", $group$->rank_$group_dset$);
  // workaround for the case of missing blocks in the file
  uint64_t size_$group_dset$ = 0;
  if ($group$->rank_$group_dset$ != 0) size_$group_dset$ = 1;

  for (unsigned int i=0; i<$group$->rank_$group_dset$; ++i){
    fprintf(f, "dims_$group_dset$ %u %" PRIu64 "\n", i, $group$->dims_$group_dset$[i]);
    size_$group_dset$ *= $group$->dims_$group_dset$[i];
  }
  // END REPEAT GROUP_DSET_ALL

  // START REPEAT GROUP_NUM
  fprintf(f, "$group_num$_isSet %u \n", $group$->$group_num$_isSet);
  if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_format_printf$ \n", $group$->$group_num$);
  // END REPEAT GROUP_NUM

  // START REPEAT GROUP_ATTR_STR
  fprintf(f, "len_$group_str$ %" PRIu64 "\n", $group$->len_$group_str$);
  fprintf(f, "$group_str$\n");
  if ($group$->len_$group_str$ != 0) fprintf(f, "%s\n", $group$->$group_str$);
  // END REPEAT GROUP_ATTR_STR

  /* Write arrays */
  // START REPEAT GROUP_DSET_ALL

  fprintf(f, "$group_dset$\n");
  for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
    fprintf(f, "%$group_dset_format_printf$\n", $group$->$group_dset$[i]);
  }
  // END REPEAT GROUP_DSET_ALL

  /* Buffered write errors (e.g. a full disk) show up in the stream error
     flag or when the stream is closed: do not report success in that case. */
  const int write_failed = ferror(f);
  if (fclose(f) != 0 || write_failed != 0) return TREXIO_FILE_ERROR;

  $group$->to_flush = 0;
  return TREXIO_SUCCESS;

}

9 Template for text free memory

Memory is allocated when reading. The following function frees memory.

/* Flush (unless in 'r' mode) and release the in-memory image of the group.
 *
 * Returns:
 *   TREXIO_SUCCESS       - the group was released, or was not loaded
 *   TREXIO_INVALID_ARG_1 - file is NULL
 *   TREXIO_FAILURE       - the preliminary flush failed (nothing is freed)
 *
 * The REPEAT markers delimit the sections expanded by the generator.
 */
trexio_exit_code
trexio_text_free_$group$ (trexio_text_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  /* Pending modifications are written before the memory is released */
  if (file->parent.mode != 'r') {
    trexio_exit_code rc = trexio_text_flush_$group$(file);
    if (rc != TREXIO_SUCCESS) return TREXIO_FAILURE;
  }

  $group$_t* $group$ = file->$group$;
  if ($group$ == NULL) return TREXIO_SUCCESS;

  // START REPEAT GROUP_DSET_NUM
  if ($group$->$group_dset$ != NULL) FREE ($group$->$group_dset$);
  // END REPEAT GROUP_DSET_NUM

  /* Arrays of strings: the read/write functions store all the strings in one
     allocation whose start is the first element, hence the single FREE of
     element 0 before the array of pointers itself is released. */
  // START REPEAT GROUP_DSET_STR
  if ($group$->$group_dset$ != NULL) {
    if ($group$->rank_$group_dset$ != 0) FREE ($group$->$group_dset$[0]);
    FREE ($group$->$group_dset$);
  }
  // END REPEAT GROUP_DSET_STR

  // START REPEAT GROUP_ATTR_STR
  if ($group$->$group_str$ != NULL) FREE ($group$->$group_str$);
  // END REPEAT GROUP_ATTR_STR

  FREE ($group$);
  file->$group$ = NULL;

  return TREXIO_SUCCESS;

}

This function is called upon the non-successful exit from the trexio_text_read_group function.

/* Clean-up helper for a failed read of the group file.
 *
 * Releases the parsing buffer, closes the text file (when one is given) and
 * attaches the partially filled group to the handle so that the regular free
 * function can release everything it owns.
 *
 * Returns TREXIO_INVALID_ARG_3 if trexio_file is NULL, otherwise the exit
 * code of the free function, so that a failed clean-up is visible to the
 * caller in every build type instead of only tripping an assertion.
 */
trexio_exit_code
trexio_text_free_read_$group$ (char* buffer, FILE* txt_file, trexio_text_t* trexio_file, $group$_t* $group$)
{
  FREE(buffer);

  /* fclose on a NULL stream is undefined behaviour */
  if (txt_file != NULL) fclose(txt_file);

  if (trexio_file == NULL) return TREXIO_INVALID_ARG_3;

  /* Set pointer to the struct so that the garbage collector can do the job on file handle */
  trexio_file->$group$ = $group$;

  return trexio_text_free_$group$(trexio_file);
}

10 Template for has/read/write a numerical attribute

/* Copy the value of the numerical attribute into *num.
 * Returns TREXIO_INVALID_ARG_1 / TREXIO_INVALID_ARG_2 for NULL arguments,
 * TREXIO_FAILURE if the group cannot be loaded, TREXIO_SUCCESS otherwise. */
trexio_exit_code
trexio_text_read_$group_num$ (trexio_t* const file, $group_num_dtype_double$* const num)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (num  == NULL) return TREXIO_INVALID_ARG_2;

  trexio_text_t* const text_file = (trexio_text_t*) file;

  $group$_t* grp = trexio_text_read_$group$(text_file);
  if (grp == NULL) return TREXIO_FAILURE;

  *num = grp->$group_num$;
  return TREXIO_SUCCESS;
}
/* Store the numerical attribute in memory, flag it as set and mark the
 * group as modified so that the next flush writes it.
 * Returns TREXIO_INVALID_ARG_1 if file is NULL, TREXIO_READONLY in 'r' mode,
 * TREXIO_FAILURE if the group cannot be loaded, TREXIO_SUCCESS otherwise. */
trexio_exit_code
trexio_text_write_$group_num$ (trexio_t* const file, const $group_num_dtype_double$ num)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (file->mode == 'r') return TREXIO_READONLY;

  trexio_text_t* const text_file = (trexio_text_t*) file;

  $group$_t* grp = trexio_text_read_$group$(text_file);
  if (grp == NULL) return TREXIO_FAILURE;

  grp->$group_num$       = num;
  grp->$group_num$_isSet = true;
  grp->to_flush          = 1;

  return TREXIO_SUCCESS;
}
/* Tell whether the numerical attribute has been set.
 * Returns TREXIO_SUCCESS when it is set, TREXIO_HAS_NOT when it is not,
 * TREXIO_INVALID_ARG_1 if file is NULL and TREXIO_FAILURE if the group
 * cannot be loaded. */
trexio_exit_code
trexio_text_has_$group_num$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_text_t* const text_file = (trexio_text_t*) file;

  $group$_t* grp = trexio_text_read_$group$(text_file);
  if (grp == NULL) return TREXIO_FAILURE;

  return (grp->$group_num$_isSet == true) ? TREXIO_SUCCESS : TREXIO_HAS_NOT;
}

11 Template for has/read/write a dataset of numerical data

The group_dset array is assumed allocated with the appropriate size.

/* Copy the numerical dataset held in memory into the caller's array.
 *
 * The destination array is assumed to be allocated by the caller with the
 * size given by the product of the dimensions. The rank and every dimension
 * must match the stored ones.
 *
 * Returns:
 *   TREXIO_SUCCESS       - the data was copied
 *   TREXIO_INVALID_ARG_1 - file is NULL
 *   TREXIO_INVALID_ARG_2 - the destination array is NULL
 *   TREXIO_INVALID_ARG_3 - rank differs from the stored rank
 *   TREXIO_INVALID_ARG_4 - dims is NULL or differs from the stored dimensions
 *   TREXIO_FAILURE       - the group cannot be loaded, or no data is stored
 */
trexio_exit_code
trexio_text_read_$group_dset$ (trexio_t* const file, $group_dset_dtype$* const $group_dset$,
                               const uint32_t rank, const uint64_t* dims)
{

  if (file  == NULL) return TREXIO_INVALID_ARG_1;
  if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;

  $group$_t* const $group$ = trexio_text_read_$group$((trexio_text_t*) file);
  if ($group$ == NULL) return TREXIO_FAILURE;

  if (rank != $group$->rank_$group_dset$) return TREXIO_INVALID_ARG_3;
  if (rank > 0 && dims == NULL) return TREXIO_INVALID_ARG_4;

  uint64_t dim_size = 1;
  for (uint32_t i=0; i<rank; ++i){
    if (dims[i] != $group$->dims_$group_dset$[i]) return TREXIO_INVALID_ARG_4;
    dim_size *= dims[i];
  }

  /* Nothing was ever stored (e.g. the group file does not exist yet):
     do not dereference the missing array. */
  if (dim_size > 0 && $group$->$group_dset$ == NULL) return TREXIO_FAILURE;

  for (uint64_t i=0 ; i<dim_size ; ++i) {
    $group_dset$[i] = $group$->$group_dset$[i];
  }

  return TREXIO_SUCCESS;

}
/* Store a copy of the numerical dataset in memory and mark the group as
 * modified so that the next flush writes it.
 *
 * The new array is allocated before the previous one is released, so the
 * stored data is left untouched when the function fails.
 *
 * Returns:
 *   TREXIO_SUCCESS           - the data was stored
 *   TREXIO_INVALID_ARG_1     - file is NULL
 *   TREXIO_INVALID_ARG_2     - the source array is NULL
 *   TREXIO_INVALID_ARG_3     - rank exceeds the capacity of the dims array
 *   TREXIO_INVALID_ARG_4     - dims is NULL while rank is not 0
 *   TREXIO_READONLY          - the file is opened in 'r' mode
 *   TREXIO_FAILURE           - the group cannot be loaded
 *   TREXIO_ALLOCATION_FAILED - out of memory
 */
trexio_exit_code
trexio_text_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype$* $group_dset$,
                                const uint32_t rank, const uint64_t* dims)
{

  if (file  == NULL)  return TREXIO_INVALID_ARG_1;
  if ($group_dset$ == NULL)  return TREXIO_INVALID_ARG_2;

  if (file->mode == 'r') return TREXIO_READONLY;

  $group$_t* const $group$ = trexio_text_read_$group$((trexio_text_t*) file);
  if ($group$ == NULL) return TREXIO_FAILURE;

  /* The dimensions are stored in a fixed-size array of the group structure */
  const uint32_t max_rank = (uint32_t) (sizeof($group$->dims_$group_dset$)
                                      / sizeof($group$->dims_$group_dset$[0]));
  if (rank > max_rank) return TREXIO_INVALID_ARG_3;
  if (rank > 0 && dims == NULL) return TREXIO_INVALID_ARG_4;

  uint64_t dim_size = 1;
  for (uint32_t i=0; i<rank; ++i){
    dim_size *= dims[i];
  }

  $group_dset_dtype$* new_data = CALLOC(dim_size, $group_dset_dtype$);
  if (new_data == NULL && dim_size > 0) return TREXIO_ALLOCATION_FAILED;

  for (uint64_t i=0 ; i<dim_size ; ++i) {
    new_data[i] = $group_dset$[i];
  }

  if ($group$->$group_dset$ != NULL) {
    FREE($group$->$group_dset$);
  }

  $group$->rank_$group_dset$ = rank;
  for (uint32_t i=0; i<rank; ++i){
    $group$->dims_$group_dset$[i] = dims[i];
  }
  $group$->$group_dset$ = new_data;

  $group$->to_flush = 1;
  return TREXIO_SUCCESS;

}
/* Tell whether the numerical dataset is present, i.e. has a non-zero rank.
 * Returns TREXIO_SUCCESS when present, TREXIO_HAS_NOT when absent,
 * TREXIO_INVALID_ARG_1 if file is NULL and TREXIO_FAILURE if the group
 * cannot be loaded. */
trexio_exit_code
trexio_text_has_$group_dset$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_text_t* const text_file = (trexio_text_t*) file;

  $group$_t* const grp = trexio_text_read_$group$(text_file);
  if (grp == NULL) return TREXIO_FAILURE;

  return (grp->rank_$group_dset$ > 0) ? TREXIO_SUCCESS : TREXIO_HAS_NOT;
}

12 Template for has/read/write a dataset of strings

The group_dset array is assumed allocated with the appropriate size.

/* Concatenate the stored strings into the caller's buffer, each one followed
 * by TREXIO_DELIM.
 *
 * At most max_str_len characters of every string are copied. The destination
 * buffer is assumed to be allocated by the caller with the appropriate size.
 * The first dimension gives the number of strings, so the rank must be at
 * least 1.
 *
 * Returns:
 *   TREXIO_SUCCESS       - the strings were copied
 *   TREXIO_INVALID_ARG_1 - file is NULL
 *   TREXIO_INVALID_ARG_2 - dset is NULL
 *   TREXIO_INVALID_ARG_3 - rank is 0 or differs from the stored rank
 *   TREXIO_INVALID_ARG_4 - dims is NULL or differs from the stored dimensions
 *   TREXIO_FAILURE       - the group cannot be loaded, or no data is stored
 */
trexio_exit_code
trexio_text_read_$group_dset$ (trexio_t* const file, char* const dset, const uint32_t rank, const uint64_t* dims, const uint32_t max_str_len)
{

  if (file  == NULL) return TREXIO_INVALID_ARG_1;
  if (dset == NULL) return TREXIO_INVALID_ARG_2;

  $group$_t* const $group$ = trexio_text_read_$group$((trexio_text_t*) file);
  if ($group$ == NULL) return TREXIO_FAILURE;

  if (rank != $group$->rank_$group_dset$) return TREXIO_INVALID_ARG_3;
  /* dims[0] is read below: it must exist */
  if (rank == 0) return TREXIO_INVALID_ARG_3;
  if (dims == NULL) return TREXIO_INVALID_ARG_4;

  for (uint32_t i=0 ; i<rank ; ++i) {
    if (dims[i] != $group$->dims_$group_dset$[i]) return TREXIO_INVALID_ARG_4;
  }

  /* Do not dereference a missing array of strings */
  if (dims[0] > 0 && $group$->$group_dset$ == NULL) return TREXIO_FAILURE;

  strcpy(dset, "");
  for (uint64_t i=0 ; i<dims[0] ; ++i) {
    strncat(dset, $group$->$group_dset$[i], max_str_len);
    strcat(dset, TREXIO_DELIM);
  }

  return TREXIO_SUCCESS;

}
/* Store a copy of an array of strings in memory and mark the group as
 * modified so that the next flush writes it.
 *
 * All the strings are packed, terminators included, in a single allocation
 * whose start is the first element of the array of pointers. The allocation
 * is sized from the actual string lengths, so strings of any length fit.
 * The new storage is allocated before the previous one is released, so the
 * stored data is left untouched when the function fails.
 *
 * Returns:
 *   TREXIO_SUCCESS           - the strings were stored
 *   TREXIO_INVALID_ARG_1     - file is NULL
 *   TREXIO_INVALID_ARG_2     - dset, or one of its elements, is NULL
 *   TREXIO_INVALID_ARG_3     - rank is 0 or exceeds the capacity of the dims array
 *   TREXIO_INVALID_ARG_4     - dims is NULL
 *   TREXIO_READONLY          - the file is opened in 'r' mode
 *   TREXIO_FAILURE           - the group cannot be loaded
 *   TREXIO_ALLOCATION_FAILED - out of memory
 */
trexio_exit_code
trexio_text_write_$group_dset$ (trexio_t* const file, const char** dset, const uint32_t rank, const uint64_t* dims)
{

  if (file  == NULL)  return TREXIO_INVALID_ARG_1;
  if (dset == NULL)  return TREXIO_INVALID_ARG_2;

  if (file->mode == 'r') return TREXIO_READONLY;

  $group$_t* const $group$ = trexio_text_read_$group$((trexio_text_t*) file);
  if ($group$ == NULL) return TREXIO_FAILURE;

  /* dims[0] is the number of strings, and the dimensions are stored in a
     fixed-size array of the group structure */
  const uint32_t max_rank = (uint32_t) (sizeof($group$->dims_$group_dset$)
                                      / sizeof($group$->dims_$group_dset$[0]));
  if (rank == 0 || rank > max_rank) return TREXIO_INVALID_ARG_3;
  if (dims == NULL) return TREXIO_INVALID_ARG_4;

  /* Size of the packed storage: every string plus its terminator */
  size_t total_len = 1;
  for (uint64_t i=0 ; i<dims[0] ; ++i) {
    if (dset[i] == NULL) return TREXIO_INVALID_ARG_2;
    total_len += strlen(dset[i]) + 1;
  }

  char** new_dset = CALLOC(dims[0], char*);
  if (new_dset == NULL) return TREXIO_ALLOCATION_FAILED;

  char* tmp_str = CALLOC(total_len, char);
  if (tmp_str == NULL) {
    FREE(new_dset);
    return TREXIO_ALLOCATION_FAILED;
  }

  for (uint64_t i=0 ; i<dims[0] ; ++i) {
    size_t tmp_len = strlen(dset[i]);
    new_dset[i] = tmp_str;
    /* The storage is zero-initialized: the terminator is already in place */
    memcpy(tmp_str, dset[i], tmp_len);
    tmp_str += tmp_len + 1;
  }

  if ($group$->$group_dset$ != NULL) {
    if ($group$->rank_$group_dset$ != 0) FREE($group$->$group_dset$[0]);
    FREE($group$->$group_dset$);
  }

  $group$->rank_$group_dset$ = rank;

  for (uint32_t i=0; i<rank; ++i){
    $group$->dims_$group_dset$[i] = dims[i];
  }

  $group$->$group_dset$ = new_dset;

  $group$->to_flush = 1;

  return TREXIO_SUCCESS;

}
/* Tell whether the dataset of strings is present, i.e. has a non-zero rank.
 * Returns TREXIO_SUCCESS when present, TREXIO_HAS_NOT when absent,
 * TREXIO_INVALID_ARG_1 if file is NULL and TREXIO_FAILURE if the group
 * cannot be loaded. */
trexio_exit_code
trexio_text_has_$group_dset$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_text_t* const text_file = (trexio_text_t*) file;

  $group$_t* const grp = trexio_text_read_$group$(text_file);
  if (grp == NULL) return TREXIO_FAILURE;

  return (grp->rank_$group_dset$ > 0) ? TREXIO_SUCCESS : TREXIO_HAS_NOT;
}

13 Template for has/read/write a string attribute

/* Copy the string attribute into the caller's buffer.
 *
 * At most max_str_len bytes are written with strncpy semantics: the result
 * is not NUL-terminated when the stored string is max_str_len characters
 * long or longer.
 *
 * Returns:
 *   TREXIO_SUCCESS       - the string was copied
 *   TREXIO_INVALID_ARG_1 - file is NULL
 *   TREXIO_INVALID_ARG_2 - str is NULL
 *   TREXIO_FAILURE       - the group cannot be loaded, or no string is stored
 */
trexio_exit_code
trexio_text_read_$group_str$ (trexio_t* const file, char* const str, const uint32_t max_str_len)
{

  if (file  == NULL) return TREXIO_INVALID_ARG_1;
  if (str == NULL) return TREXIO_INVALID_ARG_2;

  $group$_t* const $group$ = trexio_text_read_$group$((trexio_text_t*) file);
  if ($group$ == NULL) return TREXIO_FAILURE;

  /* The attribute was never written: there is nothing to copy from */
  if ($group$->$group_str$ == NULL) return TREXIO_FAILURE;

  strncpy(str, $group$->$group_str$, max_str_len);

  return TREXIO_SUCCESS;

}
/* Store a copy of the string attribute in memory and mark the group as
 * modified so that the next flush writes it.
 *
 * The stored length counts the terminator (strlen + 1). The copy is
 * allocated before the previous string is released, so the pointer and its
 * length always stay consistent, even when the allocation fails.
 *
 * Returns:
 *   TREXIO_SUCCESS           - the string was stored
 *   TREXIO_INVALID_ARG_1     - file is NULL
 *   TREXIO_INVALID_ARG_2     - str is NULL
 *   TREXIO_READONLY          - the file is opened in 'r' mode
 *   TREXIO_FAILURE           - the group cannot be loaded
 *   TREXIO_ALLOCATION_FAILED - out of memory (the stored string is unchanged)
 */
trexio_exit_code
trexio_text_write_$group_str$ (trexio_t* const file, const char *str)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (str  == NULL) return TREXIO_INVALID_ARG_2;

  if (file->mode == 'r') return TREXIO_READONLY;

  $group$_t* const $group$ = trexio_text_read_$group$((trexio_text_t*) file);
  if ($group$ == NULL) return TREXIO_FAILURE;

  size_t tmp_len = strlen(str);

  char* new_str = CALLOC(tmp_len + 1, char);
  if (new_str == NULL) return TREXIO_ALLOCATION_FAILED;

  memcpy(new_str, str, tmp_len + 1);

  if ($group$->$group_str$ != NULL) FREE($group$->$group_str$);

  $group$->$group_str$ = new_str;
  $group$->len_$group_str$ = tmp_len + 1;

  $group$->to_flush = 1;

  return TREXIO_SUCCESS;

}
/* Tell whether the string attribute is present, i.e. has a non-zero stored
 * length.
 * Returns TREXIO_SUCCESS when present, TREXIO_HAS_NOT when absent,
 * TREXIO_INVALID_ARG_1 if file is NULL and TREXIO_FAILURE if the group
 * cannot be loaded. */
trexio_exit_code
trexio_text_has_$group_str$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_text_t* const text_file = (trexio_text_t*) file;

  $group$_t* const grp = trexio_text_read_$group$(text_file);
  if (grp == NULL) return TREXIO_FAILURE;

  return (grp->len_$group_str$ > 0) ? TREXIO_SUCCESS : TREXIO_HAS_NOT;
}

14 Template for has/read/write the dataset of sparse data

Each sparse array is stored in a separate .txt file due to the fact that sparse I/O has to be decoupled from conventional write/read/flush behaviour of the TEXT back end. Chunks are used to read/write sparse data to prevent memory overflow. Chunks have a given int64_t size (size specifies the number of sparse data items, e.g. integrals).

User provides indices and values of the sparse array as two separate variables.

/* Append a chunk of sparse data (indices and values) to the dataset file.
 *
 * One line is written per item. A line recording the chunk size and its
 * starting position is then appended to the companion ".size" file and,
 * when it does not exist yet, an empty group file is created so that the
 * "has group" query works.
 *
 * offset_file is currently not used when writing: data is always appended.
 * size is the number of items of the chunk; size_max selects the index
 * format; size_start is the number of items already written.
 *
 * Returns:
 *   TREXIO_SUCCESS       - the chunk was written
 *   TREXIO_INVALID_ARG_1 - file is NULL
 *   TREXIO_INVALID_ARG_3 - size is negative
 *   TREXIO_FILE_ERROR    - a path does not fit in the buffer, or a file
 *                          cannot be opened, created or closed
 *   TREXIO_FAILURE       - writing to one of the files failed
 */
trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file,
                                                const int64_t offset_file,
                                                const int64_t size,
                                                const int64_t size_max,
                                                const int64_t size_start,
                                                const int32_t* index_sparse,
                                                const double* value_sparse)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  /* A negative size would turn into a huge unsigned loop bound below */
  if (size < 0) return TREXIO_INVALID_ARG_3;

  /* Build the name of the file with sparse data*/
  /* The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed? */
  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
  /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Directory name followed by the name of the file with sparse data.
     snprintf never writes past the buffer and reports the length it needed;
     room is also kept for the ".size" suffix appended further down. */
  int n_path = snprintf(file_full_path, sizeof(file_full_path), "%.*s%s",
                        (int) TREXIO_MAX_FILENAME_LENGTH, file->file_name,
                        $group_dset$_file_name);
  if (n_path < 0 || (size_t) n_path + strlen(".size") >= sizeof(file_full_path)) {
    return TREXIO_FILE_ERROR;
  }

  /* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */
  FILE* f = fopen(file_full_path, "a");
  if (f == NULL) return TREXIO_FILE_ERROR;

  /* Specify the line length in order to offset properly. For example, for 4-index quantities
     the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char.
     CURRENTLY NO OFFSET IS USED WHEN WRITING !
    */
  int64_t line_length = 0L;
  char format_str[256];

  /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
  if (size_max < UINT8_MAX) {
    line_length = $sparse_line_length_8$; // 41 for 4 indices
    strncpy(format_str, $sparse_format_printf_8$, 256);
  } else if (size_max < UINT16_MAX) {
    line_length = $sparse_line_length_16$; // 49 for 4 indices
    strncpy(format_str, $sparse_format_printf_16$, 256);
  } else {
    line_length = $sparse_line_length_32$; //69 for 4 indices
    strncpy(format_str, $sparse_format_printf_32$, 256);
  }
  strncat(format_str, "\n", 2);

  /* Get the starting position of the IO stream to be written in the .size file.
     This is error-prone due to the fact that for large files (>2 GB) in 32-bit systems ftell will fail.
     One can use ftello function which is adapted for large files.
     For now, we can use front-end-provided size_start, which has been checked for INT64_MAX overflow.
   */
  int64_t io_start_pos = size_start * line_length;

  /* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */
  int rc;
  for (uint64_t i=0UL; i < (uint64_t) size; ++i) {
    rc = fprintf(f, format_str,
       $group_dset_sparse_indices_printf$,
       *(value_sparse + i));
    if (rc <= 0) {
      fclose(f);
      return TREXIO_FAILURE;
    }
  }

  /* Close the TXT file */
  rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  /* Append .size to the file_full_path in order to write additional info about the written buffer of data
     (the room for this suffix was checked when the path was built) */
  strncat(file_full_path, ".size", 6);

  /* Open the new file in "a" (append) mode to append info about the buffer that has been just written */
  FILE *f_wSize = fopen(file_full_path, "a");
  if (f_wSize == NULL) return TREXIO_FILE_ERROR;

  /* Write the buffer_size */
  rc = fprintf(f_wSize, "%" PRId64 " %" PRId64 "\n", size, io_start_pos);
  if (rc <= 0) {
    fclose(f_wSize);
    return TREXIO_FAILURE;
  }

  /* Close the TXT file */
  rc = fclose(f_wSize);
  if (rc != 0) return TREXIO_FILE_ERROR;

  const char $group$_file_name[256] = "/$group$.txt";

  /* Directory name followed by the name of the group file */
  n_path = snprintf(file_full_path, sizeof(file_full_path), "%.*s%s",
                    (int) TREXIO_MAX_FILENAME_LENGTH, file->file_name,
                    $group$_file_name);
  if (n_path < 0 || (size_t) n_path >= sizeof(file_full_path)) return TREXIO_FILE_ERROR;

  bool file_exists = trexio_text_file_exists(file_full_path);

  /* Create an empty file for the trexio_text_has_group to work */
  if (!file_exists) {
    FILE *fp = fopen(file_full_path, "ab+");
    /* fclose on a NULL stream is undefined behaviour */
    if (fp == NULL) return TREXIO_FILE_ERROR;
    fclose(fp);
  }

  /* Exit upon success */
  return TREXIO_SUCCESS;
}
/* Read a chunk of sparse data (indices and values) from the dataset file.
 *
 * The file is positioned at offset_file lines (of the fixed length selected
 * by size_max) from its beginning, then up to size lines are parsed.
 *
 * Returns:
 *   TREXIO_SUCCESS       - size items were read
 *   TREXIO_END           - the end of the file was reached first; the number
 *                          of items actually read is stored in *eof_read_size
 *   TREXIO_INVALID_ARG_1 - file is NULL
 *   TREXIO_INVALID_ARG_2 - offset_file is negative
 *   TREXIO_INVALID_ARG_3 - size is negative
 *   TREXIO_INVALID_ARG_5 - eof_read_size is NULL
 *   TREXIO_FILE_ERROR    - the path does not fit in the buffer, or the file
 *                          cannot be opened, positioned or closed
 *   TREXIO_FAILURE       - a line could not be parsed
 */
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
                                               const int64_t offset_file,
                                               const int64_t size,
                                               const int64_t size_max,
                                               int64_t* const eof_read_size,
                                               int32_t* const index_sparse,
                                               double* const value_sparse)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
  /* Negative values would turn into huge unsigned quantities below */
  if (offset_file < 0) return TREXIO_INVALID_ARG_2;
  if (size < 0) return TREXIO_INVALID_ARG_3;

  /* Build the name of the file with sparse data.
     The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed?
   */
  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
  /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Directory name followed by the name of the file with sparse data.
     snprintf never writes past the buffer and reports the length it needed. */
  const int n_path = snprintf(file_full_path, sizeof(file_full_path), "%.*s%s",
                              (int) TREXIO_MAX_FILENAME_LENGTH, file->file_name,
                              $group_dset$_file_name);
  if (n_path < 0 || (size_t) n_path >= sizeof(file_full_path)) return TREXIO_FILE_ERROR;

  /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
  FILE* f = fopen(file_full_path, "r");
  if (f == NULL) return TREXIO_FILE_ERROR;

  /* Specify the line length in order to offset properly. For example, for 4-index quantities
     the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char
   */
  uint64_t line_length = 0UL;
  /* Determine the line length depending on the size_max (usually mo_num or ao_num) */
  if (size_max < UINT8_MAX) {
    line_length = $sparse_line_length_8$; // 41 for 4 indices
  } else if (size_max < UINT16_MAX) {
    line_length = $sparse_line_length_16$; // 49 for 4 indices
  } else {
    line_length = $sparse_line_length_32$; //69 for 4 indices
  }

  /* Offset in the file according to the provided  value of offset_file and optimal line_length.
     Reading from the wrong position would silently return wrong data, hence the check. */
  if (fseek(f, (long) ((uint64_t) offset_file * line_length), SEEK_SET) != 0) {
    fclose(f);
    return TREXIO_FILE_ERROR;
  }

  /* Read the data from the file and check the return code of sscanf to verify that > 0 items have been read or reached EOF */
  int rc;
  char buffer[1024];
  uint64_t count = 0UL;
  for (uint64_t i=0UL; i < (uint64_t) size; ++i) {

      memset(buffer, 0, sizeof(buffer));

      if (fgets(buffer, 1023, f) == NULL){

        fclose(f);
        *eof_read_size = count;
        return TREXIO_END;

      } else {

        rc = sscanf(buffer, "$group_dset_format_scanf$",
                    $group_dset_sparse_indices_scanf$,
                    value_sparse + i);
        if (rc <= 0) {
          fclose(f);
          return TREXIO_FAILURE;
        }
        count += 1UL;

      }
  }

  /* Close the TXT file */
  rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  return TREXIO_SUCCESS;
}
/* Sum the chunk sizes recorded in "<directory>/$group_dset$.txt.size".

   Each record of the size file holds two integers; only the first one
   (the chunk size) is accumulated, the second one is read and discarded.

   file      TEXT back-end handle; file->file_name is the TREXIO directory.
   size_max  Receives the accumulated size on success, or -1 on overflow.

   Returns TREXIO_SUCCESS, TREXIO_INVALID_ARG_1/2 for NULL arguments,
   TREXIO_FILE_ERROR when the path does not fit or the file cannot be opened or
   closed, TREXIO_INT_SIZE_OVERFLOW when the sum does not fit in int64_t, and
   TREXIO_FAILURE when the size file is malformed or cannot be read.
 */
trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (size_max == NULL) return TREXIO_INVALID_ARG_2;

  /* Name of the size file, relative to the TREXIO directory. */
  const char $group_dset$_file_name[256] = "/$group_dset$.txt.size";
  /* The full path to the TXT file with the sizes. This includes the TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Build "<directory>/<file>" with an explicit bound: a path that does not fit
     is reported as an error instead of overflowing or being silently truncated. */
  int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                          file->file_name, $group_dset$_file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_FILE_ERROR;

  /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
  FILE* f = fopen(file_full_path, "r");
  if (f == NULL) return TREXIO_FILE_ERROR;

  int64_t size_item = 0L, offset_item = 0L, size_accum = 0L;

  /* Read the records one by one. Looping only while both fields were converted
     avoids spinning forever on a malformed token (fscanf would keep returning 0). */
  int n_fields;
  while ((n_fields = fscanf(f, "%" SCNd64 " %" SCNd64 "", &size_item, &offset_item)) == 2) {
    /* Check that summation will not overflow the int64_t value */
    if (INT64_MAX - size_accum > size_item) {
      size_accum += size_item;
    } else {
      fclose(f);
      *size_max = -1L;
      return TREXIO_INT_SIZE_OVERFLOW;
    }
  }

  /* Anything other than a clean end of file means the size file is corrupted or unreadable */
  if (n_fields != EOF || ferror(f)) {
    fclose(f);
    return TREXIO_FAILURE;
  }

  /* Close the TXT file */
  int rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  /* Overwrite the value at the input address and return TREXIO_SUCCESS */
  *size_max = size_accum;
  return TREXIO_SUCCESS;

}
/* Tell whether the file "<directory>/$group_dset$.txt" with sparse data exists.

   Returns TREXIO_SUCCESS when access() finds the file, TREXIO_HAS_NOT otherwise
   (including when the full path does not fit in TREXIO_MAX_FILENAME_LENGTH),
   and TREXIO_INVALID_ARG_1 when file is NULL.
 */
trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  /* Name of the file with sparse data, relative to the TREXIO directory. */
  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
  /* The full path to the TXT file with sparse data. This includes the TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Build "<directory>/<file>" with an explicit bound; probing a truncated path
     could report a different file, so an oversized path counts as "not there". */
  int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                          file->file_name, $group_dset$_file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_HAS_NOT;

  /* Check the return code of access function to determine whether the file with sparse data exists or not */
  if (access(file_full_path, F_OK) == 0){
    return TREXIO_SUCCESS;
  } else {
    return TREXIO_HAS_NOT;
  }
}

15 Template for has/read/write a buffered vector

Each array is stored in a separate .txt file because buffered I/O has to be decoupled from the conventional write/read/flush behaviour of the TEXT back end. The data is read and written in chunks so that the whole array never has to be held in memory at once. Each chunk has a given int64_t size, which specifies the number of vector elements to be read or written.

/* Read up to dims[0] values from "<directory>/$group_dset$.txt", one value per line.

   file           TEXT back-end handle; file->file_name is the TREXIO directory.
   offset_file    Number of fixed-width (25 character) lines to skip before reading.
   rank           Not used by this implementation.
   dims           dims[0] is the number of values to read; must not be NULL.
   eof_read_size  Receives the number of values read when TREXIO_END is returned.
   dset           Output buffer for at least dims[0] doubles.

   Returns TREXIO_SUCCESS when dims[0] values were read, TREXIO_END when fgets
   stopped early, TREXIO_FILE_ERROR when the path does not fit, the file cannot
   be opened, positioned or closed, and TREXIO_FAILURE when a line cannot be parsed.
 */
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
                                               const int64_t offset_file,
                                               const uint32_t rank,
                                               const uint64_t* dims,
                                               int64_t* const eof_read_size,
                                               double* const dset)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
  if (dset == NULL) return TREXIO_INVALID_ARG_6;

  const char file_name[256] = "/$group_dset$.txt";

  /* The full path to the TXT file with the data. This includes the TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Build "<directory>/<file>" with an explicit bound: a path that does not fit
     is reported as an error instead of overflowing or being silently truncated. */
  int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                          file->file_name, file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_FILE_ERROR;

  /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
  FILE* f = fopen(file_full_path, "r");
  if (f == NULL) return TREXIO_FILE_ERROR;

  /* Specify the line length in order to offset properly.
     Each double value takes 24 characters + one newline char.
   */
  uint64_t line_length = 25UL;

  /* Skip offset_file fixed-width lines; a failed seek would otherwise make us read the wrong records */
  if (fseek(f, (long) offset_file * line_length, SEEK_SET) != 0) {
    fclose(f);
    return TREXIO_FILE_ERROR;
  }

  /* Read the file line by line and check the return code of sscanf to verify that a value was parsed */
  int rc;
  char buffer[64];
  /* Counter for the number of elements being processed */
  uint64_t count = 0UL;

  for (uint64_t i=0UL; i < dims[0]; ++i) {

    memset(buffer, 0, sizeof(buffer));
    if (fgets(buffer, (int) (sizeof(buffer) - 1), f) == NULL){

      /* No more lines: report how many values were read before stopping */
      fclose(f);
      *eof_read_size = count;
      return TREXIO_END;

    } else {

      rc = sscanf(buffer, "%lf", dset + i);
      if (rc <= 0) {
        fclose(f);
        return TREXIO_FAILURE;
      }
      count += 1UL;

    }
  }

  /* Close the TXT file */
  rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  return TREXIO_SUCCESS;
}

/* Sum the chunk sizes recorded in "<directory>/$group_dset$.txt.size".

   file      TEXT back-end handle; file->file_name is the TREXIO directory.
   size_max  Receives the accumulated size on success, or -1 on overflow.

   Returns TREXIO_SUCCESS, TREXIO_INVALID_ARG_1/2 for NULL arguments,
   TREXIO_FILE_ERROR when the path does not fit or the file cannot be opened or
   closed, TREXIO_INT_SIZE_OVERFLOW when the sum does not fit in int64_t, and
   TREXIO_FAILURE when the size file is malformed or cannot be read.
 */
trexio_exit_code
trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (size_max == NULL) return TREXIO_INVALID_ARG_2;

  const char file_name[256] = "/$group_dset$.txt.size";

  /* The full path to the TXT file with the sizes. This includes the TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Build "<directory>/<file>" with an explicit bound: a path that does not fit
     is reported as an error instead of overflowing or being silently truncated. */
  int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                          file->file_name, file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_FILE_ERROR;

  /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
  FILE* f = fopen(file_full_path, "r");
  if (f == NULL) return TREXIO_FILE_ERROR;

  int64_t size_item = 0L, size_accum = 0L;

  /* Read the sizes one by one. Looping only while a value was converted avoids
     spinning forever on a malformed token (fscanf would keep returning 0). */
  int n_fields;
  while ((n_fields = fscanf(f, "%" SCNd64, &size_item)) == 1) {
    /* Check that summation will not overflow the int64_t value */
    if (INT64_MAX - size_accum > size_item) {
      size_accum += size_item;
    } else {
      fclose(f);
      *size_max = -1L;
      return TREXIO_INT_SIZE_OVERFLOW;
    }
  }

  /* Anything other than a clean end of file means the size file is corrupted or unreadable */
  if (n_fields != EOF || ferror(f)) {
    fclose(f);
    return TREXIO_FAILURE;
  }

  /* Close the TXT file */
  int rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  /* Overwrite the value at the input address and return TREXIO_SUCCESS */
  *size_max = size_accum;
  return TREXIO_SUCCESS;
}
/* Append dims[0] values to "<directory>/$group_dset$.txt", record the chunk size
   in "<directory>/$group_dset$.txt.size", and make sure that the (possibly empty)
   group file "<directory>/$group$.txt" exists.

   file         TEXT back-end handle; file->file_name is the TREXIO directory.
   offset_file  Not used by this implementation (data is always appended).
   rank         Not used by this implementation.
   dims         dims[0] is the number of values to write; must not be NULL.
   dset         Values to write, at least dims[0] doubles.

   Returns TREXIO_SUCCESS, TREXIO_INVALID_ARG_1/5 for NULL arguments,
   TREXIO_FILE_ERROR when a path does not fit or a file cannot be opened or
   closed, and TREXIO_FAILURE when fprintf fails.
 */
trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file,
                                                const int64_t offset_file,
                                                const uint32_t rank,
                                                const uint64_t* dims,
                                                const double* dset)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (dset == NULL) return TREXIO_INVALID_ARG_5;

  const char file_name[256] = "/$group_dset$.txt";

  /* The full path to the destination TXT file. This includes the TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Build "<directory>/<file>" with an explicit bound: writing to a silently
     truncated path would create a file under the wrong name. */
  int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                          file->file_name, file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_FILE_ERROR;

  /* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */
  FILE* f = fopen(file_full_path, "a");
  if (f == NULL) return TREXIO_FILE_ERROR;

  /* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */
  int rc;
  for (uint64_t i=0UL; i < dims[0]; ++i) {

    rc = fprintf(f, "%24.16e\n", *(dset+ i));
    if (rc <= 0) {
      fclose(f);
      return TREXIO_FAILURE;
    }

  }

  /* Close the TXT file */
  rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  /* Build the path of the companion ".size" file that records the size of the chunk just written */
  path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s.size",
                      file->file_name, file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_FILE_ERROR;

  /* Open the new file in "a" (append) mode to append info about the buffer that has been just written */
  FILE *f_wSize = fopen(file_full_path, "a");
  if (f_wSize == NULL) return TREXIO_FILE_ERROR;

  /* Write the buffer_size */
  rc = fprintf(f_wSize, "%" PRIu64 "\n", dims[0]);
  if (rc <= 0) {
    fclose(f_wSize);
    return TREXIO_FAILURE;
  }

  /* Close the TXT file */
  rc = fclose(f_wSize);
  if (rc != 0) return TREXIO_FILE_ERROR;

  /* Additional part for the trexio_text_has_group to work */
  const char group_file_name[256] = "/$group$.txt";

  path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                      file->file_name, group_file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_FILE_ERROR;

  bool file_exists = trexio_text_file_exists(file_full_path);

  /* Create an empty file for the trexio_text_has_group to work */
  if (!file_exists) {
    FILE *fp = fopen(file_full_path, "ab+");
    /* fclose(NULL) is undefined behaviour, so a failed fopen must be reported */
    if (fp == NULL) return TREXIO_FILE_ERROR;
    rc = fclose(fp);
    if (rc != 0) return TREXIO_FILE_ERROR;
  }

  /* Exit upon success */
  return TREXIO_SUCCESS;
}

/* Tell whether the file "<directory>/$group_dset$.txt" with buffered data exists.

   Returns TREXIO_SUCCESS when access() finds the file, TREXIO_HAS_NOT otherwise
   (including when the full path does not fit in TREXIO_MAX_FILENAME_LENGTH),
   and TREXIO_INVALID_ARG_1 when file is NULL.
 */
trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  const char file_name[256] = "/$group_dset$.txt";

  /* The full path to the TXT file with the data. This includes the TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Build "<directory>/<file>" with an explicit bound; probing a truncated path
     could report a different file, so an oversized path counts as "not there". */
  int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                          file->file_name, file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_HAS_NOT;

  /* Check the return code of access function to determine whether the file with data exists or not */
  if (access(file_full_path, F_OK) == 0){
    return TREXIO_SUCCESS;
  } else {
    return TREXIO_HAS_NOT;
  }
}

16 Template for text delete a group (UNSAFE mode)

/* Delete the file of the $group$ group and release its in-memory structure.

   The group is first obtained via trexio_text_read_$group$ so that the name of
   its file is known, then the file is removed and the structure is freed.

   Returns TREXIO_SUCCESS, TREXIO_INVALID_ARG_1 when file is NULL,
   TREXIO_FAILURE when the group cannot be read or its file cannot be removed,
   or the error code returned by trexio_text_free_$group$.
 */
trexio_exit_code
trexio_text_delete_$group$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_text_t* f = (trexio_text_t*) file;

  $group$_t* $group$ = trexio_text_read_$group$(f);
  if ($group$ == NULL) return TREXIO_FAILURE;

  /* remove() is only specified to return a nonzero value on failure, not necessarily -1 */
  int rc = remove($group$->file_name);
  if (rc != 0) return TREXIO_FAILURE;

  /* Clear the flush flag before freeing the group.
     NOTE(review): presumably this keeps the free routine from writing the
     deleted file back - confirm against trexio_text_free_$group$. */
  $group$->to_flush = 0;

  trexio_exit_code rc_free = trexio_text_free_$group$(f);
  if (rc_free != TREXIO_SUCCESS) return rc_free;

  return TREXIO_SUCCESS;
}

17 Source code for the determinant part

Each array is stored in a separate .txt file because determinant I/O has to be decoupled from the conventional write/read/flush behaviour of the TEXT back end. The data is read and written in chunks so that the whole array never has to be held in memory at once. Each chunk has a given int64_t size, which specifies the number of data items, e.g. determinants.

/* Read up to dims[0] determinants from "<directory>/determinant_list.txt".

   Each line of the file holds dims[1] integers written in fixed-width fields
   of 10 characters followed by one space.

   file           TEXT back-end handle; file->file_name is the TREXIO directory.
   offset_file    Number of fixed-width lines to skip before reading.
   rank           Not used by this implementation.
   dims           dims[0] is the number of determinants to read and dims[1] the
                  number of integers per determinant; must not be NULL.
   eof_read_size  Receives the number of determinants read when TREXIO_END is returned.
   list           Output buffer for at least dims[0]*dims[1] integers.

   Returns TREXIO_SUCCESS when dims[0] lines were read, TREXIO_END when fgets
   stopped early, TREXIO_FILE_ERROR when the path does not fit, the file cannot
   be opened, positioned or closed, and TREXIO_FAILURE when a field cannot be parsed.

   NOTE(review): fields are parsed with a maximum width of 10 characters, so an
   integer that needs more than 10 characters cannot be read back correctly;
   changing this requires a coordinated change of the writer.
   NOTE(review): a single fgets call stores at most 1022 characters of a line,
   which bounds the dims[1] that can be handled - confirm the supported range.
 */
trexio_exit_code trexio_text_read_determinant_list(trexio_t* const file,
                                                   const int64_t offset_file,
                                                   const uint32_t rank,
                                                   const uint64_t* dims,
                                                   int64_t* const eof_read_size,
                                                   int64_t* const list)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
  if (list == NULL) return TREXIO_INVALID_ARG_6;

  const char determinant_list_file_name[256] = "/determinant_list.txt";
  /* The full path to the TXT file with determinants. This includes the TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Build "<directory>/<file>" with an explicit bound: a path that does not fit
     is reported as an error instead of overflowing or being silently truncated. */
  int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                          file->file_name, determinant_list_file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_FILE_ERROR;

  /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
  FILE* f = fopen(file_full_path, "r");
  if (f == NULL) return TREXIO_FILE_ERROR;

  /* Specify the line length in order to offset properly.
     Each integer field takes 10 characters and is followed by one space,
     there are dims[1] such fields per line (up-spin and down-spin parts together),
     and the line ends with one newline char.
   */
  uint64_t line_length = dims[1]*11UL + 1UL; // 10 characters per bit field + 1 space = 11 spots, + 1 newline char

  /* Skip offset_file fixed-width lines; a failed seek would otherwise make us read the wrong records */
  if (fseek(f, (long) offset_file * line_length, SEEK_SET) != 0) {
    fclose(f);
    return TREXIO_FILE_ERROR;
  }

  /* Read the file line by line and check the return code of sscanf to verify that every field was parsed */
  int rc;
  /* Fixed buffer which is used to read the determinant string <a1 a2 ... a/\ b1 b2 ... b\/> */
  char buffer[1024];
  /* Position of the current field inside the buffer and the distance between two fields */
  uint64_t accum = 0UL;
  uint32_t shift_int64 = 11U;
  /* Counter for the number of elements being processed */
  uint64_t count = 0UL;
  for (uint64_t i=0UL; i < dims[0]; ++i) {

    accum = 0UL;
    memset(buffer, 0, sizeof(buffer));

    if (fgets(buffer, (int) (sizeof(buffer) - 1), f) == NULL){

      /* No more lines: report how many determinants were read before stopping */
      fclose(f);
      *eof_read_size = count;
      return TREXIO_END;

    } else {

   /* The format string is not static but dynamic (the number of ints depends on the mo_num).
      Thus, we parse the buffer string dims[1] times to get the bit field determinants.
    */
      for (uint32_t j=0; j < (uint32_t) dims[1]; ++j) {
        rc = sscanf(buffer+accum, "%10" SCNd64, list + dims[1]*i + j);
        if (rc <= 0) {
          fclose(f);
          return TREXIO_FAILURE;
        }
        accum += shift_int64;
      }
      count += 1UL;

    }
  }

  /* Close the TXT file */
  rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  return TREXIO_SUCCESS;
}
/* Append dims[0] determinants to "<directory>/determinant_list.txt" and make
   sure that the (possibly empty) group file "<directory>/determinant.txt" exists.

   Each determinant is written on its own line as dims[1] integers, each in a
   field of at least 10 characters followed by one space.

   file         TEXT back-end handle; file->file_name is the TREXIO directory.
   offset_file  Not used by this implementation (data is always appended).
   rank         Not used by this implementation.
   dims         dims[0] is the number of determinants and dims[1] the number of
                integers per determinant; must not be NULL.
   list         Values to write, at least dims[0]*dims[1] integers.

   Returns TREXIO_SUCCESS, TREXIO_INVALID_ARG_1/5 for NULL arguments,
   TREXIO_FILE_ERROR when a path does not fit or a file cannot be opened or
   closed, and TREXIO_FAILURE when fprintf fails.
 */
trexio_exit_code trexio_text_write_determinant_list(trexio_t* const file,
                                                    const int64_t offset_file,
                                                    const uint32_t rank,
                                                    const uint64_t* dims,
                                                    const int64_t* list)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (list == NULL) return TREXIO_INVALID_ARG_5;

  const char determinant_list_file_name[256] = "/determinant_list.txt";
  /* The full path to the destination TXT file. This includes the TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Build "<directory>/<file>" with an explicit bound: writing to a silently
     truncated path would create a file under the wrong name. */
  int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                          file->file_name, determinant_list_file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_FILE_ERROR;

  /* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */
  FILE* f = fopen(file_full_path, "a");
  if (f == NULL) return TREXIO_FILE_ERROR;

  /* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */
  int rc;
  for (uint64_t i=0UL; i < dims[0]; ++i) {

    /* The loop below is needed to write a line with int bit fields for alpha and beta electrons */
    for (uint32_t j=0; j < (uint32_t) dims[1]; ++j) {
      rc = fprintf(f, "%10" PRId64 " ", *(list + i*dims[1] + j));
      if (rc <= 0) {
        fclose(f);
        return TREXIO_FAILURE;
      }
    }

    /* A missing line terminator would corrupt the fixed-width layout, so check this write too */
    rc = fprintf(f, "%s", "\n");
    if (rc <= 0) {
      fclose(f);
      return TREXIO_FAILURE;
    }

  }

  /* Close the TXT file */
  rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  /* Additional part for the trexio_text_has_group to work */
  const char det_file_name[256] = "/determinant.txt";

  path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                      file->file_name, det_file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_FILE_ERROR;

  bool file_exists = trexio_text_file_exists(file_full_path);

  /* Create an empty file for the trexio_text_has_group to work */
  if (!file_exists) {
    FILE *fp = fopen(file_full_path, "ab+");
    /* fclose(NULL) is undefined behaviour, so a failed fopen must be reported */
    if (fp == NULL) return TREXIO_FILE_ERROR;
    rc = fclose(fp);
    if (rc != 0) return TREXIO_FILE_ERROR;
  }

  /* Exit upon success */
  return TREXIO_SUCCESS;
}
/* Tell whether the file "<directory>/determinant_list.txt" exists.

   Returns TREXIO_SUCCESS when access() finds the file, TREXIO_HAS_NOT otherwise
   (including when the full path does not fit in TREXIO_MAX_FILENAME_LENGTH),
   and TREXIO_INVALID_ARG_1 when file is NULL.
 */
trexio_exit_code trexio_text_has_determinant_list(trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  const char determinant_list_file_name[256] = "/determinant_list.txt";
  /* The full path to the TXT file with determinants. This includes the TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Build "<directory>/<file>" with an explicit bound; probing a truncated path
     could report a different file, so an oversized path counts as "not there". */
  int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                          file->file_name, determinant_list_file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_HAS_NOT;

  /* Check the return code of access function to determine whether the file with data exists or not */
  if (access(file_full_path, F_OK) == 0){
    return TREXIO_SUCCESS;
  } else {
    return TREXIO_HAS_NOT;
  }
}

Author: TREX-CoE

Created: 2024-11-21 Thu 08:47

Validate