libTLK  1.3.1
Data Structures | Defines | Typedefs | Enumerations | Functions
Basic types and utilities

Data Structures

struct  tLProbPair
 A pair of log-probabilities. More...
struct  tLFileList
 List of file names. More...
struct  tLTar
 Tar files. More...
struct  tLFea
 Feature vectors. More...
struct  tLSeqs
 Symbol sequence manager. More...
struct  tLBuffer
 Text buffer. More...
struct  tLDict
 Dictionary of tokens. More...
struct  tLSeq
 Symbol sequence. More...
struct  tLWGState
 Word graph state. More...
struct  tLWGSegment
 Word graph segment. More...
struct  tLWGEdge
 Word graph edge. More...
struct  tLWGList
 List node. More...
struct  tLWordGraph
 Word graph. More...

Defines

#define tL_fea_get_real_data(FEA)   ((float **) (FEA)->v)
 Gets a pointer to real data.
#define tL_fea_load_fd(FEA, FD, ERR)   (FEA)->_load ( (FEA), (FD), (ERR) )
 Loads a feature vector sequence.
#define tL_fea_print(FEA, TO)   (FEA)->_print ( (FEA), (TO) )
 Prints the content.
#define tL_fea_write_fd(FEA, TO, BINARY, ERR)   (FEA)->_write ( (FEA), (TO), (BINARY), (ERR) )
 Writes the content.
#define tL_seqs_get_num_seqs(SEQS)   ((SEQS)->cseqs->N)
 Gets the number of different symbol sequences.
#define tL_seqs_get_seq(SEQS, ID)   ((const tLSeq *) (SEQS)->cseqs->data[(ID)])
 Gets a symbol sequence.
#define tL_dict_get_token(DICT, ID)   ((const char *) (DICT)->data[(ID)])
 Gets the corresponding token.

Typedefs

typedef double tLFloat
 Type float.
typedef tLFloat tLProb
 An alias of the type tLFloat used to represent log-probabilities.
typedef unsigned char tLBool
 Boolean type.

Enumerations

enum  tLFeaType { TL_FEA_BINARY, TL_FEA_REAL }
 Type of features. More...

Functions

 tL_atop (const char *str)
 From ASCII to Prob.
 tL_prob_print (const tLProb prob, FILE *to)
 Prints a probability.
 tL_error (const char *format,...)
 Terminates the application.
 tL_warning (const char *format,...)
 Warning message.
 tL_filelist_new_from_file (const char *file_name)
 Creates a new file list from a text file.
 tL_filelist_free (tLFileList *filelist)
 Frees memory.
 tL_tar_free (tLTar *tar)
 Frees memory.
 tL_tar_get_file (tLTar *tar, const char *name, long *size, char **err)
 Returns a file descriptor to desired file.
 tL_tar_new (const char *file_name, char **err)
 Creates a new tLTar.
 tL_fea_free (tLFea *fea)
 Frees memory.
 tL_fea_get_binary_data (tLFea *fea)
 Gets a pointer to binary data.
 tL_fea_load (tLFea *fea, const char *file_name, char **err)
 Loads a feature vector sequence.
 tL_fea_new (const tLFeaType type)
 Creates a new feature vector manager.
 tL_fea_resize (tLFea *fea, const int dim, const int nvecs)
 Resizes memory.
 tL_fea_write (const tLFea *fea, const char *file_name, const tLBool binary, char **err)
 Writes the content.
 tL_seqs_adjust (tLSeqs *seqs)
 Adjusts memory.
 tL_seqs_append (tLSeqs *seqs, const tLSeq *seq)
 Appends a new symbol sequence.
 tL_seqs_free (tLSeqs *seqs)
 Frees memory.
 tL_seqs_load (const tLFileList *filelist, const tLDict *syms, char **err, tLTar *tar)
 Creates a new symbol sequence manager from files.
 tL_seqs_load_words (const tLFileList *filelist, const tLDict *syms, char **err, tLTar *tar)
 Creates a new symbol sequence manager from files containing words.
 tL_seqs_new (void)
 Creates a new symbol sequence manager.
 tL_buffer_new ()
 Creates a new buffer.
 tL_buffer_free (tLBuffer *buffer)
 Frees memory.
 tL_gline (gzFile f, tLBuffer *buffer)
 Reads line.
 tL_gtoken (gzFile f, tLBuffer *buffer)
 Reads token.
 tL_gntoken (gzFile f, tLBuffer *buffer, size_t n)
 Reads the next token from a file descriptor until the end of file or until n bytes are read.
 tL_gtokenstr (char *str, char **begin, char **end)
 Reads token from string.
 tL_dict_new ()
 Creates a new dictionary.
 tL_dict_insert (tLDict *dict, const void *token, size_t *id)
 Inserts a token.
 tL_dict_find (const tLDict *dict, const void *token)
 Searches a token.
 tL_dict_free (tLDict *dict)
 Frees memory.
 tL_seq_copy (const tLSeq *seq)
 Creates a new symbol sequence from another one.
 tL_seq_free (tLSeq *seq)
 Frees memory.
 tL_seq_new (const size_t length)
 Creates a new empty symbol sequence.
 tL_seq_new_from_word (const char *word, const tLDict *syms, char **err)
 Creates a new symbol sequence from a word.
 tL_seq_print (const tLSeq *seq, FILE *to, const tLDict *syms, const char sep)
 Prints a symbol sequence.
 tL_seq_print_wildcards (const tLSeq *seq, FILE *to, const tLDict *syms, const char sep, const size_t first, const size_t last)
 Prints a symbol sequence with wildcards.
 tL_wordgraph_free (tLWordGraph *wg)
 Frees memory.
 tL_wordgraph_load (tLWordGraph **wg, gzFile from, const tLDict *words, const tLDict *syms, const char *end_sym, char **err)
 Loads a word graph.
 tL_wordgraph_print (const tLWordGraph *wg, FILE *to, const tLDict *words, const tLDict *syms, const char *end_sym)
 Prints the word graph.
 tL_wordgraph_print_htk (const tLWordGraph *wg, FILE *to, const tLDict *words, const tLDict *syms, const char *feaname, const char *start_sym, const char *end_sym)
 Prints the word graph using the HTK format.

Define Documentation

#define tL_dict_get_token (   DICT,
  ID 
)    ((const char *) (DICT)->data[(ID)])

Gets the corresponding token.

Returns the token mapped to a given number. The number is assumed to be mapped to an existing token.

Parameters:
DICTThe dictionary.
IDThe number of the requested token.
Returns:
The token.
#define tL_fea_get_real_data (   FEA)    ((float **) (FEA)->v)

Gets a pointer to real data.

This macro gets a pointer to the feature vector manager data. The data can be directly modified by accessing as a C matrix of floats [N][D], where N is the feature vector index, and D is the selected dimension. This macro is only valid for TL_FEA_REAL feature vector managers.

Parameters:
FEAThe feature vector manager.
Returns:
A pointer (float **) to the real data.
#define tL_fea_load_fd (   FEA,
  FD,
  ERR 
)    (FEA)->_load ( (FEA), (FD), (ERR) )

Loads a feature vector sequence.

This macro loads into the feature vector manager a feature vector sequence stored in the given file. The feature vector sequence must be of the same type as the feature vector manager. Binary feature vector sequences are expected to be in PBM image format, where each column is a binary feature vector.

Parameters:
FEAThe feature vector manager.
FDThe file where a feature vector sequence is stored.
ERRPointer to string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
-1 in case of error, 0 otherwise.
#define tL_fea_print (   FEA,
  TO 
)    (FEA)->_print ( (FEA), (TO) )

Prints the content.

This macro prints the content in text representation. The feature vector manager must be loaded, otherwise an unexpected error can happen.

Parameters:
FEAThe feature vector manager.
TOFile to which the content is written.
#define tL_fea_write_fd (   FEA,
  TO,
  BINARY,
  ERR 
)    (FEA)->_write ( (FEA), (TO), (BINARY), (ERR) )

Writes the content.

This macro writes the content to a file descriptor. The feature vector manager must be loaded, otherwise an unexpected error can happen.

Parameters:
FEAThe feature vector manager.
TOFile to which the content is written.
BINARYSpecifies whether the content must be written using binary or text representation.
ERRPointer to string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
-1 in case of error, 0 otherwise.
#define tL_seqs_get_num_seqs (   SEQS)    ((SEQS)->cseqs->N)

Gets the number of different symbol sequences.

This macro returns the number of different symbol sequences encoded in the symbol sequence manager. Symbol sequences are always encoded as consecutive numbers starting from 0.

Parameters:
SEQSThe symbol sequence manager.
Returns:
The number of different symbol sequences in the manager.
#define tL_seqs_get_seq (   SEQS,
  ID 
)    ((const tLSeq *) (SEQS)->cseqs->data[(ID)])

Gets a symbol sequence.

This macro returns, from the provided symbol sequence manager, a reference to the symbol sequence encoded as ID.

Parameters:
SEQSThe symbol sequence manager.
IDAn integer encoding a symbol sequence.
Returns:
A reference to the required symbol sequence.

Typedef Documentation

typedef double tLFloat

Type float.


Enumeration Type Documentation

enum tLFeaType

Type of features.

Enumerator:
TL_FEA_BINARY 

Binary feature.

TL_FEA_REAL 

Real feature.


Function Documentation

tL_atop ( const char *  str)

From ASCII to Prob.

Converts a string to tLProb.

Parameters:
strA pointer to string. Cannot be NULL.
Returns:
The converted value.
tL_buffer_free ( tLBuffer *  buffer)

Frees memory.

Frees the memory allocated for the buffer.

Parameters:
bufferThe buffer to be freed.

tL_buffer_new ( )

Creates a new buffer.

Returns:
The new buffer.
tL_dict_find ( const tLDict *  dict,
const void *  token 
)

Searches a token.

Returns the number mapped to the token, or dict->N if the token is not inserted in the dictionary.

Parameters:
dictThe dictionary.
tokenA C string (char *) containing the token.
Returns:
The number mapped to the token, or dict->N if the token is not found.
tL_dict_free ( tLDict *  dict)

Frees memory.

Frees the memory allocated for the dictionary.

Parameters:
dictThe dictionary.
tL_dict_insert ( tLDict *  dict,
const void *  token,
size_t *  id 
)

Inserts a token.

Tries to insert a token into the dictionary. If the token already exists, then it is not inserted. In any case, the number mapped to the token is stored in id.

The first token inserted into the dictionary is mapped to 0, the second token is mapped to 1, and so on.

Parameters:
dictThe dictionary.
tokenA C string (char *) containing the token.
idA pointer to a variable of type 'size_t'. The mapped number is stored in this variable.
Returns:
Returns 1 if the token has been inserted, 0 if the token is already in the dictionary.

tL_dict_new ( )

Creates a new dictionary.

Returns:
The new dictionary.
tL_error ( const char *  format,
  ... 
)

Terminates the application.

Terminates the application with an error exit status, and shows an error message through standard error.

Parameters:
formatFormat of the error message.
tL_fea_free ( tLFea *  fea)

Frees memory.

Frees the memory allocated for the feature vector manager.

Parameters:
feaThe feature vector manager.

tL_fea_get_binary_data ( tLFea *  fea)

Gets a pointer to binary data.

This function gets a pointer to the feature vector manager data. The data can be directly modified by accessing as a C matrix of chars [N][D], where N is the feature vector index, and D is the selected dimension. This function is only valid for TL_FEA_BINARY feature vector managers. The only allowed values are 0 and 1, other values could produce undesirable behaviour. Once the feature vector manager has been used, this function must be called again if we want to modify the values again.

Parameters:
feaThe feature vector manager.
Returns:
A pointer to the binary data.
tL_fea_load ( tLFea *  fea,
const char *  file_name,
char **  err 
)

Loads a feature vector sequence.

This function does the same as tL_fea_load_fd. The difference is that in this function the name of the file is provided instead of the file descriptor.

Parameters:
feaThe feature vector manager.
file_nameThe name of the file where a feature vector sequence is stored.
errPointer to string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
-1 in case of error, 0 otherwise.
tL_fea_new ( const tLFeaType  type)

Creates a new feature vector manager.

This function creates a new manager for feature vectors of the provided type.

Parameters:
typeThe type of the feature vectors.
Returns:
The new feature vector manager.
tL_fea_resize ( tLFea *  fea,
const int  dim,
const int  nvecs 
)

Resizes memory.

This function resizes the memory used by the manager to store the sample data. If the current capacity is enough, this function does nothing.

Parameters:
feaThe feature vector manager.
dimThe required feature vector dimension.
nvecsThe required number of feature vectors.
tL_fea_write ( const tLFea *  fea,
const char *  file_name,
const tLBool  binary,
char **  err 
)

Writes the content.

This function writes the content to a file. The feature vector manager must be loaded, otherwise an unexpected error can happen.

Parameters:
feaThe feature vector manager.
file_nameName of the file to which content must be written.
binarySpecifies whether the content must be written using binary or text representation.
errPointer to string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
-1 in case of error, 0 otherwise.
tL_filelist_free ( tLFileList *  filelist)

Frees memory.

Frees the memory allocated for the file list.

tL_filelist_new_from_file ( const char *  file_name)

Creates a new file list from a text file.

This function reads the content of a text file, where each line is the name of a file, and loads it into a new file list.

Parameters:
file_nameName of the file containing the file list.
Returns:
The new file list, or NULL if the file cannot be opened. In this case, errno is set by the C library.
tL_gline ( gzFile  f,
tLBuffer *  buffer 
)

Reads line.

This function reads the next line from a file into the provided text buffer. Line separators are ignored.

Parameters:
fInput file descriptor.
bufferText buffer to which the line is read.
Returns:
0 if no line has been read.
tL_gntoken ( gzFile  f,
tLBuffer *  buffer,
size_t  n 
)

Reads the next token from a file descriptor until the end of file or until n bytes are read.

Parameters:
fInput file descriptor.
bufferText buffer to which the token is read.
nNumber of bytes to be read.
Returns:
0 if no token has been read, else the number of read characters is returned.
tL_gtoken ( gzFile  f,
tLBuffer *  buffer 
)

Reads token.

This function reads the next token from a file into the provided text buffer. Blank separators are ignored.

Parameters:
fInput file descriptor.
bufferText buffer to which the token is read.
Returns:
0 if no token has been read.
tL_gtokenstr ( char *  str,
char **  begin,
char **  end 
)

Reads token from string.

This function reads the next token from a given string.

Parameters:
strInput string.
beginA pointer to string, where the address of the first character of the token is stored, or NULL if there is no token in str.
endA pointer to string, where, if it exists, the address of the next character of the token is stored.
Returns:
1 if the input string has been fully processed, 0 otherwise.
tL_prob_print ( const tLProb  prob,
FILE *  to 
)

Prints a probability.

Prints a probability to the specified file. The format depends on the current locale.

Parameters:
probProbability.
toFile to which the probability will be printed.
tL_seq_copy ( const tLSeq *  seq)

Creates a new symbol sequence from another one.

This function creates a new symbol sequence with the same length and content of the provided symbol sequence.

Parameters:
seqThe symbol sequence to be copied.
Returns:
The new symbol sequence.
tL_seq_free ( tLSeq *  seq)

Frees memory.

Frees the memory allocated for the symbol sequence.

tL_seq_new ( const size_t  length)

Creates a new empty symbol sequence.

This function creates a new symbol sequence of the specified length, and sets all symbol identifiers to 0.

Parameters:
lengthThe length of the new symbol sequence. Must be greater than 0.
Returns:
The new symbol sequence.
tL_seq_new_from_word ( const char *  word,
const tLDict *  syms,
char **  err 
)

Creates a new symbol sequence from a word.

This function splits the provided word into UTF-8 characters, and using the provided symbol dictionary, creates a new symbol sequence containing the encoded word.

Parameters:
wordA string containing a UTF-8 word.
symsA dictionary used to convert UTF-8 characters (symbols) to numbers.
errPointer to string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
The new symbol sequence, or NULL in case of error.
tL_seq_print ( const tLSeq *  seq,
FILE *  to,
const tLDict *  syms,
const char  sep 
)

Prints a symbol sequence.

This function writes to the given file the symbol sequence. The symbol sequence is decoded using the given symbol dictionary. If sep is set to '\0', the sequence is printed without spaces between symbols.

Parameters:
seqThe symbol sequence.
toFile to which the sequence is written.
symsDictionary with the symbols.
sepCharacter used as separator. '\0' means no separator.
tL_seq_print_wildcards ( const tLSeq *  seq,
FILE *  to,
const tLDict *  syms,
const char  sep,
const size_t  first,
const size_t  last 
)

Prints a symbol sequence with wildcards.

This function writes to the given file the symbol sequence, but replacing the first and last symbols, which are assumed to be wildcards, with the given symbols. The symbol sequence is decoded using the given symbol dictionary. If sep is set to '\0', the sequence is printed without spaces between symbols.

Parameters:
seqThe symbol sequence.
toFile to which the sequence is written.
symsDictionary with the symbols.
sepCharacter used as separator. '\0' means no separator.
firstFirst symbol.
lastLast symbol.
tL_seqs_adjust ( tLSeqs *  seqs)

Adjusts memory.

This function adjusts the memory related to the field 'seqs'.

Parameters:
seqsThe symbol sequence manager.
tL_seqs_append ( tLSeqs *  seqs,
const tLSeq *  seq 
)

Appends a new symbol sequence.

This function appends a new symbol sequence to the symbol sequence manager (the symbol sequence may be repeated) and returns the identifier of the inserted symbol sequence.

Parameters:
seqsThe symbol sequence manager.
seqThe new symbol sequence.
Returns:
The identifier of the inserted symbol sequence.
tL_seqs_free ( tLSeqs *  seqs)

Frees memory.

Frees the memory allocated for the symbol sequence manager.

Parameters:
seqsThe symbol sequence manager.
tL_seqs_load ( const tLFileList *  filelist,
const tLDict *  syms,
char **  err,
tLTar *  tar 
)

Creates a new symbol sequence manager from files.

This function creates a new sequence manager and loads all the symbol sequences from the provided tLFileList. Each file is assumed to contain a symbol sequence encoded as a sequence of tokens separated by blank characters. Sequences are loaded respecting the original order in the tLFileList. The provided symbol dictionary is used to encode the symbols.

Parameters:
filelistList of the files containing the symbol sequences.
symsThe dictionary with the symbols.
errPointer to string variable. If not NULL, an error message is allocated in the variable in case of error.
tarInformation about a tar file from which samples will be read.
Returns:
The new symbol sequence manager, or NULL in case of error.
tL_seqs_load_words ( const tLFileList *  filelist,
const tLDict *  syms,
char **  err,
tLTar *  tar 
)

Creates a new symbol sequence manager from files containing words.

This function works as tL_seqs_load, but the symbol sequences are assumed to be words encoded using UTF-8.

Parameters:
filelistList of the files containing the symbol sequences.
symsThe dictionary with the symbols (UTF-8 characters).
errPointer to string variable. If not NULL, an error message is allocated in the variable in case of error.
tarInformation about a tar file from which samples will be read.
Returns:
The new symbol sequence manager, or NULL in case of error.
tL_seqs_new ( void  )

Creates a new symbol sequence manager.

This function creates a new empty symbol sequence manager.

Returns:
The new symbol sequence manager.
tL_tar_free ( tLTar *  tar)

Frees memory.

Frees the memory allocated for a tLTar.

Parameters:
tarThe tLTar structure.
tL_tar_get_file ( tLTar *  tar,
const char *  name,
long *  size,
char **  err 
)

Returns a file descriptor to desired file.

This function returns the file descriptor of the tar file, seeked to the beginning of the desired file. Therefore, the file descriptor must not be closed.

Parameters:
tarThe tLTar structure.
nameThe name of the desired file.
sizePointer to a long variable. If not NULL the size of the file in bytes is returned.
errPointer to string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
The file descriptor to the desired file, or NULL in case of error. This descriptor must not be closed.
tL_tar_new ( const char *  file_name,
char **  err 
)

Creates a new tLTar.

This function creates a new tLTar for a given file.

Parameters:
file_nameThe name of the file.
errPointer to string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
The new tLTar.
tL_warning ( const char *  format,
  ... 
)

Warning message.

Shows a warning message through the standard error.

Parameters:
formatFormat of the warning message.

tL_wordgraph_free ( tLWordGraph *  wg)

Frees memory.

Frees the memory allocated for the word graph.

Parameters:
wgThe word graph.
tL_wordgraph_load ( tLWordGraph **  wg,
gzFile  from,
const tLDict *  words,
const tLDict *  syms,
const char *  end_sym,
char **  err 
)

Loads a word graph.

Loads a word graph from a text description.

Parameters:
wgPointer to the memory address where the word graph will be loaded. If it points to NULL, a new word graph will be created.
fromFile where the text description is stored.
wordsDictionary containing the words.
symsDictionary containing the symbols. Only required in the case of word graphs with segments.
end_symToken used to represent the special final word. NULL is equivalent to "</s>".
errPointer to string variable. If not NULL, an error message is allocated in the variable in case of error.
Returns:
-1 in case of error, 0 otherwise.
tL_wordgraph_print ( const tLWordGraph *  wg,
FILE *  to,
const tLDict *  words,
const tLDict *  syms,
const char *  end_sym 
)

Prints the word graph.

This function writes in the given file a text representation of the word graph. The provided word dictionary is assumed to contain all needed words, otherwise an unexpected error could happen.

Parameters:
wgThe word graph.
toFile to which the word graph is written.
wordsDictionary containing the words.
symsDictionary containing the symbols. Not required if the word graph doesn't contain any segments.
end_symToken used to print the special final word. NULL is equivalent to "</s>".
tL_wordgraph_print_htk ( const tLWordGraph *  wg,
FILE *  to,
const tLDict *  words,
const tLDict *  syms,
const char *  feaname,
const char *  start_sym,
const char *  end_sym 
)

Prints the word graph using the HTK format.

This function writes to the given file a text representation of the word graph using the HTK format. The provided word dictionary is assumed to contain all needed words, otherwise an unexpected error could happen.

Parameters:
wgThe word graph.
toFile to which the word graph is written.
wordsDictionary containing the words.
symsDictionary containing the symbols. Not required if the word graph doesn't contain any segments.
feanameA string with the name of the feature related to the word graph.
start_symToken used to print the special initial word. NULL is equivalent to "<s>".
end_symToken used to print the special final word. NULL is equivalent to "</s>".
 All Data Structures Variables