libTLK
1.3.1
|
Data Structures | |
struct | tLProbPair |
A pair of log-probabilities. More... | |
struct | tLFileList |
List of file names. More... | |
struct | tLTar |
Tar files. More... | |
struct | tLFea |
Feature vectors. More... | |
struct | tLSeqs |
Symbol sequence manager. More... | |
struct | tLBuffer |
Text buffer. More... | |
struct | tLDict |
Dictionary of tokens. More... | |
struct | tLSeq |
Symbol sequence. More... | |
struct | tLWGState |
Word graph state. More... | |
struct | tLWGSegment |
Word graph segment. More... | |
struct | tLWGEdge |
Word graph edge. More... | |
struct | tLWGList |
List node. More... | |
struct | tLWordGraph |
Word graph. More... | |
Defines | |
#define | tL_fea_get_real_data(FEA) ((float **) (FEA)->v) |
Gets a pointer to real data. | |
#define | tL_fea_load_fd(FEA, FD, ERR) (FEA)->_load ( (FEA), (FD), (ERR) ) |
Loads a feature vector sequence. | |
#define | tL_fea_print(FEA, TO) (FEA)->_print ( (FEA), (TO) ) |
Prints the content. | |
#define | tL_fea_write_fd(FEA, TO, BINARY, ERR) (FEA)->_write ( (FEA), (TO), (BINARY), (ERR) ) |
Writes the content. | |
#define | tL_seqs_get_num_seqs(SEQS) ((SEQS)->cseqs->N) |
Gets the number of different symbol sequences. | |
#define | tL_seqs_get_seq(SEQS, ID) ((const tLSeq *) (SEQS)->cseqs->data[(ID)]) |
Gets a symbol sequence. | |
#define | tL_dict_get_token(DICT, ID) ((const char *) (DICT)->data[(ID)]) |
Gets the corresponding token. | |
Typedefs | |
typedef double | tLFloat |
Type float. | |
typedef tLFloat | tLProb |
An alias of the type tLFloat used to represent log-probabilities. | |
typedef unsigned char | tLBool |
Boolean type. | |
Enumerations | |
enum | tLFeaType { TL_FEA_BINARY, TL_FEA_REAL } |
Type of features. More... | |
Functions | |
tL_atop (const char *str) | |
From ASCII to Prob. | |
tL_prob_print (const tLProb prob, FILE *to) | |
Prints a probability. | |
tL_error (const char *format,...) | |
Terminates the application. | |
tL_warning (const char *format,...) | |
Warning message. | |
tL_filelist_new_from_file (const char *file_name) | |
Creates a new file list from a text file. | |
tL_filelist_free (tLFileList *filelist) | |
Frees memory. | |
tL_tar_free (tLTar *tar) | |
Frees memory. | |
tL_tar_get_file (tLTar *tar, const char *name, long *size, char **err) | |
Returns a file descriptor to desired file. | |
tL_tar_new (const char *file_name, char **err) | |
Creates a new tLTar. | |
tL_fea_free (tLFea *fea) | |
Frees memory. | |
tL_fea_get_binary_data (tLFea *fea) | |
Gets a pointer to binary data. | |
tL_fea_load (tLFea *fea, const char *file_name, char **err) | |
Loads a feature vector sequence. | |
tL_fea_new (const tLFeaType type) | |
Creates a new feature vector manager. | |
tL_fea_resize (tLFea *fea, const int dim, const int nvecs) | |
Resizes memory. | |
tL_fea_write (const tLFea *fea, const char *file_name, const tLBool binary, char **err) | |
Writes the content. | |
tL_seqs_adjust (tLSeqs *seqs) | |
Adjusts memory. | |
tL_seqs_append (tLSeqs *seqs, const tLSeq *seq) | |
Appends a new symbol sequence. | |
tL_seqs_free (tLSeqs *seqs) | |
Frees memory. | |
tL_seqs_load (const tLFileList *filelist, const tLDict *syms, char **err, tLTar *tar) | |
Creates a new symbol sequence manager from files. | |
tL_seqs_load_words (const tLFileList *filelist, const tLDict *syms, char **err, tLTar *tar) | |
Creates a new symbol sequence manager from files containing words. | |
tL_seqs_new (void) | |
Creates a new symbol sequence manager. | |
tL_buffer_new () | |
Creates a new buffer. | |
tL_buffer_free (tLBuffer *buffer) | |
Frees memory. | |
tL_gline (gzFile f, tLBuffer *buffer) | |
Reads line. | |
tL_gtoken (gzFile f, tLBuffer *buffer) | |
Reads token. | |
tL_gntoken (gzFile f, tLBuffer *buffer, size_t n) | |
Reads the next token from a file descriptor until the end of file or until n bytes are read. | |
tL_gtokenstr (char *str, char **begin, char **end) | |
Reads token from string. | |
tL_dict_new () | |
Creates a new dictionary. | |
tL_dict_insert (tLDict *dict, const void *token, size_t *id) | |
Inserts a token. | |
tL_dict_find (const tLDict *dict, const void *token) | |
Searches a token. | |
tL_dict_free (tLDict *dict) | |
Frees memory. | |
tL_seq_copy (const tLSeq *seq) | |
Creates a new symbol sequence from another one. | |
tL_seq_free (tLSeq *seq) | |
Frees memory. | |
tL_seq_new (const size_t length) | |
Creates a new empty symbol sequence. | |
tL_seq_new_from_word (const char *word, const tLDict *syms, char **err) | |
Creates a new symbol sequence from a word. | |
tL_seq_print (const tLSeq *seq, FILE *to, const tLDict *syms, const char sep) | |
Prints a symbol sequence. | |
tL_seq_print_wildcards (const tLSeq *seq, FILE *to, const tLDict *syms, const char sep, const size_t first, const size_t last) | |
Prints a symbol sequence with wildcards. | |
tL_wordgraph_free (tLWordGraph *wg) | |
Frees memory. | |
tL_wordgraph_load (tLWordGraph **wg, gzFile from, const tLDict *words, const tLDict *syms, const char *end_sym, char **err) | |
Loads a word graph. | |
tL_wordgraph_print (const tLWordGraph *wg, FILE *to, const tLDict *words, const tLDict *syms, const char *end_sym) | |
Prints the word graph. | |
tL_wordgraph_print_htk (const tLWordGraph *wg, FILE *to, const tLDict *words, const tLDict *syms, const char *feaname, const char *start_sym, const char *end_sym) | |
Prints the word graph using the HTK format. |
#define tL_dict_get_token | ( | DICT, | |
ID | |||
) | ((const char *) (DICT)->data[(ID)]) |
Gets the corresponding token.
Returns the token mapped to a given number. The number is assumed to be mapped to an existing token.
DICT | The dictionary. |
ID | The number of the requested token. |
#define tL_fea_get_real_data | ( | FEA | ) | ((float **) (FEA)->v) |
Gets a pointer to real data.
This macro gets a pointer to the feature vector manager data. The data can be directly modified by accessing it as a C matrix of floats [N][D], where N is the feature vector index, and D is the selected dimension. This macro is only valid for TL_FEA_REAL feature vector managers.
FEA | The feature vector manager. |
#define tL_fea_load_fd | ( | FEA, | |
FD, | |||
ERR | |||
) | (FEA)->_load ( (FEA), (FD), (ERR) ) |
Loads a feature vector sequence.
This macro loads into the feature vector manager a feature vector sequence stored in the given file. The feature vector sequence must be of the same type as the feature vector manager. Binary feature vector sequences are expected to be in PBM image format, where each column is a binary feature vector.
FEA | The feature vector manager. |
FD | The file where a feature vector sequence is stored. |
ERR | Pointer to string variable. If not NULL, an error message is allocated in the variable in case of error. |
#define tL_fea_print | ( | FEA, | |
TO | |||
) | (FEA)->_print ( (FEA), (TO) ) |
Prints the content.
This macro prints the content in text representation. The feature vector manager must be loaded, otherwise an unexpected error can happen.
FEA | The feature vector manager. |
TO | File to which the content is written. |
#define tL_fea_write_fd | ( | FEA, | |
TO, | |||
BINARY, | |||
ERR | |||
) | (FEA)->_write ( (FEA), (TO), (BINARY), (ERR) ) |
Writes the content.
This macro writes the content to a file descriptor. The feature vector manager must be loaded, otherwise an unexpected error can happen.
FEA | The feature vector manager. |
TO | File to which the content is written. |
BINARY | Specifies whether the content must be written using binary or text representation. |
ERR | Pointer to string variable. If not NULL, an error message is allocated in the variable in case of error. |
#define tL_seqs_get_num_seqs | ( | SEQS | ) | ((SEQS)->cseqs->N) |
Gets the number of different symbol sequences.
This macro returns the number of different symbol sequences encoded in the symbol sequence manager. Symbol sequences are always encoded as consecutive numbers starting from 0.
SEQS | The symbol sequence manager. |
#define tL_seqs_get_seq | ( | SEQS, | |
ID | |||
) | ((const tLSeq *) (SEQS)->cseqs->data[(ID)]) |
Gets a symbol sequence.
This macro returns, from the provided symbol sequence manager, a reference to the symbol sequence encoded as ID.
SEQS | The symbol sequence manager. |
ID | An integer encoding a symbol sequence. |
typedef double tLFloat |
Type float.
enum tLFeaType |
tL_atop | ( | const char * | str | ) |
From ASCII to Prob.
Converts a string to tLProb.
str | A pointer to string. Cannot be NULL. |
tL_buffer_free | ( | tLBuffer * | buffer | ) |
Frees memory.
Frees the memory allocated for the buffer.
buffer | The buffer to be freed. |
tL_buffer_new | ( | ) |
Creates a new buffer.
tL_dict_find | ( | const tLDict * | dict, |
const void * | token | ||
) |
Searches a token.
Returns the number mapped to the token, or dict->N if the token is not inserted in the dictionary.
dict | The dictionary. |
token | A C string (char *) containing the token. |
tL_dict_free | ( | tLDict * | dict | ) |
Frees memory.
Frees the memory allocated for the dictionary.
dict | The dictionary. |
tL_dict_insert | ( | tLDict * | dict, |
const void * | token, | ||
size_t * | id | ||
) |
Inserts a token.
Tries to insert a token into the dictionary. If the token already exists, then it is not inserted. In any case, the number mapped to the token is stored in id.
The first token inserted into the dictionary is mapped to 0, the second token is mapped to 1, and so on.
dict | The dictionary. |
token | A C string (char *) containing the token. |
id | A pointer to a variable of type 'size_t'. The mapped number is stored in this variable. |
tL_dict_new | ( | ) |
Creates a new dictionary.
tL_error | ( | const char * | format, |
... | |||
) |
Terminates the application.
Terminates the application with an error exit status, and shows an error message through standard error.
format | Format of the error message. |
tL_fea_free | ( | tLFea * | fea | ) |
Frees memory.
Frees the memory allocated for the feature vector manager.
fea | The feature vector manager. |
tL_fea_get_binary_data | ( | tLFea * | fea | ) |
Gets a pointer to binary data.
This function gets a pointer to the feature vector manager data. The data can be directly modified by accessing it as a C matrix of chars [N][D], where N is the feature vector index, and D is the selected dimension. This function is only valid for TL_FEA_BINARY feature vector managers. The only allowed values are 0 and 1; other values could produce undesirable behaviour. Once the feature vector manager has been used, this function must be called again if we want to modify the values again.
fea | The feature vector manager. |
tL_fea_load | ( | tLFea * | fea, |
const char * | file_name, | ||
char ** | err | ||
) |
Loads a feature vector sequence.
This function does the same as tL_fea_load_fd. The difference is that in this function the name of the file is provided instead of the file descriptor.
fea | The feature vector manager. |
file_name | The name of the file where a feature vector sequence is stored. |
err | Pointer to string variable. If not NULL, an error message is allocated in the variable in case of error. |
tL_fea_new | ( | const tLFeaType | type | ) |
Creates a new feature vector manager.
This function creates a new manager for feature vectors of the provided type.
type | The type of the feature vectors. |
tL_fea_resize | ( | tLFea * | fea, |
const int | dim, | ||
const int | nvecs | ||
) |
Resizes memory.
This function resizes the memory used by the manager to store the sample data. If the current capacity is enough, this function does nothing.
fea | The feature vector manager. |
dim | The required feature vector dimension. |
nvecs | The required number of feature vectors. |
tL_fea_write | ( | const tLFea * | fea, |
const char * | file_name, | ||
const tLBool | binary, | ||
char ** | err | ||
) |
Writes the content.
This function writes the content to a file. The feature vector manager must be loaded, otherwise an unexpected error can happen.
fea | The feature vector manager. |
file_name | Name of the file to which content must be written. |
binary | Specifies whether the content must be written using binary or text representation. |
err | Pointer to string variable. If not NULL, an error message is allocated in the variable in case of error. |
tL_filelist_free | ( | tLFileList * | filelist | ) |
Frees memory.
Frees the memory allocated for the file list.
tL_filelist_new_from_file | ( | const char * | file_name | ) |
Creates a new file list from a text file.
This function reads the content of a text file, where each line is the name of a file, and loads it into a new file list.
file_name | Name of the file containing the file list. |
Reads line.
This function reads the next line from a file into the provided text buffer. Line separators are ignored.
f | Input file descriptor. |
buffer | Text buffer to which the line is read. |
tL_gntoken | ( | gzFile | f, |
tLBuffer * | buffer, | ||
size_t | n | ||
) |
Reads the next token from a file descriptor until the end of file or until n bytes are read.
f | Input file descriptor. |
buffer | Text buffer to which the token is read. |
n | Number of bytes to be read. |
Reads token.
This function reads the next token from a file into the provided text buffer. Blank separators are ignored.
f | Input file descriptor. |
buffer | Text buffer to which the token is read. |
tL_gtokenstr | ( | char * | str, |
char ** | begin, | ||
char ** | end | ||
) |
Reads token from string.
This function reads the next token from a given string.
str | Input string. |
begin | A pointer to string, where the address of the first character of the token is stored, or NULL if there is no token in str. |
end | A pointer to string, where, if it exists, the address of the next character of the token is stored. |
tL_prob_print | ( | const tLProb | prob, |
FILE * | to | ||
) |
Prints a probability.
Prints a probability to the specified file. The format depends on the current locale.
prob | Probability. |
to | File to which the probability will be printed. |
tL_seq_copy | ( | const tLSeq * | seq | ) |
Creates a new symbol sequence from another one.
This function creates a new symbol sequence with the same length and content as the provided symbol sequence.
seq | The symbol sequence to be copied. |
tL_seq_free | ( | tLSeq * | seq | ) |
Frees memory.
Frees the memory allocated for the symbol sequence.
tL_seq_new | ( | const size_t | length | ) |
Creates a new empty symbol sequence.
This function creates a new symbol sequence of the specified length, and sets all symbol identifiers to 0.
length | The length of the new symbol sequence. Must be greater than 0. |
tL_seq_new_from_word | ( | const char * | word, |
const tLDict * | syms, | ||
char ** | err | ||
) |
Creates a new symbol sequence from a word.
This function splits the provided word into UTF-8 characters, and using the provided symbol dictionary, creates a new symbol sequence containing the encoded word.
word | A string containing a UTF-8 word. |
syms | A dictionary used to convert UTF-8 characters (symbols) to numbers. |
err | Pointer to string variable. If not NULL, an error message is allocated in the variable in case of error. |
tL_seq_print | ( | const tLSeq * | seq, |
FILE * | to, | ||
const tLDict * | syms, | ||
const char | sep | ||
) |
Prints a symbol sequence.
This function writes to the given file the symbol sequence. The symbol sequence is decoded using the given symbol dictionary. If sep is set to '\0', the sequence is printed without spaces between symbols.
seq | The symbol sequence. |
to | File to which the sequence is written. |
syms | Dictionary with the symbols. |
sep | Character used as separator. '\0' means no separator. |
tL_seq_print_wildcards | ( | const tLSeq * | seq, |
FILE * | to, | ||
const tLDict * | syms, | ||
const char | sep, | ||
const size_t | first, | ||
const size_t | last | ||
) |
Prints a symbol sequence with wildcards.
This function writes to the given file the symbol sequence, but replacing the first and last symbols, which are assumed to be wildcards, with the given symbols. The symbol sequence is decoded using the given symbol dictionary. If sep is set to '\0', the sequence is printed without spaces between symbols.
seq | The symbol sequence. |
to | File to which the sequence is written. |
syms | Dictionary with the symbols. |
sep | Character used as separator. '\0' means no separator. |
first | First symbol. |
last | Last symbol. |
tL_seqs_adjust | ( | tLSeqs * | seqs | ) |
Adjusts memory.
This function adjusts the memory related to the field 'seqs'.
seqs | The symbol sequence manager. |
tL_seqs_append | ( | tLSeqs * | seqs, |
const tLSeq * | seq | ||
) |
Appends a new symbol sequence.
This function appends a new symbol sequence to the symbol sequence manager (the symbol sequence may be repeated) and returns the identifier of the inserted symbol sequence.
seqs | The symbol sequence manager. |
seq | The new symbol sequence. |
tL_seqs_free | ( | tLSeqs * | seqs | ) |
Frees memory.
Frees the memory allocated for the symbol sequence manager.
seqs | The symbol sequence manager. |
tL_seqs_load | ( | const tLFileList * | filelist, |
const tLDict * | syms, | ||
char ** | err, | ||
tLTar * | tar | ||
) |
Creates a new symbol sequence manager from files.
This function creates a new sequence manager and loads all the symbol sequences from the provided tLFileList. Each file is assumed to contain a symbol sequence encoded as a sequence of tokens separated by blank characters. Sequences are loaded respecting the original order in the tLFileList. The provided symbol dictionary is used to encode the symbols.
filelist | List of the files containing the symbol sequences. |
syms | The dictionary with the symbols. |
err | Pointer to string variable. If not NULL, an error message is allocated in the variable in case of error. |
tar | Information about a tar file from which samples will be read. |
tL_seqs_load_words | ( | const tLFileList * | filelist, |
const tLDict * | syms, | ||
char ** | err, | ||
tLTar * | tar | ||
) |
Creates a new symbol sequence manager from files containing words.
This function works as tL_seqs_load, but the symbol sequences are assumed to be words encoded using UTF-8.
filelist | List of the files containing the symbol sequences. |
syms | The dictionary with the symbols (UTF-8 characters). |
err | Pointer to string variable. If not NULL, an error message is allocated in the variable in case of error. |
tar | Information about a tar file from which samples will be read. |
tL_seqs_new | ( | void | ) |
Creates a new symbol sequence manager.
This function creates a new empty symbol sequence manager.
tL_tar_free | ( | tLTar * | tar | ) |
tL_tar_get_file | ( | tLTar * | tar, |
const char * | name, | ||
long * | size, | ||
char ** | err | ||
) |
Returns a file descriptor to desired file.
This function returns the file descriptor of the tar file, seeked to the beginning of the desired file. Therefore, the file descriptor must not be closed.
tar | The tLTar structure. |
name | The name of the desired file. |
size | Pointer to a long variable. If not NULL the size of the file in bytes is returned. |
err | Pointer to string variable. If not NULL, an error message is allocated in the variable in case of error. |
tL_tar_new | ( | const char * | file_name, |
char ** | err | ||
) |
tL_warning | ( | const char * | format, |
... | |||
) |
Warning message.
Shows a warning message through the standard error.
format | Format of the warning message. |
tL_wordgraph_free | ( | tLWordGraph * | wg | ) |
Frees memory.
Frees the memory allocated for the word graph.
wg | The word graph. |
tL_wordgraph_load | ( | tLWordGraph ** | wg, |
gzFile | from, | ||
const tLDict * | words, | ||
const tLDict * | syms, | ||
const char * | end_sym, | ||
char ** | err | ||
) |
Loads a word graph.
Loads a word graph from a text description.
wg | Pointer to the memory address where the word graph will be loaded. If it points to NULL, a new word graph will be created. |
from | File where the text description is stored. |
words | Dictionary containing the words. |
syms | Dictionary containing the symbols. Only required in the case of word graphs with segments. |
end_sym | Token used to represent the special final word. NULL is equivalent to "</s>". |
err | Pointer to string variable. If not NULL, an error message is allocated in the variable in case of error. |
tL_wordgraph_print | ( | const tLWordGraph * | wg, |
FILE * | to, | ||
const tLDict * | words, | ||
const tLDict * | syms, | ||
const char * | end_sym | ||
) |
Prints the word graph.
This function writes in the given file a text representation of the word graph. The provided word dictionary is assumed to contain all needed words, otherwise an unexpected error could happen.
wg | The word graph. |
to | File to which the word graph is written. |
words | Dictionary containing the words. |
syms | Dictionary containing the symbols. Not required if the word graph doesn't contain any segments. |
end_sym | Token used to print the special final word. NULL is equivalent to "</s>". |
tL_wordgraph_print_htk | ( | const tLWordGraph * | wg, |
FILE * | to, | ||
const tLDict * | words, | ||
const tLDict * | syms, | ||
const char * | feaname, | ||
const char * | start_sym, | ||
const char * | end_sym | ||
) |
Prints the word graph using the HTK format.
This function writes to the given file a text representation of the word graph using the HTK format. The provided word dictionary is assumed to contain all needed words, otherwise an unexpected error could happen.
wg | The word graph. |
to | File to which the word graph is written. |
words | Dictionary containing the words. |
syms | Dictionary containing the symbols. Not required if the word graph doesn't contain any segments. |
feaname | A string with the name of the feature related to the word graph. |
start_sym | Token used to print the special initial word. NULL is equivalent to "<s>". |
end_sym | Token used to print the special final word. NULL is equivalent to "</s>". |