#include "error.h"
#include "transcode.h"
#include "token.h"
Include dependency graph for tokenizer.h:
Go to the source code of this file.
Typedefs | |
typedef FAXPP_TokenizerEnv_s | FAXPP_Tokenizer |
The tokenizer structure. Details of the structure are private. | |
Functions | |
FAXPP_Tokenizer * | FAXPP_create_tokenizer (FAXPP_Transcoder encode) |
Creates a tokenizer object. | |
void | FAXPP_free_tokenizer (FAXPP_Tokenizer *tokenizer) |
Frees a tokenizer object. | |
FAXPP_DecodeFunction | FAXPP_get_tokenizer_decode (const FAXPP_Tokenizer *tokenizer) |
Returns the current FAXPP_DecodeFunction that the tokenizer is using. | |
void | FAXPP_set_tokenizer_decode (FAXPP_Tokenizer *tokenizer, FAXPP_DecodeFunction decode) |
Sets the FAXPP_DecodeFunction that the tokenizer uses to decode the XML document. | |
FAXPP_Error | FAXPP_init_tokenize (FAXPP_Tokenizer *tokenizer, void *buffer, unsigned int length, unsigned int done) |
Initialize the tokenizer to tokenize the given buffer, returning strings encoded using the given encoding function. | |
FAXPP_Error | FAXPP_tokenizer_release_buffer (FAXPP_Tokenizer *tokenizer, void **buffer_position) |
Instructs the tokenizer to release any dependencies it has on its current buffer. | |
FAXPP_Error | FAXPP_continue_tokenize (FAXPP_Tokenizer *tokenizer, void *buffer, unsigned int length, unsigned int done) |
Provides a new buffer for the tokenizer to continue tokenizing. | |
FAXPP_Error | FAXPP_next_token (FAXPP_Tokenizer *tokenizer) |
Reads the next token from the buffer, placing the information for it into the current token. | |
const FAXPP_Token * | FAXPP_get_current_token (const FAXPP_Tokenizer *tokenizer) |
Returns the current token produced by the tokenizer when FAXPP_next_token() was called. | |
unsigned int | FAXPP_get_tokenizer_nesting_level (const FAXPP_Tokenizer *tokenizer) |
Returns the current element nesting level in the XML document. | |
unsigned int | FAXPP_get_tokenizer_error_line (const FAXPP_Tokenizer *tokenizer) |
Returns the line that the current error occurred on. | |
unsigned int | FAXPP_get_tokenizer_error_column (const FAXPP_Tokenizer *tokenizer) |
Returns the column that the current error occurred on. |
typedef struct FAXPP_TokenizerEnv_s FAXPP_Tokenizer |
The tokenizer structure. Details of the structure are private.
Definition at line 32 of file tokenizer.h.
FAXPP_Error FAXPP_continue_tokenize | ( | FAXPP_Tokenizer * | tokenizer, | |
void * | buffer, | |||
unsigned int | length, | |||
unsigned int | done | |||
) |
Provides a new buffer for the tokenizer to continue tokenizing.
FAXPP_tokenizer_release_buffer() should have been called before this, and the remaining data in the old buffer transferred to the new one.
tokenizer | ||
buffer | A pointer to the start of the buffer to tokenize | |
length | The length of the given buffer | |
done | Set to non-zero if this is the last buffer from the input |
NO_ERROR |
FAXPP_Tokenizer * FAXPP_create_tokenizer | ( | FAXPP_Transcoder | encode | ) |
Creates a tokenizer object.
encode | The transcoder to use when encoding token values |
void FAXPP_free_tokenizer | ( | FAXPP_Tokenizer * | tokenizer | ) |
Frees a tokenizer object.
tokenizer | The tokenizer to free |
const FAXPP_Token * FAXPP_get_current_token | ( | const FAXPP_Tokenizer * | tokenizer | ) |
Returns the current token produced by the tokenizer when FAXPP_next_token() was called.
tokenizer |
FAXPP_DecodeFunction FAXPP_get_tokenizer_decode | ( | const FAXPP_Tokenizer * | tokenizer | ) |
Returns the current FAXPP_DecodeFunction that the tokenizer is using.
tokenizer |
unsigned int FAXPP_get_tokenizer_error_column | ( | const FAXPP_Tokenizer * | tokenizer | ) |
Returns the column that the current error occurred on.
tokenizer |
unsigned int FAXPP_get_tokenizer_error_line | ( | const FAXPP_Tokenizer * | tokenizer | ) |
Returns the line that the current error occurred on.
tokenizer |
unsigned int FAXPP_get_tokenizer_nesting_level | ( | const FAXPP_Tokenizer * | tokenizer | ) |
Returns the current element nesting level in the XML document.
tokenizer |
FAXPP_Error FAXPP_init_tokenize | ( | FAXPP_Tokenizer * | tokenizer, | |
void * | buffer, | |||
unsigned int | length, | |||
unsigned int | done | |||
) |
Initialize the tokenizer to tokenize the given buffer, returning strings encoded using the given encoding function.
tokenizer | The tokenizer to initialize | |
buffer | A pointer to the start of the buffer to tokenize | |
length | The length of the given buffer | |
done | Set to non-zero if this is the last buffer from the input |
UNSUPPORTED_ENCODING | If the encoding sniffing algorithm cannot recognize the encoding of the buffer | |
NO_ERROR |
FAXPP_Error FAXPP_next_token | ( | FAXPP_Tokenizer * | tokenizer | ) |
Reads the next token from the buffer, placing the information for it into the current token.
tokenizer |
DOUBLE_DASH_IN_COMMENT | ||
PREMATURE_END_OF_BUFFER | ||
INVALID_START_OF_COMMENT | ||
INVALID_CHAR_IN_START_ELEMENT | ||
INVALID_CHAR_IN_ATTRIBUTE | ||
INVALID_CHAR_IN_END_ELEMENT | ||
NON_WHITESPACE_OUTSIDE_DOC_ELEMENT | ||
BAD_ENCODING | ||
UNSUPPORTED_ENCODING | ||
ADDITIONAL_DOCUMENT_ELEMENT | ||
INVALID_CHAR_IN_PI_NAME | ||
INVALID_PI_NAME_OF_XML | ||
INVALID_CHAR_IN_ELEMENT_NAME | ||
INVALID_CHAR_IN_ATTRIBUTE_NAME | ||
RESTRICTED_CHAR | ||
INVALID_CHAR_IN_ENTITY_REFERENCE | ||
INVALID_CHAR_IN_CHAR_REFERENCE | ||
INVALID_CHAR_IN_XML_DECL | ||
EXPECTING_EQUALS | ||
EXPECTING_WHITESPACE | ||
UNKNOWN_XML_VERSION | ||
INVALID_ENCODING_VALUE | ||
OUT_OF_MEMORY | ||
NO_ERROR |
void FAXPP_set_tokenizer_decode | ( | FAXPP_Tokenizer * | tokenizer, | |
FAXPP_DecodeFunction | decode | |||
) |
Sets the FAXPP_DecodeFunction that the tokenizer uses to decode the XML document.
This will typically be called when an encoding declaration is read, to switch to the correct decode function.
tokenizer | ||
decode | The decode function |
FAXPP_Error FAXPP_tokenizer_release_buffer | ( | FAXPP_Tokenizer * | tokenizer, | |
void ** | buffer_position | |||
) |
Instructs the tokenizer to release any dependencies it has on its current buffer.
This is typically called on receiving a PREMATURE_END_OF_BUFFER error, before using FAXPP_continue_tokenize() to provide a new buffer. In this case, the buffer data between *buffer_position and the end of the buffer needs to be copied into the start of the new buffer.
tokenizer | ||
[out] | buffer_position | Set to a pointer in the current buffer that the tokenizer has tokenized up to |
OUT_OF_MEMORY | ||
NO_ERROR |