#include "error.h"
#include "transcode.h"
#include "token.h"
Include dependency graph for tokenizer.h:

Go to the source code of this file.
Typedefs | |
| typedef struct FAXPP_TokenizerEnv_s | FAXPP_Tokenizer |
| The tokenizer structure. Details of the structure are private. | |
Functions | |
| FAXPP_Tokenizer * | FAXPP_create_tokenizer (FAXPP_Transcoder encode) |
| Creates a tokenizer object. | |
| void | FAXPP_free_tokenizer (FAXPP_Tokenizer *tokenizer) |
| Frees a tokenizer object. | |
| FAXPP_DecodeFunction | FAXPP_get_tokenizer_decode (const FAXPP_Tokenizer *tokenizer) |
| Returns the current FAXPP_DecodeFunction that the tokenizer is using. | |
| void | FAXPP_set_tokenizer_decode (FAXPP_Tokenizer *tokenizer, FAXPP_DecodeFunction decode) |
| Sets the FAXPP_DecodeFunction that the tokenizer uses to decode the XML document. | |
| FAXPP_Error | FAXPP_init_tokenize (FAXPP_Tokenizer *tokenizer, void *buffer, unsigned int length, unsigned int done) |
| Initialize the tokenizer to tokenize the given buffer, returning strings encoded using the given encoding function. | |
| FAXPP_Error | FAXPP_tokenizer_release_buffer (FAXPP_Tokenizer *tokenizer, void **buffer_position) |
| Instructs the tokenizer to release any dependencies it has on its current buffer. | |
| FAXPP_Error | FAXPP_continue_tokenize (FAXPP_Tokenizer *tokenizer, void *buffer, unsigned int length, unsigned int done) |
| Provides a new buffer for the tokenizer to continue tokenizing. | |
| FAXPP_Error | FAXPP_next_token (FAXPP_Tokenizer *tokenizer) |
| Reads the next token from the buffer, placing the information for it into the current token. | |
| const FAXPP_Token * | FAXPP_get_current_token (const FAXPP_Tokenizer *tokenizer) |
| Returns the current token produced by the tokenizer when FAXPP_next_token() was called. | |
| unsigned int | FAXPP_get_tokenizer_nesting_level (const FAXPP_Tokenizer *tokenizer) |
| Returns the current element nesting level in the XML document. | |
| unsigned int | FAXPP_get_tokenizer_error_line (const FAXPP_Tokenizer *tokenizer) |
| Returns the line that the current error occurred on. | |
| unsigned int | FAXPP_get_tokenizer_error_column (const FAXPP_Tokenizer *tokenizer) |
| Returns the column that the current error occurred on. | |
| typedef struct FAXPP_TokenizerEnv_s FAXPP_Tokenizer |
The tokenizer structure. Details of the structure are private.
Definition at line 32 of file tokenizer.h.
| FAXPP_Error FAXPP_continue_tokenize | ( | FAXPP_Tokenizer * | tokenizer, | |
| void * | buffer, | |||
| unsigned int | length, | |||
| unsigned int | done | |||
| ) |
Provides a new buffer for the tokenizer to continue tokenizing.
FAXPP_tokenizer_release_buffer() should have been called before this, and the remaining data in the old buffer transferred to the new one.
| tokenizer | ||
| buffer | A pointer to the start of the buffer to tokenize | |
| length | The length of the given buffer | |
| done | Set to non-zero if this is the last buffer from the input |
| NO_ERROR |
| FAXPP_Tokenizer * FAXPP_create_tokenizer | ( | FAXPP_Transcoder | encode | ) |
Creates a tokenizer object.
| encode | The transcoder to use when encoding token values |
| void FAXPP_free_tokenizer | ( | FAXPP_Tokenizer * | tokenizer | ) |
Frees a tokenizer object.
| tokenizer | The tokenizer to free |
| const FAXPP_Token * FAXPP_get_current_token | ( | const FAXPP_Tokenizer * | tokenizer | ) |
Returns the current token produced by the tokenizer when FAXPP_next_token() was called.
| tokenizer |
| FAXPP_DecodeFunction FAXPP_get_tokenizer_decode | ( | const FAXPP_Tokenizer * | tokenizer | ) |
Returns the current FAXPP_DecodeFunction that the tokenizer is using.
| tokenizer |
| unsigned int FAXPP_get_tokenizer_error_column | ( | const FAXPP_Tokenizer * | tokenizer | ) |
Returns the column that the current error occurred on.
| tokenizer |
| unsigned int FAXPP_get_tokenizer_error_line | ( | const FAXPP_Tokenizer * | tokenizer | ) |
Returns the line that the current error occurred on.
| tokenizer |
| unsigned int FAXPP_get_tokenizer_nesting_level | ( | const FAXPP_Tokenizer * | tokenizer | ) |
Returns the current element nesting level in the XML document.
| tokenizer |
| FAXPP_Error FAXPP_init_tokenize | ( | FAXPP_Tokenizer * | tokenizer, | |
| void * | buffer, | |||
| unsigned int | length, | |||
| unsigned int | done | |||
| ) |
Initialize the tokenizer to tokenize the given buffer, returning strings encoded using the given encoding function.
| tokenizer | The tokenizer to initialize | |
| buffer | A pointer to the start of the buffer to tokenize | |
| length | The length of the given buffer | |
| done | Set to non-zero if this is the last buffer from the input |
| UNSUPPORTED_ENCODING | If the encoding sniffing algorithm cannot recognize the encoding of the buffer | |
| NO_ERROR |
| FAXPP_Error FAXPP_next_token | ( | FAXPP_Tokenizer * | tokenizer | ) |
Reads the next token from the buffer, placing the information for it into the current token.
| tokenizer |
| DOUBLE_DASH_IN_COMMENT | ||
| PREMATURE_END_OF_BUFFER | ||
| INVALID_START_OF_COMMENT | ||
| INVALID_CHAR_IN_START_ELEMENT | ||
| INVALID_CHAR_IN_ATTRIBUTE | ||
| INVALID_CHAR_IN_END_ELEMENT | ||
| NON_WHITESPACE_OUTSIDE_DOC_ELEMENT | ||
| BAD_ENCODING | ||
| UNSUPPORTED_ENCODING | ||
| ADDITIONAL_DOCUMENT_ELEMENT | ||
| INVALID_CHAR_IN_PI_NAME | ||
| INVALID_PI_NAME_OF_XML | ||
| INVALID_CHAR_IN_ELEMENT_NAME | ||
| INVALID_CHAR_IN_ATTRIBUTE_NAME | ||
| RESTRICTED_CHAR | ||
| INVALID_CHAR_IN_ENTITY_REFERENCE | ||
| INVALID_CHAR_IN_CHAR_REFERENCE | ||
| INVALID_CHAR_IN_XML_DECL | ||
| EXPECTING_EQUALS | ||
| EXPECTING_WHITESPACE | ||
| UNKNOWN_XML_VERSION | ||
| INVALID_ENCODING_VALUE | ||
| OUT_OF_MEMORY | ||
| NO_ERROR |
| void FAXPP_set_tokenizer_decode | ( | FAXPP_Tokenizer * | tokenizer, | |
| FAXPP_DecodeFunction | decode | |||
| ) |
Sets the FAXPP_DecodeFunction that the tokenizer uses to decode the XML document.
This will typically be called when an encoding declaration is read, to switch to the correct decode function.
| tokenizer | ||
| decode | The decode function |
| FAXPP_Error FAXPP_tokenizer_release_buffer | ( | FAXPP_Tokenizer * | tokenizer, | |
| void ** | buffer_position | |||
| ) |
Instructs the tokenizer to release any dependencies it has on its current buffer.
This is typically called on receiving a PREMATURE_END_OF_BUFFER error, before using FAXPP_continue_tokenize() to provide a new buffer. In this case, the buffer data between *buffer_position and the end of the buffer needs to be copied into the start of the new buffer.
| tokenizer | ||
| [out] | buffer_position | Set to a pointer in the current buffer that the tokenizer has tokenized up to |
| OUT_OF_MEMORY | ||
| NO_ERROR |
1.5.1