#include <stdio.h>
#include "error.h"
#include "transcode.h"
#include "event.h"
Include dependency graph for parser.h:
Go to the source code of this file.
Typedefs | |
typedef FAXPP_ParserEnv_s | FAXPP_Parser |
The parser structure. Details of the structure are private. | |
typedef unsigned int(*) | FAXPP_ReadCallback (void *userData, void *buffer, unsigned int length) |
The function called when faxpp receives a PREMATURE_END_OF_BUFFER error from the tokenizer. | |
typedef FAXPP_Error(*) | FAXPP_ExternalEntityCallback (void *userData, FAXPP_Parser *parser, FAXPP_EntityType type, const FAXPP_Text *base_uri, const FAXPP_Text *system_id, const FAXPP_Text *public_id) |
The function called when faxpp finds a reference to an external parsed entity. | |
Enumerations | |
enum | FAXPP_ParseMode { NO_CHECKS_PARSE_MODE, WELL_FORMED_PARSE_MODE } |
The type of checks to perform whilst parsing. More... | |
enum | FAXPP_EntityType { EXTERNAL_PARSED_ENTITY = 0, EXTERNAL_SUBSET_ENTITY = 1, EXTERNAL_IN_MARKUP_ENTITY = 2 } |
The type of external entity to parse. More... | |
Functions | |
FAXPP_Parser * | FAXPP_create_parser (FAXPP_ParseMode mode, FAXPP_Transcoder encode) |
Creates a parser object. | |
void | FAXPP_free_parser (FAXPP_Parser *parser) |
Frees a parser object. | |
void | FAXPP_set_null_terminate (FAXPP_Parser *parser, unsigned int boolean) |
Sets whether the parser will null terminate the strings in the event values. | |
void | FAXPP_set_normalize_attrs (FAXPP_Parser *parser, unsigned int boolean) |
Sets whether the parser will normalize attributes values into a single string. | |
void | FAXPP_set_encode (FAXPP_Parser *parser, FAXPP_Transcoder encode) |
Sets the transcoder that the parser will use when encoding event values. | |
FAXPP_DecodeFunction | FAXPP_get_decode (const FAXPP_Parser *parser) |
Returns the current FAXPP_DecodeFunction that the parser is using. | |
void | FAXPP_set_decode (FAXPP_Parser *parser, FAXPP_DecodeFunction decode) |
Sets the FAXPP_DecodeFunction that the parser uses to decode the XML document. | |
const FAXPP_Text * | FAXPP_get_base_uri (const FAXPP_Parser *parser) |
Gets the base URI for the file currently being parsed. | |
FAXPP_Error | FAXPP_set_base_uri (FAXPP_Parser *parser, const FAXPP_Text *base_uri) |
Sets the base URI for the file currently being parsed. | |
FAXPP_Error | FAXPP_set_base_uri_str (FAXPP_Parser *parser, const char *base_uri) |
Sets the base URI for the file currently being parsed. | |
void | FAXPP_set_external_entity_callback (FAXPP_Parser *parser, FAXPP_ExternalEntityCallback callback, void *userData) |
Sets the FAXPP_ExternalEntityCallback that the parser will call when it encounters a reference to an external parsed entity. | |
FAXPP_Error | FAXPP_init_parse (FAXPP_Parser *parser, void *buffer, unsigned int length, unsigned int done) |
Initialize the parser to parse the given buffer. | |
FAXPP_Error | FAXPP_init_parse_file (FAXPP_Parser *parser, FILE *file) |
Initialize the parser to parse the given file. | |
FAXPP_Error | FAXPP_init_parse_callback (FAXPP_Parser *parser, FAXPP_ReadCallback callback, void *userData) |
Initialize the parser to parse using the given read callback. | |
FAXPP_Error | FAXPP_parse_external_entity (FAXPP_Parser *parser, FAXPP_EntityType type, void *buffer, unsigned int length, unsigned int done) |
Interrupts parsing to parse the external entity in the given buffer. | |
FAXPP_Error | FAXPP_parse_external_entity_file (FAXPP_Parser *parser, FAXPP_EntityType type, FILE *file) |
Interrupts parsing to parse the external entity from the given file. | |
FAXPP_Error | FAXPP_parse_external_entity_callback (FAXPP_Parser *parser, FAXPP_EntityType type, FAXPP_ReadCallback callback, void *userData) |
Interrupts parsing to parse the external entity using the given read callback. | |
FAXPP_Error | FAXPP_release_buffer (FAXPP_Parser *parser, void **buffer_position) |
Instructs the parser to release any dependencies it has on its current buffer. | |
FAXPP_Error | FAXPP_continue_parse (FAXPP_Parser *parser, void *buffer, unsigned int length, unsigned int done) |
Provides a new buffer for the parser to continue parsing. | |
FAXPP_Error | FAXPP_next_event (FAXPP_Parser *parser) |
Parses the next event, placing the information for it into the current event. | |
const FAXPP_Event * | FAXPP_get_current_event (const FAXPP_Parser *parser) |
Returns the current event produced by the parser when FAXPP_next_event() was called. | |
FAXPP_Error | FAXPP_lookup_namespace_uri (const FAXPP_Parser *parser, const FAXPP_Text *prefix, FAXPP_Text *uri) |
Look up the given prefix in the parser's namespace mappings, returning the namespace URI in the uri parameter. | |
unsigned int | FAXPP_get_nesting_level (const FAXPP_Parser *parser) |
Returns the current element nesting level in the XML document. | |
unsigned int | FAXPP_get_error_line (const FAXPP_Parser *parser) |
Returns the line that the current error occurred on. | |
unsigned int | FAXPP_get_error_column (const FAXPP_Parser *parser) |
Returns the column that the current error occurred on. |
typedef FAXPP_Error(*) FAXPP_ExternalEntityCallback(void *userData, FAXPP_Parser *parser, FAXPP_EntityType type, const FAXPP_Text *base_uri, const FAXPP_Text *system_id, const FAXPP_Text *public_id) |
The function called when faxpp finds a reference to an external parsed entity.
The function should locate the entity using its system and public identifiers and call FAXPP_parse_external_entity(), FAXPP_parse_external_entity_callback() or FAXPP_parse_external_entity_file() to parse the external entity. The base URI provided is the one supplied by the user using FAXPP_set_base_uri() or FAXPP_set_base_uri_str() for the file that the entity declaration was in.
userData | The user data supplied to the FAXPP_set_external_entity_callback() method | |
parser | A pointer to the parser | |
type | The type of external entity to locate | |
base_uri | The base URI for the entity declaration | |
system_id | The entity's system identifier | |
public_id | The entity's public identifier |
typedef struct FAXPP_ParserEnv_s FAXPP_Parser |
typedef unsigned int(*) FAXPP_ReadCallback(void *userData, void *buffer, unsigned int length) |
The function called when faxpp receives a PREMATURE_END_OF_BUFFER error from the tokenizer.
The function should read the next chunk of input into the buffer provided, returning the length of the data read.
userData | The user data supplied to the FAXPP_init_parse_callback() method | |
[out] | buffer | The buffer to read the data into |
length | The length of the buffer |
enum FAXPP_EntityType |
enum FAXPP_ParseMode |
The type of checks to perform whilst parsing.
FAXPP_Error FAXPP_continue_parse | ( | FAXPP_Parser * | parser, | |
void * | buffer, | |||
unsigned int | length, | |||
unsigned int | done | |||
) |
Provides a new buffer for the parser to continue parsing.
FAXPP_release_buffer() should have been called before this, and the remaining data in the old buffer transferred to the new one.
parser | ||
buffer | A pointer to the start of the buffer to parse | |
length | The length of the given buffer | |
done | Set to non-zero if this is the last buffer from the input |
NO_ERROR |
FAXPP_Parser * FAXPP_create_parser | ( | FAXPP_ParseMode | mode, | |
FAXPP_Transcoder | encode | |||
) |
Creates a parser object.
mode | The type of checks the parser should perform | |
encode | The transcoder to use when encoding event values |
void FAXPP_free_parser | ( | FAXPP_Parser * | parser | ) |
Frees a parser object.
parser | The parser to free |
const FAXPP_Text * FAXPP_get_base_uri | ( | const FAXPP_Parser * | parser | ) |
Gets the base URI for the file currently being parsed.
This is set by the user using FAXPP_set_base_uri() or FAXPP_set_base_uri_str().
The base URI returned will be in the encoding that it was provided in to the FAXPP_set_base_uri() function.
parser |
const FAXPP_Event * FAXPP_get_current_event | ( | const FAXPP_Parser * | parser | ) |
Returns the current event produced by the parser when FAXPP_next_event() was called.
parser |
FAXPP_DecodeFunction FAXPP_get_decode | ( | const FAXPP_Parser * | parser | ) |
Returns the current FAXPP_DecodeFunction that the parser is using.
parser |
unsigned int FAXPP_get_error_column | ( | const FAXPP_Parser * | parser | ) |
Returns the column that the current error occurred on.
parser |
unsigned int FAXPP_get_error_line | ( | const FAXPP_Parser * | parser | ) |
Returns the line that the current error occurred on.
parser |
unsigned int FAXPP_get_nesting_level | ( | const FAXPP_Parser * | parser | ) |
Returns the current element nesting level in the XML document.
parser |
FAXPP_Error FAXPP_init_parse | ( | FAXPP_Parser * | parser, | |
void * | buffer, | |||
unsigned int | length, | |||
unsigned int | done | |||
) |
Initialize the parser to parse the given buffer.
This will halt any parse that was already in progress.
The buffer provided must remain valid and unchanged during the time that the parser is using it, since a copy of it is not made. The user remains responsible for deleting the buffer.
parser | The parser to initialize | |
buffer | A pointer to the start of the buffer to parse | |
length | The length of the given buffer | |
done | Set to non-zero if this is the last buffer from the input |
UNSUPPORTED_ENCODING | If the encoding sniffing algorithm cannot recognize the encoding of the buffer | |
OUT_OF_MEMORY | ||
NO_ERROR |
FAXPP_Error FAXPP_init_parse_callback | ( | FAXPP_Parser * | parser, | |
FAXPP_ReadCallback | callback, | |||
void * | userData | |||
) |
Initialize the parser to parse using the given read callback.
This will halt any parse that was already in progress.
parser | The parser to initialize | |
callback | The read callback function to use to retrieve the parse input | |
userData | The user data to be passed to the callback function when it is called |
UNSUPPORTED_ENCODING | If the encoding sniffing algorithm cannot recognize the encoding of the buffer | |
OUT_OF_MEMORY | ||
NO_ERROR |
FAXPP_Error FAXPP_init_parse_file | ( | FAXPP_Parser * | parser, | |
FILE * | file | |||
) |
Initialize the parser to parse the given file.
This will halt any parse that was already in progress.
The file provided must remain valid during the time that the parser is using it. The user remains responsible for closing the file after parsing has ended.
parser | The parser to initialize | |
file | The FILE stream of the file to parse |
UNSUPPORTED_ENCODING | If the encoding sniffing algorithm cannot recognize the encoding of the buffer | |
OUT_OF_MEMORY | ||
NO_ERROR |
FAXPP_Error FAXPP_lookup_namespace_uri | ( | const FAXPP_Parser * | parser, | |
const FAXPP_Text * | prefix, | |||
FAXPP_Text * | uri | |||
) |
Look up the given prefix in the parser's namespace mappings, returning the namespace URI in the uri parameter.
This method will not work correctly if the NO_CHECKS_PARSE_MODE is used, since the namespace mappings are not maintained in this mode.
parser | ||
prefix | The prefix to look up | |
[out] | uri | The URI that the prefix maps to |
NO_URI_FOR_PREFIX | If a URI cannot be found | |
NO_ERROR |
FAXPP_Error FAXPP_next_event | ( | FAXPP_Parser * | parser | ) |
Parses the next event, placing the information for it into the current event.
parser |
FAXPP_Error FAXPP_parse_external_entity | ( | FAXPP_Parser * | parser, | |
FAXPP_EntityType | type, | |||
void * | buffer, | |||
unsigned int | length, | |||
unsigned int | done | |||
) |
Interrupts parsing to parse the external entity in the given buffer.
Any parsing that was previously underway will continue when the external entity has been parsed. This method is usually called when an ENTITY_REFERENCE_EVENT is encountered with a non-null public or system identifier, in order to parse the external entity it points to.
The buffer provided must remain valid and unchanged during the time that the parser is using it, since a copy of it is not made. The user remains responsible for deleting the buffer.
parser | The parser to use | |
type | The type of external entity to parse | |
buffer | A pointer to the start of the buffer to parse | |
length | The length of the given buffer | |
done | Set to non-zero if this is the last buffer from the external entity |
UNSUPPORTED_ENCODING | If the encoding sniffing algorithm cannot recognize the encoding of the buffer | |
OUT_OF_MEMORY | ||
NO_ERROR |
FAXPP_Error FAXPP_parse_external_entity_callback | ( | FAXPP_Parser * | parser, | |
FAXPP_EntityType | type, | |||
FAXPP_ReadCallback | callback, | |||
void * | userData | |||
) |
Interrupts parsing to parse the external entity using the given read callback.
Any parsing that was previously underway will continue when the external entity has been parsed. This method is usually called when an ENTITY_REFERENCE_EVENT is encountered with a non-null public or system identifier, in order to parse the external entity it points to.
parser | The parser to initialize | |
type | The type of external entity to parse | |
callback | The read callback function to use to retrieve the parse input | |
userData | The user data to be passed to the callback function when it is called |
UNSUPPORTED_ENCODING | If the encoding sniffing algorithm cannot recognize the encoding of the buffer | |
OUT_OF_MEMORY | ||
NO_ERROR |
FAXPP_Error FAXPP_parse_external_entity_file | ( | FAXPP_Parser * | parser, | |
FAXPP_EntityType | type, | |||
FILE * | file | |||
) |
Interrupts parsing to parse the external entity from the given file.
Any parsing that was previously underway will continue when the external entity has been parsed. This method is usually called when an ENTITY_REFERENCE_EVENT is encountered with a non-null public or system identifier, in order to parse the external entity it points to.
The file provided must remain valid during the time that the parser is using it. The user remains responsible for closing the file after parsing has ended.
parser | The parser to initialize | |
type | The type of external entity to parse | |
file | The FILE stream of the file to parse |
UNSUPPORTED_ENCODING | If the encoding sniffing algorithm cannot recognize the encoding of the buffer | |
OUT_OF_MEMORY | ||
NO_ERROR |
FAXPP_Error FAXPP_release_buffer | ( | FAXPP_Parser * | parser, | |
void ** | buffer_position | |||
) |
Instructs the parser to release any dependencies it has on its current buffer.
This is typically called on receiving a PREMATURE_END_OF_BUFFER error, before using FAXPP_continue_parse() to provide a new buffer. In this case, the buffer data between *buffer_position and the end of the buffer needs to be copied into the start of the new buffer.
parser | ||
[out] | buffer_position | Set to a pointer in the current buffer that the tokenizer has tokenized up to |
OUT_OF_MEMORY | ||
NO_ERROR |
FAXPP_Error FAXPP_set_base_uri | ( | FAXPP_Parser * | parser, | |
const FAXPP_Text * | base_uri | |||
) |
Sets the base URI for the file currently being parsed.
This is passed to the FAXPP_ExternalEntityCallback set using FAXPP_set_external_entity_callback().
A copy of the base_uri will be kept internally, so neither the FAXPP_Text object nor the buffer it points to need exist after a call to FAXPP_set_base_uri(). The encoding of the base URI is irrelevant to FAXPP - the base URI will be in the same encoding when it is passed back to the FAXPP_ExternalEntityCallback.
parser | ||
base_uri | The base URI |
OUT_OF_MEMORY | ||
NO_ERROR |
FAXPP_Error FAXPP_set_base_uri_str | ( | FAXPP_Parser * | parser, | |
const char * | base_uri | |||
) |
Sets the base URI for the file currently being parsed.
This is passed to the FAXPP_ExternalEntityCallback set using FAXPP_set_external_entity_callback().
A copy of the base_uri will be kept internally, so the string need not exist after a call to FAXPP_set_base_uri_str().
parser | ||
base_uri | The base URI |
OUT_OF_MEMORY | ||
NO_ERROR |
void FAXPP_set_decode | ( | FAXPP_Parser * | parser, | |
FAXPP_DecodeFunction | decode | |||
) |
Sets the FAXPP_DecodeFunction that the parser uses to decode the XML document.
This will typically be called when an encoding declaration is read, to switch to the correct decode function.
This method can also be called after initialising the parser, to specify that the encoding for the document is known, and all other encoding hints should be ignored. Note that it is valid to call this method and proceed with a document parse when parser initialisation fails with the UNSUPPORTED_ENCODING error.
parser | ||
decode | The decode function |
void FAXPP_set_encode | ( | FAXPP_Parser * | parser, | |
FAXPP_Transcoder | encode | |||
) |
Sets the transcoder that the parser will use when encoding event values.
Setting this parameter whilst a parse is in progress has undefined results.
parser | ||
encode | The transcoder to use when encoding event values |
void FAXPP_set_external_entity_callback | ( | FAXPP_Parser * | parser, | |
FAXPP_ExternalEntityCallback | callback, | |||
void * | userData | |||
) |
Sets the FAXPP_ExternalEntityCallback that the parser will call when it encounters a reference to an external parsed entity.
parser | ||
callback | The callback function | |
userData | The user data passed when the function is called |
void FAXPP_set_normalize_attrs | ( | FAXPP_Parser * | parser, | |
unsigned int | boolean | |||
) |
Sets whether the parser will normalize attributes values into a single string.
This option is off by default for NO_CHECKS_PARSE_MODE, and on for other parser modes.
The XML specification requires conformant parsers to normalize attribute values by expanding entity references and turning all whitespace into #x20 (space) characters. This option will have no effect on a parser in NO_CHECKS_PARSE_MODE, since this mode will never normalize attribute values.
Setting this parameter whilst a parse is in progress has undefined results.
Normalizing attribute values will involve copying the strings, and so will be slower where copying strings was not otherwise necessary.
parser | ||
boolean | Whether to normalize attribute values |
void FAXPP_set_null_terminate | ( | FAXPP_Parser * | parser, | |
unsigned int | boolean | |||
) |
Sets whether the parser will null terminate the strings in the event values.
The default is not to null terminate strings, as this is generally more efficient.
Setting this parameter whilst a parse is in progress has undefined results.
Null terminating the event strings will involve copying the strings, and so will be slower where copying strings was not otherwise necessary. The FAXPP_Text::len field will not include the null in its count of bytes, and so will be identical whether the parser is null terminating strings or not.
parser | ||
boolean | Whether to null terminate the event strings or not |