mirror of
https://github.com/stefanocasazza/ULib.git
synced 2025-09-28 19:05:55 +08:00
367 lines
12 KiB
C++
367 lines
12 KiB
C++
// ============================================================================
|
|
//
|
|
// = LIBRARY
|
|
// ULib - c++ library
|
|
//
|
|
// = FILENAME
|
|
// document.h - wrapping of libxml2
|
|
//
|
|
// = AUTHOR
|
|
// Stefano Casazza
|
|
//
|
|
// ============================================================================
|
|
|
|
#ifndef ULIB_UXML2DOCUMENT_H
|
|
#define ULIB_UXML2DOCUMENT_H 1
|
|
|
|
#include <ulib/xml/libxml2/node.h>
|
|
|
|
/*
|
|
Represents an XML document in the DOM model.
|
|
|
|
struct _xmlDoc {
|
|
void* _private; // application data
|
|
xmlElementType type; // XML_DOCUMENT_NODE, must be second !
|
|
char* name; // name/filename/URI of the document
|
|
struct _xmlNode* children; // the document tree
|
|
struct _xmlNode* last; // last child link
|
|
struct _xmlNode* parent; // child->parent link
|
|
struct _xmlNode* next; // next sibling link
|
|
struct _xmlNode* prev; // previous sibling link
|
|
struct _xmlDoc* doc; // autoreference to itself
|
|
// End of common part
|
|
int compression; // level of zlib compression
|
|
int standalone; // standalone document (no external refs)
|
|
// 1 if standalone = "yes"
|
|
// 0 if standalone = "no"
|
|
// -1 if there is no XML declaration
|
|
// -2 if there is an XML declaration, but no standalone attribute was specified
|
|
struct _xmlDtd* intSubset; // the document internal subset
|
|
struct _xmlDtd* extSubset; // the document external subset
|
|
struct _xmlNs* oldNs; // Global namespace, the old way
|
|
const xmlChar* version; // the XML version string
|
|
const xmlChar* encoding; // external initial encoding, if any
|
|
void* ids; // Hash table for ID attributes if any
|
|
void* refs; // Hash table for IDREFs attributes if any
|
|
const xmlChar* URL; // The URI for that document
|
|
int charset; // encoding of the in-memory content, actually an xmlCharEncoding
|
|
struct _xmlDict* dict; // dict used to allocate names or NULL
|
|
void* psvi; // for type/PSVI information
|
|
int parseFlags; // set of xmlParserOption used to parse the document
|
|
int properties; // set of xmlDocProperties for this document set at the end of parsing
|
|
};
|
|
*/
|
|
|
|
class UDSIGContext;
|
|
|
|
class U_EXPORT UXML2Document {
|
|
public:
|
|
|
|
// Check for memory error
|
|
U_MEMORY_TEST
|
|
|
|
// Allocator e Deallocator
|
|
U_MEMORY_ALLOCATOR
|
|
U_MEMORY_DEALLOCATOR
|
|
|
|
UXML2Document()
|
|
{
|
|
U_TRACE_REGISTER_OBJECT(0, UXML2Document, "", 0)
|
|
|
|
impl_ = (xmlDocPtr) U_SYSCALL(xmlNewDoc, "%S", (xmlChar*)"1.0");
|
|
}
|
|
|
|
UXML2Document(const UString& data);
|
|
|
|
~UXML2Document()
|
|
{
|
|
U_TRACE_UNREGISTER_OBJECT(0, UXML2Document)
|
|
|
|
U_SYSCALL_VOID(xmlFreeDoc, "%p", impl_);
|
|
}
|
|
|
|
// SERVICES
|
|
|
|
uint32_t getElement(UVector<UString>& velement, const char* tag, uint32_t tag_len);
|
|
uint32_t getElement(UString& element, uint32_t pos, const char* tag, uint32_t tag_len);
|
|
UString getElementData( uint32_t pos, const char* tag, uint32_t tag_len);
|
|
|
|
/**
|
|
* getEncoding()
|
|
*
|
|
* @return The encoding used in the source from which the document has been loaded
|
|
*/
|
|
|
|
const char* getEncoding() const { return (const char*)impl_->encoding; }
|
|
|
|
xmlDtdPtr getInternalSubset() const
|
|
{
|
|
U_TRACE_NO_PARAM(1, "UXML2Document::getInternalSubset()")
|
|
|
|
U_INTERNAL_ASSERT_POINTER(impl_)
|
|
|
|
xmlDtdPtr dtd = (xmlDtdPtr) U_SYSCALL(xmlGetIntSubset, "%p", impl_);
|
|
|
|
U_RETURN_POINTER(dtd,_xmlDtd);
|
|
}
|
|
|
|
xmlDtdPtr setInternalSubset(const char* name, const char* external_id = 0, const char* system_id = 0)
|
|
{
|
|
U_TRACE(1, "UXML2Document::setInternalSubset(%S,%S,%S)", name, external_id, system_id)
|
|
|
|
U_INTERNAL_ASSERT_POINTER(impl_)
|
|
|
|
xmlDtdPtr dtd = (xmlDtdPtr) U_SYSCALL(xmlCreateIntSubset, "%p,%S,%S,%S", impl_, (const xmlChar*)name,
|
|
(const xmlChar*)external_id,
|
|
(const xmlChar*)system_id);
|
|
|
|
U_RETURN_POINTER(dtd,_xmlDtd);
|
|
}
|
|
|
|
/**
|
|
* Return the root node.
|
|
*
|
|
* This function does _not_ create a default root node if it doesn't exist.
|
|
*
|
|
* @return A pointer to the root node if it exists, 0 otherwise
|
|
*/
|
|
|
|
xmlNodePtr getRootNode() const
|
|
{
|
|
U_TRACE(1, "UXML2Document::getRootNode()")
|
|
|
|
U_INTERNAL_ASSERT_POINTER(impl_)
|
|
|
|
xmlNodePtr root = (xmlNodePtr) U_SYSCALL(xmlDocGetRootElement, "%p", impl_);
|
|
|
|
U_RETURN_POINTER(root,_xmlNode);
|
|
}
|
|
|
|
/**
|
|
* Creates the root node.
|
|
*
|
|
* @param name The node's name.
|
|
* @param ns_uri The namespace URI. A namspace declaration will be added to this node, because it could not have been declared before.
|
|
* @param ns_prefix The namespace prefix to associate with the namespace. If no namespace prefix is specified then the namespace URI will
|
|
* be the default namespace.
|
|
*
|
|
* @return A pointer to the new root node
|
|
*/
|
|
|
|
xmlNodePtr createRootNode(const char* name, const char* ns_uri = 0, const char* ns_prefix = 0)
|
|
{
|
|
U_TRACE(1, "UXML2Document::setInternalSubset(%S,%S,%S)", name, ns_uri, ns_prefix)
|
|
|
|
U_INTERNAL_ASSERT_POINTER(impl_)
|
|
|
|
xmlNodePtr node = (xmlNodePtr) U_SYSCALL(xmlNewDocNode, "%p,%p,%S,%S", impl_, 0, (const xmlChar*)name, 0);
|
|
|
|
U_SYSCALL_VOID(xmlDocSetRootElement, "%p,%p", impl_, node);
|
|
|
|
xmlNodePtr root = getRootNode();
|
|
|
|
if (ns_uri) UXML2Node(root).setNameSpaceDeclaration(ns_uri, ns_prefix);
|
|
|
|
U_RETURN_POINTER(root,_xmlNode);
|
|
}
|
|
|
|
/**
|
|
* Creates a root node by importing the node from another document, without affecting the source node.
|
|
*
|
|
* @param node The node to copy and insert as the root node of the document
|
|
* @param recursive Whether to import the child nodes also. Defaults to true.
|
|
*
|
|
* @return A pointer to the new root node
|
|
*/
|
|
|
|
xmlNodePtr createRootNodeByImport(const xmlNodePtr node, bool recursive = true)
|
|
{
|
|
U_TRACE(1, "UXML2Document::createRootNodeByImport(%p,%b)", node, recursive)
|
|
|
|
U_INTERNAL_ASSERT_POINTER(impl_)
|
|
|
|
xmlNodePtr imported_node = (xmlNodePtr) U_SYSCALL(xmlDocCopyNode, "%p,%p,%b", node, impl_, recursive);
|
|
|
|
if (imported_node) U_SYSCALL_VOID(xmlDocSetRootElement, "%p,%p", impl_, node);
|
|
|
|
xmlNodePtr root = getRootNode();
|
|
|
|
U_RETURN_POINTER(root,_xmlNode);
|
|
}
|
|
|
|
/**
|
|
* Append a new comment node.
|
|
*
|
|
* @param content The text. This should be unescaped
|
|
*
|
|
* @returns The new comment node
|
|
*/
|
|
|
|
xmlNodePtr addComment(const char* content)
|
|
{
|
|
U_TRACE(1, "UXML2Document::addComment(%S)", content)
|
|
|
|
U_INTERNAL_ASSERT_POINTER(impl_)
|
|
U_INTERNAL_ASSERT_POINTER(impl_->doc)
|
|
|
|
xmlNodePtr node = (xmlNodePtr) U_SYSCALL(xmlNewComment, "%S", (const xmlChar*)content);
|
|
|
|
// Use the result, because node can be freed when merging text nodes:
|
|
|
|
if (node) node = (xmlNodePtr) U_SYSCALL(xmlAddChild, "%p,%p", (xmlNodePtr)impl_, node);
|
|
|
|
U_RETURN_POINTER(node,_xmlNode);
|
|
}
|
|
|
|
/**
|
|
* Add an Entity declaration to the document.
|
|
*
|
|
* @param name The name of the entity that will be used in an entity reference.
|
|
* @param type The type of entity.
|
|
* @param publicId The public ID of the subset.
|
|
* @param systemId The system ID of the subset.
|
|
* @param content The value of the Entity. In entity reference substitutions, this is the replacement value
|
|
*/
|
|
|
|
xmlEntityPtr setEntityDeclaration(const char* name, xmlEntityType type, const char* publicId, const char* systemId, const char* content)
|
|
{
|
|
U_TRACE(1, "UXML2Document::setEntityDeclaration(%S,%d,%S,%S,%S)", name, type, publicId, systemId, content)
|
|
|
|
U_INTERNAL_ASSERT_POINTER(impl_)
|
|
|
|
xmlEntityPtr entity = (xmlEntityPtr) U_SYSCALL(xmlAddDocEntity, "%p,%S,%d,%S,%S,%S", impl_, (const xmlChar*)name, type,
|
|
(const xmlChar*)publicId,
|
|
(const xmlChar*)systemId,
|
|
(const xmlChar*)content);
|
|
|
|
U_RETURN_POINTER(entity,_xmlEntity);
|
|
}
|
|
|
|
/**
|
|
* Searches all children of the @parent node having given name and namespace href.
|
|
*
|
|
* @param parent the pointer to XML node.
|
|
* @param name the name.
|
|
* @param ns the namespace href (may be NULL).
|
|
*
|
|
* @return the pointer to the found node or NULL if an error occurs or node is not found
|
|
*/
|
|
|
|
xmlNodePtr findNode(const xmlNodePtr parent, const xmlChar* name, const xmlChar* ns);
|
|
|
|
/**
|
|
* Searches a direct child of the @parent node having given name and namespace href.
|
|
*
|
|
* @param parent the pointer to XML node.
|
|
* @param name the name.
|
|
* @param ns the namespace href (may be NULL).
|
|
*
|
|
* @return the pointer to the found node or NULL if an error occurs or node is not found
|
|
*/
|
|
|
|
xmlNodePtr findChild(const xmlNodePtr parent, const xmlChar* name, const xmlChar* ns);
|
|
|
|
/**
|
|
* Searches the ancestors axis of the @cur node for a node having given name and namespace href.
|
|
*
|
|
* @param cur the pointer to an XML node.
|
|
* @param name the name.
|
|
* @param ns the namespace href (may be NULL).
|
|
*
|
|
* @return the pointer to the found node or NULL if an error occurs or node is not found
|
|
*/
|
|
|
|
xmlNodePtr findParent(const xmlNodePtr cur, const xmlChar* name, const xmlChar* ns);
|
|
|
|
/**
|
|
* Write the document to a file.
|
|
*
|
|
* @param filename
|
|
* @param encoding If not provided, UTF-8 is used
|
|
* @param formatted The output is formatted by inserting whitespaces, which is easier to read for a human,
|
|
* but may insert unwanted significant whitespaces. Use with care !
|
|
*/
|
|
|
|
bool writeToFile(const char* filename, const char* encoding = 0, bool formatted = false);
|
|
|
|
/**
|
|
* Write the document to the memory.
|
|
*
|
|
* @param encoding If not provided, UTF-8 is used
|
|
* @param formatted The output is formatted by inserting whitespaces, which is easier to read for a human,
|
|
* but may insert unwanted significant whitespaces. Use with care !
|
|
*
|
|
* @return The written document
|
|
*/
|
|
|
|
xmlChar* writeToString(int& length, const char* encoding = 0, bool formatted = false);
|
|
|
|
/**
|
|
* Canonical XML implementation (http://www.w3.org/TR/2001/REC-xml-c14n-20010315)
|
|
*
|
|
* Enum xmlC14NMode {
|
|
* XML_C14N_1_0 = 0 : Origianal C14N 1.0 spec
|
|
* XML_C14N_EXCLUSIVE_1_0 = 1 : Exclusive C14N 1.0 spec
|
|
* XML_C14N_1_1 = 2 : C14N 1.1 spec
|
|
* }
|
|
*/
|
|
|
|
UString xmlC14N( int mode = 2, int with_comments = 0, unsigned char** inclusive_namespaces = 0);
|
|
static UString xmlC14N(const UString& data, int mode = 2, int with_comments = 0, unsigned char** inclusive_namespaces = 0);
|
|
|
|
// Access the underlying libxml2 implementation.
|
|
|
|
xmlDocPtr cobj() { return impl_; }
|
|
|
|
#if defined(U_STDCPP_ENABLE) && defined(DEBUG)
|
|
const char* dump(bool reset) const;
|
|
#endif
|
|
|
|
protected:
|
|
UString data;
|
|
xmlDocPtr impl_;
|
|
|
|
static bool binit;
|
|
|
|
static void init();
|
|
|
|
UXML2Document(xmlDocPtr doc) : impl_(doc)
|
|
{
|
|
U_TRACE_REGISTER_OBJECT(0, UXML2Document, "%p", doc)
|
|
}
|
|
|
|
/**
|
|
* Retrieve an Entity.
|
|
*
|
|
* The entity can be from an external subset or internally declared.
|
|
*
|
|
* @param name Then name of the entity to get.
|
|
*
|
|
* @returns A pointer to the libxml2 entity structure
|
|
*/
|
|
|
|
xmlEntityPtr getEntity(const char* name)
|
|
{
|
|
U_TRACE(1, "UXML2Document::getEntity(%S)", name)
|
|
|
|
U_INTERNAL_ASSERT_POINTER(impl_)
|
|
|
|
xmlEntityPtr node = (xmlEntityPtr) U_SYSCALL(xmlGetDocEntity, "%p,%S", impl_, (const xmlChar*)name);
|
|
|
|
U_RETURN_POINTER(node,_xmlEntity);
|
|
}
|
|
|
|
private:
|
|
#ifdef U_COMPILER_DELETE_MEMBERS
|
|
UXML2Document(const UXML2Document&) = delete;
|
|
UXML2Document& operator=(const UXML2Document&) = delete;
|
|
#else
|
|
UXML2Document(const UXML2Document&) {}
|
|
UXML2Document& operator=(const UXML2Document&) { return *this; }
|
|
#endif
|
|
|
|
friend class UDSIGContext;
|
|
};
|
|
|
|
#endif
|