// Mirror of https://github.com/stefanocasazza/ULib.git
// (synced 2025-09-28 19:05:55 +08:00) - 679 lines, 17 KiB, C++
// ============================================================================
//
// = LIBRARY
//    ULib - c++ library
//
// = FILENAME
//    url.h - Set and get the parts of a URL
//
// = AUTHOR
//    Stefano Casazza
//
// ============================================================================

#ifndef ULIB_URL_H
|
|
#define ULIB_URL_H 1
|
|
|
|
#include <ulib/string.h>
|
|
#include <ulib/base/coder/url.h>
|
|
|
|
/**
 * @class Url - proto://[user[:password]@]hostname[:port]/[path]?[query]
 *
 * Represents a <em>Uniform Resource Locator</em> (URL). This class provides the capability to parse
 * and manipulate URL strings.
 *
 * URI: <scheme>:<scheme-specific-part>
 * URL: <scheme>://<user>:<password>@<host>:<port>/<url-path>
 *
 * ------------------------------------------
 *  Scheme_Name  Description
 * ------------------------------------------
 *  ftp          File Transfer Protocol
 *  http         Hypertext Transfer Protocol
 *  news         USENET news
 *  nntp         USENET news using NNTP access
 *  wais         Wide Area Information Servers
 *  gopher       The Gopher Protocol
 *  mailto       Electronic mail address
 *  telnet       Reference to interactive sessions
 *  prospero     Prospero Directory Service
 * ------------------------------------------
 *
 * URN: "urn:" <NID> ":" <NSS>
 * NID specifies the Namespace ID and NSS specifies the Namespace Specific String.
 * A URN does not resolve to a unique, physical location. URNs serve as persistent resource identifiers.
 *
 * <h4>The URL Format</h4>
 * A URL is a string representation of a resource that is available via the Internet. The format of URLs
 * is formally defined in the IETF RFC 1738 which is available online at http://www.ietf.org/rfc/rfc1738.txt
 * (which is itself a URL!). The URL syntax is dependent upon the scheme. In general, absolute URLs are written as follows:
 * @c <scheme>:<scheme-specific-part>
 * A URL contains the name of the scheme being used (<scheme>) followed by a colon and then a string
 * (the <scheme-specific-part>) whose interpretation depends on the scheme. The URL syntax does not
 * require that the scheme-specific-part is common among all URLs; however, many forms of URL do share
 * a common syntax for representing hierarchical relationships. This "generic URL" syntax consists of
 * a sequence of four main components:
 * @c <scheme>://<authority><path>?<query>
 * The @b scheme is often the name of a network protocol which can be used to retrieve the resource
 * from the Internet. The words @a protocol and @a scheme are used interchangeably within this document.
 * The @b authority is comprised of three sub-components:
 * @c <userInfo@><host><:port>
 * The @b path is comprised of everything following the authority up to the query part. In contrast to
 * the description in RFC 1738, this class includes the "/" separator between the authority part and the
 * path as part of the path. The following examples are supported:
 *
 *    host
 *    /path
 *    host:port/path
 *    service:/path
 *    service://host
 *    service://path
 *    service://host:port
 *    user@bogus.example.com
 *    service://user@bogus.example.com/path
 *    service://host:port/path?query=1&query=2&query=3
 *    service://user@bogus.example.com:port/path?query=1&query=2
 */

// Expand a Url into the two-argument pair (data pointer, length).
// NB: the macro argument is evaluated twice, so it must not have side effects.
#define U_URL_TO_PARAM(url) (url).getUrlData(),(url).getUrlDataLen()

// Same pair in the opposite order (length, data pointer) - presumably for
// "%.*s"-style trace formats; the argument is evaluated twice here too.
#define U_URL_TO_TRACE(url) (url).getUrlDataLen(),(url).getUrlData()

// Forward declaration: Url only uses UVector by reference in its interface
template <class T> class UVector;

class U_EXPORT Url {
|
|
public:
|
|
|
|
// Check for memory error
|
|
U_MEMORY_TEST
|
|
|
|
// Allocator e Deallocator
|
|
U_MEMORY_ALLOCATOR
|
|
U_MEMORY_DEALLOCATOR
|
|
|
|
/**
|
|
* Constructor of the class
|
|
*
|
|
* This constructor creates an empty class
|
|
*/
|
|
|
|
Url()
|
|
{
|
|
U_TRACE_CTOR(0, Url, "", 0)
|
|
|
|
service_end =
|
|
user_begin =
|
|
user_end =
|
|
host_begin =
|
|
host_end =
|
|
path_begin =
|
|
path_end = -1;
|
|
}
|
|
|
|
/**
|
|
* Constructor of the class
|
|
*
|
|
* This constructor set the url from the string
|
|
*
|
|
* @param x Reference to a string with an url
|
|
*/
|
|
|
|
Url(const UString& x) : url(x)
|
|
{
|
|
U_TRACE_CTOR(0, Url, "%V", x.rep)
|
|
|
|
findpos();
|
|
}
|
|
|
|
/**
|
|
* Constructor of the class
|
|
*
|
|
* This constructor set the url from the char buffer
|
|
*
|
|
* @param t Pointer to a char buffer with an url
|
|
*/
|
|
|
|
Url(const char* t, uint32_t tlen) : url(t, tlen)
|
|
{
|
|
U_TRACE_CTOR(0, Url, "%S,%u", t, tlen)
|
|
|
|
findpos();
|
|
}
|
|
|
|
/**
|
|
* Destructor of the class
|
|
*/
|
|
|
|
~Url()
|
|
{
|
|
U_TRACE_DTOR(0, Url)
|
|
}
|
|
|
|
// ASSIGNMENT
|
|
|
|
void set(const Url& u)
|
|
{
|
|
service_end = u.service_end;
|
|
user_begin = u.user_begin;
|
|
user_end = u.user_end;
|
|
host_begin = u.host_begin;
|
|
host_end = u.host_end;
|
|
path_begin = u.path_begin;
|
|
path_end = u.path_end;
|
|
}
|
|
|
|
Url(const Url& u) : url(u.url)
|
|
{
|
|
U_MEMORY_TEST_COPY(u)
|
|
|
|
set(u);
|
|
}
|
|
|
|
Url& operator=(const Url& u)
|
|
{
|
|
U_MEMORY_TEST_COPY(u)
|
|
|
|
url = u.url;
|
|
|
|
set(u);
|
|
|
|
return *this;
|
|
}
|
|
|
|
void set(const char* str, uint32_t len)
|
|
{
|
|
U_TRACE(0, "Url::set(%.*S,%u)", len, str, len)
|
|
|
|
(void) url.replace(str, len);
|
|
|
|
findpos();
|
|
}
|
|
|
|
void set(const UString& x) { set(U_STRING_TO_PARAM(x)); }
|
|
|
|
UString get() const { return url; }
|
|
bool empty() const { return url.empty(); }
|
|
|
|
const char* getUrlData() const { return url.data(); }
|
|
uint32_t getUrlDataLen() const { return url.size(); }
|
|
|
|
void clear()
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::clear()")
|
|
|
|
url.clear();
|
|
|
|
service_end =
|
|
user_begin =
|
|
user_end =
|
|
host_begin =
|
|
host_end =
|
|
path_begin =
|
|
path_end = -1;
|
|
}
|
|
|
|
/**
|
|
* This methode returns the specified service of the url
|
|
*
|
|
* If there is no service specified the buffer will be empty.
|
|
* The service has no ':' char at the end!
|
|
*
|
|
* @return str
|
|
*/
|
|
|
|
UString getService() const
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::getService()")
|
|
|
|
UString srv;
|
|
|
|
if (service_end > 0) srv = url.substr(0U, (uint32_t)service_end);
|
|
|
|
U_RETURN_STRING(srv);
|
|
}
|
|
|
|
bool isLDAP() const
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::isLDAP()")
|
|
|
|
if (u_get_unalignedp32(url.data()) == U_MULTICHAR_CONSTANT32('l','d','a','p')) U_RETURN(true);
|
|
|
|
U_RETURN(false);
|
|
}
|
|
|
|
bool isHTTP() const
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::isHTTP()")
|
|
|
|
if (service_end == 4 &&
|
|
u_get_unalignedp32(url.data()) == U_MULTICHAR_CONSTANT32('h','t','t','p'))
|
|
{
|
|
U_RETURN(true);
|
|
}
|
|
|
|
U_RETURN(false);
|
|
}
|
|
|
|
bool isWS() const
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::isWS()")
|
|
|
|
if (service_end == 2 &&
|
|
u_get_unalignedp16(url.data()) == U_MULTICHAR_CONSTANT16('w','s'))
|
|
{
|
|
U_RETURN(true);
|
|
}
|
|
|
|
U_RETURN(false);
|
|
}
|
|
|
|
bool isHTTPS() const
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::isHTTPS()")
|
|
|
|
if (service_end == 5 &&
|
|
url.c_char(4) == 's' &&
|
|
u_get_unalignedp32(url.data()) == U_MULTICHAR_CONSTANT32('h','t','t','p'))
|
|
{
|
|
U_RETURN(true);
|
|
}
|
|
|
|
U_RETURN(false);
|
|
}
|
|
|
|
bool isWSS() const
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::isWSS()")
|
|
|
|
if (service_end == 3 &&
|
|
url.c_char(2) == 's' &&
|
|
u_get_unalignedp16(url.data()) == U_MULTICHAR_CONSTANT16('w','s'))
|
|
{
|
|
U_RETURN(true);
|
|
}
|
|
|
|
U_RETURN(false);
|
|
}
|
|
|
|
bool isLDAPS() const
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::isLDAPS()")
|
|
|
|
if (service_end == 5 &&
|
|
url.c_char(4) == 's' &&
|
|
u_get_unalignedp32(url.data()) == U_MULTICHAR_CONSTANT32('l','d','a','p'))
|
|
{
|
|
U_RETURN(true);
|
|
}
|
|
|
|
U_RETURN(false);
|
|
}
|
|
|
|
/**
|
|
* This methode set the service of the url
|
|
*
|
|
* The should no ':' char at the end!
|
|
*
|
|
* @param service Service to set
|
|
*/
|
|
|
|
void setService(const char* service, uint32_t n);
|
|
|
|
/**
|
|
* This methode returns the user identifier part of the UserInfo part of this URL
|
|
*
|
|
* This method assumes that UserInfo is structured like this: @c <userid>:<password>
|
|
* If there is no user specified the buffer will be empty
|
|
*
|
|
* @return str
|
|
*/
|
|
|
|
UString getUser()
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::getUser()")
|
|
|
|
UString usr;
|
|
|
|
if (user_begin < user_end) usr = url.substr(user_begin, user_end - user_begin);
|
|
|
|
U_RETURN_STRING(usr);
|
|
}
|
|
|
|
/**
|
|
* This methode set the user of the url
|
|
*
|
|
* @param user User to set
|
|
*/
|
|
|
|
bool setUser(const char* user, uint32_t n);
|
|
|
|
/**
|
|
* This methode erase the user from the url
|
|
*/
|
|
|
|
void eraseUser()
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::eraseUser()")
|
|
|
|
if (user_begin < user_end)
|
|
{
|
|
(void) url.erase(user_begin, user_end + 1);
|
|
|
|
findpos();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Checks if there is a host specified
|
|
*
|
|
* @retval true It has a host.
|
|
* @retval false There is no host, so it will be a local file
|
|
*/
|
|
|
|
bool isLocalFile()
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::isLocalFile()")
|
|
|
|
// Is there a host ?
|
|
|
|
if (host_begin < host_end) U_RETURN(true);
|
|
|
|
U_RETURN(false);
|
|
}
|
|
|
|
/**
|
|
* This methode returns the host name part of the URL
|
|
*
|
|
* Not all URLs contain a host name, but those that do specify the host as part of the authority segment which is contained
|
|
* between the @c '//' and the following @c '/' or @c '?' characters.
|
|
* The host name is a sub-string of the authority part, with user and port number information removed.
|
|
* For example, the following URL's host is @c www.elcel.com : @c http://user:password@www.elcel.com:80/index.html
|
|
* If there is no host specified the buffer will be empty
|
|
*
|
|
* @return str
|
|
*/
|
|
|
|
UString getHost()
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::getHost()")
|
|
|
|
UString host;
|
|
|
|
if (host_begin < host_end) host = url.substr(host_begin, host_end - host_begin);
|
|
|
|
U_RETURN_STRING(host);
|
|
}
|
|
|
|
/**
|
|
* This methode set the host
|
|
*
|
|
* @param host host to set
|
|
* @param n len string host to set
|
|
*/
|
|
|
|
void setHost(const char* _host, uint32_t n);
|
|
|
|
/**
|
|
* This returns the port number from the URL or -1 if no port number is present
|
|
*
|
|
* The port number is usually contained within the authority part of the URL and is
|
|
* separated from the host by a colon character. For example, the following URL has
|
|
* a port number of 81: @c http://www.acme.org:81
|
|
* If there is no port, translate the service to a port number
|
|
*
|
|
* @retval 1..65535 specified port
|
|
* @retval 0 no port specified
|
|
*/
|
|
|
|
UString getPort();
|
|
uint32_t getPortNumber();
|
|
|
|
/**
|
|
* Set the port number
|
|
*
|
|
* @warning only possible if a url is specified
|
|
*/
|
|
|
|
bool setPort(unsigned int port);
|
|
|
|
/**
|
|
* This methode check the existence of the path from the url
|
|
*
|
|
* @return bool
|
|
*/
|
|
|
|
bool isPath() const
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::isPath()")
|
|
|
|
if (path_begin < path_end) U_RETURN(true);
|
|
|
|
U_RETURN(false);
|
|
}
|
|
|
|
/**
|
|
* This methode returns the path for this URL
|
|
*
|
|
* The path consists of the file name part of the URL without any query information
|
|
*
|
|
* For example, the path for the following URL is @a '/search'.
|
|
* @c http://www.google.com/search?q=xml
|
|
* If there is no path specified the buffer contains '/'
|
|
*
|
|
* @return str
|
|
*/
|
|
|
|
UString getPath()
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::getPath()")
|
|
|
|
if (isPath() == false) return *UString::str_path_root;
|
|
|
|
uint32_t n = path_end - path_begin;
|
|
|
|
UString path(n);
|
|
|
|
decode(url.c_pointer(path_begin), n, path);
|
|
|
|
U_RETURN_STRING(path);
|
|
}
|
|
|
|
/**
|
|
* Returns the file name for this URL. The file name consists of the path plus the query (if present)
|
|
*
|
|
* For example, the file name for the following URL is @a '/search?q=xml'
|
|
* @c http://www.google.com/search?q=xml
|
|
*/
|
|
|
|
UString getPathAndQuery()
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::getPathAndQuery()")
|
|
|
|
if (isPath() == false) return *UString::str_path_root;
|
|
|
|
UString path = url.substr(path_begin);
|
|
|
|
U_RETURN_STRING(path);
|
|
}
|
|
|
|
/**
|
|
* This methode set the path of the url
|
|
*
|
|
* If the first char is not an '/' it will be added
|
|
*
|
|
* @param path Path to set
|
|
*/
|
|
|
|
void setPath(const char* path, uint32_t n);
|
|
|
|
/**
|
|
* This methode check the existence of the query from the url
|
|
*
|
|
* @return bool
|
|
*/
|
|
|
|
bool isQuery()
|
|
{
|
|
U_TRACE_NO_PARAM(0, "Url::isQuery()")
|
|
|
|
if (path_end < (int)(url.size() - 1)) U_RETURN(true);
|
|
|
|
U_RETURN(false);
|
|
}
|
|
|
|
/**
|
|
* This methode returns the portion of the file after (but not including) '?' which represents the start of a query string
|
|
*
|
|
* If there is no query specified the buffer will be empty
|
|
*
|
|
* @return str
|
|
*/
|
|
|
|
UString getQuery();
|
|
uint32_t getQuery(UVector<UString>& vec);
|
|
|
|
static UString getQueryBody(UVector<UString>& vec);
|
|
|
|
/**
|
|
* This methode set the query of the url
|
|
*
|
|
* @param query Query to set
|
|
*/
|
|
|
|
bool setQuery(UVector<UString>& vec);
|
|
bool setQuery(const char* query, uint32_t query_len);
|
|
|
|
/**
|
|
* This methode erase the query from the url
|
|
*/
|
|
|
|
void eraseQuery();
|
|
|
|
/**
|
|
* This methode add's a new entry to the query
|
|
*
|
|
* The entry and the value will first encoded and then added to the url.
|
|
* To seperate the entry's the '&' character is been used
|
|
*
|
|
* @param entry Name of the entry
|
|
* @param entry_len len string entry = 0 No query will be added
|
|
* @param value Value of the entry
|
|
* @param value_len len string value = 0 only the entry will be added
|
|
*/
|
|
|
|
void addQuery(const char* entry, uint32_t entry_len, const char* value, uint32_t value_len);
|
|
|
|
enum UrlFieldType {
|
|
U_SCHEMA = 0x0001,
|
|
U_HOST = 0x0002,
|
|
U_PORT = 0x0004,
|
|
U_PATH = 0x0008,
|
|
U_QUERY = 0x0010,
|
|
U_FRAGMENT = 0x0020,
|
|
U_USERINFO = 0x0040
|
|
};
|
|
|
|
UString getFragment();
|
|
UString getFieldValue(int field_type);
|
|
|
|
/**
|
|
* Converts a Unicode string into the MIME @c x-www-form-urlencoded format
|
|
*
|
|
* @param input string to encode
|
|
* @param len size of the encoded string
|
|
* @param buffer buffer for the encoded string. The size of the buffer has to be 3 * len
|
|
*/
|
|
|
|
static void encode(const char* input, uint32_t len, UString& buffer)
|
|
{
|
|
U_TRACE(0, "Url::encode(%.*S,%u,%p)", len, input, len, &buffer)
|
|
|
|
U_ASSERT(buffer.capacity() >= len * 2)
|
|
U_INTERNAL_ASSERT_EQUALS(u_isBase64(input, len), false)
|
|
|
|
buffer.rep->_length = u_url_encode((const unsigned char*)input, len, (unsigned char*)buffer.data());
|
|
|
|
U_INTERNAL_DUMP("buffer(%u) = %#V", buffer.size(), buffer.rep)
|
|
|
|
U_INTERNAL_ASSERT(buffer.invariant())
|
|
}
|
|
|
|
static void encode(const UString& input, UString& buffer) { encode(input.data(), input.size(), buffer); }
|
|
|
|
static void encode_add(const char* input, uint32_t len, UString& buffer)
|
|
{
|
|
U_TRACE(0, "Url::encode_add(%.*S,%u,%p)", len, input, len, &buffer)
|
|
|
|
U_ASSERT(buffer.space() >= (len * 2))
|
|
U_INTERNAL_ASSERT_EQUALS(u_isBase64(input, len), false)
|
|
|
|
buffer.rep->_length += u_url_encode((const unsigned char*)input, len, (unsigned char*)buffer.pend());
|
|
|
|
U_INTERNAL_DUMP("buffer(%u) = %#V", buffer.size(), buffer.rep)
|
|
|
|
U_INTERNAL_ASSERT(buffer.invariant())
|
|
}
|
|
|
|
static void encode_add(const UString& input, UString& buffer) { encode_add(input.data(), input.size(), buffer); }
|
|
|
|
/**
|
|
* Decode a string
|
|
*
|
|
* @param input string to decode
|
|
* @param len size of the encoded string
|
|
* @param buffer buffer for the decoded string. The size of the buffer has to be at minimum len
|
|
*/
|
|
|
|
static void decode(const char* input, uint32_t len, UString& buffer)
|
|
{
|
|
U_TRACE(0, "Url::decode(%.*S,%u,%p)", len, input, len, &buffer)
|
|
|
|
U_ASSERT(buffer.capacity() >= len)
|
|
|
|
buffer.rep->_length = u_url_decode(input, len, (unsigned char*)buffer.data());
|
|
|
|
U_INTERNAL_DUMP("buffer(%u) = %#V", buffer.size(), buffer.rep)
|
|
|
|
U_INTERNAL_ASSERT(buffer.invariant())
|
|
}
|
|
|
|
static void decode(const UString& input, UString& buffer) { decode(U_STRING_TO_PARAM(input), buffer); }
|
|
|
|
// STREAM
|
|
|
|
#ifdef U_STDCPP_ENABLE
|
|
friend U_EXPORT istream& operator>>(istream& is, Url& u);
|
|
friend U_EXPORT ostream& operator<<(ostream& os, const Url& u);
|
|
|
|
// DEBUG
|
|
|
|
# ifdef DEBUG
|
|
const char* dump(bool reset) const;
|
|
# endif
|
|
#endif
|
|
|
|
protected:
|
|
UString url; // content string
|
|
int service_end, // End position of the service
|
|
user_begin, // begin position of the user
|
|
user_end, // end position of the user
|
|
host_begin, // begin position of the host
|
|
host_end, // end position of the host
|
|
path_begin, // begin position of the path
|
|
path_end; // end position of the path
|
|
|
|
#ifdef DEBUG
|
|
uint32_t field_mask;
|
|
#endif
|
|
|
|
void findpos(); // scans the structure of the url and is updating the position attributs of the class
|
|
|
|
private:
|
|
uint32_t getPosQuery() U_NO_EXPORT __pure;
|
|
uint32_t getPosFragment() U_NO_EXPORT __pure;
|
|
bool prepareForQuery() U_NO_EXPORT; // prepare the string to add a query
|
|
uint32_t getSizeQuery(uint32_t pos) U_NO_EXPORT __pure;
|
|
};
|
|
|
|
#endif
|