mirror of
https://github.com/stefanocasazza/ULib.git
synced 2025-09-28 19:05:55 +08:00
403 lines
12 KiB
C++
403 lines
12 KiB
C++
// test_pcre.cpp
|
||
|
||
#include <ulib/pcre/pcre.h>
|
||
|
||
#ifdef __MINGW32__
|
||
#define _GLIBCXX_USE_C99_DYNAMIC 1
|
||
#endif
|
||
|
||
#include <map>
|
||
#include <string>
|
||
#include <vector>
|
||
#include <fstream>
|
||
#include <iostream>
|
||
|
||
static void ex()
|
||
{
|
||
U_TRACE(5, "ex()")
|
||
|
||
/* this will generate only one substring, "This" */
|
||
|
||
UPCRE ex(U_STRING_FROM_CONSTANT("([a-z]+)"), "i");
|
||
|
||
U_ASSERT(ex.search(U_STRING_FROM_CONSTANT("This is a test.")) == true) /* check if the expression matched */
|
||
|
||
U_INTERNAL_DUMP("num substrings = %d", ex.matches())
|
||
|
||
U_ASSERT(ex.matches() == 1)
|
||
}
|
||
|
||
static void regex(const UString& expression, const UString& stuff)
|
||
{
|
||
U_TRACE(5, "regex(%.*S,%.*S)", U_STRING_TO_TRACE(expression), U_STRING_TO_TRACE(stuff))
|
||
|
||
UPCRE reg(expression, 0U); /* UPCRE object */
|
||
|
||
bool result = reg.search(stuff);
|
||
|
||
U_INTERNAL_ASSERT(result == true) /* check if the expression matched */
|
||
|
||
U_INTERNAL_DUMP("num substrings = %d", reg.matches())
|
||
|
||
int start, end, pos;
|
||
|
||
for (pos = 0; pos < reg.matches(); ++pos) /* iterate over the matched sub strings */
|
||
{
|
||
end = reg.getMatchEnd(pos);
|
||
start = reg.getMatchStart(pos);
|
||
|
||
/* print out the start/end offset of the current substring within the searched string(stuff) */
|
||
|
||
U_DUMP("substrings[%3d] = %S", pos, reg[pos].c_str()) // also possible: reg.getMatch(pos);
|
||
U_DUMP("(%3d, %3d) = %S", start, end, stuff.substr(start, end - start + 1).c_str());
|
||
}
|
||
}
|
||
|
||
static void regex()
|
||
{
|
||
U_TRACE(5, "regex()")
|
||
|
||
UString expression = U_STRING_FROM_CONSTANT("([a-z]*) ([0-9]+)"), /* define a string with a regular expression */
|
||
stuff = U_STRING_FROM_CONSTANT("hallo 11 robert"); /* this is the string in which we want to search */
|
||
|
||
UPCRE reg(expression, "i"); /* UPCRE object, search case-insensitive ("i") */
|
||
|
||
U_ASSERT(reg.search(stuff) == true) /* check if the expression matched */
|
||
|
||
U_INTERNAL_DUMP("num substrings = %d", reg.matches())
|
||
|
||
U_ASSERT(reg.matches() > 0) /* check if the expression generated any substrings */
|
||
|
||
int start, end, pos;
|
||
|
||
for (pos = 0; pos < reg.matches(); ++pos) /* iterate over the matched sub strings */
|
||
{
|
||
end = reg.getMatchEnd(pos);
|
||
start = reg.getMatchStart(pos);
|
||
|
||
/* print out the start/end offset of the current substring within the searched string(stuff) */
|
||
|
||
U_DUMP("substrings[%3d] = %S", pos, reg[pos].c_str()) // also possible: reg.getMatch(pos);
|
||
U_DUMP("(%3d, %3d) = %S", start, end, stuff.substr(start, end - start + 1).c_str());
|
||
}
|
||
|
||
U_ASSERT(reg[0] == U_STRING_FROM_CONSTANT("hallo"))
|
||
U_ASSERT(reg[1] == U_STRING_FROM_CONSTANT("11"))
|
||
}
|
||
|
||
static void split() // Sample of split() usage
|
||
{
|
||
U_TRACE(5, "split()")
|
||
|
||
UString sp_orig = U_STRING_FROM_CONSTANT("was21willst2387461du3alter!"),
|
||
delimiter = U_STRING_FROM_CONSTANT("[0-9]+"); // define a regex for digits (character class)
|
||
|
||
UPCRE S(delimiter, "g"); // new UPCRE object, match globally ("g" flag)
|
||
|
||
UVector<UString> splitted;
|
||
unsigned n = S.split(splitted, sp_orig); // split "was21willst2387461du3alter!" by digits
|
||
|
||
for (unsigned i = 0; i < n; ++i) // iterate over the resulting list
|
||
{
|
||
U_DUMP("splitted[%3d] = %S", i, splitted[i].c_str())
|
||
}
|
||
|
||
U_ASSERT(splitted[0] == U_STRING_FROM_CONSTANT("was"))
|
||
U_ASSERT(splitted[1] == U_STRING_FROM_CONSTANT("willst"))
|
||
U_ASSERT(splitted[2] == U_STRING_FROM_CONSTANT("du"))
|
||
U_ASSERT(splitted[3] == U_STRING_FROM_CONSTANT("alter!"))
|
||
}
|
||
|
||
static void replace() // Sample of replace() usage
|
||
{
|
||
U_TRACE(5, "replace()")
|
||
|
||
UString orig = U_STRING_FROM_CONSTANT("Hans ist 22 Jahre alt. Er ist 8 Jahre <20>lter als Fred.");
|
||
|
||
UPCRE p(U_STRING_FROM_CONSTANT("([0-9]+)"), 0U); // define a regex for digits (character class)
|
||
|
||
UString n = p.replace(orig, U_STRING_FROM_CONSTANT("zweiundzwanzig($1)")); // replace the 1st occurence of [0-9]+ with "zweiundzwanzig"
|
||
|
||
U_INTERNAL_DUMP("n = %.*S", U_STRING_TO_TRACE(n))
|
||
|
||
U_ASSERT(n == U_STRING_FROM_CONSTANT("Hans ist zweiundzwanzig(22) Jahre alt. Er ist 8 Jahre <20>lter als Fred."))
|
||
}
|
||
|
||
static void normalize() // another sample to check if normalizing using replace() works
|
||
{
|
||
U_TRACE(5, "normalize()")
|
||
|
||
UString orig = U_STRING_FROM_CONSTANT("Heute ist ein schoener Tag gell?");
|
||
|
||
UPCRE reg(U_STRING_FROM_CONSTANT("[\\s]+"), "gs"); // create regex for normalizing whitespace
|
||
|
||
UString n = reg.replace(orig, U_STRING_FROM_CONSTANT(" ")); // do the normalizing process
|
||
|
||
U_INTERNAL_DUMP("n = %.*S", U_STRING_TO_TRACE(n))
|
||
|
||
U_ASSERT(n == U_STRING_FROM_CONSTANT("Heute ist ein schoener Tag gell?"))
|
||
}
|
||
|
||
static void multisearch()
|
||
{
|
||
U_TRACE(5, "multisearch()")
|
||
|
||
size_t i = 0, pos = 0;
|
||
UPCRE reg(U_STRING_FROM_CONSTANT("([^\\n]+\\n)"), 0U);
|
||
UString str = U_STRING_FROM_CONSTANT("\nline1\nline2\nline3\n"), vec[3];
|
||
|
||
while (pos <= str.length() && reg.search(str, pos))
|
||
{
|
||
pos = reg.getMatchEnd(0);
|
||
vec[i] = reg[0];
|
||
|
||
U_DUMP("pos: %2d match (%d): %.*S", pos, i, U_STRING_TO_TRACE(vec[i]))
|
||
|
||
vec[i++].duplicate();
|
||
}
|
||
|
||
U_DUMP("vec[0] = %.*S", U_STRING_TO_TRACE(vec[0]))
|
||
U_DUMP("vec[1] = %.*S", U_STRING_TO_TRACE(vec[1]))
|
||
U_DUMP("vec[2] = %.*S", U_STRING_TO_TRACE(vec[2]))
|
||
|
||
U_ASSERT(vec[0] == U_STRING_FROM_CONSTANT("line1\n"))
|
||
U_ASSERT(vec[1] == U_STRING_FROM_CONSTANT("line2\n"))
|
||
U_ASSERT(vec[2] == U_STRING_FROM_CONSTANT("line3\n"))
|
||
}
|
||
|
||
static void replace_multi() // Sample of replace() usage with multiple substrings
|
||
{
|
||
U_TRACE(5, "replace_multi()")
|
||
|
||
UString orig = U_STRING_FROM_CONSTANT(" 08:23 ");
|
||
|
||
UPCRE reg(U_STRING_FROM_CONSTANT(" ([0-9]+)(:)([0-9]+) "), "sig"); // create regex which, if it matches, creates 3 substrings
|
||
|
||
// remove $2 (":") * re-use $1 ("08") and $3 ("23") in the replace string
|
||
UString n = reg.replace(orig, U_STRING_FROM_CONSTANT("$1 Stunden und $3 Minuten"));
|
||
|
||
U_INTERNAL_DUMP("n = %.*S", U_STRING_TO_TRACE(n))
|
||
|
||
U_ASSERT(n == U_STRING_FROM_CONSTANT("08 Stunden und 23 Minuten"))
|
||
}
|
||
|
||
static UPCRE* reg;
|
||
static string flags;
|
||
static map<string, vector<string> > _hash;
|
||
|
||
static void set(std::string& str)
|
||
{
|
||
U_TRACE(5, "set(%p)", &str)
|
||
|
||
if (reg) delete reg;
|
||
|
||
UString expression(str.c_str());
|
||
|
||
reg = new UPCRE(expression, flags.c_str());
|
||
}
|
||
|
||
static bool search(UPCRE& _pcre, std::string& str)
|
||
{
|
||
U_TRACE(5, "search(%p,%p)", &_pcre, &str)
|
||
|
||
UString stuff(str.c_str());
|
||
|
||
return _pcre.search(stuff, 0, 0);
|
||
}
|
||
|
||
static bool read_data(const char* file)
|
||
{
|
||
U_TRACE(5, "read_data(%S)", file)
|
||
|
||
ifstream data(file);
|
||
|
||
if (!data) return false;
|
||
|
||
char zeichen;
|
||
string line, regex;
|
||
bool is_regex = false;
|
||
vector<string> content;
|
||
UPCRE find_end(U_STRING_FROM_CONSTANT("\\/[gimsxADEGIMNSUX8L\\+]{0,2}$"), 0U);
|
||
|
||
while (data)
|
||
{
|
||
data.get(zeichen);
|
||
|
||
if (zeichen == '\n')
|
||
{
|
||
if (line[0] == '/')
|
||
{
|
||
regex = line;
|
||
is_regex = (search(find_end, line) == false);
|
||
}
|
||
else if (is_regex)
|
||
{
|
||
regex += "\n" + line;
|
||
is_regex = (search(find_end, line) == false);
|
||
}
|
||
else if (line[0] == ' ')
|
||
{
|
||
line.replace(0, 4, ""); // remove leading whitespaces
|
||
|
||
content.push_back(line);
|
||
}
|
||
else if (line.empty())
|
||
{
|
||
_hash[regex] = content;
|
||
|
||
content.clear();
|
||
}
|
||
|
||
line = "";
|
||
}
|
||
else
|
||
{
|
||
line += zeichen;
|
||
}
|
||
}
|
||
|
||
data.close();
|
||
|
||
return true;
|
||
}
|
||
|
||
int
|
||
U_EXPORT main (int argc, char* argv[])
|
||
{
|
||
U_ULIB_INIT(argv);
|
||
|
||
U_TRACE(5,"main(%d)",argc)
|
||
|
||
/* define a string with a regular expression */
|
||
UString expression = U_STRING_FROM_CONSTANT("^/lms/doceboCore(/|/index.php)?$"),
|
||
/* this is the string in which we want to search */
|
||
stuff = U_STRING_FROM_CONSTANT("/lms/doceboCore/index.php");
|
||
|
||
regex(expression, stuff);
|
||
|
||
stuff = U_STRING_FROM_CONSTANT("/lms/doceboCore/");
|
||
regex(expression, stuff);
|
||
|
||
stuff = U_STRING_FROM_CONSTANT("/lms/doceboCore");
|
||
regex(expression, stuff);
|
||
|
||
expression = U_STRING_FROM_CONSTANT("^/secure/?$");
|
||
|
||
stuff = U_STRING_FROM_CONSTANT("/secure");
|
||
regex(expression, stuff);
|
||
|
||
stuff = U_STRING_FROM_CONSTANT("/secure/");
|
||
regex(expression, stuff);
|
||
|
||
expression = U_STRING_FROM_CONSTANT("^/WAYF/?\\?shire=http*");
|
||
|
||
stuff = U_STRING_FROM_CONSTANT("/WAYF?shire=http%3A%2F%2Flocalhost%2FShibboleth.sso%2FSAML%2FPOST&time=1196764890&target=cookie&providerId=http%3A%2F%2Flocalhost%2Fsp");
|
||
|
||
regex(expression, stuff);
|
||
|
||
stuff = U_STRING_FROM_CONSTANT("/WAYF/?shire=http%3A%2F%2Flocalhost%2FShibboleth.sso%2FSAML%2FPOST&time=1196764890&target=cookie&providerId=http%3A%2F%2Flocalhost%2Fsp");
|
||
regex(expression, stuff);
|
||
|
||
expression = U_STRING_FROM_CONSTANT("^/SWITCHaai/images/.*\\.gif$");
|
||
|
||
stuff = U_STRING_FROM_CONSTANT("/SWITCHaai/images/toplevel.gif");
|
||
regex(expression, stuff);
|
||
|
||
/*
|
||
U_ASSERT( UPCRE::isValidURL(U_STRING_FROM_CONSTANT("http://immike.net/blog/2007/06/21/extreme-regex-foo-what-you-need-to-know-to-become-a-regular-expression-pro/")) == true )
|
||
*/
|
||
|
||
U_ASSERT( UPCRE::validateUsername(U_STRING_FROM_CONSTANT("%! mike_84&")) == false )
|
||
|
||
#define XML_MSG \
|
||
"<REQUEST sid=\"sid1\" version=\"1\">" \
|
||
"<EXPORT-POLICYLABEL name=\"openvpn-server\"/>" \
|
||
"</REQUEST>" \
|
||
"<REQUEST sid=\"sid2\" version=\"1\">" \
|
||
" <EXPORT-POLICYLABEL name=\"openvpn-default\"/>" \
|
||
"</REQUEST>"
|
||
|
||
UVector<UString> vec;
|
||
|
||
U_ASSERT( UPCRE::getTag(vec, U_STRING_FROM_CONSTANT(XML_MSG), "name", "openvpn-default", "EXPORT-POLICYLABEL") == 1 )
|
||
|
||
#define HTTP_MSG \
|
||
"HTTP/1.1 301 Moved Permanently\r\n" \
|
||
"Date: Fri, 10 Nov 2006 16:59:55 GMT\r\n" \
|
||
"Server: Apache/2.0.54 (Debian GNU/Linux) PHP/4.3.10-16 mod_ssl/2.0.54 OpenSSL/0.9.7e\r\n" \
|
||
"Location: http://laptop.unirel.intranet/webmail/\r\n" \
|
||
"Content-Length: 388\r\n" \
|
||
"Keep-Alive: timeout=15, max=100\r\n" \
|
||
"Connection: Keep-Alive\r\n" \
|
||
"Content-Type: text/html; charset=iso-8859-1\r\n" \
|
||
"\r\n" \
|
||
"<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">" \
|
||
"<html><head>" \
|
||
"<title>301 Moved Permanently</title>" \
|
||
"</head><body>" \
|
||
"<h1>Moved Permanently</h1>" \
|
||
"<p>The document has moved <a href=\"http://laptop.unirel.intranet/webmail/\">here</a>.</p>" \
|
||
"<hr>" \
|
||
"<address>Apache/2.0.54 (Debian GNU/Linux) PHP/4.3.10-16 mod_ssl/2.0.54 OpenSSL/0.9.7e Server at laptop.unirel.intranet Port 80</address>" \
|
||
"</body></html>"
|
||
|
||
expression = U_STRING_FROM_CONSTANT("(Location: http:)"),
|
||
stuff = U_STRING_FROM_CONSTANT(HTTP_MSG);
|
||
|
||
regex(expression, stuff);
|
||
|
||
ex();
|
||
regex();
|
||
split();
|
||
replace();
|
||
normalize();
|
||
multisearch();
|
||
replace_multi();
|
||
|
||
if (read_data(argv[1]))
|
||
{
|
||
typedef vector<string>::iterator vec_iter;
|
||
typedef map<string, vector<string> >::iterator map_iter;
|
||
|
||
string _expression;
|
||
|
||
for (map_iter mp = _hash.begin(); mp != _hash.end(); ++mp)
|
||
{
|
||
string regex = mp->first;
|
||
vector<string> content = mp->second;
|
||
unsigned pos = regex.find_last_of("/", string::npos);
|
||
|
||
if (pos == regex.size())
|
||
{
|
||
flags = "";
|
||
_expression = regex.substr(1, regex.size() - 1);
|
||
}
|
||
else
|
||
{
|
||
flags = regex.substr(pos + 1, regex.size());
|
||
_expression = regex.substr(1, pos - 1);
|
||
}
|
||
|
||
U_INTERNAL_DUMP("_expression = %S flags = %S", _expression.c_str(), flags.c_str())
|
||
|
||
cout << "/" << _expression << "/" << flags << endl;
|
||
|
||
set(_expression);
|
||
|
||
for (vec_iter vp = content.begin(); vp != content.end(); ++vp)
|
||
{
|
||
string data = *vp;
|
||
|
||
if (search(*reg, data)) cout << " 1: ";
|
||
else cout << " 0: ";
|
||
|
||
cout << data << endl;
|
||
}
|
||
|
||
cout << endl;
|
||
}
|
||
}
|
||
|
||
if (reg) delete reg;
|
||
}
|