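// NOTE: the lines below are repository-viewer metadata captured together with
// this file; they are not part of the original header.
//   viewer counters: 1, 0
//   mirror of https://github.com/stefanocasazza/ULib.git (synced 2025-09-28 19:05:55 +08:00)
//   path: ULib/include/ulib/tokenizer.h
//   last commit: stefanocasazza a4043a7ccb "some fix" (2015-12-09 18:54:28 +01:00)
//   size: 245 lines, 4.8 KiB, C++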

// ============================================================================
//
// = LIBRARY
// ULib - c++ library
//
// = FILENAME
// tokenizer.h
//
// = AUTHOR
// Stefano Casazza
//
// ============================================================================
#ifndef ULIB_TOKENIZER_H
#define ULIB_TOKENIZER_H 1
#include <ulib/string.h>
class UQueryParser;
template <class T> class UVector;
class U_EXPORT UTokenizer {
public:
// Check for memory error
U_MEMORY_TEST
// Allocator e Deallocator
U_MEMORY_ALLOCATOR
U_MEMORY_DEALLOCATOR
// COSTRUTTORI
UTokenizer(const char* d = 0)
{
U_TRACE_REGISTER_OBJECT(0, UTokenizer, "%S", d)
delim = d;
s = end = 0;
}
UTokenizer(const UString& data, const char* d = 0) : str(data)
{
U_TRACE_REGISTER_OBJECT(0, UTokenizer, "%V,%S", data.rep, d)
s = data.data();
end = data.end();
delim = d;
}
~UTokenizer()
{
U_TRACE_UNREGISTER_OBJECT(0, UTokenizer)
}
// VARIE
bool atEnd()
{
U_TRACE_NO_PARAM(0, "UTokenizer::atEnd()")
bool result = (s >= end);
U_RETURN(result);
}
void setDelimiter(const char* sep)
{
U_TRACE(0, "UTokenizer::setDelimiter(%S)", sep)
delim = sep;
}
void setData(const UString& data);
void skipSpaces()
{
U_TRACE_NO_PARAM(0, "UTokenizer::skipSpaces()")
while (s < end && u__isspace(*s)) ++s;
}
void setPointer(const char* ptr)
{
U_TRACE(0, "UTokenizer::setPointer(%S)", ptr)
U_INTERNAL_ASSERT(ptr <= end)
s = ptr;
}
const char* getPointer() const
{
U_TRACE_NO_PARAM(0, "UTokenizer::getPointer()")
U_RETURN(s);
}
const char* getEnd() const
{
U_TRACE_NO_PARAM(0, "UTokenizer::getEnd()")
U_RETURN(end);
}
void setDistance(uint32_t pos)
{
U_TRACE(0, "UTokenizer::setDistance(%u)", pos)
s = str.c_pointer(pos);
}
uint32_t getDistance() const __pure
{
U_TRACE(0, "UTokenizer::getDistance()")
uint32_t pos = (s < end ? str.distance(s) : str.size());
U_RETURN(pos);
}
// get current char
char current()
{
U_TRACE_NO_PARAM(0, "UTokenizer::current()")
if (s >= end) U_RETURN('\0');
U_RETURN(*s);
}
// go prev char
void back()
{
U_TRACE_NO_PARAM(0, "UTokenizer::back()")
U_INTERNAL_ASSERT_MAJOR(s, str.data())
--s;
}
// get next char
char next()
{
U_TRACE_NO_PARAM(0, "UTokenizer::next()")
if (s >= end) U_RETURN('\0');
U_RETURN(*s++);
}
UTokenizer& operator++() { ++s; return *this; } // ++A
// get next token
bool next(UString& tok, char c);
bool extend(UString& tok, char c); // extend the actual token to the next char 'c'... (see PEC_report.cpp)
bool next(UString& tok, bPFi func);
bool next(UString& tok, bool* bgroup);
// EXT
UString substr() const
{
U_TRACE_NO_PARAM(0, "UTokenizer::substr()")
U_INTERNAL_ASSERT(s <= end)
return (s < end ? str.substr(str.distance(s)) : UString::getStringNull());
}
UString substr(const char* start) const
{
U_TRACE(0, "UTokenizer::substr(%p)", start)
U_INTERNAL_ASSERT(start < end)
return str.substr(start, s - start);
}
UString getTokenQueryParser();
int getTokenId(UString* ptoken);
bool tokenSeen(const UString* x);
bool skipNumber(bool& isReal);
bool skipToken(const char* token, uint32_t sz);
static const char* group;
static bool group_skip, avoid_punctuation;
static uint32_t group_len, group_len_div_2;
static void setGroup(const char* grp, uint32_t grp_len = 0, bool bskip = false)
{
U_TRACE(0, "UTokenizer::setGroup(%S,%u,%b)", grp, grp_len, bskip)
group = grp;
group_skip = bskip;
group_len = grp_len;
group_len_div_2 = grp_len / 2;
}
static void setSkipTagXML(bool flag)
{
U_TRACE(0, "UTokenizer::setSkipTagXML(%b)", flag)
if (flag) setGroup(U_CONSTANT_TO_PARAM("<>"), true);
else setGroup(0, 0, false);
}
static void setAvoidPunctuation(bool flag)
{
U_TRACE(0, "UTokenizer::setAvoidPunctuation(%b)", flag)
avoid_punctuation = flag;
}
#if defined(U_STDCPP_ENABLE) && defined(DEBUG)
const char* dump(bool reset) const;
#endif
protected:
const char* s;
const char* end;
const char* delim;
UString str;
private:
#ifdef U_COMPILER_DELETE_MEMBERS
UTokenizer(const UTokenizer&) = delete;
UTokenizer& operator=(const UTokenizer&) = delete;
#else
UTokenizer(const UTokenizer&) {}
UTokenizer& operator=(const UTokenizer&) { return *this; }
#endif
friend class UQueryParser;
};
#endif