mirror of
https://github.com/stefanocasazza/ULib.git
synced 2025-09-28 19:05:55 +08:00
303 lines
7.4 KiB
C++
303 lines
7.4 KiB
C++
// cquery.cpp
|
|
|
|
#include <ulib/query/parser.h>
|
|
#include <ulib/utility/string_ext.h>
|
|
|
|
#include "cquery.h"
|
|
|
|
bool WeightWord::check_for_duplicate;
|
|
UVector<WeightWord*>* WeightWord::vec;
|
|
UHashMap<WeightWord*>* WeightWord::tbl;
|
|
|
|
void WeightWord::clear()
|
|
{
|
|
U_TRACE(5, "WeightWord::clear()")
|
|
|
|
if (tbl)
|
|
{
|
|
tbl->_length = 0;
|
|
|
|
tbl->deallocate();
|
|
|
|
delete tbl;
|
|
tbl = U_NULLPTR;
|
|
}
|
|
|
|
if (vec)
|
|
{
|
|
delete vec;
|
|
vec = U_NULLPTR;
|
|
}
|
|
}
|
|
|
|
void WeightWord::push()
|
|
{
|
|
U_TRACE(5, "WeightWord::push()")
|
|
|
|
U_INTERNAL_DUMP("UPosting::word_freq = %d", UPosting::word_freq)
|
|
|
|
U_INTERNAL_ASSERT(*UPosting::filename)
|
|
|
|
WeightWord* item;
|
|
|
|
U_NEW(WeightWord, item, WeightWord(*UPosting::filename, UPosting::word_freq));
|
|
|
|
if (check_for_duplicate)
|
|
{
|
|
if (tbl == U_NULLPTR) U_NEW(UHashMap<WeightWord*>, tbl, UHashMap<WeightWord*>);
|
|
|
|
if (tbl->find(*UPosting::filename))
|
|
{
|
|
U_INTERNAL_DUMP("DUPLICATE")
|
|
|
|
delete item;
|
|
|
|
return;
|
|
}
|
|
|
|
tbl->insertAfterFind(*UPosting::filename, item);
|
|
}
|
|
|
|
if (vec == U_NULLPTR) U_NEW(UVector<WeightWord*>, vec, UVector<WeightWord*>);
|
|
|
|
vec->push_back(item);
|
|
}
|
|
|
|
// Comparison callback handed to UVector::sort() by sortObjects().
//
// U_STDCPP_ENABLE build: the arguments are the WeightWord objects themselves and the result is
// a 0/1 value: 1 if obj1 has the lower word_freq, 0 if it has the higher one, otherwise whether
// obj1's filename compares less than obj2's.
//
// Other builds: the arguments point to WeightWord pointers and the result is three-way:
// 1 if obj1 has the lower word_freq, -1 if it has the higher one, otherwise the result of the
// filename comparison.
//
// NOTE(review): if the first form is consumed as a "less than" predicate and the second as a
// qsort() comparator, the two builds order word_freq in opposite directions - confirm

__pure int WeightWord::compareObj(const void* obj1, const void* obj2)
{
   U_TRACE(5, "WeightWord::compareObj(%p,%p)", obj1, obj2)

#ifdef U_STDCPP_ENABLE
   const WeightWord* lhs = (const WeightWord*)obj1;
   const WeightWord* rhs = (const WeightWord*)obj2;
#else
   const WeightWord* lhs = *(const WeightWord**)obj1;
   const WeightWord* rhs = *(const WeightWord**)obj2;
#endif

   int cmp;

#ifdef U_STDCPP_ENABLE
   if      (lhs->word_freq < rhs->word_freq) cmp = 1;
   else if (lhs->word_freq > rhs->word_freq) cmp = 0;
   else                                      cmp = (lhs->filename.compare(rhs->filename) < 0);
#else
   if      (lhs->word_freq < rhs->word_freq) cmp = 1;
   else if (lhs->word_freq > rhs->word_freq) cmp = -1;
   else                                      cmp = lhs->filename.compare(rhs->filename);
#endif

   return cmp;
}
|
|
|
|
void WeightWord::sortObjects()
|
|
{
|
|
U_TRACE(5+256, "WeightWord::sortObjects()")
|
|
|
|
if (size() > 1) vec->sort(compareObj);
|
|
}
|
|
|
|
void WeightWord::dumpObjects()
|
|
{
|
|
U_TRACE(5, "WeightWord::dumpObjects()")
|
|
|
|
if (vec)
|
|
{
|
|
sortObjects();
|
|
|
|
for (uint32_t i = 0, n = vec->size(); i < n; ++i)
|
|
{
|
|
(void) write(1, U_STRING_TO_PARAM((*vec)[i]->filename));
|
|
(void) write(1, U_CONSTANT_TO_PARAM("\n"));
|
|
}
|
|
}
|
|
}
|
|
|
|
UString* Query::request;
|
|
UQueryParser* Query::parser;
|
|
|
|
// Allocate the static parser and request buffer.
// Both pointers are asserted to be null on entry, so only one Query may be alive at a time.

Query::Query()
{
   U_TRACE(5, "Query::Query()")

   U_INTERNAL_ASSERT_EQUALS(parser,  U_NULLPTR)
   U_INTERNAL_ASSERT_EQUALS(request, U_NULLPTR)

   U_NEW(UQueryParser, parser,  UQueryParser);
   U_NEW(UString,      request, UString);
}
|
|
|
|
// Reset the query state, then destroy the static parser and request buffer.
//
// The static pointers are set back to null after deletion: the constructor asserts that both
// are null, so leaving them dangling would break the construction of a later Query.

Query::~Query()
{
   U_TRACE(5, "Query::~Query()")

   clear(); // NB: it uses parser and request, so it must run before they are deleted...

   delete parser;

   parser = U_NULLPTR;

   delete request;

   request = U_NULLPTR;
}
|
|
|
|
void Query::clear()
|
|
{
|
|
U_TRACE(5, "Query::clear()")
|
|
|
|
UPosting::reset();
|
|
WeightWord::clear();
|
|
|
|
parser->clear();
|
|
request->clear();
|
|
}
|
|
|
|
// Callback run for each entry of cdb_words by run() when the query contains wildcards.
//
// When word_rep matches UPosting::word (case folded if UPosting::ignore_case is set), value is
// assigned to UPosting::posting and WeightWord::push() is called through
// UPosting::callForPostingAndSetFilename().
//
// Always returns 1.
// NOTE(review): presumably 1 tells the caller to go on with the next entry - confirm

int Query::query_meta(UStringRep* word_rep, UStringRep* value)
{
   U_TRACE(5, "Query::query_meta(%.*S,%p)", U_STRING_TO_TRACE(*word_rep), value)

   const int flags = (UPosting::ignore_case ? FNM_CASEFOLD : 0);

   const bool matched = u_dosmatch(     word_rep->data(),      word_rep->size(),
                                   UPosting::word->data(), UPosting::word->size(), flags);

   if (matched)
      {
      UPosting::posting->_assign(value);

      UPosting::callForPostingAndSetFilename(WeightWord::push);
      }

   U_RETURN(1);
}
|
|
|
|
// Callback that records filename as a result with a word frequency of 0.
//
// str_inode is only traced. Always returns 1.

int Query::push(UStringRep* str_inode, UStringRep* filename)
{
   U_TRACE(5, "Query::push(%#.*S,%.*S)", U_STRING_TO_TRACE(*str_inode), U_STRING_TO_TRACE(*filename))

   // WeightWord::push() reads its input from these UPosting statics

   UPosting::filename->_assign(filename);
   UPosting::word_freq = 0;

   WeightWord::push();

   U_RETURN(1);
}
|
|
|
|
// Callback run for each entry of cdb_names by run() when the query is a boolean expression.
//
// Sets the current document id from str_inode, evaluates the parsed expression and, if it
// holds, records filename through push(). Always returns 1.

int Query::query_expr(UStringRep* str_inode, UStringRep* filename)
{
   U_TRACE(5, "Query::query_expr(%#.*S,%.*S)", U_STRING_TO_TRACE(*str_inode), U_STRING_TO_TRACE(*filename))

   UPosting::setDocID(str_inode);

   if (parser->evaluate())
      {
      (void) push(str_inode, filename);
      }

   U_RETURN(1);
}
|
|
|
|
// NB: there may be difficulties with quoting (MINGW)...
|
|
|
|
const char* Query::checkQuoting(char* argv[], uint32_t& len)
|
|
{
|
|
U_TRACE(5, "Query::checkQuoting(%p,%u)", argv, len)
|
|
|
|
U_INTERNAL_DUMP("optind = %d", optind)
|
|
|
|
U_INTERNAL_ASSERT_RANGE(1,optind,3)
|
|
|
|
U_DUMP_ATTRS(argv)
|
|
|
|
// [0] -> path_prog
|
|
// [1] -> "-c"
|
|
// [2] -> "index.cfg"
|
|
// [3] -> "query..."
|
|
// [4] -> '\0'
|
|
|
|
const char* ptr = argv[optind];
|
|
|
|
if (argv[optind+1] == U_NULLPTR) len = u__strlen(ptr, __PRETTY_FUNCTION__);
|
|
else
|
|
{
|
|
request->setBuffer(U_CAPACITY);
|
|
|
|
do {
|
|
U_INTERNAL_DUMP("ptr = %S", ptr)
|
|
|
|
bool bquote = (*ptr != '"' && strchr(ptr, ' ') != U_NULLPTR);
|
|
|
|
request->push_back(' ');
|
|
if (bquote) request->push_back('"');
|
|
(void) request->append(ptr);
|
|
if (bquote) request->push_back('"');
|
|
}
|
|
while ((ptr = argv[++optind]) && ptr[0]);
|
|
|
|
len = request->size()-1;
|
|
ptr = request->c_str()+1;
|
|
|
|
U_WARNING("quoting issue detected, actual query to be executed is(%u) <%.*s>", len, ptr);
|
|
}
|
|
|
|
U_RETURN_POINTER(ptr,const char);
|
|
}
|
|
|
|
void Query::run(const char* ptr, uint32_t len, UVector<WeightWord*>* vec)
|
|
{
|
|
U_TRACE(5, "Query::run(%.*S,%u,%p)", len, ptr, len, vec)
|
|
|
|
if (vec)
|
|
{
|
|
vec->clear();
|
|
|
|
WeightWord::vec = vec;
|
|
}
|
|
|
|
*UPosting::word = UStringExt::removeEscape(UStringExt::trim(ptr, len));
|
|
|
|
U_INTERNAL_DUMP("UPosting::word = %.*S", U_STRING_TO_TRACE(*UPosting::word))
|
|
|
|
if (UServices::dosMatchWithOR(*UPosting::word, U_CONSTANT_TO_PARAM("* or *|* and *|*not *"), FNM_IGNORECASE))
|
|
{
|
|
*UPosting::word = UStringExt::substitute(*UPosting::word, U_CONSTANT_TO_PARAM(" or "),
|
|
U_CONSTANT_TO_PARAM(" OR "));
|
|
*UPosting::word = UStringExt::substitute(*UPosting::word, U_CONSTANT_TO_PARAM(" and "),
|
|
U_CONSTANT_TO_PARAM(" AND "));
|
|
*UPosting::word = UStringExt::substitute(*UPosting::word, U_CONSTANT_TO_PARAM("not "),
|
|
U_CONSTANT_TO_PARAM("NOT "));
|
|
|
|
if (parser->parse(*UPosting::word))
|
|
{
|
|
parser->startEvaluate(UPosting::findDocID);
|
|
|
|
cdb_names->callForAllEntryWithPattern(query_expr, U_NULLPTR);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
bool is_space = (UPosting::word->findWhiteSpace() != U_NOT_FOUND);
|
|
|
|
if (UPosting::word->find('?') == U_NOT_FOUND &&
|
|
UPosting::word->find('*') == U_NOT_FOUND)
|
|
{
|
|
UPosting::callForPosting(WeightWord::push, is_space);
|
|
}
|
|
else
|
|
{
|
|
if (is_space) U_ERROR("syntax error on query");
|
|
|
|
if (UPosting::word->equal(U_CONSTANT_TO_PARAM("*"))) cdb_names->callForAllEntryWithPattern(push, U_NULLPTR);
|
|
else
|
|
{
|
|
WeightWord::check_for_duplicate = true;
|
|
|
|
cdb_words->callForAllEntryWithPattern(query_meta, U_NULLPTR);
|
|
|
|
WeightWord::check_for_duplicate = false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// DEBUG
|
|
|
|
#if defined(DEBUG) && defined(U_STDCPP_ENABLE)
// Write the object's state (word_freq and the address of filename) to the UObjectIO stream.
//
// reset: if true, UObjectIO::output() is called and UObjectIO::buffer_output is returned,
//        otherwise the result is a null pointer

const char* WeightWord::dump(bool reset) const
{
   *UObjectIO::os << "word_freq " << word_freq << '\n';
   *UObjectIO::os << "filename (UString " << (void*)&filename << ')';

   if (reset == false) return U_NULLPTR;

   UObjectIO::output();

   return UObjectIO::buffer_output;
}
#endif
|