// cquery.cpp #include #include #include "cquery.h" bool WeightWord::check_for_duplicate; UVector* WeightWord::vec; UHashMap* WeightWord::tbl; void WeightWord::clear() { U_TRACE(5, "WeightWord::clear()") if (tbl) { tbl->_length = 0; tbl->deallocate(); delete tbl; tbl = U_NULLPTR; } if (vec) { delete vec; vec = U_NULLPTR; } } void WeightWord::push() { U_TRACE(5, "WeightWord::push()") U_INTERNAL_DUMP("UPosting::word_freq = %d", UPosting::word_freq) U_INTERNAL_ASSERT(*UPosting::filename) WeightWord* item; U_NEW(WeightWord, item, WeightWord(*UPosting::filename, UPosting::word_freq)); if (check_for_duplicate) { if (tbl == U_NULLPTR) U_NEW(UHashMap, tbl, UHashMap); if (tbl->find(*UPosting::filename)) { U_INTERNAL_DUMP("DUPLICATE") delete item; return; } tbl->insertAfterFind(*UPosting::filename, item); } if (vec == U_NULLPTR) U_NEW(UVector, vec, UVector); vec->push_back(item); } __pure int WeightWord::compareObj(const void* obj1, const void* obj2) { U_TRACE(5, "WeightWord::compareObj(%p,%p)", obj1, obj2) int cmp; #ifdef U_STDCPP_ENABLE cmp = (((const WeightWord*)obj1)->word_freq < ((const WeightWord*)obj2)->word_freq ? 1 : ((const WeightWord*)obj1)->word_freq > ((const WeightWord*)obj2)->word_freq ? 0 : ((const WeightWord*)obj1)->filename.compare(((const WeightWord*)obj2)->filename) < 0); #else cmp = ((*(const WeightWord**)obj1)->word_freq < (*(const WeightWord**)obj2)->word_freq ? 1 : (*(const WeightWord**)obj1)->word_freq > (*(const WeightWord**)obj2)->word_freq ? -1 : (*(const WeightWord**)obj1)->filename.compare((*(const WeightWord**)obj2)->filename)); #endif return cmp; } void WeightWord::sortObjects() { U_TRACE(5+256, "WeightWord::sortObjects()") if (size() > 1) vec->sort(compareObj); } void WeightWord::dumpObjects() { U_TRACE(5, "WeightWord::dumpObjects()") if (vec) { sortObjects(); for (uint32_t i = 0, n = vec->size(); i < n; ++i) { (void) write(1, U_STRING_TO_PARAM((*vec)[i]->filename)); (void) write(1, U_CONSTANT_TO_PARAM("\n")); } } } UString* Query::request; UQueryParser* Query::parser; Query::Query() { U_TRACE(5, "Query::Query()") U_INTERNAL_ASSERT_EQUALS(parser, U_NULLPTR) U_INTERNAL_ASSERT_EQUALS(request, U_NULLPTR) U_NEW(UQueryParser, parser, UQueryParser); U_NEW(UString, request, UString); } Query::~Query() { U_TRACE(5, "Query::~Query()") clear(); delete parser; delete request; } void Query::clear() { U_TRACE(5, "Query::clear()") UPosting::reset(); WeightWord::clear(); parser->clear(); request->clear(); } int Query::query_meta(UStringRep* word_rep, UStringRep* value) { U_TRACE(5, "Query::query_meta(%.*S,%p)", U_STRING_TO_TRACE(*word_rep), value) if (u_dosmatch( word_rep->data(), word_rep->size(), UPosting::word->data(), UPosting::word->size(), UPosting::ignore_case ? FNM_CASEFOLD : 0)) { UPosting::posting->_assign(value); UPosting::callForPostingAndSetFilename(WeightWord::push); } U_RETURN(1); } int Query::push(UStringRep* str_inode, UStringRep* filename) { U_TRACE(5, "Query::push(%#.*S,%.*S)", U_STRING_TO_TRACE(*str_inode), U_STRING_TO_TRACE(*filename)) UPosting::filename->_assign(filename); UPosting::word_freq = 0; WeightWord::push(); U_RETURN(1); } int Query::query_expr(UStringRep* str_inode, UStringRep* filename) { U_TRACE(5, "Query::query_expr(%#.*S,%.*S)", U_STRING_TO_TRACE(*str_inode), U_STRING_TO_TRACE(*filename)) UPosting::setDocID(str_inode); if (parser->evaluate()) push(str_inode, filename); U_RETURN(1); } // NB: may be there are difficult with quoting (MINGW)... const char* Query::checkQuoting(char* argv[], uint32_t& len) { U_TRACE(5, "Query::checkQuoting(%p,%u)", argv, len) U_INTERNAL_DUMP("optind = %d", optind) U_INTERNAL_ASSERT_RANGE(1,optind,3) U_DUMP_ATTRS(argv) // [0] -> path_prog // [1] -> "-c" // [2] -> "index.cfg" // [3] -> "query..." // [4] -> '\0' const char* ptr = argv[optind]; if (argv[optind+1] == U_NULLPTR) len = u__strlen(ptr, __PRETTY_FUNCTION__); else { request->setBuffer(U_CAPACITY); do { U_INTERNAL_DUMP("ptr = %S", ptr) bool bquote = (*ptr != '"' && strchr(ptr, ' ') != U_NULLPTR); request->push_back(' '); if (bquote) request->push_back('"'); (void) request->append(ptr); if (bquote) request->push_back('"'); } while ((ptr = argv[++optind]) && ptr[0]); len = request->size()-1; ptr = request->c_str()+1; U_WARNING("quoting issue detected, actual query to be executed is(%u) <%.*s>", len, ptr); } U_RETURN_POINTER(ptr,const char); } void Query::run(const char* ptr, uint32_t len, UVector* vec) { U_TRACE(5, "Query::run(%.*S,%u,%p)", len, ptr, len, vec) if (vec) { vec->clear(); WeightWord::vec = vec; } *UPosting::word = UStringExt::removeEscape(UStringExt::trim(ptr, len)); U_INTERNAL_DUMP("UPosting::word = %.*S", U_STRING_TO_TRACE(*UPosting::word)) if (UServices::dosMatchWithOR(*UPosting::word, U_CONSTANT_TO_PARAM("* or *|* and *|*not *"), FNM_IGNORECASE)) { *UPosting::word = UStringExt::substitute(*UPosting::word, U_CONSTANT_TO_PARAM(" or "), U_CONSTANT_TO_PARAM(" OR ")); *UPosting::word = UStringExt::substitute(*UPosting::word, U_CONSTANT_TO_PARAM(" and "), U_CONSTANT_TO_PARAM(" AND ")); *UPosting::word = UStringExt::substitute(*UPosting::word, U_CONSTANT_TO_PARAM("not "), U_CONSTANT_TO_PARAM("NOT ")); if (parser->parse(*UPosting::word)) { parser->startEvaluate(UPosting::findDocID); cdb_names->callForAllEntryWithPattern(query_expr, U_NULLPTR); } } else { bool is_space = (UPosting::word->findWhiteSpace() != U_NOT_FOUND); if (UPosting::word->find('?') == U_NOT_FOUND && UPosting::word->find('*') == U_NOT_FOUND) { UPosting::callForPosting(WeightWord::push, is_space); } else { if (is_space) U_ERROR("syntax error on query"); if (UPosting::word->equal(U_CONSTANT_TO_PARAM("*"))) cdb_names->callForAllEntryWithPattern(push, U_NULLPTR); else { WeightWord::check_for_duplicate = true; cdb_words->callForAllEntryWithPattern(query_meta, U_NULLPTR); WeightWord::check_for_duplicate = false; } } } } // DEBUG #if defined(DEBUG) && defined(U_STDCPP_ENABLE) const char* WeightWord::dump(bool reset) const { *UObjectIO::os << "word_freq " << word_freq << '\n' << "filename (UString " << (void*)&filename << ')'; if (reset) { UObjectIO::output(); return UObjectIO::buffer_output; } return U_NULLPTR; } #endif