1
0
mirror of https://github.com/stefanocasazza/ULib.git synced 2025-09-28 19:05:55 +08:00
ULib/tests/examples/index_xml.cfg
2015-01-23 17:24:36 +01:00

47 lines
2.2 KiB
INI

## ----------------------------------------------------------------------------------------------------------
## index_xml.cfg - configuration data for index program
## ----------------------------------------------------------------------------------------------------------
## DB location for index db (must be terminated by /)
## DIRECTORY location of docs to index
## DIMENSION approximate number of docs to index
## MIN_WORD_SIZE sets the minimum length of words that will be indexed
## IGNORE_CASE case sensitive or not
## SKIP_TAG_XML skip index of tag xml for files with suffix indicated
## BAD_WORDS word templates that must not be indexed for files with the suffix indicated in BAD_WORDS_EXT
## BAD_WORDS_EXT file extensions to which BAD_WORDS applies
## DIR_CONTENT_AS_DOC consider content of directory as one document (for pongo)
## FILTER_EXT preprocessing for files with suffix indicated
## FILTER_CMD preprocessing command for files with suffix indicated in FILTER_EXT
## ----------------------------------------------------------------------------------------------------------
INDEX_CFG
{
## location for index db
DB IR/db_xml/ # must be terminated by /
DIRECTORY inp/xml2txt # location of docs to index
## mingw32 (alternative Windows-style paths, kept commented out)
## DB C:\msys\1.0\IR\db\
## DIRECTORY C:\msys\1.0\IR\doc
DIMENSION 800 # approximate number of docs to index
MIN_WORD_SIZE 3 # sets the minimum length of words that will be indexed
## case sensitive or not
## NOTE(review): "yes" presumably makes indexing case-insensitive - confirm against the index program
IGNORE_CASE yes
## skip index of tag xml for files with suffix indicated
SKIP_TAG_XML "[ eml ]"
## template words to not index for files with suffix indicated in BAD_WORDS_EXT
## (entries are separated by '|'; the first two look like date and time templates)
BAD_WORDS ??/??/????|??:??:??|value
BAD_WORDS_EXT "[ xml ]" # extension file for BAD_WORDS
## consider content of directory as one document (for pongo)
## (currently disabled: the setting below is commented out)
## DIR_CONTENT_AS_DOC yes
## preprocessing for files with suffix indicated
FILTER_EXT "[ xml ]"
## preprocessing command for files with suffix indicated in FILTER_EXT
## NOTE(review): the xml2txt path is absolute and tied to ulib-1.0.3 - confirm it exists on the target host
## NOTE(review): $FILE is presumably replaced with the name of the file being filtered - confirm
FILTER_CMD "[ \"/usr/src/ulib-1.0.3/examples/xml2txt/xml2txt -t di,token,domiciliazione,contratto,long-term-id $FILE\" ]"
}