1
0
mirror of https://github.com/stefanocasazza/ULib.git synced 2025-10-19 19:55:22 +08:00
ULib/tests/examples/index_dir.cfg
2015-01-23 17:24:36 +01:00

47 lines
2.2 KiB
INI

## ----------------------------------------------------------------------------------------------------------
## index_dir.cfg - configuration data for index program
## ----------------------------------------------------------------------------------------------------------
## DB location for index db (must be terminated by /)
## DIRECTORY location of docs to index
## DIMENSION approximate number of docs to index
## MIN_WORD_SIZE sets the minimum length of words that will be indexed
## IGNORE_CASE case sensitive or not
## SKIP_TAG_XML skip indexing of XML tags for files with the indicated suffix
## BAD_WORDS template words to not index for files with suffix indicated in BAD_WORDS_EXT
## BAD_WORDS_EXT extension file for BAD_WORDS
## DIR_CONTENT_AS_DOC consider content of directory as one document (for pongo)
## FILTER_EXT preprocessing for files with suffix indicated
## FILTER_CMD preprocessing command for files with suffix indicated in FILTER_EXT
## ----------------------------------------------------------------------------------------------------------
INDEX_DIR_CFG
{
## location for index db
DB IR/db_dir/ # must be terminated by /
DIRECTORY IR/doc_dir # location of docs to index
## mingw32
## DB C:\msys\1.0\IR\db\
## DIRECTORY C:\msys\1.0\IR\doc
DIMENSION 800 # approximate number of docs to index
MIN_WORD_SIZE 3 # sets the minimum length of words that will be indexed
## case sensitive or not
IGNORE_CASE yes
## skip indexing of XML tags for files with the indicated suffix
SKIP_TAG_XML "[ eml ]"
## template words to not index for files with suffix indicated in BAD_WORDS_EXT
BAD_WORDS ??/??/????|??:??:??|workflow
## BAD_WORDS_EXT "[ xml ]" # extension file for BAD_WORDS
## consider content of directory as one document (for pongo)
DIR_CONTENT_AS_DOC yes
## preprocessing for files with suffix indicated
## FILTER_EXT "[ pdf doc html ]"
## FILTER_CMD "[ \"pdftotext -raw -nopgbrk -q $FILE -\" \"catdoc -aw $FILE\" \"htuml2txt\" ]"
}