## ----------------------------------------------------------------------------------------------------------
## index.cfg - configuration data for the index program
## ----------------------------------------------------------------------------------------------------------
## DB                  location of the index db (must be terminated by /)
## DIRECTORY           location of the docs to index
## DIMENSION           approximate number of docs to index
## MIN_WORD_SIZE       minimum length of the words that will be indexed
## IGNORE_CASE         case sensitive or not
## SKIP_TAG_XML        skip indexing of XML tags for files with the listed suffixes
## BAD_WORDS           word templates not to index, for files with a suffix listed in BAD_WORDS_EXT
## BAD_WORDS_EXT       file extensions to which BAD_WORDS applies
## DIR_CONTENT_AS_DOC  consider the content of a directory as one document (for pongo)
## FILTER_EXT          suffixes of the files that are preprocessed before indexing
## FILTER_CMD          preprocessing commands for the files with a suffix listed in FILTER_EXT
## ----------------------------------------------------------------------------------------------------------

INDEX_CFG
{
    ## location of the index db and of the documents to index
    DB IR/db/ # must be terminated by /
    DIRECTORY IR/doc # location of docs to index

    ## mingw32: alternative Windows paths (disabled)
    ## DB C:\msys\1.0\IR\db\
    ## DIRECTORY C:\msys\1.0\IR\doc

    DIMENSION 800 # approximate number of docs to index
    MIN_WORD_SIZE 3 # sets the minimum length of words that will be indexed

    ## case sensitive or not
    ## NOTE(review): presumably "yes" means indexing is case-insensitive - confirm against the index program
    IGNORE_CASE yes

    ## skip indexing of XML tags for files with the listed suffixes
    SKIP_TAG_XML "[ xml ]"

    ## list of word templates not to index, for files with a suffix listed in BAD_WORDS_EXT
    ## NOTE(review): looks like '|'-separated templates in which '?' stands for one character
    ##               (date, time, literal word) - confirm the pattern syntax
    ## NOTE(review): BAD_WORDS is active while BAD_WORDS_EXT below is commented out -
    ##               verify which files the templates apply to in that case
    BAD_WORDS ??/??/????|??:??:??|workflow
    ## BAD_WORDS_EXT "[ xml ]" # extension file for BAD_WORDS

    ## consider the content of a directory as one document (for pongo) (disabled)
    ## DIR_CONTENT_AS_DOC yes

    ## preprocessing for files with the listed suffixes (disabled)
    ## NOTE(review): presumably the n-th command in FILTER_CMD handles the n-th suffix in FILTER_EXT - confirm
    ## NOTE(review): "htuml2txt" looks like a typo in the command name - verify before enabling
    ## FILTER_EXT "[ pdf doc html ]"
    ## FILTER_CMD "[ \"pdftotext -raw -nopgbrk -q $FILE -\" \"catdoc -aw $FILE\" \"htuml2txt\" ]"
}