123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596 |
- from django.conf import settings
-
- import fstools
- import logging
- import os
- from whoosh.fields import Schema, ID, TEXT
- from whoosh.qparser.dateparse import DateParserPlugin
- from whoosh import index, qparser
-
- from pages.page import base_page
-
# Module logger, created as a child of the project's root logger.
logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)


# Whoosh schema of the page search index.
SCHEMA = Schema(
    # Unique document key; stored so that it can be read back from search hits.
    id=ID(unique=True, stored=True),
    # Page content fields (searchable text).
    title=TEXT,
    page_src=TEXT,
)
-
-
def mk_whooshpath_if_needed():
    """Create the directory ``settings.WHOOSH_PATH`` unless it already exists."""
    if os.path.exists(settings.WHOOSH_PATH):
        return
    fstools.mkdir(settings.WHOOSH_PATH)
-
-
def create_index():
    """Create a new search index in ``settings.WHOOSH_PATH`` using ``SCHEMA``.

    The index directory is created first if it is missing.

    Returns:
        The index object returned by ``index.create_in``.
    """
    mk_whooshpath_if_needed()
    # NOTE: the message is emitted before the index is actually created.
    logger.debug('Search Index created.')
    new_index = index.create_in(settings.WHOOSH_PATH, schema=SCHEMA)
    return new_index
-
-
def rebuild_index(ix):
    """Add every page listed under ``settings.PAGES_ROOT`` to the index *ix*.

    Each path returned by ``fstools.dirlist`` is wrapped in a ``base_page``
    and handed to ``add_item``. This function does not remove any existing
    documents itself.

    Returns:
        The number of paths that were listed (and passed to ``add_item``).
    """
    # ``rekursive`` is the keyword expected by fstools.dirlist
    # (presumably disables descending into sub-directories -- TODO confirm).
    listed_paths = fstools.dirlist(settings.PAGES_ROOT, rekursive=False)
    for page_dir in listed_paths:
        add_item(ix, base_page(page_dir))
    return len(listed_paths)
-
-
def load_index():
    """Open the search index in ``settings.WHOOSH_PATH``, creating it if empty.

    The index directory is created first if it is missing. When
    ``index.open_dir`` raises ``index.EmptyIndexError``, a fresh index is
    created via ``create_index`` instead.

    Returns:
        The opened (or newly created) index object.
    """
    mk_whooshpath_if_needed()
    try:
        opened = index.open_dir(settings.WHOOSH_PATH)
    except index.EmptyIndexError:
        # Nothing to open yet: fall back to creating a new index.
        return create_index()
    logger.debug('Search Index opened.')
    return opened
-
-
def add_item(ix, bp: base_page):
    """Add the page *bp* as a document to the search index *ix*.

    The document consists of ``bp.rel_path`` (as ``id``), ``bp.title`` and
    ``bp.raw_page_src`` (as ``page_src``). It is written with
    ``add_document``, so an existing document with the same id is not
    replaced by this function.

    Args:
        ix: Index object providing ``writer()``.
        bp: Page whose data is indexed.
    """
    # Field values of the document, keyed by schema field name.
    fields = {
        'id': bp.rel_path,
        'title': bp.title,
        'page_src': bp.raw_page_src,
    }
    with ix.writer() as writer:
        logger.info('Adding document with id=%s to the search index.', fields['id'])
        writer.add_document(**fields)
        for name, value in fields.items():
            logger.debug(' - Adding %s=%s', name, repr(value))
-
-
def whoosh_search(search_txt):
    """Search the page index for *search_txt*.

    The query is parsed against the ``title`` and ``page_src`` fields.

    Args:
        search_txt: The query string entered by the user.

    Returns:
        A list with the ``id`` of every matching document, or ``None`` if the
        query could not be parsed.
    """
    ix = load_index()
    qp = qparser.MultifieldParser(['title', 'page_src'], ix.schema)
    qp.add_plugin(DateParserPlugin(free=True))
    try:
        q = qp.parse(search_txt)
    except Exception:
        # Best effort: an unparsable query yields "no result" (None) for the
        # caller, but the reason is logged instead of being dropped silently.
        logger.warning('Unable to parse search query %r.', search_txt, exc_info=True)
        return None
    with ix.searcher() as s:
        # Hits have to be read while the searcher is still open.
        return [hit['id'] for hit in s.search(q, limit=None)]
-
-
def delete_item(ix, bp: base_page):
    """Remove the document belonging to page *bp* from the search index *ix*.

    Args:
        ix: Index object providing ``writer()``.
        bp: Page whose document (identified by ``bp.rel_path``) is removed.
    """
    with ix.writer() as w:
        logger.info('Removing document with id=%s from the search index.', bp.rel_path)
        # The unique key field of the schema is "id"; documents are stored
        # with id=bp.rel_path, so the deletion has to target that field.
        w.delete_by_term("id", bp.rel_path)
-
-
def update_item(bp: base_page):
    """Refresh the search index entry of page *bp*.

    Loads the index, removes the page's document via ``delete_item`` and adds
    it again via ``add_item``.

    Args:
        bp: Page whose index entry is replaced.
    """
    search_index = load_index()
    # Delete first, then add again (two separate writer sessions).
    delete_item(search_index, bp)
    add_item(search_index, bp)
|