piki/pages/search.py

108 lines
2.7 KiB
Python

from datetime import datetime
from django.conf import settings
import fstools
import logging
import os
from whoosh.fields import Schema, ID, TEXT, DATETIME
from whoosh.qparser.dateparse import DateParserPlugin
from whoosh import index, qparser
from pages.page import base_page
logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)
SCHEMA = Schema(
id=ID(unique=True, stored=True),
# Page
title=TEXT,
page_src=TEXT,
page_tags=TEXT,
# metadata
creation_time=DATETIME,
modified_time=DATETIME,
modified_user=TEXT
)
def mk_whooshpath_if_needed():
if not os.path.exists(settings.WHOOSH_PATH):
fstools.mkdir(settings.WHOOSH_PATH)
def create_index():
mk_whooshpath_if_needed()
logger.debug('Search Index created.')
return index.create_in(settings.WHOOSH_PATH, schema=SCHEMA)
def rebuild_index(ix):
page_path = fstools.dirlist(settings.PAGES_ROOT, rekursive=False)
for path in page_path:
bp = base_page(path)
add_item(ix, bp)
return len(page_path)
def load_index():
mk_whooshpath_if_needed()
try:
ix = index.open_dir(settings.WHOOSH_PATH)
except index.EmptyIndexError:
ix = create_index()
else:
logger.debug('Search Index opened.')
return ix
def add_item(ix, bp: base_page):
# Define Standard data
#
data = dict(
id=bp.rel_path,
#
title=bp.title,
page_src=bp.raw_page_src,
page_tags=bp.page_tags,
#
creation_time=datetime.fromtimestamp(bp._meta_data.get(bp._meta_data.KEY_CREATION_TIME)),
modified_time=datetime.fromtimestamp(bp._meta_data.get(bp._meta_data.KEY_MODIFIED_TIME)),
modified_user=bp._meta_data.get(bp._meta_data.KEY_MODIFIED_USER)
)
with ix.writer() as w:
logger.info('Adding document with id=%s to the search index.', data.get('id'))
w.add_document(**data)
for key in data:
logger.debug(' - Adding %s=%s', key, repr(data[key]))
def whoosh_search(search_txt):
ix = load_index()
qp = qparser.MultifieldParser(['title', 'page_src', 'page_tags'], ix.schema)
qp.add_plugin(DateParserPlugin(free=True))
try:
q = qp.parse(search_txt)
except AttributeError:
return None
except Exception:
return None
with ix.searcher() as s:
results = s.search(q, limit=None)
rpl = []
for hit in results:
rpl.append(hit['id'])
return rpl
def delete_item(ix, bp: base_page):
with ix.writer() as w:
logger.info('Removing document with id=%s from the search index.', bp.rel_path)
w.delete_by_term("task_id", bp.rel_path)
def update_item(bp: base_page):
ix = load_index()
delete_item(ix, bp)
add_item(ix, bp)