piki/pages/search.py

108 lines
2.6 KiB
Python
Raw Normal View History

from datetime import datetime
2024-10-09 09:57:05 +02:00
from django.conf import settings
import fstools
import logging
import os
from whoosh.fields import Schema, ID, TEXT, DATETIME
2024-10-09 09:57:05 +02:00
from whoosh.qparser.dateparse import DateParserPlugin
from whoosh import index, qparser
from pages.page import page_wrapped, full_path_all_pages
2024-10-09 09:57:05 +02:00
# Module logger: a child (named after this module) of the logger whose name
# is configured in settings.ROOT_LOGGER_NAME.
logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)
# Whoosh schema describing one document (one page) in the search index.
SCHEMA = Schema(
    # Unique, stored key of the document; search hits are reported by it.
    id=ID(unique=True, stored=True),
    # Page content
    title=TEXT,
    page_src=TEXT,
    tag=TEXT,
    # Page metadata
    creation_time=DATETIME,
    modified_time=DATETIME,
    modified_user=TEXT,
)
def mk_whooshpath_if_needed():
    """Create the directory ``settings.WHOOSH_PATH`` unless it already exists."""
    whoosh_dir = settings.WHOOSH_PATH
    if os.path.exists(whoosh_dir):
        # Nothing to do, the index directory is already there.
        return
    fstools.mkdir(whoosh_dir)
def create_index():
    """Create a new search index in ``settings.WHOOSH_PATH``.

    The index directory is created first if it is missing. The new index
    uses the module level ``SCHEMA``.

    Returns:
        The index object returned by ``whoosh.index.create_in``.
    """
    mk_whooshpath_if_needed()
    ix = index.create_in(settings.WHOOSH_PATH, schema=SCHEMA)
    # Log only after the index really exists, so a failing create_in() does
    # not leave a misleading "created" entry in the log.
    logger.debug('Search Index created.')
    return ix
def rebuild_index(ix):
    """Add every page reported by ``full_path_all_pages()`` to the index *ix*.

    Args:
        ix: The search index the pages are written to.

    Returns:
        The number of page paths that were processed.
    """
    all_paths = full_path_all_pages()
    for page_file in all_paths:
        # The first argument of page_wrapped is None here -- presumably the
        # request object, which does not exist in this context (TODO confirm).
        add_item(ix, page_wrapped(None, page_file))
    return len(all_paths)
def load_index():
    """Open the search index in ``settings.WHOOSH_PATH``, creating it if needed.

    Returns:
        The opened index, or a freshly created one if the directory does not
        contain an index yet.
    """
    mk_whooshpath_if_needed()
    try:
        opened = index.open_dir(settings.WHOOSH_PATH)
    except index.EmptyIndexError:
        # The directory exists but holds no index: create a new one instead.
        return create_index()
    logger.debug('Search Index opened.')
    return opened
def add_item(ix, pw: page_wrapped):
    """Write the page *pw* to the search index *ix*.

    The document is written with ``update_document``, keyed by the page's
    ``rel_path`` (schema field ``id``).

    Args:
        ix: The search index to write to.
        pw: The wrapped page. Its ``rel_path``, ``title``, ``raw_page_src``,
            ``tags``, ``creation_time``, ``modified_time`` and
            ``modified_user`` attributes are read; both time attributes are
            passed to ``datetime.fromtimestamp``.
    """
    data = dict(
        id=pw.rel_path,
        # Page content
        title=pw.title,
        page_src=pw.raw_page_src,
        tag=pw.tags,
        # Page metadata
        creation_time=datetime.fromtimestamp(pw.creation_time),
        modified_time=datetime.fromtimestamp(pw.modified_time),
        modified_user=pw.modified_user,
    )
    with ix.writer() as writer:
        logger.info('Adding document with id=%s to the search index.', data.get('id'))
        writer.update_document(**data)
        for key, value in data.items():
            # %r lets the logging module call repr() only if the record is
            # really emitted; the page source can be large, so building its
            # repr eagerly on every call would be wasted work.
            logger.debug(' - Adding %s=%r', key, value)
def whoosh_search(search_txt):
    """Search the index for *search_txt* and return the ids of all hits.

    The query is parsed with a ``MultifieldParser`` over the fields
    ``title``, ``page_src`` and ``tag``; a ``DateParserPlugin`` is added to
    the parser.

    Args:
        search_txt: The query text to parse and search for.

    Returns:
        A list with the stored ``id`` value of every hit (no result limit),
        or ``None`` if the query could not be parsed.
    """
    ix = load_index()
    parser = qparser.MultifieldParser(['title', 'page_src', 'tag'], ix.schema)
    parser.add_plugin(DateParserPlugin(free=True))
    try:
        query = parser.parse(search_txt)
    except Exception as err:
        # Any parse failure (including the AttributeError that was caught
        # separately before) yields "no result"; record it instead of
        # dropping it silently so that broken queries can be diagnosed.
        logger.warning('Unable to parse search query %r: %s', search_txt, err)
        return None
    with ix.searcher() as searcher:
        # Read the hits while the searcher is still open.
        return [hit['id'] for hit in searcher.search(query, limit=None)]
def delete_item(ix, pw: page_wrapped):
    """Delete the document of page *pw* from the search index *ix*.

    Args:
        ix: The search index to modify.
        pw: The wrapped page; its ``rel_path`` is the value looked up in the
            ``id`` field of the index.
    """
    with ix.writer() as writer:
        doc_id = pw.rel_path
        logger.info('Removing document with id=%s from the search index.', doc_id)
        writer.delete_by_term("id", doc_id)
2024-10-09 09:57:05 +02:00
def update_item(pw: page_wrapped):
    """Bring the search index entry of page *pw* up to date.

    Args:
        pw: The wrapped page whose index entry is (re)written.
    """
    ix = load_index()
    # add_item() writes with update_document() and the schema declares "id"
    # as unique, so whoosh itself replaces an existing document with the same
    # id. A separate delete_item() beforehand only adds a second commit and a
    # window in which the page is missing from the index.
    add_item(ix, pw)