diff --git a/.gitignore b/.gitignore index 86ffbfb..ff313d8 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ data/media data/pages data/static +data/whoosh db.sqlite3 config.py @@ -156,4 +157,3 @@ pip-selfcheck.json # .nfs files are created when an open file is removed but is still being accessed .nfs* - diff --git a/README.md b/README.md index 6d6c026..50a0cd1 100644 --- a/README.md +++ b/README.md @@ -65,9 +65,8 @@ Now there are two ways to finalise your configuration. The first way is for a te ## Backup ### Create Backup files source venv/bin/activate - python manage.py dumpdata --natural-foreign --natural-primary -e contenttypes -e sessions -e auth.Permission -e sessions -e patt --indent 2 > dump_base.json - python manage.py dumpdata --natural-foreign --natural-primary -e contenttypes -e sessions -e auth.Permission -e sessions piki --indent 2 > dump_piki.json - tar -cvzf dump_data.tgz data/media data/pages + python manage.py dumpdata --natural-foreign --natural-primary -e contenttypes -e sessions -e auth.Permission -e pages --indent 2 > dump_pages.json + tar -cvzf dump_data.tgz data/media data/pages ### Restore Backup @@ -79,6 +78,6 @@ If you are starting without a database, you need to create one Afterward add data step by step to the database. 
- python manage.py loaddata dump_base.json - python manage.py loaddata dump_patt.json + python manage.py loaddata dump_pages.json + rm -rf data/pages data/media tar -xvzf dump_data.tgz diff --git a/pages/__init__.py b/pages/__init__.py index 4b7ce5d..544ee56 100644 --- a/pages/__init__.py +++ b/pages/__init__.py @@ -11,3 +11,7 @@ def url_helpview(request, page): def url_edit(request, rel_path): return reverse('page-edit', kwargs={'rel_path': rel_path}) + + +def get_search_query(request): + return request.GET.get('q') diff --git a/pages/management/__init__.py b/pages/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pages/management/commands/__init__.py b/pages/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pages/management/commands/rebuild_index.py b/pages/management/commands/rebuild_index.py new file mode 100644 index 0000000..15dac20 --- /dev/null +++ b/pages/management/commands/rebuild_index.py @@ -0,0 +1,9 @@ +from django.core.management.base import BaseCommand +from pages.search import create_index, rebuild_index + + +class Command(BaseCommand): + def handle(self, *args, **options): + ix = create_index() + n = rebuild_index(ix) + self.stdout.write(self.style.SUCCESS('Search index for %d items created.' % n)) diff --git a/pages/page.py b/pages/page.py index 7b02efa..643bcc5 100644 --- a/pages/page.py +++ b/pages/page.py @@ -1,69 +1,87 @@ from django.conf import settings import fstools +import logging from pages import messages, url_page import mycreole import os +logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__) -class creol_page(object): - SPLITCHAR = ":" - FOLDER_ATTACHMENTS = "attachments" + +class base_page(object): FOLDER_CONTENT = 'content' FILE_NAME = 'page' + SPLITCHAR = ":" - def __init__(self, request, rel_path) -> None: - self._rel_path = rel_path - self._request = request + def __init__(self, path): + if path.startswith(settings.PAGES_ROOT): + self._path = 
path + else: + self._path = os.path.join(settings.PAGES_ROOT, path.replace("/", 2*self.SPLITCHAR)) + self._raw_page_src = None + + def _load_page_src(self): + if self._raw_page_src is None: + try: + with open(self.filename, 'r') as fh: + self._raw_page_src = fh.read() + except FileNotFoundError: + self._raw_page_src = "" + + def update_page(self, page_txt): + from .search import update_item + # + folder = os.path.dirname(self.filename) + if not os.path.exists(folder): + fstools.mkdir(folder) + with open(self.filename, 'w') as fh: + fh.write(page_txt) + update_item(self) + + @property + def filename(self): + return os.path.join(self._path, self.FOLDER_CONTENT, self.FILE_NAME) + + @property + def rel_path(self): + return os.path.basename(self._path).replace(2*self.SPLITCHAR, "/") def rel_path_is_valid(self): - return not self.SPLITCHAR in self._rel_path + return self.SPLITCHAR not in self.rel_path def is_available(self): - return os.path.isfile(self.content_file_name) + is_a = os.path.isfile(self.filename) + if not is_a: + logger.info("page.is_available: Not available - %s", self.filename) + return is_a @property def title(self): - return os.path.basename(self._rel_path) - - @property - def attachment_path(self): - return os.path.join(self.content_folder_name, self.FOLDER_ATTACHMENTS) - - def __content_folder_filter__(self, folder): - return folder.replace('/', '::') - - def __folder_content_filter__(self, folder): - return folder.replace('::', '/') - - @property - def content_folder_name(self): - return self.__content_folder_filter__(self._rel_path) - - @property - def content_file_name(self): - return os.path.join(settings.PAGES_ROOT, self.content_folder_name, self.FOLDER_CONTENT, self.FILE_NAME) + return os.path.basename(self._path).split(2*self.SPLITCHAR)[-1] @property def raw_page_src(self): - try: - with open(self.content_file_name, 'r') as fh: - return fh.read() - except FileNotFoundError: - return "" + self._load_page_src() + return self._raw_page_src - def 
update_page(self, page_txt): - folder = os.path.dirname(self.content_file_name) - if not os.path.exists(folder): - fstools.mkdir(folder) - with open(self.content_file_name, 'w') as fh: - fh.write(page_txt) + +class creole_page(base_page): + FOLDER_ATTACHMENTS = "attachments" + + def __init__(self, request, path) -> None: + self._request = request + super().__init__(path) + + @property + def attachment_path(self): + return os.path.join(os.path.basename(self._path), self.FOLDER_ATTACHMENTS) def render_to_html(self): if self.is_available(): return self.render_text(self._request, self.raw_page_src) else: - messages.unavailable_msg_page(self._request, self._rel_path) + messages.unavailable_msg_page(self._request, self.rel_path) return "" def render_text(self, request, txt): @@ -102,18 +120,18 @@ class creol_page(object): # rv = "" # create a rel_path list - pathlist = [self.__folder_content_filter__(os.path.basename(path)) for path in fstools.dirlist(settings.PAGES_ROOT, rekursive=False)] + pathlist = [base_page(path).rel_path for path in fstools.dirlist(settings.PAGES_ROOT, rekursive=False)] # sort basename pathlist.sort(key=os.path.basename) last_char = None for contentname in pathlist: # - if (contentname.startswith(self._rel_path) or allpages) and contentname != self._rel_path: + if (contentname.startswith(self.rel_path) or allpages) and contentname != self.rel_path: if allpages: name = contentname else: - name = contentname[len(self._rel_path)+1:] + name = contentname[len(self.rel_path)+1:] if name.count('/') < depth and name.startswith(startname): if last_char != os.path.basename(name)[0].upper(): last_char = os.path.basename(name)[0].upper() diff --git a/pages/search.py b/pages/search.py new file mode 100644 index 0000000..26db305 --- /dev/null +++ b/pages/search.py @@ -0,0 +1,96 @@ +from django.conf import settings + +import fstools +import logging +import os +from whoosh.fields import Schema, ID, TEXT +from whoosh.qparser.dateparse import DateParserPlugin +from 
whoosh import index, qparser + +from pages.page import base_page + +logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__) + + +SCHEMA = Schema( + id=ID(unique=True, stored=True), + # Page + title=TEXT, + page_src=TEXT +) + + +def mk_whooshpath_if_needed(): + if not os.path.exists(settings.WHOOSH_PATH): + fstools.mkdir(settings.WHOOSH_PATH) + + +def create_index(): + mk_whooshpath_if_needed() + logger.debug('Search Index created.') + return index.create_in(settings.WHOOSH_PATH, schema=SCHEMA) + + +def rebuild_index(ix): + page_path = fstools.dirlist(settings.PAGES_ROOT, rekursive=False) + for path in page_path: + bp = base_page(path) + add_item(ix, bp) + return len(page_path) + + +def load_index(): + mk_whooshpath_if_needed() + try: + ix = index.open_dir(settings.WHOOSH_PATH) + except index.EmptyIndexError: + ix = create_index() + else: + logger.debug('Search Index opened.') + return ix + + +def add_item(ix, bp: base_page): + # Define Standard data + # + data = dict( + id=bp.rel_path, + title=bp.title, + page_src=bp.raw_page_src + ) + with ix.writer() as w: + logger.info('Adding document with id=%s to the search index.', data.get('id')) + w.add_document(**data) + for key in data: + logger.debug(' - Adding %s=%s', key, repr(data[key])) + + +def whoosh_search(search_txt): + ix = load_index() + qp = qparser.MultifieldParser(['title', 'page_src'], ix.schema) + qp.add_plugin(DateParserPlugin(free=True)) + try: + q = qp.parse(search_txt) + except AttributeError: + return None + except Exception: + return None + with ix.searcher() as s: + results = s.search(q, limit=None) + rpl = [] + for hit in results: + rpl.append(hit['id']) + return rpl + + +def delete_item(ix, bp: base_page): + with ix.writer() as w: + logger.info('Removing document with id=%s from the search index.', bp.rel_path) + w.delete_by_term("id", bp.rel_path) + + +def update_item(bp: base_page): + ix = load_index() + delete_item(ix, bp) + add_item(ix, bp) + diff --git a/pages/views.py 
b/pages/views.py index 059d57e..0679dff 100644 --- a/pages/views.py +++ b/pages/views.py @@ -8,12 +8,14 @@ import logging from . import access from . import messages from . import url_page +from . import get_search_query import config from .context import context_adaption from .forms import EditForm from .help import help_pages import mycreole -from .page import creol_page +from .page import creole_page +from .search import whoosh_search from themes import Context logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__) @@ -26,7 +28,7 @@ def root(request): def page(request, rel_path): context = Context(request) # needs to be executed first because of time mesurement # - p = creol_page(request, rel_path) + p = creole_page(request, rel_path) if access.read_page(request, rel_path): page_content = p.render_to_html() else: @@ -48,7 +50,7 @@ def edit(request, rel_path): if access.write_page(request, rel_path): context = Context(request) # needs to be executed first because of time mesurement # - p = creol_page(request, rel_path) + p = creole_page(request, rel_path) # if not request.POST: form = EditForm(page_data=p.raw_page_src) @@ -92,10 +94,22 @@ def edit(request, rel_path): def search(request): context = Context(request) # needs to be executed first because of time mesurement + # + search_txt = get_search_query(request) + + sr = whoosh_search(search_txt) + if sr is None: + messages.error(request, _('Invalid search pattern: %s') % repr(search_txt)) + sr = [] + page_content = "= Searchresults\n" + for rel_path in sr: + p = creole_page(request, rel_path) + page_content += f"[[/page/{rel_path}|{p.title}]]\n" + # context_adaption( context, request, - page_content="Search is not yet implemented..." 
+ page_content=mycreole.render_simple(page_content) ) return render(request, 'pages/page.html', context=context) diff --git a/piki/settings.py b/piki/settings.py index 897be98..65e0053 100644 --- a/piki/settings.py +++ b/piki/settings.py @@ -135,6 +135,8 @@ MYCREOLE_BAR = { PAGES_ROOT = os.path.join(BASE_DIR, 'data', 'pages') +WHOOSH_PATH = os.path.join(BASE_DIR, 'data', 'whoosh') + # Default primary key field type # https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field @@ -215,4 +217,3 @@ File "%(pathname)s", line %(lineno)d, in %(funcName)s }, }, } - diff --git a/requirements.txt b/requirements.txt index 684b4cb..14b60e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ Django Pillow python-creole pytz - +Whoosh