First minimal whoosh search implemented
This commit is contained in:
parent
77f8f61aab
commit
30b817e359
2
.gitignore
vendored
2
.gitignore
vendored
@ -2,6 +2,7 @@
|
||||
data/media
|
||||
data/pages
|
||||
data/static
|
||||
data/whoosh
|
||||
db.sqlite3
|
||||
config.py
|
||||
|
||||
@ -156,4 +157,3 @@ pip-selfcheck.json
|
||||
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
|
@ -65,9 +65,8 @@ Now there are two ways to finalise your configuration. The first way is for a te
|
||||
## Backup
|
||||
### Create Backup files
|
||||
source venv/bin/activate
|
||||
python manage.py dumpdata --natural-foreign --natural-primary -e contenttypes -e sessions -e auth.Permission -e sessions -e patt --indent 2 > dump_base.json
|
||||
python manage.py dumpdata --natural-foreign --natural-primary -e contenttypes -e sessions -e auth.Permission -e sessions piki --indent 2 > dump_piki.json
|
||||
tar -cvzf dump_data.tgz data/media data/pages
|
||||
python manage.py dumpdata --natural-foreign --natural-primary -e contenttypes -e sessions -e auth.Permission -e sessions -e pages --indent 2 > dump_pages.json
|
||||
tar -cvzf dump_data.tgz data/media data/pages
|
||||
|
||||
### Restore Backup
|
||||
|
||||
@ -79,6 +78,6 @@ If you are starting without a database, you need to create one
|
||||
|
||||
Afterward add data step by step to the database.
|
||||
|
||||
python manage.py loaddata dump_base.json
|
||||
python manage.py loaddata dump_patt.json
|
||||
python manage.py loaddata dump_pages.json
|
||||
rm -rf data/pages data/media
|
||||
tar -xvzf dump_data.tgz
|
||||
|
@ -11,3 +11,7 @@ def url_helpview(request, page):
|
||||
|
||||
def url_edit(request, rel_path):
    """Return the URL for editing the page at *rel_path*.

    Calls ``reverse`` for the ``page-edit`` name with *rel_path* passed as a
    keyword argument. *request* is accepted but not used in this function.
    """
    route_kwargs = {'rel_path': rel_path}
    return reverse('page-edit', kwargs=route_kwargs)
|
||||
|
||||
|
||||
def get_search_query(request):
    """Return the search text submitted with *request*.

    The value is read from the ``q`` entry of ``request.GET``; the result is
    ``None`` when that entry is missing.
    """
    query_params = request.GET
    return query_params.get('q')
|
||||
|
0
pages/management/__init__.py
Normal file
0
pages/management/__init__.py
Normal file
0
pages/management/commands/__init__.py
Normal file
0
pages/management/commands/__init__.py
Normal file
9
pages/management/commands/rebuild_index.py
Normal file
9
pages/management/commands/rebuild_index.py
Normal file
@ -0,0 +1,9 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from pages.search import create_index, rebuild_index
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that creates the search index and fills it."""

    def handle(self, *args, **options):
        """Create a new index, add the pages to it and report the item count."""
        search_index = create_index()
        item_count = rebuild_index(search_index)
        # The %-formatting is applied to the styled text, as before.
        report = self.style.SUCCESS('Search index for %d items created.') % item_count
        self.stdout.write(report)
|
102
pages/page.py
102
pages/page.py
@ -1,69 +1,87 @@
|
||||
from django.conf import settings
|
||||
|
||||
import fstools
|
||||
import logging
|
||||
from pages import messages, url_page
|
||||
import mycreole
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)
|
||||
|
||||
class creol_page(object):
|
||||
SPLITCHAR = ":"
|
||||
FOLDER_ATTACHMENTS = "attachments"
|
||||
|
||||
class base_page(object):
|
||||
FOLDER_CONTENT = 'content'
|
||||
FILE_NAME = 'page'
|
||||
SPLITCHAR = ":"
|
||||
|
||||
def __init__(self, request, rel_path) -> None:
|
||||
self._rel_path = rel_path
|
||||
self._request = request
|
||||
def __init__(self, path):
|
||||
if path.startswith(settings.PAGES_ROOT):
|
||||
self._path = path
|
||||
else:
|
||||
self._path = os.path.join(settings.PAGES_ROOT, path.replace("/", 2*self.SPLITCHAR))
|
||||
self._raw_page_src = None
|
||||
|
||||
def _load_page_src(self):
|
||||
if self._raw_page_src is None:
|
||||
try:
|
||||
with open(self.filename, 'r') as fh:
|
||||
self._raw_page_src = fh.read()
|
||||
except FileNotFoundError:
|
||||
self._raw_page_src = ""
|
||||
|
||||
def update_page(self, page_txt):
|
||||
from .search import update_item
|
||||
#
|
||||
folder = os.path.dirname(self.filename)
|
||||
if not os.path.exists(folder):
|
||||
fstools.mkdir(folder)
|
||||
with open(self.filename, 'w') as fh:
|
||||
fh.write(page_txt)
|
||||
update_item(self)
|
||||
|
||||
@property
|
||||
def filename(self):
|
||||
return os.path.join(self._path, self.FOLDER_CONTENT, self.FILE_NAME)
|
||||
|
||||
@property
|
||||
def rel_path(self):
|
||||
return os.path.basename(self._path).replace(2*self.SPLITCHAR, "/")
|
||||
|
||||
def rel_path_is_valid(self):
|
||||
return not self.SPLITCHAR in self._rel_path
|
||||
return not self.SPLITCHAR in self.rel_path
|
||||
|
||||
def is_available(self):
|
||||
return os.path.isfile(self.content_file_name)
|
||||
is_a = os.path.isfile(self.filename)
|
||||
if not is_a:
|
||||
logger.info("page.is_available: Not available - %s", self.filename)
|
||||
return is_a
|
||||
|
||||
@property
|
||||
def title(self):
|
||||
return os.path.basename(self._rel_path)
|
||||
|
||||
@property
|
||||
def attachment_path(self):
|
||||
return os.path.join(self.content_folder_name, self.FOLDER_ATTACHMENTS)
|
||||
|
||||
def __content_folder_filter__(self, folder):
|
||||
return folder.replace('/', '::')
|
||||
|
||||
def __folder_content_filter__(self, folder):
|
||||
return folder.replace('::', '/')
|
||||
|
||||
@property
|
||||
def content_folder_name(self):
|
||||
return self.__content_folder_filter__(self._rel_path)
|
||||
|
||||
@property
|
||||
def content_file_name(self):
|
||||
return os.path.join(settings.PAGES_ROOT, self.content_folder_name, self.FOLDER_CONTENT, self.FILE_NAME)
|
||||
return os.path.basename(self._path).split("::")[-1]
|
||||
|
||||
@property
|
||||
def raw_page_src(self):
|
||||
try:
|
||||
with open(self.content_file_name, 'r') as fh:
|
||||
return fh.read()
|
||||
except FileNotFoundError:
|
||||
return ""
|
||||
self._load_page_src()
|
||||
return self._raw_page_src
|
||||
|
||||
def update_page(self, page_txt):
|
||||
folder = os.path.dirname(self.content_file_name)
|
||||
if not os.path.exists(folder):
|
||||
fstools.mkdir(folder)
|
||||
with open(self.content_file_name, 'w') as fh:
|
||||
fh.write(page_txt)
|
||||
|
||||
class creole_page(base_page):
|
||||
FOLDER_ATTACHMENTS = "attachments"
|
||||
|
||||
def __init__(self, request, path) -> None:
|
||||
self._request = request
|
||||
super().__init__(path)
|
||||
|
||||
@property
|
||||
def attachment_path(self):
|
||||
return os.path.join(os.path.basename(self._path), self.FOLDER_ATTACHMENTS)
|
||||
|
||||
def render_to_html(self):
|
||||
if self.is_available():
|
||||
return self.render_text(self._request, self.raw_page_src)
|
||||
else:
|
||||
messages.unavailable_msg_page(self._request, self._rel_path)
|
||||
messages.unavailable_msg_page(self._request, self.rel_path)
|
||||
return ""
|
||||
|
||||
def render_text(self, request, txt):
|
||||
@ -102,18 +120,18 @@ class creol_page(object):
|
||||
#
|
||||
rv = ""
|
||||
# create a rel_path list
|
||||
pathlist = [self.__folder_content_filter__(os.path.basename(path)) for path in fstools.dirlist(settings.PAGES_ROOT, rekursive=False)]
|
||||
pathlist = [base_page(path).rel_path for path in fstools.dirlist(settings.PAGES_ROOT, rekursive=False)]
|
||||
# sort basename
|
||||
pathlist.sort(key=os.path.basename)
|
||||
|
||||
last_char = None
|
||||
for contentname in pathlist:
|
||||
#
|
||||
if (contentname.startswith(self._rel_path) or allpages) and contentname != self._rel_path:
|
||||
if (contentname.startswith(self.rel_path) or allpages) and contentname != self.rel_path:
|
||||
if allpages:
|
||||
name = contentname
|
||||
else:
|
||||
name = contentname[len(self._rel_path)+1:]
|
||||
name = contentname[len(self.rel_path)+1:]
|
||||
if name.count('/') < depth and name.startswith(startname):
|
||||
if last_char != os.path.basename(name)[0].upper():
|
||||
last_char = os.path.basename(name)[0].upper()
|
||||
|
96
pages/search.py
Normal file
96
pages/search.py
Normal file
@ -0,0 +1,96 @@
|
||||
from django.conf import settings
|
||||
|
||||
import fstools
|
||||
import logging
|
||||
import os
|
||||
from whoosh.fields import Schema, ID, TEXT
|
||||
from whoosh.qparser.dateparse import DateParserPlugin
|
||||
from whoosh import index, qparser
|
||||
|
||||
from pages.page import base_page
|
||||
|
||||
logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)
|
||||
|
||||
|
||||
# Layout of the documents held in the search index.
SCHEMA = Schema(
    # Page identifier; stored so that search hits can hand it back.
    id=ID(stored=True, unique=True),
    # Page title and raw page source, both as TEXT fields.
    title=TEXT,
    page_src=TEXT,
)
|
||||
|
||||
|
||||
def mk_whooshpath_if_needed():
    """Create the directory ``settings.WHOOSH_PATH`` unless it already exists."""
    index_dir = settings.WHOOSH_PATH
    if os.path.exists(index_dir):
        return
    fstools.mkdir(index_dir)
|
||||
|
||||
|
||||
def create_index():
    """Create a new search index in ``settings.WHOOSH_PATH`` and return it.

    The index directory is created first when it is missing; the index is
    built with the module-level ``SCHEMA``.
    """
    mk_whooshpath_if_needed()
    logger.debug('Search Index created.')
    new_index = index.create_in(settings.WHOOSH_PATH, schema=SCHEMA)
    return new_index
|
||||
|
||||
|
||||
def rebuild_index(ix):
    """Add every page listed under ``settings.PAGES_ROOT`` to the index *ix*.

    The entries come from ``fstools.dirlist`` (called with
    ``rekursive=False``); each one is wrapped in a ``base_page`` and passed
    to ``add_item``.

    Returns the number of listed entries.
    """
    page_dirs = fstools.dirlist(settings.PAGES_ROOT, rekursive=False)
    for page_dir in page_dirs:
        add_item(ix, base_page(page_dir))
    return len(page_dirs)
|
||||
|
||||
|
||||
def load_index():
    """Open the search index stored in ``settings.WHOOSH_PATH``.

    The index directory is created when missing. If it holds no index yet,
    a new one is created and returned instead.
    """
    mk_whooshpath_if_needed()
    try:
        opened = index.open_dir(settings.WHOOSH_PATH)
    except index.EmptyIndexError:
        # Nothing to open yet: start with a new index.
        return create_index()
    logger.debug('Search Index opened.')
    return opened
|
||||
|
||||
|
||||
def add_item(ix, bp: base_page):
    """Add the page *bp* as a document to the search index *ix*.

    The document carries the page's ``rel_path`` as ``id``, its ``title``
    and its ``raw_page_src`` as ``page_src``.
    """
    # Field values of the document to be indexed
    fields = {
        'id': bp.rel_path,
        'title': bp.title,
        'page_src': bp.raw_page_src,
    }
    with ix.writer() as writer:
        logger.info('Adding document with id=%s to the search index.', fields.get('id'))
        writer.add_document(**fields)
        for name, value in fields.items():
            logger.debug(' - Adding %s=%s', name, repr(value))
|
||||
|
||||
|
||||
def whoosh_search(search_txt):
    """Search the page index for *search_txt*.

    The text is parsed into a query over the ``title`` and ``page_src``
    fields of the loaded index.

    Returns a list holding the stored ``id`` of every hit, or ``None`` when
    *search_txt* cannot be parsed into a query.
    """
    ix = load_index()
    qp = qparser.MultifieldParser(['title', 'page_src'], ix.schema)
    qp.add_plugin(DateParserPlugin(free=True))
    try:
        q = qp.parse(search_txt)
    except Exception:
        # A single handler is enough (AttributeError is an Exception too).
        # Record the reason instead of dropping it silently; None still
        # signals an unparsable search text to the caller.
        logger.debug('Unable to parse search text %r', search_txt, exc_info=True)
        return None
    with ix.searcher() as s:
        # Collect the ids while the searcher is still open.
        return [hit['id'] for hit in s.search(q, limit=None)]
|
||||
|
||||
|
||||
def delete_item(ix, bp: base_page):
    """Remove the document of page *bp* from the search index *ix*.

    Documents are matched on the ``id`` field, which holds the page's
    ``rel_path`` (see ``add_item``).
    """
    with ix.writer() as w:
        logger.info('Removing document with id=%s from the search index.', bp.rel_path)
        # The schema defines no "task_id" field; the page identifier is
        # indexed under "id", so that is the field to delete by.
        w.delete_by_term("id", bp.rel_path)
|
||||
|
||||
|
||||
def update_item(bp: base_page):
    """Refresh the search index entry of page *bp*.

    Loads the index, deletes the page's document and adds the page again.
    """
    search_index = load_index()
    delete_item(search_index, bp)
    add_item(search_index, bp)
|
||||
|
@ -8,12 +8,14 @@ import logging
|
||||
from . import access
|
||||
from . import messages
|
||||
from . import url_page
|
||||
from . import get_search_query
|
||||
import config
|
||||
from .context import context_adaption
|
||||
from .forms import EditForm
|
||||
from .help import help_pages
|
||||
import mycreole
|
||||
from .page import creol_page
|
||||
from .page import creole_page
|
||||
from .search import whoosh_search
|
||||
from themes import Context
|
||||
|
||||
logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)
|
||||
@ -26,7 +28,7 @@ def root(request):
|
||||
def page(request, rel_path):
|
||||
context = Context(request) # needs to be executed first because of time measurement
|
||||
#
|
||||
p = creol_page(request, rel_path)
|
||||
p = creole_page(request, rel_path)
|
||||
if access.read_page(request, rel_path):
|
||||
page_content = p.render_to_html()
|
||||
else:
|
||||
@ -48,7 +50,7 @@ def edit(request, rel_path):
|
||||
if access.write_page(request, rel_path):
|
||||
context = Context(request) # needs to be executed first because of time measurement
|
||||
#
|
||||
p = creol_page(request, rel_path)
|
||||
p = creole_page(request, rel_path)
|
||||
#
|
||||
if not request.POST:
|
||||
form = EditForm(page_data=p.raw_page_src)
|
||||
@ -92,10 +94,22 @@ def edit(request, rel_path):
|
||||
|
||||
def search(request):
|
||||
context = Context(request) # needs to be executed first because of time measurement
|
||||
#
|
||||
search_txt = get_search_query(request)
|
||||
|
||||
sr = whoosh_search(search_txt)
|
||||
if sr is None:
|
||||
messages.error(request, _('Invalid search pattern: %s') % repr(search_txt))
|
||||
sr = []
|
||||
page_content = "= Searchresults\n"
|
||||
for rel_path in sr:
|
||||
p = creole_page(request, rel_path)
|
||||
page_content += f"[[/page/{rel_path}|{p.title}]]\n"
|
||||
#
|
||||
context_adaption(
|
||||
context,
|
||||
request,
|
||||
page_content="Search is not yet implemented..."
|
||||
page_content=mycreole.render_simple(page_content)
|
||||
)
|
||||
return render(request, 'pages/page.html', context=context)
|
||||
|
||||
|
@ -135,6 +135,8 @@ MYCREOLE_BAR = {
|
||||
|
||||
PAGES_ROOT = os.path.join(BASE_DIR, 'data', 'pages')
|
||||
|
||||
WHOOSH_PATH = os.path.join(BASE_DIR, 'data', 'whoosh')
|
||||
|
||||
# Default primary key field type
|
||||
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
|
||||
|
||||
@ -215,4 +217,3 @@ File "%(pathname)s", line %(lineno)d, in %(funcName)s
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -2,4 +2,4 @@ Django
|
||||
Pillow
|
||||
python-creole
|
||||
pytz
|
||||
|
||||
Whoosh
|
||||
|
Loading…
x
Reference in New Issue
Block a user