First minimal whoosh search implemented
This commit is contained in:
parent
77f8f61aab
commit
30b817e359
2
.gitignore
vendored
2
.gitignore
vendored
@ -2,6 +2,7 @@
|
|||||||
data/media
|
data/media
|
||||||
data/pages
|
data/pages
|
||||||
data/static
|
data/static
|
||||||
|
data/whoosh
|
||||||
db.sqlite3
|
db.sqlite3
|
||||||
config.py
|
config.py
|
||||||
|
|
||||||
@ -156,4 +157,3 @@ pip-selfcheck.json
|
|||||||
|
|
||||||
# .nfs files are created when an open file is removed but is still being accessed
|
# .nfs files are created when an open file is removed but is still being accessed
|
||||||
.nfs*
|
.nfs*
|
||||||
|
|
||||||
|
@ -65,9 +65,8 @@ Now there are two ways to finalise your configuration. The first way is for a te
|
|||||||
## Backup
|
## Backup
|
||||||
### Create Backup files
|
### Create Backup files
|
||||||
source venv/bin/activate
|
source venv/bin/activate
|
||||||
python manage.py dumpdata --natural-foreign --natural-primary -e contenttypes -e sessions -e auth.Permission -e sessions -e patt --indent 2 > dump_base.json
|
python manage.py dumpdata --natural-foreign --natural-primary -e contenttypes -e sessions -e auth.Permission -e sessions -e pages --indent 2 > dump_pages.json
|
||||||
python manage.py dumpdata --natural-foreign --natural-primary -e contenttypes -e sessions -e auth.Permission -e sessions piki --indent 2 > dump_piki.json
|
tar -cvzf dump_data.tgz data/media data/pages
|
||||||
tar -cvzf dump_data.tgz data/media data/pages
|
|
||||||
|
|
||||||
### Restore Backup
|
### Restore Backup
|
||||||
|
|
||||||
@ -79,6 +78,6 @@ If you are starting without a database, you need to create one
|
|||||||
|
|
||||||
Afterward add data step by step to the database.
|
Afterward add data step by step to the database.
|
||||||
|
|
||||||
python manage.py loaddata dump_base.json
|
python manage.py loaddata dump_pages.json
|
||||||
python manage.py loaddata dump_patt.json
|
rm -rf data/pages data/media
|
||||||
tar -xvzf dump_data.tgz
|
tar -xvzf dump_data.tgz
|
||||||
|
@ -11,3 +11,7 @@ def url_helpview(request, page):
|
|||||||
|
|
||||||
def url_edit(request, rel_path):
|
def url_edit(request, rel_path):
|
||||||
return reverse('page-edit', kwargs={'rel_path': rel_path})
|
return reverse('page-edit', kwargs={'rel_path': rel_path})
|
||||||
|
|
||||||
|
|
||||||
|
def get_search_query(request):
    """Return the value of the ``q`` GET parameter of *request* (``None`` if absent)."""
    query_params = request.GET
    return query_params.get('q')
|
||||||
|
0
pages/management/__init__.py
Normal file
0
pages/management/__init__.py
Normal file
0
pages/management/commands/__init__.py
Normal file
0
pages/management/commands/__init__.py
Normal file
9
pages/management/commands/rebuild_index.py
Normal file
9
pages/management/commands/rebuild_index.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from pages.search import create_index, rebuild_index
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command that creates the search index and fills it with all pages."""

    def handle(self, *args, **options):
        """Create the index, add the pages to it and report the item count on stdout."""
        # rebuild_index() returns the number of items it handed to the index.
        item_count = rebuild_index(create_index())
        styled_template = self.style.SUCCESS('Search index for %d items created.')
        self.stdout.write(styled_template % item_count)
|
102
pages/page.py
102
pages/page.py
@ -1,69 +1,87 @@
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
import fstools
|
import fstools
|
||||||
|
import logging
|
||||||
from pages import messages, url_page
|
from pages import messages, url_page
|
||||||
import mycreole
|
import mycreole
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)
|
||||||
|
|
||||||
class creol_page(object):
|
|
||||||
SPLITCHAR = ":"
|
class base_page(object):
|
||||||
FOLDER_ATTACHMENTS = "attachments"
|
|
||||||
FOLDER_CONTENT = 'content'
|
FOLDER_CONTENT = 'content'
|
||||||
FILE_NAME = 'page'
|
FILE_NAME = 'page'
|
||||||
|
SPLITCHAR = ":"
|
||||||
|
|
||||||
def __init__(self, request, rel_path) -> None:
|
def __init__(self, path):
|
||||||
self._rel_path = rel_path
|
if path.startswith(settings.PAGES_ROOT):
|
||||||
self._request = request
|
self._path = path
|
||||||
|
else:
|
||||||
|
self._path = os.path.join(settings.PAGES_ROOT, path.replace("/", 2*self.SPLITCHAR))
|
||||||
|
self._raw_page_src = None
|
||||||
|
|
||||||
|
def _load_page_src(self):
|
||||||
|
if self._raw_page_src is None:
|
||||||
|
try:
|
||||||
|
with open(self.filename, 'r') as fh:
|
||||||
|
self._raw_page_src = fh.read()
|
||||||
|
except FileNotFoundError:
|
||||||
|
self._raw_page_src = ""
|
||||||
|
|
||||||
|
def update_page(self, page_txt):
|
||||||
|
from .search import update_item
|
||||||
|
#
|
||||||
|
folder = os.path.dirname(self.filename)
|
||||||
|
if not os.path.exists(folder):
|
||||||
|
fstools.mkdir(folder)
|
||||||
|
with open(self.filename, 'w') as fh:
|
||||||
|
fh.write(page_txt)
|
||||||
|
update_item(self)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def filename(self):
|
||||||
|
return os.path.join(self._path, self.FOLDER_CONTENT, self.FILE_NAME)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def rel_path(self):
|
||||||
|
return os.path.basename(self._path).replace(2*self.SPLITCHAR, "/")
|
||||||
|
|
||||||
def rel_path_is_valid(self):
|
def rel_path_is_valid(self):
|
||||||
return not self.SPLITCHAR in self._rel_path
|
return not self.SPLITCHAR in self.rel_path
|
||||||
|
|
||||||
def is_available(self):
|
def is_available(self):
|
||||||
return os.path.isfile(self.content_file_name)
|
is_a = os.path.isfile(self.filename)
|
||||||
|
if not is_a:
|
||||||
|
logger.info("page.is_available: Not available - %s", self.filename)
|
||||||
|
return is_a
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def title(self):
|
def title(self):
|
||||||
return os.path.basename(self._rel_path)
|
return os.path.basename(self._path).split("::")[-1]
|
||||||
|
|
||||||
@property
|
|
||||||
def attachment_path(self):
|
|
||||||
return os.path.join(self.content_folder_name, self.FOLDER_ATTACHMENTS)
|
|
||||||
|
|
||||||
def __content_folder_filter__(self, folder):
|
|
||||||
return folder.replace('/', '::')
|
|
||||||
|
|
||||||
def __folder_content_filter__(self, folder):
|
|
||||||
return folder.replace('::', '/')
|
|
||||||
|
|
||||||
@property
|
|
||||||
def content_folder_name(self):
|
|
||||||
return self.__content_folder_filter__(self._rel_path)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def content_file_name(self):
|
|
||||||
return os.path.join(settings.PAGES_ROOT, self.content_folder_name, self.FOLDER_CONTENT, self.FILE_NAME)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def raw_page_src(self):
|
def raw_page_src(self):
|
||||||
try:
|
self._load_page_src()
|
||||||
with open(self.content_file_name, 'r') as fh:
|
return self._raw_page_src
|
||||||
return fh.read()
|
|
||||||
except FileNotFoundError:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def update_page(self, page_txt):
|
|
||||||
folder = os.path.dirname(self.content_file_name)
|
class creole_page(base_page):
|
||||||
if not os.path.exists(folder):
|
FOLDER_ATTACHMENTS = "attachments"
|
||||||
fstools.mkdir(folder)
|
|
||||||
with open(self.content_file_name, 'w') as fh:
|
def __init__(self, request, path) -> None:
|
||||||
fh.write(page_txt)
|
self._request = request
|
||||||
|
super().__init__(path)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def attachment_path(self):
|
||||||
|
return os.path.join(os.path.basename(self._path), self.FOLDER_ATTACHMENTS)
|
||||||
|
|
||||||
def render_to_html(self):
|
def render_to_html(self):
|
||||||
if self.is_available():
|
if self.is_available():
|
||||||
return self.render_text(self._request, self.raw_page_src)
|
return self.render_text(self._request, self.raw_page_src)
|
||||||
else:
|
else:
|
||||||
messages.unavailable_msg_page(self._request, self._rel_path)
|
messages.unavailable_msg_page(self._request, self.rel_path)
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def render_text(self, request, txt):
|
def render_text(self, request, txt):
|
||||||
@ -102,18 +120,18 @@ class creol_page(object):
|
|||||||
#
|
#
|
||||||
rv = ""
|
rv = ""
|
||||||
# create a rel_path list
|
# create a rel_path list
|
||||||
pathlist = [self.__folder_content_filter__(os.path.basename(path)) for path in fstools.dirlist(settings.PAGES_ROOT, rekursive=False)]
|
pathlist = [base_page(path).rel_path for path in fstools.dirlist(settings.PAGES_ROOT, rekursive=False)]
|
||||||
# sort basename
|
# sort basename
|
||||||
pathlist.sort(key=os.path.basename)
|
pathlist.sort(key=os.path.basename)
|
||||||
|
|
||||||
last_char = None
|
last_char = None
|
||||||
for contentname in pathlist:
|
for contentname in pathlist:
|
||||||
#
|
#
|
||||||
if (contentname.startswith(self._rel_path) or allpages) and contentname != self._rel_path:
|
if (contentname.startswith(self.rel_path) or allpages) and contentname != self.rel_path:
|
||||||
if allpages:
|
if allpages:
|
||||||
name = contentname
|
name = contentname
|
||||||
else:
|
else:
|
||||||
name = contentname[len(self._rel_path)+1:]
|
name = contentname[len(self.rel_path)+1:]
|
||||||
if name.count('/') < depth and name.startswith(startname):
|
if name.count('/') < depth and name.startswith(startname):
|
||||||
if last_char != os.path.basename(name)[0].upper():
|
if last_char != os.path.basename(name)[0].upper():
|
||||||
last_char = os.path.basename(name)[0].upper()
|
last_char = os.path.basename(name)[0].upper()
|
||||||
|
96
pages/search.py
Normal file
96
pages/search.py
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
import fstools
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from whoosh.fields import Schema, ID, TEXT
|
||||||
|
from whoosh.qparser.dateparse import DateParserPlugin
|
||||||
|
from whoosh import index, qparser
|
||||||
|
|
||||||
|
from pages.page import base_page
|
||||||
|
|
||||||
|
logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Search index schema: one document per page.
SCHEMA = Schema(
    # Unique and stored, so a search hit can hand the id back to the caller.
    id=ID(unique=True, stored=True),
    # Page
    title=TEXT,
    page_src=TEXT,
)
|
||||||
|
|
||||||
|
|
||||||
|
def mk_whooshpath_if_needed():
    """Create the directory ``settings.WHOOSH_PATH`` unless it already exists."""
    whoosh_dir = settings.WHOOSH_PATH
    if os.path.exists(whoosh_dir):
        return
    fstools.mkdir(whoosh_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def create_index():
    """Create a search index in ``settings.WHOOSH_PATH`` using ``SCHEMA`` and return it.

    The index directory is created first if it is missing.

    NOTE(review): according to the Whoosh documentation ``create_in`` replaces an
    index that already exists in the directory - confirm this is intended for
    every caller.
    """
    mk_whooshpath_if_needed()
    ix = index.create_in(settings.WHOOSH_PATH, schema=SCHEMA)
    # Log only after create_in() has returned, so the message is never emitted
    # for an attempt that raised.
    logger.debug('Search Index created.')
    return ix
|
||||||
|
|
||||||
|
|
||||||
|
def rebuild_index(ix):
    """Add every entry listed directly under ``settings.PAGES_ROOT`` to the index *ix*.

    Returns the number of entries that were passed to ``add_item``.
    """
    page_dirs = fstools.dirlist(settings.PAGES_ROOT, rekursive=False)
    for page_dir in page_dirs:
        add_item(ix, base_page(page_dir))
    return len(page_dirs)
|
||||||
|
|
||||||
|
|
||||||
|
def load_index():
    """Open the search index in ``settings.WHOOSH_PATH``; create it if none exists there.

    Returns the opened (or freshly created) index object.
    """
    mk_whooshpath_if_needed()
    try:
        opened = index.open_dir(settings.WHOOSH_PATH)
    except index.EmptyIndexError:
        # No index in the directory yet: fall back to creating one.
        return create_index()
    logger.debug('Search Index opened.')
    return opened
|
||||||
|
|
||||||
|
|
||||||
|
def add_item(ix, bp: base_page):
    """Add the page *bp* as one document to the index *ix*.

    The document fields are the page's ``rel_path`` (as ``id``), ``title`` and
    ``raw_page_src`` (as ``page_src``).
    """
    # Define Standard data
    fields = {
        'id': bp.rel_path,
        'title': bp.title,
        'page_src': bp.raw_page_src,
    }
    with ix.writer() as writer:
        logger.info('Adding document with id=%s to the search index.', fields['id'])
        writer.add_document(**fields)
        for name, value in fields.items():
            logger.debug(' - Adding %s=%s', name, repr(value))
|
||||||
|
|
||||||
|
|
||||||
|
def whoosh_search(search_txt):
    """Search the page index for *search_txt*.

    The query is parsed against the ``title`` and ``page_src`` fields.

    Returns a list with the stored ``id`` of every hit, or ``None`` if
    *search_txt* could not be parsed into a query.
    """
    ix = load_index()
    qp = qparser.MultifieldParser(['title', 'page_src'], ix.schema)
    qp.add_plugin(DateParserPlugin(free=True))
    try:
        q = qp.parse(search_txt)
    except Exception:
        # Deliberately broad: any parse failure is reported to the caller as
        # None. (A separate AttributeError handler with the same body would be
        # redundant.) The failure is logged instead of being dropped silently.
        logger.debug('Unable to parse search pattern %r', search_txt, exc_info=True)
        return None
    with ix.searcher() as s:
        # Collect the ids while the searcher is still open.
        return [hit['id'] for hit in s.search(q, limit=None)]
|
||||||
|
|
||||||
|
|
||||||
|
def delete_item(ix, bp: base_page):
    """Remove the document belonging to the page *bp* from the index *ix*.

    Documents are keyed by the ``id`` field of ``SCHEMA``, which ``add_item``
    fills with ``bp.rel_path``.
    """
    with ix.writer() as w:
        logger.info('Removing document with id=%s from the search index.', bp.rel_path)
        # The schema has no "task_id" field; deleting by it would never match,
        # leaving the old document in place and producing duplicates on update.
        w.delete_by_term("id", bp.rel_path)
|
||||||
|
|
||||||
|
|
||||||
|
def update_item(bp: base_page):
    """Refresh the index entry of *bp*: run ``delete_item``, then ``add_item``."""
    search_index = load_index()
    for step in (delete_item, add_item):
        step(search_index, bp)
|
||||||
|
|
@ -8,12 +8,14 @@ import logging
|
|||||||
from . import access
|
from . import access
|
||||||
from . import messages
|
from . import messages
|
||||||
from . import url_page
|
from . import url_page
|
||||||
|
from . import get_search_query
|
||||||
import config
|
import config
|
||||||
from .context import context_adaption
|
from .context import context_adaption
|
||||||
from .forms import EditForm
|
from .forms import EditForm
|
||||||
from .help import help_pages
|
from .help import help_pages
|
||||||
import mycreole
|
import mycreole
|
||||||
from .page import creol_page
|
from .page import creole_page
|
||||||
|
from .search import whoosh_search
|
||||||
from themes import Context
|
from themes import Context
|
||||||
|
|
||||||
logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)
|
logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)
|
||||||
@ -26,7 +28,7 @@ def root(request):
|
|||||||
def page(request, rel_path):
|
def page(request, rel_path):
|
||||||
context = Context(request) # needs to be executed first because of time mesurement
|
context = Context(request) # needs to be executed first because of time mesurement
|
||||||
#
|
#
|
||||||
p = creol_page(request, rel_path)
|
p = creole_page(request, rel_path)
|
||||||
if access.read_page(request, rel_path):
|
if access.read_page(request, rel_path):
|
||||||
page_content = p.render_to_html()
|
page_content = p.render_to_html()
|
||||||
else:
|
else:
|
||||||
@ -48,7 +50,7 @@ def edit(request, rel_path):
|
|||||||
if access.write_page(request, rel_path):
|
if access.write_page(request, rel_path):
|
||||||
context = Context(request) # needs to be executed first because of time mesurement
|
context = Context(request) # needs to be executed first because of time mesurement
|
||||||
#
|
#
|
||||||
p = creol_page(request, rel_path)
|
p = creole_page(request, rel_path)
|
||||||
#
|
#
|
||||||
if not request.POST:
|
if not request.POST:
|
||||||
form = EditForm(page_data=p.raw_page_src)
|
form = EditForm(page_data=p.raw_page_src)
|
||||||
@ -92,10 +94,22 @@ def edit(request, rel_path):
|
|||||||
|
|
||||||
def search(request):
|
def search(request):
|
||||||
context = Context(request) # needs to be executed first because of time mesurement
|
context = Context(request) # needs to be executed first because of time mesurement
|
||||||
|
#
|
||||||
|
search_txt = get_search_query(request)
|
||||||
|
|
||||||
|
sr = whoosh_search(search_txt)
|
||||||
|
if sr is None:
|
||||||
|
messages.error(request, _('Invalid search pattern: %s') % repr(search_txt))
|
||||||
|
sr = []
|
||||||
|
page_content = "= Searchresults\n"
|
||||||
|
for rel_path in sr:
|
||||||
|
p = creole_page(request, rel_path)
|
||||||
|
page_content += f"[[/page/{rel_path}|{p.title}]]\n"
|
||||||
|
#
|
||||||
context_adaption(
|
context_adaption(
|
||||||
context,
|
context,
|
||||||
request,
|
request,
|
||||||
page_content="Search is not yet implemented..."
|
page_content=mycreole.render_simple(page_content)
|
||||||
)
|
)
|
||||||
return render(request, 'pages/page.html', context=context)
|
return render(request, 'pages/page.html', context=context)
|
||||||
|
|
||||||
|
@ -135,6 +135,8 @@ MYCREOLE_BAR = {
|
|||||||
|
|
||||||
PAGES_ROOT = os.path.join(BASE_DIR, 'data', 'pages')
|
PAGES_ROOT = os.path.join(BASE_DIR, 'data', 'pages')
|
||||||
|
|
||||||
|
WHOOSH_PATH = os.path.join(BASE_DIR, 'data', 'whoosh')
|
||||||
|
|
||||||
# Default primary key field type
|
# Default primary key field type
|
||||||
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
|
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
|
||||||
|
|
||||||
@ -215,4 +217,3 @@ File "%(pathname)s", line %(lineno)d, in %(funcName)s
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,4 +2,4 @@ Django
|
|||||||
Pillow
|
Pillow
|
||||||
python-creole
|
python-creole
|
||||||
pytz
|
pytz
|
||||||
|
Whoosh
|
||||||
|
Loading…
x
Reference in New Issue
Block a user