Piki is a minimal wiki
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

search.py 2.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. from datetime import datetime
  2. from django.conf import settings
  3. import fstools
  4. import logging
  5. import os
  6. from whoosh.fields import Schema, ID, TEXT, DATETIME
  7. from whoosh.qparser.dateparse import DateParserPlugin
  8. from whoosh import index, qparser
  9. from pages.page import base_page
  10. logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)
  11. SCHEMA = Schema(
  12. id=ID(unique=True, stored=True),
  13. # Page
  14. title=TEXT,
  15. page_src=TEXT,
  16. # metadata
  17. creation_time=DATETIME,
  18. modified_time=DATETIME,
  19. modified_user=TEXT
  20. )
  21. def mk_whooshpath_if_needed():
  22. if not os.path.exists(settings.WHOOSH_PATH):
  23. fstools.mkdir(settings.WHOOSH_PATH)
  24. def create_index():
  25. mk_whooshpath_if_needed()
  26. logger.debug('Search Index created.')
  27. return index.create_in(settings.WHOOSH_PATH, schema=SCHEMA)
  28. def rebuild_index(ix):
  29. page_path = fstools.dirlist(settings.PAGES_ROOT, rekursive=False)
  30. for path in page_path:
  31. bp = base_page(path)
  32. add_item(ix, bp)
  33. return len(page_path)
  34. def load_index():
  35. mk_whooshpath_if_needed()
  36. try:
  37. ix = index.open_dir(settings.WHOOSH_PATH)
  38. except index.EmptyIndexError:
  39. ix = create_index()
  40. else:
  41. logger.debug('Search Index opened.')
  42. return ix
  43. def add_item(ix, bp: base_page):
  44. # Define Standard data
  45. #
  46. data = dict(
  47. id=bp.rel_path,
  48. #
  49. title=bp.title,
  50. page_src=bp.raw_page_src,
  51. #
  52. creation_time=datetime.fromtimestamp(bp._meta_data.get(bp._meta_data.KEY_CREATION_TIME)),
  53. modified_time=datetime.fromtimestamp(bp._meta_data.get(bp._meta_data.KEY_MODIFIED_TIME)),
  54. modified_user=bp._meta_data.get(bp._meta_data.KEY_MODIFIED_USER)
  55. )
  56. with ix.writer() as w:
  57. logger.info('Adding document with id=%s to the search index.', data.get('id'))
  58. w.add_document(**data)
  59. for key in data:
  60. logger.debug(' - Adding %s=%s', key, repr(data[key]))
  61. def whoosh_search(search_txt):
  62. ix = load_index()
  63. qp = qparser.MultifieldParser(['title', 'page_src'], ix.schema)
  64. qp.add_plugin(DateParserPlugin(free=True))
  65. try:
  66. q = qp.parse(search_txt)
  67. except AttributeError:
  68. return None
  69. except Exception:
  70. return None
  71. with ix.searcher() as s:
  72. results = s.search(q, limit=None)
  73. rpl = []
  74. for hit in results:
  75. rpl.append(hit['id'])
  76. return rpl
  77. def delete_item(ix, bp: base_page):
  78. with ix.writer() as w:
  79. logger.info('Removing document with id=%s from the search index.', bp.rel_path)
  80. w.delete_by_term("task_id", bp.rel_path)
  81. def update_item(bp: base_page):
  82. ix = load_index()
  83. delete_item(ix, bp)
  84. add_item(ix, bp)