Piki is a minimal wiki
Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

search.py 2.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. from datetime import datetime
  2. from django.conf import settings
  3. import fstools
  4. import logging
  5. import os
  6. from whoosh.fields import Schema, ID, TEXT, DATETIME
  7. from whoosh.qparser.dateparse import DateParserPlugin
  8. from whoosh import index, qparser
  9. from pages.page import page_wrapped, full_path_all_pages
  10. logger = logging.getLogger(settings.ROOT_LOGGER_NAME).getChild(__name__)
  11. SCHEMA = Schema(
  12. id=ID(unique=True, stored=True),
  13. # Page
  14. title=TEXT,
  15. page_src=TEXT,
  16. tag=TEXT,
  17. # metadata
  18. creation_time=DATETIME,
  19. modified_time=DATETIME,
  20. modified_user=TEXT
  21. )
  22. def mk_whooshpath_if_needed():
  23. if not os.path.exists(settings.WHOOSH_PATH):
  24. fstools.mkdir(settings.WHOOSH_PATH)
  25. def create_index():
  26. mk_whooshpath_if_needed()
  27. logger.debug('Search Index created.')
  28. return index.create_in(settings.WHOOSH_PATH, schema=SCHEMA)
  29. def rebuild_index(ix):
  30. page_path = full_path_all_pages()
  31. for path in page_path:
  32. pw = page_wrapped(None, path)
  33. add_item(ix, pw)
  34. return len(page_path)
  35. def load_index():
  36. mk_whooshpath_if_needed()
  37. try:
  38. ix = index.open_dir(settings.WHOOSH_PATH)
  39. except index.EmptyIndexError:
  40. ix = create_index()
  41. else:
  42. logger.debug('Search Index opened.')
  43. return ix
  44. def add_item(ix, pw: page_wrapped):
  45. # Define Standard data
  46. #
  47. data = dict(
  48. id=pw.rel_path,
  49. #
  50. title=pw.title,
  51. page_src=pw.raw_page_src,
  52. tag=pw.tags,
  53. #
  54. creation_time=datetime.fromtimestamp(pw.creation_time),
  55. modified_time=datetime.fromtimestamp(pw.modified_time),
  56. modified_user=pw.modified_user
  57. )
  58. with ix.writer() as w:
  59. logger.info('Adding document with id=%s to the search index.', data.get('id'))
  60. w.update_document(**data)
  61. for key in data:
  62. logger.debug(' - Adding %s=%s', key, repr(data[key]))
  63. def whoosh_search(search_txt):
  64. ix = load_index()
  65. qp = qparser.MultifieldParser(['title', 'page_src', 'tag'], ix.schema)
  66. qp.add_plugin(DateParserPlugin(free=True))
  67. try:
  68. q = qp.parse(search_txt)
  69. except AttributeError:
  70. return None
  71. except Exception:
  72. return None
  73. with ix.searcher() as s:
  74. results = s.search(q, limit=None)
  75. rpl = []
  76. for hit in results:
  77. rpl.append(hit['id'])
  78. return rpl
  79. def delete_item(ix, pw: page_wrapped):
  80. with ix.writer() as w:
  81. logger.info('Removing document with id=%s from the search index.', pw.rel_path)
  82. w.delete_by_term("task_id", pw.rel_path)
  83. def update_item(pw: page_wrapped):
  84. ix = load_index()
  85. delete_item(ix, pw)
  86. add_item(ix, pw)