author    Reid 'arrdem' McKenzie <reid@factual.com>    2015-08-13 14:06:29 -0700
committer Reid 'arrdem' McKenzie <reid@factual.com>    2015-08-13 14:06:29 -0700
commit    5702f9db222dd1b2e6f435802aea8c62987d8f65 (patch)
tree      79f477aecb74f6669eb24e2ce4352b6716229f65
parent    3b7a7be9a2071d0b94e55d631c47fe7f400de5e0 (diff)
Recycle opened indexes rather than reopen
Previously an "index" was an NIO directory or a RAM directory which could be opened into a reader/writer at will. Unfortunately, acquiring the requisite filesystem locks to open a new reader/writer is expensive.

Rather than open new readers and writers each time, this patch creates and maintains a global cache of (Reader, Writer, Index) triples keyed off the Index, or off the path of the Index if present. This means that multiple threads opening the same path will get and use the same Readers and Writers (which are thread safe) rather than attempting and failing to open distinct Readers and Writers, since the Lucene locking model only allows a single open at a time.
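Sketched below, in Clojure, is the general shape of that cache: a ref mapping an index key to its already-opened handles, consulted inside a transaction so concurrent openers converge on one shared set. The names here (handle-cache, acquire-handles, release-handles, open-fn, close-fn) are illustrative assumptions, not what the patch introduces; the actual implementation (open-index / close-index over an AnIndex type) follows in the diff.

;; Minimal sketch of the recycling pattern; names are hypothetical.
(defonce ^:private handle-cache (ref {}))

(defn acquire-handles
  "Return the cached handles for `index-key`, opening a new set with
  `open-fn` and caching it only when none exists yet. Concurrent
  callers therefore share one open reader/writer set per index."
  [index-key open-fn]
  (dosync
   (or (get @handle-cache index-key)
       (let [handles (open-fn index-key)]
         (alter handle-cache assoc index-key handles)
         handles))))

(defn release-handles
  "Close and forget the cached handles for `index-key`, if any.
  Note: as in the patch, side effects run inside the transaction and
  may be retried under contention."
  [index-key close-fn]
  (dosync
   (when-let [handles (get @handle-cache index-key)]
     (close-fn handles)
     (alter handle-cache dissoc index-key))))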
-rw-r--r--  src/clucy/core.clj  207
1 file changed, 151 insertions(+), 56 deletions(-)
diff --git a/src/clucy/core.clj b/src/clucy/core.clj
index 5168282..8ddd1fe 100644
--- a/src/clucy/core.clj
+++ b/src/clucy/core.clj
@@ -29,7 +29,8 @@
(str x)))
;; flag to indicate a default "_content" field should be maintained
-(def ^{:dynamic true} *content* true)
+(def ^{:dynamic true} *content*
+ true)
(defn memory-index
"Create a new index in RAM."
@@ -55,6 +56,90 @@
[index]
(DirectoryReader/open ^Directory index))
+
+;; We want to hold IndexWriters open for performance, but we can't
+;; actually have multiple IndexWriters open on a single index.
+;;
+;; Thankfully IndexWriters are thread safe.
+;;
+;; As a result we can safely "open" both a Reader and a Writer for any
+;; given Index, and maintain a cache mapping from Index instance to a
+;; (Reader, Writer, Index) structure which may be thread shared.
+
+(defonce ^{:private true} index-cache
+ (ref {}))
+
+;; We also want some common abstraction for working with either
+;; a "real" open index or something we know how to open into either a
+;; reader or writer
+
+(defprotocol IIndex
+ (as-reader [_])
+ (as-writer [_])
+ (as-index [_]))
+
+;; Now the tuple thing
+
+(declare close-index)
+
+(deftype AnIndex [reader writer ind]
+ java.io.Closeable
+ (close [this]
+ (close-index this))
+
+ IIndex
+ (as-reader [_] reader)
+ (as-writer [_] writer)
+ (as-index [_] ind))
+
+;; Open tuple instances using the cache
+
+(defn open-index [ind]
+ (let [key (if (instance? NIOFSDirectory ind)
+ (.getDirectory ind)
+ ind)]
+ (dosync
+ (if-let [ai (get @index-cache key)]
+ ai
+ (let [ai (AnIndex.
+ (index-reader ind)
+ (index-writer ind)
+ ind)]
+ (alter index-cache assoc key ai)
+ ai)))))
+
+;; Close them invalidating the cache
+
+(defn close-index [ind]
+ {:pre [(instance? AnIndex ind)]}
+ (let [key (if (instance? NIOFSDirectory ind)
+ (.getDirectory ind)
+ ind)]
+ (dosync
+ (.close (.reader ind))
+ (.close (.writer ind))
+ (alter index-cache dissoc key))
+ nil))
+
+;; And extend this abstraction to the other interesting types
+
+(extend-protocol IIndex
+ NIOFSDirectory
+ (as-reader [d]
+ (as-reader (open-index d)))
+ (as-writer [d]
+ (as-writer (open-index d)))
+ (as-index [d] d)
+
+ RAMDirectory
+ (as-reader [d]
+ (as-reader (open-index d)))
+ (as-writer [d]
+ (as-writer (open-index d)))
+ (as-index [d] d))
+
+
+
(defn- add-field
"Add a Field to a Document.
Following options are allowed for meta-map:
@@ -122,7 +207,7 @@
(defn add
"Add hash-maps to the search index."
[index & maps]
- (with-open [writer (index-writer index)]
+ (let [writer (as-writer index)]
(doseq [m maps]
(.addDocument writer
(map->document m)))))
@@ -130,7 +215,7 @@
(defn delete
"Deletes hash-maps from the search index."
[index & maps]
- (with-open [^IndexWriter writer (index-writer index)]
+ (let [writer (as-writer index)]
(doseq [m maps]
(let [^BooleanQuery query (BooleanQuery.)
^"[Lorg.apache.lucene.search.Query;" arr (into-array BooleanQuery [query])]
@@ -146,27 +231,35 @@
(defn- document->map
"Turn a Document object into a map."
([^Document document score]
- (document->map document score (constantly nil)))
+ (document->map document score (constantly nil)))
([^Document document score highlighter]
- (let [m (into {} (for [^Field f (.getFields document)]
- [(keyword (.name f)) (.stringValue f)]))
- fragments (highlighter m) ; so that we can highlight :_content
- m (dissoc m :_content)]
- (with-meta
- m
- (-> (into {}
- (for [^Field f (.getFields document)
- :let [field-type (.fieldType f)]]
- [(keyword (.name f)) {:indexed (.indexed field-type)
- :stored (.stored field-type)
- :tokenized (.tokenized field-type)}]))
- (assoc :_fragments fragments :_score score)
- (dissoc :_content))))))
+ (let [m (->> (for [^Field f (.getFields document)]
+ [(keyword (.name f)) (.stringValue f)])
+ (into {}))
+
+ ;; so that we can highlight :_content
+ fragments (highlighter m)
+
+ doc-map (dissoc m :_content)
+
+ doc-meta (as-> document d
+ (for [^Field f (.getFields d)
+ :let [field-type (.fieldType f)]]
+ [(keyword (.name f))
+ {:indexed (.indexed field-type)
+ :stored (.stored field-type)
+ :tokenized (.tokenized field-type)}])
+ (into {} d)
+ (assoc d
+ :_fragments fragments
+ :_score score)
+ (dissoc d :_content))]
+ (with-meta doc-map doc-meta))))
(defn- make-highlighter
"Create a highlighter function which will take a map and return highlighted
-fragments."
+ fragments."
[^Query query ^IndexSearcher searcher config]
(if config
(let [indexReader (.getIndexReader searcher)
@@ -198,51 +291,51 @@ fragments."
:or {page 0}}]
(if (every? false? [default-field *content*])
(throw (Exception. "No default search field specified"))
- (with-open [reader (index-reader index)]
- (let [default-field (or default-field :_content)
+ (let [reader (as-reader index)
+ default-field (or default-field :_content)
- searcher (IndexSearcher. reader)
+ searcher (IndexSearcher. reader)
- parser (doto (QueryParser. *version*
- (as-str default-field)
- *analyzer*)
- (.setDefaultOperator
- (case (or default-operator :or)
- :and
- ,,QueryParser/AND_OPERATOR
+ parser (doto (QueryParser. *version*
+ (as-str default-field)
+ *analyzer*)
+ (.setDefaultOperator
+ (case (or default-operator :or)
+ :and
+ ,,QueryParser/AND_OPERATOR
- :or
- ,,QueryParser/OR_OPERATOR)))
+ :or
+ ,,QueryParser/OR_OPERATOR)))
- max-results (if (= query :all)
- Integer/MAX_VALUE
- max-results)
+ max-results (if (= query :all)
+ Integer/MAX_VALUE
+ max-results)
- results-per-page (or results-per-page max-results)
+ results-per-page (or results-per-page max-results)
- query (if (= query :all)
- (MatchAllDocsQuery.)
- (.parse parser query))
+ query (if (= query :all)
+ (MatchAllDocsQuery.)
+ (.parse parser query))
- hits (.search searcher query (int max-results))
+ hits (.search searcher query (int max-results))
- highlighter (make-highlighter query searcher highlight)
+ highlighter (make-highlighter query searcher highlight)
- start (* page results-per-page)
+ start (* page results-per-page)
- end (min (+ start results-per-page)
- (.totalHits hits))]
- (doall
- (with-meta
- (for [hit (map (partial aget (.scoreDocs hits))
- (range start end))]
- (document->map
- (.doc ^IndexSearcher searcher
- (.doc ^ScoreDoc hit))
- (.score ^ScoreDoc hit)
- highlighter))
- {:_total-hits (.totalHits hits)
- :_max-score (.getMaxScore hits)}))))))
+ end (min (+ start results-per-page)
+ (.totalHits hits))]
+ (doall
+ (with-meta
+ (for [hit (map (partial aget (.scoreDocs hits))
+ (range start end))]
+ (document->map
+ (.doc ^IndexSearcher searcher
+ (.doc ^ScoreDoc hit))
+ (.score ^ScoreDoc hit)
+ highlighter))
+ {:_total-hits (.totalHits hits)
+ :_max-score (.getMaxScore hits)})))))
(defn search-and-delete
"Search the supplied index with a query string and then delete all
@@ -254,8 +347,10 @@ fragments."
(throw (Exception. "No default search field specified"))))
([index query default-field]
- (with-open [writer (index-writer index)]
+ (let [writer (as-writer index)]
(let [parser (QueryParser. *version* (as-str default-field) *analyzer*)
- query (.parse parser query)
+ query (if (= query :all)
+ (MatchAllDocsQuery.)
+ (.parse parser query))
^"[Lorg.apache.lucene.search.Query;" arr (into-array [query])]
(.deleteDocuments ^IndexWriter writer arr)))))
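
For context, a hypothetical caller-side session after this change might look like the sketch below. The public clucy API (memory-index, add, search) is unchanged; the difference is that repeated operations against the same index now reuse one cached reader/writer set instead of reopening it on every call. The example data and query are made up for illustration, and the shown result is approximate.

(require '[clucy.core :as clucy])

;; Hypothetical example data; the calls are clucy's existing public API.
(def index (clucy/memory-index))

(clucy/add index {:name "Alice" :role "admin"})
(clucy/add index {:name "Bob"   :role "user"})

;; Both adds above and the search below now route through the same
;; cached writer/reader for `index` rather than reopening it each time.
(clucy/search index "role:user" 10)
;; => roughly ({:name "Bob", :role "user"})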