/usr/local/Hypy

view hypy/test_lib.py @ 139:d0a738aea316

Added tag 0.8.4.1 for changeset bbc3e3a3c4fb
author Cory Dodt <corymercurial@spam.goonmill.org>
date Fri Oct 09 10:32:45 2009 -0700 (2009-10-09)
parents 7a4447b28d87
children
line source
1 """
2 Test hypy.lib module
3 """
5 from __future__ import with_statement
7 import unittest
8 import os
9 from contextlib import contextmanager
11 import hypy
13 from hypy import (HDocument, HDatabase, HHit, HResults, HCondition, OpenFailed,
14 PutFailed, CloseFailed, FlushFailed, EditFailed)
class TestHDocument(unittest.TestCase):
    """
    Checks the dict-style attribute access and the text handling of
    HDocument
    """
    def setUp(self):
        """
        Give every test a new HDocument whose uri is u'1'
        """
        self.doc = HDocument(uri=u'1')

    def test_dictlike(self):
        """
        Exercise the dictionary-style interface of HDocument: item
        assignment and lookup, get() with and without a default, update(),
        items(), keys() and values().  Item deletion is expected to raise
        NotImplementedError.
        """
        document = self.doc

        # byte strings and other non-unicode values must be refused
        for rejected in ('baz', 1):
            self.assertRaises(TypeError, document.__setitem__, 'foobar',
                              rejected)

        document[u'foobar'] = u'baz'
        # a bare lookup of the stored key
        document[u'foobar']
        self.assertEqual(document[u'foobar'], u'baz')
        self.assertEqual(document.get(u'foobar', 'default'), u'baz')
        self.assertEqual(document.get(u'xyz', 'default'), 'default')
        self.assertEqual(document.get(u'xyz'), None)

        # update() adds a new key and overwrites an existing one
        document.update({u'new1': u'lala', u'foobar': u'bazz'})
        expected_items = [
            (u'@uri', u'1'),
            (u'foobar', u'bazz'),
            (u'new1', u'lala'),
        ]
        self.assertEqual(sorted(document.items()), expected_items)

        document[u'ninjas'] = u'11'
        expected_keys = [u'@uri', u'foobar', u'new1', u'ninjas']
        expected_values = [u'1', u'11', u'bazz', u'lala']
        self.assertEqual(sorted(document.keys()), expected_keys)
        self.assertEqual(sorted(document.values()), expected_values)

        self.assertRaises(NotImplementedError, document.__delitem__,
                          u'ninjas')

    def test_text(self):
        """
        Add visible and hidden text to a document and check getTexts(),
        the text property and encode()
        """
        document = self.doc

        # visible text: byte strings are refused, unicode accumulates
        self.assertRaises(TypeError, document.addText, 'xyz')
        document.addText(u'xyz')
        self.assertEqual([u'xyz'], document.getTexts())
        document.addText(u'123')
        self.assertEqual([u'xyz', u'123'], document.getTexts())

        # hidden text: same type rule, and getTexts() is left unchanged
        self.assertRaises(TypeError, document.addHiddenText, 'abc')
        document.addHiddenText(u'abc')
        self.assertEqual([u'xyz', u'123'], document.getTexts())

        self.assertEqual(u'xyz\n123', document.text)

        # a non-ASCII character, checked through its UTF-8 encoding
        document.addText(u'\u062b')

        self.assertEqual('xyz123\xd8\xab', document.encode('utf-8'))

    def test_unicodeType(self):
        """
        Constructing an HDocument with a non-unicode uri raises TypeError
        """
        self.assertRaises(TypeError, HDocument, uri='notunicode')
class TestDatabase(unittest.TestCase):
    """
    Tests HResults, HCondition and HHit.  And since you can't test these
    things without a database, test HDatabase.
    """
    @contextmanager
    def freshenDatabase(self, extras=0):
        """
        Context manager yielding a freshly written test database.

        Use:
            with self.freshenDatabase() as db:
                ... stuff that should test using these three documents ...

        The database is opened at '_temp_db' in 'w' mode and filled with
        three documents (uris u'1', u'2', u'3').  When extras is non-zero,
        that many additional filler documents are stored with uris starting
        at u'4'; each carries specialId, description and date attributes.
        The database is flushed before it is yielded.

        On exit the database is closed.  This also happens when populating
        it fails, so a broken setup does not leave '_temp_db' open for the
        next test.  CloseFailed is ignored on exit.
        """
        db = HDatabase()
        db.open('_temp_db', 'w')

        try:
            doc = HDocument(uri=u'1')
            doc.addText(u'word this is my document. do you like documents? this one is hi-res.')
            db.putDoc(doc, clean=True)

            doc = HDocument(uri=u'2')
            doc.addText(u'word lorem ipsum dolor sit amet something whatever whatever i do not remember the rest')
            db.putDoc(doc)

            doc = HDocument(uri=u'3')
            doc.addText(u'word four score and 7 years ago our forefathers brought forth upon upon something')
            db.putDoc(doc)

            for x in range(4, extras+4):
                doc = HDocument(uri=unicode(x))
                doc.addText(u'filler filler filler carrot top')
                # set some attributes for attribute operator testing
                doc[u'specialId'] = unicode(x)
                doc[u'description'] = unicode(x) * x
                doc[u'date'] = u'2008-12-%s' % (x,)
                db.putDoc(doc)

            db.flush()

            yield db
        finally:
            try:
                db.close()
            except CloseFailed:
                # Some of the tests do this close on their own.
                pass

    def test_dbOptimize(self):
        """
        Make sure the various optimize flags do not cause a heart attack.
        Also checks that sync() raises NotImplementedError.
        """
        with self.freshenDatabase() as db:
            db.optimize(purge=True)
        with self.freshenDatabase() as db:
            db.optimize(opt=True)
        with self.freshenDatabase() as db:
            db.optimize()
            self.assertRaises(NotImplementedError, db.sync)

    def test_removeUpdate(self):
        """
        Test for document id, update, document removal, len() of database.
        """
        docxx = HDocument(uri=u'xx')
        docxx.addText(u'xx')

        # id of a non-stored document
        self.assertEqual(docxx.id, -1)

        # updateAttributes on a database that was never opened
        db = HDatabase()
        self.assertRaises(EditFailed, db.updateAttributes, docxx)

        # removes and updates
        with self.freshenDatabase() as db:
            # flags; just test that these do not nuke us.  no idea what they
            # are supposed to do.
            db.putDoc(docxx, clean=True)
            del db[u'xx']
            # delete same doc twice?
            self.assertRaises(EditFailed, db.remove, uri=u'xx')

            db.putDoc(docxx)

            # __len__
            self.assertEqual(len(db), 4)

            # remove by uri, by id, by reference
            db.remove(uri=u'1')
            self.assertEqual(len(db), 3)
            doc2 = db[u'2']
            db.remove(doc2)
            self.assertEqual(len(db), 2)
            doc3id = db[u'3'].id
            db.remove(id=doc3id)
            self.assertEqual(len(db), 1)
            # no arg?
            self.assertRaises(TypeError, db.remove)
            # already removed?
            self.assertRaises(EditFailed, db.remove, id=doc3id)

            self.assertRaises(KeyError, db.__getitem__, '1')

            # fetch a document from the database, edit it, store it, compare
            # it with the original (unfetched) document.  Verify that they
            # are different after the edit.  Then verify that the document
            # can be fetched (again) from the database with the edited text.
            dbdocxx = db[u'xx']
            # yes, these are different objects
            self.assertFalse(docxx is dbdocxx)
            self.assertTrue(dbdocxx.get(u'zz') is None)

            dbdocxx[u'zz'] = u'hello'
            db.updateAttributes(dbdocxx)
            dbdocxx2 = db[u'xx']
            # again, different objects
            self.assertFalse(dbdocxx is dbdocxx2)
            self.assertEqual(dbdocxx2.get(u'zz'), u'hello')

    def test_removeURINone(self):
        """
        #356253: should be able to explicitly say "uri=None" when calling
        remove
        """
        with self.freshenDatabase() as db:
            # this is odd, but it should work
            db.remove(id=1, uri=None)

    def test_removeNulls(self):
        """
        Bug 321579: nulls should not kill addText

        A document whose uri, text, hidden text and attributes contain NUL
        characters is stored, then found again by searches that do not
        contain the NULs.
        """
        docnulls = HDocument(uri=u'nulls\0')
        docnulls.addText(u'hello there\0 children')
        docnulls.addHiddenText(u'sweet\0sweet love')
        docnulls[u'character\0'] = u'chef\0'
        with self.freshenDatabase() as db:
            db.putDoc(docnulls)
            db.flush()
            cond = HCondition(u'there*')
            self.assertEqual(db.search(cond).pluck(u'@uri'), [u'nulls'])
            cond = HCondition(u'sweet*')
            self.assertEqual(db.search(cond).pluck(u'@uri'), [u'nulls'])

            cond = HCondition()
            cond.addAttr(u'character STREQ chef')
            self.assertEqual(db.search(cond).pluck(u'@uri'), [u'nulls'])

            docnulls[u'voi\0ce'] = u'Isaac Hayes'
            db.updateAttributes(docnulls)

            cond = HCondition()
            cond.addAttr(u'voice STRRX .saac.*')
            self.assertEqual(db.search(cond).pluck(u'@uri'), [u'nulls'])

    def test_putFlags(self):
        """
        Tests for put flags, other put-related corner cases.
        """
        docxx = HDocument(uri=u'xx')
        docxx.addText(u'xx')

        with self.freshenDatabase() as db:
            # flags; just test that these do not nuke us.  no idea what they
            # are supposed to do.
            db.putDoc(docxx, clean=True)
            del db[u'xx']
            db.putDoc(docxx, weight=True)
            del db[u'xx']

            ## # put same doc twice?
            ## apparently this works. huh.
            ## db.putDoc(docxx); db.putDoc(docxx)

    def test_condExtras(self):
        """
        Tests for search skip, search options, cond on attributes
        """
        with self.freshenDatabase(extras=10) as db:
            self.assertEqual(len(db), 13)
            # skip and max
            cond4_8 = HCondition(u'filler', max=5)
            cond9_11 = HCondition(u'filler', max=3, skip=5)
            res1 = db.search(cond4_8)
            self.assertEqual(res1.pluck(u'@uri'), list(u'45678'))
            res2 = db.search(cond9_11)
            self.assertEqual(res2.pluck(u'@uri'), [u'9', u'10', u'11'])

            # union matching
            result = db.search(HCondition(u'ipsum score', matching='simple'))
            self.assertEqual(len(result), 0)
            result = db.search(HCondition(u'ipsum score', matching='union'))
            self.assertEqual(len(result), 2)

            # isect matching
            result = db.search(HCondition(u'lorem* ipsum*', matching='simple'))
            self.assertEqual(len(result), 1)
            result = db.search(HCondition(u'lorem* ipsum*', matching='isect'))
            self.assertEqual(len(result), 0)
            result = db.search(HCondition(u'lorem ipsum', matching='isect'))
            self.assertEqual(len(result), 1)

            # rough matching
            result = db.search(HCondition(u'lorem* ipsum*', matching='simple'))
            self.assertEqual(len(result), 1)
            result = db.search(HCondition(u'lorem* ipsum*', matching='rough'))
            self.assertEqual(len(result), 0)
            result = db.search(HCondition(u'lorem ipsum', matching='rough'))
            self.assertEqual(len(result), 1)

            # fewer-than-max hits
            result = db.search(HCondition(u'7*', matching='simple', max=2))
            self.assertEqual(len(result), 1)
            self.assertEqual(result[0][u'@uri'], u'3')

            # attribute conditions
            cc = HCondition(u'*.**')
            self.assertRaises(TypeError, cc.addAttr, 'xxx')

            # None as a kwarg should work (bug #356253); constructing the
            # condition is the whole check, so the result is not kept
            HCondition(phrase=None)

            def attrSearch(expr, order=None, phrase=None):
                """
                Search db with the attribute expression expr, optionally
                combined with a text phrase and an order expression.
                """
                if phrase:
                    cond = HCondition(phrase)
                else:
                    cond = HCondition()
                cond.addAttr(expr)
                if order:
                    # a byte-string order expression must be refused
                    self.assertRaises(TypeError, cond.setOrder, '*.**')
                    cond.setOrder(order)
                return db.search(cond)

            result = attrSearch(u'description STREQ 4444')
            self.assertEqual(result.pluck(u'@uri'), [u'4'])
            result = attrSearch(u'specialId NUMGE 10')
            self.assertEqual(result.pluck(u'@uri'), [u'10', u'11', u'12', u'13'])
            result = attrSearch(u'date NUMGE 2008-12-09')
            self.assertEqual(sorted(map(int, result.pluck(u'@uri'))), [9, 10, 11, 12, 13])
            result = attrSearch(u'description STRRX .{10,14}')
            self.assertEqual(result.pluck(u'@uri'), [u'10', u'11', u'12', u'13'])
            # ordering and !not
            result = attrSearch(u'description !STRRX .{10,14}', u'@uri NUMA')
            self.assertEqual(result.pluck(u'@uri'), list(u'123456789'))
            result = attrSearch(u'description STRRX .{10,14}', u'@uri NUMD')
            self.assertEqual(result.pluck(u'@uri'), [u'13', u'12', u'11', u'10'])

            # one attribute search combined with a text search - regular
            # expression match, all characters
            result = attrSearch(u'description STRRX .{10,14}', u'@uri NUMD', u'*.**')
            self.assertEqual(result.pluck(u'@uri'), [u'13', u'12', u'11', u'10'])

    def test_dbOpenClosed(self):
        """
        Tests for all the db open/close modes
        """
        docyy = HDocument(uri=u'yy')
        docyy.addText(u'yy')
        docxx = HDocument(uri=u'xx')
        docxx.addText(u'xx')
        condxx = HCondition(u'xx')
        condyy = HCondition(u'yy')

        db = HDatabase()
        # open of unreachable directory
        self.assertRaises(OpenFailed, db.open, 'does/not/exist', 'a')
        # close before successful open
        self.assertRaises(CloseFailed, db.close)

        # w mode
        db.open('_temp_db', 'w')
        # assertTrue rather than the deprecated assert_ alias
        self.assertTrue(os.path.exists('_temp_db/_idx'))
        db.putDoc(docyy)
        db.close()

        # r mode
        db.open('_temp_db', 'r')
        # write to read-only db
        self.assertRaises(PutFailed, db.putDoc, docxx)
        self.assertEqual(len(db.search(condyy)), 1)
        db.close()

        # a mode
        db.open('_temp_db', 'a')
        db.putDoc(docxx)
        db.flush()
        self.assertEqual(len(db.search(condxx)), 1)
        self.assertEqual(len(db.search(condyy)), 1)
        db.close()

        # w mode again - check that the db is clobbered
        db.open('_temp_db', 'w')
        self.assertEqual(len(db.search(condxx)), 0)
        db.close()

        # close after successful close
        self.assertRaises(CloseFailed, db.close)
        self.assertRaises(FlushFailed, db.flush)

    def test_queries(self):
        """
        Test various conditions against an index to make sure search works.
        """
        with self.freshenDatabase() as db:
            # plain prefix search (the phrase is a unicode literal, not an
            # 8-bit str)
            result = db.search(HCondition(u'wor*', matching='simple'))
            self.assertEqual(len(result), 3)

            # the same unicode search again
            result = db.search(HCondition(u'wor*', matching='simple'))
            self.assertEqual(len(result), 3)

            # test simple query with multiple hits
            result = db.search(HCondition(u'res*', matching='simple'))
            self.assertEqual(result.pluck(u'@uri'), [u'1', u'2'])

            # vary query terms to check result scoring
            result = db.search(HCondition(u'someth* | whatever*', matching='simple', max=2))
            self.assertEqual(result.pluck(u'@uri'), [u'2', u'3'])
            result = db.search(HCondition(u'someth* | upon*', matching='simple', max=2))
            self.assertEqual(result.pluck(u'@uri'), [u'3', u'2'])  # FIXME

            self.assertEqual(result.hintWords(), [u'someth', u'upon'])

    def test_hits(self):
        """
        Poke at the hits returned by a search and see if document data and
        teaser text come out right.
        """
        with self.freshenDatabase() as db:
            cc = HCondition(u'hi-res')
            for hit in db.search(cc):
                self.assertEqual(str(hit),
                    '@digest=17de33c57e358f0fc5d57cd26a08b48e\n@id=1\n@uri=1\n\nword this is my document. do you like documents? this one is hi-res.\n')
                self.assertEqual(hit.teaser([u'document']), u'word this is my <strong>document</strong>. do you li ... ke <strong>document</strong>s? this one is hi-re ... ')
                self.assertEqual(hit.teaser([u'document'], 'rst'), u'word this is my **document**. do you li ... ke **document**s? this one is hi-re ... ')
                # teaser words must be unicode
                self.assertRaises(TypeError, hit.teaser, ['document'])
                # 'pdf' is not a supported teaser format
                self.assertRaises(NotImplementedError, hit.teaser,
                        [u'document'], 'pdf')

    def test_autoflush(self):
        """
        Verify that autoflush is not on when it's not turned on, and that
        words are autoindexed when it is turned on
        """
        cond = HCondition(u'doc*')

        @contextmanager
        def setup(**kw):
            """
            Yield an HDatabase built with **kw, opened at '_temp_db' in 'w'
            mode and holding one document that was put but never explicitly
            flushed.  The database is closed on exit, including when
            storing the document fails.
            """
            db1 = HDatabase(**kw)
            db1.open('_temp_db', 'w')

            try:
                doc = HDocument(uri=u'1')
                doc.addText(u'word this is my document. do you like documents? this one is hi-res.')
                db1.putDoc(doc, clean=True)

                yield db1
            finally:
                db1.close()

        # non-autoflush: can't search for doc*
        with setup() as db:
            self.assertEqual(len(db.search(cond)), 0)

        # autoflush: now you can.
        with setup(autoflush=True) as db:
            self.assertEqual(len(db.search(cond)), 1)
class TestMisc(unittest.TestCase):
    """
    Checks on package-level odds and ends, such as the version string
    """
    def test_version(self):
        """
        The __version__ exported by the hypy package must equal the
        __version__ found in hypy.copyright.

        This does not prove that copyright.py was generated correctly; it
        only shows that the module can be imported and that the two version
        values agree.
        """
        from hypy import __version__ as package_version
        from hypy.copyright import __version__ as copyright_version
        self.assertEqual(package_version, copyright_version)
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()