/usr/local/Hypy

view hypy/test_lib.py @ 101:7a4447b28d87

#356253: allow None as unicodeToByte argument
author Cory Dodt <corymercurial@spam.goonmill.org>
date Mon Apr 06 19:33:05 2009 -0700 (2009-04-06)
parents a718c996e3f8
children 90bee6d63d14
line source
1 """
2 Test hypy.lib module
3 """
5 from __future__ import with_statement
7 import unittest
8 import os
9 from contextlib import contextmanager
11 import hypy
13 from hypy import (HDocument, HDatabase, HHit, HResults, HCondition, OpenFailed,
14 PutFailed, CloseFailed, FlushFailed, EditFailed)
class TestHDocument(unittest.TestCase):
    """
    Exercise HDocument's dictionary-style attribute access and its text
    handling.
    """
    def setUp(self):
        """
        Build the document under test, with uri u'1'
        """
        self.doc = HDocument(uri=u'1')

    def test_dictlike(self):
        """
        HDocument mostly follows the dictionary protocol.  Check item access,
        get(), update(), items()/keys()/values(), and that item deletion is
        not implemented.
        """
        document = self.doc
        setItem = document.__setitem__

        # byte strings and other non-unicode values are rejected
        for badValue in ('baz', 1):
            self.assertRaises(TypeError, setItem, 'foobar', badValue)

        document[u'foobar'] = u'baz'
        # plain lookup; only has to not raise
        document[u'foobar']
        self.assertEqual(document[u'foobar'], u'baz')
        self.assertEqual(document.get(u'foobar', 'default'), u'baz')
        self.assertEqual(document.get(u'xyz', 'default'), 'default')
        self.assertEqual(document.get(u'xyz'), None)

        # update() adds new keys and overwrites existing ones
        document.update({u'new1': u'lala', u'foobar': u'bazz'})
        expectedItems = [
            (u'@uri', u'1'),
            (u'foobar', u'bazz'),
            (u'new1', u'lala'),
        ]
        self.assertEqual(sorted(document.items()), expectedItems)

        document[u'ninjas'] = u'11'
        expectedKeys = [u'@uri', u'foobar', u'new1', u'ninjas']
        expectedValues = [u'1', u'11', u'bazz', u'lala']
        self.assertEqual(sorted(document.keys()), expectedKeys)
        self.assertEqual(sorted(document.values()), expectedValues)

        self.assertRaises(NotImplementedError, document.__delitem__, u'ninjas')

    def test_text(self):
        """
        Add visible and hidden text to the document and check what
        getTexts(), .text and encode() report afterwards.
        """
        document = self.doc

        # visible text must be unicode and accumulates in insertion order
        self.assertRaises(TypeError, document.addText, 'xyz')
        document.addText(u'xyz')
        self.assertEqual([u'xyz'], document.getTexts())
        document.addText(u'123')
        self.assertEqual([u'xyz', u'123'], document.getTexts())

        # hidden text must be unicode too, and does not show up in getTexts()
        self.assertRaises(TypeError, document.addHiddenText, 'abc')
        document.addHiddenText(u'abc')
        self.assertEqual([u'xyz', u'123'], document.getTexts())

        self.assertEqual(u'xyz\n123', document.text)

        # a non-ASCII character must survive encoding to utf-8
        document.addText(u'\u062b')

        self.assertEqual('xyz123\xd8\xab', document.encode('utf-8'))

    def test_unicodeType(self):
        """
        Almost everything in hypy must be unicode; a byte-string uri is
        rejected by the constructor.
        """
        self.assertRaises(TypeError, HDocument, uri='notunicode')
class TestDatabase(unittest.TestCase):
    """
    Tests HResults, HCondition and HHit.  And since you can't test these
    things without a database, test HDatabase.
    """
    @contextmanager
    def freshenDatabase(self, extras=0):
        """
        Context manager yielding a freshly written database in '_temp_db'.

        The database always holds three text documents with uris u'1', u'2'
        and u'3'.  When C{extras} is non-zero, that many additional "filler"
        documents are stored with uris u'4' onward, each carrying the
        attributes specialId, description and date.

        Use:
            with self.freshenDatabase() as db:
                ... stuff that should test using these three documents ...

        The database is closed on exit, including when populating it fails,
        so that a failure here does not leave '_temp_db' open for the tests
        that follow.
        """
        db = HDatabase()
        db.open('_temp_db', 'w')

        try:
            doc = HDocument(uri=u'1')
            doc.addText(u'word this is my document. do you like documents? this one is hi-res.')
            db.putDoc(doc, clean=True)

            doc = HDocument(uri=u'2')
            doc.addText(u'word lorem ipsum dolor sit amet something whatever whatever i do not remember the rest')
            db.putDoc(doc)

            doc = HDocument(uri=u'3')
            doc.addText(u'word four score and 7 years ago our forefathers brought forth upon upon something')
            db.putDoc(doc)

            for x in range(4, extras+4):
                doc = HDocument(uri=unicode(x))
                doc.addText(u'filler filler filler carrot top')
                # set some attributes for attribute operator testing
                doc[u'specialId'] = unicode(x)
                doc[u'description'] = unicode(x) * x
                doc[u'date'] = u'2008-12-%s' % (x,)
                db.putDoc(doc)

            db.flush()

            yield db
        finally:
            try:
                db.close()
            except CloseFailed:
                # Some of the tests do this close on their own.
                pass

    def test_dbOptimize(self):
        """
        Make sure the various optimize flags do not cause a heart attack.
        """
        with self.freshenDatabase() as db:
            db.optimize(purge=True)
        with self.freshenDatabase() as db:
            db.optimize(opt=True)
        with self.freshenDatabase() as db:
            db.optimize()
            self.assertRaises(NotImplementedError, db.sync)

    def test_removeUpdate(self):
        """
        Test for document id, update, document removal, len() of database.
        """
        docxx = HDocument(uri=u'xx')
        docxx.addText(u'xx')

        # id of a non-stored document
        self.assertEqual(docxx.id, -1)

        # updateAttributes on a database that was never opened
        db = HDatabase()
        self.assertRaises(EditFailed, db.updateAttributes, docxx)

        # removes and updates
        with self.freshenDatabase() as db:
            # flags; just test that these do not nuke us.  no idea what they
            # are supposed to do.
            db.putDoc(docxx, clean=True)
            del db[u'xx']
            # delete same doc twice?
            self.assertRaises(EditFailed, db.remove, uri=u'xx')

            db.putDoc(docxx)

            # __len__
            self.assertEqual(len(db), 4)

            # remove by uri, by id, by reference
            db.remove(uri=u'1')
            self.assertEqual(len(db), 3)
            doc2 = db[u'2']
            db.remove(doc2)
            self.assertEqual(len(db), 2)
            doc3id = db[u'3'].id
            db.remove(id=doc3id)
            self.assertEqual(len(db), 1)
            # no arg?
            self.assertRaises(TypeError, db.remove)
            # already removed?
            self.assertRaises(EditFailed, db.remove, id=doc3id)

            # NOTE(review): this lookup uses a byte-string key, unlike the
            # unicode keys used everywhere else in this test -- confirm that
            # is intentional.
            self.assertRaises(KeyError, db.__getitem__, '1')

            # fetch a document from the database, edit it, store it, compare
            # it with the original (unfetched) document.  Verify that they are
            # different after the edit.  Then verify that the document can be
            # fetched (again) from the database with the edited text.
            dbdocxx = db[u'xx']
            # yes, these are different objects
            self.assertFalse(docxx is dbdocxx)
            self.assertTrue(dbdocxx.get(u'zz') is None)

            dbdocxx[u'zz'] = u'hello'
            db.updateAttributes(dbdocxx)
            dbdocxx2 = db[u'xx']
            # again, different objects
            self.assertFalse(dbdocxx is dbdocxx2)
            self.assertEqual(dbdocxx2.get(u'zz'), u'hello')

    def test_removeURINone(self):
        """
        #356253: should be able to explicitly say "uri=None" when calling remove
        """
        with self.freshenDatabase() as db:
            # this is odd, but it should work
            db.remove(id=1, uri=None)

    def test_removeNulls(self):
        """
        Bug 321579: nulls should not kill addText
        """
        docnulls = HDocument(uri=u'nulls\0')
        docnulls.addText(u'hello there\0 children')
        docnulls.addHiddenText(u'sweet\0sweet love')
        docnulls[u'character\0'] = u'chef\0'
        with self.freshenDatabase() as db:
            db.putDoc(docnulls)
            db.flush()
            # visible text is searchable
            cond = HCondition(u'there*')
            self.assertEqual(db.search(cond).pluck(u'@uri'), [u'nulls'])
            # hidden text is searchable
            cond = HCondition(u'sweet*')
            self.assertEqual(db.search(cond).pluck(u'@uri'), [u'nulls'])

            # attribute name and value are searchable without their nulls
            cond = HCondition()
            cond.addAttr(u'character STREQ chef')
            self.assertEqual(db.search(cond).pluck(u'@uri'), [u'nulls'])

            # the same holds for attributes added through updateAttributes
            docnulls[u'voi\0ce'] = u'Isaac Hayes'
            db.updateAttributes(docnulls)

            cond = HCondition()
            cond.addAttr(u'voice STRRX .saac.*')
            self.assertEqual(db.search(cond).pluck(u'@uri'), [u'nulls'])

    def test_putFlags(self):
        """
        Tests for put flags, other put-related corner cases.
        """
        docxx = HDocument(uri=u'xx')
        docxx.addText(u'xx')

        with self.freshenDatabase() as db:
            # flags; just test that these do not nuke us.  no idea what they
            # are supposed to do.
            db.putDoc(docxx, clean=True)
            del db[u'xx']
            db.putDoc(docxx, weight=True)
            del db[u'xx']

            ## # put same doc twice?
            ## apparently this works. huh.
            ## db.putDoc(docxx); db.putDoc(docxx)

    def test_condExtras(self):
        """
        Tests for search skip, search options, cond on attributes
        """
        with self.freshenDatabase(extras=10) as db:
            self.assertEqual(len(db), 13)
            # skip and max
            cond4_8 = HCondition(u'filler', max=5)
            cond9_11 = HCondition(u'filler', max=3, skip=5)
            res1 = db.search(cond4_8)
            self.assertEqual(res1.pluck(u'@uri'), list(u'45678'))
            res2 = db.search(cond9_11)
            self.assertEqual(res2.pluck(u'@uri'), [u'9', u'10', u'11'])

            # union matching
            result = db.search(HCondition(u'ipsum score', matching='simple'))
            self.assertEqual(len(result), 0)
            result = db.search(HCondition(u'ipsum score', matching='union'))
            self.assertEqual(len(result), 2)

            # isect matching
            result = db.search(HCondition(u'lorem* ipsum*', matching='simple'))
            self.assertEqual(len(result), 1)
            result = db.search(HCondition(u'lorem* ipsum*', matching='isect'))
            self.assertEqual(len(result), 0)
            result = db.search(HCondition(u'lorem ipsum', matching='isect'))
            self.assertEqual(len(result), 1)

            # rough matching
            result = db.search(HCondition(u'lorem* ipsum*', matching='simple'))
            self.assertEqual(len(result), 1)
            result = db.search(HCondition(u'lorem* ipsum*', matching='rough'))
            self.assertEqual(len(result), 0)
            result = db.search(HCondition(u'lorem ipsum', matching='rough'))
            self.assertEqual(len(result), 1)

            # fewer-than-max hits
            result = db.search(HCondition(u'7*', matching='simple', max=2))
            self.assertEqual(len(result), 1)
            self.assertEqual(result[0][u'@uri'], u'3')

            # attribute conditions
            cc = HCondition(u'*.**')
            self.assertRaises(TypeError, cc.addAttr, 'xxx')

            # None as a kwarg should work (bug #356253); constructing the
            # condition without an exception is the whole check.
            c2 = HCondition(phrase=None)

            def attrSearch(expr, order=None, phrase=None):
                """
                Search db with the attribute expression C{expr}, optionally
                combined with a text C{phrase} and an C{order} expression.
                """
                if phrase:
                    cond = HCondition(phrase)
                else:
                    cond = HCondition()
                cond.addAttr(expr)
                if order:
                    # byte-string order expressions are rejected
                    self.assertRaises(TypeError, cond.setOrder, '*.**')
                    cond.setOrder(order)
                return db.search(cond)

            result = attrSearch(u'description STREQ 4444')
            self.assertEqual(result.pluck(u'@uri'), [u'4'])
            result = attrSearch(u'specialId NUMGE 10')
            self.assertEqual(result.pluck(u'@uri'), [u'10', u'11', u'12', u'13'])
            result = attrSearch(u'date NUMGE 2008-12-09')
            self.assertEqual(sorted(map(int, result.pluck(u'@uri'))), [9, 10, 11, 12, 13])
            result = attrSearch(u'description STRRX .{10,14}')
            self.assertEqual(result.pluck(u'@uri'), [u'10', u'11', u'12', u'13'])
            # ordering and !not
            result = attrSearch(u'description !STRRX .{10,14}', u'@uri NUMA')
            self.assertEqual(result.pluck(u'@uri'), list(u'123456789'))
            result = attrSearch(u'description STRRX .{10,14}', u'@uri NUMD')
            self.assertEqual(result.pluck(u'@uri'), [u'13', u'12', u'11', u'10'])

            # one attribute search combined with a text search - regular
            # expression match, all characters
            result = attrSearch(u'description STRRX .{10,14}', u'@uri NUMD', u'*.**')
            self.assertEqual(result.pluck(u'@uri'), [u'13', u'12', u'11', u'10'])

    def test_dbOpenClosed(self):
        """
        Tests for all the db open/close modes
        """
        docyy = HDocument(uri=u'yy')
        docyy.addText(u'yy')
        docxx = HDocument(uri=u'xx')
        docxx.addText(u'xx')
        condxx = HCondition(u'xx')
        condyy = HCondition(u'yy')

        db = HDatabase()
        # open of unreachable directory
        self.assertRaises(OpenFailed, db.open, 'does/not/exist', 'a')
        # close before successful open
        self.assertRaises(CloseFailed, db.close)

        # w mode
        db.open('_temp_db', 'w')
        self.assertTrue(os.path.exists('_temp_db/_idx'))
        db.putDoc(docyy)
        db.close()

        # r mode
        db.open('_temp_db', 'r')
        # write to read-only db
        self.assertRaises(PutFailed, db.putDoc, docxx)
        self.assertEqual(len(db.search(condyy)), 1)
        db.close()

        # a mode
        db.open('_temp_db', 'a')
        db.putDoc(docxx)
        db.flush()
        self.assertEqual(len(db.search(condxx)), 1)
        self.assertEqual(len(db.search(condyy)), 1)
        db.close()

        # w mode again - check that the db is clobbered
        db.open('_temp_db', 'w')
        self.assertEqual(len(db.search(condxx)), 0)
        db.close()

        # close after successful close
        self.assertRaises(CloseFailed, db.close)
        self.assertRaises(FlushFailed, db.flush)

    def test_queries(self):
        """
        Test various conditions against an index to make sure search works.
        """
        with self.freshenDatabase() as db:
            # plain search.  This check was originally labelled "8-bit str",
            # but the phrase is unicode, so it repeats the check below.
            result = db.search(HCondition(u'wor*', matching='simple'))
            self.assertEqual(len(result), 3)

            # unicode searches
            result = db.search(HCondition(u'wor*', matching='simple'))
            self.assertEqual(len(result), 3)

            # test simple query with multiple hits
            result = db.search(HCondition(u'res*', matching='simple'))
            self.assertEqual(result.pluck(u'@uri'), [u'1', u'2'])

            # vary query terms to check result scoring
            result = db.search(HCondition(u'someth* | whatever*', matching='simple', max=2))
            self.assertEqual(result.pluck(u'@uri'), [u'2', u'3'])
            result = db.search(HCondition(u'someth* | upon*', matching='simple', max=2))
            self.assertEqual(result.pluck(u'@uri'), [u'3', u'2']) # FIXME

            self.assertEqual(result.hintWords(), [u'someth', u'upon'])

    def test_hits(self):
        """
        Poke at the hits returned by a search and see if document data and
        teaser text come out right.
        """
        with self.freshenDatabase() as db:
            cc = HCondition(u'hi-res')
            hits = db.search(cc)
            # Every assertion below runs once per hit, so an empty result
            # would let this test pass without checking anything.
            self.assertEqual(len(hits), 1)
            for hit in hits:
                self.assertEqual(str(hit),
                    '@digest=17de33c57e358f0fc5d57cd26a08b48e\n@id=1\n@uri=1\n\nword this is my document. do you like documents? this one is hi-res.\n')
                self.assertEqual(hit.teaser([u'document']), u'word this is my <strong>document</strong>. do you li ... ke <strong>document</strong>s? this one is hi-re ... ')
                self.assertEqual(hit.teaser([u'document'], 'rst'), u'word this is my **document**. do you li ... ke **document**s? this one is hi-re ... ')
                # teaser words must be unicode
                self.assertRaises(TypeError, hit.teaser, ['document'])
                # unknown teaser formats are not implemented
                self.assertRaises(NotImplementedError, hit.teaser,
                        [u'document'], 'pdf')

    def test_autoflush(self):
        """
        Verify that autoflush is not on when it's not turned on, and that
        words are autoindexed when it is turned on
        """
        cond = HCondition(u'doc*')

        @contextmanager
        def setup(**kw):
            """
            Yield a database built with HDatabase(**kw), opened on '_temp_db'
            in 'w' mode and holding one document that was never explicitly
            flushed.  The database is closed on exit, including when storing
            the document fails.
            """
            db1 = HDatabase(**kw)
            db1.open('_temp_db', 'w')

            try:
                doc = HDocument(uri=u'1')
                doc.addText(u'word this is my document. do you like documents? this one is hi-res.')
                db1.putDoc(doc, clean=True)

                yield db1
            finally:
                db1.close()

        # non-autoflush: can't search for doc*
        with setup() as db:
            self.assertEqual(len(db.search(cond)), 0)

        # autoflush: now you can.
        with setup(autoflush=True) as db:
            self.assertEqual(len(db.search(cond)), 1)
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()