Last active
February 7, 2016 15:41
-
-
Save flaschbier/a3e9419428fc0b47006a to your computer and use it in GitHub Desktop.
Experimenting with regex queries against multi-valued (array) attributes in MongoDB. Works with Python 2.7.6 and PyMongo 3.2.1.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
http://stackoverflow.com/questions/35248610 | |
how to “find” docs in mongodb (in python) where a substring exists in a field | |
which is a list of strings? [no duplicate] | |
""" | |
import json | |
import sys | |
import unittest | |
import pymongo | |
from pymongo import MongoClient | |
class TestMongoMultiValueQuery(unittest.TestCase):
    """Explore how MongoDB filters match a multi-valued (array) attribute.

    Every test runs one filter against the five fixture documents inserted
    by ``setUpClass`` and checks which ``key`` values come back.  The filters
    target ``mvattr``, which holds a list of strings in every document.

    The fixture lives in collection ``q35248610`` of database ``test`` on
    whatever server ``MongoClient()`` connects to without arguments.
    """

    @classmethod
    def setUpClass(cls):
        """Connect to MongoDB and (re)create the fixture collection."""
        cls.client = MongoClient()
        try:
            cls.db = cls.client.test
            # The collection is named after the question so that it is
            # unlikely to clash with anything else in the test database.
            cls.coll = cls.db.q35248610
            # Start from an empty collection so the tests only ever see the
            # five documents inserted below.
            cls.coll.drop()
            cls.coll.insert_many([
                {"key": "t1", "mvattr": ["one", "two"]},
                {"key": "t2", "mvattr": ["not one", "but two"]},
                {"key": "t3", "mvattr": ["two", "one"]},
                {"key": "t4", "mvattr": ["not two", "but one"]},
                {"key": "t5", "mvattr": ["i am alfa", "beta here too"]},
            ])
        except Exception:
            # unittest does not run tearDownClass when setUpClass raises, so
            # the client has to be closed here or it would be leaked.
            cls.client.close()
            raise

    def assertResultIs(self, query, l):
        """Assert that ``query`` selects exactly the documents keyed in ``l``.

        The common ``{"mvattr": {"$regex": _}}`` wrapper is deliberately not
        factored out into this helper: spelling out the complete filter in
        each test keeps the tests easier to understand.

        Args:
            query: filter document handed to ``find``.
            l: expected ``key`` values, in any order.
        """
        # Only the "key" field is needed to identify the matching documents.
        cur = self.coll.find(query, {"key": 1})
        hits = [doc["key"] for doc in cur]
        # Order-insensitive comparison.  Unlike a length check followed by
        # membership tests, this also catches duplicated keys and shows both
        # lists when the assertion fails.
        self.assertEqual(sorted(hits), sorted(l))

    def test_contains_one(self):
        # all records where "one" occurs in at least one of the values
        self.assertResultIs(
            {"mvattr": {"$regex": "one"}},
            ["t1", "t2", "t3", "t4"])

    def test_starts_with_one(self):
        # all records where one of the values starts with "one"
        self.assertResultIs(
            {"mvattr": {"$regex": "^one"}},
            ["t1", "t3"])

    def test_ends_with_one(self):
        # all records where one of the values ends with "one"
        self.assertResultIs(
            {"mvattr": {"$regex": "one$"}},
            ["t1", "t2", "t3", "t4"])

    def test_contains_space(self):
        # a raw string keeps the syntax simple when the pattern has backslashes
        self.assertResultIs(
            {"mvattr": {"$regex": r"\s"}},
            ["t2", "t4", "t5"])

    def test_one_or_two(self):
        # $in with plain strings compares complete values: "not one" and
        # "but two" do not count as "one" or "two"
        self.assertResultIs(
            {"mvattr": {"$in": ["one", "two"]}},
            ["t1", "t3"])

    def test_contains_not_or_but(self):
        # | in a regex acts a little bit like $in combined with SQL LIKE
        self.assertResultIs(
            {"mvattr": {"$regex": r"not|but"}},
            ["t2", "t4"])

    def test_ops_question_1(self):
        # the original question, however, was about AND: t5 qualifies even
        # though "alfa" and "beta" sit in two different values
        self.assertResultIs(
            {"$and": [
                {"mvattr": {"$regex": r"alfa"}},
                {"mvattr": {"$regex": r"beta"}},
            ]},
            ["t5"])

    def test_ops_question_2(self):
        # is it really AND?  "alfa" only occurs in t5 and "one" only in
        # t1..t4, so no record may be returned
        self.assertResultIs(
            {"$and": [
                {"mvattr": {"$regex": r"alfa"}},
                {"mvattr": {"$regex": r"one"}},
            ]},
            [])

    @classmethod
    def tearDownClass(cls):
        """Remove the fixture collection and close the connection."""
        cls.coll.drop()
        cls.client.close()
if __name__ == "__main__":
    # verbosity=2 makes the runner list every test as it is processed
    unittest.main(verbosity=2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment