Created
April 26, 2018 14:46
-
-
Save i-like-bikes/eecbff1f3041d5006bb307d3d34a896d to your computer and use it in GitHub Desktop.
LGA and year filtering
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import logging | |
import sys | |
from mongoengine import connect | |
from mongoengine import DateTimeField, Document, EmbeddedDocument, EmbeddedDocumentField, IntField, ListField, StringField | |
from pymongo.errors import OperationFailure | |
# Solution comes from following this: | |
# https://stackoverflow.com/questions/28982285/mongodb-projection-of-nested-arrays | |
conf = { | |
'user': '', | |
'password': '', | |
'port': , | |
'ds': '' | |
} | |
host= 'mongodb://{user}:{password}@{ds}.mlab.com:{port}/lga_data'.format(**conf) | |
class YearRecord(EmbeddedDocument): | |
year = StringField(max_length=52, required=True) | |
crime_rate = StringField(max_length=52, required=True) | |
number_incidents = StringField(max_length=52, required=True) | |
class OffenceRecord(EmbeddedDocument): | |
offence_group = StringField(max_length=52, required=True) | |
offence_type = StringField(max_length=52, required=True) | |
year_record = ListField(EmbeddedDocumentField(YearRecord)) | |
class CouncilRecord(Document): | |
lga = StringField(max_length=52, required=True) | |
title = StringField(max_length=52, required=True) | |
name = StringField(max_length=52, required=True) | |
offence_record = ListField(EmbeddedDocumentField(OffenceRecord)) | |
data = [{ | |
'lga': 'cowra', | |
'title': 'some stat', | |
'name': 'detailed name', | |
'offence_record': [ | |
{ | |
'offence_group': 'SRS!', | |
'offence_type': 'MURDER!', | |
'year_record': [ | |
{ 'year': '2015', | |
'crime_rate': '100%', | |
'number_incidents': '200'} | |
] | |
} | |
] | |
}, | |
{ | |
'lga': 'bathurst', | |
'title': 'some stat', | |
'name': 'detailed name', | |
'offence_record': [ | |
{ | |
'offence_group': 'SRS!', | |
'offence_type': 'MURDER!', | |
'year_record': [ | |
{ | |
'year': '2015', | |
'crime_rate': '100%', | |
'number_incidents': '200' | |
}, | |
{ | |
'year': '2016', | |
'crime_rate': '20%', | |
'number_incidents': '100'} | |
] | |
}, | |
{ | |
'offence_group': 'NOT SRS!', | |
'offence_type': 'MURDER!', | |
'year_record': [ | |
{ | |
'year': '2015', | |
'crime_rate': '100%', | |
'number_incidents': '200' | |
}, | |
{ | |
'year': '2016', | |
'crime_rate': '20%', | |
'number_incidents': '100'} | |
] | |
} | |
] | |
}] | |
connect('test_db', host=host) | |
# Comment this out when running for the second time | |
# or test for existance, so the DB isn't polluted. | |
# d = [CouncilRecord(**d) for d in data] | |
# for _ in d: | |
# print(_.lga) | |
# CouncilRecord.objects.insert(d) | |
lga_filter = "bathurst" | |
year_filter = "2015" | |
pipeline = [ | |
{ "$match": { "lga": lga_filter } }, | |
{ "$unwind": { "path": "$offence_record" } }, | |
{ "$unwind": { "path": "$offence_record.year_record" } }, | |
{ "$match": { "offence_record.year_record.year": year_filter } }, | |
{ "$group" : { | |
"_id" : "$_id", | |
"lga": {"$first": "$lga"}, | |
"title": {"$first": "$title"}, | |
"name": {"$first": "$name"}, | |
"offence_record": {"$push": "$offence_record"},} | |
} | |
] | |
for res in CouncilRecord.objects.aggregate(*pipeline): | |
print(res) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment