-
-
Save amolv/6d212150c7450646d55c99630648dd16 to your computer and use it in GitHub Desktop.
Delete documents by query result for Amazon CloudSearch.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import sys | |
import urllib | |
import urllib2 | |
import json | |
# you need to set your domain endpoints. | |
SEARCH_ENDPOINT = "XXXXX.us-east-1.cloudsearch.amazonaws.com" | |
DOCUMENT_ENDPOINT = "XXXXX.us-east-1.cloudsearch.amazonaws.com" | |
API_VERSION = "2013-01-01" | |
def searchDocuments(queryParams):
    """Send a search request to the search endpoint and return the raw body.

    The parameters are URL-encoded and handed to ``urllib2.urlopen`` as the
    request data, so the request is sent as a POST to
    ``http://<SEARCH_ENDPOINT>/<API_VERSION>/search``.

    Args:
        queryParams: Mapping (or sequence of pairs) of search parameters,
            e.g. ``{"q": "*:*", "size": "1000"}``.

    Returns:
        The response body exactly as returned by ``read()``.

    Raises:
        Exception: If the response status code is anything other than 200.
            (``urllib2`` itself raises its own errors for most HTTP error
            statuses before this check is reached.)
    """
    query = urllib.urlencode(queryParams)
    url = "http://" + SEARCH_ENDPOINT + "/" + API_VERSION + "/search"
    # send query
    result = urllib2.urlopen(url, query)
    try:
        if result.code != 200:
            # result.code is an int: convert it explicitly, otherwise the
            # concatenation raises TypeError and hides the real failure.
            raise Exception("Error occured while sending search query. Response Code:" + str(result.code))
        return result.read()
    finally:
        # Always release the underlying connection, on success or failure.
        result.close()
def parseIdListFromBody(data):
    """Extract the document ids from a search response body.

    Args:
        data: JSON text containing a ``hits`` object whose ``hit`` entry is
            a list of objects that each carry an ``id``.

    Returns:
        A list of the ``id`` values, in the order they appear in the response.
    """
    hits = json.loads(data)["hits"]["hit"]
    return [hit["id"] for hit in hits]
def createSDFforDelete(idList):
    """Build the JSON batch that deletes every document in ``idList``.

    Args:
        idList: Iterable of document ids.

    Returns:
        A JSON string encoding a list with one ``{"type": "delete", "id": ...}``
        entry per id, in input order.
    """
    operations = [{'type': 'delete', 'id': docId} for docId in idList]
    return json.dumps(operations)
def sendSDF(sdf):
    """Upload a document batch to the document endpoint and print the reply.

    Args:
        sdf: The batch payload (a JSON string) to send as the request body
            to ``http://<DOCUMENT_ENDPOINT>/<API_VERSION>/documents/batch``.
    """
    batchUrl = "".join(["http://", DOCUMENT_ENDPOINT, "/", API_VERSION, "/documents/batch"])
    # Supplying the payload and header through the constructor is equivalent
    # to calling add_data()/add_header() on the request afterwards.
    request = urllib2.Request(batchUrl, sdf, {"Content-Type": "application/json"})
    # send query
    response = urllib2.urlopen(request)
    print(response.read())
if __name__ == '__main__':
    # Search parameters: the "*:*" query is parsed with the lucene parser,
    # "size" is set to "1000" and "title" is the requested return field.
    queryParams = {
        "q": "*:*",
        "q.parser": "lucene",
        "size": "1000",
        "return": "title",
    }
    # Search, collect the ids of the hits, then submit one delete batch
    # covering exactly those ids.
    idList = parseIdListFromBody(searchDocuments(queryParams))
    sendSDF(createSDFforDelete(idList))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment