@FennNaten
Forked from sararob/image-queue.js
Last active July 10, 2017 01:09
Updating https://gist.github.com/sararob/275b252d1eda3a5baa27d6464d2f2198 to add an example of reducing memory usage by using file streams
'use strict';
/**
 * This is a fork of https://gist.github.com/sararob/275b252d1eda3a5baa27d6464d2f2198
 * with some code added to show how reading from and writing to the big source and destination files
 * can be done with streams to reduce memory pressure.
 * The added code is there as an example to illustrate the principles involved and is not tested, so it may contain bugs.
 */
// Dependencies
const gcloud = require('google-cloud')({
  projectId: 'sara-bigquery',
  keyFilename: 'keyfile.json'
});
const vision = gcloud.vision();
const fs = require('fs');
const async = require('async');
//to avoid having to write our own stream that emits data line by line, let's use https://www.npmjs.com/package/line-by-line
const LineByLineReader = require('line-by-line');
//this is the file writer into which we'll dump data as soon as we get it, so we don't keep it in memory
//see https://nodejs.org/api/stream.html#stream_writable_streams
const targetStream = fs.createWriteStream('./responses.json');
//since the final format is a JSON array of objects and we'll write objects one by one, start by writing the character that opens the array
targetStream.write('[');
//objects in the array must be separated by commas, so keep track of whether we've already written one
let isFirstRecord = true;
const features = [
  { "type": "FACE_DETECTION" },
  { "type": "LABEL_DETECTION" },
  { "type": "LANDMARK_DETECTION" },
  { "type": "WEB_DETECTION" },
  { "type": "IMAGE_PROPERTIES" }
];
// Call the Vision API with our GCS image URL
function callVision(task, callback) {
  console.log(`processing ${task.object_id}`);
  let visionReq = {
    "image": {
      "source": {
        "imageUri": `gs://gcs-storage-bucket/${task.id}.jpg`
      }
    },
    "features": features
  };
  vision.annotate(visionReq, function(err, annotations, resp) {
    if (err) {
      return callback(new Error(err));
    }
    let imgMetadata = annotations[0];
    if (!imgMetadata['imagePropertiesAnnotation']) {
      return callback(new Error('no data'));
    }
    imgMetadata['object_id'] = task.name;
    //send the stringified object to the target file immediately, no need to keep it in memory
    //(objects after the first one are prefixed with a comma so the resulting file is a valid JSON array)
    //here I use a global targetStream; if I were to make this a library, the target stream would probably be injected into callVision instead (see the sketch right after this function)
    targetStream.write((isFirstRecord ? '' : ',') + JSON.stringify(imgMetadata));
    isFirstRecord = false;
    return callback();
  });
}
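//Illustrative sketch (untested; makeAnnotationWorker is a hypothetical helper, nothing above uses it):
//the comment above mentions injecting the target stream. One way to do that is a factory that closes
//over the injected stream and returns a worker with the (task, callback) signature async.queue expects:
function makeAnnotationWorker(visionClient, outStream) {
  let first = true;
  return function (task, callback) {
    let req = {
      "image": { "source": { "imageUri": `gs://gcs-storage-bucket/${task.id}.jpg` } },
      "features": features
    };
    visionClient.annotate(req, function (err, annotations) {
      if (err) {
        return callback(new Error(err));
      }
      //write straight to the injected stream, comma-separated so the result stays a valid JSON array
      outStream.write((first ? '' : ',') + JSON.stringify(annotations[0]));
      first = false;
      return callback();
    });
  };
}
//usage would then look like: async.queue(makeAnnotationWorker(vision, someWriteStream), 20)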
// The queue will send the image IDs to callVision()
let q = async.queue(callVision, 20);
function done() {
  q.drain = null;
  //when all data is processed, let's end the write stream by closing the JSON array
  //depending on the way everything is implemented, there could be a bug here if the queue drains faster than we fill it
  //(i.e. drain fires while lines are still being read and pushed). To avoid this, we could add some bookkeeping:
  //a counter incremented on each line read, a boolean set once reading ends, and a counter incremented on each write;
  //we would then only end the stream once drain has fired, reading is finished, and the number of written items
  //equals the number of read lines (see the sketch at the end of this file)
  targetStream.end(']');
}
// Will only be executed when the queue is drained
q.drain = done;
//now let's create our file stream to read data from the file line by line
const imageIdsStream = new LineByLineReader('./image-ids.json');
//launch the read process. Each time a line is read, it is parsed and fed to the queue
//here each line is an imageID
imageIdsStream.on('line', function (line) {
  q.push(JSON.parse(line), function (err) {
    if (err) {
      console.log(err);
    }
  });
});
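/*
 Untested sketch of the bookkeeping mentioned in done() above (all names here are illustrative and
 nothing below is wired into the code above): the idea is to close the output stream only once every
 line has been read AND every queued task has finished writing its result.

   let linesRead = 0;     // incremented in the 'line' handler, just before q.push
   let itemsWritten = 0;  // incremented in callVision, right after targetStream.write
   let allRead = false;   // set to true when imageIdsStream emits 'end'
   function maybeEndStream() {
     if (allRead && itemsWritten === linesRead) {
       targetStream.end(']');
     }
   }
   //maybeEndStream() would then be called both from q.drain and from imageIdsStream.on('end', ...),
   //so the stream is closed no matter which of the two events fires last
*/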