Created
July 31, 2018 07:16
-
-
Save anthonytxie/3884d98084fcd3fab6e0cff910e8aa2e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const axios = require("axios");
const axiosRetry = require("axios-retry");
const moment = require("moment");
const _ = require("lodash");
const Comment = require("./db/models/Comment.js");
// Crawl window, stored as epoch milliseconds (Date#getTime()).
// NOTE(review): these are non-ISO date strings, so parsing is
// implementation-defined (Node/V8 reads them as local time) — confirm that
// local rather than UTC boundaries are intended.
const startDate = new Date("Jan 1, 2015 00:00:00").getTime();
const endDate = new Date("July 27, 2018").getTime();
// NOTE(review): `config` and `mongoose` are not referenced in this file;
// presumably they are required for their load-time side effects (e.g. opening
// the database connection) — verify before removing.
const config = require("./config/config.js");
const mongoose = require("./db/mongoose.js");
// Number of comments requested per API call.
const limit = 50;
// Let axios-retry wrap the shared axios instance, configured with 3 retries.
axiosRetry(axios, { retries: 3 });
// Pushshift expresses timestamps in whole epoch seconds while Date works in
// milliseconds, which is why values are divided/multiplied by 1000 below.
/**
 * Pages through Pushshift comment-search results for "elon musk" and stores
 * every page through the `Comment` model.
 *
 * The module-level `endDate` (epoch milliseconds) is the upper bound of the
 * crawl. After each page the cursor moves to the `created_utc` of the last
 * comment in that page, and the next request asks for comments after it.
 *
 * The loop stops when the cursor reaches `endDate`, when the API returns no
 * comments, or when a page fails to move the cursor forward.
 *
 * @param {number} startDate - Lower bound of the crawl, in epoch milliseconds.
 * @param {number} limit - Page size, sent as the `size` query parameter.
 * @returns {Promise<void>} Resolves when the crawl loop has finished.
 * @throws Propagates any error raised by `axios.get`; errors raised by
 *   `Comment.insertMany` are logged and do not stop the crawl.
 */
const fetchComments = async (startDate, limit) => {
  const toEpochSeconds = ms => Math.floor(ms / 1000);
  const before = toEpochSeconds(endDate);
  // Local cursor so the caller-visible parameter is never reassigned.
  let cursor = startDate;

  console.log(`beginning comment fetch from ${startDate} to ${endDate}`);

  while (cursor < endDate) {
    // Request the next `limit` comments created after the cursor.
    const after = toEpochSeconds(cursor);
    const response = await axios.get(
      `https://apiv2.pushshift.io/reddit/search/comment/?q=elon%20musk&size=${limit}&after=${after}&before=${before}`
    );
    const comments = response.data.data;

    // An empty page means the window is exhausted; without this check the
    // first/last lookups below would throw on `undefined`.
    if (!Array.isArray(comments) || comments.length === 0) {
      console.log("no more comments returned; stopping");
      break;
    }

    // Keep only the fields that are persisted.
    const commentDocuments = comments.map(comment => ({
      commentDate: moment.unix(comment.created_utc).format("MM/DD/YYYY"),
      subreddit: comment.subreddit,
      body: comment.body,
      score: comment.score,
    }));

    const firstComment = _.first(comments);
    const lastComment = _.last(comments);
    console.log(
      `insert (${new Date(firstComment.created_utc * 1000)} -> ${new Date(
        lastComment.created_utc * 1000
      )})`
    );

    // Best effort: a failed insert is logged and the crawl carries on.
    try {
      await Comment.insertMany(commentDocuments);
    } catch (e) {
      console.log(e);
    }

    // Stop rather than refetch the same page forever if the cursor stalls.
    const nextCursor = lastComment.created_utc * 1000;
    if (!(nextCursor > cursor)) {
      console.log("cursor did not advance; stopping");
      break;
    }
    cursor = nextCursor;
  }
};
// Start the crawl. A failure (e.g. a request that still fails after retries)
// is reported here instead of being left as an unhandled promise rejection.
fetchComments(startDate, limit).catch(err => {
  console.error("comment fetch failed:", err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment