Last active
September 8, 2017 20:13
-
-
Save brianjhanson/13fc0182da19f4baaa5bb918e67c149b to your computer and use it in GitHub Desktop.
Scrape FAQs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Every FAQ question element matched by the .faq-question selector.
var $questions = jQuery('.faq-question');

// Export payload: category records and entry records are collected separately.
var json = {
  categories: [],
  entries: []
};

// Index into json.categories of the category currently being filled.
// Starts at -1 and is advanced each time a question numbered "1." is seen.
var categoryIndex = -1;

// Section id and entry-type id stamped onto every generated entry.
var sectionId = 21;
var typeId = 28;
/**
 * Tells whether a question label is the first of a numbered list, i.e. it
 * begins with "1." (optionally preceded by whitespace).
 *
 * @param {string} string - Question text, e.g. "1. How do I sign up?".
 * @returns {boolean} true when the text starts with "1."; false otherwise,
 *   including for non-string input.
 */
function isFirst(string) {
  if (typeof string !== 'string') {
    return false;
  }
  // Anchored so "10." / "11." do not count; leading whitespace is tolerated
  // because text read from the DOM is often padded.
  return /^\s*1\./.test(string);
}
/**
 * Converts human-readable text into a URL-style slug: lower-case, words
 * joined by single hyphens, only word characters and hyphens kept.
 *
 * @param {string} Text - Title or question text to slugify.
 * @returns {string} The slug, with no leading, trailing or repeated hyphens.
 */
function convertToSlug(Text) {
  return Text
    .trim()
    .toLowerCase()
    // Any run of whitespace (spaces, tabs, newlines) becomes one separator.
    .replace(/\s+/g, '-')
    .replace(/[^\w-]+/g, '')
    // Stripping punctuation can leave adjacent hyphens ("a-&-b" -> "a--b").
    .replace(/-{2,}/g, '-')
    .replace(/^-+|-+$/g, '');
}
/**
 * Wraps a scraped category in the CategoryModel record shape used by the
 * export payload.
 *
 * @param {{title: string, slug: string}} category - Scraped category data.
 * @returns {Object} Record with "@model", "attributes" and "content" keys.
 */
function buildCategory(category) {
  // Created and updated timestamps are the same fixed value for every category.
  var timestamp = "2016-01-13 01:25:57";

  var attributes = {
    "groupId": 3,
    "parentId": 1,
    "locale": "en_us",
    "slug": category.slug,
    "dateCreated": timestamp,
    "dateUpdated": timestamp,
    "enabled": true
  };

  var content = {
    "title": category.title,
    "fields": []
  };

  return {
    "@model": "CategoryModel",
    "attributes": attributes,
    "content": content
  };
}
/**
 * Wraps one scraped question/answer pair in the EntryModel record shape used
 * by the export payload. Reads the file-level `sectionId` and `typeId`, and
 * derives the entry slug from the question via `convertToSlug`.
 *
 * @param {string} question - Question text; used as the title and slug source.
 * @param {string} answer - Answer markup, stored as the "body" field.
 * @param {string} category - Slug of the category this entry is related to.
 * @returns {Object} Record with "@model", "attributes" and "content" keys.
 */
function buildEntry(question, answer, category) {
  var attributes = {
    "sectionId": sectionId,
    "typeId": typeId,
    "authorId": 1,
    "locale": "en_us",
    "slug": convertToSlug(question),
    "postDate": "2015-02-27 16:43:52",
    "expiryDate": null,
    "dateCreated": "2015-02-27 16:43:52",
    "dateUpdated": "2015-02-27 16:51:42",
    "enabled": true
  };

  // The related category is referenced by slug within category group 3.
  var categoryRelation = {
    "@model": "CategoryModel",
    "matchBy": "slug",
    "matchValue": [category],
    "matchCriteria": {
      "groupId": 3
    }
  };

  return {
    "@model": "EntryModel",
    "attributes": attributes,
    "content": {
      "title": question,
      "fields": {
        "body": answer
      },
      "related": {
        "category": categoryRelation
      }
    }
  };
}
// Build one category record per FAQ section header found on the page.
var $categories = jQuery('.faq-header');
// NOTE(review): `categories` is never read in the visible script; it is kept
// only so the global continues to exist.
var categories = [];
$categories.each(function() {
  // Header text read from the DOM can carry surrounding whitespace, which
  // would otherwise leak into both the title and the slug.
  var title = jQuery(this).text().trim();
  json.categories.push(buildCategory({
    title: title,
    slug: convertToSlug(title)
  }));
});
// Build one entry per question, assigning it to the category that is active
// at that point in the page.
$questions.each(function() {
  var $question = jQuery(this);
  // Trim so the "^"-anchored numbering patterns below see the number first.
  var question = $question.text().trim();

  // A question numbered "1." marks the start of the next category.
  if (isFirst(question)) {
    categoryIndex++;
  }

  var category = json.categories[categoryIndex];
  if (!category) {
    // Happens when the first question is not numbered "1." or when there are
    // more numbered lists than category headers. The length comparison logged
    // after the loop will show the shortfall.
    console.warn('No category found for question, skipping: ', question);
    return; // returns undefined (not false), so jQuery keeps iterating
  }

  // The answer element points back at the question's grandparent via
  // aria-labelledby. The value is quoted so ids containing selector
  // meta-characters still produce a valid selector.
  var answerId = $question.parent().parent().attr('id');
  var $answer = jQuery('[aria-labelledby="' + answerId + '"]');

  json.entries.push(buildEntry(
    question.replace(/^\d+\.\s*/, ''), // drop the leading "N." numbering
    $answer.html(),
    category.attributes.slug
  ));
});
// These numbers should be the same (make sure we got everything)
console.log('questions length: ', $questions.length);
console.log('faq length: ', json.entries.length);

// copy(json.categories); // copy just the categories
// copy(json.entries); // copy just the entries
if (typeof copy === 'function') {
  copy(json); // Copy everything
} else {
  // copy() is only provided by the browser DevTools console; elsewhere, print
  // the payload so it can still be retrieved.
  console.log(JSON.stringify(json, null, 2));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
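To use, paste this script directly into the Chrome DevTools console on the FAQ page. It relies on the page's own jQuery and on the console-only copy() helper.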