Created
January 13, 2014 09:04
-
-
Save illarionvk/8396889 to your computer and use it in GitHub Desktop.
Find elements in 50 HTML files, extract data and create Jekyll post files with YAML metadata using Node.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 1. Get list of files
// 2. For each file:
// 2.1 Read a file
// 2.2 Find required data
// 2.3 Put the information in a JSON object
// 3. Convert JSON object to YAML
// 4. Write new Markdown file in _posts folder
// Node's file-system module; used below to list, read and write files.
var fs = require('fs');
/**
 * List the current working directory and report every entry whose name ends
 * in ".html" (case-sensitive).
 *
 * @param {function(null, string)} callback - Invoked once per matching file
 *   name as `callback(null, fileName)`.
 * @throws Rethrows the error reported by `fs.readdir`; it is not forwarded to
 *   `callback`.
 */
function getFileList(callback) {
  fs.readdir('.', function (err, files) {
    var hasHTMLextension = /\.html$/;
    if (err) {
      throw err;
    }
    // Iterate over the array's elements only: `for...in` would also visit any
    // enumerable property that has been added to Array.prototype.
    files.forEach(function (file) {
      if (hasHTMLextension.test(file)) {
        callback(null, file);
      }
    });
  });
}
/**
 * Read one file as UTF-8 text and pass its contents on to `parseData`.
 *
 * Written as a Node-style callback so it can be handed to `getFileList`.
 *
 * @param {?Error} err - Error reported by the caller; when set, it is logged
 *   and nothing is read.
 * @param {string} fileName - Path of the file to read.
 */
function readFile(err, fileName) {
  if (err) {
    return console.log(err);
  }
  fs.readFile(fileName, 'utf8', function (readErr, data) {
    // A failed read is logged and skipped so the remaining files still run.
    if (readErr) {
      return console.log(readErr);
    }
    console.log('File ' + fileName + ' successfully read');
    parseData(data, fileName);
  });
}
function parseData(data, fileName) { | |
// Run some jQuery on a html fragment | |
var jsdom = require("jsdom"); | |
jsdom.env( | |
data, | |
["http://code.jquery.com/jquery.js"], | |
function (errors, window) { | |
var YAML = require('json2yaml'); | |
var yml; | |
var membersJSON = new Object(); | |
var divTitle; | |
var descriptionTitle; | |
var divDescription, pDescription, description, memberLinkElement; | |
var divInfo, spanInfoTitle, spanInfoTitleParentDiv, spanData; | |
var divMemberLogo, imgTag, imgFullPath, imgFileNameOnly; | |
var divEmbedContainer, iframeTag, youtubeURL; | |
var i; | |
membersJSON.layout = 'members'; | |
membersJSON.published = true; | |
divTitle = window.document.getElementsByClassName('title'); | |
if (divTitle[0]) { | |
membersJSON.title = divTitle[0].textContent; | |
} | |
// description_title | |
divDescription = window.document.getElementsByClassName('description'); | |
if (divDescription[0]) { | |
descriptionTitle = divDescription[0].getElementsByTagName('h3'); | |
if (descriptionTitle[0]) { | |
membersJSON.description_title = descriptionTitle[0].textContent; | |
} | |
} | |
// description | |
divDescription = window.document.getElementsByClassName('description'); | |
if (divDescription[0]) { | |
pDescription = divDescription[0].getElementsByTagName('p'); | |
description = pDescription[0].textContent; | |
//membersJSON.description = pDescription[0].textContent; | |
if (pDescription[1]) { | |
memberLinkElement = pDescription[1].getElementsByTagName('a'); | |
if (memberLinkElement[0]) { | |
membersJSON.member_url = memberLinkElement[0].href; | |
} else { | |
membersJSON.member_url = ''; | |
} | |
} | |
} | |
// info block | |
divInfo = window.document.getElementsByClassName('info'); | |
if (divInfo[0]) { | |
spanInfoTitle = divInfo[0].getElementsByTagName('span'); | |
membersJSON.original_timestamp = spanInfoTitle[1].textContent; | |
membersJSON.contact = spanInfoTitle[3].textContent; | |
membersJSON.telephone = spanInfoTitle[5].textContent; | |
membersJSON.email = spanInfoTitle[7].textContent; | |
membersJSON.branch = spanInfoTitle[9].textContent; | |
} | |
// Member Logo | |
divMemberLogo = window.document.getElementsByClassName('member-logo'); | |
if (divMemberLogo[0]) { | |
imgTag = divMemberLogo[0].getElementsByTagName('img'); | |
if (imgTag[0]) { | |
imgFullPath = imgTag[0].src; | |
imgFileNameOnly = imgFullPath.replace(/^file:\/\/\/Users\/hex\/Dropbox\/Sites\/study\/images\//i, ''); | |
membersJSON.member_logo = imgFileNameOnly; | |
} | |
} | |
// YouTube URL | |
divEmbedContainer = window.document.getElementsByClassName('embed-container'); | |
if (divEmbedContainer[0]) { | |
iframeTag = divEmbedContainer[0].getElementsByTagName('iframe'); | |
if (iframeTag[0]) { | |
youtubeURL = iframeTag[0].src; | |
membersJSON.youtube_url = youtubeURL; | |
} | |
} | |
yml = YAML.stringify(membersJSON); | |
yml = yml.replace(/^---/, "---\n"); | |
yml = yml + "\n---" + "\n\n" + description + "\n"; | |
//console.log(membersJSON); | |
console.log(yml); | |
writeNewFile(fileName, yml); | |
} | |
); | |
}; | |
/**
 * Write the generated post text to `./_posts/<date>-<name>.markdown`, where
 * `<name>` is `fileName` with its ".html" extension (any letter case)
 * replaced by ".markdown".
 *
 * @param {string} fileName - Name of the source HTML file.
 * @param {string} yml - Full post text to write.
 * @param {string} [datePrefix='2014-01-09'] - Date placed at the start of the
 *   post file name.
 * @throws Rethrows the error reported by `fs.writeFile`; the `_posts`
 *   directory is not created here.
 */
function writeNewFile(fileName, yml, datePrefix) {
  var prefix = datePrefix === undefined ? '2014-01-09' : datePrefix;
  var fileNameMarkdown = fileName.replace(/\.html$/i, ".markdown");
  var fullPath = './_posts/' + prefix + '-' + fileNameMarkdown;
  fs.writeFile(fullPath, yml, function (err) {
    if (err) {
      throw err;
    }
    console.log(fileName + ' is saved!');
  });
}
// Entry point: read and convert every .html file found in the current directory.
getFileList(readFile);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment