Skip to content

Instantly share code, notes, and snippets.

@chrisjmendez
Created April 17, 2018 22:24
Show Gist options
  • Save chrisjmendez/87392e7d041b8818535e81a92836ec0e to your computer and use it in GitHub Desktop.
Save chrisjmendez/87392e7d041b8818535e81a92836ec0e to your computer and use it in GitHub Desktop.
Create and parse XML into JSON
#!/usr/bin/env ruby
require "faker"
require 'nokogiri'
require File.join(File.dirname(__FILE__), 'lib', 'file_manager')
# Builds an XML document of randomly generated e-mail contacts.
#
# Constructing an instance generates the document and passes it, together
# with FILE_PATH, to +save+ (not defined here; presumably provided by the
# FileManager mixin).
class EmailUtil
  include FileManager

  # Number of <option> elements written under the <data> root.
  NUM_OF_EMAILS = 5000
  # Target file name, stamped with today's date.
  FILE_PATH = "emails-#{Date.today}.xml"

  def initialize
    save(create_xml, FILE_PATH)
  end

  # Builds the whole document and returns it serialized as an XML string.
  #
  # Builder approach taken from http://stackoverflow.com/a/27065613
  def create_xml
    document = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
      xml.data do
        NUM_OF_EMAILS.times { append_contact(xml) }
      end
    end
    document.to_xml
  end

  private

  # Appends one <option> element to +xml+.
  #
  # The element text is "<email> (<full name>)", where the full name is
  # randomly left blank. Most attributes are likewise randomly blanked by
  # sampling between '' and a generated value.
  def append_contact(xml)
    given_name = Faker::Name.first_name
    family_name = Faker::Name.last_name

    address = Faker::Internet.email
    display_name = ['', "#{given_name} #{family_name}"].sample

    # NOTE(review): newer Faker releases document Date.between with
    # keyword arguments (from:, to:) -- confirm against the installed version.
    xml.option(
      "#{address} (#{display_name})",
      first_name: ['', given_name].sample,
      last_name: ['', family_name].sample,
      zip_code: ['', Faker::Address.zip_code].sample,
      gender: ["m", "f", "o"].sample,
      dob: Faker::Date.between(Date.parse("1st Jan 1920"), Date.parse("1st Jan #{min_age_requirement}")),
      phone_mobile: ['', Faker::PhoneNumber.cell_phone].sample,
      phone_other: ['', Faker::PhoneNumber.phone_number].sample
    )
  end

  # Returns the current year minus 13; used as the year of the upper
  # bound for generated dates of birth.
  def min_age_requirement
    minimum_age = 13
    Time.now.year - minimum_age
  end
end
# Script entry point: constructing EmailUtil builds the XML via create_xml
# and passes it to save with FILE_PATH.
EmailUtil.new
require 'date'
require 'json'
require 'active_support/json'
require 'rubygems'
require "rexml/document"
require File.join(File.dirname(__FILE__), 'lib', 'file_manager')
# Converts an XML contact list of the form
#   <data><option first_name="..." ...>email (Full Name)</option>...</data>
# into a JSON array of contact objects.
#
# Constructing an instance performs the whole conversion: the input is read
# with +load+, converted by #parse, and written with +save+ to FILE_JSON.
# (+load+ and +save+ are not defined here; presumably they come from the
# FileManager mixin -- verify their exact contracts there.)
class List
  include FileManager

  # Default XML input path, stamped with today's date.
  FILE_XML = "output/emails-#{Date.today}.xml"
  # JSON output path, stamped with today's date.
  FILE_JSON = "output/emails-#{Date.today}.json"
  # <option> attributes copied verbatim into each contact, in output order.
  ATTRIBUTE_NAMES = %w[
    first_name last_name zip_code gender dob phone_mobile phone_other
  ].freeze

  # @param input [String] path of the XML file to convert. When it is empty
  #   the first command-line argument is used instead; when that is missing
  #   too, a usage message is printed and the process exits.
  def initialize(input = FILE_XML)
    path = input.to_s.empty? ? ARGV.first : input

    if path.nil?
      puts "Please add an XML filepath"
      puts "For example: ruby init.rb './path/to/file.xml'"
      exit
    end

    # Both the explicit-input and the ARGV path go through the full
    # load -> parse -> save pipeline (previously the ARGV path only loaded
    # the file and discarded the result).
    save(parse(load(path)), FILE_JSON)
  end

  # Parses the XML in +file+ and returns the contacts encoded as JSON.
  #
  # @param file [String, IO] anything REXML::Document.new accepts as a source
  # @return [String] JSON array with one object per data/option element
  def parse(file)
    doc = REXML::Document.new(file)
    contacts = doc.elements.to_a('data/option').map { |option| contact_from(option) }
    ActiveSupport::JSON.encode(contacts)
  end

  private

  # Builds the hash for a single <option> element: e-mail and full name come
  # from the element text, everything else from its attributes (nil when an
  # attribute is absent).
  def contact_from(option)
    email, full_name = split_label(option.text)
    contact = { email: email, full_name: full_name }
    ATTRIBUTE_NAMES.each { |name| contact[name.to_sym] = option.attributes[name] }
    contact
  end

  # Splits "email (Full Name)" into [email, full_name].
  #
  # Tolerates nil text, a missing "(" and empty parentheses; in those cases
  # the missing part is returned as "". Uses the non-bang +strip+ because
  # +strip!+ returns nil when there is nothing to strip.
  def split_label(text)
    address, name_part = text.to_s.split('(')
    [address.to_s.strip, name_part.to_s[/[^)]+/].to_s.strip]
  end
end
# Script entry point: constructing List with its default input (FILE_XML)
# loads that file, parses it, and saves the result to FILE_JSON.
List.new
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment