Created
December 19, 2017 16:12
-
-
Save alterisian/7743007ff79dc45661737cc323b54cc8 to your computer and use it in GitHub Desktop.
artoo dom example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Ok. What am I trying to do? | |
// =========================== | |
// Extract 3 data values from the li elements in a ul list, | |
// that are easily identified by classes on the target page. | |
// Not rocket science. | |
// Tool: Aartoo. Client Side Javascript. Looks evolved. | |
// Simple bookmarket run from the extraction page, | |
// and then run in the js console. | |
// Has examples: http://medialab.github.io/artoo/quick_start/ | |
// I dived straight in, and got the top level ul element. | |
// VICTORY IS MINE. Ah not so quick, Ian. | |
<ul class="atmLocatorSidebarSectionContent atmLocatorList atmLocatorAtmList"> | |
// Extacted with: | |
var cms = artoo.scrape(".atmLocatorList"); | |
// Gives all the html stringified. Could be useable, but not optimal really! | |
// And here's an example of the html: ul, and li. | |
<ul class="atmLocatorSidebarSectionContent atmLocatorList atmLocatorAtmList"> | |
<li class="atmLocatorListItem atmLocatorAtmListItem" data-atm-id="493600N813151" tabindex="0"> | |
<div class="atmLocatorAtmHeader"> | |
<div class="atmLocatorAtmIcon"> | |
<img src="/images/atmLocator/atmIconGreen.png" alt="ATM" class="atmLocatorAtmPushpin"> | |
<span class="atmLocatorAtmIndex">2</span> | |
</div> | |
<span class="atmLocatorAtmTitle">National Westminster Bank/NatWest</span> | |
<ul class="atmLocatorAtmCurrency"> | |
<li class="atmLocatorAtmCurrencyItem">GBP</li> | |
</ul> | |
</div> | |
<div class="atmLocatorAtmAddress"> | |
<div class="atmLocatorAtmPremises">SPINNINGFIELDS OFFICE</div> | |
1 SPINNINGFIELDS SQUARE<br>MANCHESTER<br>MANCHESTER<br>M3 3AP<br> | |
</div> | |
<div class="atmLocatorAtmDistance">0.03 miles away</div> | |
<ul class="atmLocatorAtmAttributes"> | |
<li> | |
<img src="/images/atmLocator/pin.svg" title="Pin management" alt="Pin management"> | |
<span>Pin management</span> | |
</li> | |
<li> | |
<img src="/images/atmLocator/mobile_topup.svg" title="Mobile top-up" alt="Mobile top-up"> | |
<span>Mobile top-up</span> | |
</li> | |
</ul> | |
<div class="atmLocatorAtmCharge"> | |
<strong class="chargeFree">Charge: Free to use</strong> | |
</div> | |
</li> | |
// The end goal is all of the data here extacted - but this doesn't | |
// work! Gives an Object per li, but no data against each field. | |
var cms = artoo.scrape(".atmLocatorList .atmLocatorListItem", { | |
company_name: ".atmLocatorAtmTitle", | |
cm_address: ".atmLocatorAtmPremises", | |
usage_charge: ".atmLocatorAtmCharge" | |
}); | |
// So a simplification, which also doesn't work. Gives lots of Objects, | |
// but with company name unpopulated: | |
var cms = artoo.scrape(".atmLocatorList .atmLocatorListItem", { | |
company_name: ".atmLocatorAtmTitle" }); | |
// So, the elements, I'm dealing with and how to represent them? | |
.atmLocatorListItem | |
.atmLocatorAtmHeader | |
.atmLocatorAtmTitle | |
li.atmLocatorListItem | |
div.atmLocatorAtmHeader | |
span.atmLocatorAtmTitle | |
// I hope I've explained it correctly. Basically trying to | |
// locate the right span/div under the right ul/li | |
// Really appreciate any help. | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment