sc0tt · September 23, 2014 01:55
diff --git a/pokeparser.py b/pokeparser.py
 import re
 import time
 from selenium import webdriver

 # Walk the pokemon.com TCG card search results page by page and write the
 # href of the first link of every listed card to urls.txt, one URL per line.
 #
 # You will need to have the phantomjs exe in your path or alongside this script.
 # NOTE(review): webdriver.PhantomJS and the find_element(s)_by_* helpers are
 # presumably only available in Selenium 3.x - confirm the pinned version.
 driver = webdriver.PhantomJS()

 # Sentinels that differ from each other so the loop below runs at least once;
 # both are overwritten with the values parsed from the pager on every page.
 current_page = 0
 total_pages = -1

 parse_url = "http://www.pokemon.com/us/pokemon-tcg/pokemon-cards/?cardName=&cardText=&evolvesFrom=&simpleSubmit=&basic-pokemon=on&stage-1-pokemon=on&stage-2-pokemon=on&level-up-pokeomon=on&ex-pokemon=on&special-pokemon=on&pokemon-legend=on&restored-pokemon=on&trainer=on&trainer-pokemon-tool=on&trainer-stadium=on&trainer-supporter=on&trainer-technical-machine=on&trainer-rockets-secret-machine=on&basic-energy=on&special-energy=on&format=unlimited&hitPointsMin=0&hitPointsMax=200&retreatCostMin=0&retreatCostMax=5&totalAttackCostMin=0&totalAttackCostMax=10&particularArtist="

 try:
     # The context manager closes urls.txt even when a selenium call raises.
     with open("urls.txt", "w") as pokemon_urls:
         driver.get(parse_url)

         while current_page != total_pages:

             # Go to the page navigation
             pages = driver.find_element_by_id("cards-load-more")

             # The second <span> of the pager holds the "<current> of <total>" text
             pages_container = pages.find_elements_by_tag_name("span")[1]

             # Match the text; get_attribute may return None, hence the `or ""`
             page_info = re.match(r"(\d+) of (\d+)", pages_container.get_attribute("innerHTML") or "")
             if page_info is None:
                 # Without page numbers the loop could never terminate reliably,
                 # so stop here and keep the URLs collected so far.
                 print("Could not read the page counter; stopping.")
                 break

             # Compare as numbers rather than as raw strings.
             current_page, total_pages = (int(number) for number in page_info.groups())

             print("Page %s of %s" % (current_page, total_pages))

             # Go to the container of the pokemon cards
             card_container = driver.find_element_by_id("cardResults")

             # For each card item, write the url of its first link
             for pkmn in card_container.find_elements_by_tag_name("li"):
                 pokemon_urls.write("%s\n" % pkmn.find_element_by_tag_name('a').get_attribute("href"))

             # Last page already written: skip the pointless click and wait.
             if current_page == total_pages:
                 break

             # Navigate to the next page (second <a> inside the pager)
             pages.find_elements_by_tag_name("a")[1].click()

             # Fixed pause before reading the pager again; presumably gives the
             # next page time to load - TODO confirm / replace with an explicit wait.
             time.sleep(10)
 except KeyboardInterrupt:
     # Ctrl+C is the intended way to stop early; URLs written so far are kept.
     pass
 finally:
     # Always shut the browser down, whatever ended the crawl.
     driver.quit()
	import re
	import time
	from selenium import webdriver

	# Walk the pokemon.com TCG card search results page by page and write the
	# href of the first link of every listed card to urls.txt, one URL per line.
	#
	# You will need to have the phantomjs exe in your path or alongside this script.
	# NOTE(review): webdriver.PhantomJS and the find_element(s)_by_* helpers are
	# presumably only available in Selenium 3.x - confirm the pinned version.
	driver = webdriver.PhantomJS()

	# Sentinels that differ from each other so the loop below runs at least once;
	# both are overwritten with the values parsed from the pager on every page.
	current_page = 0
	total_pages = -1

	parse_url = "http://www.pokemon.com/us/pokemon-tcg/pokemon-cards/?cardName=&cardText=&evolvesFrom=&simpleSubmit=&basic-pokemon=on&stage-1-pokemon=on&stage-2-pokemon=on&level-up-pokeomon=on&ex-pokemon=on&special-pokemon=on&pokemon-legend=on&restored-pokemon=on&trainer=on&trainer-pokemon-tool=on&trainer-stadium=on&trainer-supporter=on&trainer-technical-machine=on&trainer-rockets-secret-machine=on&basic-energy=on&special-energy=on&format=unlimited&hitPointsMin=0&hitPointsMax=200&retreatCostMin=0&retreatCostMax=5&totalAttackCostMin=0&totalAttackCostMax=10&particularArtist="

	try:
	    # The context manager closes urls.txt even when a selenium call raises.
	    with open("urls.txt", "w") as pokemon_urls:
	        driver.get(parse_url)

	        while current_page != total_pages:

	            # Go to the page navigation
	            pages = driver.find_element_by_id("cards-load-more")

	            # The second <span> of the pager holds the "<current> of <total>" text
	            pages_container = pages.find_elements_by_tag_name("span")[1]

	            # Match the text; get_attribute may return None, hence the `or ""`
	            page_info = re.match(r"(\d+) of (\d+)", pages_container.get_attribute("innerHTML") or "")
	            if page_info is None:
	                # Without page numbers the loop could never terminate reliably,
	                # so stop here and keep the URLs collected so far.
	                print("Could not read the page counter; stopping.")
	                break

	            # Compare as numbers rather than as raw strings.
	            current_page, total_pages = (int(number) for number in page_info.groups())

	            print("Page %s of %s" % (current_page, total_pages))

	            # Go to the container of the pokemon cards
	            card_container = driver.find_element_by_id("cardResults")

	            # For each card item, write the url of its first link
	            for pkmn in card_container.find_elements_by_tag_name("li"):
	                pokemon_urls.write("%s\n" % pkmn.find_element_by_tag_name('a').get_attribute("href"))

	            # Last page already written: skip the pointless click and wait.
	            if current_page == total_pages:
	                break

	            # Navigate to the next page (second <a> inside the pager)
	            pages.find_elements_by_tag_name("a")[1].click()

	            # Fixed pause before reading the pager again; presumably gives the
	            # next page time to load - TODO confirm / replace with an explicit wait.
	            time.sleep(10)
	except KeyboardInterrupt:
	    # Ctrl+C is the intended way to stop early; URLs written so far are kept.
	    pass
	finally:
	    # Always shut the browser down, whatever ended the crawl.
	    driver.quit()