Created
November 20, 2014 16:13
-
-
Save nubela/f4452e34a4adfd38ecf6 to your computer and use it in GitHub Desktop.
PropertyGuru scraper because why the f*ck do websites block copy paste?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from BeautifulSoup import BeautifulSoup | |
import requests | |
# Output file for the scraped results (a single JSON array).
FILE_NAME = "result.txt"
# Listing hrefs on the results page are relative; this is prepended to them.
BASE_URL = "http://www.propertyguru.com.sg/"
# Search-results URL template; %d is the 1-based results-page number.
# FIX: "&center_lat" / "&center_long" had been mangled into "¢er_lat" /
# "¢er_long" by HTML-entity decoding ("&cent;" -> U+00A2); restored here.
URL = "http://www.propertyguru.com.sg/singapore-property-listing/property-for-rent/%d?property_type=H" \
      "&property_type_code[]=HDB&minprice=1500&maxprice=2500&minsize=1000&distance=0.5&center_lat=1" \
      ".39126455055&center_long=103.89543056488&latitude=1.39126455055&longitude=103.89543056488"
def get_listings(page=1):
    """Return the absolute URLs of every listing on one search-results page.

    page: 1-based results-page number, substituted into URL's %d slot.
    Returns a list of full listing URLs (BASE_URL + relative href).
    """
    response = requests.get(URL % page)
    soup = BeautifulSoup(response.text)
    # The trailing space in the class string matches the site's markup exactly
    # — do not "fix" it or findAll returns nothing.
    anchors = soup.findAll("a", {"class": "infotitle listing_action clearfix "})
    return [BASE_URL + a["href"] for a in anchors]
def process_listings(url):
    """Scrape a single listing page and return its details as a dict.

    Returns keys: agent_name, no (phone number), price (int, S$/month),
    size (int, sqft — presumably; verify against the site), address (str).
    Raises AttributeError/IndexError if the page layout differs from the
    expected markup (e.g. the request was blocked or the site changed).
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text)

    agent_info = soup.find("div", {"class": "agent_info"})
    name = agent_info.h3.string.strip()
    # The phone number is the first line of the agent <div>'s text.
    no = agent_info.div.string.strip().split("\r")[0]

    info_all = soup.find("div", {"class": "info1"}).findAll("p")
    # e.g. "S$ 2,200 / month" -> 2200
    price = int(info_all[0].span.string.strip()
                .replace("S$ ", "").replace(" / month", "").replace(",", ""))
    # e.g. "1,200 sqft" -> 1200
    size = int(info_all[2].string.strip().split(" ")[0].replace(",", ""))

    # Normalise whitespace in the address: tabs and CRLFs become spaces,
    # then runs of spaces collapse to one.
    # FIX: the loop must replace DOUBLE spaces with single ones; as rendered
    # it replaced a space with a space and would never terminate.
    address = info_all[3].string.replace("\t", " ").replace("\r\n", " ")
    while "  " in address:
        address = address.replace("  ", " ")

    return {
        "agent_name": name,
        "no": no,
        "price": price,
        "size": size,
        "address": address,
    }
# Scrape the first 5 results pages and write everything as one JSON array.
# FIX: previously `results` was reset and FILE_NAME rewritten on every
# iteration of the page loop, so only the final page's listings survived.
# NOTE(review): get_listings defaults to page=1 but range(5) starts at 0 —
# page 0 may duplicate page 1; confirm against the site's pagination.
results = []
for page in range(5):
    for listing_url in get_listings(page):
        results.append(process_listings(listing_url))

with open(FILE_NAME, "w") as f:
    f.write(json.dumps(results))
@ganeshraj is the script still working?
I tested the code, but unfortunately it no longer works: the site rejects
requests made by the `requests` library.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for this code. I'm trying to filter PropertyGuru for places with no live-in landlords, but the site won't let me apply that filter.