Skip to content

Instantly share code, notes, and snippets.

@ZhangZhongwei73671
Created May 9, 2018 02:35
Show Gist options
  • Save ZhangZhongwei73671/7b1e2268f2c0fea44965ded651867d88 to your computer and use it in GitHub Desktop.
# BeautifulSoup scraping exercise: second-hand housing price data
# Scrapes the first 10 listing pages of nanjing.anjuke.com/sale, writes each
# listing to a CSV file and inserts it into the MySQL table `ershoufang`.
import requests
import pandas as pd
import csv
from bs4 import BeautifulSoup
import MySQLdb

# Database connection used to persist every scraped row.
conn = MySQLdb.connect(host='localhost', user='root', passwd='yourpasswd',
                       db='ershoufang', charset="utf8")
cur = conn.cursor()

# Send a desktop-browser User-Agent so the site serves the normal listing page.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'}

try:
    # `with` guarantees the CSV file is flushed and closed even if a request
    # or parse step raises partway through a page.
    with open("D:/fangyuan.csv", 'w', newline='', encoding='utf-8') as csvFile:
        writer = csv.writer(csvFile)
        # Header row (Chinese column labels: listing, layout, total price,
        # unit price, area, floor, year, agent, address).
        writer.writerow(('房源', '房型', '总价', '单价', '面积', '楼层', '年代', '置业顾问', '地址'))
        for j in range(1, 11):  # pages 1..10 of the search results
            link = 'https://nanjing.anjuke.com/sale/p' + str(j)
            r = requests.get(link, headers=headers)
            soup = BeautifulSoup(r.text, 'lxml')
            # The page lays out details and prices as two parallel lists of
            # <div>s; entries are matched by position (same index).
            house_list = soup.find_all('div', class_="house-details")
            price_list = soup.find_all('div', class_="pro-price")
            for index, house in enumerate(house_list):
                hname = house.find('div', class_='house-title').a.text.strip()
                # The first details-item row carries room count / area /
                # floor / year at fixed child positions; look it up once
                # per house instead of four times.
                details = house.find('div', class_='details-item')
                no_room = details.span.text
                area = details.contents[3].text
                floor = details.contents[5].text
                year = details.contents[7].text
                # Drop the leading character of the broker label.
                broker = house.find('span', class_='brokername').text[1:]
                address = house.find('span', class_='comm-address').text.strip()
                address = address.replace('\xa0\xa0\n ', ' ')
                price_info = price_list[index]
                price = price_info.find('span', class_='price-det').text.strip()
                price_area = price_info.find('span', class_='unit-price').text.strip()
                row = (hname, no_room, price, price_area, area, floor, year, broker, address)
                writer.writerow(row)
                print(hname, no_room, price, price_area, area, floor, year, broker, address)
                # Parameterized insert — the driver escapes values, keeping
                # the query safe against injection from scraped text.
                cur.execute("insert into ershoufang (hname, no_room, price, price_area, area, floor, year, broker, address) values (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                            row)
    # Commit once after all pages succeed, BEFORE tearing down the cursor
    # (the original committed after cur.close(), which is fragile ordering).
    conn.commit()
finally:
    # Always release DB resources, even on a failed run.
    cur.close()
    conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment