Skip to content

Instantly share code, notes, and snippets.

@imtaehyun
Created November 30, 2018 07:49
Show Gist options
  • Save imtaehyun/aab809aebe0579b4442ec5d8609a318e to your computer and use it in GitHub Desktop.
Save imtaehyun/aab809aebe0579b4442ec5d8609a318e to your computer and use it in GitHub Desktop.
중고나라 크롤링
import requests
from bs4 import BeautifulSoup
def get_recent_article(menuid, page=1):
    """Print the articles listed on one page of a Joonggonara cafe board.

    Requests the board's article-list page and prints one line per article
    row: article id, title, link, and date.

    Args:
        menuid: Per-board id, sent as the ``search.menuid`` query parameter.
        page: Board page number, sent as the ``search.page`` query parameter.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        IndexError: If the page does not contain the expected second
            ``div.article-board`` element.
    """
    url = f'https://cafe.naver.com/joonggonara/ArticleList.nhn?search.clubid=10050146&search.menuid={menuid}&search.boardtype=L&search.page={page}&userDisplay=50'
    # The context manager releases the session's pooled connections; the
    # timeout keeps a stalled server from blocking the caller forever.
    with requests.Session() as s:
        response = s.get(url, timeout=10)

    soup = BeautifulSoup(response.text, 'html.parser')
    boards = soup.select('div.article-board')
    if len(boards) < 2:
        raise IndexError(
            "expected at least two 'div.article-board' elements, "
            f"found {len(boards)}"
        )

    # The article rows are read from the second 'div.article-board' element.
    for article in boards[1].select('tr'):
        number = article.select_one('.board-number .inner_number')
        anchor = article.select_one('a.article')
        date_cell = article.select_one('td.td_date')
        # Rows that lack any of the expected cells are skipped explicitly
        # instead of being hidden behind a blanket exception handler.
        if number is None or anchor is None or date_cell is None:
            continue
        link = anchor.get('href')
        if link is None:
            continue

        articleid = number.text.strip()
        title = anchor.text.strip()
        date = date_cell.text.strip()
        print(articleid, title, link, date)
def get_article_content(menuid, articleid):
    """Fetch the read page of a single Joonggonara cafe article.

    Args:
        menuid: Per-board id, sent as the ``menuid`` query parameter.
        articleid: Article id, sent as the ``articleid`` query parameter.

    Returns:
        The body of the HTTP response as text.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = f'https://cafe.naver.com/joonggonara/ArticleRead.nhn?clubid=10050146&page=1&menuid={menuid}&boardtype=L&articleid={articleid}&referrerAllArticles=false'
    # The context manager releases the session's pooled connections; the
    # timeout keeps a stalled server from blocking the caller forever.
    with requests.Session() as s:
        response = s.get(url, timeout=10)
    # Hand the fetched page back to the caller rather than discarding it.
    return response.text
# Script entry: list the articles of the board whose menuid is '338',
# using the function's default page number.
get_recent_article('338')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment