Skip to content

Instantly share code, notes, and snippets.

@mundane799699
Created June 28, 2024 16:05
Show Gist options
  • Save mundane799699/dac6f01905fc9c358b686c9ec4ffa9ba to your computer and use it in GitHub Desktop.
Save mundane799699/dac6f01905fc9c358b686c9ec4ffa9ba to your computer and use it in GitHub Desktop.
抖音爬虫
from playwright.sync_api import sync_playwright, Page
from openpyxl import Workbook
import time
from datetime import datetime
# Install dependencies:
#   pip install playwright
#   playwright install

# Module-level Excel workbook shared by main() and handle_response().
# The first row of the active sheet holds the column headers.
_header_row = ["名称", "抖音号", "链接", "获赞", "粉丝数"]
wb = Workbook()
ws = wb.active
ws.append(_header_row)
def main():
    """Scrape a Douyin user-search page and save the results to Excel.

    Launches a non-headless persistent Chromium context (profile directory
    ``douyin``), registers ``handle_response`` for every network response,
    opens the search URL, and scrolls to the bottom repeatedly until the
    page height stops growing. The module-level workbook ``wb`` is then
    saved as ``抖音_<YYYYmmddHHMMSS>.xlsx`` in the current directory.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch_persistent_context(headless=False, user_data_dir='douyin')
        try:
            page = browser.new_page()
            page.on("response", handle_response)
            page.goto("https://www.douyin.com/search/%E9%B9%BF%E5%AE%89%E6%8C%87%E7%BA%B9%E9%94%81?type=user")
            # Wait 60 seconds so a captcha or SMS verification can be completed
            # by hand. If no verification appears, this can be shortened.
            page.wait_for_timeout(60000)

            last_height = page.evaluate("document.body.scrollHeight")
            while True:
                page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
                # Use Playwright's own wait rather than time.sleep(): sleeping
                # blocks the sync API's event processing, so response events
                # would not be handled while the next batch of results loads.
                page.wait_for_timeout(3000)
                new_height = page.evaluate("document.body.scrollHeight")
                if new_height == last_height:
                    # Height unchanged after a scroll: no more results loaded.
                    print("End of page reached")
                    break
                last_height = new_height

            timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
            filename = f'抖音_{timestamp}.xlsx'
            wb.save(filename)
            print(f"数据已保存到 {filename}")
        finally:
            # Always release the browser context, even if scraping failed.
            browser.close()
def handle_response(response):
    """Record users from a Douyin search API response into the worksheet.

    Responses whose URL does not contain ``aweme/v1/web/discover/search``
    are ignored. For a matching response, each entry of ``user_list`` is
    appended to the module-level sheet ``ws`` as one row (nickname,
    unique id, profile link, total likes, follower count) and echoed to
    stdout.

    A matching response is skipped silently when its body is not valid
    JSON, is not a JSON object, or has a missing/empty ``user_list``, so
    that one unusable response does not abort the whole scrape.

    Args:
        response: object exposing ``url`` and ``json()`` (a Playwright
            ``Response`` when registered via ``page.on("response", ...)``).
    """
    if "aweme/v1/web/discover/search" not in response.url:
        return

    try:
        data = response.json()
    except ValueError:
        # json.JSONDecodeError is a ValueError: body was not valid JSON.
        return

    if not isinstance(data, dict):
        return

    # `user_list` may be absent or null when a page carries no users.
    for user in data.get('user_list') or []:
        user_info = user['user_info']
        nickname = user_info['nickname']
        unique_id = user_info['unique_id']
        sec_uid = user_info['sec_uid']
        link = f"https://www.douyin.com/user/{sec_uid}"
        total_favorited = user_info['total_favorited']
        follower_count = user_info['follower_count']
        ws.append([nickname, unique_id, link, total_favorited, follower_count])
        print(f"名称:{nickname},抖音号:{unique_id},链接:{link},获赞:{total_favorited},粉丝数:{follower_count}")
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment