Created
March 4, 2023 15:39
-
-
Save emisjerry/6c4329e829ccb2b26e6d062e86d294df to your computer and use it in GitHub Desktop.
movie.ahk: [AHK#58] 擷取豆瓣電影網的超簡單爬蟲腳本
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Requires AutoHotkey v2.0
#SingleInstance Force

; movie.ahk - download a Douban movie page and write a Markdown note for it.
;
; Usage:   movie.ahk <DoubanID> <OutputFolder>
; Output:  <OutputFolder>\<title>.md containing a front-matter header
;          (title, poster URL, Douban ID), a heading, the poster image and
;          the text of the page's "info" element with ": " turned into ":: ".
;
; While DEBUG is true the sample ID and folder below are used and the
; command-line arguments are ignored.
DEBUG := true

if (DEBUG) {
    sID := "1294194"
    sOutputDir := "z:\test\obsidian\collections\Movies\"
} else if (A_Args.Length < 2) {
    ; Both arguments are read below, so anything short of two is rejected here
    ; instead of failing later on A_Args[2].
    MsgBox("需要兩個參數`n範例:movie.ahk2 豆瓣ID 輸出資料夾")
    ExitApp
} else {
    sID := A_Args[1]
    sOutputDir := A_Args[2]
}

; The note path is built by plain concatenation, so make sure a non-empty
; folder ends with a path separator.
if (sOutputDir != "" && !(SubStr(sOutputDir, -1) ~= "[\\/]"))
    sOutputDir .= "\"

; Files written by this script (the note) are encoded as UTF-8.
FileEncoding("UTF-8")

url := "https://movie.douban.com/subject/" . sID . "/"

; --- Download the page -------------------------------------------------------
; NOTE(review): the page is requested with POST and a form Content-Type, as in
; the original script; GET is the usual verb for fetching a page - confirm
; against the site before changing it.
try {
    httpClient := ComObject("WinHttp.WinHttpRequest.5.1")
    httpClient.Open("POST", url, false)   ; false = synchronous request
    httpClient.SetRequestHeader("Content-Type", "application/x-www-form-urlencoded")
    httpClient.Send()
    httpClient.WaitForResponse()
    httpStatus := httpClient.Status
    Result := httpClient.ResponseText
} catch Error as err {
    MsgBox("Request failed: " . url . "`n" . err.Message)
    ExitApp
}
if (httpStatus != 200) {
    MsgBox("Unexpected HTTP status " . httpStatus . " for " . url)
    ExitApp
}

; --- Parse the page ----------------------------------------------------------
; Load the response into an in-memory HTML document so elements can be looked
; up by id instead of scanning the raw markup.
html := ComObject("HTMLFile")
html.write(Result)

mainpic := html.getElementById("mainpic")
if (!ElementExists(mainpic)) {
    MsgBox("Cannot find mainpic div")
    ExitApp
}

; The poster <img> inside #mainpic carries both values we need: its src is the
; poster URL and its alt text is the movie title. Reading the attributes from
; the element keeps titles that contain spaces intact.
posterImages := mainpic.getElementsByTagName("img")
if (posterImages.length = 0) {
    MsgBox("Cannot find poster image in mainpic div")
    ExitApp
}
posterImg := posterImages.item(0)
sPic := posterImg.src
sTitle := Trim(posterImg.alt)

divInfo := html.getElementById("info")
if (ElementExists(divInfo)) {
    ; Rewrite "key: value" as "key:: value" in the info text.
    infoText := StrReplace(divInfo.innerText, ": ", ":: ")

    ; Characters that Windows forbids in file names are replaced so the note
    ; can always be created; fall back to the Douban ID if nothing is left.
    sFileName := Trim(RegExReplace(sTitle, '[\\/:*?"<>|]', "_"))
    if (sFileName = "")
        sFileName := sID
    sFile := sOutputDir . sFileName . ".md"

    ; Replace any note left over from a previous run.
    if FileExist(sFile) {
        FileDelete(sFile)
    }

    ; {1}/{4} = title, {2}/{5} = poster URL, {3} = Douban ID, {6} = info text.
    sFormat := "
    (
    ---
    template-output: Movies
    tags: movie
    title: {1}
    照片: {2}
    豆瓣ID: {3}
    ---
    # {4}
    ![|300]({5})
    {6}
    )"
    noteText := Format(sFormat, sTitle, sPic, sID, sTitle, sPic, infoText)
    FileAppend(noteText, sFile)
} else {
    MsgBox("CAnnot find info div")
}
return

; Returns true when node refers to an actual element.
; A failed lookup may come back either as a non-object value or as a COM
; wrapper around a null pointer, so both forms are treated as "not found".
ElementExists(node) {
    if !IsObject(node)
        return false
    return !(node is ComValue) || ComObjValue(node) != 0
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment