Last active
May 26, 2024 12:43
-
-
Save ayamkv/6d511b504a11783edc65615dc9a43aee to your computer and use it in GitHub Desktop.
(deprecated) Youtube Your Most Liked Comment (Google Takeout)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from pyyoutube import Api | |
import csv | |
# YouTube Data API v3 client; create the key in the Google Developer Console.
api = Api(api_key='YOUR_API_KEY')

# Matches one "you added a comment" link in the Takeout export and captures
# the video ID and the comment ID *together*, so the two can never get out of
# step with each other.
# NOTE: the "Anda menambahkan" prefix is locale-specific (this export was
# generated with an Indonesian-language account) -- adjust it for exports in
# other languages.
_COMMENT_LINK_RE = re.compile(
    r'Anda menambahkan <a href="http://www\.youtube\.com/watch\?v='
    r'([a-zA-Z0-9_-]{11})&lc=([a-zA-Z0-9_-]{26})'
)

# Your Google Takeout file. The context manager guarantees the handle is
# closed even if reading fails.
with open('my-comments.html', 'r', encoding='utf8') as file:
    lines = file.readlines()

# comments[0] holds comment IDs, comments[1] holds the matching video IDs;
# entries at the same index belong to the same comment.
comments = [[], []]
for line in lines:
    for video_id, comment_id in _COMMENT_LINK_RE.findall(line):
        comments[0].append(comment_id)
        comments[1].append(video_id)
def printAllComments():
    """Fetch like counts for every parsed comment and export them to CSV.

    Reads the module-level ``comments`` pair of lists (comment IDs at index
    0, video IDs at index 1), looks each comment up through the module-level
    ``api`` client, prints progress to stdout, and finally writes the
    results to ``comments.csv`` sorted by like count, most liked first.

    Comments for which the API returns no items are reported as processed
    but are left out of the CSV.
    """
    print("Printing all comments...")
    comment_ids, video_ids = comments[0], comments[1]
    # zip() stops at the shorter list, so a length mismatch between the two
    # lists can no longer raise IndexError part-way through the run.
    total = min(len(comment_ids), len(video_ids))
    comment_data = []
    for position, (comment_id, video_id) in enumerate(zip(comment_ids, video_ids), start=1):
        response = api.get_comment_by_id(comment_id=comment_id)
        # Truthiness covers both an empty list and a missing (None) items field.
        if response.items:
            # NOTE(review): likeCount is presumably optional on the response
            # model; fall back to 0 so sorting never compares None with int.
            like_count = response.items[0].snippet.likeCount or 0
            comment_url = "https://www.youtube.com/watch?v=" + video_id + "&lc=" + comment_id
            comment_data.append({
                'url': comment_url,
                'like_count': like_count
            })
            # Comment this out if it's annoying
            print(f"URL: {comment_url}\nLikes: {like_count}\n")
        print(f"Processed comment {position}/{total}")
    print("Done!")

    # Sorting comments by like_count in descending order
    sorted_comments = sorted(comment_data, key=lambda row: row['like_count'], reverse=True)

    # Exporting to CSV; newline='' is what the csv module expects, and the
    # explicit encoding keeps the output independent of the platform default.
    with open('comments.csv', 'w', newline='', encoding='utf-8') as csv_file:
        fieldnames = ['url', 'like_count']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(sorted_comments)
    print("Comments exported to comments.csv")
# Run the export only when this file is executed as a script, so importing
# it does not fire off API requests.
if __name__ == "__main__":
    printAllComments()
Hey, for those who find this script: Google Takeout has, for some reason, pushed an update that changes the format of the comments export and now has a limit on the comments it includes. I'm trying to find a solution ASAP.
Well, I guess Google decided to be a d**k and now has a time limit on their Takeout data.
Takeout data is not inclusive. It's based on a rolling window. So you're advised to make an export a few times a year, and save them indefinitely, because old content won't appear in new archives. (s)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I'm trying to use this, but I have no clue how you got that "my-comments.html" file. When I exported with Google Takeout, not only did I not get an HTML file that contains any info, I got data about the wrong channel (though this is a thing Google is to blame for). So, how did you do this magic, man? I spent about an hour just trying to get an export for the right channel and failing, but I think I might be on the wrong track anyway.