Created
March 25, 2021 23:22
-
-
Save VirenMohindra/ced3318325e8a4ea7dae65b6eb486baa to your computer and use it in GitHub Desktop.
Scraping GitHub issues from https://github.com/headllines/hackernews-daily/issues
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Scrape daily Hacker News digest issues from a GitHub repo into a CSV.

Walks the open issues of headllines/hackernews-daily, parses each daily
digest body for story metadata (title, link, submitter, points, comments),
looks up each submitter's account-creation time via the official Hacker
News Firebase API, flags recently created accounts, and writes one row per
story to hackernews-daily.csv.
"""
from datetime import datetime, timedelta
import csv
import re

import requests
from github import Github

# PyGithub client; "access_token" is a placeholder — supply a real
# personal access token before running.
g = Github("access_token")

REPO_NAME = 'headllines/hackernews-daily'
FILE_NAME = "hackernews-daily.csv"
HN_API_URL = 'https://hacker-news.firebaseio.com/v0/user/'
# Accounts created within this many days of the submission are flagged "new".
NOOB_ACCOUNT_THRESHOLD = 15

# Compiled once — these patterns run for every line of every issue body.
SQUARE_BRACKETS_RE = re.compile(r"\[(.*?)\]")   # [title], [userID], [N comments]
LINK_RE = re.compile(r"(?P<url>https?://[^\s]+)")  # story link, user link, HN link
POINTS_RE = re.compile(r"(\w+ ){1}point")          # "NNN points"

repo = g.get_repo(REPO_NAME)
issues = repo.get_issues(state='open')

fields = ['Date', 'Title', 'Link', 'userID', 'userID Age', 'New Account?',
          'Number of Points', 'Number of Comments', 'HN Link']
rows = []

for issue in issues:
    if issue.title == 'npm ci':  # automation noise, not a digest
        continue

    # Titles look like "... @ 2021-03-25"; some use the Chinese marker 之.
    try:
        date = issue.title.split('@')[1].strip(" ")
    except IndexError:
        date = issue.title.split('之')[1].strip(" ")

    post = issue.body.split('\n\n')
    for line in post[:-1]:  # last chunk is footer, not a story
        all_square_brackets = SQUARE_BRACKETS_RE.findall(line)
        all_links = LINK_RE.findall(line.lower())
        title = all_square_brackets[0]
        link = all_links[0].strip("**").strip(" )")
        user_id = all_square_brackets[1]

        # Account-creation timestamp from the official HN Firebase API.
        # timeout added so a stalled request cannot hang the whole run.
        r = requests.get(HN_API_URL + user_id + '.json', timeout=10)
        user_id_age = r.json()['created']
        account_created_date = datetime.fromtimestamp(user_id_age)

        # Two date formats appear: ISO "2021-03-25" and the JS-style
        # "Thu Mar 25 2021 23:22:00 GMT+0000 (Coordinated Universal Time)".
        try:
            submission_date = datetime.strptime(date, '%Y-%m-%d')
        except ValueError:
            cleaned_date = date.replace(
                ' GMT+0000 (Coordinated Universal Time)', '')
            submission_date = datetime.strptime(
                cleaned_date, '%a %b %d %Y %H:%M:%S')

        # BUG FIX: the original computed a threshold date but never used it
        # (and one branch dropped `submission_date` from the subtraction).
        # An account is "new" when it was created within
        # NOOB_ACCOUNT_THRESHOLD days before the submission (or after it).
        is_new = account_created_date > submission_date - timedelta(
            days=NOOB_ACCOUNT_THRESHOLD)

        # Normalise all dates to the YYYY-MM-DD standard.
        date = submission_date.strftime('%Y-%m-%d')

        user_id_link = all_links[1].strip(" )").replace(
            'https://news.ycombinator.com/user?id=', '')
        number_of_points = POINTS_RE.findall(line)[0].strip(" ")
        number_of_comments = all_square_brackets[2].split(' ')[0]

        try:
            hn_link = all_links[2].strip(" )")
        except IndexError:
            # Self-posts (Ask/Show HN) have no external link: the first URL
            # is the user page and the second is the HN item itself.
            link = ''
            user_id_link = all_links[0].strip(" )")
            hn_link = all_links[1].strip(" )")

        rows.append([date, title, link, user_id, user_id_age, is_new,
                     number_of_points, number_of_comments, hn_link])

# newline='' is required for csv.writer — without it Windows emits a blank
# row after every record; utf-8-sig keeps Excel happy with the BOM.
with open(FILE_NAME, 'w', encoding='utf-8-sig', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(fields)
    csvwriter.writerows(rows)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment