Last active
March 16, 2019 21:19
-
-
Save elaineo/3dc4dc3d1bfd5040c37e8c17ce71c558 to your computer and use it in GitHub Desktop.
Turn podcasts into Twitter threads
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from TwitterAPI import TwitterAPI | |
import json | |
import logging | |
import time | |
from collections import OrderedDict | |
from random import random | |
""" | |
Assume 3 participants. You will need to create a Twitter account for each participant, and get API keys for each one. | |
""" | |
# One TwitterAPI client per participant. The four credential names are
# reassigned for each participant, so after this section they hold the
# values for participant #3 only; use the botN_api objects, not the constants.

# Participant #1
CONSUMER_KEY = ''
CONSUMER_SECRET = ''
ACCESS_TOKEN = ''
ACCESS_TOKEN_SECRET = ''
bot0_api = TwitterAPI(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Participant #2
CONSUMER_KEY = ''
CONSUMER_SECRET = ''
ACCESS_TOKEN = ''
ACCESS_TOKEN_SECRET = ''
bot1_api = TwitterAPI(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Participant #3
CONSUMER_KEY = ''
CONSUMER_SECRET = ''
ACCESS_TOKEN = ''
ACCESS_TOKEN_SECRET = ''
bot2_api = TwitterAPI(CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Maps the speaker label used in the transcript to the client that tweets for
# that speaker. Built from a list of pairs rather than a dict literal: a dict
# literal is unordered on older interpreters, so wrapping it in OrderedDict
# would not guarantee that "Interviewer" stays first, and the first entry is
# the one used as the default speaker.
participants = OrderedDict([
    ("Interviewer", bot0_api),
    ("Interviewee1", bot1_api),
    ("Interviewee2", bot2_api),
])
def chunks(l, n, min_break=140):
    """Break a string of text into chunks of at most n characters.

    Break points are chosen, in order of preference:

    1. after the last '.' or '?' in the first n characters,
    2. after the last ',' in the first n characters,
    3. at the last space in the first n characters,
    4. a hard cut at n characters when none of the above exists.

    A punctuation break (1 or 2) is only used when it lies beyond index
    ``min_break``, so that chunks do not become needlessly short.

    Args:
        l: The text to split.
        n: Maximum chunk length; must be at least 1.
        min_break: Index a '.', '?' or ',' must exceed to be used as a
            break point. Negative values are treated as 0.

    Returns:
        A list of non-empty chunks with surrounding whitespace removed.
        Empty or whitespace-only input yields an empty list.

    Raises:
        ValueError: If n is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    # A break at index <= 0 would produce an empty chunk and make no progress.
    min_break = max(min_break, 0)

    pieces = []
    remainder = l.strip()
    while remainder:
        if len(remainder) <= n:
            # Everything left fits in one chunk: keep all of it.
            pieces.append(remainder)
            break

        window = remainder[:n]
        sentence_end = max(window.rfind('?'), window.rfind('.'))
        comma = window.rfind(',')
        space = window.rfind(' ')

        if sentence_end > min_break:
            cut = sentence_end + 1  # keep the punctuation with this chunk
        elif comma > min_break:
            cut = comma + 1
        elif space > 0:
            cut = space
        else:
            # No usable break point (e.g. one very long word or URL):
            # cut hard at n so the loop always makes progress.
            cut = n

        piece = remainder[:cut].strip()
        if piece:
            pieces.append(piece)
        # Drop the separator whitespace so the next chunk starts on a word.
        remainder = remainder[cut:].lstrip()
    return pieces
""" | |
This assumes the podcast transcript is in a text file. | |
Instructions for transcribing text from audio here: https://elaineou.com/2015/06/13/podcast-transcripts-speech-to-text-conversion/ | |
Transcription format should look like this. Each block of text should start with the speaker's name, and there should be two line breaks when switching speakers. | |
Interviewer: How are you today? | |
Interviewee1: Splendid, thank you. | |
Interviewee2: I am speaking now. | |
""" | |
# Read the whole transcript and split it into blocks separated by a blank line.
with open('transcript.txt') as transcript_file:
    transcript = transcript_file.read()
transcript = transcript.split("\n\n")

# Until a block names a speaker, tweet as the first participant.
# next(iter(...)) works on both Python 2 and 3; .keys()[0] fails on Python 3.
name = next(iter(participants))
bot = participants[name]
last_id = None  # Set this to last tweet id if restarting in the middle of the thread

for t in transcript:
    # A block that switches speaker starts with "<name>: ". The separator must
    # begin within the first 20 characters so that a colon further into the
    # text is not mistaken for a speaker label.
    speaker, separator, rest = t.partition(': ')
    if separator and len(speaker) < 20:
        name, text = speaker, rest
        bot = participants[name]  # KeyError here means an unknown speaker label
    else:
        # No speaker label: the current speaker keeps talking.
        text = t

    tweets = chunks(text, 250)  # 280 chars minus some chars for usernames
    for tweet in tweets:
        print("%s (%s): %s" % (name, last_id, tweet))
        twit = bot.request('statuses/update', {
            'status': tweet,
            'in_reply_to_status_id': last_id,
            'auto_populate_reply_metadata': True,
        })
        result = twit.response.json()  # parse the response body once
        errors = result.get('errors')
        if errors:
            print(errors)
            # Account likely restricted. Log in, solve the captcha, then
            # restart from last_id.
            raise RuntimeError(
                "Twitter rejected the tweet (last successful id: %s): %s"
                % (last_id, errors))
        print(result)
        last_id = result.get('id_str')
        time.sleep(round(random() * 20))  # random sleep, so Twitter won't think it's a bot
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment