Skip to content

Instantly share code, notes, and snippets.

@Hamid-K
Created August 20, 2024 22:26
Show Gist options
  • Save Hamid-K/04cce1aa9d28d609c16f509ed8b65546 to your computer and use it in GitHub Desktop.
Save Hamid-K/04cce1aa9d28d609c16f509ed8b65546 to your computer and use it in GitHub Desktop.
Whisper: Transcribe Audio to Text
# Sample script to use OpenAI Whisper API
# This script demonstrates how to convert input audio files to text for further processing.
# The code can still be improved and optimized in many ways. Feel free to modify and use it
# for your own needs.
#
import argparse
import os

import openai
from openai import OpenAI
from rich import print
from rich.console import Console
from rich.traceback import install

# Set your API key through the OPENAI_API_KEY environment variable instead of
# committing it to source. The placeholder is kept only as a fallback so the
# script still starts (e.g. for --help) when the variable is unset.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "sk-proj-...."))

# Shared console used for all status and result output in this script.
console = Console()

# Install Rich's traceback handler for uncaught exceptions.
install()
def transcribe_audio(file_path, language):
    """Transcribe the audio using the Whisper API.

    Args:
        file_path: Path of the audio file to upload.
        language: Language of the audio as an ISO-639-1 code (e.g. "en").

    Returns:
        The transcribed text, or None when the file could not be read, the
        API call failed, or the response carried no text.
    """
    console.print(f"[bold blue]Starting transcription for file:[/bold blue] {file_path}")
    try:
        with open(file_path, "rb") as audio_file:
            console.print("[bold blue]Sending audio file to Whisper API...[/bold blue]")
            # The v1 client object exposes transcription as
            # `audio.transcriptions.create`; `audio.transcribe` does not exist
            # on it and raised AttributeError on every call.
            response = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="json",
                language=language,
            )
        console.print("[bold blue]Transcription received.[/bold blue]")

        # The client returns an object with a `.text` attribute; a plain dict
        # is also accepted so either response shape yields the transcript.
        if isinstance(response, dict):
            text = response.get("text")
        else:
            text = getattr(response, "text", None)

        if isinstance(text, str):
            return text

        console.print("[bold red]Unexpected response format received from Whisper API.[/bold red]")
        return None
    except openai.OpenAIError as e:
        if "Invalid language" in str(e) and "ISO-639-1 format" in str(e):
            console.print(f"[bold red]An API error occurred during transcription: Invalid language '{language}'. Language parameter must be specified in ISO-639-1 format.[/bold red]")
        else:
            console.print(f"[bold red]An API error occurred during transcription:[/bold red] {e}")
        return None
    except OSError as e:
        # Missing or unreadable input file: report it as such rather than as
        # a generic unexpected error.
        console.print(f"[bold red]Could not read audio file:[/bold red] {e}")
        return None
    except Exception as e:
        console.print(f"[bold red]An unexpected error occurred during transcription:[/bold red] {e}")
        return None
def summarize_text(text):
    """Ask GPT-4 to summarize *text*.

    Returns the content of the first completion choice, or None if the
    request raised (the error is printed to the console).
    """
    console.print("[bold blue]Starting text summarization...[/bold blue]")
    try:
        conversation = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Summarize the following text:\n\n{text}"},
        ]
        completion = client.chat.completions.create(model="gpt-4", messages=conversation)
        console.print("[bold blue]Summary received.[/bold blue]")
        return completion.choices[0].message.content
    except openai.OpenAIError as api_err:
        console.print(f"[bold red]An API error occurred during summarization:[/bold red] {api_err}")
    except Exception as err:
        console.print(f"[bold red]An unexpected error occurred during summarization:[/bold red] { err }")
    # Reached only after one of the handlers above has reported a failure.
    return None
def generate_bullet_points(text):
    """Turn *text* into bullet points via a GPT-4 chat completion.

    Returns the content of the first completion choice, or None when the
    request raises (the error is printed to the console).
    """
    console.print("[bold blue]Starting bullet points generation...[/bold blue]")
    try:
        request = {
            "model": "gpt-4",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"Create bullet points from the following text:\n\n{text}"},
            ],
        }
        reply = client.chat.completions.create(**request)
        console.print("[bold blue]Bullet points received.[/bold blue]")
        first_choice = reply.choices[0]
        return first_choice.message.content
    except openai.OpenAIError as api_err:
        console.print(f"[bold red]An API error occurred while generating bullet points:[/bold red] {api_err}")
        return None
    except Exception as err:
        console.print(f"[bold red]An unexpected error occurred while generating bullet points:[/bold red] {err}")
        return None
def generate_mind_map_description(text):
    """Ask GPT-4 to describe how a mind map of *text* would look.

    Returns the content of the first completion choice, or None when the
    request raises (the error is printed to the console).
    """
    console.print("[bold blue]Starting mind map description generation...[/bold blue]")
    description = None  # stays None unless the request succeeds
    try:
        system_msg = {"role": "system", "content": "You are a helpful assistant."}
        user_msg = {
            "role": "user",
            "content": f"Describe how a mind map of the following text would look:\n\n{text}",
        }
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=[system_msg, user_msg],
        )
        console.print("[bold blue]Mind map description received.[/bold blue]")
        description = completion.choices[0].message.content
    except openai.OpenAIError as api_err:
        console.print(f"[bold red]An API error occurred while generating the mind map description:[/bold red] {api_err}")
    except Exception as err:
        console.print(f"[bold red]An unexpected error occurred while generating the mind map description:[/bold red] {err}")
    return description
def process_audio(file_path, options):
    """Transcribe *file_path* and print each output enabled on *options*.

    *options* is read for the attributes ``language``, ``full_text``,
    ``summary``, ``bullet_points`` and ``mind_map``. Nothing is returned;
    all results go to the console. If no transcript is obtained, a warning
    is printed and the remaining steps are skipped.
    """
    console.print(f"[bold blue]Processing audio file:[/bold blue] {file_path}")
    transcript = transcribe_audio(file_path, options.language)
    if not transcript:
        console.print("[bold yellow]No transcript available.[/bold yellow]")
        return

    if options.full_text:
        console.print("[bold green]Full Text:[/bold green]\n", transcript)

    # One row per optional output, handled in this order:
    # (option attribute, progress message, generator function, result heading)
    stages = (
        (
            "summary",
            "[bold blue]Generating summary...[/bold blue]",
            summarize_text,
            "[bold green]Summary:[/bold green]\n",
        ),
        (
            "bullet_points",
            "[bold blue]Generating bullet points...[/bold blue]",
            generate_bullet_points,
            "[bold green]Bullet Points:[/bold green]\n",
        ),
        (
            "mind_map",
            "[bold blue]Generating mind map description...[/bold blue]",
            generate_mind_map_description,
            "[bold green]Mind Map Description:[/bold green]\n",
        ),
    )
    for attr, progress, generate, heading in stages:
        if not getattr(options, attr):
            continue
        console.print(progress)
        result = generate(transcript)
        # Generators return None on failure; they have already reported it.
        if result:
            console.print(heading, result)
def main():
    """Parse the command-line arguments and process the given audio file."""
    parser = argparse.ArgumentParser(
        description="Transcribe audio and generate various outputs using OpenAI Whisper API."
    )
    parser.add_argument("audio_file", help="Path to the input audio file (mp3 format).")

    # Boolean switches, registered in the order they appear in --help.
    switches = {
        "--summary": "Generate a summary of the conversation.",
        "--bullet_points": "Generate bullet points from the most talked about topics.",
        "--mind_map": "Generate a mind-map graph of subjects discussed during the conversation.",
        "--full_text": "Get the full text of the conversation.",
    }
    for flag, help_text in switches.items():
        parser.add_argument(flag, action="store_true", help=help_text)

    parser.add_argument(
        "--language",
        default="en",
        help="Specify the language of the audio for transcription (default: English). Language must be in ISO-639-1 format.",
    )

    options = parser.parse_args()
    process_audio(options.audio_file, options)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment