Skip to content

Instantly share code, notes, and snippets.

Forked from stephenleo/00_SQL in Google
Created September 17, 2022 07:46
Show Gist options
  • Save zeromtmu/cbcbe99eb282e3d72a4811efb8171ec5 to your computer and use it in GitHub Desktop.
Save zeromtmu/cbcbe99eb282e3d72a4811efb8171ec5 to your computer and use it in GitHub Desktop.
[Medium] SQL in Google Colab

SQL in Google Colab

All the code for the Medium Post

Open In Colab

import sqlite3
import pandas as pd
def pd_to_sqlDB(input_df: pd.DataFrame,
                table_name: str,
                db_name: str = 'default.db') -> None:
    '''Take a Pandas dataframe `input_df` and upload it to `table_name` SQLITE table

    A new table is created with one untyped column per dataframe column, then
    every dataframe row is inserted through a parameterised INSERT statement.

    Args:
        input_df (pd.DataFrame): Dataframe containing data to upload to SQLITE
        table_name (str): Name of the SQLITE table to upload to
        db_name (str, optional): Name of the SQLITE Database in which the table is created.
                                 Defaults to 'default.db'.

    Raises:
        sqlite3.OperationalError: If the CREATE TABLE statement fails, e.g.
            because `table_name` already exists in the database.

    Note:
        `table_name` and the dataframe column names are interpolated directly
        into the SQL text (only the row values are bound as parameters), so
        they must be trusted, valid SQL identifiers.
    '''
    # Step 1: Setup local logging
    import logging
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')

    # Step 2: Find columns in the dataframe
    cols = input_df.columns
    cols_string = ','.join(cols)
    val_wildcard_string = ','.join(['?'] * len(cols))

    # Step 3: Connect to a DB file if it exists, else create a new file
    con = sqlite3.connect(db_name)
    try:
        cur = con.cursor()
        logging.info(f'SQL DB {db_name} created')

        # Step 4: Create Table
        sql_string = f"""CREATE TABLE {table_name} ({cols_string});"""
        cur.execute(sql_string)
        logging.info(f'SQL Table {table_name} created with {len(cols)} columns')

        # Step 5: Upload the dataframe
        rows_to_upload = input_df.to_dict(orient='split')['data']
        sql_string = f"""INSERT INTO {table_name} ({cols_string}) VALUES ({val_wildcard_string});"""
        cur.executemany(sql_string, rows_to_upload)
        logging.info(f'{len(rows_to_upload)} rows uploaded to {table_name}')

        # Step 6: Commit the changes and close the connection
        con.commit()
    finally:
        # Close even when a statement above raised, so the DB file handle
        # is never leaked.
        con.close()
def sql_query_to_pd(sql_query_string: str, db_name: str = 'default.db') -> pd.DataFrame:
    '''Execute an SQL query and return the results as a pandas dataframe

    Args:
        sql_query_string (str): SQL query string to execute
        db_name (str, optional): Name of the SQLITE Database to execute the query in.
                                 Defaults to 'default.db'.

    Returns:
        pd.DataFrame: Results of the SQL query in a pandas dataframe
    '''
    # Step 1: Connect to the SQL DB
    con = sqlite3.connect(db_name)
    try:
        # Step 2: Execute the SQL query
        cursor = con.execute(sql_query_string)

        # Step 3: Fetch the data and column names
        result_data = cursor.fetchall()
        # The first item of each cursor.description entry is the column name.
        cols = [description[0] for description in cursor.description]
    finally:
        # Step 4: Close the connection (also when the query itself fails)
        con.close()

    # Step 5: Return as a dataframe
    return pd.DataFrame(result_data, columns=cols)
# Step 1: Read the csv file into a dataframe
# Dataset: country_vaccinations.csv
# NOTE(review): the dataset's source link is missing from this copy - restore it.
input_df = pd.read_csv('country_vaccinations.csv')

# Step 2: Upload the dataframe to a SQL Table
# The table name must match the FROM clause of the query below, and the
# database name must match the one queried in Step 4.
pd_to_sqlDB(input_df,
            table_name='country_vaccinations',
            db_name='default.db')

# Step 3: Write the SQL query in a string variable
# Total daily vaccinations per country, largest total first.
sql_query_string = """
    SELECT country, SUM(daily_vaccinations) as total_vaccinated
    FROM country_vaccinations
    WHERE daily_vaccinations IS NOT NULL
    GROUP BY country
    ORDER BY total_vaccinated DESC
"""

# Step 4: Execute the SQL query
result_df = sql_query_to_pd(sql_query_string, db_name='default.db')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment