Interactive Tutorials
End-To-end Tutorial
- (Pycon 2016 - 3 hrs) : https://www.youtube.com/watch?v=RrdECLvHW6g
Command line tutorial
Interactive Tutorials
End-To-end Tutorial
Command line tutorial
-- show running queries (pre 9.2) | |
SELECT procpid, age(query_start, clock_timestamp()), usename, current_query | |
FROM pg_stat_activity | |
WHERE current_query != '<IDLE>' AND current_query NOT ILIKE '%pg_stat_activity%' | |
ORDER BY query_start desc; | |
-- show running queries (9.2) | |
SELECT pid, age(query_start, clock_timestamp()), usename, query | |
FROM pg_stat_activity | |
WHERE query != '<IDLE>' AND query NOT ILIKE '%pg_stat_activity%' |
**Convert .ipynb to Slides** | |
cd "test" | |
ipython nbconvert "test.ipynb" --to slides --reveal-prefix "http://cdn.jsdelivr.net/reveal.js/2.6.2" --post serve --config slides_config.py | |
* To print slides add ?print-pdf at the end of the URL and print | |
**Convert .ipynb to LaTeX/PDF** | |
ipython nbconvert MyFirstNotebook.ipynb --to latex --post PDF | |
**Convert .ipynb to HTML** |
"""Google spreadsheet related. | |
Packages required: oauth2client, google-api-python-client | |
* https://gist.github.com/miohtama/f988a5a83a301dd27469 | |
""" | |
from oauth2client.service_account import ServiceAccountCredentials | |
from apiclient import discovery | |
def get_credentials(scopes: list) -> ServiceAccountCredentials: |
def df_to_ddl(df, tablename='test.mytable'): | |
data_dtypes = df.dtypes.reset_index().rename(columns = {'index':'colname',0:'datatype'}) | |
# Map pandas datatypes into SQL | |
data_dtypes['sql_dtype'] = data_dtypes.datatype.astype(str).map( | |
{'object':'varchar(24)', | |
'float64':'float', | |
'int64':'int', | |
'bool':'boolean'} ) |
import pandas as pd | |
def df_diff(index_cols, data1, data2, lsuffix='_1'): | |
""" | |
usage: | |
comparisondf= df_diff( ['unique_id','date'], current_df, new_df, lsuffix='_curr') | |
returns: | |
single dataframe with index_cols on the index, as well as all other variables stacked on the index, and the | |
values in each dataframe along the columns. |
# For gensim | |
from itertools import groupby | |
import gensim | |
class VectorizedCorpus(object): | |
""" | |
Helper Class for using Sklearn Vectorizers with gensim's LDA model | |
handles transformations between gensim corpus / bow representations and sklearn matrix | |
# List the unique values in a DataFrame column.
df.column_name.unique()

# Convert a Series to numeric; values that cannot be parsed become NaN.
# (Series.convert_objects was removed in pandas 1.0; pd.to_numeric with
# errors='coerce' is the replacement.)
df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

# Keep only the DataFrame rows whose column value is in a given list.
# The list name must match the one passed to isin() below.
value_list = ['value1', 'value2', 'value3']
df = df[df.column.isin(value_list)]