jaircastruita · May 6, 2016 18:38
diff --git a/ECOBICI_analysis_cluster_in_out b/ECOBICI_analysis_cluster_in_out
 {

 "metadata": {

  "name": "",

  "signature": "sha256:a8398ce40087121501e924c32e900fecbc5db8dd5d2713c651e50d79a75f3012"

 },

 "nbformat": 3,

 "nbformat_minor": 0,

 "worksheets": [

  {

   "cells": [

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "%matplotlib inline\n",

      "import matplotlib.pyplot as plt\n",

      "import pandas as pd\n",

      "import seaborn as sns; sns.set()\n",

      "import folium\n",

      "\n",

      "import vincent\n",

      "vincent.core.initialize_notebook()\n",

      "import json\n",

      "from vincent import AxisProperties, PropertySet, ValueRef"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [

      {

       "html": [

        "\n",

        "           <script>\n",

        "               \n",

        "                function vct_load_lib(url, callback){\n",

        "                      if(typeof d3 !== 'undefined' &&\n",

        "                         url === 'http://d3js.org/d3.v3.min.js'){\n",

        "                        callback()\n",

        "                      }\n",

        "                      var s = document.createElement('script');\n",

        "                      s.src = url;\n",

        "                      s.async = true;\n",

        "                      s.onreadystatechange = s.onload = callback;\n",

        "                      s.onerror = function(){\n",

        "                        console.warn(\"failed to load library \" + url);\n",

        "                        };\n",

        "                      document.getElementsByTagName(\"head\")[0].appendChild(s);\n",

        "                };\n",

        "                var vincent_event = new CustomEvent(\n",

        "                  \"vincent_libs_loaded\",\n",

        "                  {bubbles: true, cancelable: true}\n",

        "                );\n",

        "                \n",

        "               function load_all_libs(){\n",

        "                  console.log('Loading Vincent libs...')\n",

        "                  vct_load_lib('http://d3js.org/d3.v3.min.js', function(){\n",

        "                  vct_load_lib('http://d3js.org/d3.geo.projection.v0.min.js', function(){\n",

        "                  vct_load_lib('http://wrobstory.github.io/d3-cloud/d3.layout.cloud.js', function(){\n",

        "                  vct_load_lib('http://wrobstory.github.io/vega/vega.v1.3.3.js', function(){\n",

        "                  window.dispatchEvent(vincent_event);\n",

        "                  });\n",

        "                  });\n",

        "                  });\n",

        "                  });\n",

        "               };\n",

        "               if(typeof define === \"function\" && define.amd){\n",

        "                    if (window['d3'] === undefined ||\n",

        "                        window['topojson'] === undefined){\n",

        "                        require.config(\n",

        "                            {paths: {\n",

        "                              d3: 'http://d3js.org/d3.v3.min',\n",

        "                              topojson: 'http://d3js.org/topojson.v1.min'\n",

        "                              }\n",

        "                            }\n",

        "                          );\n",

        "                        require([\"d3\"], function(d3){\n",

        "                            console.log('Loading Vincent from require.js...')\n",

        "                            window.d3 = d3;\n",

        "                            require([\"topojson\"], function(topojson){\n",

        "                                window.topojson = topojson;\n",

        "                                load_all_libs();\n",

        "                            });\n",

        "                        });\n",

        "                    } else {\n",

        "                        load_all_libs();\n",

        "                    };\n",

        "               }else{\n",

        "                    console.log('Require.js not found, loading manually...')\n",

        "                    load_all_libs();\n",

        "               };\n",

        "\n",

        "           </script>"

       ],

       "metadata": {},

       "output_type": "display_data",

       "text": [

        "<IPython.core.display.HTML at 0xa554e48>"

       ]

      }

     ],

     "prompt_number": 1

    },

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "path = r'D:\\ownCloud\\Documentos posgrado\\Posgrado 2do semestre\\Seminario investigacion\\Experimental Data\\Cd Mex\\Movilidad\\Hist Ecobici\\ecobici.csv'\n",

      "trips = pd.read_csv(path,\n",

      "                    parse_dates=['date_removed', 'date_arrived'],\n",

      "                    infer_datetime_format=True)\n",

      "\n",

      "trips = trips.loc[trips['action'] != 'C  ']\n",

      "\n",

      "# Find the start date\n",

      "ind = pd.DatetimeIndex(trips.date_arrived)\n",

      "trips['date'] = ind.date.astype('datetime64')\n",

      "trips['hour'] = ind.hour\n",

      "\n",

      "# Count trips by date\n",

      "by_date = trips.pivot_table('bike',aggfunc='count',\n",

      "                            index='date',\n",

      "                            columns='station_arrived', )\n",

      "\n",

      "by_date.drop([col for col in list(by_date.columns.values) if col > 275], \n",

      "        axis=1, inplace=True)\n",

      "\n",

      "# add a flag indicating weekend\n",

      "by_date['weekend'] = (by_date.index.dayofweek >= 5)\n",

      "\n",

      "by_date_weekday = by_date[by_date['weekend'] == False]\n",

      "\n",

      "#mavg_wd = pd.rolling_mean(by_date_weekday.sum(axis=1), 20)\n",

      "#mstd_wd = pd.rolling_std(by_date_weekday.sum(axis=1), 20)\n",

      "mavg_wd = by_date_weekday.sum(axis=1).rolling(window=20,center=False).mean()\n",

      "mstd_wd = by_date_weekday.sum(axis=1).rolling(window=20,center=False).std()\n",

      "\n",

      "black_list = by_date_weekday.sum(axis=1)[by_date_weekday.sum(axis=1) <= mavg_wd-2*mstd_wd].index\n",

      "\n",

      "by_date_weekend = by_date[by_date['weekend'] == True]\n",

      "\n",

      "#mavg_we = pd.rolling_mean(by_date_weekend.sum(axis=1), 20)\n",

      "#mstd_we = pd.rolling_std(by_date_weekend.sum(axis=1), 20)\n",

      "mavg_we = by_date_weekend.sum(axis=1).rolling(window=20,center=False).mean()\n",

      "mstd_we = by_date_weekend.sum(axis=1).rolling(window=20,center=False).std()\n",

      "\n",

      "black_list = black_list.union(by_date_weekend.sum(axis=1)[by_date_weekend.sum(axis=1) <= mavg_we-2*mstd_we].index)"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [

      {

       "output_type": "stream",

       "stream": "stderr",

       "text": [

        "C:\\Users\\ghost\\AppData\\Local\\Enthought\\Canopy\\App\\appdata\\canopy-1.5.2.2785.win-x86_64\\lib\\site-packages\\IPython\\core\\interactiveshell.py:2741: DtypeWarning: Columns (0,1) have mixed types. Specify dtype option on import or set low_memory=False.\n",

        "  interactivity=interactivity, compiler=compiler)\n"

       ]

      }

     ],

     "prompt_number": 2

    },

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "trips_in = pd.read_csv(path,\n",

      "                    parse_dates=['date_removed', 'date_arrived'],\n",

      "                    infer_datetime_format=True)\n",

      "\n",

      "trips_in = trips_in.loc[trips_in['action'] != 'C  ']\n",

      "\n",

      "# Find the start date\n",

      "ind = pd.DatetimeIndex(trips.date_arrived)\n",

      "trips_in['date'] = ind.date.astype('datetime64')\n",

      "trips_in['hour'] = ind.hour\n",

      "\n",

      "# remove those values on the blacklist\n",

      "trips_in = trips_in.loc[~trips_in.date.isin(black_list)]"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [],

     "prompt_number": 3

    },

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "trips_out = pd.read_csv(path,\n",

      "                    parse_dates=['date_removed', 'date_arrived'],\n",

      "                    infer_datetime_format=True)\n",

      "\n",

      "trips_out = trips_out.loc[trips_out['action'] != 'C  ']\n",

      "\n",

      "# Find the start date\n",

      "ind = pd.DatetimeIndex(trips.date_removed)\n",

      "trips_out['date'] = ind.date.astype('datetime64')\n",

      "trips_out['hour'] = ind.hour\n",

      "\n",

      "# remove those values on the blacklist\n",

      "trips_out = trips_out.loc[~trips_out.date.isin(black_list)]"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [],

     "prompt_number": 4

    },

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "#trips_out\n",

      "#trips_out = trips_out.loc[~trips_out.date.isin(black_list)]\n",

      "#trips_out.loc[~trips_out.date.isin(black_list)]\n",

      "#black_list"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [],

     "prompt_number": 5

    },

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "# count trips by date and by hour\n",

      "by_hour_out = trips_out.pivot_table('bike', aggfunc='count',\n",

      "                            index=['date', 'hour'],\n",

      "                            columns='station_removed').fillna(0).reset_index('hour')\n",

      "\n",

      "# average these counts by weekend\n",

      "by_hour_out['weekend'] = (by_hour_out.index.dayofweek >= 5)\n",

      "by_hour_out = by_hour_out.groupby(['weekend', 'hour']).mean()\n",

      "by_hour_out.index.set_levels([['weekday', 'weekend'],\n",

      "                          [\"{0}:00\".format(i) for i in range(24)]],\n",

      "                         inplace=True);\n",

      "\n",

      "by_hour_out.drop([col for col in list(by_hour_out.columns.values) if col > 275], axis=1, inplace=True)"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [],

     "prompt_number": 6

    },

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "# count trips by date and by hour\n",

      "by_hour_in = trips_in.pivot_table('bike', aggfunc='count',\n",

      "                            index=['date', 'hour'],\n",

      "                            columns='station_arrived').fillna(0).reset_index('hour')\n",

      "\n",

      "# average these counts by weekend\n",

      "by_hour_in['weekend'] = (by_hour_in.index.dayofweek >= 5)\n",

      "by_hour_in = by_hour_in.groupby(['weekend', 'hour']).mean()\n",

      "by_hour_in.index.set_levels([['weekday', 'weekend'],\n",

      "                          [\"{0}:00\".format(i) for i in range(24)]],\n",

      "                         inplace=True);\n",

      "\n",

      "by_hour_in.drop([col for col in list(by_hour_in.columns.values) if col > 275], axis=1, inplace=True)"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [],

     "prompt_number": 7

    },

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "#by_hour = pd.concat([by_hour_in.loc['weekday'],by_hour_out.loc['weekday']],axis=0)\n",

      "by_hour = pd.concat([by_hour_in,by_hour_out],axis=0)"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [],

     "prompt_number": 8

    },

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "from sklearn import mixture \n",

      "\n",

      "X = by_hour.loc['weekday'].values.T\n",

      "num_clusters = 6\n",

      "cv_types = ['spherical', 'tied', 'diag', 'full']\n",

      "\n",

      "gmm = mixture.GMM(n_components=num_clusters, covariance_type=cv_types[3])\n",

      "gmm.fit(X)"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [

      {

       "metadata": {},

       "output_type": "pyout",

       "prompt_number": 9,

       "text": [

        "GMM(covariance_type='full', init_params='wmc', min_covar=0.001,\n",

        "  n_components=6, n_init=1, n_iter=100, params='wmc', random_state=None,\n",

        "  thresh=None, tol=0.001, verbose=0)"

       ]

      }

     ],

     "prompt_number": 9

    },

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "CDMX_COORDINATES = (19.4186, -99.1686) \n",

      "\n",

      "#create empty map zoomed in on Mexico City\n",

      "station_attrs = pd.read_csv('C:\\Users\\ghost\\Google Drive\\JairCastruitaGastelum\\Progress Reports\\Clustering_ECOBICI\\DATA\\estacion_coordenadas.csv')"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [],

     "prompt_number": 10

    },

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "color_iter = ['blue', 'red', 'green', '#e017e5', '#d8f604', '#15d4e2', '#e21569']\n",

      "by_hour_in.columns = by_hour_in.columns.astype(str) + ' in'\n",

      "by_hour_out.columns = by_hour_out.columns.astype(str) + ' out'\n",

      "by_hour_concat = pd.concat([by_hour_in.loc['weekday'],by_hour_out.loc['weekday']],axis=1)"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [],

     "prompt_number": 11

    },

    {

     "cell_type": "code",

     "collapsed": false,

     "input": [

      "map = folium.Map(location=CDMX_COORDINATES, tiles='Stamen Toner', zoom_start=14)\n",

      "Y_ = gmm.predict(X)\n",

      "plt.figure(figsize=(16,8))\n",

      "\n",

      "for i, (mean, covar, color) in enumerate(zip(\n",

      "        gmm.means_, gmm._get_covars(), color_iter)):\n",

      "        \n",

      "    if not np.any(Y_ == i):\n",

      "        continue   \n",

      "        \n",

      "    plt.plot(mean,color=color)\n",

      "    \n",

      "    for attr in range(len(Y_)):\n",

      "        if Y_[attr] == i:\n",

      "            alt = station_attrs[station_attrs['id'] == by_hour.columns[attr]]['longitud'].values[0]\n",

      "            lat = station_attrs[station_attrs['id'] == by_hour.columns[attr]]['latitud'].values[0]\n",

      "            \n",

      "            counts = vincent.Bar(by_hour_concat[[str(by_hour.columns[attr])+' in',str(by_hour.columns[attr])+' out']], width=650, height=300)\n",

      "            counts.axis_titles(x='Hora', y='Estacion '+str(int(by_hour.columns[attr]))+': Valor Promedio en un dia')\n",

      "            counts.to_json(str(by_hour.columns[attr])+'.json')\n",

      "            counts.legend(title=str(station_attrs.iloc[attr,0]))\n",

      "                         \n",

      "            folium.RegularPolygonMarker(location=[lat,alt], \n",

      "                                popup=folium.Popup(max_width=700).add_child(\n",

      "                                                                folium.Vega(json.load(open(str(by_hour.columns[attr])+'.json')), \n",

      "                                                                width=700, \n",

      "                                                                height=350)),\n",

      "                                fill_color=color,\n",

      "                                number_of_sides=8,\n",

      "                                radius=10).add_to(map)\n",

      "    \n",

      "map"

     ],

     "language": "python",

     "metadata": {},

     "outputs": [

      {

       "html": [
	{

	"metadata": {

	"name": "",

	"signature": "sha256:a8398ce40087121501e924c32e900fecbc5db8dd5d2713c651e50d79a75f3012"

	},

	"nbformat": 3,

	"nbformat_minor": 0,

	"worksheets": [

	{

	"cells": [

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"%matplotlib inline\n",

	"import matplotlib.pyplot as plt\n",

	"import pandas as pd\n",

	"import seaborn as sns; sns.set()\n",

	"import folium\n",

	"\n",

	"import vincent\n",

	"vincent.core.initialize_notebook()\n",

	"import json\n",

	"from vincent import AxisProperties, PropertySet, ValueRef"

	],

	"language": "python",

	"metadata": {},

	"outputs": [

	{

	"html": [

	"\n",

	" <script>\n",

	" \n",

	" function vct_load_lib(url, callback){\n",

	" if(typeof d3 !== 'undefined' &&\n",

	" url === 'http://d3js.org/d3.v3.min.js'){\n",

	" callback()\n",

	" }\n",

	" var s = document.createElement('script');\n",

	" s.src = url;\n",

	" s.async = true;\n",

	" s.onreadystatechange = s.onload = callback;\n",

	" s.onerror = function(){\n",

	" console.warn(\"failed to load library \" + url);\n",

	" };\n",

	" document.getElementsByTagName(\"head\")[0].appendChild(s);\n",

	" };\n",

	" var vincent_event = new CustomEvent(\n",

	" \"vincent_libs_loaded\",\n",

	" {bubbles: true, cancelable: true}\n",

	" );\n",

	" \n",

	" function load_all_libs(){\n",

	" console.log('Loading Vincent libs...')\n",

	" vct_load_lib('http://d3js.org/d3.v3.min.js', function(){\n",

	" vct_load_lib('http://d3js.org/d3.geo.projection.v0.min.js', function(){\n",

	" vct_load_lib('http://wrobstory.github.io/d3-cloud/d3.layout.cloud.js', function(){\n",

	" vct_load_lib('http://wrobstory.github.io/vega/vega.v1.3.3.js', function(){\n",

	" window.dispatchEvent(vincent_event);\n",

	" });\n",

	" });\n",

	" });\n",

	" });\n",

	" };\n",

	" if(typeof define === \"function\" && define.amd){\n",

	" if (window['d3'] === undefined \|\|\n",

	" window['topojson'] === undefined){\n",

	" require.config(\n",

	" {paths: {\n",

	" d3: 'http://d3js.org/d3.v3.min',\n",

	" topojson: 'http://d3js.org/topojson.v1.min'\n",

	" }\n",

	" }\n",

	" );\n",

	" require([\"d3\"], function(d3){\n",

	" console.log('Loading Vincent from require.js...')\n",

	" window.d3 = d3;\n",

	" require([\"topojson\"], function(topojson){\n",

	" window.topojson = topojson;\n",

	" load_all_libs();\n",

	" });\n",

	" });\n",

	" } else {\n",

	" load_all_libs();\n",

	" };\n",

	" }else{\n",

	" console.log('Require.js not found, loading manually...')\n",

	" load_all_libs();\n",

	" };\n",

	"\n",

	" </script>"

	],

	"metadata": {},

	"output_type": "display_data",

	"text": [

	"<IPython.core.display.HTML at 0xa554e48>"

	]

	}

	],

	"prompt_number": 1

	},

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"path = r'D:\\ownCloud\\Documentos posgrado\\Posgrado 2do semestre\\Seminario investigacion\\Experimental Data\\Cd Mex\\Movilidad\\Hist Ecobici\\ecobici.csv'\n",

	"trips = pd.read_csv(path,\n",

	" parse_dates=['date_removed', 'date_arrived'],\n",

	" infer_datetime_format=True)\n",

	"\n",

	"trips = trips.loc[trips['action'] != 'C ']\n",

	"\n",

	"# Find the start date\n",

	"ind = pd.DatetimeIndex(trips.date_arrived)\n",

	"trips['date'] = ind.date.astype('datetime64')\n",

	"trips['hour'] = ind.hour\n",

	"\n",

	"# Count trips by date\n",

	"by_date = trips.pivot_table('bike',aggfunc='count',\n",

	" index='date',\n",

	" columns='station_arrived', )\n",

	"\n",

	"by_date.drop([col for col in list(by_date.columns.values) if col > 275], \n",

	" axis=1, inplace=True)\n",

	"\n",

	"# add a flag indicating weekend\n",

	"by_date['weekend'] = (by_date.index.dayofweek >= 5)\n",

	"\n",

	"by_date_weekday = by_date[by_date['weekend'] == False]\n",

	"\n",

	"#mavg_wd = pd.rolling_mean(by_date_weekday.sum(axis=1), 20)\n",

	"#mstd_wd = pd.rolling_std(by_date_weekday.sum(axis=1), 20)\n",

	"mavg_wd = by_date_weekday.sum(axis=1).rolling(window=20,center=False).mean()\n",

	"mstd_wd = by_date_weekday.sum(axis=1).rolling(window=20,center=False).std()\n",

	"\n",

	"black_list = by_date_weekday.sum(axis=1)[by_date_weekday.sum(axis=1) <= mavg_wd-2*mstd_wd].index\n",

	"\n",

	"by_date_weekend = by_date[by_date['weekend'] == True]\n",

	"\n",

	"#mavg_we = pd.rolling_mean(by_date_weekend.sum(axis=1), 20)\n",

	"#mstd_we = pd.rolling_std(by_date_weekend.sum(axis=1), 20)\n",

	"mavg_we = by_date_weekend.sum(axis=1).rolling(window=20,center=False).mean()\n",

	"mstd_we = by_date_weekend.sum(axis=1).rolling(window=20,center=False).std()\n",

	"\n",

	"black_list = black_list.union(by_date_weekend.sum(axis=1)[by_date_weekend.sum(axis=1) <= mavg_we-2*mstd_we].index)"

	],

	"language": "python",

	"metadata": {},

	"outputs": [

	{

	"output_type": "stream",

	"stream": "stderr",

	"text": [

	"C:\\Users\\ghost\\AppData\\Local\\Enthought\\Canopy\\App\\appdata\\canopy-1.5.2.2785.win-x86_64\\lib\\site-packages\\IPython\\core\\interactiveshell.py:2741: DtypeWarning: Columns (0,1) have mixed types. Specify dtype option on import or set low_memory=False.\n",

	" interactivity=interactivity, compiler=compiler)\n"

	]

	}

	],

	"prompt_number": 2

	},

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"trips_in = pd.read_csv(path,\n",

	" parse_dates=['date_removed', 'date_arrived'],\n",

	" infer_datetime_format=True)\n",

	"\n",

	"trips_in = trips_in.loc[trips_in['action'] != 'C ']\n",

	"\n",

	"# Find the start date\n",

	"ind = pd.DatetimeIndex(trips.date_arrived)\n",

	"trips_in['date'] = ind.date.astype('datetime64')\n",

	"trips_in['hour'] = ind.hour\n",

	"\n",

	"# remove those values on the blacklist\n",

	"trips_in = trips_in.loc[~trips_in.date.isin(black_list)]"

	],

	"language": "python",

	"metadata": {},

	"outputs": [],

	"prompt_number": 3

	},

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"trips_out = pd.read_csv(path,\n",

	" parse_dates=['date_removed', 'date_arrived'],\n",

	" infer_datetime_format=True)\n",

	"\n",

	"trips_out = trips_out.loc[trips_out['action'] != 'C ']\n",

	"\n",

	"# Find the start date\n",

	"ind = pd.DatetimeIndex(trips.date_removed)\n",

	"trips_out['date'] = ind.date.astype('datetime64')\n",

	"trips_out['hour'] = ind.hour\n",

	"\n",

	"# remove those values on the blacklist\n",

	"trips_out = trips_out.loc[~trips_out.date.isin(black_list)]"

	],

	"language": "python",

	"metadata": {},

	"outputs": [],

	"prompt_number": 4

	},

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"#trips_out\n",

	"#trips_out = trips_out.loc[~trips_out.date.isin(black_list)]\n",

	"#trips_out.loc[~trips_out.date.isin(black_list)]\n",

	"#black_list"

	],

	"language": "python",

	"metadata": {},

	"outputs": [],

	"prompt_number": 5

	},

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"# count trips by date and by hour\n",

	"by_hour_out = trips_out.pivot_table('bike', aggfunc='count',\n",

	" index=['date', 'hour'],\n",

	" columns='station_removed').fillna(0).reset_index('hour')\n",

	"\n",

	"# average these counts by weekend\n",

	"by_hour_out['weekend'] = (by_hour_out.index.dayofweek >= 5)\n",

	"by_hour_out = by_hour_out.groupby(['weekend', 'hour']).mean()\n",

	"by_hour_out.index.set_levels([['weekday', 'weekend'],\n",

	" [\"{0}:00\".format(i) for i in range(24)]],\n",

	" inplace=True);\n",

	"\n",

	"by_hour_out.drop([col for col in list(by_hour_out.columns.values) if col > 275], axis=1, inplace=True)"

	],

	"language": "python",

	"metadata": {},

	"outputs": [],

	"prompt_number": 6

	},

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"# count trips by date and by hour\n",

	"by_hour_in = trips_in.pivot_table('bike', aggfunc='count',\n",

	" index=['date', 'hour'],\n",

	" columns='station_arrived').fillna(0).reset_index('hour')\n",

	"\n",

	"# average these counts by weekend\n",

	"by_hour_in['weekend'] = (by_hour_in.index.dayofweek >= 5)\n",

	"by_hour_in = by_hour_in.groupby(['weekend', 'hour']).mean()\n",

	"by_hour_in.index.set_levels([['weekday', 'weekend'],\n",

	" [\"{0}:00\".format(i) for i in range(24)]],\n",

	" inplace=True);\n",

	"\n",

	"by_hour_in.drop([col for col in list(by_hour_in.columns.values) if col > 275], axis=1, inplace=True)"

	],

	"language": "python",

	"metadata": {},

	"outputs": [],

	"prompt_number": 7

	},

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"#by_hour = pd.concat([by_hour_in.loc['weekday'],by_hour_out.loc['weekday']],axis=0)\n",

	"by_hour = pd.concat([by_hour_in,by_hour_out],axis=0)"

	],

	"language": "python",

	"metadata": {},

	"outputs": [],

	"prompt_number": 8

	},

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"from sklearn import mixture \n",

	"\n",

	"X = by_hour.loc['weekday'].values.T\n",

	"num_clusters = 6\n",

	"cv_types = ['spherical', 'tied', 'diag', 'full']\n",

	"\n",

	"gmm = mixture.GMM(n_components=num_clusters, covariance_type=cv_types[3])\n",

	"gmm.fit(X)"

	],

	"language": "python",

	"metadata": {},

	"outputs": [

	{

	"metadata": {},

	"output_type": "pyout",

	"prompt_number": 9,

	"text": [

	"GMM(covariance_type='full', init_params='wmc', min_covar=0.001,\n",

	" n_components=6, n_init=1, n_iter=100, params='wmc', random_state=None,\n",

	" thresh=None, tol=0.001, verbose=0)"

	]

	}

	],

	"prompt_number": 9

	},

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"CDMX_COORDINATES = (19.4186, -99.1686) \n",

	"\n",

	"#create empty map zoomed in on Mexico City\n",

	"station_attrs = pd.read_csv('C:\\Users\\ghost\\Google Drive\\JairCastruitaGastelum\\Progress Reports\\Clustering_ECOBICI\\DATA\\estacion_coordenadas.csv')"

	],

	"language": "python",

	"metadata": {},

	"outputs": [],

	"prompt_number": 10

	},

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"color_iter = ['blue', 'red', 'green', '#e017e5', '#d8f604', '#15d4e2', '#e21569']\n",

	"by_hour_in.columns = by_hour_in.columns.astype(str) + ' in'\n",

	"by_hour_out.columns = by_hour_out.columns.astype(str) + ' out'\n",

	"by_hour_concat = pd.concat([by_hour_in.loc['weekday'],by_hour_out.loc['weekday']],axis=1)"

	],

	"language": "python",

	"metadata": {},

	"outputs": [],

	"prompt_number": 11

	},

	{

	"cell_type": "code",

	"collapsed": false,

	"input": [

	"map = folium.Map(location=CDMX_COORDINATES, tiles='Stamen Toner', zoom_start=14)\n",

	"Y_ = gmm.predict(X)\n",

	"plt.figure(figsize=(16,8))\n",

	"\n",

	"for i, (mean, covar, color) in enumerate(zip(\n",

	" gmm.means_, gmm._get_covars(), color_iter)):\n",

	" \n",

	" if not np.any(Y_ == i):\n",

	" continue \n",

	" \n",

	" plt.plot(mean,color=color)\n",

	" \n",

	" for attr in range(len(Y_)):\n",

	" if Y_[attr] == i:\n",

	" alt = station_attrs[station_attrs['id'] == by_hour.columns[attr]]['longitud'].values[0]\n",

	" lat = station_attrs[station_attrs['id'] == by_hour.columns[attr]]['latitud'].values[0]\n",

	" \n",

	" counts = vincent.Bar(by_hour_concat[[str(by_hour.columns[attr])+' in',str(by_hour.columns[attr])+' out']], width=650, height=300)\n",

	" counts.axis_titles(x='Hora', y='Estacion '+str(int(by_hour.columns[attr]))+': Valor Promedio en un dia')\n",

	" counts.to_json(str(by_hour.columns[attr])+'.json')\n",

	" counts.legend(title=str(station_attrs.iloc[attr,0]))\n",

	" \n",

	" folium.RegularPolygonMarker(location=[lat,alt], \n",

	" popup=folium.Popup(max_width=700).add_child(\n",

	" folium.Vega(json.load(open(str(by_hour.columns[attr])+'.json')), \n",

	" width=700, \n",

	" height=350)),\n",

	" fill_color=color,\n",

	" number_of_sides=8,\n",

	" radius=10).add_to(map)\n",

	" \n",

	"map"

	],

	"language": "python",

	"metadata": {},

	"outputs": [

	{

	"html": [