First, a quick code example of K-Means in Scikit-learn
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
# Number of clusters to generate.
n_centers = 5
# make_blobs takes the cluster count through its `centers` keyword; it has no
# `n_centers` parameter, so passing that name raises TypeError.
X, _ = make_blobs(n_samples=10000, centers=n_centers)
from collections import namedtuple | |
import vaex | |
import time | |
import orjson | |
import os | |
import psutil | |
from pyarrow import flight | |
import pyarrow as pa |
from collections import namedtuple | |
import vaex | |
import time | |
import orjson | |
import os | |
import psutil | |
from pyarrow import flight | |
import pyarrow as pa |
# Download the Dremio ODBC driver RPM (version 1.4.2.1003).
wget https://download.dremio.com/odbc-driver/1.4.2.1003/dremio-odbc-1.4.2.1003-1.x86_64.rpm
# Install alien (package converter) and the unixODBC development files.
sudo apt-get install alien unixodbc-dev -y
# Convert the RPM with alien; the next step expects the resulting .deb to
# carry release "-2" (presumably alien's default release bump -- verify the
# generated file name before installing).
sudo alien dremio-odbc-1.4.2.1003-1.x86_64.rpm
# Install the converted Debian package.
sudo dpkg -i dremio-odbc_1.4.2.1003-2_amd64.deb
#!/bin/bash | |
#SBATCH --job-name spark-cluster | |
#SBATCH --account=qh82 | |
#SBATCH --time=02:00:00 | |
# --- Master resources --- | |
#SBATCH --nodes=1 | |
#SBATCH --mem-per-cpu=1G | |
#SBATCH --cpus-per-task=1 | |
#SBATCH --ntasks-per-node=1 | |
# --- Worker resources --- |
#!/usr/bin/env bash
# Abort on the first failing command (-e) and on use of an unset variable (-u).
set -eu
# Absolute path of the directory that contains this script.
# NOTE(review): this overwrites the shell's own PWD variable; the name is kept
# because later parts of the script (not visible here) may read it.
PWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# Parent of the script directory, resolved by realpath.
SRC_DIR=$(realpath "${PWD}/..")
# "cpp" subdirectory of SRC_DIR.
CXX_SRC=${SRC_DIR}/cpp
# The following can be set
# Default CMAKE to "cmake" when it is unset or empty.
: "${CMAKE:=cmake}"
#!/bin/bash | |
## This gist contains step by step instructions to install cuda v9.0 and cudnn 7.3 in ubuntu 18.04 | |
### steps #### | |
# verify the system has a cuda-capable gpu | |
# download and install the nvidia cuda toolkit and cudnn | |
# set up environment variables | |
# verify the installation | |
### |