Skip to content

Instantly share code, notes, and snippets.

@cameres
Last active December 2, 2016 08:04
Show Gist options
  • Save cameres/2a00e7206d04e7cab498656b3066e863 to your computer and use it in GitHub Desktop.
Save cameres/2a00e7206d04e7cab498656b3066e863 to your computer and use it in GitHub Desktop.
Scripts & Files Relevant to Local Hadoop Install on OS X 10.11.
# These aliases can be appended to your .profile, .bashrc, .zshrc, etc.
#
# Set HADOOP_VERSION to the version that brew installed (the author
# installed hadoop 2.7.3). Keeping the version in one variable avoids
# editing every path by hand; a literal "<version>" left in a path would
# be parsed by the shell as redirections when the alias runs.
HADOOP_VERSION="2.7.3"
HADOOP_SBIN="/usr/local/Cellar/hadoop/${HADOOP_VERSION}/sbin"

# hstart: run start-dfs.sh, then start-yarn.sh.
# The double quotes expand ${HADOOP_SBIN} once, when the alias is defined.
alias hstart="${HADOOP_SBIN}/start-dfs.sh;${HADOOP_SBIN}/start-yarn.sh"

# hstop: run stop-yarn.sh, then stop-dfs.sh (reverse of hstart).
alias hstop="${HADOOP_SBIN}/stop-yarn.sh;${HADOOP_SBIN}/stop-dfs.sh"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
File: /usr/local/Cellar/hadoop/<version>/libexec/etc/hadoop/core-site.xml
(replace <version> with the Hadoop version installed by brew).
This comment must stay below the XML declaration: nothing may precede
the declaration in a well-formed document.
-->
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <!-- Default file system URI: HDFS on localhost, port 9000. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>
# File to edit: /usr/local/Cellar/hadoop/2.7.3/libexec/etc/hadoop/hadoop-env.sh
# Find the existing line:
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# and replace it with the line below, which additionally passes empty
# java.security.krb5.realm and java.security.krb5.kdc system properties to the JVM:
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
File: /usr/local/Cellar/hadoop/<version>/libexec/etc/hadoop/hdfs-site.xml
(replace <version> with the Hadoop version installed by brew).
This comment must stay below the XML declaration: nothing may precede
the declaration in a well-formed document.
-->
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <!--
  Keep a single replica of each block. A local, single-machine install
  presumably runs only one DataNode, so a higher factor could not be met.
  -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
# Install Homebrew using curl & bash.
# The Ruby-based installer (Homebrew/install/master/install) has been
# deprecated upstream in favour of the bash script install.sh.
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
# Install Hadoop from brew.
# *Note the version number* that brew reports: it is part of the path
# to the Hadoop files used later.
brew install hadoop
# Reference: AAS by Ryza, Laserson, Owen, and Wills.
# Prepare the dataset in a local working directory.
# -p keeps the script re-runnable if the directory already exists.
mkdir -p linkage
cd linkage
wget https://archive.ics.uci.edu/ml/machine-learning-databases/00210/donation.zip
unzip donation.zip
# The pattern is quoted so that unzip itself expands it and extracts
# every matching archive, instead of the shell expanding it first.
unzip 'block_*.zip'
# Push the dataset to HDFS; only the CSV files are wanted.
# NOTE: requires HDFS to be configured and running for
# pseudo-distributed work.
# The relative path resolves against the user's HDFS home directory;
# -p also creates that home directory when it does not exist yet
# (as on a freshly formatted HDFS), where a plain -mkdir would fail.
hdfs dfs -mkdir -p linkage
hdfs dfs -put block_*.csv linkage
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
File: /usr/local/Cellar/hadoop/<version>/libexec/etc/hadoop/mapred-site.xml
(replace <version> with the Hadoop version installed by brew).
This comment must stay below the XML declaration: nothing may precede
the declaration in a well-formed document.
-->
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <!--
  Job tracker address. Hadoop 2.x lists the older key mapred.job.tracker
  as deprecated in favour of mapreduce.jobtracker.address, so the current
  name is used here with the same value.
  NOTE(review): this key does not by itself route MapReduce jobs to YARN;
  that presumably needs mapreduce.framework.name set to yarn. Confirm
  whether that is wanted for this install.
  -->
  <property>
    <name>mapreduce.jobtracker.address</name>
    <value>localhost:9010</value>
  </property>
</configuration>
# Read the linkage CSV files from HDFS into a DataFrame.
# Replace <username> with your own user name. The first row of each file
# is treated as a header and '?' is read as a null value.
linkage_path = "hdfs://localhost:9000/user/<username>/linkage"
csv_reader = sqlContext.read
parsed = csv_reader.csv(linkage_path, header=True, nullValue='?')

# Fetch the first 10 rows as a quick sanity check.
parsed.take(10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment