非编译运行
streaming运行
创建实验目录,并上传单词库文件
mkdir -p /opt/WordCount
cd /opt/WordCount/
sudo rz
hdfs dfs -mkdir -p /usr/admin/input
hdfs dfs -put input.txt /usr/admin/input
创建map.sh和reduce.sh两个文件
#!/bin/bash
# map.sh
<NolebasePageProperties />
# Mapper: read lines from stdin and emit "word<TAB>1" for every
# whitespace-separated word (Hadoop streaming key/value format).
# IFS= and -r keep leading whitespace and literal backslashes intact.
while IFS= read -r line; do
  # $line is intentionally unquoted: we want word-splitting here.
  for word in $line; do
    # printf instead of echo -e: portable, no surprise escape handling.
    printf '%s\t1\n' "$word"
  done
done
#!/bin/bash
# reduce.sh
# Reducer: sum the counts per word. Input lines arrive sorted by key from
# the streaming framework as "word<TAB>count"; -F'\t' makes the tab
# delimiter explicit (the mapper emits tab-separated pairs).
awk -F'\t' '{count[$1] += $2} END {for (w in count) print w, count[w]}'
执行运行代码
# Run the streaming job. The interactive continuation prompts ("> ") from
# the original terminal paste are removed so the command can be copied and
# executed as-is. -file ships the two scripts to every task node.
hadoop jar "$HADOOP_HOME"/share/hadoop/tools/lib/hadoop-streaming-*.jar \
  -input /usr/admin/input \
  -output /usr/admin/output \
  -mapper map.sh \
  -reducer reduce.sh \
  -file map.sh \
  -file reduce.sh
查看运行结果
hdfs dfs -cat /usr/admin/output/part-00000
使用example库
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar wordcount /usr/admin/input /usr/admin/output3
查看结果
hdfs dfs -cat /usr/admin/output3/part-r-00000
编译运行
编译Java
添加环境变量
sudo vi /etc/profile
export HADOOP_CLASSPATH=$(hadoop classpath)
更新环境变量
source /etc/profile
编译Jar包
上传Java文件并编译
sudo rz
mkdir WordCount_class
javac -classpath $HADOOP_CLASSPATH -d WordCount_class/ WordCount.java
jar -cvf WordCount.jar -C WordCount_class/ .
运行编译的Jar包
hadoop jar WordCount.jar WordCount /usr/admin/input /usr/admin/output2
查看运行结果
hdfs dfs -cat /usr/admin/output2/part-r-00000