Method 1: The official Apache tarball (good for learning and testing)
1. Prepare the environment
# Update the system
sudo apt update && sudo apt upgrade -y
# Install Java (Hadoop 3.x requires Java 8 or 11)
sudo apt install openjdk-11-jdk -y
# Verify the Java installation
java -version
# Create a dedicated hadoop user (optional but recommended)
sudo adduser hadoop
sudo usermod -aG sudo hadoop
su - hadoop
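# Note: the JAVA_HOME value used below assumes an amd64 JDK; on other
# architectures (e.g. arm64) the directory differs, so confirm the real path
readlink -f "$(which java)"   # e.g. /usr/lib/jvm/java-11-openjdk-amd64/bin/java
# JAVA_HOME is this path minus the trailing /bin/java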
2. Download and install Hadoop
# Download Hadoop (pick the latest stable release; 3.3.6 is used throughout)
wget https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
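# Optional but recommended: verify the download against Apache's published
# checksum (if your sha512sum rejects the file's format, compare digests by eye)
wget https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz.sha512
sha512sum -c hadoop-3.3.6.tar.gz.sha512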
# Unpack and move into place
tar -xzvf hadoop-*.tar.gz
sudo mv hadoop-3.3.6 /opt/hadoop
# Give the hadoop user ownership so it can write logs and data
sudo chown -R hadoop:hadoop /opt/hadoop
# Set environment variables
echo 'export HADOOP_HOME=/opt/hadoop' >> ~/.bashrc
echo 'export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin' >> ~/.bashrc
echo 'export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64' >> ~/.bashrc
source ~/.bashrc
# Verify the installation
hadoop version
3. Configure pseudo-distributed mode (single node)
# Edit the Hadoop configuration files
cd /opt/hadoop/etc/hadoop
# Configure core-site.xml
cat > core-site.xml << EOF
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>
EOF
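# Optional: HDFS state defaults to hadoop.tmp.dir under /tmp, which may be
# cleared on reboot and force a re-format; to persist it, add a property like
# the following to core-site.xml (/opt/hadoop/tmp is just an example path):
#   <property>
#     <name>hadoop.tmp.dir</name>
#     <value>/opt/hadoop/tmp</value>
#   </property>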
# Configure hdfs-site.xml (single node, so a replication factor of 1)
cat > hdfs-site.xml << EOF
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
EOF
# Configure hadoop-env.sh (JAVA_HOME must be set explicitly here)
echo "export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64" >> hadoop-env.sh
# Format HDFS (first run only)
hdfs namenode -format
# Start HDFS
start-dfs.sh
# Check the running Java processes
jps
# You should see: NameNode, DataNode, SecondaryNameNode
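# Optional sanity check: the NameNode web UI listens on port 9870 by default
# in Hadoop 3.x; an HTTP 200 response means the daemon is up and serving
curl -sI http://localhost:9870/ | head -n 1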
Method 2: Docker (the fastest way)
# Install Docker (if not already installed)
curl -fsSL https://get.docker.com -o get-docker.sh
sudo sh get-docker.sh
# Run a single NameNode container (the bde2020 project publishes per-component
# images rather than one combined "hadoop" image; check Docker Hub for current tags)
docker run -it --name namenode -p 9870:9870 -e CLUSTER_NAME=test \
  bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
# Or use docker-compose (a minimal two-service sketch; see the
# big-data-europe/docker-hadoop repository for a complete, tested file)
cat > docker-compose.yml << EOF
version: '3'
services:
  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
    container_name: namenode
    environment:
      - CLUSTER_NAME=test
    ports:
      - "9870:9870"
    volumes:
      - ./name:/hadoop/dfs/name
  datanode:
    image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
    container_name: datanode
    environment:
      - CORE_CONF_fs_defaultFS=hdfs://namenode:9000
    ports:
      - "9864:9864"
    volumes:
      - ./data:/hadoop/dfs/data
EOF
docker-compose up -d
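# The container names below match the compose file above; verify that HDFS
# is healthy from inside the namenode container
docker ps
docker exec -it namenode hdfs dfsadmin -report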
Method 3: Automated install script
#!/bin/bash
# save as install-hadoop.sh
# Install Java and prerequisites
sudo apt install -y openjdk-11-jdk wget ssh pdsh
# Download Hadoop
wget https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
tar -xzf hadoop-3.3.6.tar.gz
sudo mv hadoop-3.3.6 /usr/local/hadoop
# Configure environment variables (system-wide, hence the sudo run below)
echo 'export HADOOP_HOME=/usr/local/hadoop' >> /etc/profile
echo 'export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin' >> /etc/profile
echo 'export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop' >> /etc/profile
source /etc/profile
# Generate SSH keys for passwordless login
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
# Test SSH (should log in without a password prompt)
ssh -o StrictHostKeyChecking=accept-new localhost exit
# To run the script:
# chmod +x install-hadoop.sh
# sudo ./install-hadoop.sh
# Note: under sudo, ~ resolves to /root, so the SSH keys above are created
# for root; run the SSH steps as the user who will actually start Hadoop
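# The script installs the software only; before starting HDFS, apply the same
# core-site.xml / hdfs-site.xml configuration shown in Method 1, then:
hdfs namenode -format
start-dfs.sh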
Verify the installation
# Check the installed version
hadoop version
# Run a quick test (requires HDFS to be running)
hdfs dfs -mkdir /test
hdfs dfs -ls /
# Access the web UI (if the services are running)
# NameNode: http://localhost:9870
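# Further smoke test: run the bundled MapReduce pi estimator (adjust the jar
# version to match your install; without YARN configured it uses the local
# job runner)
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.6.jar pi 2 5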
Common commands
# Start services
start-dfs.sh   # start HDFS
start-yarn.sh  # start YARN
# Stop services
stop-dfs.sh
stop-yarn.sh
# HDFS operations
hdfs dfs -mkdir /user
hdfs dfs -put localfile /user/
hdfs dfs -cat /user/file
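# Example round trip: write a local file into HDFS, read it back, clean up
echo "hello hadoop" > localfile
hdfs dfs -put localfile /user/
hdfs dfs -cat /user/localfile
hdfs dfs -rm /user/localfile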
Notes
Memory: Hadoop needs plenty of memory; at least 4 GB is recommended
Permissions: make sure your user has appropriate permissions on the Hadoop directories
Firewall: disable the firewall or open the relevant ports (9000, 9870, 9864, etc.); see the ufw example after this list
Learning path: start in standalone mode, then move on to pseudo-distributed and fully distributed setups
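# Opening the ports with ufw (Ubuntu's default firewall frontend):
sudo ufw allow 9000/tcp   # HDFS NameNode RPC (fs.defaultFS above)
sudo ufw allow 9870/tcp   # NameNode web UI
sudo ufw allow 9864/tcp   # DataNode web UI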
The fastest route is Docker, which can be up and running in about five minutes. For production, Method 1 with thorough, cluster-specific configuration is recommended.