# Refresh the package index, then upgrade every installed package.
sudo apt update \
  && sudo apt upgrade -y
# Install the basic development toolchain and common utilities.
sudo apt install -y \
  build-essential gcc g++ make cmake \
  unzip git wget curl htop
# Remove the NVIDIA driver packages that are already installed.
# The pattern is quoted so the shell hands it to apt verbatim instead of
# expanding it against file names in the current directory.
sudo apt purge '*nvidia*'
# Install the driver: first list the detected devices and their driver
# candidates, then install the chosen driver package.
ubuntu-drivers devices
sudo apt install nvidia-driver-580-server
# Reboot so the newly installed driver gets loaded. Nothing after this line
# runs in the same session; resume with the next command once the machine
# is back up.
sudo reboot
# Verify the NVIDIA driver installation.
nvidia-smi
# Install CUDA. Download page: https://developer.nvidia.com/cuda-downloads
# Fetch the local runfile installer (a single URL argument; a stray extra
# "wget" word would be treated as a second URL and make the command fail).
wget https://developer.download.nvidia.com/compute/cuda/13.0.2/local_installers/cuda_13.0.2_580.95.05_linux.run
# Run the installer as root.
# NOTE(review): the runfile name carries driver version 580.95.05; since a
# driver is installed separately, the installer's driver component may need to
# be deselected — confirm.
sudo sh cuda_13.0.2_580.95.05_linux.run
# Install uv via its official install script.
# (The download is slow and may take several minutes.)
curl -LsSf https://astral.sh/uv/install.sh | sh
# Point uv at a mainland-China PyPI mirror by persisting UV_INDEX_URL in
# ~/.bashrc, then reload ~/.bashrc so the current shell picks it up.
echo 'export UV_INDEX_URL=https://mirrors.aliyun.com/pypi/simple/' >> ~/.bashrc
source ~/.bashrc
# Install vLLM into a dedicated virtual environment under ~/.venv.
# -p keeps the step re-runnable when ~/.venv already exists, and the absolute
# path makes the cd independent of the current working directory.
mkdir -p ~/.venv
cd ~/.venv
# Create the "vllm" virtual environment and activate it.
uv venv vllm
source vllm/bin/activate
# Install vLLM into the active environment.
uv pip install vllm
# Install the modelscope CLI, then use it to download the model.
uv pip install modelscope
# Download Qwen/Qwen3-32B into the local model directory.
modelscope download \
  --model Qwen/Qwen3-32B \
  --local_dir /data/vllm/models/Qwen3-32B
# Start the vLLM server for the downloaded model.
# GPUs 0 and 1 are exposed to the process and tensor parallelism is set to 2.
# "YOUR_API_KEY" is a placeholder value.
CUDA_VISIBLE_DEVICES=0,1 vllm serve /data/vllm/models/Qwen3-32B \
  --tensor-parallel-size 2 \
  --api-key "YOUR_API_KEY" \
  --max-model-len 16384 \
  --port 8000