# Ubuntu 24: install the NVIDIA GPU driver, CUDA, and vLLM

# Refresh the package index and, if that succeeds, upgrade installed packages.
sudo apt update \
  && sudo apt upgrade -y

# Install the basic development tools.
sudo apt install -y \
  build-essential gcc g++ make cmake \
  unzip git wget curl htop

# Remove any NVIDIA driver packages that are already installed.
# The pattern is quoted so that apt receives it verbatim; unquoted, the shell
# would first try to expand *nvidia* against file names in the current
# directory and could hand apt the wrong arguments.
sudo apt purge '*nvidia*'

# List the detected GPU and the driver packages Ubuntu recommends for it,
# then install the driver and reboot.
ubuntu-drivers devices
sudo apt install nvidia-driver-580-server
sudo reboot

# After the reboot: verify that the NVIDIA driver is installed and working.
nvidia-smi

# Install CUDA: https://developer.nvidia.com/cuda-downloads
# Download the CUDA 13.0.2 runfile installer. (The original command repeated
# the word "wget", which made wget try to fetch a URL named "wget" and exit
# with an error status.)
wget https://developer.download.nvidia.com/compute/cuda/13.0.2/local_installers/cuda_13.0.2_580.95.05_linux.run

# Install the toolkit only. The runfile also bundles driver 580.95.05, but the
# driver has already been installed through apt, and installing it a second
# time from the runfile can conflict with the packaged one.
# --silent runs without the interactive menu and implies accepting the EULA;
# drop both flags to get the menu back (then deselect "Driver" there).
sudo sh cuda_13.0.2_580.95.05_linux.run --silent --toolkit

# Post-installation step from NVIDIA's installation guide: make the toolkit's
# binaries and libraries visible to new shells.
echo 'export PATH=/usr/local/cuda-13.0/bin${PATH:+:${PATH}}' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}' >> ~/.bashrc

# Install uv.
# Note: the download is slow and may take several minutes.
curl -LsSf https://astral.sh/uv/install.sh | sh

# Point uv at a PyPI mirror hosted in mainland China (Aliyun).
# UV_DEFAULT_INDEX is used instead of UV_INDEX_URL, which uv documents as
# deprecated in favour of UV_DEFAULT_INDEX.
echo 'export UV_DEFAULT_INDEX=https://mirrors.aliyun.com/pypi/simple/' >> ~/.bashrc
# Reload the shell configuration so the setting applies to the current session.
source ~/.bashrc

# Install vLLM into a dedicated virtual environment at ~/.venv/vllm.
# -p keeps mkdir from failing when ~/.venv already exists, and the absolute
# path makes cd work regardless of the directory these steps are run from
# (a relative "cd .venv" only works when the current directory is $HOME).
mkdir -p ~/.venv
cd ~/.venv
uv venv vllm
source vllm/bin/activate
uv pip install vllm

# Install modelscope, then download the Qwen3-32B model into the local
# model directory.
uv pip install modelscope
modelscope download \
  --model Qwen/Qwen3-32B \
  --local_dir /data/vllm/models/Qwen3-32B

# Start vLLM: serve the downloaded model on port 8000 with GPUs 0 and 1
# visible and a tensor-parallel size of 2.
# Replace YOUR_API_KEY with the key clients must present.
CUDA_VISIBLE_DEVICES=0,1 vllm serve /data/vllm/models/Qwen3-32B \
  --tensor-parallel-size 2 \
  --api-key "YOUR_API_KEY" \
  --max-model-len 16384 \
  --port 8000