# Ubuntu 24.04: install the NVIDIA driver + CUDA + cuDNN + vLLM

# Refresh the package index, then upgrade the installed packages
# (the upgrade only runs when the index refresh succeeded).
sudo apt update \
  && sudo apt upgrade -y

# Install the basic development tools.
sudo apt install -y \
  build-essential gcc g++ make cmake \
  unzip git wget curl htop

# Remove the NVIDIA driver packages that are already installed.
# The pattern is quoted so apt receives it verbatim; unquoted, the shell would
# first expand *nvidia* against file names in the current directory (for
# example a downloaded nvidia-*.run file) and apt would get the wrong arguments.
sudo apt purge '*nvidia*'

# Install the driver.
# Show the detected devices and the driver packages available for them.
ubuntu-drivers devices
sudo apt install nvidia-driver-570-server
# Reboot after installing the driver. The remaining steps are run in a new
# session after the machine is back up.
sudo reboot

# Verify the NVIDIA driver installation.
nvidia-smi

# Install the CUDA toolkit.
wget https://developer.download.nvidia.com/compute/cuda/12.8.1/local_installers/cuda_12.8.1_570.124.06_linux.run
# The runfile also bundles driver 570.124.06. This guide installs the driver
# through apt (nvidia-driver-570-server), so install the toolkit only and do
# not let the runfile put a second driver on top of the apt-managed one.
# --silent runs without prompts (this implies accepting the EULA);
# --toolkit restricts the installation to the CUDA toolkit.
sudo sh cuda_12.8.1_570.124.06_linux.run --silent --toolkit

# Append the CUDA bin and lib64 directories to the login environment.
# Single quotes keep the ${...} expansions literal, so they are evaluated each
# time ~/.bashrc is read rather than once here.
{
  printf '%s\n' 'export PATH=/usr/local/cuda/bin${PATH:+:${PATH}}'
  printf '%s\n' 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}'
} >> ~/.bashrc
# Re-read ~/.bashrc so the current shell picks up the new variables.
source ~/.bashrc

# Install cuDNN from NVIDIA's local apt repository package.
# Other versions: https://developer.nvidia.com/cudnn-archive
cudnn_repo=cudnn-local-repo-ubuntu2404-9.8.0
cudnn_deb="${cudnn_repo}_1.0-1_amd64.deb"
wget "https://developer.download.nvidia.com/compute/cudnn/9.8.0/local_installers/${cudnn_deb}"
sudo dpkg -i "${cudnn_deb}"
# Copy the keyring from the local repository directory into the system
# keyrings directory before refreshing the package index.
sudo cp "/var/${cudnn_repo}"/cudnn-*-keyring.gpg /usr/share/keyrings/
sudo apt-get update
sudo apt-get -y install cudnn

# Prepare the Python environment that vLLM runs in.
# Stock Ubuntu 24.04 provides `python3` (there is no bare `python` command),
# and the venv module comes from the separate python3-venv package.
sudo apt install -y python3-venv
# -p also creates /data when it does not exist yet. Ownership is handed to the
# current user so that the venv, pip and the model download below all work
# without sudo.
sudo mkdir -p /data/vllm /data/models/LLM
sudo chown -R "$(id -un):$(id -gn)" /data/vllm /data/models/LLM
cd /data/vllm
# Create the venv as the current user; a root-owned venv cannot be modified by
# a plain `pip install` afterwards.
python3 -m venv venv
source venv/bin/activate

# Point pip at a package index mirror hosted in mainland China.
# NOTE: this overwrites any existing ~/.pip/pip.conf.
mkdir -p ~/.pip
cat > ~/.pip/pip.conf <<'EOF'
[global]
index-url = https://mirrors.aliyun.com/pypi/simple
trusted-host = mirrors.aliyun.com
EOF

# Show the configuration pip actually uses. pip.conf is an INI file that pip
# reads by itself; it is not a shell script and must not be `source`d.
pip config list

# Install vLLM into the activated venv. Do not use sudo here: with the default
# Ubuntu sudoers (secure_path) sudo resets PATH, so `sudo pip` would run the
# system pip instead of the one inside the venv.
pip install vllm

# Download the model.
pip install modelscope
modelscope download --model Qwen/QwQ-32B-AWQ --local_dir /data/models/LLM/QwQ-32B-AWQ

# Start the vLLM server for the downloaded model.
# CUDA_VISIBLE_DEVICES exposes GPUs 0 and 1 to the process, matching
# --tensor-parallel-size 2. Replace the YOUR_API_KEY placeholder with a real key.
CUDA_VISIBLE_DEVICES=0,1 vllm serve /data/models/LLM/QwQ-32B-AWQ \
  --tensor-parallel-size 2 \
  --api-key "YOUR_API_KEY" \
  --max-model-len 16384 \
  --port 8000