#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  ## @description  download nvidia docker bin
  ## @audience     public
  ## @stability    stable
  #
  # Fetches the nvidia-docker RPM into ${DOWNLOAD_DIR}/nvidia-docker/ unless a
  # regular file named ${NVIDIA_DOCKER_RPM} is already present there.
  #
  # Globals read:    DOWNLOAD_HTTP, DOWNLOAD_DIR, NVIDIA_DOCKER_RPM,
  #                  NVIDIA_DOCKER_RPM_URL
  # Globals written: MY_NVIDIA_DOCKER_RPM_URL (the URL that was selected)
  # Returns:         0 when the RPM is already present; otherwise the exit
  #                  status of wget.
  function download_nvidia_docker_bin()
  {
    local rpm_dir="${DOWNLOAD_DIR}/nvidia-docker"
    local rpm_path="${rpm_dir}/${NVIDIA_DOCKER_RPM}"

    # A configured download HTTP server takes precedence over the default URL.
    if [[ -n "${DOWNLOAD_HTTP:-}" ]]; then
      MY_NVIDIA_DOCKER_RPM_URL="${DOWNLOAD_HTTP}/downloads/nvidia-docker/${NVIDIA_DOCKER_RPM}"
    else
      MY_NVIDIA_DOCKER_RPM_URL="${NVIDIA_DOCKER_RPM_URL}"
    fi

    if [[ -f "${rpm_path}" ]]; then
      echo "${rpm_path} is exist."
    else
      echo "download ${MY_NVIDIA_DOCKER_RPM_URL} ..."
      # Quoted so a directory or URL containing spaces/glob characters is
      # handed to wget as a single argument each.
      wget -P "${rpm_dir}/" "${MY_NVIDIA_DOCKER_RPM_URL}"
    fi
  }
  ## @description  install nvidia docker
  ## @audience     public
  ## @stability    stable
  #
  # Downloads and installs the nvidia-docker RPM, starts the nvidia-docker
  # service, shows its status and journal, queries the local plugin endpoint,
  # then copies the host NVIDIA binaries and libraries into a per-driver-version
  # directory below /var/lib/nvidia-docker/volumes/nvidia_driver. Finishes by
  # printing the manual verification commands.
  #
  # Globals read: DOWNLOAD_DIR, NVIDIA_DOCKER_RPM, DOCKER_REGISTRY
  # Calls:        download_nvidia_docker_bin, get_nvidia_version
  # Returns:      1 when the RPM download fails or no driver version is
  #               detected; otherwise the status of the final echo.
  #
  # NOTE(review): rpm/systemctl run through sudo but mkdir/cp below do not, so
  # this presumably expects to be run as root - confirm against the caller.
  function install_nvidia_docker()
  {
    local driver_root="/var/lib/nvidia-docker/volumes/nvidia_driver"
    local nvidiaVersion
    local version_dir

    # Without the package there is nothing to install; stop instead of handing
    # rpm a file that does not exist.
    if ! download_nvidia_docker_bin; then
      echo "ERROR: failed to download ${NVIDIA_DOCKER_RPM}, nvidia-docker is not installed." >&2
      return 1
    fi

    sudo rpm -i "${DOWNLOAD_DIR}/nvidia-docker/${NVIDIA_DOCKER_RPM}"

    echo -e "\033[32m===== Start nvidia-docker =====\033[0m"
    sudo systemctl start nvidia-docker

    echo -e "\033[32m===== Check nvidia-docker status =====\033[0m"
    systemctl status nvidia-docker

    echo -e "\033[32m===== Check nvidia-docker log =====\033[0m"
    journalctl -u nvidia-docker

    echo -e "\033[32m===== Test nvidia-docker-plugin =====\033[0m"
    curl http://localhost:3476/v1.0/docker/cli

    # create nvidia driver library path
    if [[ ! -d "${driver_root}" ]]; then
      echo "WARN: /var/lib/nvidia-docker/volumes/nvidia_driver folder path is not exist!"
      mkdir -p "${driver_root}"
    fi

    # Declared above and assigned here so `local` does not mask the command;
    # only the captured output is inspected.
    nvidiaVersion="$(get_nvidia_version)"
    echo -e "\033[31m nvidia detect version is ${nvidiaVersion}\033[0m"

    # An empty version would make the copies below land in ".../nvidia_driver//bin".
    if [[ -z "${nvidiaVersion}" ]]; then
      echo "ERROR: unable to detect the NVIDIA driver version, driver files were not copied." >&2
      return 1
    fi

    version_dir="${driver_root}/${nvidiaVersion}"
    # -p keeps a second run from failing on directories that already exist.
    mkdir -p "${version_dir}/bin" "${version_dir}/lib64"

    # The globs are intentionally unquoted so the shell expands them.
    cp /usr/bin/nvidia* "${version_dir}/bin"
    cp /usr/lib64/libcuda* "${version_dir}/lib64"
    cp /usr/lib64/libnvidia* "${version_dir}/lib64"

    echo -e "\033[32m===== Please manually execute the following command =====\033[0m"
    echo -e "\033[32mshell:> nvidia-docker run --rm ${DOCKER_REGISTRY}/nvidia/cuda:9.0-devel nvidia-smi"
    echo "# If you don't see the list of graphics cards above, the NVIDIA driver installation failed. ====="
    echo -e "\033[0m"

    echo -e "\033[32m===== Please manually execute the following command =====\033[0m"
    echo -e "\033[32m# Test with tf.test.is_gpu_available()"
    echo -e "shell:> nvidia-docker run -it ${DOCKER_REGISTRY}/tensorflow/tensorflow:1.9.0-gpu bash"
    echo "# In docker container"
    echo "container:> python"
    echo "python:> import tensorflow as tf"
    echo "python:> tf.test.is_gpu_available()"
    echo "python:> exit()"
    echo -e "\033[0m"
  }
  ## @description  uninstall nvidia docker
  ## @audience     public
  ## @stability    stable
  #
  # Placeholder: performs no uninstall work. It only prints a notice on stdout
  # saying the operation is not implemented.
  function uninstall_nvidia_docker()
  {
    echo "This method is not implemented."
  }
  83. }