separate ubuntu docker jobs

wkpark · wkpark · commit ece67ae2a964 · 2024-02-06T21:36:41.000+09:00
* build-shared-libs-cuda-docker: CUDA build using Docker (Ubuntu)
* build-shared-libs-cuda: CUDA build using conda + mamba (Windows)
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -109,27 +109,21 @@ jobs:
       with:
         name: shared_library_${{ matrix.os }}_${{ matrix.arch }}
         path: output/*
+
   ##
-  # This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64)
+  # This job matrix builds the CUDA versions of the libraries on Windows x64 (Linux is built by build-shared-libs-cuda-docker)
   ##
   build-shared-libs-cuda:
     strategy:
       # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable.
       fail-fast: false
 
       matrix:
-        os: [ubuntu-latest, windows-latest]
-        arch: [x86_64, aarch64]
+        os: [windows-latest]
+        arch: [x86_64]
         cuda-version: ['11.8.0', '12.1.1']
         build_type: [Release]
-        exclude:
-          - os: windows-latest # This probably requires arm64 Windows agents
-            arch: aarch64
     runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
-    container:
-      image: ${{ matrix.os == 'windows-latest' && 'null' || format('nvidia/cuda:{0}-{1}', matrix.cuda-version, 'devel-ubuntu22.04') }}
-      volumes:
-        - /home/runner/work:/home/runner/work
     steps:
       # Check out code
     - uses: actions/checkout@v4
@@ -171,13 +165,6 @@ jobs:
         python-version: "3.10"
         mamba-version: "*"
 
-    - name: Setup Docker image
-      if: startsWith(matrix.os, 'ubuntu')
-      shell: bash
-      run: |
-        apt-get update
-        DEBIAN_FRONTEND=noninteractive apt-get install -y sudo cmake
-
     - name: Set reusable strings
       # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
       id: strings
@@ -270,10 +257,122 @@ jobs:
         name: shared_library_cuda_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.cuda-version }}
         path: output/*
 
+  ##
+  # This job matrix builds the CUDA versions of the libraries for Linux (x86_64/aarch64) inside the nvidia/cuda Docker image
+  ##
+  build-shared-libs-cuda-docker:
+    strategy:
+      # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable.
+      fail-fast: false
+
+      matrix:
+        os: [ubuntu-latest]
+        arch: [x86_64, aarch64]
+        cuda-version: ['11.8.0', '12.1.1']
+        build_type: [Release]
+        exclude:
+          - os: windows-latest # This probably requires arm64 Windows agents
+            arch: aarch64
+    runs-on: ${{ matrix.os }} # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
+    container:
+      image: ${{ matrix.os == 'windows-latest' && 'null' || format('nvidia/cuda:{0}-{1}', matrix.cuda-version, 'devel-ubuntu22.04') }}
+      volumes:
+        - /home/runner/work:/home/runner/work
+    steps:
+      # Check out code
+    - uses: actions/checkout@v4
+      # Linux: we use Docker to build cross-platform CUDA (aarch64: a cross toolchain is installed below; TODO confirm whether emulation is still used)
+    - name: Setup cmake
+      uses: jwlawson/actions-setup-cmake@v1.14
+      with:
+        cmake-version: '3.26.x'
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.10"
+
+    - name: Setup Docker image
+      if: startsWith(matrix.os, 'ubuntu')
+      shell: bash
+      run: |
+        apt-get update
+        DEBIAN_FRONTEND=noninteractive apt-get install -y sudo cmake
+
+    - name: Set reusable strings
+      # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
+      id: strings
+      shell: bash
+      run: |
+        echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
+
+    - name: Allow cross-compile on aarch64
+      if: ${{ matrix.os == 'ubuntu-latest' && matrix.arch == 'aarch64' }}
+      run: |
+        # Allow cross-compile on aarch64
+        sudo apt-get install -y gcc-aarch64-linux-gnu binutils-aarch64-linux-gnu
+
+    - name: CUDA Toolkit
+      shell: bash -el {0}
+      run: |
+        cuda_version=${{ matrix.cuda-version }}
+        [ "$cuda_version" = "11.8.0" ] && cuda__version="11.8"
+        [ "$cuda_version" = "12.1.1" ] && cuda__version="12.1"
+
+        CUDA_HOME="${{ env.CONDA }}/envs/bnb-env"
+        echo CUDA_HOME=$CUDA_HOME >> "$GITHUB_ENV"
+        echo CUDA_PATH=$CUDA_HOME >> "$GITHUB_ENV"
+        echo CXX_COMPILER=g++ >> "$GITHUB_ENV"
+
+        nvcc --version
+
+    - name: Prep build
+      run: python -m pip install cmake==3.27.9 ninja
+
+    # TODO: the following steps (CUDA, NOBLASLT, CPU) could be moved to the matrix, so they're built in parallel
+
+    - name: Configure CUDA
+      run: >
+        cmake -B ${{ steps.strings.outputs.build-output-dir }}
+        -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }}
+        -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }}
+        -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+        -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90"
+        -DCOMPUTE_BACKEND=cuda
+        -S ${{ github.workspace }}
+
+    - name: Build CUDA
+      run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}
+
+    - name: Configure NOBLASLT
+      run: >
+        cmake -B ${{ steps.strings.outputs.build-output-dir }}
+        -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }}
+        -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }}
+        -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+        -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90"
+        -DCOMPUTE_BACKEND=cuda
+        -DNO_CUBLASLT=ON
+        -S ${{ github.workspace }}
+
+    - name: Build NOBLASLT
+      run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }}
+
+    - name: Copy libraries
+      shell: bash
+      run: |
+        mkdir -p output/${{ matrix.os }}/${{ matrix.arch }}
+        ( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ )
+    - name: Upload build artifact
+      uses: actions/upload-artifact@v4
+      with:
+        name: shared_library_cuda_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.cuda-version }}
+        path: output/*
+
   build-wheels:
     needs:
     - build-shared-libs
     - build-shared-libs-cuda
+    - build-shared-libs-cuda-docker
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]