# ollama/.github/workflows/release.yaml

# Release pipeline: runs whenever a tag starting with "v" is pushed.
name: release

on:
  push:
    tags:
      - 'v*'

# Least-privilege default for GITHUB_TOKEN. Jobs that need more (the `release`
# job requests `contents: write`) declare their own `permissions` block.
permissions:
  contents: read

# Workflow-wide cgo optimisation flags; darwin-build overrides them with its
# own deployment-target flags.
env:
  CGO_CFLAGS: '-O3'
  CGO_CXXFLAGS: '-O3'

jobs:
  # Derives the values shared by the downstream jobs (Go link flags, release
  # version and a vendored-dependency fingerprint) and publishes them as outputs.
  setup-environment:
    environment: release
    runs-on: ubuntu-latest
    outputs:
      VERSION: ${{ steps.goflags.outputs.VERSION }}
      GOFLAGS: ${{ steps.goflags.outputs.GOFLAGS }}
      vendorsha: ${{ steps.goflags.outputs.vendorsha }}
    steps:
      - uses: actions/checkout@v4
      - id: goflags
        name: Set environment
        run: |
          # Tag name with the leading "v" removed (v1.2.3 -> 1.2.3).
          version="${GITHUB_REF_NAME#v}"
          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${version}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" | tee -a $GITHUB_OUTPUT
          echo VERSION="${version}" | tee -a $GITHUB_OUTPUT
          # Concatenation of the three pinned version files; used in the ccache key.
          echo vendorsha=$(cat LLAMA_CPP_VERSION)-$(cat MLX_VERSION)-$(cat MLX_C_VERSION) | tee -a $GITHUB_OUTPUT

  # Builds and packages the macOS artifacts, signing with a certificate that is
  # imported into a temporary keychain.
  darwin-build:
    runs-on: macos-26-xlarge
    environment: release
    needs: setup-environment
    env:
      GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
      VERSION: ${{ needs.setup-environment.outputs.VERSION }}
      APPLE_IDENTITY: ${{ secrets.APPLE_IDENTITY }}
      APPLE_PASSWORD: ${{ secrets.APPLE_PASSWORD }}
      APPLE_TEAM_ID: ${{ vars.APPLE_TEAM_ID }}
      APPLE_ID: ${{ vars.APPLE_ID }}
      MACOS_SIGNING_KEY: ${{ secrets.MACOS_SIGNING_KEY }}
      MACOS_SIGNING_KEY_PASSWORD: ${{ secrets.MACOS_SIGNING_KEY_PASSWORD }}
      # Override the workflow-level flags to pin the minimum macOS version.
      CGO_CFLAGS: '-mmacosx-version-min=14.0 -O3'
      CGO_CXXFLAGS: '-mmacosx-version-min=14.0 -O3'
      CGO_LDFLAGS: '-mmacosx-version-min=14.0 -O3'
    steps:
      - uses: actions/checkout@v4
      # Decode the base64 signing certificate and import it into build.keychain.
      # The secrets are quoted so that whitespace or glob characters in them
      # cannot be split or expanded by the shell.
      - run: |
          echo "$MACOS_SIGNING_KEY" | base64 --decode > certificate.p12
          security create-keychain -p password build.keychain
          security default-keychain -s build.keychain
          security unlock-keychain -p password build.keychain
          security import certificate.p12 -k build.keychain -P "$MACOS_SIGNING_KEY_PASSWORD" -T /usr/bin/codesign
          security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k password build.keychain
          security set-keychain-settings -lut 3600 build.keychain
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache-dependency-path: |
            go.sum
            LLAMA_CPP_VERSION
            MLX_VERSION
            MLX_C_VERSION
      - run: |
          ./scripts/build_darwin.sh
      - name: Log build results
        run: |
          ls -l dist/
      # Published under the bundles-* prefix that the release job downloads.
      - uses: actions/upload-artifact@v4
        with:
          name: bundles-darwin
          path: |
            dist/*.tgz
            dist/*.tar.zst
            dist/*.zip
            dist/*.dmg

  # Builds the native Windows payloads, one matrix job per backend preset.
  windows-depends:
    needs: setup-environment
    runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
    environment: release
    env:
      GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
      VERSION: ${{ needs.setup-environment.outputs.VERSION }}
    strategy:
      matrix:
        # Base combination: the CPU preset, which runs the cpu and cpuArm64 build steps.
        os:
          - windows
        arch:
          - amd64
        preset:
          - CPU
        build-steps:
          - cpu cpuArm64
        # Additional presets; each carries the installer URL for its toolkit.
        include:
          - preset: "CUDA 12"
            os: windows
            arch: amd64
            build-steps: cuda12
            cuda-version: "12.8"
            cuda-components: ['"cudart"', '"nvcc"', '"cublas"', '"cublas_dev"']
            install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
          - preset: "CUDA 13"
            os: windows
            arch: amd64
            build-steps: cuda13
            cuda-version: "13.0"
            cuda-components: ['"cudart"', '"nvcc"', '"cublas"', '"cublas_dev"',
                              '"crt"', '"nvvm"', '"nvptxcompiler"']
            install: https://developer.download.nvidia.com/compute/cuda/13.0.0/local_installers/cuda_13.0.0_windows.exe
          - preset: "ROCm 7"
            os: windows
            arch: amd64
            build-steps: rocm7
            rocm-version: "7.1"
            install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-26.Q1-Win11-For-HIP.exe
          - preset: Vulkan
            os: windows
            arch: amd64
            build-steps: vulkan
            install: https://sdk.lunarg.com/sdk/download/1.4.321.1/windows/vulkansdk-windows-X64-1.4.321.1.exe
          - preset: "MLX CUDA 13"
            os: windows
            arch: amd64
            build-steps: mlxCuda13
            build-parallel: "16"
            cmake-cuda-flags: "-t 6"
            cuda-version: "13.0"
            cuda-components: ['"cudart"', '"nvcc"', '"cublas"', '"cublas_dev"',
                              '"cufft"', '"cufft_dev"', '"nvrtc"', '"nvrtc_dev"',
                              '"crt"', '"nvvm"', '"nvptxcompiler"']
            install: https://developer.download.nvidia.com/compute/cuda/13.0.0/local_installers/cuda_13.0.0_windows.exe
            cudnn-install: https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.18.1.3_cuda13-archive.zip
    steps:
      # MLX presets only: enlarge the pagefile on D: before building.
      - name: Increase pagefile to 200 GB
        if: ${{ startsWith(matrix.preset, 'MLX ') }}
        uses: al-cheb/configure-pagefile-action@v1.5
        with:
          disk-root: 'D:'
          minimum-size: 16GB
          maximum-size: 200GB
      # Install ccache and ninja, then point ccache at a directory inside the
      # workspace.
      - name: Install system dependencies
        run: |
          choco install -y --no-progress ccache ninja
          if (Get-Command ccache -ErrorAction SilentlyContinue) {
            ccache -o cache_dir=${{ github.workspace }}\.ccache
          }
      # CPU preset only: unpack llvm-mingw and confirm it ships the aarch64 gcc
      # front end.
      - name: Install Windows ARM64 cross compiler
        if: ${{ matrix.preset == 'CPU' }}
        run: |
          Invoke-WebRequest -Uri "https://github.com/mstorsjo/llvm-mingw/releases/download/20240619/llvm-mingw-20240619-ucrt-x86_64.zip" -OutFile "${{ runner.temp }}\llvm-mingw-ucrt.zip"
          Expand-Archive -Path ${{ runner.temp }}\llvm-mingw-ucrt.zip -DestinationPath "C:\Program Files\"
          $toolchainDir = (Resolve-Path -Path "C:\Program Files\llvm-mingw-*-ucrt-x86_64").path
          $crossGcc = "$toolchainDir\bin\aarch64-w64-mingw32-gcc.exe"
          if (-not (Test-Path $crossGcc)) {
            throw "llvm-mingw x86_64 package is missing the aarch64 cross compiler"
          }
      # Restore a previously cached toolkit install for the GPU presets. The key
      # is built from the installer URL(s); the path list is kept in the same
      # order as the matching actions/cache/save step.
      - id: cache-install
        if: ${{ startsWith(matrix.preset, 'CUDA ') || startsWith(matrix.preset, 'ROCm ') || startsWith(matrix.preset, 'Vulkan') || startsWith(matrix.preset, 'MLX ') }}
        uses: actions/cache/restore@v4
        with:
          key: ${{ matrix.install }}-${{ matrix.cudnn-install }}
          path: |
            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
            C:\Program Files\AMD\ROCm
            C:\VulkanSDK
            C:\Program Files\NVIDIA\CUDNN
      # CUDA and MLX presets: run the silent CUDA installer on a cache miss,
      # then add the toolkit's bin directory to PATH for later steps.
      - name: Install CUDA ${{ matrix.cuda-version }}
        if: ${{ startsWith(matrix.preset, 'CUDA ') || startsWith(matrix.preset, 'MLX ') }}
        run: |
          $ErrorActionPreference = "Stop"
          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
            # Each component name gets the toolkit version appended.
            $packages = @(${{ join(matrix.cuda-components, ', ') }}) | Foreach-Object {"${_}_${{ matrix.cuda-version }}"}
            $installerArgs = @("-s") + $packages
            Start-Process -FilePath .\install.exe -ArgumentList $installerArgs -NoNewWindow -Wait
          }

          $toolkitDir = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path
          echo "$toolkitDir\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
      # ROCm preset: run the installer on a cache miss, then export the compiler
      # and CMake variables that point at the ROCm install.
      - name: Install ROCm ${{ matrix.rocm-version }}
        if: ${{ startsWith(matrix.preset, 'ROCm') }}
        run: |
          $ErrorActionPreference = "Stop"
          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
            Start-Process -FilePath .\install.exe -ArgumentList '-install' -NoNewWindow -Wait
          }

          $rocmDir = (Resolve-Path "C:\Program Files\AMD\ROCm\*").path
          echo "$rocmDir\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          $rocmEnv = @(
            "CC=$rocmDir\bin\clang.exe",
            "CXX=$rocmDir\bin\clang++.exe",
            "HIPCXX=$rocmDir\bin\clang++.exe",
            "HIP_PLATFORM=amd",
            "CMAKE_PREFIX_PATH=$rocmDir"
          )
          $rocmEnv | Out-File -FilePath $env:GITHUB_ENV -Append
      # Vulkan preset: run the SDK installer on a cache miss, then expose the
      # SDK to later steps. The step name no longer interpolates
      # matrix.rocm-version, which the Vulkan matrix entry does not define.
      - if: matrix.preset == 'Vulkan'
        name: Install Vulkan SDK
        run: |
          $ErrorActionPreference = "Stop"
          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
            Start-Process -FilePath .\install.exe -ArgumentList "-c","--am","--al","in" -NoNewWindow -Wait
          }

          $vulkanPath = (Resolve-Path "C:\VulkanSDK\*").path
          # Run the bundled runtime installer when the SDK provides one.
          $vulkanRuntime = Join-Path $vulkanPath "Helpers\VulkanRT.exe"
          if (Test-Path $vulkanRuntime) {
            Start-Process -FilePath $vulkanRuntime -ArgumentList "/s" -NoNewWindow -Wait
          }
          echo "$vulkanPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          # Written with Out-File and explicit UTF-8, like the other steps' writes,
          # so the file encoding does not depend on the PowerShell edition.
          echo "VULKAN_SDK=$vulkanPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
      # MLX presets: unpack the cuDNN archive on a cache miss, then export its
      # locations to later steps.
      - name: Install cuDNN for MLX
        if: ${{ startsWith(matrix.preset, 'MLX ') }}
        run: |
          $ErrorActionPreference = "Stop"
          $cudnnRoot = "C:\Program Files\NVIDIA\CUDNN"
          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
            Invoke-WebRequest -Uri "${{ matrix.cudnn-install }}" -OutFile "cudnn.zip"
            Expand-Archive -Path cudnn.zip -DestinationPath cudnn-extracted
            # Copy the contents of the first extracted directory into place.
            $unpackedDir = (Get-ChildItem -Path cudnn-extracted -Directory)[0].FullName
            New-Item -ItemType Directory -Force -Path $cudnnRoot
            Copy-Item -Path "$unpackedDir\*" -Destination "$cudnnRoot\" -Recurse
          }

          $cudnnEnv = @(
            "CUDNN_ROOT_DIR=$cudnnRoot",
            "CUDNN_INCLUDE_PATH=$cudnnRoot\include",
            "CUDNN_LIBRARY_PATH=$cudnnRoot\lib\x64"
          )
          $cudnnEnv | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "$cudnnRoot\bin\x64" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
      # Save the freshly installed toolkit for future runs. Only runs when
      #   - every earlier step succeeded, so a failed or partial install is
      #     never stored under the key and restored by later runs,
      #   - the matrix entry actually has an installer (the CPU preset has none
      #     and would otherwise try to save non-existent paths under the key "-"),
      #   - the restore step did not already hit the cache.
      - if: ${{ success() && matrix.install != '' && steps.cache-install.outputs.cache-hit != 'true' }}
        uses: actions/cache/save@v4
        with:
          path: |
            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
            C:\Program Files\AMD\ROCm
            C:\VulkanSDK
            C:\Program Files\NVIDIA\CUDNN
          key: ${{ matrix.install }}-${{ matrix.cudnn-install }}
      - uses: actions/checkout@v4
      # Compiler cache, keyed by preset and the vendored-dependency fingerprint.
      - uses: actions/cache@v4
        with:
          key: ccache-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }}-${{ needs.setup-environment.outputs.vendorsha }}
          path: ${{ github.workspace }}\.ccache
      # Enter the Visual Studio developer shell, then run the build script with
      # this preset's space-separated build steps as separate arguments.
      - name: Build Windows dependencies
        env:
          CMAKE_GENERATOR: Ninja
          OLLAMA_BUILD_PARALLEL: ${{ matrix.build-parallel || '' }}
          OLLAMA_CMAKE_CUDA_FLAGS: ${{ matrix.cmake-cuda-flags || '' }}
        run: |
          Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
          Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation  -DevCmdArguments '-arch=x64 -no_logo'
          $buildSteps = "${{ matrix.build-steps }}".Split(' ', [System.StringSplitOptions]::RemoveEmptyEntries)
          ./scripts/build_windows.ps1 @buildSteps
      # Print a SHA256 for every file produced under dist.
      - name: Log build results
        run: |
          Get-ChildItem -Path .\dist -Recurse -File | ForEach-Object { Get-FileHash -Path $_.FullName -Algorithm SHA256 } | Format-List
      # CPU preset: fail early if either architecture's llama-server is missing.
      - name: Verify Windows CPU payloads
        if: ${{ matrix.preset == 'CPU' }}
        shell: bash
        run: |
          set -euo pipefail
          payloads=(
            dist/windows-amd64/lib/ollama/llama-server.exe
            dist/windows-arm64/lib/ollama/llama-server.exe
          )
          for payload in "${payloads[@]}"; do
            if [ ! -f "$payload" ]; then
              echo "missing $payload"
              exit 1
            fi
          done
      # Published under the depends-windows* prefix that windows-app downloads.
      - uses: actions/upload-artifact@v4
        with:
          path: dist\*
          name: depends-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }}

  # Builds the Windows ollama binaries and app for amd64 and arm64.
  windows-build:
    runs-on: windows
    environment: release
    needs: [setup-environment]
    env:
      GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
      VERSION: ${{ needs.setup-environment.outputs.VERSION }}
    steps:
      # Unpack llvm-mingw, put its bin directory on PATH and confirm that the
      # aarch64 cross compiler is present.
      - name: Install clang and gcc-compat
        run: |
          $ErrorActionPreference = "Stop"
          Set-ExecutionPolicy Bypass -Scope Process -Force
          Invoke-WebRequest -Uri "https://github.com/mstorsjo/llvm-mingw/releases/download/20240619/llvm-mingw-20240619-ucrt-x86_64.zip" -OutFile "${{ runner.temp }}\llvm-mingw-ucrt.zip"
          Expand-Archive -Path ${{ runner.temp }}\llvm-mingw-ucrt.zip -DestinationPath "C:\Program Files\"
          $installPath=(Resolve-Path -Path "C:\Program Files\llvm-mingw-*-ucrt-x86_64").path
          echo "$installPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          if (!(Test-Path "$installPath\bin\aarch64-w64-mingw32-gcc.exe")) {
            throw "llvm-mingw x86_64 package is missing the aarch64 cross compiler"
          }
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache-dependency-path: |
            go.sum
            LLAMA_CPP_VERSION
            MLX_VERSION
            MLX_C_VERSION
      # `gcc -v` prints to stderr, so error handling is relaxed while its output
      # is captured and checked for "clang".
      - name: Verify gcc is actually clang
        run: |
          $ErrorActionPreference='Continue'
          $version=& gcc -v 2>&1
          $version=$version -join "`n"
          echo "gcc is $version"
          if ($version -notmatch 'clang') {
            echo "ERROR: GCC must be clang for proper utf16 handling"
            exit 1
          }
          $ErrorActionPreference='Stop'
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "20"
      # Call the script by its full file name, as the other jobs do, rather
      # than relying on PowerShell to resolve the missing .ps1 extension.
      - run: |
          ./scripts/build_windows.ps1 ollama ollamaArm64 app appArm64
      - name: Verify Windows build payloads
        shell: bash
        run: |
          set -euo pipefail
          for payload in \
            dist/windows-amd64/ollama.exe \
            dist/windows-arm64/ollama.exe
          do
            [ -f "$payload" ] || { echo "missing $payload"; exit 1; }
          done
      - name: Log build results
        run: |
          gci -path .\dist -Recurse -File | ForEach-Object { get-filehash -path $_.FullName -Algorithm SHA256 } | format-list
      # Published under the build-windows* prefix that windows-app downloads.
      - uses: actions/upload-artifact@v4
        with:
          name: build-windows-amd64
          path: |
            dist\*

  # Merges the windows-build and windows-depends artifacts into dist, then runs
  # the deps, sign, installer and zip stages of the Windows build script.
  windows-app:
    needs:
      - setup-environment
      - windows-build
      - windows-depends
    runs-on: windows
    environment: release
    env:
      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
      GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
      VERSION: ${{ needs.setup-environment.outputs.VERSION }}
    steps:
      - uses: actions/checkout@v4
      - uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}
          project_id: ollama
      # Install the signing prerequisites: an SDK setup package (presumably the
      # Windows SDK, verify the fwlink target), the Google Cloud KMS CNG
      # provider, and the certificate taken from repository variables.
      - run: |
          $ErrorActionPreference = "Stop"
          $sdkSetup = "${{ runner.temp }}\sdksetup.exe"
          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile $sdkSetup
          Start-Process $sdkSetup -ArgumentList @("/q") -NoNewWindow -Wait

          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${{ runner.temp }}\plugin.zip"
          Expand-Archive -Path "${{ runner.temp }}\plugin.zip" -DestinationPath "${{ runner.temp }}\plugin\"
          & "${{ runner.temp }}\plugin\*\kmscng.msi" /quiet

          echo "${{ vars.OLLAMA_CERT }}" >ollama_inc.crt
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache-dependency-path: |
            go.sum
            LLAMA_CPP_VERSION
            MLX_VERSION
            MLX_C_VERSION
      # Both artifact families are merged into the same dist tree.
      - uses: actions/download-artifact@v4
        with:
          path: dist
          pattern: depends-windows*
          merge-multiple: true
      - uses: actions/download-artifact@v4
        with:
          path: dist
          pattern: build-windows*
          merge-multiple: true
      - name: Log dist contents after download
        run: |
          Get-ChildItem -Path .\dist -Recurse
      # Fail before packaging if any required binary is missing.
      - name: Verify Windows package inputs
        shell: bash
        run: |
          set -euo pipefail
          inputs=(
            dist/windows-amd64/ollama.exe
            dist/windows-amd64/lib/ollama/llama-server.exe
            dist/windows-arm64/ollama.exe
            dist/windows-arm64/lib/ollama/llama-server.exe
          )
          for payload in "${inputs[@]}"; do
            if [ ! -f "$payload" ]; then
              echo "missing $payload"
              exit 1
            fi
          done
      - run: |
          ./scripts/build_windows.ps1 deps sign installer zip
      - name: Log contents after build
        run: |
          Get-ChildItem -Path .\dist -Recurse -File | ForEach-Object { Get-FileHash -Path $_.FullName -Algorithm SHA256 } | Format-List
      # Published under the bundles-* prefix that the release job downloads.
      - uses: actions/upload-artifact@v4
        with:
          name: bundles-windows
          path: |
            dist/*.zip
            dist/*.ps1
            dist/OllamaSetup.exe

  # Builds each Dockerfile target on its own runner and exports the layers to a
  # registry cache, which docker-build-push later lists in its cache-from.
  linux-depends:
    needs: setup-environment
    runs-on: ${{ matrix.arch == 'arm64' && 'linux-arm64' || 'linux' }}
    environment: release
    env:
      GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
    strategy:
      matrix:
        include:
          - { arch: amd64, target: llama-server-cpu }
          - { arch: amd64, target: llama-server-cuda_v12 }
          - { arch: amd64, target: llama-server-cuda_v13 }
          - { arch: amd64, target: mlx }
          - { arch: amd64, target: llama-server-rocm_v7_2 }
          - { arch: amd64, target: llama-server-vulkan }
          - { arch: arm64, target: llama-server-cpu }
          - { arch: arm64, target: llama-server-cuda_v12 }
          - { arch: arm64, target: llama-server-cuda_v13 }
          - { arch: arm64, target: jetpack-5 }
          - { arch: arm64, target: jetpack-6 }
    steps:
      - uses: actions/checkout@v4
      - uses: docker/setup-buildx-action@v3
      - uses: docker/login-action@v3
        with:
          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
          username: ${{ vars.DOCKER_USER }}
      # mlx target only: replace any existing swap file with a 200 GB one,
      # falling back to dd when fallocate is not supported.
      - name: Increase Linux swap to 200 GB
        if: ${{ matrix.target == 'mlx' }}
        shell: bash
        run: |
          set -e
          swapfile=/swapfile-mlx
          size_gb=200
          if [ -f "$swapfile" ]; then
              sudo swapoff "$swapfile" 2>/dev/null || true
              sudo rm -f "$swapfile"
          fi
          if ! sudo fallocate -l ${size_gb}G "$swapfile" 2>/dev/null; then
              echo "fallocate unsupported, falling back to dd"
              sudo dd if=/dev/zero of="$swapfile" bs=1M count=$((size_gb * 1024))
          fi
          sudo chmod 600 "$swapfile"
          sudo mkswap "$swapfile"
          sudo swapon "$swapfile"
          swapon --show
          free -h
      # Build only the requested target and export all layers (mode=max) to the
      # per-arch, per-target cache ref.
      - uses: docker/build-push-action@v6
        with:
          context: .
          target: ${{ matrix.target }}
          platforms: linux/${{ matrix.arch }}
          sbom: false
          provenance: false
          build-args: |
            GOFLAGS=${{ env.GOFLAGS }}
            CGO_CFLAGS=${{ env.CGO_CFLAGS }}
            CGO_CXXFLAGS=${{ env.CGO_CXXFLAGS }}
            OLLAMA_MLX_BUILD_JOBS=16
            OLLAMA_MLX_NVCC_THREADS=6
            APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
            APT_PORTS_MIRROR=http://azure.ports.ubuntu.com/ubuntu-ports
          cache-to: type=registry,ref=ollama/release:cache-${{ matrix.arch }}-${{ matrix.target }},mode=max
          cache-from: |
            type=registry,ref=ollama/release:cache-${{ matrix.arch }}-${{ matrix.target }}
            type=registry,ref=${{ vars.DOCKER_REPO }}:latest

  # Build each Docker variant (OS, arch, and flavor) separately. Using QEMU is unreliable and slower.
  # Heavy stages were pre-built by linux-depends; this job is cache-hit-only for those layers
  # and just assembles, runs the Go build, pushes the final image, and extracts release bundles.
  # One job per image variant: linux/arm64, linux/amd64 and the amd64 ROCm flavor.
  docker-build-push:
    needs:
      - setup-environment
      - linux-depends
    runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
    environment: release
    env:
      GOFLAGS: ${{ needs.setup-environment.outputs.GOFLAGS }}
    strategy:
      matrix:
        include:
          # Default flavor, arm64.
          - arch: arm64
            os: linux
            archive-target: archive
            cache-from: |
              type=registry,ref=ollama/release:cache-arm64-llama-server-cpu
              type=registry,ref=ollama/release:cache-arm64-llama-server-cuda_v12
              type=registry,ref=ollama/release:cache-arm64-llama-server-cuda_v13
              type=registry,ref=ollama/release:cache-arm64-jetpack-5
              type=registry,ref=ollama/release:cache-arm64-jetpack-6
              type=registry,ref=${{ vars.DOCKER_REPO }}:latest
            build-args: |
              CGO_CFLAGS
              CGO_CXXFLAGS
              GOFLAGS
              APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
              APT_PORTS_MIRROR=http://azure.ports.ubuntu.com/ubuntu-ports
              OLLAMA_MLX_BUILD_JOBS=16
              OLLAMA_MLX_NVCC_THREADS=6
          # Default flavor, amd64.
          - arch: amd64
            os: linux
            archive-target: archive
            cache-from: |
              type=registry,ref=ollama/release:cache-amd64-llama-server-cpu
              type=registry,ref=ollama/release:cache-amd64-llama-server-cuda_v12
              type=registry,ref=ollama/release:cache-amd64-llama-server-cuda_v13
              type=registry,ref=ollama/release:cache-amd64-mlx
              type=registry,ref=ollama/release:cache-amd64-llama-server-rocm_v7_2
              type=registry,ref=ollama/release:cache-amd64-llama-server-vulkan
              type=registry,ref=${{ vars.DOCKER_REPO }}:latest
            build-args: |
              CGO_CFLAGS
              CGO_CXXFLAGS
              GOFLAGS
              APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
              APT_PORTS_MIRROR=http://azure.ports.ubuntu.com/ubuntu-ports
              OLLAMA_MLX_BUILD_JOBS=16
              OLLAMA_MLX_NVCC_THREADS=6
          # ROCm flavor, amd64; the "-rocm" suffix keeps its artifacts apart.
          - arch: amd64
            os: linux
            suffix: "-rocm"
            archive-target: image-archive
            cache-from: |
              type=registry,ref=ollama/release:cache-amd64-llama-server-cpu
              type=registry,ref=ollama/release:cache-amd64-llama-server-rocm_v7_2
              type=registry,ref=${{ vars.DOCKER_REPO }}:latest
            build-args: |
              CGO_CFLAGS
              CGO_CXXFLAGS
              GOFLAGS
              FLAVOR=rocm
              APT_MIRROR=http://azure.archive.ubuntu.com/ubuntu
              APT_PORTS_MIRROR=http://azure.ports.ubuntu.com/ubuntu-ports
              OLLAMA_MLX_BUILD_JOBS=16
              OLLAMA_MLX_NVCC_THREADS=6
    steps:
      - uses: actions/checkout@v4
      - uses: docker/setup-buildx-action@v3
      - uses: docker/login-action@v3
        with:
          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
          username: ${{ vars.DOCKER_USER }}
      # Build the image and push it by digest only (no tag).
      - uses: docker/build-push-action@v6
        id: build-push
        with:
          context: .
          platforms: ${{ matrix.os }}/${{ matrix.arch }}
          build-args: ${{ matrix.build-args }}
          cache-from: ${{ matrix.cache-from }}
          cache-to: type=inline
          outputs: type=image,name=${{ vars.DOCKER_REPO }},push-by-digest=true,name-canonical=true,push=true
          sbom: false
          provenance: false
      # Write the pushed digest to a per-variant text file in the runner temp dir.
      - working-directory: ${{ runner.temp }}
        run: |
          mkdir -p ${{ matrix.os }}-${{ matrix.arch }}
          echo "${{ steps.build-push.outputs.digest }}" >${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.suffix }}.txt
      # Published under the digest-* prefix that docker-merge-push downloads.
      - uses: actions/upload-artifact@v4
        with:
          name: digest-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.suffix }}
          path: ${{ runner.temp }}/${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.suffix }}.txt
      # Second build of the archive target, exported to a local directory
      # instead of being pushed.
      - uses: docker/build-push-action@v6
        with:
          context: .
          target: ${{ matrix.archive-target }}
          platforms: ${{ matrix.os }}/${{ matrix.arch }}
          build-args: ${{ matrix.build-args }}
          cache-from: ${{ matrix.cache-from }}
          outputs: type=local,dest=dist/${{ matrix.os }}-${{ matrix.arch }}
          sbom: false
          provenance: false
      - name: Deduplicate CUDA libraries
        run: |
          ./scripts/deduplicate_cuda_libs.sh dist/${{ matrix.os }}-${{ matrix.arch }}
      # Fail early if the exported tree lacks the two core binaries.
      - name: Verify Linux build payloads
        shell: bash
        run: |
          set -euo pipefail
          base="dist/${{ matrix.os }}-${{ matrix.arch }}"
          payloads=(
            "$base/bin/ollama"
            "$base/lib/ollama/llama-server"
          )
          for payload in "${payloads[@]}"; do
            if [ ! -f "$payload" ]; then
              echo "missing $payload"
              exit 1
            fi
          done
      # Sort each component into the manifest (*.tar.in) of the bundle it
      # belongs to. The first matching pattern wins; unmatched entries are skipped.
      - working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
        run: |
          prefix=ollama-${{ matrix.os }}-${{ matrix.arch }}
          for COMPONENT in bin/* lib/ollama/*; do
            case "$COMPONENT" in
              bin/ollama* | lib/ollama/*.so* | lib/ollama/llama-server* | lib/ollama/llama-quantize* | lib/ollama/cuda_v* | lib/ollama/vulkan*)
                manifest=$prefix.tar.in ;;
              lib/ollama/mlx* | lib/ollama/include*)
                manifest=$prefix-mlx.tar.in ;;
              lib/ollama/cuda_jetpack5)
                manifest=$prefix-jetpack5.tar.in ;;
              lib/ollama/cuda_jetpack6)
                manifest=$prefix-jetpack6.tar.in ;;
              lib/ollama/rocm_v*)
                manifest=$prefix-rocm.tar.in ;;
              *)
                continue ;;
            esac
            echo $COMPONENT >>$manifest
          done
      # The ROCm variant drops the base manifest so it does not produce the base bundle.
      - if: ${{ matrix.suffix == '-rocm' }}
        run: rm -f dist/${{ matrix.os }}-${{ matrix.arch }}/ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in
      # Print every manifest for the build log.
      - run: |
          echo "Manifests"
          for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in ; do
            echo $ARCHIVE
            cat $ARCHIVE
          done
      # Compress every manifest into its .tar.zst bundle in parallel. Each
      # pipeline runs in a subshell with pipefail and its pid is recorded, so a
      # failing tar or zstd fails the step. A bare `wait` always returns 0 and
      # would let a truncated bundle be uploaded.
      - run: |
          pids=()
          for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do
            (
              set -o pipefail
              tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | zstd -19 -T0 >$(basename ${ARCHIVE//.*/}.tar.zst)
            ) &
            pids+=($!)
          done
          failed=0
          for pid in "${pids[@]}"; do
            if ! wait $pid; then
              echo "::error::Archive compression failed (pid $pid)"
              failed=1
            fi
          done
          if [ $failed -ne 0 ]; then
            echo "One or more archives failed to build"
            exit 1
          fi
      # Published under the bundles-* prefix that the release job downloads.
      - uses: actions/upload-artifact@v4
        with:
          name: bundles-${{ matrix.os }}-${{ matrix.arch }}${{ matrix.suffix }}
          path: |
            *.tar.zst

  # Merge Docker images for the same flavor into a single multi-arch manifest
  # Runs once per flavor suffix: gathers the per-arch digests and publishes them
  # under the version tags computed by the metadata action.
  docker-merge-push:
    needs:
      - docker-build-push
    runs-on: linux
    environment: release
    strategy:
      matrix:
        suffix:
          - ''
          - '-rocm'
    steps:
      - uses: docker/login-action@v3
        with:
          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
          username: ${{ vars.DOCKER_USER }}
      # Compute the image tags; the flavor suffix is appended to each of them.
      - uses: docker/metadata-action@v4
        id: metadata
        with:
          images: |
            ${{ vars.DOCKER_REPO }}
          flavor: |
            latest=false
            suffix=${{ matrix.suffix }}
          tags: |
            type=ref,enable=true,priority=600,prefix=pr-,event=pr
            type=semver,pattern={{version}}
      # All digest files land side by side in the runner temp directory.
      - uses: actions/download-artifact@v4
        with:
          path: ${{ runner.temp }}
          pattern: digest-*
          merge-multiple: true
      # Only the digest files whose name ends in "-<suffix>.txt" belong to this
      # flavor; each digest becomes a "<repo>@<digest>" source argument.
      - working-directory: ${{ runner.temp }}
        run: |
          docker buildx imagetools create $(echo '${{ steps.metadata.outputs.json }}' | jq -cr '.tags | map("-t", .) | join(" ")') $(cat *-${{ matrix.suffix }}.txt | xargs printf '${{ vars.DOCKER_REPO }}@%s ')
          docker buildx imagetools inspect ${{ vars.DOCKER_REPO }}:${{ steps.metadata.outputs.version }}

  # Final release process
  # Collects every bundles-* artifact, checks the expected files are present,
  # writes checksums and uploads everything to the GitHub release for the tag.
  release:
    needs:
      - darwin-build
      - windows-app
      - docker-build-push
    runs-on: ubuntu-latest
    environment: release
    permissions:
      contents: write
    env:
      GH_TOKEN: ${{ github.token }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4
        with:
          path: dist
          pattern: bundles-*
          merge-multiple: true
      - name: Log dist contents
        run: |
          ls -l dist/
      - name: Copy install scripts to dist
        run: |
          cp scripts/install.sh dist/install.sh
      # Stop before publishing if any of the required files is missing.
      - name: Verify release artifacts
        run: |
          for payload in \
            dist/OllamaSetup.exe \
            dist/install.ps1 \
            dist/install.sh \
            dist/ollama-windows-amd64.zip \
            dist/ollama-windows-arm64.zip
          do
            [ -f "$payload" ] && continue
            echo "::error::Missing expected release artifact: $payload"
            exit 1
          done
      # Checksums for every file in dist except the checksum file itself.
      - name: Generate checksum file
        working-directory: dist
        run: |
          find . -type f -not -name 'sha256sum.txt' | xargs sha256sum | tee sha256sum.txt
      # The release name is the tag up to its first "-". An existing release
      # with that name is re-pointed at the new tag; otherwise a draft
      # prerelease is created.
      - name: Create or update Release for tag
        run: |
          RELEASE_VERSION="${GITHUB_REF_NAME%%-*}"
          echo "Looking for existing release for ${RELEASE_VERSION}"
          OLD_TAG=$(gh release ls --json name,tagName | jq -r ".[] | select(.name == \"${RELEASE_VERSION}\") | .tagName")
          if [ -z "$OLD_TAG" ]; then
            echo "Creating new release ${RELEASE_VERSION} pointing to tag ${GITHUB_REF_NAME}"
            gh release create ${GITHUB_REF_NAME} \
              --title ${RELEASE_VERSION} \
              --draft \
              --generate-notes \
              --prerelease
          else
            echo "Updating release ${RELEASE_VERSION} to point to new tag ${GITHUB_REF_NAME}"
            gh release edit ${OLD_TAG} --tag ${GITHUB_REF_NAME}
          fi
      # Uploads start one second apart and run in the background; every pid is
      # then waited on individually so a single failed upload fails the step.
      - name: Upload release artifacts
        run: |
          pids=()
          for payload in dist/*.txt dist/*.zip dist/*.tgz dist/*.tar.zst dist/*.exe dist/*.dmg dist/*.ps1 dist/*.sh ; do
            echo "Uploading $payload"
            gh release upload ${GITHUB_REF_NAME} $payload --clobber &
            pids+=($!)
            sleep 1
          done
          echo "Waiting for uploads to complete"
          failed=0
          for pid in "${pids[@]}"; do
            wait $pid && continue
            echo "::error::Upload failed (pid $pid)"
            failed=1
          done
          if [ $failed -eq 0 ]; then
            echo "done"
          else
            echo "One or more uploads failed"
            exit 1
          fi