From 338e7c8f32e208487cc72c3b9c54295672b0d538 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 22 Apr 2024 13:37:15 -0700 Subject: [PATCH] Updated linux_arm64 Dockerfile Ubuntu from 20.04 to 22.04 (LTS) to support C++20 in TensorFlow. PiperOrigin-RevId: 627141720 --- ci/official/containers/linux_arm64/Dockerfile | 13 ++-- ci/official/containers/linux_arm64/build.sh | 4 +- .../builder.devtoolset/build_devtoolset.sh | 60 +++++++++++++------ ci/official/envs/ci_default | 3 +- ci/official/envs/linux_arm64 | 2 +- 5 files changed, 57 insertions(+), 25 deletions(-) diff --git a/ci/official/containers/linux_arm64/Dockerfile b/ci/official/containers/linux_arm64/Dockerfile index 5ddf6b02f46d60..b702607182ea34 100644 --- a/ci/official/containers/linux_arm64/Dockerfile +++ b/ci/official/containers/linux_arm64/Dockerfile @@ -1,5 +1,5 @@ ################################################################################ -FROM ubuntu:20.04 as builder +FROM ubuntu:22.04 as builder ################################################################################ # Install devtoolset build dependencies @@ -23,17 +23,19 @@ COPY apt.conf /etc/apt/ RUN /build_patchelf.sh ################################################################################ -FROM nvidia/cuda:12.3.1-devel-ubuntu20.04 as devel +FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 as devel ################################################################################ COPY --from=builder /dt10 /dt10 +RUN echo "Step1" COPY --from=builder /patchelf/patchelf_0.14.3-1_arm64.deb /patchelf/patchelf_0.14.3-1_arm64.deb - +RUN echo "Step2" # Install devtoolset devel dependencies COPY setup.sources.sh /setup.sources.sh COPY setup.packages.sh /setup.packages.sh COPY devel.packages.txt /devel.packages.txt COPY cuda.packages.txt /cuda.packages.txt RUN /setup.sources.sh && /setup.packages.sh /devel.packages.txt +RUN echo "Step3" # Install various tools. 
# - bats: bash unit testing framework @@ -42,10 +44,13 @@ RUN /setup.sources.sh && /setup.packages.sh /devel.packages.txt # - buildifier: clean bazel build deps # - buildozer: clean bazel build deps RUN git clone --branch v1.7.0 https://github.com/bats-core/bats-core.git && bats-core/install.sh /usr/local && rm -rf bats-core +RUN echo "Step4" RUN wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 https://github.com/bazelbuild/bazelisk/releases/download/v1.12.0/bazelisk-linux-arm64 -O /usr/local/bin/bazel && chmod +x /usr/local/bin/bazel +RUN echo "Step5" RUN wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 https://github.com/bazelbuild/buildtools/releases/download/4.2.5/buildifier-linux-arm64 -O /usr/local/bin/buildifier && chmod +x /usr/local/bin/buildifier +RUN echo "Step6" RUN wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 https://github.com/bazelbuild/buildtools/releases/download/4.2.5/buildozer-linux-arm64 -O /usr/local/bin/buildozer && chmod +x /usr/local/bin/buildozer - +RUN echo "Step7" RUN groupadd -g 1001 buildslave && useradd -m -u 1001 -g buildslave buildslave RUN mkdir -p /tf/venv RUN chown -R buildslave:buildslave /tf diff --git a/ci/official/containers/linux_arm64/build.sh b/ci/official/containers/linux_arm64/build.sh index 5d6a40658bd782..064ff834498a2c 100755 --- a/ci/official/containers/linux_arm64/build.sh +++ b/ci/official/containers/linux_arm64/build.sh @@ -44,7 +44,9 @@ fi # almost all of the same cache layers export DOCKER_BUILDKIT=1 for target in jax tf; do - IMAGE="gcr.io/tensorflow-sigs/build-arm64:$target-$TAG" + # IMAGE="gcr.io/tensorflow-sigs/build-arm64:$target-$TAG" + IMAGE="gcr.io/tensorflow-sigs/build-arm64:$target-latest-multi-python" + docker pull "$IMAGE" || true # Due to some flakiness of resources pulled in the build, allow the docker # command to reattempt build a few times in the case of failure (b/302558736) diff --git 
a/ci/official/containers/linux_arm64/builder.devtoolset/build_devtoolset.sh b/ci/official/containers/linux_arm64/builder.devtoolset/build_devtoolset.sh index 2bba3fe55ac8d0..febf3ef3449884 100755 --- a/ci/official/containers/linux_arm64/builder.devtoolset/build_devtoolset.sh +++ b/ci/official/containers/linux_arm64/builder.devtoolset/build_devtoolset.sh @@ -25,7 +25,7 @@ devtoolset-9) LIBSTDCXX_ABI="new" ;; devtoolset-10) - LIBSTDCXX_VERSION="6.0.28" + LIBSTDCXX_VERSION="6.0.30" LIBSTDCXX_ABI="new" ;; *) @@ -52,25 +52,47 @@ ln -s "/usr/include/aarch64-linux-gnu/asm" "${TARGET}/usr/include/asm" mkdir -p glibc-src mkdir -p glibc-build cd glibc-src -wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 "https://vault.centos.org/centos/7/os/Source/SPackages/glibc-2.17-317.el7.src.rpm" -rpm2cpio "glibc-2.17-317.el7.src.rpm" |cpio -idmv -tar -xvzf "glibc-2.17-c758a686.tar.gz" --strip 1 -tar -xvzf "glibc-2.17-c758a686-releng.tar.gz" --strip 1 -sed -i '/patch0060/d' glibc.spec -/rpm-patch.sh "glibc.spec" -rm -f "glibc-2.17-317.el7.src.rpm" "glibc-2.17-c758a686.tar.gz" "glibc-2.17-c758a686-releng.tar.gz" -patch -p1 < /gcc9-fixups.patch -patch -p1 < /stringop_trunc.patch + +# wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 "https://vault.centos.org/centos/7/os/Source/SPackages/glibc-2.17-317.el7.src.rpm" +wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 "https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os/Packages/glibc-all-langpacks-2.34-105.el9.x86_64.rpm" + + + +# rpm2cpio "glibc-2.17-317.el7.src.rpm" |cpio -idmv +rpm2cpio "glibc-all-langpacks-2.34-105.el9.x86_64.rpm" |cpio -idmv + +echo "HELLO_5" +# tar -xvzf "glibc-2.17-c758a686.tar.gz" --strip 1 +tar -xvzf "glibc-all-langpacks-2.34-105.el9.x86_64.tar.gz" --strip 1 + +# echo "HELLO_6" +# tar -xvzf "glibc-2.17-c758a686-releng.tar.gz" --strip 1 + +# echo "HELLO_7" +# sed -i '/patch0060/d' glibc.spec +# /rpm-patch.sh 
"glibc.spec" + +# echo "HELLO_8" +# rm -f "glibc-2.17-317.el7.src.rpm" "glibc-2.17-c758a686.tar.gz" "glibc-2.17-c758a686-releng.tar.gz" + +# echo "HELLO_9" +# patch -p1 < /gcc9-fixups.patch + +# echo "HELLO_10" +# patch -p1 < /stringop_trunc.patch cd ../glibc-build ../glibc-src/configure --prefix=/usr --disable-werror --enable-obsolete-rpc --disable-profile + +echo "HELLO_12" make -j$(nproc) make install DESTDIR=${TARGET} cd .. # Symlinks in the binary distribution are set up for installation in /usr, we # need to fix up all the links to stay within /${TARGET}. -/fixlinks.sh "/${TARGET}" +/fixlinks.sh "/${TARGET}" +echo "HELLO_13" # Patch to allow non-glibc 2.12 compatible builds to work. sed -i '54i#define TCP_USER_TIMEOUT 18' "/${TARGET}/usr/include/netinet/tcp.h" @@ -81,7 +103,7 @@ wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 unar "libstdc++6_4.8.1-10ubuntu8_arm64.deb" && \ tar -C "${TARGET}" -xvzf "libstdc++6_4.8.1-10ubuntu8_arm64/data.tar.gz" "./usr/lib/aarch64-linux-gnu/libstdc++.so.6.0.18" && \ rm -rf "libstdc++6_4.8.1-10ubuntu8_arm64.deb" "libstdc++6_4.8.1-10ubuntu8_arm64" - +echo "HELLO_14" mkdir -p "${TARGET}-src" cd "${TARGET}-src" @@ -93,15 +115,16 @@ devtoolset-9) tar -xvf "gcc-9.3.1-20200408.tar.xz" --strip 1 ;; devtoolset-10) wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 "https://vault.centos.org/centos/7/sclo/Source/rh/devtoolset-10-gcc-10.2.1-11.2.el7.src.rpm" rpm2cpio "devtoolset-10-gcc-10.2.1-11.2.el7.src.rpm" |cpio -idmv + echo "HELLO_15" tar -xvf "gcc-10.2.1-20210130.tar.xz" --strip 1 ;; esac # Apply the devtoolset patches to gcc. 
/rpm-patch.sh "gcc.spec" - +echo "HELLO_16" ./contrib/download_prerequisites mkdir -p "${TARGET}-build" @@ -133,7 +156,7 @@ cd "${TARGET}-build" make -j$(nproc) && \ make install - +echo "HELLO_17" # Create the devtoolset libstdc++ linkerscript that links dynamically against # the system libstdc++ 4.4 and provides all other symbols statically. # Note that the installation path for libstdc++ here is ${TARGET}/usr/lib64/ @@ -144,14 +167,15 @@ echo -e "OUTPUT_FORMAT(elf64-littleaarch64)\nINPUT ( libstdc++.so.6.0.18 -lstdc+ cp "./aarch64-unknown-linux-gnu/libstdc++-v3/src/.libs/libstdc++_nonshared44.a" \ "${TARGET}/usr/lib64" - +echo "HELLO_18" # Link in architecture specific includes from the system; note that we cannot # link in the whole aarch64-linux-gnu folder, as otherwise we're overlaying # system gcc paths that we do not want to find. # TODO(klimek): Automate linking in all non-gcc / non-kernel include # directories. mkdir -p "${TARGET}/usr/include/aarch64-linux-gnu" -PYTHON_VERSIONS=("python3.8" "python3.9" "python3.10" "python3.11") +PYTHON_VERSIONS=("python3.8" "python3.9" "python3.10" "python3.11" "python3.12") for v in "${PYTHON_VERSIONS[@]}"; do ln -s "/usr/local/include/${v}" "${TARGET}/usr/include/aarch64-linux-gnu/${v}" done + diff --git a/ci/official/envs/ci_default b/ci/official/envs/ci_default index 7db6569b3dc075..92677cc0ad0b3d 100644 --- a/ci/official/envs/ci_default +++ b/ci/official/envs/ci_default @@ -40,7 +40,8 @@ TFCI_DOCKER_ENABLE= TFCI_DOCKER_IMAGE= TFCI_DOCKER_PULL_ENABLE= TFCI_DOCKER_REBUILD_ARGS= -TFCI_DOCKER_REBUILD_ENABLE= +# DO_NOT_SUBMIT - This should be disabled before merge +TFCI_DOCKER_REBUILD_ENABLE=1 TFCI_DOCKER_REBUILD_UPLOAD_ENABLE= TFCI_GIT_DIR= TFCI_INDEX_HTML_ENABLE= diff --git a/ci/official/envs/linux_arm64 b/ci/official/envs/linux_arm64 index 161b0e2e803822..491cfd8781a681 100644 --- a/ci/official/envs/linux_arm64 +++ b/ci/official/envs/linux_arm64 @@ -19,7 +19,7 @@ TFCI_BAZEL_TARGET_SELECTING_CONFIG_PREFIX=linux_arm64 # 
despite lacking Nvidia CUDA support. TFCI_BUILD_PIP_PACKAGE_ARGS="--repo_env=WHEEL_NAME=tensorflow" TFCI_DOCKER_ENABLE=1 -TFCI_DOCKER_IMAGE=gcr.io/tensorflow-sigs/build-arm64:tf-2-16-multi-python +TFCI_DOCKER_IMAGE=gcr.io/tensorflow-sigs/build@sha256:dddcaf30321e9007103dce75c51b83fea3c06de462fcf41e7c6ae93f37fc3545 TFCI_DOCKER_PULL_ENABLE=1 TFCI_DOCKER_REBUILD_ARGS="--target=tf ci/official/containers/linux_arm64" TFCI_INDEX_HTML_ENABLE=1