Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Segfault on riscv64 #203

Open
yosifkit opened this issue Jun 27, 2024 · 14 comments
Open

Segfault on riscv64 #203

yosifkit opened this issue Jun 27, 2024 · 14 comments

Comments

@yosifkit
Copy link
Member

As discovered in #202, busybox segfaults when running on real riscv64 hardware but works fine on QEMU 😭. Just opening this as a tracking issue.

+ gdb -core=rootfs/core -silent -ex bt full -ex quit busybox_unstripped
Reading symbols from busybox_unstripped...

warning: core file may not match specified executable file.
[New LWP 11]
Core was generated by `nslookup google.com'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  __GI_memset (dstpp=dstpp@entry=0x3ffc0d1530, c=c@entry=0, 
    len=<optimized out>, len@entry=16777216)
    at libc/string/generic/memset.c:50
50		  ((op_t *) dstp)[0] = cccc;
#0  __GI_memset (dstpp=dstpp@entry=0x3ffc0d1530, c=c@entry=0, 
    len=<optimized out>, len@entry=16777216)
    at libc/string/generic/memset.c:50
        xlen = <optimized out>
        cccc = 0
        dstp = 274811655472
#1  0x000000000010c220 in __poll_nocancel (fds=fds@entry=0x3ffd0d1628, 
    nfds=nfds@entry=1, timeout=<optimized out>)
    at libc/sysdeps/linux/common/poll.c:70
        max_fd_size = 1073741816
        tv = {tv_sec = 16, tv_usec = 2}
        rset = 0x3ffc0d1530
        wset = 0x3ffb0d1530
        xset = <optimized out>
        f = <optimized out>
        ready = <optimized out>
        error_num = <optimized out>
        maxfd = 0
        bytes = 16777216
#2  0x000000000010c740 in __GI_poll (fds=fds@entry=0x3ffd0d1628, 
    nfds=nfds@entry=1, timeout=timeout@entry=2500)
    at libc/sysdeps/linux/common/poll.c:215
        oldtype = <optimized out>
        result = <optimized out>
#3  0x0000000000047d68 in send_queries (ns=0x19e80e70)
    at networking/nslookup.c:569
        qn = <optimized out>
        recvlen = <optimized out>
        reply = "X\004\023\000\000\000\000\000h\004\023\000\000\000\000\000X\004\023\000\000\000\000\000\374\375\347\031", '\000' <repeats 12 times>, "\001", '\000' <repeats 15 times>, "\001", '\000' <repeats 16 times>, "\027\r\375?\000\000\000`r\027\000\000\000\000\000\024\235\022", '\000' <repeats 13 times>, "t\004\023\000\000\000\000\000X\004\023\000\000\000\000\000h\004\023\000\000\000\000\000X\004\023\000\000\000\000\000\374\375\347\031", '\000' <repeats 28 times>, "\001", '\000' <repeats 16 times>, "\027\r\375?\000\000\000`r\027\000\000\000\000\000"...
        rcode = <optimized out>
        local_lsa = 0x19e80850
        pfd = {fd = 3, events = 1, revents = 0}
        servfail_retry = 4
        n_replies = 0
        retry_interval = <optimized out>
        timeout = 5000
        tstart = 97021078
        tsent = 97021078
        tcur = 97021078
#4  0x00000000000484fc in nslookup_main (argc=<optimized out>, 
    argv=<optimized out>, argv@entry=0x3ffd0d1c98)
    at networking/nslookup.c:984
        c = <optimized out>
        types = 0
        rc = 0
        err = <optimized out>
#5  0x0000000000010b58 in run_applet_no_and_exit (applet_no=<optimized out>, 
    name=name@entry=0x3ffd0d1ed1 "nslookup", argv=argv@entry=0x3ffd0d1c98)
    at libbb/appletlib.c:969
        argc = <optimized out>
#6  0x0000000000010f28 in run_applet_and_exit (name=0x3ffd0d1ed1 "nslookup", 
    argv=argv@entry=0x3ffd0d1c98) at libbb/appletlib.c:988
        applet = <optimized out>
#7  0x0000000000010fc0 in main (argc=<optimized out>, argv=0x3ffd0d1c98)
    at libbb/appletlib.c:1128
No locals.

Originally posted by @tianon in #202 (comment)

@tianon
Copy link
Member

tianon commented Jun 27, 2024

Options:

  • revert Update buildroot to 2024.02.3 #201
  • do more bisecting
  • try system emulation instead of user emulation (to see if we can emulate a faster machine that reproduces the segfault so bisecting is easier)
  • do the official builds emulated and just plug our ears ( 😭 😬 )

@tianon
Copy link
Member

tianon commented Jul 9, 2024

Good news! It segfaults in system emulation too!!

@tianon
Copy link
Member

tianon commented Jul 9, 2024

Hmm, package/uclibc (which is where our uClibc version comes from: https://github.com/buildroot/buildroot/blob/2024.02.3/package/uclibc/uclibc.hash) didn't change between 2024.02.2 and 2024.02.3, so while the segfault clearly manifests somewhere inside uClibc, it's probably not caused by a change in uClibc itself.

@tianon
Copy link
Member

tianon commented Jul 9, 2024

This seems like a more likely candidate: (buildroot/buildroot@5d9c54d)

diff --git a/package/gcc/gcc.hash b/package/gcc/gcc.hash
index 5061a603bc..964fbc97df 100644
--- a/package/gcc/gcc.hash
+++ b/package/gcc/gcc.hash
@@ -6,8 +6,8 @@ sha512  440c08ca746da450d9a1b35e8fd2305cb27e7e6987cd9d0f7d375f3b1fc9e4b0bd7acb3c
 sha512  a5018bf1f1fa25ddf33f46e720675d261987763db48e7a5fdf4c26d3150a8abcb82fdc413402df1c32f2e6b057d9bae6bdfa026defc4030e10144a8532e60f14  gcc-11.4.0.tar.xz
 # From https://gcc.gnu.org/pub/gcc/releases/gcc-12.3.0/sha512.sum
 sha512  8fb799dfa2e5de5284edf8f821e3d40c2781e4c570f5adfdb1ca0671fcae3fb7f794ea783e80f01ec7bfbf912ca508e478bd749b2755c2c14e4055648146c204  gcc-12.3.0.tar.xz
-# From https://gcc.gnu.org/pub/gcc/releases/gcc-13.2.0/sha512.sum
-sha512  d99e4826a70db04504467e349e9fbaedaa5870766cda7c5cab50cdebedc4be755ebca5b789e1232a34a20be1a0b60097de9280efe47bdb71c73251e30b0862a2  gcc-13.2.0.tar.xz
+# From https://gcc.gnu.org/pub/gcc/releases/gcc-13.3.0/sha512.sum
+sha512  ed5f2f4c6ed2c796fcf2c93707159e9dbd3ddb1ba063d549804dd68cdabbb6d550985ae1c8465ae9a336cfe29274a6eb0f42e21924360574ebd8e5d5c7c9a801  gcc-13.3.0.tar.xz
 
 # Locally calculated (fetched from Github)
 sha512  4dca20f517a42bb027fec605965b09fb917a535eebf3fe3e811d93476b02b1962df5ad4665f117bd44c2ec8e8015d51a44c00591761fe5f259c201ac5c7d920f  gcc-arc-2023.09-release.tar.gz

@tianon
Copy link
Member

tianon commented Jul 10, 2024

Oh, that was a dead end -- the default is gcc-12, not gcc-13, so we get 12.3.0 with or without that patch. 🤦

@tianon
Copy link
Member

tianon commented Jul 10, 2024

In the pursuit of further narrowing things down, the upgrade from kernel headers 6.6.22 to 6.6.32 is also not the culprit.

@tianon
Copy link
Member

tianon commented Jul 11, 2024

buildroot/buildroot@a270097 is another dead-end (it was a long shot, but I'm running out of promising things in git diff 2024.02.2..2024.02.3 😭)

@tianon
Copy link
Member

tianon commented Jul 12, 2024

Well, the reason I felt like buildroot was gaslighting me was because I was gaslighting myself. I can reproduce the segfault on 2024.02.2 as well. 😭

@tianon
Copy link
Member

tianon commented Jul 12, 2024

Confirmed, the current published busybox:uclibc image segfaults too. 😮‍💨

Edit: * on native riscv64 hardware

@tianon
Copy link
Member

tianon commented Jul 12, 2024

Also segfaulting (thanks to repo-info):

  • busybox@sha256:550a3dba1de5d4598fcb6ef9dbb7953b21166ef3f1bae1400e36aa58a2bf22bf is 2024.02.1
  • busybox@sha256:9b820d1fb7c0d72dc25bf71e8ea6e1e690e3a2751610e4481a5e7372ae7dc8a3 is 2024.02.0
  • busybox@sha256:f681eed9b2db497321012a47fa83f50edccb5ae789b07dea2e608e81e05f5899 is 2023.11.1
  • busybox@sha256:41af629cfa908c7ed7550db6a260c79d6dd55fe4996d7481595e3d547ab829c9 is the last build before OCI imports
  • busybox@sha256:0187895b668a1225646bb3454d3373ee8f35bb2122a1d0fda61add9e1a5a5e8b
  • busybox@sha256:d7817958f6c97c4df87a48a2f619e8f8bb22f7294e42f2554000565c67cafd77
  • busybox@sha256:cb012a73b8b1b2f8485adcaef1ecd4b3576d0cc2bd5f361a88aed35be30e82eb is the last build before meta-scripts / buildkit (so same exact rootfs as the previous build)
  • busybox@sha256:1411f4a8c78f5fadafa8f733e71f6ff01dfd637263ae090d68511a6e152451e3
  • busybox@sha256:1bc902c78702c7852f15ef579b1609f215915c5a670de1abd8f16a00a8997625
  • busybox@sha256:e3012943882fb37a7847d6127d82984f212d1606754a9f72d0a04757c9197524
  • ...
  • busybox@sha256:5a43a82d5e4a8f1a827c0bb63648460af4e6a9f3c95a039c446614045a695b84 is the very first one we ever pushed, and it ... segfaults too 😭

@tianon
Copy link
Member

tianon commented Jul 12, 2024

In better news, busybox:glibc and busybox:musl are both fine, so we could just disable our riscv64 builds of uclibc and call it good, but it should be supported. 😭

@tianon
Copy link
Member

tianon commented Jul 12, 2024

I should also clarify that this isn't just nslookup -- running the interactive shell segfaults reliably for me as well.

@tianon
Copy link
Member

tianon commented Jul 30, 2024

I finally got gdb built successfully! ...And it segfaults too 😭

edit: just so I have it recorded somewhere, https://buildroot.uclibc.narkive.com/7mJA2hV6/target-full-gdb-fails-to-build-statically was really relevant to my getting gdb compiled successfully (even though it didn't work at all)

Extra Interesting Tidbits:
diff --git a/latest/uclibc/Dockerfile.builder b/latest/uclibc/Dockerfile.builder
index 067e156..679ac6a 100644
--- a/latest/uclibc/Dockerfile.builder
+++ b/latest/uclibc/Dockerfile.builder
@@ -4,7 +4,7 @@
 # PLEASE DO NOT EDIT IT DIRECTLY.
 #
 
-FROM debian:bookworm-slim
+FROM debian:unstable-slim
 
 RUN set -eux; \
 	apt-get update; \
@@ -63,14 +60,28 @@ RUN set -eux; \
 	cd /usr/src/buildroot; \
 	\
 	setConfs=' \
-		BR2_STATIC_LIBS=y \
+		BR2_PACKAGE_HOST_ENVIRONMENT_SETUP=y \
+#		BR2_STATIC_LIBS=y \
 		BR2_TOOLCHAIN_BUILDROOT_UCLIBC=y \
 		BR2_TOOLCHAIN_BUILDROOT_WCHAR=y \
+		\
+# this is necessary for "gdb" (https://buildroot.uclibc.narkive.com/7mJA2hV6/target-full-gdb-fails-to-build-statically), but makes "getconf" dynamic and there is no configuration option that makes it static again (that behavior is hard-coded in the uclibc makefiles)
+		BR2_SHARED_STATIC_LIBS=y \
+		BR2_ENABLE_DEBUG=y \
+		BR2_PTHREAD_DEBUG=y \
+#		BR2_GCC_VERSION_14_X=y \
+		BR2_TOOLCHAIN_BUILDROOT_CXX=y \
+		BR2_PACKAGE_GDB=y \
+		BR2_PACKAGE_GDB_DEBUGGER=y \
+		BR2_PACKAGE_GDB_SERVER=y \
+		BR2_PACKAGE_HOST_GDB=y \
 	'; \
 	\
 	unsetConfs=' \
 		BR2_SHARED_LIBS \
 		BR2_TOOLCHAIN_BUILDROOT_GLIBC \
+		\
+		BR2_STRIP_strip \
 	'; \
 	\
 # buildroot arches: https://git.busybox.net/buildroot/tree/arch
@@ -189,19 +200,27 @@ RUN set -eux; \
 	done; \
 	for confV in $setConfs; do \
 		grep -q "^$confV\$" .config; \
-	done;
+	done
+
+COPY uclibc-static-utils.patch /usr/src/buildroot/package/uclibc/9999-doi-static-utils.patch
+
+RUN set -eux; \
+# this pre-downloads everything necessary for compilation
+	make -C /usr/src/buildroot source
 
 # https://www.finnie.org/2014/02/13/compiling-busybox-with-uclibc/
 RUN set -eux; \
 # force a particular GNU arch for "host-gmp" (otherwise it fails on some arches)
 	gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)"; \
 	make -C /usr/src/buildroot \
-		HOST_GMP_CONF_OPTS="--build='"$gnuArch"'" \
+		HOST_GMP_CONF_OPTS="--build='$gnuArch'" \
 # building host-tar:
 #   configure: error: you should not run configure as root (set FORCE_UNSAFE_CONFIGURE=1 in environment to bypass this check)
 		FORCE_UNSAFE_CONFIGURE=1 \
 		-j "$(nproc)" \
 		toolchain
+RUN make -C /usr/src/buildroot -j "$(nproc)" host-gdb
+RUN make -C /usr/src/buildroot -j "$(nproc)" gdb
 ENV PATH /usr/src/buildroot/output/host/usr/bin:$PATH
 
 # pub   1024D/ACC9965B 2006-12-12
@@ -235,6 +254,15 @@ RUN set -eux; \
 
 WORKDIR /usr/src/busybox
 
+# https://github.com/docker-library/busybox/issues/198
+# https://bugs.busybox.net/show_bug.cgi?id=15931
+# https://bugs.debian.org/1071648
+RUN set -eux; \
+	curl -fL -o busybox-no-cbq.patch 'https://bugs.busybox.net/attachment.cgi?id=9751'; \
+	echo '6671a12c48dbcefb653fc8403d1f103a1e2eba4a49b1ee9a9c27da8aa2db80d4 *busybox-no-cbq.patch' | sha256sum -c -; \
+	patch -p1 --input=busybox-no-cbq.patch; \
+	rm busybox-no-cbq.patch
+
 RUN set -eux; \
 	\
 # build date/time gets embedded in the BusyBox binary -- SOURCE_DATE_EPOCH should override that
@@ -288,10 +316,10 @@ RUN set -eux; \
 	nproc="$(nproc)"; \
 	CROSS_COMPILE="$(basename /usr/src/buildroot/output/host/usr/*-buildroot-linux-uclibc*)"; \
 	export CROSS_COMPILE="$CROSS_COMPILE-"; \
-	make -j "$nproc" busybox; \
-	./busybox --help; \
+	make -j "$nproc" busybox_unstripped; \
+	./busybox_unstripped --help; \
 	mkdir -p rootfs/bin; \
-	ln -vL busybox rootfs/bin/; \
+	ln -vLT busybox_unstripped rootfs/bin/busybox; \
 	\
 # copy "getconf" from buildroot
 	ln -vL ../buildroot/output/target/usr/bin/getconf rootfs/bin/; \
$ cat latest/uclibc/uclibc-static-utils.patch
Description: make uclibc utils (esp. "getconf") static no matter how we compile

diff --git a/utils/Makefile.in b/utils/Makefile.in
index 1544c8fcc..104eba2db 100644
--- a/utils/Makefile.in
+++ b/utils/Makefile.in
@@ -25,9 +25,7 @@ else
 CFLAGS-utils-shared :=
 endif
 
-ifneq ($(HAVE_SHARED),y)
 CFLAGS-utils += -static
-endif
 
 CFLAGS-ldconfig := -DBUILDING_LINKAGE
 ifeq ($(UCLIBC_STATIC_LDCONFIG),y)

@tianon
Copy link
Member

tianon commented Aug 29, 2024

> In better news, busybox:glibc and busybox:musl are both fine, so we could just disable our riscv64 builds of uclibc and call it good, but it should be supported. 😭

This is the one, for now.

Edit: docker-library/oi-janky-groovy@1d96094

tianon added a commit to docker-library/oi-janky-groovy that referenced this issue Aug 29, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants