Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Misc] Add CRaC support #869

Merged
merged 17 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
b5441df
[Runtime] Backport CRaC from CRaC Project(base on JDK17) to AJDK11
lingjun-cg Sep 14, 2021
dcef622
[Runtime] Fix bugs when test CRaC on presto.
lingjun-cg Jul 27, 2023
3cf7368
[Runtime] Fix CRaC failed jtreg testcases and support load CRaC class…
lingjun-cg Sep 5, 2023
5eb1b5c
[Runtime] CRaC support validate OS version, JVM version, CPU features…
lingjun-cg Oct 31, 2023
30387d9
[Runtime] Fix a CRaC testcase due to interface changes from upstream
jia-wei-tang Jan 19, 2024
1f8c70b
[Runtime] Avoid thread hang in SafepointSynchronize::begin() by set t…
lingjun-cg Jan 30, 2024
eefc2c2
[Runtime] CRaC: add features support flink checkpoint and restore.
lingjun-cg Nov 16, 2023
a328b91
[Runtime] CRaC: Support restore pipes.
lingjun-cg Mar 5, 2024
51b837c
[Runtime] Fix failed CRaC testcases.
lingjun-cg Mar 11, 2024
a221a94
[Runtime] CRaC: Remove checking the OS kernel version when restore
lingjun-cg Mar 19, 2024
3bbf446
[Backport] 8212129: Remove finalize methods from java.util.zip.ZipFIl…
Oct 26, 2018
b93450c
[Runtime] CRaC: Resample JMX localhost cache when restore.
lingjun-cg Mar 25, 2024
15dfd3e
[Runtime] CRaC: Fixed failed jdk/crac/LazyProps.java testcase.
lingjun-cg Mar 28, 2024
60f69c3
[Runtime] Fix unstable jdk/crac/recursiveCheckpoint/Test.java
lingjun-cg Jul 30, 2024
326332f
[Runtime] CRaC: Restore stdout and stderr when run CRaC in unprivileg…
lingjun-cg Sep 23, 2024
05bd159
[Runtime] CRaC: fix the bug that cannot restore pipe fd successful.
lingjun-cg Oct 8, 2024
965486d
[Runtime] CRaC: no need to close the files that open with write&appen…
lingjun-cg Oct 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions make/CompileDemos.gmk
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@ $(eval $(call SetupBuildDemo, TransparentRuler, \
MAIN_CLASS := transparentruler.Ruler, \
))

# Build the JavaCompilerCRaC demo. DEMO_SUBDIR is set to "crac"; the demo
# sources added with it live under src/demo/share/crac/JavaCompilerCRaC.
$(eval $(call SetupBuildDemo, JavaCompilerCRaC, \
DEMO_SUBDIR := crac, \
))

################################################################################
# Copy html and README files.

Expand Down
9 changes: 9 additions & 0 deletions make/autoconf/libraries.m4
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES],
BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lthread"
fi

# librt for legacy clock_gettime
if test "x$OPENJDK_TARGET_OS" = xlinux; then
# Hotspot needs to link librt to get the clock_* functions.
# But once our supported minimum build and runtime platform
# has glibc 2.17, this can be removed as the functions are
# in libc.
BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lrt"
fi

# Because RISC-V only has word-sized atomics, it requires libatomic where
# other common architectures do not. So link libatomic by default.
if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xriscv64; then
Expand Down
5 changes: 5 additions & 0 deletions make/hotspot/symbols/symbols-shared
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,8 @@ JNI_GetDefaultJavaVMInitArgs
JVM_FindClassFromBootLoader
JVM_GetVersionInfo
JVM_InitAgentProperties
JVM_Checkpoint
JVM_RegisterPersistent
JVM_DeregisterPersistent
JVM_RegisterPseudoPersistent
JVM_UnregisterPseudoPersistent
2 changes: 2 additions & 0 deletions make/hotspot/symbols/symbols-unix
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,5 @@ JVM_AddModuleExportsToAllUnnamed
JVM_AddReadsModule
JVM_DefineModule
JVM_SetBootLoaderUnnamedModule

JVM_CheckpointEnabled
38 changes: 38 additions & 0 deletions make/launcher/Launcher-java.base.gmk
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,41 @@ ifeq ($(call isTargetOs, macosx solaris aix linux), true)
endif

################################################################################

# CRaC engine executables, built only when the target OS is Linux.
# Each one is compiled from a single C file under
# src/$(MODULE)/unix/native/<name> and written to
# $(SUPPORT_OUTPUTDIR)/modules_libs/$(MODULE).
ifeq ($(OPENJDK_TARGET_OS), linux)
# criuengine: the only engine built with OPTIMIZATION := HIGH.
$(eval $(call SetupJdkExecutable, BUILD_CRIUENGINE, \
NAME := criuengine, \
SRC := $(TOPDIR)/src/$(MODULE)/unix/native/criuengine, \
INCLUDE_FILES := criuengine.c, \
OPTIMIZATION := HIGH, \
CFLAGS := $(CFLAGS_JDKEXE), \
LDFLAGS := $(LDFLAGS), \
OUTPUT_DIR := $(SUPPORT_OUTPUTDIR)/modules_libs/$(MODULE), \
))
TARGETS += $(BUILD_CRIUENGINE)

# pauseengine: built with OPTIMIZATION := LOW.
$(eval $(call SetupJdkExecutable, BUILD_PAUSEENGINE, \
NAME := pauseengine, \
SRC := $(TOPDIR)/src/$(MODULE)/unix/native/pauseengine, \
INCLUDE_FILES := pauseengine.c, \
OPTIMIZATION := LOW, \
CFLAGS := $(CFLAGS_JDKEXE), \
LDFLAGS := $(LDFLAGS), \
OUTPUT_DIR := $(SUPPORT_OUTPUTDIR)/modules_libs/$(MODULE), \
))
TARGETS += $(BUILD_PAUSEENGINE)

# simengine: built with OPTIMIZATION := LOW.
$(eval $(call SetupJdkExecutable, BUILD_SIMENGINE, \
NAME := simengine, \
SRC := $(TOPDIR)/src/$(MODULE)/unix/native/simengine, \
INCLUDE_FILES := simengine.c, \
OPTIMIZATION := LOW, \
CFLAGS := $(CFLAGS_JDKEXE), \
LDFLAGS := $(LDFLAGS), \
OUTPUT_DIR := $(SUPPORT_OUTPUTDIR)/modules_libs/$(MODULE), \
))
TARGETS += $(BUILD_SIMENGINE)

endif

################################################################################
5 changes: 5 additions & 0 deletions src/demo/share/crac/JavaCompilerCRaC/Compile.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/**
 * Launcher class for the demo: hands its command-line arguments unchanged to
 * {@link JavaCompilerCRaC#runJavac(String...)} as a single javac invocation.
 */
public class Compile {
// Delegates to JavaCompilerCRaC.runJavac, which exits the JVM with javac's
// status when that status is non-zero.
public static void main(String... args) throws Exception {
JavaCompilerCRaC.runJavac(args);
}
}
29 changes: 29 additions & 0 deletions src/demo/share/crac/JavaCompilerCRaC/JavaCompilerCRaC.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import java.util.Arrays;
import jdk.crac.Core;

/**
 * Demo driver: runs one or more in-process javac invocations and then calls
 * {@code Core.checkpointRestore()}.
 *
 * <p>The argument list is split into separate javac command lines at every
 * {@code "--"} token. The scan for separators starts at index 1, so a
 * {@code "--"} in the very first position is passed to javac as an ordinary
 * argument.
 */
public class JavaCompilerCRaC {

    /**
     * Echoes the javac command line to standard output, runs the compiler
     * in-process, and terminates the JVM with the compiler's status if that
     * status is non-zero.
     *
     * @param args the arguments for a single javac invocation
     */
    static void runJavac(String... args) {
        System.out.println("javac " + String.join(" ", args));
        final int exitCode = com.sun.tools.javac.Main.compile(args);
        if (exitCode == 0) {
            return;
        }
        System.exit(exitCode);
    }

    /**
     * Runs each "--"-separated group of arguments through {@link #runJavac},
     * in order, and then calls {@code Core.checkpointRestore()}.
     *
     * @param args javac command lines separated by {@code "--"}
     */
    public static void main(String... args) throws Exception {
        int batchStart = 0;
        int cursor = 1;
        while (cursor < args.length) {
            if (args[cursor].equals("--")) {
                // Everything since the previous separator is one javac run.
                runJavac(Arrays.copyOfRange(args, batchStart, cursor));
                batchStart = cursor + 1;
            }
            cursor++;
        }

        // Arguments after the last separator (or all of them when there is
        // no separator) form the final javac run.
        final boolean hasTrailingBatch = batchStart < args.length;
        if (hasTrailingBatch) {
            runJavac(Arrays.copyOfRange(args, batchStart, args.length));
        }

        Core.checkpointRestore();
    }
}
138 changes: 57 additions & 81 deletions src/hotspot/os/linux/attachListener_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/os.inline.hpp"
#include "services/attachListener.hpp"
#include "attachListener_linux.hpp"
#include "services/dtraceAttacher.hpp"
#include "linuxAttachOperation.hpp"
#include "memory/resourceArea.hpp"

#include <unistd.h>
#include <signal.h>
Expand All @@ -37,10 +40,6 @@
#include <sys/un.h>
#include <sys/stat.h>

#ifndef UNIX_PATH_MAX
#define UNIX_PATH_MAX sizeof(((struct sockaddr_un *)0)->sun_path)
#endif

// The attach mechanism on Linux uses a UNIX domain socket. An attach listener
// thread is created at startup or is created on-demand via a signal from
// the client tool. The attach listener creates a socket and binds it to a file
Expand All @@ -57,78 +56,12 @@
// obtain the credentials of client. We check that the effective uid
// of the client matches this process.

// forward reference
class LinuxAttachOperation;

// Static-only holder for the attach listener's socket state: the filesystem
// path the UNIX domain socket is bound to and the listening descriptor.
class LinuxAttachListener: AllStatic {
 private:
  // filesystem path the UNIX domain socket is bound to, and whether it is set
  static char _path[UNIX_PATH_MAX];
  static bool _has_path;

  // descriptor of the listening socket
  static volatile int _listener;

  static bool _atexit_registered;

  // reads a request from the connected socket s
  static LinuxAttachOperation* read_request(int s);

 public:
  enum {
    ATTACH_PROTOCOL_VER = 1                     // protocol version
  };
  enum {
    ATTACH_ERROR_BADVERSION = 101               // error codes
  };

  // Remembers the bound path. Passing NULL clears the stored path; any other
  // value is copied, truncated to fit, and always NUL-terminated.
  static void set_path(char* path) {
    if (path == NULL) {
      _path[0] = '\0';
      _has_path = false;
      return;
    }
    strncpy(_path, path, UNIX_PATH_MAX);
    _path[UNIX_PATH_MAX-1] = '\0';
    _has_path = true;
  }

  static void set_listener(int s)               { _listener = s; }

  // initialize the listener, returns 0 if okay
  static int init();

  // accessors for the state recorded above
  static char* path()                           { return _path; }
  static bool has_path()                        { return _has_path; }
  static int listener()                         { return _listener; }

  // write the given buffer to a socket
  static int write_fully(int s, char* buf, int len);

  static LinuxAttachOperation* dequeue();
};

// An attach operation together with the client connection it arrived on.
class LinuxAttachOperation: public AttachOperation {
 private:
  // the connection to the client; -1 until set_socket() is called
  int _socket;

 public:
  LinuxAttachOperation(char* name) : AttachOperation(name), _socket(-1) { }

  int socket() const                            { return _socket; }
  void set_socket(int s)                        { _socket = s; }

  void complete(jint res, bufferedStream* st);
};

// statics
char LinuxAttachListener::_path[UNIX_PATH_MAX];
bool LinuxAttachListener::_has_path;
volatile int LinuxAttachListener::_listener = -1;
bool LinuxAttachListener::_atexit_registered = false;
LinuxAttachOperation* LinuxAttachListener::_current_op = NULL;

// Supporting class to help split a buffer into individual components
class ArgumentIterator : public StackObj {
Expand Down Expand Up @@ -377,6 +310,7 @@ LinuxAttachOperation* LinuxAttachListener::dequeue() {
::close(s);
continue;
} else {
_current_op = op;
return op;
}
}
Expand All @@ -397,6 +331,18 @@ int LinuxAttachListener::write_fully(int s, char* buf, int len) {
return 0;
}

// Completing an operation is split into two parts so that a jcmd connection
// is handled properly across a CRaC checkpoint.
// effectively_complete_raw() is called during checkpoint processing, before the CRIU engine is invoked, to properly close the socket.
// complete() is called after restore to properly delete the leftover operation object.

// Finishes this operation: sends the reply (unless it has already been sent),
// clears the listener's record of the current operation, and destroys this
// object. The object must not be used after this call returns.
void LinuxAttachOperation::complete(jint result, bufferedStream* st) {
// Class-qualified call; it returns early when the operation has already been
// effectively completed.
LinuxAttachOperation::effectively_complete_raw(result, st);
// reset the current op as late as possible, this happens on attach listener thread.
LinuxAttachListener::reset_current_op();
// The operation deletes itself, so this must be the last statement.
delete this;
}

// Complete an operation by sending the operation result and any result
// output to the client. At this time the socket is in blocking mode so
// potentially we can block if there is a lot of data and the client is
Expand All @@ -405,34 +351,64 @@ int LinuxAttachListener::write_fully(int s, char* buf, int len) {
// if there are operations that involve a very big reply then the
// socket could be made non-blocking and a timeout could be used.

void LinuxAttachOperation::complete(jint result, bufferedStream* st) {
JavaThread* thread = JavaThread::current();
ThreadBlockInVM tbivm(thread);
void LinuxAttachOperation::effectively_complete_raw(jint result, bufferedStream* st) {

thread->set_suspend_equivalent();
// cleared by handle_special_suspend_equivalent_condition() or
// java_suspend_self() via check_and_wait_while_suspended()
if (_effectively_completed) {
assert(st->size() == 0, "no lost output");
return;
}

// write operation result
Thread* thread = Thread::current();
if (thread->is_Java_thread()) {
JavaThread* jt = (JavaThread* )thread;
ThreadBlockInVM tbivm(jt);
jt->set_suspend_equivalent();
// cleared by handle_special_suspend_equivalent_condition() or
// java_suspend_self() via check_and_wait_while_suspended()

write_operation_result(result, st);

// were we externally suspended while we were waiting?
jt->check_and_wait_while_suspended();
} else {
write_operation_result(result, st);
}
_effectively_completed = true;
}

// Sends the reply for this operation to the client and tears down the
// connection.
//
// The reply is the decimal result code followed by '\n', then the bytes
// buffered in st. The buffered output is only sent, and the socket only shut
// down, when the result code was written successfully. The socket is closed
// and st is reset in every case.
//
//   result - operation result code reported to the client
//   st     - buffered operation output; emptied before returning
void LinuxAttachOperation::write_operation_result(jint result, bufferedStream* st) {
  const int fd = this->socket();

  // write operation result; bounded formatting, "%d\n" always fits in msg
  char msg[32];
  snprintf(msg, sizeof(msg), "%d\n", result);
  int rc = LinuxAttachListener::write_fully(fd, msg, (int) strlen(msg));

  // write any result data (best effort: a failure here is not reported)
  if (rc == 0) {
    LinuxAttachListener::write_fully(fd, (char*) st->base(), st->size());
    // a single shutdown of both directions is sufficient
    ::shutdown(fd, SHUT_RDWR);
  }

  // done
  ::close(fd);
  st->reset();
}

// were we externally suspended while we were waiting?
thread->check_and_wait_while_suspended();
// Debug-only sanity check that the calling thread is named "Attach Listener".
// The body is compiled only when ASSERT is defined; otherwise this function
// is empty.
static void assert_listener_thread() {
#ifdef ASSERT
ResourceMark rm; // For retrieving the thread names
assert(strcmp("Attach Listener", Thread::current()->name()) == 0, "should gets called from Attach Listener thread");
#endif
}

delete this;
// Returns the operation most recently recorded by the listener, or NULL when
// none is recorded. Checked (in debug builds) to run on the Attach Listener
// thread.
LinuxAttachOperation* LinuxAttachListener::get_current_op() {
  assert_listener_thread();
  return _current_op;
}

void LinuxAttachListener::reset_current_op() {
assert_listener_thread();
LinuxAttachListener::_current_op = NULL;
}

// AttachListener functions

Expand Down
Loading
Loading