diff --git a/.gitea/workflows/abstract-machine-build.yml b/.gitea/workflows/abstract-machine-build.yml
new file mode 100644
index 0000000..af10d43
--- /dev/null
+++ b/.gitea/workflows/abstract-machine-build.yml
@@ -0,0 +1,21 @@
+name: Build abstract machine with nix
+on: [push]  # run on every push
+
+jobs:
+  build-abstract-machine:
+    runs-on: nix  # label of the runner that executes this job
+    steps:
+      - uses: https://github.com/cachix/cachix-action@v14  # set up the "ysyx" Cachix cache
+        with:
+          name: ysyx
+          signingKey: '${{ secrets.CACHIX_SIGNING_KEY }}'  # read from the CI secrets store
+      - uses: actions/checkout@v4
+        with:
+          submodules: true  # also fetch git submodules
+      - name: Build abstract-machine
+        run: |
+          nix build .#abstract-machine
+      - name: Build nemu
+        run: |
+          nix build .#nemu
+
diff --git a/.gitignore b/.gitignore
index 4ed3ed8..44a51ce 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,5 @@
 !init.sh
 /fceux-am
 /nvboard
-/am-kernels
+**/.cache
+**/result
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..3d834b3
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "am-kernels"]
+	path = am-kernels
+	url = https://git.xinyang.life/xin/am-kernels.git
diff --git a/abstract-machine/.gitignore b/abstract-machine/.gitignore
index 84c3ed2..bcba0ab 100644
--- a/abstract-machine/.gitignore
+++ b/abstract-machine/.gitignore
@@ -1,19 +1,6 @@
-*
-!*/
-!*.h
-!*.c
-!*.cc
-!*.S
-!*.ld
-!*.sh
-!*.py
-!*.mk
-!Makefile
-!README
-!LICENSE
-.*
-_*
-*~
-build/
-!.gitignore
-.vscode
\ No newline at end of file
+**/.direnv/
+**/build/
+**/.envrc
+**/.cache
+.vscode
+compile_commands.json
diff --git a/abstract-machine/CMakeLists.txt b/abstract-machine/CMakeLists.txt
new file mode 100644
index 0000000..508bc68
--- /dev/null
+++ b/abstract-machine/CMakeLists.txt
@@ -0,0 +1,87 @@
+cmake_minimum_required(VERSION 3.22)
+
+project(abstract-machine LANGUAGES C CXX ASM)
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_CXX_STANDARD 11)
+
+include(CMakeDependentOption)
+include(CMakePackageConfigHelpers)  # Provides configure_package_config_file()
+include(CTest)
+include(GNUInstallDirs)  # Defines CMAKE_INSTALL_LIBDIR, used by the install() rules
+
+# -- General options
+set(ISA "" CACHE STRING "Target ISA")
+set_property(CACHE ISA PROPERTY STRINGS "riscv" "x86" "x86_64" "native")
+if(ISA STREQUAL "")
+    message(FATAL_ERROR "ISA is not set; configure with -DISA=<riscv|x86|x86_64|native>")
+endif()
+string(TOUPPER "${ISA}" ISA_UPPER)
+
+cmake_dependent_option(
+    __PLATFORM_NEMU__ "Run on NEMU"
+    ON "ISA STREQUAL \"riscv\" OR ISA STREQUAL \"x86\"" OFF)
+cmake_dependent_option(
+    __PLATFORM_NATIVE__ "Run on native"
+    ON "ISA MATCHES native" OFF)
+
+# -- Set PLATFORM from the enabled __PLATFORM_<NAME>__ option
+set(MATCH_PLATFORM_PATTERN "^__PLATFORM_([A-Z]*)__")
+get_cmake_property(CACHE_VARS CACHE_VARIABLES)
+message(STATUS "ISA: ${ISA}")
+foreach(VAR IN LISTS CACHE_VARS)
+    if(VAR MATCHES "${MATCH_PLATFORM_PATTERN}")
+        if(${VAR})  # ignore platform switches that exist but are turned off
+            set(PLATFORM_UPPER ${CMAKE_MATCH_1})
+            string(TOLOWER ${PLATFORM_UPPER} PLATFORM)
+            message(STATUS "Variable: ${VAR}=${${VAR}}, Platform: ${PLATFORM}")
+        endif()
+    endif()
+endforeach()
+if(NOT DEFINED PLATFORM_UPPER)
+    message(FATAL_ERROR "No __PLATFORM_<NAME>__ option is enabled for ISA '${ISA}'")
+endif()
+
+if(PLATFORM STREQUAL "native")
+    set(ARCH "native")
+else()
+    set(ARCH "${ISA}-${PLATFORM}")
+endif()
+string(TOUPPER "${ARCH}" ARCH_UPPER)
+
+# -- Target specific options (NOTE(review): __ISA_NATIVE__ is not set as a CMake variable in this file, so the condition always holds - confirm intent)
+cmake_dependent_option(
+    NATIVE_USE_KLIB "Use Klib even if on native"
+    ON "NOT __ISA_NATIVE__" OFF)
+
+# -- Add compile definitions based on options
+add_compile_definitions(
+    $<MAKE_C_IDENTIFIER:__ARCH_${ARCH_UPPER}__>
+    __ISA_${ISA_UPPER}__
+    __PLATFORM_${PLATFORM_UPPER}__
+    $<$<BOOL:${NATIVE_USE_KLIB}>:__NATIVE_USE_KLIB__>
+)
+
+# -- Required compiler flags
+add_compile_options(
+    -Wno-main
+    -fno-asynchronous-unwind-tables
+    -fno-builtin
+    -fno-stack-protector
+    -U_FORTIFY_SOURCE
+    $<$<COMPILE_LANGUAGE:CXX>:-fno-exceptions>
+    $<$<COMPILE_LANGUAGE:CXX>:-ffreestanding>
+    $<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>)
+
+add_link_options(-znoexecstack)
+
+# -- Include linker script here. Use this linker script at link time if INCLUDE_LINKER_SCRIPT is set to true
+set(LINKER_SCRIPT linker.ld)
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
+
+if(ISA STREQUAL "riscv")  # rv32if / ilp32 are RISC-V flags; other ISAs must not receive them
+    add_compile_options(-march=rv32if -mabi=ilp32)
+    add_link_options(-march=rv32if -mabi=ilp32)
+endif()
+
+add_subdirectory(klib)
+add_subdirectory(am)
diff --git a/abstract-machine/CMakePresets.json b/abstract-machine/CMakePresets.json
new file mode 100644
index 0000000..d14c0b6
--- /dev/null
+++ b/abstract-machine/CMakePresets.json
@@ -0,0 +1,29 @@
+{
+    "version": 6,
+    "configurePresets": [
+        {
+            "name": "native",
+            "displayName": "Native",
+            "generator": "Unix Makefiles",
+            "binaryDir": "${sourceDir}/out/build/${presetName}",
+            "cacheVariables": {
+                "CMAKE_BUILD_TYPE": "Debug",
+                "ISA": "native",
+                "__PLATFORM_NATIVE__": true,
+                "NATIVE_USE_KLIB": true
+            }
+        },
+        {
+            "name": "riscv-nemu",
+            "displayName": "Riscv32 NEMU",
+            "generator": "Unix Makefiles",
+            "binaryDir": "${sourceDir}/out/build/${presetName}",
+            "installDir": "${sourceDir}/out/install",
+            "cacheVariables": {
+                "CMAKE_BUILD_TYPE": "Debug",
+                "ISA": "riscv",
+                "__PLATFORM_NEMU__": true
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/abstract-machine/Makefile b/abstract-machine/Makefile
index 3a5d60a..1377857 100644
--- a/abstract-machine/Makefile
+++ b/abstract-machine/Makefile
@@ -47,33 +47,45 @@ endif
 
 ### Create the destination directory (`build/$ARCH`)
 WORK_DIR  = $(shell pwd)
-DST_DIR   = $(WORK_DIR)/build/$(ARCH)
+BUILD_DIR ?= $(WORK_DIR)/build
+DST_DIR   = $(BUILD_DIR)/$(ARCH)
 $(shell mkdir -p $(DST_DIR))
 
 ### Compilation targets (a binary image or archive)
-IMAGE_REL = build/$(NAME)-$(ARCH)
+IMAGE_REL = $(DST_DIR)/$(NAME)-$(ARCH)
 IMAGE     = $(abspath $(IMAGE_REL))
-ARCHIVE   = $(WORK_DIR)/build/$(NAME)-$(ARCH).a
+ARCHIVE   = $(BUILD_DIR)/$(NAME)-$(ARCH).a
 
 ### Collect the files to be linked: object files (`.o`) and libraries (`.a`)
 OBJS      = $(addprefix $(DST_DIR)/, $(addsuffix .o, $(basename $(SRCS))))
 LIBS     := $(sort $(LIBS) am klib) # lazy evaluation ("=") causes infinite recursions
 LINKAGE   = $(OBJS) \
-  $(addsuffix -$(ARCH).a, $(join \
-    $(addsuffix /build/, $(addprefix $(AM_HOME)/, $(LIBS))), \
-    $(LIBS) ))
+  $(addsuffix -$(ARCH).a, $(addprefix $(BUILD_DIR)/, $(LIBS)))
 
 ## 3. General Compilation Flags
 
 ### (Cross) compilers, e.g., mips-linux-gnu-g++
-AS        = $(CROSS_COMPILE)gcc
-CC        = $(CROSS_COMPILE)gcc
-CXX       = $(CROSS_COMPILE)g++
-LD        = $(CROSS_COMPILE)ld
-AR        = $(CROSS_COMPILE)ar
-OBJDUMP   = $(CROSS_COMPILE)objdump
-OBJCOPY   = $(CROSS_COMPILE)objcopy
-READELF   = $(CROSS_COMPILE)readelf
+### NOTE: make predefines AS, CC, CXX, LD and AR (origin `default`), so a
+### plain `?=` would never apply the $(CROSS_COMPILE) prefix to them. Only
+### replace the built-in value; environment/command-line settings still win.
+ifneq ($(filter default undefined,$(origin AS)),)
+AS        = $(CROSS_COMPILE)gcc
+endif
+ifneq ($(filter default undefined,$(origin CC)),)
+CC        = $(CROSS_COMPILE)gcc
+endif
+ifneq ($(filter default undefined,$(origin CXX)),)
+CXX       = $(CROSS_COMPILE)g++
+endif
+ifneq ($(filter default undefined,$(origin LD)),)
+LD        = $(CROSS_COMPILE)ld
+endif
+ifneq ($(filter default undefined,$(origin AR)),)
+AR        = $(CROSS_COMPILE)ar
+endif
+OBJDUMP   ?= $(CROSS_COMPILE)objdump
+OBJCOPY   ?= $(CROSS_COMPILE)objcopy
+READELF   ?= $(CROSS_COMPILE)readelf
 
 ### Compilation flags
 INC_PATH += $(WORK_DIR)/include $(addsuffix /include/, $(addprefix $(AM_HOME)/, $(LIBS)))
diff --git a/abstract-machine/am/CMakeLists.txt b/abstract-machine/am/CMakeLists.txt
new file mode 100644
index 0000000..b0462e4
--- /dev/null
+++ b/abstract-machine/am/CMakeLists.txt
@@ -0,0 +1,10 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
+# Header-only target that exposes the AM include directory to consumers
+add_library(am_interface INTERFACE)
+target_include_directories(am_interface INTERFACE
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    $<INSTALL_INTERFACE:include/abstract-machine>)
+
+add_subdirectory(src)
+# Install the public headers under include/abstract-machine
+install(DIRECTORY include/ DESTINATION include/abstract-machine)
diff --git a/abstract-machine/am/src/CMakeLists.txt b/abstract-machine/am/src/CMakeLists.txt
new file mode 100644
index 0000000..533dd3b
--- /dev/null
+++ b/abstract-machine/am/src/CMakeLists.txt
@@ -0,0 +1,53 @@
+if(ISA MATCHES "native")
+set(SOURCEDIR "./${PLATFORM}")
+else()
+set(SOURCEDIR "./${ISA}/${PLATFORM}")
+endif()
+
+add_subdirectory(${SOURCEDIR})  # the selected directory defines the am-${ARCH} target
+
+target_include_directories(am-${ARCH}
+    PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
+    PUBLIC
+      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
+      $<INSTALL_INTERFACE:include/abstract-machine>)
+target_link_libraries(am-${ARCH}
+    PUBLIC klib_interface
+    INTERFACE m)
+
+# The linker script is opt-in for emulated platforms; programs built for the host keep the default link layout
+if(NOT ARCH STREQUAL "native")
+    target_link_options(am-${ARCH} INTERFACE
+        $<BUILD_INTERFACE:-T${CMAKE_SOURCE_DIR}/scripts/${LINKER_SCRIPT}>
+        $<INSTALL_INTERFACE:-T${CMAKE_INSTALL_LIBDIR}/cmake/am-${ARCH}/${LINKER_SCRIPT}>)
+endif()
+
+# Interface compile flags
+target_link_options(am-${ARCH} INTERFACE -znoexecstack)
+
+target_compile_options(am-${ARCH} INTERFACE
+        -fno-asynchronous-unwind-tables
+        -fno-builtin
+        -fno-stack-protector
+        -U_FORTIFY_SOURCE
+        $<$<COMPILE_LANGUAGE:CXX>:-fno-exceptions>
+        $<$<COMPILE_LANGUAGE:CXX>:-ffreestanding>
+        $<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>)
+
+install(TARGETS am-${ARCH} klib_interface am_interface
+        EXPORT amTargets LIBRARY DESTINATION lib)
+
+install(EXPORT amTargets FILE amTargets.cmake
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/am-${ARCH})
+
+configure_package_config_file(${CMAKE_SOURCE_DIR}/cmake/am-config.cmake.in
+  ${CMAKE_CURRENT_BINARY_DIR}/am-${ARCH}-config.cmake
+  INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/am-${ARCH})
+
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/am-${ARCH}-config.cmake
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/am-${ARCH})
+
+if(NOT ARCH STREQUAL "native")  # ship the linker script only where it is used
+    install(FILES ${CMAKE_SOURCE_DIR}/scripts/${LINKER_SCRIPT}
+            DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/am-${ARCH})
+endif()
diff --git a/abstract-machine/am/src/native/CMakeLists.txt b/abstract-machine/am/src/native/CMakeLists.txt
new file mode 100644
index 0000000..e3c9303
--- /dev/null
+++ b/abstract-machine/am/src/native/CMakeLists.txt
@@ -0,0 +1,26 @@
+include(CheckPIESupported)
+check_pie_supported()
+
+# Library built from the native AM sources
+add_library(am-native
+    trap.S
+    cte.c
+    ioe.c
+    mpe.c
+    platform.c
+    trm.c
+    vme.c
+    ioe/audio.c
+    ioe/disk.c
+    ioe/gpu.c
+    ioe/input.c
+    ioe/timer.c
+)
+
+# FIXME: get free(): invalid address when user program compiled without pie
+set_target_properties(am-native PROPERTIES
+    POSITION_INDEPENDENT_CODE TRUE
+    INTERFACE_POSITION_INDEPENDENT_CODE TRUE)
+
+find_package(SDL2 REQUIRED)
+target_link_libraries(am-${ARCH} PUBLIC SDL2::SDL2)
diff --git a/abstract-machine/am/src/riscv/nemu/CMakeLists.txt b/abstract-machine/am/src/riscv/nemu/CMakeLists.txt
new file mode 100644
index 0000000..a6992db
--- /dev/null
+++ b/abstract-machine/am/src/riscv/nemu/CMakeLists.txt
@@ -0,0 +1,34 @@
+include(nemu-settings)
+include(riscv-settings)
+# NEMU_* and RISCV_* variables used below come from the two settings modules above
+add_library(am-${ISA}-nemu
+    cte.c
+    start.S
+    trap.S
+    vme.c
+    ${NEMU_SOURCES}
+)
+
+target_compile_options(am-${ISA}-nemu PRIVATE
+    ${NEMU_COMPILE_OPTIONS}
+    ${RISCV_COMPILE_OPTIONS})
+target_link_options(am-${ISA}-nemu PRIVATE
+    ${NEMU_LINK_OPTIONS}
+    ${RISCV_LINK_OPTIONS})
+target_include_directories(am-${ISA}-nemu PRIVATE
+    ${NEMU_INCLUDE_DIRECTORIES})
+target_link_options(am-${ISA}-nemu INTERFACE
+    LINKER:--defsym=_pmem_start=0x80000000
+    LINKER:--defsym=_entry_offset=0x0
+    LINKER:--gc-sections
+    LINKER:-e,_start  # option and value in one LINKER: item so "_start" is not passed as a separate argument
+    -nostartfiles)
+# ARCH_H is visible to consumers (PUBLIC); ISA_H is only defined while building this library
+target_compile_definitions(am-${ISA}-nemu PUBLIC
+    ARCH_H="arch/riscv.h")
+target_compile_definitions(am-${ISA}-nemu PRIVATE
+    ISA_H="riscv/riscv.h")
+
+set_target_properties(am-${ISA}-nemu PROPERTIES
+    POSITION_INDEPENDENT_CODE OFF
+    INTERFACE_POSITION_INDEPENDENT_CODE OFF)
diff --git a/abstract-machine/cmake/am-config.cmake.in b/abstract-machine/cmake/am-config.cmake.in
new file mode 100644
index 0000000..f2fbb32
--- /dev/null
+++ b/abstract-machine/cmake/am-config.cmake.in
@@ -0,0 +1,9 @@
+@PACKAGE_INIT@
+# @ARCH@ is substituted when this file is generated, so consumers need not define ARCH themselves
+include(CMakeFindDependencyMacro)
+if("@ARCH@" STREQUAL "native")
+find_dependency(SDL2 REQUIRED)
+endif()
+
+# Include the targets file
+include("${CMAKE_CURRENT_LIST_DIR}/amTargets.cmake")
diff --git a/abstract-machine/cmake/klib-config.cmake.in b/abstract-machine/cmake/klib-config.cmake.in
new file mode 100644
index 0000000..6b57e7f
--- /dev/null
+++ b/abstract-machine/cmake/klib-config.cmake.in
@@ -0,0 +1,6 @@
+@PACKAGE_INIT@
+# Package config template for klib; no find_dependency() calls are made here
+include(CMakeFindDependencyMacro)
+
+# Include the targets file
+include("${CMAKE_CURRENT_LIST_DIR}/klibTargets.cmake")
diff --git a/abstract-machine/cmake/nemu-settings.cmake b/abstract-machine/cmake/nemu-settings.cmake
new file mode 100644
index 0000000..910cdcf
--- /dev/null
+++ b/abstract-machine/cmake/nemu-settings.cmake
@@ -0,0 +1,11 @@
+set(NEMU_COMPILE_OPTIONS -fdata-sections -ffunction-sections)
+# LINKER: makes CMake add the compiler driver's linker prefix, so these reach the linker when used as link options
+set(NEMU_LINK_OPTIONS
+    LINKER:--defsym=_pmem_start=0x80000000
+    LINKER:--defsym=_entry_offset=0x0
+    LINKER:--gc-sections
+    LINKER:-e,_start)
+set(NEMU_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/am/src/platform/nemu/include)
+file(GLOB_RECURSE NEMU_SOURCES CONFIGURE_DEPENDS
+    ${CMAKE_SOURCE_DIR}/am/src/platform/nemu/*.[cS])
+set(INCLUDE_LINKER_SCRIPT ON)
diff --git a/abstract-machine/cmake/riscv-settings.cmake b/abstract-machine/cmake/riscv-settings.cmake
new file mode 100644
index 0000000..1286e4c
--- /dev/null
+++ b/abstract-machine/cmake/riscv-settings.cmake
@@ -0,0 +1,2 @@
+set(RISCV_COMPILE_OPTIONS)  # no RISC-V specific compile options: set() without a value leaves the variable unset
+set(RISCV_LINK_OPTIONS)  # no RISC-V specific link options either
diff --git a/abstract-machine/default.nix b/abstract-machine/default.nix
new file mode 100644
index 0000000..1f1f67d
--- /dev/null
+++ b/abstract-machine/default.nix
@@ -0,0 +1,26 @@
+# Builds the abstract-machine libraries with CMake.
+# isa is passed as -DISA; platform selects the __PLATFORM_<NAME>__ CMake switch.
+{ stdenv,
+  lib,
+  cmake,
+  SDL2,
+  isa ? "native",
+  platform ? "NEMU"
+}:
+stdenv.mkDerivation {
+  pname = "abstract-machine";
+  version = "2024.02.18";
+
+  src = ./.;
+
+  cmakeFlags =  [
+    (lib.cmakeFeature "ISA" isa)
+    (lib.cmakeBool "__PLATFORM_${lib.strings.toUpper platform}__" true)
+  ];
+
+  nativeBuildInputs = [ cmake ];
+
+  # SDL2 is only pulled in for the native platform. The name is compared
+  # case-insensitively because the CMake switch above is upper-cased as well.
+  buildInputs = lib.optionals (lib.strings.toLower platform == "native") [ SDL2 ];
+}
diff --git a/abstract-machine/klib/CMakeLists.txt b/abstract-machine/klib/CMakeLists.txt
new file mode 100644
index 0000000..2cf4a78
--- /dev/null
+++ b/abstract-machine/klib/CMakeLists.txt
@@ -0,0 +1,12 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
+# Header-only target carrying the klib include directory for consumers
+add_library(klib_interface INTERFACE)
+target_include_directories(klib_interface
+    INTERFACE
+      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+      $<INSTALL_INTERFACE:include/abstract-machine>)
+
+add_subdirectory(src)
+# add_subdirectory(tests)
+# Install the public headers under include/abstract-machine
+install(DIRECTORY include/ DESTINATION include/abstract-machine)
diff --git a/abstract-machine/klib/include/klib.h b/abstract-machine/klib/include/klib.h
index ecb24c8..48d63e9 100644
--- a/abstract-machine/klib/include/klib.h
+++ b/abstract-machine/klib/include/klib.h
@@ -35,6 +35,7 @@ int    atoi      (const char *nptr);
 int    printf    (const char *format, ...);
 int    sprintf   (char *str, const char *format, ...);
 int    snprintf  (char *str, size_t size, const char *format, ...);
+int    vprintf   (const char *format, va_list ap); /* printf taking a va_list */
 int    vsprintf  (char *str, const char *format, va_list ap);
 int    vsnprintf (char *str, size_t size, const char *format, va_list ap);
 
diff --git a/abstract-machine/klib/src/CMakeLists.txt b/abstract-machine/klib/src/CMakeLists.txt
new file mode 100644
index 0000000..bf7e136
--- /dev/null
+++ b/abstract-machine/klib/src/CMakeLists.txt
@@ -0,0 +1,33 @@
+# find_package(FLEX)
+# find_package(BISON)
+
+# FLEX_TARGET(fmt_scanner fmt_scanner.l fmt_scanner.c)
+
+set(SOURCES
+    cpp.c
+    int64.c
+    stdio.c
+    stdlib.c
+    string.c
+    # ${FLEX_fmt_scanner_OUTPUTS}
+)
+# klib reuses the include directories of am_interface and the interface compile definitions of am-${ARCH}
+add_library(klib ${SOURCES})
+target_include_directories(klib PUBLIC $<TARGET_PROPERTY:am_interface,INTERFACE_INCLUDE_DIRECTORIES>)
+target_compile_definitions(klib PUBLIC $<TARGET_PROPERTY:am-${ARCH},INTERFACE_COMPILE_DEFINITIONS>)
+# Install the library, its exported target file and the generated package config file
+install(TARGETS klib
+        EXPORT klibTargets
+        LIBRARY DESTINATION lib)
+
+install(EXPORT klibTargets
+        FILE klibTargets.cmake
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/klib)
+
+configure_package_config_file(${CMAKE_SOURCE_DIR}/cmake/klib-config.cmake.in
+  ${CMAKE_CURRENT_BINARY_DIR}/klib-config.cmake
+  INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/klib)
+
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/klib-config.cmake
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/klib)
+
diff --git a/abstract-machine/klib/src/stdio.c b/abstract-machine/klib/src/stdio.c
index 1b19953..fec63bc 100644
--- a/abstract-machine/klib/src/stdio.c
+++ b/abstract-machine/klib/src/stdio.c
@@ -5,8 +5,20 @@
 
 #if !defined(__ISA_NATIVE__) || defined(__NATIVE_USE_KLIB__)
 
+/* Minimal vprintf: writes fmt verbatim through putch(); conversion specifiers
+ * are not expanded yet and ap is ignored. Returns the number of bytes written. */
+int vprintf(const char *fmt, va_list ap) {
+  int written = 0;
+  for (const char *p = fmt; *p != '\0'; p++, written++) putch(*p);
+  return written;
+}
+
 int printf(const char *fmt, ...) {
-  panic("Not implemented");
+  va_list args;
+  va_start(args, fmt);
+  int ret = vprintf(fmt, args);  /* hand the result of vprintf back to the caller */
+  va_end(args);
+  return ret;
 }
 
 int vsprintf(char *out, const char *fmt, va_list ap) {
diff --git a/abstract-machine/klib/src/string.c b/abstract-machine/klib/src/string.c
index f1a1f22..931e7dd 100644
--- a/abstract-machine/klib/src/string.c
+++ b/abstract-machine/klib/src/string.c
@@ -5,43 +5,115 @@
 #if !defined(__ISA_NATIVE__) || defined(__NATIVE_USE_KLIB__)
 
 size_t strlen(const char *s) {
-  panic("Not implemented");
+  /* Walk to the terminating NUL; the distance covered is the length. */
+  const char *end = s;
+  while (*end != '\0') end++;
+  return (size_t)(end - s);
 }
 
 char *strcpy(char *dst, const char *src) {
-  panic("Not implemented");
+  /* Copy every byte before the NUL, then terminate; dst is returned. */
+  size_t i;
+  for (i = 0; src[i] != '\0'; i++) {
+    dst[i] = src[i];
+  }
+  dst[i] = '\0';
+  return dst;
 }
 
 char *strncpy(char *dst, const char *src, size_t n) {
-  panic("Not implemented");
+  size_t i = 0;  /* size_t index: it is compared against the unsigned n */
+  for(; i < n && src[i] != '\0'; i++) {
+    dst[i] = src[i];
+  }
+  for(; i < n; i++) {
+    dst[i] = '\0';  /* pad the rest of the n bytes with NULs */
+  }
+  return dst;
 }
 
 char *strcat(char *dst, const char *src) {
-  panic("Not implemented");
+  /* Seek the end of dst, then append src and re-terminate. */
+  char *tail = dst;
+  while (*tail != '\0') tail++;
+  for (size_t k = 0; src[k] != '\0'; k++) {
+    *tail++ = src[k];
+  }
+  *tail = '\0';
+  return dst;
 }
 
 int strcmp(const char *s1, const char *s2) {
-  panic("Not implemented");
+  /* Advance while the bytes match and the end has not been reached. */
+  while (*s1 != '\0' && *s1 == *s2) {
+    s1++;
+    s2++;
+  }
+  /* The C standard orders strings by their bytes taken as unsigned char. */
+  return (int)(unsigned char)*s1 - (int)(unsigned char)*s2;
 }
 
 int strncmp(const char *s1, const char *s2, size_t n) {
-  panic("Not implemented");
+  /* Compare at most n bytes (as unsigned char); stop at the first mismatch or NUL. */
+  for (size_t i = 0; i < n; i++) {
+    unsigned char c1 = (unsigned char)s1[i], c2 = (unsigned char)s2[i];
+    if (c1 != c2) return (int)c1 - (int)c2;
+    if (c1 == '\0') break;
+  }
+  return 0;
 }
 
 void *memset(void *s, int c, size_t n) {
-  panic("Not implemented");
+  uint8_t *p = s;
+  for (size_t i = 0; i < n; i++) {  /* size_t index: n may exceed INT_MAX */
+    p[i] = (uint8_t)c;              /* only the low byte of c is stored */
+  }
+  return s;
 }
 
 void *memmove(void *dst, const void *src, size_t n) {
-  panic("Not implemented");
+  /*
+   * Copy n bytes from src to dst; the two regions may overlap.
+   *
+   * The copy runs byte by byte in the direction that reads every source
+   * byte before the write that could clobber it:
+   *   - dst below src: forwards, lowest address first;
+   *   - dst above src: backwards, highest address first.
+   *
+   * Exactly n bytes are written in both cases, so nothing beyond dst + n
+   * is touched even when n is not a multiple of the distance between the
+   * two pointers.
+   */
+  uint8_t *d = dst;
+  const uint8_t *s = src;
+
+  if (d == s || n == 0) {
+    return dst;
+  }
+  if (d < s) {
+    for (size_t i = 0; i < n; i++) d[i] = s[i];
+  } else {
+    for (size_t i = n; i > 0; i--) d[i - 1] = s[i - 1];
+  }
+
+  return dst;
 }
 
 void *memcpy(void *out, const void *in, size_t n) {
-  panic("Not implemented");
+  uint8_t *d = out;                 /* byte view of the destination */
+  const uint8_t *s = in;            /* byte view of the source */
+  for (size_t k = 0; k < n; k++) d[k] = s[k];
+  return out;
 }
 
 int memcmp(const void *s1, const void *s2, size_t n) {
-  panic("Not implemented");
+  /* Compare n bytes as unsigned values; the sign of the result is the
+   * sign of the first differing pair (0 when all n bytes match). */
+  const uint8_t *p1 = s1, *p2 = s2;
+  for (size_t i = 0; i < n; i++) {
+    if (p1[i] != p2[i]) return (int)p1[i] - (int)p2[i];
+  }
+  return 0;
 }
 
 #endif
diff --git a/abstract-machine/klib/tests/CMakeLists.txt b/abstract-machine/klib/tests/CMakeLists.txt
new file mode 100644
index 0000000..f72c555
--- /dev/null
+++ b/abstract-machine/klib/tests/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(TEST_SOURCES
+    stdio
+    string
+)
+# One test executable per entry above; each links the AM library, klib and libm
+foreach(TEST IN LISTS TEST_SOURCES)
+    add_executable(${TEST} ${TEST}.c)
+    target_link_libraries(${TEST} PRIVATE am-${ARCH} klib m)
+    target_include_directories(${TEST}
+        PRIVATE $<TARGET_PROPERTY:am_interface,INTERFACE_INCLUDE_DIRECTORIES>
+        PRIVATE $<TARGET_PROPERTY:klib_interface,INTERFACE_INCLUDE_DIRECTORIES>
+    )
+    # TODO: Run tests in other configurations
+    if(__PLATFORM_NATIVE__)
+        add_test(NAME ${TEST} COMMAND ${TEST})
+    endif()
+endforeach()
diff --git a/abstract-machine/klib/tests/stdio.c b/abstract-machine/klib/tests/stdio.c
new file mode 100644
index 0000000..7287e83
--- /dev/null
+++ b/abstract-machine/klib/tests/stdio.c
@@ -0,0 +1,5 @@
+#include <klib.h>
+/* Placeholder: no stdio checks are implemented yet, so this test always succeeds. */
+int main(void) {
+    return 0;
+}
\ No newline at end of file
diff --git a/abstract-machine/klib/tests/string.c b/abstract-machine/klib/tests/string.c
new file mode 100644
index 0000000..640f2d6
--- /dev/null
+++ b/abstract-machine/klib/tests/string.c
@@ -0,0 +1,75 @@
+#include <klib.h>
+#include <klib-macros.h>
+#include <stdint.h>
+
+void test_strcpy() {
+    /* The source string "abc" is placed in the upper half of the buffer. */
+    char b[32], *s;
+    b[16] = 'a'; b[17] = 'b'; b[18] = 'c'; b[19] = 0;
+    panic_on((s = strcpy(b, b+16)) != b, "strcpy wrong return value");
+    panic_on(strcmp(s, "abc") != 0, "strcpy gave incorrect string");
+    panic_on((s = strcpy(b+1, b+16)) != b+1, "strcpy wrong return value");
+    panic_on(strcmp(s, "abc") != 0, "strcpy gave incorrect string");
+    /* Copying from one byte further in must yield the tail "bc". */
+    panic_on((s = strcpy(b+1, b+17)) != b+1, "strcpy wrong return value");
+    panic_on(strcmp(s, "bc") != 0, "strcpy gave incorrect string");
+}
+
+void test_strncpy() {
+    char b[32];
+    char *s;
+    /* Sentinel right after the copied range: with n == 3 no NUL may be written at b[3]. */
+    b[3] = 'x'; b[4] = 0;
+    panic_on((s = strncpy(b, "abc", 3)) != b, "strncpy wrong return value");
+    panic_on(b[2] != 'c', "strncpy fails to copy last byte");
+    panic_on(b[3] != 'x', "strncpy overruns buffer to null-terminate");
+}
+
+void test_strncmp() {
+	panic_on(strncmp("abcd", "abce", 3) != 0, "strncmp compares past n");  /* the strings differ only at index 3, outside n */
+	panic_on(strncmp("abc", "abd", 3) == 0, "strncmp fails to compare n-1st byte");  /* the strings differ at index 2, the last compared byte */
+}
+
+void test_memset() {
+    uint8_t arr[128];
+    arr[120] = 0xd;  /* sentinel just past the filled range */
+    panic_on(memset(arr, 0xf, 120) != arr, "memset wrong return value");
+    panic_on(arr[7] != 0xf, "memset fails to set value in range");
+    panic_on(arr[120] != 0xd, "memset set value past n");
+}
+
+void test_memcpy() {
+    const uint8_t src[] = { 0x0, 0x0, 0x1, 0x2, 0x3, 0x4, 0x0, 0x0 };
+    uint8_t dst[8] = {0};
+    memcpy(dst, src, 8);  /* copy all 8 bytes, then compare them with memcmp */
+    panic_on(memcmp(dst, src, 8) != 0, "memcpy fails to copy memory");
+}
+
+void test_memmove() {
+    const uint8_t ref[] = { 0x0, 0x0, 0x1, 0x2, 0x3, 0x4, 0x0, 0x0 };
+    uint8_t dst[8] = {0};
+    const uint8_t ans1[] = { 0x1, 0x2, 0x3, 0x4, 0x3, 0x4, 0x0, 0x0 };
+    const uint8_t ans2[] = { 0x1, 0x2, 0x2, 0x3, 0x4, 0x3, 0x0, 0x0 };
+    const uint8_t ans3[] = { 0x1, 0x2, 0x2, 0x1, 0x2, 0x2, 0x3, 0x4 };
+    memmove(dst, ref, 8);
+    panic_on(memcmp(dst, ref, 8) != 0, "memmove fails to copy non-overlapping memory");
+    /* Overlapping move of 4 bytes down by two positions; ans1 is the expected buffer */
+    memmove(dst, dst + 2, 4);
+    panic_on(memcmp(dst, ans1, 8) != 0, "memmove fails to copy overlapping memory (dst < src)");
+    /* Overlapping move of 4 bytes up by one position; ans2 is the expected buffer */
+    memmove(dst + 2, dst + 1, 4);
+    panic_on(memcmp(dst, ans2, 8) != 0, "memmove fails to copy overlapping memory (src < dst)");
+    /* Overlapping move of 5 bytes up by three positions; ans3 is the expected buffer */
+    memmove(dst + 3, dst, 5);
+    panic_on(memcmp(dst, ans3, 8) != 0, "memmove fails to copy overlapping memory (src < dst)");
+}
+
+int main(void) {  /* runs every string/memory test in turn and returns 0 afterwards */
+    test_strcpy();
+    test_strncpy();
+    test_strncmp();
+    test_memset();
+    test_memcpy();
+    test_memmove();
+    return 0;
+}
diff --git a/abstract-machine/out/install/lib/libklib.a b/abstract-machine/out/install/lib/libklib.a
new file mode 100644
index 0000000..5023a30
Binary files /dev/null and b/abstract-machine/out/install/lib/libklib.a differ
diff --git a/am-kernels b/am-kernels
new file mode 160000
index 0000000..2f55982
--- /dev/null
+++ b/am-kernels
@@ -0,0 +1 @@
+Subproject commit 2f559823a63cf6909d5a9e32dee47d6891caf553
diff --git a/nemu/flake.lock b/flake.lock
similarity index 54%
rename from nemu/flake.lock
rename to flake.lock
index 5c64744..36b9a39 100644
--- a/nemu/flake.lock
+++ b/flake.lock
@@ -5,11 +5,11 @@
         "systems": "systems"
       },
       "locked": {
-        "lastModified": 1701680307,
-        "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=",
+        "lastModified": 1709126324,
+        "narHash": "sha256-q6EQdSeUZOG26WelxqkmR7kArjgWCdw5sfJVHPH/7j8=",
         "owner": "numtide",
         "repo": "flake-utils",
-        "rev": "4022d587cbbfd70fe950c1e2083a02621806a725",
+        "rev": "d465f4819400de7c8d874d50b982301f28a84605",
         "type": "github"
       },
       "original": {
@@ -20,11 +20,11 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1704722960,
-        "narHash": "sha256-mKGJ3sPsT6//s+Knglai5YflJUF2DGj7Ai6Ynopz0kI=",
+        "lastModified": 1709237383,
+        "narHash": "sha256-cy6ArO4k5qTx+l5o+0mL9f5fa86tYUX3ozE1S+Txlds=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "317484b1ead87b9c1b8ac5261a8d2dd748a0492d",
+        "rev": "1536926ef5621b09bba54035ae2bb6d806d72ac8",
         "type": "github"
       },
       "original": {
@@ -34,31 +34,10 @@
         "type": "github"
       }
     },
-    "nur-xin": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ]
-      },
-      "locked": {
-        "lastModified": 1704680562,
-        "narHash": "sha256-ffec3HL8OgbHB/TvLHILvC3ylou6N+KDtrn4qYVV+U4=",
-        "ref": "refs/heads/master",
-        "rev": "8adf33b6fdd113c645d83feda28622a0b1ef9f83",
-        "revCount": 144,
-        "type": "git",
-        "url": "https://git.xinyang.life/xin/nur.git"
-      },
-      "original": {
-        "type": "git",
-        "url": "https://git.xinyang.life/xin/nur.git"
-      }
-    },
     "root": {
       "inputs": {
         "flake-utils": "flake-utils",
-        "nixpkgs": "nixpkgs",
-        "nur-xin": "nur-xin"
+        "nixpkgs": "nixpkgs"
       }
     },
     "systems": {
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..3a07d79
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,59 @@
+{
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+    flake-utils.url = "github:numtide/flake-utils";
+  };
+
+  outputs = { self, ... }@inputs: with inputs;
+    flake-utils.lib.eachDefaultSystem (system:
+      let
+        pkgs = nixpkgs.legacyPackages.${system};  # package set for the local system
+        crossPkgs = import nixpkgs {  # package set cross-compiling to riscv32-none-elf (rv32if / ilp32)
+          localSystem = system;
+          crossSystem = {
+            config = "riscv32-none-elf";
+            gcc = {
+              abi = "ilp32";
+              arch = "rv32if";
+            };
+          };
+        };
+      in
+      {
+        packages.nemu = pkgs.callPackage ./nemu { am-kernels = self.packages.${system}.am-kernels; };
+        packages.abstract-machine = crossPkgs.callPackage ./abstract-machine { isa = "riscv"; platform = "nemu"; };
+
+        packages.am-kernels = crossPkgs.stdenv.mkDerivation rec {
+          pname = "am-kernels-cmake";
+          version = "2024.02.18";
+
+          src = ./am-kernels;  # the am-kernels git submodule
+
+          nativeBuildInputs = [
+            pkgs.cmake
+          ];
+
+          cmakeFlags = [
+            (pkgs.lib.cmakeFeature "ISA" "riscv")
+            (pkgs.lib.cmakeFeature "PLATFORM" "nemu")
+            (pkgs.lib.cmakeFeature "CMAKE_INSTALL_DATADIR" "share")
+          ];
+
+          buildInputs = [
+            # SDL2
+            self.packages.${system}.abstract-machine
+          ];
+        };
+        # Development shell: the inputs of the nemu package plus clang-tools and gdb
+        devShells.nemu = pkgs.mkShell {
+          packages = with pkgs; [
+            clang-tools
+            gdb
+          ];
+          inputsFrom = [
+            self.packages.${system}.nemu
+          ];
+        };
+      }
+    );
+}
diff --git a/nemu/Kconfig b/nemu/Kconfig
index a1ed68e..ae1921f 100644
--- a/nemu/Kconfig
+++ b/nemu/Kconfig
@@ -143,14 +143,54 @@ config TRACE_END
 
 config ITRACE
   depends on TRACE && TARGET_NATIVE_ELF && ENGINE_INTERPRETER
-  bool "Enable instruction tracer"
+  bool "Enable instruction tracing"
   default y
+  help
+    Instruction tracing logs past instructions into a ring buffer
+    and prints them when NEMU exits unexpectedly.
 
 config ITRACE_COND
   depends on ITRACE
   string "Only trace instructions when the condition is true"
   default "true"
 
+config ITRACE_BUFFER
+  depends on ITRACE
+  int "Buffer size for instruction trace (unit: number of instructions)"
+  default 10
+
+config MTRACE
+  depends on TRACE && TARGET_NATIVE_ELF && ENGINE_INTERPRETER
+  bool "Enable memory tracing"
+  default n
+
+config MTRACE_RANGE
+  depends on MTRACE
+  string "Memory trace active range"
+  default "0x0-0xfffffff"
+  help
+    Memory tracer will only print memory access in these ranges.
+    Use commas to separate ranges.
+
+config MTRACE_RANGE_MAX
+  depends on MTRACE
+  int "Max range count in MTRACE_RANGE"
+  default 10
+
+config FTRACE
+  depends on TRACE && TARGET_NATIVE_ELF && ENGINE_INTERPRETER
+  bool "Enable function tracing"
+  default y
+
+config FTRACE_STACK_SIZE
+  depends on FTRACE
+  int "Max function track stack size"
+  default 100
+
+config FTRACE_LOG
+  depends on FTRACE
+  bool "Print log when entering a function"
+  default n
 
 config DIFFTEST
   depends on TARGET_NATIVE_ELF
diff --git a/nemu/Makefile b/nemu/Makefile
index 0f360fe..e5e5838 100644
--- a/nemu/Makefile
+++ b/nemu/Makefile
@@ -64,12 +64,34 @@ include $(NEMU_HOME)/scripts/native.mk
 endif
 
 include $(NEMU_HOME)/tests/Makefile
-all-tests: TEST_OBJS = $(filter-out $(OBJ_DIR)/src/nemu-main.o, $(OBJS))
-all-tests: CFLAGS += $(shell pkg-config --cflags check)
-all-tests: LDFLAGS += $(shell pkg-config --libs check)
-all-tests: $(TEST_SRCS:%.c=$(OBJ_DIR)/%)
+unit-tests: TEST_OBJS = $(filter-out $(OBJ_DIR)/src/nemu-main.o, $(OBJS))
+unit-tests: CFLAGS += $(shell pkg-config --cflags check)
+unit-tests: LDFLAGS += $(shell pkg-config --libs check)
+unit-tests: $(TEST_SRCS:%.c=$(OBJ_DIR)/%)
 
-test: all-tests
-	@$(OBJ_DIR)/tests/expr_test
+IMAGES = $(patsubst %.bin, %, $(if $(IMAGES_PATH),$(shell find $(IMAGES_PATH) -type f -name '*.bin')))
 
-.PHONY: test
\ No newline at end of file
+COLOR_RED   = \033[1;31m
+COLOR_GREEN = \033[1;32m
+COLOR_BLUE = \033[1;34m
+COLOR_NONE  = \033[0m
+# IMAGES stays empty unless IMAGES_PATH is set; find would otherwise scan the current directory
+RESULT = .result.tmp
+$(shell > $(RESULT)) 	# Clear result file
+# Run one image with `$(BINARY) -b`; on a non-zero exit status its name is appended to $(RESULT)
+$(IMAGES): %: %.bin $(BINARY)
+	@echo + TEST $(notdir $<)
+	@$(BINARY) -b $< >/dev/null 2>&1 || printf "\t%14s\n" $(notdir $<) >> $(RESULT)
+# Print the summary and the failed images, remove $(RESULT), and fail if anything was recorded
+integration-tests: $(IMAGES) 
+	@printf "$(COLOR_BLUE)INTEGRATION TEST:$(COLOR_NONE)\n\tALL: %s\n\tFAILED: %s\n" $(words $(IMAGES)) $(shell wc -l $(RESULT) | cut -f1 -d' ')
+	@test ! -s $(RESULT) || printf "$(COLOR_RED)FAILED:$(COLOR_NONE)\n"
+	@cat $(RESULT)
+	@test ! -s $(RESULT); \
+	 r=$$?; \
+	 $(RM) $(RESULT); \
+	 test $$r -eq 0
+
+test: unit-tests integration-tests
+
+.PHONY: test unit-tests integration-tests
diff --git a/nemu/default.nix b/nemu/default.nix
new file mode 100644
index 0000000..d3d5a70
--- /dev/null
+++ b/nemu/default.nix
@@ -0,0 +1,63 @@
+{ pkgs,
+  lib,
+  stdenv,
+  am-kernels,
+  dtc
+}:
+# Nix derivation for NEMU; am-kernels supplies the images that `make test` runs.
+stdenv.mkDerivation rec {
+  pname = "nemu";
+  version = "2024-03-02";
+
+  src = ./.;
+
+  nativeBuildInputs = with pkgs; [  # tools executed during the build
+    gnumake
+    pkg-config
+    flex
+    bison
+    dtc
+  ];
+
+  buildInputs = with pkgs; [
+    readline
+    libllvm
+  ];
+
+  checkInputs = [  # extra inputs for the check phase: the check package and the test images
+    pkgs.check
+    am-kernels
+  ];
+
+  configurePhase = ''
+    export NEMU_HOME=$(pwd)
+    make alldefconfig
+  '';
+
+  buildPhase = ''
+    make
+  '';
+
+  doCheck = true;  # run checkPhase (make test) as part of the build
+  checkPhase = ''
+    export IMAGES_PATH=${am-kernels}/share/binary
+    make test
+  '';
+
+  installPhase = ''
+    mkdir -p $out/bin
+    make PREFIX=$out install
+  '';
+
+  shellHook = ''
+    export NEMU_HOME=$(pwd)
+    export IMAGES_PATH=${am-kernels}/share/binary
+  '';
+
+  meta = with lib; {
+    description = "NJU EMUlator, a full system x86/mips32/riscv32/riscv64 emulator for teaching";
+    homepage = "https://github.com/NJU-ProjectN/nemu.git";
+    license = with licenses; [ ];
+    maintainers = with maintainers; [ ];
+  };
+}
diff --git a/nemu/flake.nix b/nemu/flake.nix
deleted file mode 100644
index 325421f..0000000
--- a/nemu/flake.nix
+++ /dev/null
@@ -1,80 +0,0 @@
-{
-  inputs = {
-    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
-    flake-utils.url = "github:numtide/flake-utils";
-    nur-xin = {
-      url = "git+https://git.xinyang.life/xin/nur.git";
-      inputs.nixpkgs.follows = "nixpkgs";
-    };
-  };
-
-  outputs = { self, ... }@inputs: with inputs;
-    flake-utils.lib.eachDefaultSystem (system:
-      let
-        pkgs = nixpkgs.legacyPackages.${system} //
-          { nur.xin = nur-xin.legacyPackages.${system}; };
-      in
-      {
-        devShells.default = with pkgs; mkShell {
-          packages = [
-            clang-tools
-            rnix-lsp
-            bear
-
-            gdb
-            jre
-
-            gtkwave
-          ];
-
-          inputsFrom = [ self.packages.${system}.nemu ];
-        };
-
-        packages.nemu = with pkgs; stdenv.mkDerivation rec {
-          pname = "nemu";
-          version = "2024-01-02";
-
-          src = ./.;
-
-          nativeBuildInputs = [
-            gnumake
-            flex
-            bison
-            pkg-config
-            python3       # for testing
-          ];
-
-          buildInputs = [
-            check
-            readline
-            libllvm
-          ];
-
-          configurePhase = ''
-            echo NEMU_HOME=$NEMU_HOME
-            echo pwd=$(pwd)
-            mkdir -p $(pwd)/kconfig
-            WORK_DIR=$(pwd) obj=$(pwd)/kconfig make --trace -e -f scripts/config.mk WORK_DIR=$(pwd) obj=$(pwd)/kconfig rv32_defconfig
-          '';
-
-          installPhase = ''
-            BUILD_DIR=$out make install
-          '';
-
-          checkPhase = ''
-            BUILD_DIR=$out make test
-          '';
-
-          NEMU_HOME = src;
-
-          meta = with lib; {
-            description = "NJU EMUlator, a full system x86/mips32/riscv32/riscv64 emulator for teaching";
-            homepage = "https://github.com/NJU-ProjectN/nemu.git";
-            license = with licenses; [ ];
-            maintainers = with maintainers; [ ];
-          };
-        };
-      }
-    );
-}
-
diff --git a/nemu/include/common.h b/nemu/include/common.h
index fbffaa5..08a46e5 100644
--- a/nemu/include/common.h
+++ b/nemu/include/common.h
@@ -17,12 +17,12 @@
 #define __COMMON_H__
 
 #include <stdint.h>
-#include <inttypes.h>
 #include <stdbool.h>
 #include <string.h>
 
 #include <generated/autoconf.h>
 #include <macro.h>
+#include <types.h>
 
 #ifdef CONFIG_TARGET_AM
 #include <klib.h>
@@ -31,23 +31,6 @@
 #include <stdlib.h>
 #endif
 
-#if CONFIG_MBASE + CONFIG_MSIZE > 0x100000000ul
-#define PMEM64 1
-#endif
-
-typedef MUXDEF(CONFIG_ISA64, uint64_t, uint32_t) word_t;
-typedef MUXDEF(CONFIG_ISA64, int64_t, int32_t)  sword_t;
-static const word_t WORD_T_MAX = MUXDEF(CONFIG_ISA64, UINT64_MAX, UINT32_MAX);
-static const sword_t SWORD_T_MAX = MUXDEF(CONFIG_ISA64, INT64_MAX, INT32_MAX);
-static const sword_t SWORD_T_MIN = MUXDEF(CONFIG_ISA64, INT64_MIN, INT32_MIN);
-#define WORD_BYTES MUXDEF(CONFIG_ISA64, 8, 4)
-#define FMT_WORD MUXDEF(CONFIG_ISA64, "0x%016" PRIx64, "0x%08" PRIx32)
-
-typedef word_t vaddr_t;
-typedef MUXDEF(PMEM64, uint64_t, uint32_t) paddr_t;
-#define FMT_PADDR MUXDEF(PMEM64, "0x%016" PRIx64, "0x%08" PRIx32)
-typedef uint16_t ioaddr_t;
-
 #include <debug.h>
 
 #endif
diff --git a/nemu/include/cpu/decode.h b/nemu/include/cpu/decode.h
index 915bcf2..a17c888 100644
--- a/nemu/include/cpu/decode.h
+++ b/nemu/include/cpu/decode.h
@@ -23,7 +23,6 @@ typedef struct Decode {
   vaddr_t snpc; // static next pc
   vaddr_t dnpc; // dynamic next pc
   ISADecodeInfo isa;
-  IFDEF(CONFIG_ITRACE, char logbuf[128]);
 } Decode;
 
 // --- pattern matching mechanism ---
diff --git a/nemu/include/debug.h b/nemu/include/debug.h
index 087da4d..329c64a 100644
--- a/nemu/include/debug.h
+++ b/nemu/include/debug.h
@@ -16,9 +16,14 @@
 #ifndef __DEBUG_H__
 #define __DEBUG_H__
 
-#include <common.h>
 #include <stdio.h>
 #include <utils.h>
+#include <macro.h>
+
+IFDEF(CONFIG_ITRACE, void log_itrace_print());
+
+#define Trace(format, ...) \
+    _Log("[TRACE] " format "\n", ## __VA_ARGS__)
 
 #define Log(format, ...) \
     _Log(ANSI_FMT("[INFO] %s:%d %s() ", ANSI_FG_BLUE) format "\n", \
@@ -38,6 +43,7 @@
       MUXDEF(CONFIG_TARGET_AM, printf(ANSI_FMT(format, ANSI_FG_RED) "\n", ## __VA_ARGS__), \
         (fflush(stdout), fprintf(stderr, ANSI_FMT(format, ANSI_FG_RED) "\n", ##  __VA_ARGS__))); \
       IFNDEF(CONFIG_TARGET_AM, extern FILE* log_fp; fflush(log_fp)); \
+      IFDEF(CONFIG_ITRACE, log_itrace_print()); \
       extern void assert_fail_msg(); \
       assert_fail_msg(); \
       assert(cond); \
diff --git a/nemu/include/ftrace.h b/nemu/include/ftrace.h
new file mode 100644
index 0000000..9fcf28a
--- /dev/null
+++ b/nemu/include/ftrace.h
@@ -0,0 +1,21 @@
+#ifndef __FTRACE_H__
+#define __FTRACE_H__
+#include <common.h>
+
+#ifdef CONFIG_FTRACE
+// One function symbol: the address range it covers and its name.
+typedef struct {
+  vaddr_t start;  // address of the first byte of the function
+  vaddr_t len;    // size of the function in bytes
+  char * name;    // symbol name
+} func_t;
+
+// Table of function symbols used to resolve addresses to names.
+extern func_t *func_table;
+// Report a call: first argument is the pc of the call, second its target.
+void ftrace_call(vaddr_t, vaddr_t);
+// Report a return: first argument is the pc of the return, second its target.
+void ftrace_return(vaddr_t, vaddr_t);
+#endif
+
+#endif
diff --git a/nemu/include/macro.h b/nemu/include/macro.h
index 8aa38f8..47f11b0 100644
--- a/nemu/include/macro.h
+++ b/nemu/include/macro.h
@@ -92,6 +92,8 @@
 
 #define PG_ALIGN __attribute((aligned(4096)))
 
+#define FAILED_GOTO(tag, exp) do {if((exp)) goto tag;} while(0)
+
 #if !defined(likely)
 #define likely(cond)   __builtin_expect(cond, 1)
 #define unlikely(cond) __builtin_expect(cond, 0)
diff --git a/nemu/include/types.h b/nemu/include/types.h
new file mode 100644
index 0000000..364f2ed
--- /dev/null
+++ b/nemu/include/types.h
@@ -0,0 +1,36 @@
+#ifndef __TYPES_H__
+#define __TYPES_H__
+
+// Core scalar types of the emulated machine (register word, virtual and
+// physical addresses) together with their printf format strings.
+
+#include <inttypes.h>
+// The CONFIG_* macros tested below come from the generated configuration.
+// Include it here so this header gives the same answer no matter which
+// header a translation unit pulls in first.
+#include <generated/autoconf.h>
+#include <macro.h>
+
+// Physical addresses need 64 bits once the configured memory window ends
+// above 4 GiB.
+#if CONFIG_MBASE + CONFIG_MSIZE > 0x100000000ul
+#define PMEM64 1
+#endif
+
+// Unsigned / signed register-width integers, selected by CONFIG_ISA64.
+typedef MUXDEF(CONFIG_ISA64, uint64_t, uint32_t) word_t;
+typedef MUXDEF(CONFIG_ISA64, int64_t, int32_t)  sword_t;
+static const word_t WORD_T_MAX = MUXDEF(CONFIG_ISA64, UINT64_MAX, UINT32_MAX);
+static const sword_t SWORD_T_MAX = MUXDEF(CONFIG_ISA64, INT64_MAX, INT32_MAX);
+static const sword_t SWORD_T_MIN = MUXDEF(CONFIG_ISA64, INT64_MIN, INT32_MIN);
+#define WORD_BYTES MUXDEF(CONFIG_ISA64, 8, 4)
+#define FMT_WORD MUXDEF(CONFIG_ISA64, "0x%016" PRIx64, "0x%08" PRIx32)
+
+// Virtual addresses are one machine word; physical addresses follow PMEM64.
+typedef word_t vaddr_t;
+typedef MUXDEF(PMEM64, uint64_t, uint32_t) paddr_t;
+#define FMT_PADDR MUXDEF(PMEM64, "0x%016" PRIx64, "0x%08" PRIx32)
+// 16-bit address type for I/O accesses.
+typedef uint16_t ioaddr_t;
+
+#endif
diff --git a/nemu/include/utils.h b/nemu/include/utils.h
index 2cd1561..f974584 100644
--- a/nemu/include/utils.h
+++ b/nemu/include/utils.h
@@ -16,7 +16,7 @@
 #ifndef __UTILS_H__
 #define __UTILS_H__
 
-#include <common.h>
+#include <types.h>
 
 // ----------- state -----------
 
diff --git a/nemu/scripts/config.mk b/nemu/scripts/config.mk
index 0525ee3..01b9d1f 100644
--- a/nemu/scripts/config.mk
+++ b/nemu/scripts/config.mk
@@ -48,6 +48,9 @@ menuconfig: $(MCONF) $(CONF) $(FIXDEP)
 savedefconfig: $(CONF)
 	$(Q)$< $(silent) --$@=configs/defconfig $(Kconfig)
 
+alldefconfig: $(CONF) $(FIXDEP)
+	$(Q)$(CONF) $(silent) --$@ $(Kconfig)
+
 %defconfig: $(CONF) $(FIXDEP)
 	$(Q)$< $(silent) --defconfig=configs/$@ $(Kconfig)
 	$(Q)$< $(silent) --syncconfig $(Kconfig)
@@ -60,7 +63,7 @@ help:
 	@echo  '  savedefconfig   - Save current config as configs/defconfig (minimal config)'
 
 distclean: clean
-	-@rm -rf $(rm-distclean)
+	-rm -rf $(rm-distclean)
 
 .PHONY: help distclean
 
diff --git a/nemu/src/cpu/cpu-exec.c b/nemu/src/cpu/cpu-exec.c
index 1f2940f..1e402ab 100644
--- a/nemu/src/cpu/cpu-exec.c
+++ b/nemu/src/cpu/cpu-exec.c
@@ -13,6 +13,7 @@
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/
 
+#include <utils.h>
 #include <cpu/cpu.h>
 #include <cpu/decode.h>
 #include <cpu/difftest.h>
@@ -29,15 +30,17 @@ CPU_state cpu = {};
 uint64_t g_nr_guest_inst = 0;
 static uint64_t g_timer = 0; // unit: us
 static bool g_print_step = false;
+IFDEF(CONFIG_ITRACE, extern char logbuf[CONFIG_ITRACE_BUFFER][128]);
+IFDEF(CONFIG_ITRACE, extern int logbuf_rear);
 
 void device_update();
 bool wp_eval_all();
 
 static void trace_and_difftest(Decode *_this, vaddr_t dnpc) {
 #ifdef CONFIG_ITRACE_COND
-  if (ITRACE_COND) { log_write("%s\n", _this->logbuf); }
+  if (ITRACE_COND) { log_write("%s\n", logbuf[logbuf_rear]); }
 #endif
-  if (g_print_step) { IFDEF(CONFIG_ITRACE, puts(_this->logbuf)); }
+  if (g_print_step) { IFDEF(CONFIG_ITRACE, puts(logbuf[logbuf_rear])); }
   IFDEF(CONFIG_DIFFTEST, difftest_step(_this->pc, dnpc));
 }
 
@@ -47,8 +50,9 @@ static void exec_once(Decode *s, vaddr_t pc) {
   isa_exec_once(s);
   cpu.pc = s->dnpc;
 #ifdef CONFIG_ITRACE
-  char *p = s->logbuf;
-  p += snprintf(p, sizeof(s->logbuf), FMT_WORD ":", s->pc);
+  logbuf_rear = (logbuf_rear + 1) % CONFIG_ITRACE_BUFFER; // advance to the next ring slot
+  char *p = logbuf[logbuf_rear];
+  p += snprintf(p, sizeof(logbuf[logbuf_rear]), FMT_WORD ":", s->pc); // bound by one row, not the whole ring
   int ilen = s->snpc - s->pc;
   int i;
   uint8_t *inst = (uint8_t *)&s->isa.inst.val;
@@ -64,7 +68,7 @@ static void exec_once(Decode *s, vaddr_t pc) {
 
 #ifndef CONFIG_ISA_loongarch32r
   void disassemble(char *str, int size, uint64_t pc, uint8_t *code, int nbyte);
-  disassemble(p, s->logbuf + sizeof(s->logbuf) - p,
+  disassemble(p, logbuf[logbuf_rear] + sizeof(logbuf[logbuf_rear]) - p,
       MUXDEF(CONFIG_ISA_x86, s->snpc, s->pc), (uint8_t *)&s->isa.inst.val, ilen);
 #else
   p[0] = '\0'; // the upstream llvm does not support loongarch32r
@@ -79,7 +83,7 @@ static void execute(uint64_t n) {
     g_nr_guest_inst ++;
     trace_and_difftest(&s, cpu.pc);
     if (wp_eval_all()) { 
-      puts(s.logbuf);
+      IFDEF(CONFIG_ITRACE, puts(logbuf[logbuf_rear])); // logbuf only exists when ITRACE is enabled
       break;
     }
     if (nemu_state.state != NEMU_RUNNING) break;
@@ -121,13 +125,16 @@ void cpu_exec(uint64_t n) {
   switch (nemu_state.state) {
     case NEMU_RUNNING: nemu_state.state = NEMU_STOP; break;
 
-    case NEMU_END: case NEMU_ABORT:
+    case NEMU_END: case NEMU_ABORT: {
       Log("nemu: %s at pc = " FMT_WORD,
           (nemu_state.state == NEMU_ABORT ? ANSI_FMT("ABORT", ANSI_FG_RED) :
            (nemu_state.halt_ret == 0 ? ANSI_FMT("HIT GOOD TRAP", ANSI_FG_GREEN) :
             ANSI_FMT("HIT BAD TRAP", ANSI_FG_RED))),
           nemu_state.halt_pc);
-      // fall through
+      if(nemu_state.halt_ret != 0) { // dump the recent instructions on a bad exit
+        IFDEF(CONFIG_ITRACE, log_itrace_print()); // only declared/defined under CONFIG_ITRACE
+      }
+    } // fall through
     case NEMU_QUIT: statistic();
   }
 }
diff --git a/nemu/src/isa/riscv32/difftest/dut.c b/nemu/src/isa/riscv32/difftest/dut.c
index c5ebf13..06748ce 100644
--- a/nemu/src/isa/riscv32/difftest/dut.c
+++ b/nemu/src/isa/riscv32/difftest/dut.c
@@ -18,7 +18,10 @@
 #include "../local-include/reg.h"
 
 bool isa_difftest_checkregs(CPU_state *ref_r, vaddr_t pc) {
-  return false;
+  for(int i = 0; i < MUXDEF(CONFIG_RVE, 16, 32); i++) {
+    if(!difftest_check_reg(reg_name(i), pc, ref_r->gpr[i], gpr(i))) return false;
+  }
+  return true;
 }
 
 void isa_difftest_attach() {
diff --git a/nemu/src/isa/riscv32/inst.c b/nemu/src/isa/riscv32/inst.c
index cb0c44e..1c41c63 100644
--- a/nemu/src/isa/riscv32/inst.c
+++ b/nemu/src/isa/riscv32/inst.c
@@ -1,29 +1,33 @@
 /***************************************************************************************
-* Copyright (c) 2014-2022 Zihao Yu, Nanjing University
-*
-* NEMU is licensed under Mulan PSL v2.
-* You can use this software according to the terms and conditions of the Mulan PSL v2.
-* You may obtain a copy of Mulan PSL v2 at:
-*          http://license.coscl.org.cn/MulanPSL2
-*
-* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
-* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
-* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
-*
-* See the Mulan PSL v2 for more details.
-***************************************************************************************/
+ * Copyright (c) 2014-2022 Zihao Yu, Nanjing University
+ *
+ * NEMU is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan
+ *PSL v2. You may obtain a copy of Mulan PSL v2 at:
+ *          http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+ *KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+ *NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ *
+ * See the Mulan PSL v2 for more details.
+ ***************************************************************************************/
 
+#include <common.h>
 #include "local-include/reg.h"
+#include "macro.h"
 #include <cpu/cpu.h>
 #include <cpu/ifetch.h>
 #include <cpu/decode.h>
+#include <ftrace.h>
+#include <utils.h>
 
 #define R(i) gpr(i)
 #define Mr vaddr_read
 #define Mw vaddr_write
 
 enum {
-  TYPE_I, TYPE_U, TYPE_S,
+  TYPE_R, TYPE_I, TYPE_I_SHIFT, TYPE_U, TYPE_S, TYPE_B, TYPE_J,
   TYPE_N, // none
 };
 
@@ -31,20 +35,45 @@ enum {
 #define src2R() do { *src2 = R(rs2); } while (0)
 #define immI() do { *imm = SEXT(BITS(i, 31, 20), 12); } while(0)
 #define immU() do { *imm = SEXT(BITS(i, 31, 12), 20) << 12; } while(0)
-#define immS() do { *imm = (SEXT(BITS(i, 31, 25), 7) << 5) | BITS(i, 11, 7); } while(0)
+#define immS() do { *imm = SEXT(BITS(i, 31, 25), 7) << 5 | BITS(i, 11, 7); } while(0)
+#define immB() do { *imm = SEXT(BITS(i, 31, 31), 1) << 12 | BITS(i, 30, 25) << 5 | BITS(i, 11, 8) << 1 | BITS(i, 7, 7) << 11; } while(0)
+#define immJ() do { *imm = SEXT(BITS(i, 31, 31), 1) << 20 | BITS(i, 30, 21) << 1 | BITS(i, 20, 20) << 11 | BITS(i, 19, 12) << 12; } while(0)
 
-static void decode_operand(Decode *s, int *rd, word_t *src1, word_t *src2, word_t *imm, int type) {
+static void decode_operand(Decode *s, int *rd, word_t *src1, word_t *src2,
+                           word_t *imm, int type) {
   uint32_t i = s->isa.inst.val;
   int rs1 = BITS(i, 19, 15);
   int rs2 = BITS(i, 24, 20);
   *rd     = BITS(i, 11, 7);
   switch (type) {
+    case TYPE_R: src1R(); src2R();         break;
     case TYPE_I: src1R();          immI(); break;
     case TYPE_U:                   immU(); break;
+    case TYPE_J:                   immJ(); break;
     case TYPE_S: src1R(); src2R(); immS(); break;
+    case TYPE_B: src1R(); src2R(); immB(); break;
   }
 }
 
+static void do_branch(Decode *s, bool condition, word_t offset) {
+  if (condition) {
+    // puts(s->logbuf[s->logbuf_rear]);
+    s->dnpc = s->pc + offset;
+  }
+}
+
+#ifdef CONFIG_FTRACE
+static void ftrace_jalr(Decode *s, int rd, vaddr_t dst) {
+  uint32_t i = s->isa.inst.val;
+  int rs1 = BITS(i, 19, 15);
+  if(rs1 == 1 && rd == 0) {
+    ftrace_return(s->pc, dst);
+  } else {
+    ftrace_call(s->pc, dst);
+  }
+}
+#endif
+
 static int decode_exec(Decode *s) {
   int rd = 0;
   word_t src1 = 0, src2 = 0, imm = 0;
@@ -57,11 +86,63 @@ static int decode_exec(Decode *s) {
 }
 
   INSTPAT_START();
+  INSTPAT("??????? ????? ????? ??? ????? 01101 11", lui    , U, R(rd) = imm);
   INSTPAT("??????? ????? ????? ??? ????? 00101 11", auipc  , U, R(rd) = s->pc + imm);
+
+  INSTPAT("??????? ????? ????? ??? ????? 11011 11", jal    , J, do {
+    s->dnpc = s->pc + imm; R(rd) = s->pc + 4;
+    IFDEF(CONFIG_FTRACE, ftrace_call(s->pc, s->pc + imm)); } while(0));
+  INSTPAT("??????? ????? ????? ??? ????? 11001 11", jalr   , I, do {
+    s->dnpc = (src1 + imm) & ~(word_t)1; R(rd) = s->pc + 4; // the ISA clears bit 0 of the jalr target
+    IFDEF(CONFIG_FTRACE, ftrace_jalr(s, rd, s->dnpc)); } while(0));
+  INSTPAT("??????? ????? ????? 000 ????? 11000 11", beq    , B, do_branch(s, src1 == src2, imm));
+  INSTPAT("??????? ????? ????? 001 ????? 11000 11", bne    , B, do_branch(s, src1 != src2, imm));
+  INSTPAT("??????? ????? ????? 100 ????? 11000 11", blt    , B, do_branch(s, (sword_t)src1 < (sword_t)src2, imm));
+  INSTPAT("??????? ????? ????? 101 ????? 11000 11", bge    , B, do_branch(s, (sword_t)src1 >= (sword_t)src2, imm));
+  INSTPAT("??????? ????? ????? 110 ????? 11000 11", bltu   , B, do_branch(s, src1 < src2, imm));
+  INSTPAT("??????? ????? ????? 111 ????? 11000 11", bgeu   , B, do_branch(s, src1 >= src2, imm));
+
+  INSTPAT("??????? ????? ????? 000 ????? 00000 11", lb     , I, R(rd) = SEXT(Mr(src1 + imm, 1), 8));
+  INSTPAT("??????? ????? ????? 001 ????? 00000 11", lh     , I, R(rd) = SEXT(Mr(src1 + imm, 2), 16));
+  INSTPAT("??????? ????? ????? 010 ????? 00000 11", lw     , I, R(rd) = SEXT(Mr(src1 + imm, 4), 32));
   INSTPAT("??????? ????? ????? 100 ????? 00000 11", lbu    , I, R(rd) = Mr(src1 + imm, 1));
+  INSTPAT("??????? ????? ????? 101 ????? 00000 11", lhu    , I, R(rd) = Mr(src1 + imm, 2));
   INSTPAT("??????? ????? ????? 000 ????? 01000 11", sb     , S, Mw(src1 + imm, 1, src2));
+  INSTPAT("??????? ????? ????? 001 ????? 01000 11", sh     , S, Mw(src1 + imm, 2, src2));
+  INSTPAT("??????? ????? ????? 010 ????? 01000 11", sw     , S, Mw(src1 + imm, 4, src2));
+
+  INSTPAT("??????? ????? ????? 000 ????? 00100 11", addi   , I, R(rd) = src1 + imm);
+  INSTPAT("??????? ????? ????? 010 ????? 00100 11", slti   , I, R(rd) = (sword_t)src1 < (sword_t)imm ? 1 : 0);
+  INSTPAT("??????? ????? ????? 011 ????? 00100 11", sltiu  , I, R(rd) = src1 < imm ? 1 : 0);
+  INSTPAT("??????? ????? ????? 100 ????? 00100 11", xori   , I, R(rd) = src1 ^ imm);
+  INSTPAT("??????? ????? ????? 110 ????? 00100 11", ori    , I, R(rd) = src1 | imm);
+  INSTPAT("??????? ????? ????? 111 ????? 00100 11", andi   , I, R(rd) = src1 & imm);
+  INSTPAT("0000000 ????? ????? 001 ????? 00100 11", slli   , I, R(rd) = src1 << imm);
+  INSTPAT("0000000 ????? ????? 101 ????? 00100 11", srli   , I, R(rd) = src1 >> imm);
+  INSTPAT("0100000 ????? ????? 101 ????? 00100 11", srai   , I, R(rd) = (sword_t)src1 >> (imm & 0x01F));
+  INSTPAT("0000000 ????? ????? 000 ????? 01100 11", add    , R, R(rd) = src1 + src2);
+  INSTPAT("0100000 ????? ????? 000 ????? 01100 11", sub    , R, R(rd) = src1 - src2);
+  INSTPAT("0000000 ????? ????? 001 ????? 01100 11", sll    , R, R(rd) = src1 << (src2 & 0x01F)); // shift amount is rs2[4:0]
+  INSTPAT("0000000 ????? ????? 010 ????? 01100 11", slt    , R, R(rd) = (sword_t)src1 < (sword_t)src2 ? 1 : 0);
+  INSTPAT("0000000 ????? ????? 011 ????? 01100 11", sltu   , R, R(rd) = src1 < src2 ? 1 : 0);
+  INSTPAT("0000000 ????? ????? 100 ????? 01100 11", xor    , R, R(rd) = src1 ^ src2);
+  INSTPAT("0000000 ????? ????? 101 ????? 01100 11", srl    , R, R(rd) = src1 >> (src2 & 0x01F)); // shift amount is rs2[4:0]
+  INSTPAT("0100000 ????? ????? 101 ????? 01100 11", sra    , R, R(rd) = (sword_t)src1 >> (src2 & 0x01F));
+  INSTPAT("0000000 ????? ????? 110 ????? 01100 11", or     , R, R(rd) = src1 | src2);
+  INSTPAT("0000000 ????? ????? 111 ????? 01100 11", and    , R, R(rd) = src1 & src2);
 
   INSTPAT("0000000 00001 00000 000 00000 11100 11", ebreak , N, NEMUTRAP(s->pc, R(10))); // R(10) is $a0
+
+  // "M" -- division by zero and signed overflow never trap: they produce the fixed results the ISA defines
+  INSTPAT("0000001 ????? ????? 000 ????? 01100 11", mul    , R, R(rd) = src1 * src2);
+  INSTPAT("0000001 ????? ????? 001 ????? 01100 11", mulh   , R, R(rd) = (int64_t)(sword_t)src1 * (sword_t)src2 >> 32);
+  INSTPAT("0000001 ????? ????? 010 ????? 01100 11", mulhsu , R, R(rd) = (int64_t)(sword_t)src1 * (uint64_t)src2 >> 32);
+  INSTPAT("0000001 ????? ????? 011 ????? 01100 11", mulhu  , R, R(rd) = (uint64_t)src1 * (uint64_t)src2 >> 32);
+  INSTPAT("0000001 ????? ????? 100 ????? 01100 11", div    , R, R(rd) = src2 == 0 ? WORD_T_MAX : ((sword_t)src1 == SWORD_T_MIN && (sword_t)src2 == -1) ? src1 : (word_t)((sword_t)src1 / (sword_t)src2));
+  INSTPAT("0000001 ????? ????? 101 ????? 01100 11", divu   , R, R(rd) = src2 == 0 ? WORD_T_MAX : src1 / src2);
+  INSTPAT("0000001 ????? ????? 110 ????? 01100 11", rem    , R, R(rd) = src2 == 0 ? src1 : ((sword_t)src1 == SWORD_T_MIN && (sword_t)src2 == -1) ? 0 : (word_t)((sword_t)src1 % (sword_t)src2));
+  INSTPAT("0000001 ????? ????? 111 ????? 01100 11", remu   , R, R(rd) = src2 == 0 ? src1 : src1 % src2);
+
   INSTPAT("??????? ????? ????? ??? ????? ????? ??", inv    , N, INV(s->pc));
   INSTPAT_END();
 
diff --git a/nemu/src/memory/paddr.c b/nemu/src/memory/paddr.c
index ee30e70..437debd 100644
--- a/nemu/src/memory/paddr.c
+++ b/nemu/src/memory/paddr.c
@@ -13,6 +13,8 @@
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/
 
+#include "common.h"
+#include "debug.h"
 #include <memory/host.h>
 #include <memory/paddr.h>
 #include <device/mmio.h>
@@ -23,6 +25,11 @@ static uint8_t *pmem = NULL;
 #else // CONFIG_PMEM_GARRAY
 static uint8_t pmem[CONFIG_MSIZE] PG_ALIGN = {};
 #endif
+#ifdef CONFIG_MTRACE
+static word_t mtrace_start[CONFIG_MTRACE_RANGE_MAX] = {0};
+static word_t mtrace_end[CONFIG_MTRACE_RANGE_MAX] = {0};
+static int range_count = 0;
+#endif
 
 uint8_t* guest_to_host(paddr_t paddr) { return pmem + paddr - CONFIG_MBASE; }
 paddr_t host_to_guest(uint8_t *haddr) { return haddr - pmem + CONFIG_MBASE; }
@@ -41,23 +48,58 @@ static void out_of_bound(paddr_t addr) {
       addr, PMEM_LEFT, PMEM_RIGHT, cpu.pc);
 }
 
+#ifdef CONFIG_MTRACE
+static void mtrace_print(char type, word_t addr, int len, word_t data) { // trace an access that hits a watched range
+  for (int i = 0; i < range_count; i++)
+    if (addr <= mtrace_end[i] && addr >= mtrace_start[i] ) {
+      Trace("Mem %c " FMT_WORD " %d D " FMT_WORD, type, addr, len, data); // addr and data are word_t
+      break; // report the access once even if ranges overlap
+    }
+}
+#endif
+
 void init_mem() {
 #if   defined(CONFIG_PMEM_MALLOC)
   pmem = malloc(CONFIG_MSIZE);
   assert(pmem);
+#endif
+#ifdef CONFIG_MTRACE
+  char range[sizeof(CONFIG_MTRACE_RANGE)] = CONFIG_MTRACE_RANGE; // writable copy: strtok_r modifies its input
+  char *saveptr, *ptr;
+  ptr = strtok_r(range, ",", &saveptr);
+  for (range_count = 0; ptr != NULL && range_count < CONFIG_MTRACE_RANGE_MAX; ) { // ptr is NULL when the option holds no token
+    word_t start, end;
+    Assert(sscanf(ptr, FMT_WORD "-" FMT_WORD, &start, &end) == 2, "Config option MTRACE_RANGE has wrong format");
+    mtrace_start[range_count] = start;
+    mtrace_end[range_count] = end;
+
+    range_count++;
+    ptr = strtok_r(NULL, ",", &saveptr);
+    if (!ptr) break;
+  }
+  Trace("MTRACE ranges: ");
+  for (int i = 0; i < range_count; i++) {
+    Trace("[" FMT_WORD ", " FMT_WORD "]", mtrace_start[i], mtrace_end[i]);
+  }
 #endif
   IFDEF(CONFIG_MEM_RANDOM, memset(pmem, rand(), CONFIG_MSIZE));
   Log("physical memory area [" FMT_PADDR ", " FMT_PADDR "]", PMEM_LEFT, PMEM_RIGHT);
 }
 
 word_t paddr_read(paddr_t addr, int len) {
-  if (likely(in_pmem(addr))) return pmem_read(addr, len);
-  IFDEF(CONFIG_DEVICE, return mmio_read(addr, len));
+  word_t result = 0; // stays 0 when the address is neither memory nor a device
+  if (likely(in_pmem(addr))) { result = pmem_read(addr, len); goto mtrace;}
+  IFDEF(CONFIG_DEVICE, result = mmio_read(addr, len); goto mtrace); // the ';' terminates the goto when the macro expands
   out_of_bound(addr);
-  return 0;
+
+mtrace:
+  IFDEF(CONFIG_MTRACE, mtrace_print('R', addr, len, result));
+  
+  return result;
 }
 
 void paddr_write(paddr_t addr, int len, word_t data) {
+  IFDEF(CONFIG_MTRACE, mtrace_print('W', addr, len, data));
   if (likely(in_pmem(addr))) { pmem_write(addr, len, data); return; }
   IFDEF(CONFIG_DEVICE, mmio_write(addr, len, data); return);
   out_of_bound(addr);
diff --git a/nemu/src/monitor/monitor.c b/nemu/src/monitor/monitor.c
index 2279ca0..0154208 100644
--- a/nemu/src/monitor/monitor.c
+++ b/nemu/src/monitor/monitor.c
@@ -15,6 +15,7 @@
 
 #include <isa.h>
 #include <memory/paddr.h>
+#include <utils.h>
 
 void init_rand();
 void init_log(const char *log_file);
@@ -40,6 +41,7 @@ static void welcome() {
 void sdb_set_batch_mode();
 
 static char *log_file = NULL;
+static char *elf_file = NULL;
 static char *diff_so_file = NULL;
 static char *img_file = NULL;
 static int difftest_port = 1234;
@@ -72,6 +74,7 @@ static int parse_args(int argc, char *argv[]) {
     {"log"      , required_argument, NULL, 'l'},
     {"diff"     , required_argument, NULL, 'd'},
     {"port"     , required_argument, NULL, 'p'},
+    {"elf"      , required_argument, NULL, 'f'},
     {"help"     , no_argument      , NULL, 'h'},
     {0          , 0                , NULL,  0 },
   };
@@ -82,6 +85,7 @@ static int parse_args(int argc, char *argv[]) {
       case 'p': sscanf(optarg, "%d", &difftest_port); break;
       case 'l': log_file = optarg; break;
       case 'd': diff_so_file = optarg; break;
+      case 'f': elf_file = optarg; break;
       case 1: img_file = optarg; return 0;
       default:
         printf("Usage: %s [OPTION...] IMAGE [args]\n\n", argv[0]);
@@ -89,6 +93,7 @@ static int parse_args(int argc, char *argv[]) {
         printf("\t-l,--log=FILE           output log to FILE\n");
         printf("\t-d,--diff=REF_SO        run DiffTest with reference REF_SO\n");
         printf("\t-p,--port=PORT          run DiffTest with port PORT\n");
+        printf("\t-f,--elf=FILE           elf file with debug info\n");
         printf("\n");
         exit(0);
     }
@@ -126,6 +131,16 @@ void init_monitor(int argc, char *argv[]) {
   /* Initialize the simple debugger. */
   init_sdb();
 
+  // printf("elf_file: %s\n", elf_file);
+  if(elf_file != NULL) {
+#ifdef CONFIG_FTRACE
+    void init_elf(const char *path);
+    init_elf(elf_file);
+#else
+    Warning("Elf file provided, but ftrace not turned on. Ignoring elf file.");
+#endif
+  }
+
 #ifndef CONFIG_ISA_loongarch32r
   IFDEF(CONFIG_ITRACE, init_disasm(
     MUXDEF(CONFIG_ISA_x86,     "i686",
diff --git a/nemu/src/monitor/sdb/addrexp.y b/nemu/src/monitor/sdb/addrexp.y
index 8e7df9a..4094e0b 100644
--- a/nemu/src/monitor/sdb/addrexp.y
+++ b/nemu/src/monitor/sdb/addrexp.y
@@ -7,6 +7,7 @@
 }
 %{
     #include <common.h>
+    #include <utils.h>
     #include <isa.h>
     #include <stdio.h>
     #include <stdlib.h>
diff --git a/nemu/src/utils/ftrace.c b/nemu/src/utils/ftrace.c
new file mode 100644
index 0000000..ea2f2b6
--- /dev/null
+++ b/nemu/src/utils/ftrace.c
@@ -0,0 +1,169 @@
+#include <assert.h>
+#include <common.h>
+#include <elf.h>
+#include <ftrace.h>
+#include <utils.h>
+
+#ifdef CONFIG_FTRACE
+// Return addresses of the calls currently in flight, innermost last.
+static vaddr_t ftrace_stack[CONFIG_FTRACE_STACK_SIZE] = {0};
+// Signed so that it can be passed as the `%*s` indentation width.
+static int ftrace_stack_len = 0;
+// Function symbols loaded by init_elf(), sorted by start address.
+func_t *func_table = NULL;
+int func_table_len = 0, func_table_size = 8;
+
+// qsort comparator: orders func_t entries by ascending start address.
+static int cmp_func_t(const void *a, const void *b) {
+  vaddr_t lhs = ((const func_t *)a)->start;
+  vaddr_t rhs = ((const func_t *)b)->start;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+// Binary search for the last function whose start address is <= addr.
+// Returns NULL when no function starts at or below addr.
+static func_t *get_func(vaddr_t addr) {
+  int l = 0, r = func_table_len - 1;
+  while(l <= r) {
+    int mid = (l + r) / 2;
+    if(func_table[mid].start <= addr)  l = mid + 1;
+    else r = mid - 1;
+  }
+  return l == 0 ? NULL : &func_table[l - 1];
+}
+
+// Name to print for a lookup result; "???" when the lookup found nothing.
+static const char *func_name(const func_t *f) {
+  return f == NULL ? "???" : f->name;
+}
+
+// Offset of addr inside f; 0 when the lookup found nothing.
+static vaddr_t func_offset(const func_t *f, vaddr_t addr) {
+  return f == NULL ? 0 : addr - f->start;
+}
+
+// Read `size` bytes at file offset `offset` into a new buffer that carries one
+// extra terminating NUL byte. Returns NULL on allocation or I/O failure.
+static char *read_chunk(FILE *fp, long offset, size_t size) {
+  char *buf = calloc(1, size + 1);
+  if (buf == NULL) return NULL;
+  if (fseek(fp, offset, SEEK_SET) != 0 ||
+      (size > 0 && fread(buf, size, 1, fp) != 1)) {
+    free(buf);
+    return NULL;
+  }
+  return buf;
+}
+
+// Load every STT_FUNC symbol of the 32-bit ELF file at `path` into func_table.
+// On any failure the table is left empty and an error is reported.
+void init_elf(const char *path) {
+  Elf32_Ehdr header;
+  Elf32_Shdr *section_header = NULL, *shstr = NULL, *symtab = NULL, *strtab = NULL;
+  Elf32_Sym *sym = NULL;
+  char *shstrtab = NULL, *names = NULL;
+  size_t sym_length = 0;
+  bool ok = false;
+
+  FILE *elf_file = fopen(path, "rb");
+  FAILED_GOTO(cleanup, elf_file == NULL);
+
+  func_table = (func_t *)calloc(func_table_size, sizeof(func_t));
+  FAILED_GOTO(cleanup, func_table == NULL);
+
+  FAILED_GOTO(cleanup, fread(&header, sizeof(Elf32_Ehdr), 1, elf_file) != 1);
+  FAILED_GOTO(cleanup, memcmp(header.e_ident, ELFMAG, SELFMAG) != 0);
+  FAILED_GOTO(cleanup, header.e_ident[EI_CLASS] != ELFCLASS32);
+  // The table is indexed as an array of Elf32_Shdr, so the entry size must match.
+  FAILED_GOTO(cleanup, header.e_shentsize != sizeof(Elf32_Shdr));
+  FAILED_GOTO(cleanup, header.e_shstrndx >= header.e_shnum);
+
+  section_header = (Elf32_Shdr *)read_chunk(elf_file, header.e_shoff,
+      (size_t)header.e_shnum * sizeof(Elf32_Shdr));
+  FAILED_GOTO(cleanup, section_header == NULL);
+
+  shstr = &section_header[header.e_shstrndx];
+  shstrtab = read_chunk(elf_file, shstr->sh_offset, shstr->sh_size);
+  FAILED_GOTO(cleanup, shstrtab == NULL);
+
+  for(int i = 0; i < header.e_shnum; i++) {
+    Elf32_Shdr *psh = section_header + i;
+    if (psh->sh_type == SHT_SYMTAB) {
+      symtab = psh;
+    } else if (psh->sh_type == SHT_STRTAB && psh->sh_name < shstr->sh_size &&
+               strcmp(shstrtab + psh->sh_name, ".strtab") == 0) {
+      strtab = psh;
+    }
+  }
+  FAILED_GOTO(cleanup, symtab == NULL || strtab == NULL);
+
+  sym_length = symtab->sh_size / sizeof(Elf32_Sym);
+  sym = (Elf32_Sym *)read_chunk(elf_file, symtab->sh_offset, sym_length * sizeof(Elf32_Sym));
+  FAILED_GOTO(cleanup, sym == NULL);
+  names = read_chunk(elf_file, strtab->sh_offset, strtab->sh_size);
+  FAILED_GOTO(cleanup, names == NULL);
+
+  for(size_t j = 0; j < sym_length; j++) {
+    // Only read function type symbol
+    if(ELF32_ST_TYPE(sym[j].st_info) != STT_FUNC) continue;
+    FAILED_GOTO(cleanup, sym[j].st_name > strtab->sh_size);
+    if(func_table_len == func_table_size) {
+      func_t *grown = realloc(func_table, 2 * (size_t)func_table_size * sizeof(func_t));
+      FAILED_GOTO(cleanup, grown == NULL);
+      func_table = grown;
+      func_table_size *= 2;
+    }
+    char *name = strdup(names + sym[j].st_name);
+    FAILED_GOTO(cleanup, name == NULL);
+    func_t *f = &func_table[func_table_len++];
+    f->start = sym[j].st_value;
+    f->len = sym[j].st_size;
+    f->name = name;
+  }
+  qsort(func_table, func_table_len, sizeof(func_t), cmp_func_t);
+  ok = true;
+
+cleanup:
+  free(names);
+  free(sym);
+  free(shstrtab);
+  free(section_header);
+  if (elf_file != NULL) fclose(elf_file);
+  if (ok) return;
+
+  // Drop the partially built table so later lookups see an empty one.
+  for(int i = 0; i < func_table_len; i++) free(func_table[i].name);
+  free(func_table);
+  func_table = NULL;
+  func_table_len = 0;
+  Error("Failed reading elf file");
+}
+
+// Record a call at `pc` to `addr` and push its return address (pc + 4).
+void ftrace_call(vaddr_t pc, vaddr_t addr) {
+  func_t *f = get_func(addr);
+  Assert(ftrace_stack_len < CONFIG_FTRACE_STACK_SIZE,
+         "Ftrace stack exceed size limit, consider turn off ftrace or increase "
+         "FTRACE_STACK_SIZE.");
+  ftrace_stack[ftrace_stack_len] = pc + 4;
+  Trace("%*s0x%x call 0x%x <%s+0x%x>", ftrace_stack_len, "", pc, addr,
+        func_name(f), func_offset(f, addr));
+  ftrace_stack_len++;
+}
+
+// Record a return at `pc` to `addr`. Frames are popped until the one whose
+// saved return address equals `addr`; frames skipped on the way are reported
+// with a "(TCO)" suffix. The stack length never drops below zero.
+void ftrace_return(vaddr_t pc, vaddr_t addr) {
+  while (ftrace_stack_len > 0) {
+    vaddr_t saved = ftrace_stack[--ftrace_stack_len];
+    if (saved == addr) break;
+    func_t *f = get_func(saved);
+    Trace("%*s0x%x ret 0x%x <%s+0x%x> (TCO)", ftrace_stack_len, "", pc, saved,
+          func_name(f), func_offset(f, saved));
+  }
+  func_t *f = get_func(addr);
+  Trace("%*s0x%x ret 0x%x <%s+0x%x>", ftrace_stack_len, "", pc, addr,
+        func_name(f), func_offset(f, addr));
+}
+#endif
diff --git a/nemu/src/utils/log.c b/nemu/src/utils/log.c
index a9bb9a7..a041a2d 100644
--- a/nemu/src/utils/log.c
+++ b/nemu/src/utils/log.c
@@ -14,6 +14,7 @@
 ***************************************************************************************/
 
 #include <common.h>
+#include <utils.h>
 
 extern uint64_t g_nr_guest_inst;
 
@@ -35,3 +36,18 @@ bool log_enable() {
          (g_nr_guest_inst <= CONFIG_TRACE_END), false);
 }
 #endif
+
+IFDEF(CONFIG_ITRACE, char logbuf[CONFIG_ITRACE_BUFFER][128]);
+IFDEF(CONFIG_ITRACE, int logbuf_rear);
+
+#ifdef CONFIG_ITRACE
+void log_itrace_print() {
+  puts("ITRACE buffer:");
+  for (int i = (logbuf_rear + 1) % CONFIG_ITRACE_BUFFER; i != logbuf_rear; i = (i + 1) % CONFIG_ITRACE_BUFFER) {
+    if (logbuf[i][0] == '\0') continue;
+    puts(logbuf[i]);
+  }
+  puts("Current command:");
+  puts(logbuf[logbuf_rear]);
+}
+#endif
diff --git a/nemu/tests/Makefile b/nemu/tests/Makefile
index 6804b41..2e8c784 100644
--- a/nemu/tests/Makefile
+++ b/nemu/tests/Makefile
@@ -7,3 +7,4 @@ $(OBJ_DIR)/%: %.c $(TEST_OBJS) app
 	@$(CC) $(CFLAGS) -o $@.o -c $< 
 	@echo + LD $@
 	@$(LD) $(LIBS) $(LDFLAGS) -o $@ $(TEST_OBJS) $@.o
+	@$@
diff --git a/npc/core/build.sbt b/npc/core/build.sbt
index 792a764..16f31f3 100644
--- a/npc/core/build.sbt
+++ b/npc/core/build.sbt
@@ -6,10 +6,11 @@ val chiselVersion = "5.1.0"
 
 lazy val root = (project in file("."))
   .settings(
-    name := "ChiselLearning",
+    name := "flow",
     libraryDependencies ++= Seq(
       "org.chipsalliance" %% "chisel" % chiselVersion,
-      "edu.berkeley.cs" %% "chiseltest" % "5.0.2" % "test"
+      "edu.berkeley.cs" %% "chiseltest" % "5.0.2" % "test",
+      "com.chuusai" %% "shapeless" % "2.3.3"
     ),
     scalacOptions ++= Seq(
       "-language:reflectiveCalls",
diff --git a/npc/core/src/main/scala/Main.scala b/npc/core/src/main/scala/Main.scala
index c06bc8e..95b9e61 100644
--- a/npc/core/src/main/scala/Main.scala
+++ b/npc/core/src/main/scala/Main.scala
@@ -2,8 +2,10 @@ package npc
 
 import chisel3._
 import chisel3.util.{MuxLookup, Fill, Decoupled, Counter, Queue, Reverse}
+import chisel3.util.{SRAM}
 import chisel3.stage.ChiselOption
 import npc.util.KeyboardSegController
+import flowpc.components.{ ProgramCounter, RegisterFile }
 
 class Switch extends Module {
   val io = IO(new Bundle {
@@ -31,3 +33,17 @@ class Keyboard extends Module {
   io.segs := seg_handler.io.segs
 }
 
+/** Top level of the flow processor; currently only instantiates its parts.
+  *
+  * NOTE(review): the register-file interface and the program counter inputs
+  * are not driven yet and must be wired up before this module can elaborate.
+  */
+class Flowpc extends Module {
+  val io = IO(new Bundle { })
+  // RegisterFile is a factory object: it builds the core and returns its interface wire.
+  val register_file = RegisterFile(32, UInt(32.W), numReadPorts = 2, numWritePorts = 2)
+  // Hardware modules have to be wrapped in Module(...) to be instantiated.
+  val pc = Module(new ProgramCounter(32))
+  // TODO: add the memory; chisel3.util.SRAM is a factory (SRAM(...)), not a
+  // class, so the former `new SRAM()` placeholder named `adder` could not compile.
+}
diff --git a/npc/core/src/main/scala/ProgramCounter.scala b/npc/core/src/main/scala/ProgramCounter.scala
new file mode 100644
index 0000000..0687f9a
--- /dev/null
+++ b/npc/core/src/main/scala/ProgramCounter.scala
@@ -0,0 +1,28 @@
+package flowpc.components
+import chisel3._
+import chisel3.util.{Valid}
+
+/** Program counter register.
+  *
+  * Outputs the stored PC on `io.pc`. On a clock edge the stored value is
+  * replaced by `io.next_pc.bits` if `io.next_pc.valid` is high and is kept
+  * unchanged otherwise.
+  *
+  * @param width bit width of the program counter
+  */
+class ProgramCounter (width: Int) extends Module {
+  // Ports must be wrapped in IO(...) to become hardware ports of the module.
+  val io = IO(new Bundle {
+    val next_pc = Input(Valid(UInt(width.W)))
+    val pc = Output(UInt(width.W))
+  })
+
+  // State lives in a register; driving io.pc from itself would be a
+  // combinational loop with nothing holding the value.
+  // NOTE(review): the reset value 0 is a placeholder - confirm the reset vector.
+  private val pcReg = RegInit(0.U(width.W))
+  when(io.next_pc.valid) {
+    pcReg := io.next_pc.bits
+  }
+  io.pc := pcReg
+}
diff --git a/npc/core/src/main/scala/RegisterFile.scala b/npc/core/src/main/scala/RegisterFile.scala
index fbf8a94..a6c5a62 100644
--- a/npc/core/src/main/scala/RegisterFile.scala
+++ b/npc/core/src/main/scala/RegisterFile.scala
@@ -1,25 +1,103 @@
-package npc.util
+package flowpc.components
 
 import chisel3._
+import chisel3.util.log2Ceil
+import chisel3.util.UIntToOH
+import chisel3.util.MuxLookup
 
-class RegisterFile(readPorts: Int) extends Module {
-  require(readPorts >= 0)
-  val io = IO(new Bundle {
-    val writeEnable = Input(Bool())
-    val writeAddr = Input(UInt(5.W))
-    val writeData = Input(UInt(32.W))
-    val readAddr = Input(Vec(readPorts, UInt(5.W)))
-    val readData = Output(Vec(readPorts, UInt(32.W)))
-  })
+/** Control signals of the register file: write enable plus the selector that
+  * picks which write-data source is stored.
+  */
+class RegControl extends Bundle {
+  val writeEnable = Input(Bool())
 
-  val regFile = RegInit(VecInit(Seq.fill(32)(0.U(32.W))))
-  for (i <- 1 until 32) {
-    regFile(i) := regFile(i)
+  // Declared inside the bundle, so users reach the values through an instance
+  // (e.g. `control.WriteSelect.rAluOut`).
+  object WriteSelect extends ChiselEnum {
+    val rAluOut, rMemOut = Value
+  }
+  val writeSelect = Input(WriteSelect())
+}
+
+/** Data ports of the register file.
+  *
+  * @param size          number of registers; addresses are `log2Ceil(size)` bits wide
+  * @param tpe           hardware type of one register
+  * @param numReadPorts  number of read ports
+  * @param numWritePorts number of candidate write-data inputs
+  */
+class RegFileData[T <: Data](size:Int, tpe: T, numReadPorts: Int, numWritePorts: Int) extends Bundle {
+  val write = new Bundle {
+    // Address width follows the register count, matching RegisterFileCore.
+    val addr = Input(UInt(log2Ceil(size).W))
+    val data = Vec(numWritePorts, Input(tpe))
+  }
+  val read = Vec(numReadPorts, new Bundle {
+    val rs = Input(UInt(log2Ceil(size).W))
+    val src = Output(tpe)
+  })
+}
+
+/** Control and data ports of the register file grouped into one bundle. */
+class RegFileInterface[T <: Data](size: Int, tpe: T, numReadPorts: Int, numWritePorts: Int) extends Bundle {
+  val control = new RegControl
+  val data = new RegFileData(size, tpe, numReadPorts, numWritePorts)
+}
+
+/** Register storage with one write port and `numReadPorts` read ports.
+  *
+  * Register 0 is forced to zero every cycle; the other registers take
+  * `writePort.data` when enabled and addressed, and otherwise hold their value.
+  * Reads are combinational.
+  *
+  * @param size         number of registers
+  * @param tpe          hardware type of the ports; storage is a UInt of the same width
+  * @param numReadPorts number of read ports (must not be negative)
+  */
+class RegisterFileCore[T <: Data](size: Int, tpe: T, numReadPorts: Int) extends Module {
+  require(numReadPorts >= 0)
+  val writePort = IO(new Bundle {
+    val enable = Input(Bool())
+    val addr = Input(UInt(log2Ceil(size).W))
+    val data = Input(tpe)
+  })
+  val readPorts = IO(Vec(numReadPorts, new Bundle {
+    val addr = Input(UInt(log2Ceil(size).W))
+    val data = Output(tpe)
+  }))
+
+  val regFile = RegInit(VecInit(Seq.fill(size)(0.U(tpe.getWidth.W))))
+  // One-hot decode so each register gets its own write strobe.
+  val writeAddrOH = UIntToOH(writePort.addr)
+  // `.tail` skips register 0, which is handled separately below.
+  for ((reg, i) <- regFile.zipWithIndex.tail) {
+    reg := Mux(writeAddrOH(i) && writePort.enable, writePort.data, reg)
   }
-  regFile(io.writeAddr) := Mux(io.writeEnable, io.writeData, regFile(io.writeAddr))
   regFile(0) := 0.U
 
-  for (i <- 0 until readPorts) {
-    io.readData(i) := regFile(io.readAddr(i))
+  for (readPort <- readPorts) {
+    readPort.data := regFile(readPort.addr)
+  }
+}
+
+/** Builds a [[RegisterFileCore]] and exposes it through a [[RegFileInterface]] wire.
+  *
+  * The value written to the core is chosen from `data.write.data` by
+  * `control.writeSelect`, using each enum value as the index.
+  */
+object RegisterFile {
+  def apply[T <: Data](size: Int, tpe: T, numReadPorts: Int, numWritePorts: Int): RegFileInterface[T] = {
+    val core = Module(new RegisterFileCore(size, tpe, numReadPorts))
+    val _out = Wire(new RegFileInterface(size, tpe, numReadPorts, numWritePorts))
+    for (i <- 0 until numReadPorts) {
+      core.readPorts(i).addr := _out.data.read(i).rs
+      _out.data.read(i).src := core.readPorts(i).data
+    }
+    core.writePort.addr := _out.data.write.addr
+    core.writePort.data := MuxLookup(_out.control.writeSelect, 0.U)(
+      _out.control.WriteSelect.all.map(x => (x -> _out.data.write.data(x.asUInt).asUInt))
+    )
+    core.writePort.enable := _out.control.writeEnable
+    _out
   }
 }
diff --git a/npc/core/src/test/scala/RegisterFile.scala b/npc/core/src/test/scala/RegisterFile.scala
new file mode 100644
index 0000000..87be171
--- /dev/null
+++ b/npc/core/src/test/scala/RegisterFile.scala
@@ -0,0 +1,70 @@
+package flowpc
+
+import chisel3._
+import chiseltest._
+import org.scalatest.freespec.AnyFreeSpec
+import chiseltest.simulator.WriteVcdAnnotation
+
+import flowpc.components._
+
+/** Simulation tests for RegisterFileCore and for the interface built by the RegisterFile factory. */
+class RegisterFileSpec extends AnyFreeSpec with ChiselScalatestTester {
+  "RegisterFileCore" - {
+    "register 0 is always 0" in {
+      test(new RegisterFileCore(32, UInt(32.W), 2)) { c =>
+        // Try to write register 0 while reading it on both ports.
+        c.readPorts(0).addr.poke(0)
+        c.readPorts(1).addr.poke(0)
+        c.writePort.enable.poke(true)
+        c.writePort.addr.poke(0)
+        c.writePort.data.poke(0x1234)
+
+        // The value must read as zero both before and after the clock edges.
+        c.readPorts(0).data.expect(0)
+        c.readPorts(1).data.expect(0)
+        c.clock.step(2)
+        c.readPorts(0).data.expect(0)
+        c.readPorts(1).data.expect(0)
+      }
+    }
+    "register other than 0 can be written" in {
+      test(new RegisterFileCore(32, UInt(32.W), 2)) { c =>
+        import scala.util.Random
+        // Fixed seed so that a failing value can be reproduced on the next run.
+        val r = new Random(0x5eed)
+        for (i <- 1 until 32) {
+          val v = r.nextLong() & 0xFFFFFFFFL
+          c.readPorts(0).addr.poke(i)
+          c.writePort.enable.poke(true)
+          c.writePort.addr.poke(i)
+          c.writePort.data.poke(v)
+
+          c.clock.step(1)
+          c.readPorts(0).data.expect(v)
+        }
+      }
+    }
+  }
+  "RegisterInterface" - {
+    "worked" in {
+      // Wrapper exposing the factory-built interface as module IO.
+      class Top extends Module {
+        val io = IO(new RegFileInterface(32, UInt(32.W), 2, 2))
+        val rf = RegisterFile(32, UInt(32.W), 2, 2)
+        io :<>= rf
+      }
+      test(new Top).withAnnotations(Seq(WriteVcdAnnotation)) { c =>
+        import c.io.control.WriteSelect._
+        // The enum value doubles as the index of the write-data input.
+        val writePort = rAluOut.litValue.toInt
+        c.io.control.writeEnable.poke(true)
+        c.io.control.writeSelect.poke(rAluOut)
+        c.io.data.write.addr.poke(5)
+        c.io.data.write.data(writePort).poke(0xcdef)
+        c.io.data.read(0).rs.poke(5)
+        c.clock.step(1)
+        c.io.data.read(0).src.expect(0xcdef)
+      }
+    }
+  }
+}