diff --git a/.gitignore b/.gitignore
index 2f31ace..84c3ed2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,4 @@ _*
 *~
 build/
 !.gitignore
+.vscode
\ No newline at end of file
diff --git a/am/include/arch/riscv64-mycpu.h b/am/include/arch/riscv64-mycpu.h
new file mode 100644
index 0000000..e69de29
diff --git a/am/src/mycpu/boot/start.S b/am/src/mycpu/boot/start.S
new file mode 100644
index 0000000..3e56e5c
--- /dev/null
+++ b/am/src/mycpu/boot/start.S
@@ -0,0 +1,8 @@
+.section entry, "ax"        /* own section; section.ld places *(entry) first inside .text */
+.globl _start
+.type _start, @function
+/* Boot entry point (ENTRY(_start) in section.ld): clear s0, set up the stack, call _trm_init. */
+_start:
+  mv s0, zero               /* s0 = 0 */
+  la sp, _stack_pointer     /* linker-script symbol marking the high end of the stack area */
+  jal _trm_init             /* nothing follows this call; _trm_init (trm.c) ends in halt(), which spins forever */
diff --git a/am/src/mycpu/include/mycpu.h b/am/src/mycpu/include/mycpu.h
new file mode 100644
index 0000000..a4c20b8
--- /dev/null
+++ b/am/src/mycpu/include/mycpu.h
@@ -0,0 +1,15 @@
+#ifndef MYCPU_H
+#define MYCPU_H
+
+#include <stdint.h>
+#include <klib-macros.h>
+
+/* Start of physical memory; only the address of this linker-script symbol is meaningful. */
+extern char _pmem_start;
+
+/* Size of physical memory in bytes (128 MiB). */
+#define PMEM_SIZE (128 * 1024 * 1024)
+/* First address past the end of physical memory. */
+#define PMEM_END  ((uintptr_t)&_pmem_start + PMEM_SIZE)
+
+#endif
diff --git a/am/src/mycpu/ioe/input.c b/am/src/mycpu/ioe/input.c
new file mode 100644
index 0000000..29a8662
--- /dev/null
+++ b/am/src/mycpu/ioe/input.c
@@ -0,0 +1,6 @@
+#include <am.h>
+/* Keyboard register stub: no key source is implemented here, so always report "no key event". */
+void __am_input_keybrd(AM_INPUT_KEYBRD_T *kbd) {
+  kbd->keydown = false;        /* write both fields so the caller never reads an unwritten buffer */
+  kbd->keycode = AM_KEY_NONE;
+}
diff --git a/am/src/mycpu/ioe/ioe.c b/am/src/mycpu/ioe/ioe.c
new file mode 100644
index 0000000..97a306d
--- /dev/null
+++ b/am/src/mycpu/ioe/ioe.c
@@ -0,0 +1,54 @@
+#include <am.h>
+#include <klib-macros.h>
+
+/* Device handlers implemented in timer.c and input.c. */
+void __am_timer_init();
+void __am_timer_rtc(AM_TIMER_RTC_T *);
+void __am_timer_uptime(AM_TIMER_UPTIME_T *);
+void __am_input_keybrd(AM_INPUT_KEYBRD_T *);
+
+/* Config registers: report a timer with RTC support and an input device as present. */
+static void __am_timer_config(AM_TIMER_CONFIG_T *cfg) { cfg->present = true; cfg->has_rtc = true; }
+static void __am_input_config(AM_INPUT_CONFIG_T *cfg) { cfg->present = true;  }
+
+/* Uniform signature through which every table entry is called. */
+typedef void (*handler_t)(void *buf);
+
+/* Handler table indexed by register number; ioe_init() points unused slots at fail(). */
+static void *lut[128] = {
+  [AM_TIMER_CONFIG] = __am_timer_config,
+  [AM_TIMER_RTC   ] = __am_timer_rtc,
+  [AM_TIMER_UPTIME] = __am_timer_uptime,
+  [AM_INPUT_CONFIG] = __am_input_config,
+  [AM_INPUT_KEYBRD] = __am_input_keybrd,
+};
+
+/* Handler for registers that have no device behind them. */
+static void fail(void *buf) { panic("access nonexist register"); }
+
+/*
+ * Invoke the handler registered for `reg`.
+ * Register numbers outside the table, and slots that are still empty because
+ * ioe_init() has not run, go to fail() instead of reading past the table or
+ * calling through a null pointer.
+ */
+static void dispatch(int reg, void *buf) {
+  if (reg < 0 || reg >= (int)LENGTH(lut) || !lut[reg]) {
+    fail(buf);
+    return;
+  }
+  ((handler_t)lut[reg])(buf);
+}
+
+/* Point unused table slots at fail() and initialise the timer; always returns true. */
+bool ioe_init() {
+  for (int i = 0; i < (int)LENGTH(lut); i++) {
+    if (!lut[i]) lut[i] = fail;
+  }
+  __am_timer_init();
+  return true;
+}
+
+/* Reads and writes go through the same per-register handler. */
+void ioe_read (int reg, void *buf) { dispatch(reg, buf); }
+void ioe_write(int reg, void *buf) { dispatch(reg, buf); }
diff --git a/am/src/mycpu/ioe/timer.c b/am/src/mycpu/ioe/timer.c
new file mode 100644
index 0000000..f176d94
--- /dev/null
+++ b/am/src/mycpu/ioe/timer.c
@@ -0,0 +1,13 @@
+#include <am.h>
+
+void __am_timer_init() {
+  /* Empty: this port performs no timer setup. Called once from ioe_init(). */
+}
+
+void __am_timer_uptime(AM_TIMER_UPTIME_T *uptime) {
+  *uptime = (AM_TIMER_UPTIME_T){0};  /* stub: no time source is read, so report zero rather than leave the buffer unwritten */
+}
+
+void __am_timer_rtc(AM_TIMER_RTC_T *rtc) {
+  *rtc = (AM_TIMER_RTC_T){0};  /* stub: no clock is read, so return all-zero fields rather than leave the buffer unwritten */
+}
diff --git a/am/src/mycpu/scripts/section.ld b/am/src/mycpu/scripts/section.ld
new file mode 100644
index 0000000..a9f2688
--- /dev/null
+++ b/am/src/mycpu/scripts/section.ld
@@ -0,0 +1,33 @@
+_pmem_start = 0x80000000;        /* start of physical memory (also used by mycpu.h for PMEM_END) */
+
+ENTRY(_start)
+
+SECTIONS {
+  . = _pmem_start + 0x100000;    /* the image is linked 1 MiB above the start of memory */
+  .text : {
+    *(entry)                     /* start.S puts _start here, so it is the first code in the image */
+    *(.text*)
+  }
+  etext = .;
+  _etext = .;
+  .rodata : {
+    *(.rodata*) *(.srodata*)     /* include small read-only data instead of leaving it as an orphan section */
+  }
+  .data : {
+    *(.data*) *(.sdata*)         /* -fdata-sections emits .data.<name>; small data may land in .sdata */
+  }
+  edata = .;
+  _data = .;
+  .bss : {
+    _bss_start = .;
+    *(.bss*)
+    *(.sbss*)
+    *(.scommon)
+  }
+  _stack_top = ALIGN(0x1000);    /* lowest address of the stack area, 4 KiB aligned */
+  . = _stack_top + 0x8000;       /* reserve 32 KiB for the stack */
+  _stack_pointer = .;            /* high end of the stack area; start.S loads sp from here */
+  end = .;
+  _end = .;
+  _heap_start = ALIGN(0x1000);   /* trm.c starts the heap at this address */
+}
diff --git a/am/src/mycpu/trm.c b/am/src/mycpu/trm.c
new file mode 100644
index 0000000..f9a9b84
--- /dev/null
+++ b/am/src/mycpu/trm.c
@@ -0,0 +1,24 @@
+#include <am.h>
+#include <mycpu.h>
+
+extern char _heap_start;   /* linker-script symbol (section.ld); only its address is used */
+int main(const char *args);
+
+Area heap = RANGE(&_heap_start, PMEM_END);   /* area from _heap_start up to PMEM_END (mycpu.h) */
+#ifndef MAINARGS
+#define MAINARGS ""        /* fallback when the build does not pass -DMAINARGS */
+#endif
+static const char mainargs[] = MAINARGS;   /* argument string handed to main() by _trm_init() */
+
+void putch(char ch) {
+  /* Stub: nothing is written anywhere, so ch is discarded. */
+}
+
+void halt(int code) {
+  for (;;) { }  /* never returns; code is not reported anywhere */
+}
+
+void _trm_init() {
+  /* Run the program with the build-time argument string, then halt with its return value. */
+  halt(main(mainargs));
+}
diff --git a/klib/Makefile b/klib/Makefile
index b117c60..068a023 100644
--- a/klib/Makefile
+++ b/klib/Makefile
@@ -1,3 +1,7 @@
 NAME = klib
 SRCS = $(shell find src/ -name "*.c")
+# src/*.S are RISC-V assembly (software mul/div helpers): build them for riscv64 targets only.
+ifneq ($(findstring riscv64,$(ARCH)),)
+SRCS += $(shell find src/ -name "*.S")
+endif
 include $(AM_HOME)/Makefile
diff --git a/klib/src/div.S b/klib/src/div.S
new file mode 100644
index 0000000..0d394d2
--- /dev/null
+++ b/klib/src/div.S
@@ -0,0 +1,140 @@
+#define FUNC_TYPE(X)	.type X,@function
+#define FUNC_SIZE(X)	.size X,.-X
+/* FUNC_BEGIN: export X, mark it as a function symbol and open its label. */
+#define FUNC_BEGIN(X)		\
+	.globl X;		\
+	FUNC_TYPE (X);		\
+X:
+/* FUNC_END: record the size of X as the distance from its label to this point. */
+#define FUNC_END(X)		\
+	FUNC_SIZE(X)
+/* FUNC_ALIAS: export X as a second name for the symbol Y. */
+#define FUNC_ALIAS(X,Y)		\
+	.globl X;		\
+	X = Y
+
+
+#define __riscv_xlen 64   /* selects the #else (64-bit) branch of the test below */
+/* NOTE(review): RISC-V GCC normally predefines __riscv_xlen -- confirm this override is needed. */
+  .text
+  .align 2
+
+#if __riscv_xlen == 32
+/* Our RV64 64-bit routines are equivalent to our RV32 32-bit routines.  */
+# define __udivdi3 __udivsi3
+# define __umoddi3 __umodsi3
+# define __divdi3 __divsi3
+# define __moddi3 __modsi3
+#else
+FUNC_BEGIN (__udivsi3)
+  /* Compute __udivdi3(a0 << 32, a1 << 32); cast result to uint32_t.  */
+  sll    a0, a0, 32      /* drops the upper 32 bits; scaling both operands alike keeps the quotient */
+  sll    a1, a1, 32
+  move   t0, ra          /* keep the return address in t0; __udivdi3 only uses a0-a3 */
+  jal    __udivdi3
+  sext.w a0, a0          /* return the low 32 bits, sign-extended */
+  jr     t0
+FUNC_END (__udivsi3)
+
+FUNC_BEGIN (__umodsi3)
+  /* Compute __udivdi3((uint32_t)a0, (uint32_t)a1); cast a1 to uint32_t.  */
+  sll    a0, a0, 32      /* shift left then logically right by 32: */
+  sll    a1, a1, 32      /* zero-extends both 32-bit operands */
+  srl    a0, a0, 32
+  srl    a1, a1, 32
+  move   t0, ra          /* keep the return address in t0; __udivdi3 only uses a0-a3 */
+  jal    __udivdi3
+  sext.w a0, a1          /* __udivdi3 leaves the remainder in a1 */
+  jr     t0
+FUNC_END (__umodsi3)
+
+FUNC_ALIAS (__modsi3, __moddi3)   /* __modsi3 is exported as a second name for __moddi3 */
+
+FUNC_BEGIN( __divsi3)
+  /* Check for special case of INT_MIN/-1. Otherwise, fall into __divdi3.  */
+  li    t0, -1
+  beq   a1, t0, .L20     /* divisor is -1: the check is completed at .L20 at the end of the file */
+#endif
+
+FUNC_BEGIN (__divdi3)
+  bltz  a0, .L10         /* negative dividend */
+  bltz  a1, .L11         /* non-negative dividend, negative divisor */
+  /* Since the quotient is positive, fall into __udivdi3.  */
+
+FUNC_BEGIN (__udivdi3)   /* in: a0 = dividend, a1 = divisor; out: a0 = quotient, a1 = remainder */
+  mv    a2, a1           /* a2 = divisor */
+  mv    a1, a0           /* a1 = dividend; reduced to the remainder below */
+  li    a0, -1           /* value returned when the divisor is zero */
+  beqz  a2, .L5
+  li    a3, 1            /* a3 = quotient bit matching the current position of a2 */
+  bgeu  a2, a1, .L2      /* divisor already >= dividend: no alignment needed */
+.L1:                     /* shift divisor and its bit left until the divisor reaches the dividend */
+  blez  a2, .L2          /* stop once the top bit of a2 is set; another shift would lose it */
+  slli  a2, a2, 1
+  slli  a3, a3, 1
+  bgtu  a1, a2, .L1
+.L2:
+  li    a0, 0            /* quotient accumulator */
+.L3:                     /* one compare-and-subtract step per quotient bit */
+  bltu  a1, a2, .L4
+  sub   a1, a1, a2
+  or    a0, a0, a3
+.L4:
+  srli  a3, a3, 1
+  srli  a2, a2, 1
+  bnez  a3, .L3          /* done once the bit has been shifted out */
+.L5:
+  ret
+FUNC_END (__udivdi3)
+
+FUNC_BEGIN (__umoddi3)
+  /* Call __udivdi3(a0, a1), then return the remainder, which is in a1.  */
+  move  t0, ra           /* keep the return address in t0; __udivdi3 only uses a0-a3 */
+  jal   __udivdi3
+  move  a0, a1
+  jr    t0
+FUNC_END (__umoddi3)
+
+  /* Handle negative arguments to __divdi3.  */
+.L10:                /* entered with a0 < 0 */
+  neg   a0, a0
+  /* Zero is handled as a negative so that the result will not be inverted.  */
+  bgtz  a1, .L12     /* Compute __udivdi3(-a0, a1), then negate the result.  */
+
+  neg   a1, a1
+  j     __udivdi3    /* Compute __udivdi3(-a0, -a1).  */
+.L11:                /* Compute __udivdi3(a0, -a1), then negate the result.  */
+  neg   a1, a1
+.L12:
+  move  t0, ra       /* keep the return address in t0; __udivdi3 only uses a0-a3 */
+  jal   __udivdi3
+  neg   a0, a0
+  jr    t0
+FUNC_END (__divdi3)
+
+FUNC_BEGIN (__moddi3)   /* signed remainder; the result takes the sign of the dividend a0 */
+  move   t0, ra          /* keep the return address in t0 across the __udivdi3 calls */
+  bltz   a1, .L31
+  bltz   a0, .L32
+.L30:
+  jal    __udivdi3    /* The dividend is not negative.  */
+  move   a0, a1          /* __udivdi3 leaves the remainder in a1 */
+  jr     t0
+.L31:                    /* negative divisor: continue with its magnitude */
+  neg    a1, a1
+  bgez   a0, .L30
+.L32:                    /* negative dividend: divide its magnitude, then negate the remainder */
+  neg    a0, a0
+  jal    __udivdi3    /* The dividend is hella negative.  */
+  neg    a0, a1
+  jr     t0
+FUNC_END (__moddi3)
+
+#if __riscv_xlen == 64
+  /* continuation of __divsi3 */
+.L20:                    /* reached with a1 == -1 and t0 == -1 */
+  sll   t0, t0, 31       /* t0 = -1 << 31, i.e. INT_MIN sign-extended to 64 bits */
+  bne   a0, t0, __divdi3 /* any other dividend: ordinary signed division */
+  ret                    /* INT_MIN / -1: a0 is returned unchanged */
+FUNC_END (__divsi3)
+#endif
diff --git a/klib/src/int64.c b/klib/src/int64.c
index b3b2876..8ef6d18 100644
--- a/klib/src/int64.c
+++ b/klib/src/int64.c
@@ -363,273 +363,7 @@ uint32_t __inline __builtin_clzll(uint64_t value) {
 
 #include <am.h>
 
-/* Returns: a / b */
 
-COMPILER_RT_ABI di_int
-__divdi3(di_int a, di_int b)
-{
-    const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
-    di_int s_a = a >> bits_in_dword_m1;           /* s_a = a < 0 ? -1 : 0 */
-    di_int s_b = b >> bits_in_dword_m1;           /* s_b = b < 0 ? -1 : 0 */
-    a = (a ^ s_a) - s_a;                         /* negate if s_a == -1 */
-    b = (b ^ s_b) - s_b;                         /* negate if s_b == -1 */
-    s_a ^= s_b;                                  /*sign of quotient */
-    return (__udivmoddi4(a, b, (du_int*)0) ^ s_a) - s_a;  /* negate if s_a == -1 */
-}
-
-/* Returns: a / b, *rem = a % b  */
-
-COMPILER_RT_ABI di_int
-__divmoddi4(di_int a, di_int b, di_int* rem)
-{
-  di_int d = __divdi3(a,b);
-  *rem = a - (d*b);
-  return d;
-}
-
-/* Returns: a % b */
-
-COMPILER_RT_ABI di_int
-__moddi3(di_int a, di_int b)
-{
-    const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
-    di_int s = b >> bits_in_dword_m1;  /* s = b < 0 ? -1 : 0 */
-    b = (b ^ s) - s;                   /* negate if s == -1 */
-    s = a >> bits_in_dword_m1;         /* s = a < 0 ? -1 : 0 */
-    a = (a ^ s) - s;                   /* negate if s == -1 */
-    du_int r;
-    __udivmoddi4(a, b, &r);
-    return ((di_int)r ^ s) - s;                /* negate if s == -1 */
-}
-
-/* Returns: a / b */
-
-COMPILER_RT_ABI du_int
-__udivdi3(du_int a, du_int b)
-{
-    return __udivmoddi4(a, b, 0);
-}
-
-
-COMPILER_RT_ABI du_int
-__udivmoddi4(du_int a, du_int b, du_int* rem)
-{
-    const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
-    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
-    udwords n;
-    n.all = a;
-    udwords d;
-    d.all = b;
-    udwords q;
-    udwords r;
-    unsigned sr;
-    /* special cases, X is unknown, K != 0 */
-    if (n.s.high == 0)
-    {
-        if (d.s.high == 0)
-        {
-            /* 0 X
-             * ---
-             * 0 X
-             */
-            if (rem)
-                *rem = n.s.low % d.s.low;
-            return n.s.low / d.s.low;
-        }
-        /* 0 X
-         * ---
-         * K X
-         */
-        if (rem)
-            *rem = n.s.low;
-        return 0;
-    }
-    /* n.s.high != 0 */
-    if (d.s.low == 0)
-    {
-        if (d.s.high == 0)
-        {
-            /* K X
-             * ---
-             * 0 0
-             */
-            if (rem)
-                *rem = n.s.high % d.s.low;
-            return n.s.high / d.s.low;
-        }
-        /* d.s.high != 0 */
-        if (n.s.low == 0)
-        {
-            /* K 0
-             * ---
-             * K 0
-             */
-            if (rem)
-            {
-                r.s.high = n.s.high % d.s.high;
-                r.s.low = 0;
-                *rem = r.all;
-            }
-            return n.s.high / d.s.high;
-        }
-        /* K K
-         * ---
-         * K 0
-         */
-        if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
-        {
-            if (rem)
-            {
-                r.s.low = n.s.low;
-                r.s.high = n.s.high & (d.s.high - 1);
-                *rem = r.all;
-            }
-            return n.s.high >> __builtin_ctz(d.s.high);
-        }
-        /* K K
-         * ---
-         * K 0
-         */
-        sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
-        /* 0 <= sr <= n_uword_bits - 2 or sr large */
-        if (sr > n_uword_bits - 2)
-        {
-           if (rem)
-                *rem = n.all;
-            return 0;
-        }
-        ++sr;
-        /* 1 <= sr <= n_uword_bits - 1 */
-        /* q.all = n.all << (n_udword_bits - sr); */
-        q.s.low = 0;
-        q.s.high = n.s.low << (n_uword_bits - sr);
-        /* r.all = n.all >> sr; */
-        r.s.high = n.s.high >> sr;
-        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
-    }
-    else  /* d.s.low != 0 */
-    {
-        if (d.s.high == 0)
-        {
-            /* K X
-             * ---
-             * 0 K
-             */
-            if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
-            {
-                if (rem)
-                    *rem = n.s.low & (d.s.low - 1);
-                if (d.s.low == 1)
-                    return n.all;
-                sr = __builtin_ctz(d.s.low);
-                q.s.high = n.s.high >> sr;
-                q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
-                return q.all;
-            }
-            /* K X
-             * ---
-             * 0 K
-             */
-            sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
-            /* 2 <= sr <= n_udword_bits - 1
-             * q.all = n.all << (n_udword_bits - sr);
-             * r.all = n.all >> sr;
-             */
-            if (sr == n_uword_bits)
-            {
-                q.s.low = 0;
-                q.s.high = n.s.low;
-                r.s.high = 0;
-                r.s.low = n.s.high;
-            }
-            else if (sr < n_uword_bits)  // 2 <= sr <= n_uword_bits - 1
-            {
-                q.s.low = 0;
-                q.s.high = n.s.low << (n_uword_bits - sr);
-                r.s.high = n.s.high >> sr;
-                r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
-            }
-            else              // n_uword_bits + 1 <= sr <= n_udword_bits - 1
-            {
-                q.s.low = n.s.low << (n_udword_bits - sr);
-                q.s.high = (n.s.high << (n_udword_bits - sr)) |
-                           (n.s.low >> (sr - n_uword_bits));
-                r.s.high = 0;
-                r.s.low = n.s.high >> (sr - n_uword_bits);
-            }
-        }
-        else
-        {
-            /* K X
-             * ---
-             * K K
-             */
-            sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
-            /* 0 <= sr <= n_uword_bits - 1 or sr large */
-            if (sr > n_uword_bits - 1)
-            {
-                if (rem)
-                    *rem = n.all;
-                return 0;
-            }
-            ++sr;
-            /* 1 <= sr <= n_uword_bits */
-            /*  q.all = n.all << (n_udword_bits - sr); */
-            q.s.low = 0;
-            if (sr == n_uword_bits)
-            {
-                q.s.high = n.s.low;
-                r.s.high = 0;
-                r.s.low = n.s.high;
-            }
-            else
-            {
-                q.s.high = n.s.low << (n_uword_bits - sr);
-                r.s.high = n.s.high >> sr;
-                r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
-            }
-        }
-    }
-    /* Not a special case
-     * q and r are initialized with:
-     * q.all = n.all << (n_udword_bits - sr);
-     * r.all = n.all >> sr;
-     * 1 <= sr <= n_udword_bits - 1
-     */
-    su_int carry = 0;
-    for (; sr > 0; --sr)
-    {
-        /* r:q = ((r:q)  << 1) | carry */
-        r.s.high = (r.s.high << 1) | (r.s.low  >> (n_uword_bits - 1));
-        r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_uword_bits - 1));
-        q.s.high = (q.s.high << 1) | (q.s.low  >> (n_uword_bits - 1));
-        q.s.low  = (q.s.low  << 1) | carry;
-        /* carry = 0;
-         * if (r.all >= d.all)
-         * {
-         *      r.all -= d.all;
-         *      carry = 1;
-         * }
-         */
-        const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
-        carry = s & 1;
-        r.all -= d.all & s;
-    }
-    q.all = (q.all << 1) | carry;
-    if (rem)
-        *rem = r.all;
-    return q.all;
-}
-
-/* Returns: a % b */
-
-COMPILER_RT_ABI du_int
-__umoddi3(du_int a, du_int b)
-{
-    du_int r;
-    __udivmoddi4(a, b, &r);
-    return r;
-}
 
 // Returns: the number of leading 0-bits
 
@@ -705,3 +439,25 @@ COMPILER_RT_ABI si_int __ctzsi2(si_int a) {
   //     }
   return r + ((2 - (x >> 1)) & -((x & 1) == 0));
 }
+
+typedef int si_int;        /* 32-bit signed word used by the helpers below */
+typedef long long di_int;  /* 64-bit signed double word */
+typedef unsigned su_int;   /* 32-bit unsigned word */
+#ifndef CHAR_BIT           /* earlier code in this file already uses CHAR_BIT; do not redefine it */
+#define CHAR_BIT __CHAR_BIT__
+#endif
+si_int __ctzdi2(di_int a) {  /* trailing-zero count of a 64-bit value, built on __ctzsi2 */
+  const si_int word_bits = (si_int)(sizeof(si_int) * CHAR_BIT);
+  dwords parts;
+  parts.all = a;
+  if (parts.s.low != 0) return __ctzsi2(parts.s.low);  /* lowest set bit is in the low word */
+  return __ctzsi2(parts.s.high) + word_bits;           /* low word is zero: count it in full */
+}
+
+si_int __clzdi2(di_int a) {  /* leading-zero count of a 64-bit value, built on __clzsi2 */
+  const si_int word_bits = (si_int)(sizeof(si_int) * CHAR_BIT);
+  dwords parts;
+  parts.all = a;
+  if (parts.s.high != 0) return __clzsi2(parts.s.high);  /* highest set bit is in the high word */
+  return __clzsi2(parts.s.low) + word_bits;              /* high word is zero: count it in full */
+}
diff --git a/klib/src/muldi3.S b/klib/src/muldi3.S
new file mode 100644
index 0000000..c9e131d
--- /dev/null
+++ b/klib/src/muldi3.S
@@ -0,0 +1,25 @@
+#define FUNC_TYPE(X)	.type X,@function
+#define FUNC_SIZE(X)	.size X,.-X
+/* FUNC_BEGIN: export X, mark it as a function symbol and open its label. */
+#define FUNC_BEGIN(X)		\
+	.globl X;		\
+	FUNC_TYPE (X);		\
+X:
+/* FUNC_END: record the size of X as the distance from its label to this point. */
+#define FUNC_END(X)		\
+	FUNC_SIZE(X)
+
+
+FUNC_BEGIN (__muldi3)   /* shift-and-add multiply: a0 = low 64 bits of a0 * a1 */
+  mv     a2, a0         /* a2 = multiplicand, doubled on every iteration */
+  li     a0, 0          /* a0 = running sum */
+.L1:
+  andi   a3, a1, 1      /* test the lowest remaining multiplier bit */
+  beqz   a3, .L2
+  add    a0, a0, a2     /* bit set: add the shifted multiplicand */
+.L2:
+  srli   a1, a1, 1      /* move on to the next multiplier bit */
+  slli   a2, a2, 1
+  bnez   a1, .L1        /* finished once no set bits remain in the multiplier */
+  ret
+FUNC_END (__muldi3)
\ No newline at end of file
diff --git a/scripts/riscv64-mycpu.mk b/scripts/riscv64-mycpu.mk
new file mode 100644
index 0000000..d20c8fc
--- /dev/null
+++ b/scripts/riscv64-mycpu.mk
@@ -0,0 +1,22 @@
+CROSS_COMPILE := riscv64-linux-gnu-
+COMMON_FLAGS  := -fno-pic -march=rv64ifd -mcmodel=medany
+CFLAGS        += $(COMMON_FLAGS) -static
+ASFLAGS       += $(COMMON_FLAGS) -O0
+LDFLAGS       += -melf64lriscv
+# -march=rv64ifd has no M extension, so klib supplies mul/div in software. AM_SRCS: platform sources under am/src.
+AM_SRCS := mycpu/boot/start.S \
+           mycpu/trm.c \
+		   mycpu/ioe/timer.c \
+		   mycpu/ioe/ioe.c \
+		   mycpu/ioe/input.c
+# One section per function/object so --gc-sections can drop unreferenced ones; link with the mycpu linker script.
+CFLAGS    += -fdata-sections -ffunction-sections
+LDFLAGS   += -T $(AM_HOME)/am/src/mycpu/scripts/section.ld
+LDFLAGS   += --gc-sections -e _start
+CFLAGS += -DMAINARGS=\"$(mainargs)\"
+CFLAGS += -I$(AM_HOME)/am/src/mycpu/include
+# image: write a disassembly listing and a raw binary; .bss is flagged alloc,contents so objcopy writes it out too.
+image: $(IMAGE).elf
+	@$(OBJDUMP) -d $(IMAGE).elf > $(IMAGE).txt
+	@echo + OBJCOPY "->" $(IMAGE_REL).bin
+	@$(OBJCOPY) -S --set-section-flags .bss=alloc,contents -O binary $(IMAGE).elf $(IMAGE).bin