diff --git a/libgo/Makefile.am b/libgo/Makefile.am
index 9172f7ed31869683ee5ff4ba75fbe4c9dac95844..c30d9c36c2b0ab985c5bb470973595bf74378f1e 100644
--- a/libgo/Makefile.am
+++ b/libgo/Makefile.am
@@ -421,14 +421,11 @@ runtime_files = \
 	runtime/go-eface-compare.c \
 	runtime/go-eface-val-compare.c \
 	runtime/go-getgoroot.c \
-	runtime/go-go.c \
-	runtime/go-gomaxprocs.c \
 	runtime/go-int-array-to-string.c \
 	runtime/go-int-to-string.c \
 	runtime/go-interface-compare.c \
 	runtime/go-interface-eface-compare.c \
 	runtime/go-interface-val-compare.c \
-	runtime/go-lock-os-thread.c \
 	runtime/go-make-slice.c \
 	runtime/go-map-delete.c \
 	runtime/go-map-index.c \
@@ -451,9 +448,7 @@ runtime_files = \
 	runtime/go-reflect-map.c \
 	runtime/go-rune.c \
 	runtime/go-runtime-error.c \
-	runtime/go-sched.c \
 	runtime/go-select.c \
-	runtime/go-semacquire.c \
 	runtime/go-send-big.c \
 	runtime/go-send-nb-big.c \
 	runtime/go-send-nb-small.c \
@@ -499,6 +494,8 @@ runtime_files = \
 	map.c \
 	mprof.c \
 	reflect.c \
+	runtime1.c \
+	sema.c \
 	sigqueue.c \
 	string.c
 
@@ -520,6 +517,14 @@ reflect.c: $(srcdir)/runtime/reflect.goc goc2c
 	./goc2c --gcc --go-prefix libgo_reflect $< > $@.tmp
 	mv -f $@.tmp $@
 
+runtime1.c: $(srcdir)/runtime/runtime1.goc goc2c
+	./goc2c --gcc --go-prefix libgo_runtime $< > $@.tmp
+	mv -f $@.tmp $@
+
+sema.c: $(srcdir)/runtime/sema.goc goc2c
+	./goc2c --gcc --go-prefix libgo_runtime $< > $@.tmp
+	mv -f $@.tmp $@
+
 sigqueue.c: $(srcdir)/runtime/sigqueue.goc goc2c
 	./goc2c --gcc --go-prefix libgo_runtime $< > $@.tmp
 	mv -f $@.tmp $@
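
The two new rules follow the goc2c pattern already used for reflect.c and sigqueue.c: goc2c translates a mixed Go/C .goc source into plain C, giving each Go-visible function an asm name built from the --go-prefix option. As a rough sketch of the output shape (illustrative only; the real emitted code comes from goc2c and is not part of this patch, and runtime_gomaxprocsfunc is an assumed helper name):

```c
#include "runtime.h"   /* int32, etc. */

int32 GOMAXPROCS (int32 n)
  __asm__ ("libgo_runtime.runtime.GOMAXPROCS");

int32
GOMAXPROCS (int32 n)
{
  int32 ret;

  ret = runtime_gomaxprocsfunc (n);   /* assumed scheduler helper */
  return ret;
}
```

runtime1.goc replaces the deleted go-gomaxprocs.c, and sema.goc replaces the deleted go-semacquire.c; both deletions appear later in this patch.
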
diff --git a/libgo/Makefile.in b/libgo/Makefile.in
index 1f7fa18f1ec2408e733b88da8c1942e04553ad4a..8b6bb7282722de66757c8d81b2973d1579e7ca32 100644
--- a/libgo/Makefile.in
+++ b/libgo/Makefile.in
@@ -189,25 +189,24 @@ am__libgo_la_SOURCES_DIST = runtime/go-append.c runtime/go-assert.c \
 	runtime/go-copy.c runtime/go-defer.c \
 	runtime/go-deferred-recover.c runtime/go-eface-compare.c \
 	runtime/go-eface-val-compare.c runtime/go-getgoroot.c \
-	runtime/go-go.c runtime/go-gomaxprocs.c \
 	runtime/go-int-array-to-string.c runtime/go-int-to-string.c \
 	runtime/go-interface-compare.c \
 	runtime/go-interface-eface-compare.c \
-	runtime/go-interface-val-compare.c runtime/go-lock-os-thread.c \
-	runtime/go-make-slice.c runtime/go-map-delete.c \
-	runtime/go-map-index.c runtime/go-map-len.c \
-	runtime/go-map-range.c runtime/go-nanotime.c \
-	runtime/go-new-channel.c runtime/go-new-map.c runtime/go-new.c \
-	runtime/go-panic.c runtime/go-print.c runtime/go-rec-big.c \
+	runtime/go-interface-val-compare.c runtime/go-make-slice.c \
+	runtime/go-map-delete.c runtime/go-map-index.c \
+	runtime/go-map-len.c runtime/go-map-range.c \
+	runtime/go-nanotime.c runtime/go-new-channel.c \
+	runtime/go-new-map.c runtime/go-new.c runtime/go-panic.c \
+	runtime/go-print.c runtime/go-rec-big.c \
 	runtime/go-rec-nb-big.c runtime/go-rec-nb-small.c \
 	runtime/go-rec-small.c runtime/go-recover.c \
 	runtime/go-reflect.c runtime/go-reflect-call.c \
 	runtime/go-reflect-chan.c runtime/go-reflect-map.c \
 	runtime/go-rune.c runtime/go-runtime-error.c \
-	runtime/go-sched.c runtime/go-select.c runtime/go-semacquire.c \
-	runtime/go-send-big.c runtime/go-send-nb-big.c \
-	runtime/go-send-nb-small.c runtime/go-send-small.c \
-	runtime/go-setenv.c runtime/go-signal.c runtime/go-strcmp.c \
+	runtime/go-select.c runtime/go-send-big.c \
+	runtime/go-send-nb-big.c runtime/go-send-nb-small.c \
+	runtime/go-send-small.c runtime/go-setenv.c \
+	runtime/go-signal.c runtime/go-strcmp.c \
 	runtime/go-string-to-byte-array.c \
 	runtime/go-string-to-int-array.c runtime/go-strplus.c \
 	runtime/go-strslice.c runtime/go-trampoline.c \
@@ -224,7 +223,7 @@ am__libgo_la_SOURCES_DIST = runtime/go-append.c runtime/go-assert.c \
 	runtime/mheap.c runtime/msize.c runtime/proc.c \
 	runtime/runtime.c runtime/thread.c runtime/yield.c \
 	runtime/rtems-task-variable-add.c chan.c iface.c malloc.c \
-	map.c mprof.c reflect.c sigqueue.c string.c
+	map.c mprof.c reflect.c runtime1.c sema.c sigqueue.c string.c
 @LIBGO_IS_LINUX_FALSE@am__objects_1 = lock_sema.lo thread-sema.lo
 @LIBGO_IS_LINUX_TRUE@am__objects_1 = lock_futex.lo thread-linux.lo
 @HAVE_SYS_MMAN_H_FALSE@am__objects_2 = mem_posix_memalign.lo
@@ -236,19 +235,18 @@ am__objects_4 = go-append.lo go-assert.lo go-assert-interface.lo \
 	go-chan-len.lo go-check-interface.lo go-close.lo \
 	go-construct-map.lo go-convert-interface.lo go-copy.lo \
 	go-defer.lo go-deferred-recover.lo go-eface-compare.lo \
-	go-eface-val-compare.lo go-getgoroot.lo go-go.lo \
-	go-gomaxprocs.lo go-int-array-to-string.lo go-int-to-string.lo \
+	go-eface-val-compare.lo go-getgoroot.lo \
+	go-int-array-to-string.lo go-int-to-string.lo \
 	go-interface-compare.lo go-interface-eface-compare.lo \
-	go-interface-val-compare.lo go-lock-os-thread.lo \
-	go-make-slice.lo go-map-delete.lo go-map-index.lo \
-	go-map-len.lo go-map-range.lo go-nanotime.lo go-new-channel.lo \
-	go-new-map.lo go-new.lo go-panic.lo go-print.lo go-rec-big.lo \
-	go-rec-nb-big.lo go-rec-nb-small.lo go-rec-small.lo \
-	go-recover.lo go-reflect.lo go-reflect-call.lo \
+	go-interface-val-compare.lo go-make-slice.lo go-map-delete.lo \
+	go-map-index.lo go-map-len.lo go-map-range.lo go-nanotime.lo \
+	go-new-channel.lo go-new-map.lo go-new.lo go-panic.lo \
+	go-print.lo go-rec-big.lo go-rec-nb-big.lo go-rec-nb-small.lo \
+	go-rec-small.lo go-recover.lo go-reflect.lo go-reflect-call.lo \
 	go-reflect-chan.lo go-reflect-map.lo go-rune.lo \
-	go-runtime-error.lo go-sched.lo go-select.lo go-semacquire.lo \
-	go-send-big.lo go-send-nb-big.lo go-send-nb-small.lo \
-	go-send-small.lo go-setenv.lo go-signal.lo go-strcmp.lo \
+	go-runtime-error.lo go-select.lo go-send-big.lo \
+	go-send-nb-big.lo go-send-nb-small.lo go-send-small.lo \
+	go-setenv.lo go-signal.lo go-strcmp.lo \
 	go-string-to-byte-array.lo go-string-to-int-array.lo \
 	go-strplus.lo go-strslice.lo go-trampoline.lo go-type-eface.lo \
 	go-type-error.lo go-type-identity.lo go-type-interface.lo \
@@ -258,7 +256,7 @@ am__objects_4 = go-append.lo go-assert.lo go-assert-interface.lo \
 	mcache.lo mcentral.lo $(am__objects_2) mfinal.lo mfixalloc.lo \
 	mgc0.lo mheap.lo msize.lo proc.lo runtime.lo thread.lo \
 	yield.lo $(am__objects_3) chan.lo iface.lo malloc.lo map.lo \
-	mprof.lo reflect.lo sigqueue.lo string.lo
+	mprof.lo reflect.lo runtime1.lo sema.lo sigqueue.lo string.lo
 am_libgo_la_OBJECTS = $(am__objects_4)
 libgo_la_OBJECTS = $(am_libgo_la_OBJECTS)
 libgo_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
@@ -850,14 +848,11 @@ runtime_files = \
 	runtime/go-eface-compare.c \
 	runtime/go-eface-val-compare.c \
 	runtime/go-getgoroot.c \
-	runtime/go-go.c \
-	runtime/go-gomaxprocs.c \
 	runtime/go-int-array-to-string.c \
 	runtime/go-int-to-string.c \
 	runtime/go-interface-compare.c \
 	runtime/go-interface-eface-compare.c \
 	runtime/go-interface-val-compare.c \
-	runtime/go-lock-os-thread.c \
 	runtime/go-make-slice.c \
 	runtime/go-map-delete.c \
 	runtime/go-map-index.c \
@@ -880,9 +875,7 @@ runtime_files = \
 	runtime/go-reflect-map.c \
 	runtime/go-rune.c \
 	runtime/go-runtime-error.c \
-	runtime/go-sched.c \
 	runtime/go-select.c \
-	runtime/go-semacquire.c \
 	runtime/go-send-big.c \
 	runtime/go-send-nb-big.c \
 	runtime/go-send-nb-small.c \
@@ -928,6 +921,8 @@ runtime_files = \
 	map.c \
 	mprof.c \
 	reflect.c \
+	runtime1.c \
+	sema.c \
 	sigqueue.c \
 	string.c
 
@@ -2476,14 +2471,11 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-eface-compare.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-eface-val-compare.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-getgoroot.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-go.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-gomaxprocs.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-int-array-to-string.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-int-to-string.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-interface-compare.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-interface-eface-compare.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-interface-val-compare.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-lock-os-thread.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-main.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-make-slice.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-map-delete.Plo@am__quote@
@@ -2507,9 +2499,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-reflect.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-rune.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-runtime-error.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-sched.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-select.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-semacquire.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-send-big.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-send-nb-big.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/go-send-nb-small.Plo@am__quote@
@@ -2553,6 +2543,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reflect.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rtems-task-variable-add.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/runtime.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/runtime1.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sema.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sigqueue.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/string.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/thread-linux.Plo@am__quote@
@@ -2735,20 +2727,6 @@ go-getgoroot.lo: runtime/go-getgoroot.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o go-getgoroot.lo `test -f 'runtime/go-getgoroot.c' || echo '$(srcdir)/'`runtime/go-getgoroot.c
 
-go-go.lo: runtime/go-go.c
-@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-go.lo -MD -MP -MF $(DEPDIR)/go-go.Tpo -c -o go-go.lo `test -f 'runtime/go-go.c' || echo '$(srcdir)/'`runtime/go-go.c
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/go-go.Tpo $(DEPDIR)/go-go.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='runtime/go-go.c' object='go-go.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o go-go.lo `test -f 'runtime/go-go.c' || echo '$(srcdir)/'`runtime/go-go.c
-
-go-gomaxprocs.lo: runtime/go-gomaxprocs.c
-@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-gomaxprocs.lo -MD -MP -MF $(DEPDIR)/go-gomaxprocs.Tpo -c -o go-gomaxprocs.lo `test -f 'runtime/go-gomaxprocs.c' || echo '$(srcdir)/'`runtime/go-gomaxprocs.c
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/go-gomaxprocs.Tpo $(DEPDIR)/go-gomaxprocs.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='runtime/go-gomaxprocs.c' object='go-gomaxprocs.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o go-gomaxprocs.lo `test -f 'runtime/go-gomaxprocs.c' || echo '$(srcdir)/'`runtime/go-gomaxprocs.c
-
 go-int-array-to-string.lo: runtime/go-int-array-to-string.c
 @am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-int-array-to-string.lo -MD -MP -MF $(DEPDIR)/go-int-array-to-string.Tpo -c -o go-int-array-to-string.lo `test -f 'runtime/go-int-array-to-string.c' || echo '$(srcdir)/'`runtime/go-int-array-to-string.c
 @am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/go-int-array-to-string.Tpo $(DEPDIR)/go-int-array-to-string.Plo
@@ -2784,13 +2762,6 @@ go-interface-val-compare.lo: runtime/go-interface-val-compare.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o go-interface-val-compare.lo `test -f 'runtime/go-interface-val-compare.c' || echo '$(srcdir)/'`runtime/go-interface-val-compare.c
 
-go-lock-os-thread.lo: runtime/go-lock-os-thread.c
-@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-lock-os-thread.lo -MD -MP -MF $(DEPDIR)/go-lock-os-thread.Tpo -c -o go-lock-os-thread.lo `test -f 'runtime/go-lock-os-thread.c' || echo '$(srcdir)/'`runtime/go-lock-os-thread.c
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/go-lock-os-thread.Tpo $(DEPDIR)/go-lock-os-thread.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='runtime/go-lock-os-thread.c' object='go-lock-os-thread.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o go-lock-os-thread.lo `test -f 'runtime/go-lock-os-thread.c' || echo '$(srcdir)/'`runtime/go-lock-os-thread.c
-
 go-make-slice.lo: runtime/go-make-slice.c
 @am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-make-slice.lo -MD -MP -MF $(DEPDIR)/go-make-slice.Tpo -c -o go-make-slice.lo `test -f 'runtime/go-make-slice.c' || echo '$(srcdir)/'`runtime/go-make-slice.c
 @am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/go-make-slice.Tpo $(DEPDIR)/go-make-slice.Plo
@@ -2945,13 +2916,6 @@ go-runtime-error.lo: runtime/go-runtime-error.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o go-runtime-error.lo `test -f 'runtime/go-runtime-error.c' || echo '$(srcdir)/'`runtime/go-runtime-error.c
 
-go-sched.lo: runtime/go-sched.c
-@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-sched.lo -MD -MP -MF $(DEPDIR)/go-sched.Tpo -c -o go-sched.lo `test -f 'runtime/go-sched.c' || echo '$(srcdir)/'`runtime/go-sched.c
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/go-sched.Tpo $(DEPDIR)/go-sched.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='runtime/go-sched.c' object='go-sched.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o go-sched.lo `test -f 'runtime/go-sched.c' || echo '$(srcdir)/'`runtime/go-sched.c
-
 go-select.lo: runtime/go-select.c
 @am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-select.lo -MD -MP -MF $(DEPDIR)/go-select.Tpo -c -o go-select.lo `test -f 'runtime/go-select.c' || echo '$(srcdir)/'`runtime/go-select.c
 @am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/go-select.Tpo $(DEPDIR)/go-select.Plo
@@ -2959,13 +2923,6 @@ go-select.lo: runtime/go-select.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o go-select.lo `test -f 'runtime/go-select.c' || echo '$(srcdir)/'`runtime/go-select.c
 
-go-semacquire.lo: runtime/go-semacquire.c
-@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-semacquire.lo -MD -MP -MF $(DEPDIR)/go-semacquire.Tpo -c -o go-semacquire.lo `test -f 'runtime/go-semacquire.c' || echo '$(srcdir)/'`runtime/go-semacquire.c
-@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/go-semacquire.Tpo $(DEPDIR)/go-semacquire.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='runtime/go-semacquire.c' object='go-semacquire.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o go-semacquire.lo `test -f 'runtime/go-semacquire.c' || echo '$(srcdir)/'`runtime/go-semacquire.c
-
 go-send-big.lo: runtime/go-send-big.c
 @am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-send-big.lo -MD -MP -MF $(DEPDIR)/go-send-big.Tpo -c -o go-send-big.lo `test -f 'runtime/go-send-big.c' || echo '$(srcdir)/'`runtime/go-send-big.c
 @am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/go-send-big.Tpo $(DEPDIR)/go-send-big.Plo
@@ -4454,6 +4411,14 @@ reflect.c: $(srcdir)/runtime/reflect.goc goc2c
 	./goc2c --gcc --go-prefix libgo_reflect $< > $@.tmp
 	mv -f $@.tmp $@
 
+runtime1.c: $(srcdir)/runtime/runtime1.goc goc2c
+	./goc2c --gcc --go-prefix libgo_runtime $< > $@.tmp
+	mv -f $@.tmp $@
+
+sema.c: $(srcdir)/runtime/sema.goc goc2c
+	./goc2c --gcc --go-prefix libgo_runtime $< > $@.tmp
+	mv -f $@.tmp $@
+
 sigqueue.c: $(srcdir)/runtime/sigqueue.goc goc2c
 	./goc2c --gcc --go-prefix libgo_runtime $< > $@.tmp
 	mv -f $@.tmp $@
diff --git a/libgo/go/syscall/mksyscall.awk b/libgo/go/syscall/mksyscall.awk
index 1b612f3fb996b0706b5a7637048b6470d69847fb..49828d94ce35ebbef1a77acb2366b38d7470d07f 100644
--- a/libgo/go/syscall/mksyscall.awk
+++ b/libgo/go/syscall/mksyscall.awk
@@ -102,10 +102,6 @@ BEGIN {
 	   gofnname, gofnparams, gofnresults == "" ? "" : "(", gofnresults,
 	   gofnresults == "" ? "" : ")", gofnresults == "" ? "" : " ")
 
-    if (blocking) {
-	print "\tentersyscall()"
-    }
-
     loc = gofnname "/" cfnname ":"
 
     split(gofnparams, goargs, ", *")
@@ -151,7 +147,8 @@ BEGIN {
 		status = 1
 		next
 	    }
-	    args = args "StringBytePtr(" goname ")"
+	    printf("\t_p%d := StringBytePtr(%s)\n", goarg, goname)
+	    args = sprintf("%s_p%d", args, goarg)
 	} else if (gotype ~ /^\[\](.*)/) {
 	    if (ctype !~ /^\*/ || cargs[carg + 1] == "") {
 		print loc, "bad C type for slice:", gotype, ctype | "cat 1>&2"
@@ -192,6 +189,10 @@ BEGIN {
 	next
     }
 
+    if (blocking) {
+	print "\tentersyscall()"
+    }
+
     printf("\t")
     if (gofnresults != "") {
 	printf("_r := ")
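
The awk change does two things: string arguments are now converted through explicit temporaries (_p0, _p1, ...), and entersyscall() is emitted only after all argument conversion, immediately before the C call. StringBytePtr allocates, and between entersyscall() and exitsyscall() the goroutine counts as blocked in a system call, so the new scheduler may run a garbage collection concurrently; allocating inside that window would be unsafe. A C-flavored sketch of the invariant, using illustrative names only (alloc_string_copy and c_library_call are not real libgo functions):

```c
extern void entersyscall (void);
extern void exitsyscall (void);
extern char *alloc_string_copy (const char *);  /* hypothetical: allocates */
extern int c_library_call (char *);             /* hypothetical C call */

int
wrapper (const char *gostr)
{
  char *p;
  int r;

  p = alloc_string_copy (gostr);  /* allocation must precede entersyscall */
  entersyscall ();                /* goroutine now counts as in a syscall */
  r = c_library_call (p);         /* the possibly-blocking call */
  exitsyscall ();
  return r;
}
```
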
diff --git a/libgo/runtime/cpuprof.c b/libgo/runtime/cpuprof.c
index 18cf595536480e0f4b5d0d9957ec6e9c2cdd2f92..5e3fc99d914b7b224b5e53dd41ad52947ecf9e56 100644
--- a/libgo/runtime/cpuprof.c
+++ b/libgo/runtime/cpuprof.c
@@ -361,9 +361,9 @@ getprofile(Profile *p)
 		return ret;
 
 	// Wait for new log.
-	// runtime·entersyscall();
+	runtime_entersyscall();
 	runtime_notesleep(&p->wait);
-	// runtime·exitsyscall();
+	runtime_exitsyscall();
 	runtime_noteclear(&p->wait);
 
 	n = p->handoff;
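
These two calls were previously commented out because the old runtime had no usable entersyscall/exitsyscall. With the new scheduler they are enabled, so a profile reader blocked in runtime_notesleep no longer stalls other goroutines. A minimal sketch of the pattern, assuming the Note API declared in runtime.h (noteclear arms the one-shot event, notesleep blocks until notewakeup):

```c
#include "runtime.h"   /* Note and runtime_note* (assumed declarations) */

static Note ready;

static void
wait_for_event (void)
{
  runtime_noteclear (&ready);   /* arm the one-shot note */
  /* another thread will eventually call runtime_notewakeup (&ready) */
  runtime_entersyscall ();      /* other goroutines may run meanwhile */
  runtime_notesleep (&ready);   /* blocks this OS thread until wakeup */
  runtime_exitsyscall ();
}
```
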
diff --git a/libgo/runtime/go-close.c b/libgo/runtime/go-close.c
index 778eab3d7d8f2bc41622bdc1a51ac1aaecb289cc..a6df3833c26e5ef5216deafaa4afa309196c97c8 100644
--- a/libgo/runtime/go-close.c
+++ b/libgo/runtime/go-close.c
@@ -4,6 +4,7 @@
    Use of this source code is governed by a BSD-style
    license that can be found in the LICENSE file.  */
 
+#include "runtime.h"
 #include "go-assert.h"
 #include "go-panic.h"
 #include "channel.h"
@@ -23,10 +24,7 @@ __go_builtin_close (struct __go_channel *channel)
   __go_assert (i == 0);
 
   while (channel->selected_for_send)
-    {
-      i = pthread_cond_wait (&channel->cond, &channel->lock);
-      __go_assert (i == 0);
-    }
+    runtime_cond_wait (&channel->cond, &channel->lock);
 
   if (channel->is_closed)
     {
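
This is the first of several hunks (the same substitution recurs below in go-rec-nb-small.c, go-rec-small.c, go-select.c, go-send-nb-small.c, and go-send-small.c) replacing the open-coded pthread_cond_wait plus assert with a single runtime_cond_wait helper. Its definition is not part of this diff; presumably it is roughly the following, centralizing the error check in one place:

```c
#include <pthread.h>
#include "go-assert.h"

/* Assumed shape of the helper; the real definition lives elsewhere in
   the runtime and is not shown in this patch.  */
void
runtime_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex)
{
  int i;

  i = pthread_cond_wait (cond, mutex);
  __go_assert (i == 0);
}
```
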
diff --git a/libgo/runtime/go-defer.c b/libgo/runtime/go-defer.c
index dda62fb4e1ab79539a828a9ef1c40d03d2818881..c27de6ab463c7878c04a270394d45c9daef51c48 100644
--- a/libgo/runtime/go-defer.c
+++ b/libgo/runtime/go-defer.c
@@ -16,8 +16,10 @@
 void
 __go_defer (_Bool *frame, void (*pfn) (void *), void *arg)
 {
+  G *g;
   struct __go_defer_stack *n;
 
+  g = runtime_g ();
   n = (struct __go_defer_stack *) __go_alloc (sizeof (struct __go_defer_stack));
   n->__next = g->defer;
   n->__frame = frame;
@@ -33,6 +35,9 @@ __go_defer (_Bool *frame, void (*pfn) (void *), void *arg)
 void
 __go_undefer (_Bool *frame)
 {
+  G *g;
+
+  g = runtime_g ();
   while (g->defer != NULL && g->defer->__frame == frame)
     {
       struct __go_defer_stack *d;
@@ -63,6 +68,9 @@ __go_undefer (_Bool *frame)
 _Bool
 __go_set_defer_retaddr (void *retaddr)
 {
+  G *g;
+
+  g = runtime_g ();
   if (g->defer != NULL)
     g->defer->__retaddr = retaddr;
   return 0;
diff --git a/libgo/runtime/go-deferred-recover.c b/libgo/runtime/go-deferred-recover.c
index d749c2788ab70b2e79735fa735645108463c24a7..78ef287cf00a05342ec65ac83b4f1ba19fe92971 100644
--- a/libgo/runtime/go-deferred-recover.c
+++ b/libgo/runtime/go-deferred-recover.c
@@ -79,6 +79,9 @@
 struct __go_empty_interface
 __go_deferred_recover ()
 {
+  G *g;
+
+  g = runtime_g ();
   if (g->defer == NULL || g->defer->__panic != g->panic)
     {
       struct __go_empty_interface ret;
@@ -87,5 +90,5 @@ __go_deferred_recover ()
       ret.__object = NULL;
       return ret;
     }
-  return __go_recover();
+  return __go_recover ();
 }
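
go-defer.c and go-deferred-recover.c show the same mechanical transformation, repeated in go-panic.c, go-recover.c, and go-unwind.c below: g and m stop being bare thread-local globals and are instead fetched through runtime_g() and runtime_m(), cached in a local at the top of each function so the accessor is called once. A sketch of the assumed idiom (G comes from runtime.h; the accessor declarations shown are assumptions about the new runtime.h):

```c
#include "runtime.h"   /* G, M */

extern G *runtime_g (void);   /* current goroutine (assumed declaration) */
extern M *runtime_m (void);   /* current OS thread (assumed declaration) */

static _Bool
has_pending_defers (void)
{
  G *g;

  g = runtime_g ();   /* fetch once; reuse the cached local below */
  return g->defer != NULL;
}
```
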
diff --git a/libgo/runtime/go-go.c b/libgo/runtime/go-go.c
deleted file mode 100644
index 82b265f964ef748320f274f1df196a0e65fc51c7..0000000000000000000000000000000000000000
--- a/libgo/runtime/go-go.c
+++ /dev/null
@@ -1,668 +0,0 @@
-/* go-go.c -- the go function.
-
-   Copyright 2009 The Go Authors. All rights reserved.
-   Use of this source code is governed by a BSD-style
-   license that can be found in the LICENSE file.  */
-
-#include <errno.h>
-#include <limits.h>
-#include <signal.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include <semaphore.h>
-
-#include "config.h"
-#include "go-assert.h"
-#include "go-panic.h"
-#include "go-alloc.h"
-#include "runtime.h"
-#include "arch.h"
-#include "malloc.h"
-
-#ifdef USING_SPLIT_STACK
-/* FIXME: This is not declared anywhere.  */
-extern void *__splitstack_find (void *, void *, size_t *, void **, void **,
-				void **);
-#endif
-
-/* We stop the threads by sending them the signal GO_SIG_STOP and we
-   start them by sending them the signal GO_SIG_START.  */
-
-#define GO_SIG_START (SIGRTMIN + 1)
-#define GO_SIG_STOP (SIGRTMIN + 2)
-
-#ifndef SA_RESTART
-  #define SA_RESTART 0
-#endif
-
-/* A doubly linked list of the threads we have started.  */
-
-struct __go_thread_id
-{
-  /* Links.  */
-  struct __go_thread_id *prev;
-  struct __go_thread_id *next;
-  /* True if the thread ID has not yet been filled in.  */
-  _Bool tentative;
-  /* Thread ID.  */
-  pthread_t id;
-  /* Thread's M structure.  */
-  struct M *m;
-  /* If the thread ID has not been filled in, the function we are
-     running.  */
-  void (*pfn) (void *);
-  /* If the thread ID has not been filled in, the argument to the
-     function.  */
-  void *arg;
-};
-
-static struct __go_thread_id *__go_all_thread_ids;
-
-/* A lock to control access to ALL_THREAD_IDS.  */
-
-static pthread_mutex_t __go_thread_ids_lock = PTHREAD_MUTEX_INITIALIZER;
-
-/* A semaphore used to wait until all the threads have stopped.  */
-
-static sem_t __go_thread_ready_sem;
-
-/* A signal set used to wait until garbage collection is complete.  */
-
-static sigset_t __go_thread_wait_sigset;
-
-/* Remove the current thread from the list of threads.  */
-
-static void
-remove_current_thread (void *dummy __attribute__ ((unused)))
-{
-  struct __go_thread_id *list_entry;
-  MCache *mcache;
-  int i;
-  
-  list_entry = m->list_entry;
-  mcache = m->mcache;
-
-  i = pthread_mutex_lock (&__go_thread_ids_lock);
-  __go_assert (i == 0);
-
-  if (list_entry->prev != NULL)
-    list_entry->prev->next = list_entry->next;
-  else
-    __go_all_thread_ids = list_entry->next;
-  if (list_entry->next != NULL)
-    list_entry->next->prev = list_entry->prev;
-
-  /* This will lock runtime_mheap as needed.  */
-  runtime_MCache_ReleaseAll (mcache);
-
-  /* This should never deadlock--there shouldn't be any code that
-     holds the runtime_mheap lock when locking __go_thread_ids_lock.
-     We don't want to do this after releasing __go_thread_ids_lock
-     because it will mean that the garbage collector might run, and
-     the garbage collector does not try to lock runtime_mheap in all
-     cases since it knows it is running single-threaded.  */
-  runtime_lock (&runtime_mheap);
-  mstats.heap_alloc += mcache->local_alloc;
-  mstats.heap_objects += mcache->local_objects;
-  __builtin_memset (mcache, 0, sizeof (struct MCache));
-  runtime_FixAlloc_Free (&runtime_mheap.cachealloc, mcache);
-  runtime_unlock (&runtime_mheap);
-
-  /* As soon as we release this lock, a GC could run.  Since this
-     thread is no longer on the list, the GC will not find our M
-     structure, so it could get freed at any time.  That means that
-     any code from here to thread exit must not assume that m is
-     valid.  */
-  m = NULL;
-  g = NULL;
-
-  i = pthread_mutex_unlock (&__go_thread_ids_lock);
-  __go_assert (i == 0);
-
-  free (list_entry);
-}
-
-/* Start the thread.  */
-
-static void *
-start_go_thread (void *thread_arg)
-{
-  struct M *newm = (struct M *) thread_arg;
-  void (*pfn) (void *);
-  void *arg;
-  struct __go_thread_id *list_entry;
-  int i;
-
-#ifdef __rtems__
-  __wrap_rtems_task_variable_add ((void **) &m);
-  __wrap_rtems_task_variable_add ((void **) &g);
-#endif
-
-  m = newm;
-  g = m->curg;
-
-  pthread_cleanup_push (remove_current_thread, NULL);
-
-  list_entry = newm->list_entry;
-
-  pfn = list_entry->pfn;
-  arg = list_entry->arg;
-
-#ifndef USING_SPLIT_STACK
-  /* If we don't support split stack, record the current stack as the
-     top of the stack.  There shouldn't be anything relevant to the
-     garbage collector above this point.  */
-  m->gc_sp = (void *) &arg;
-#endif
-
-  /* Finish up the entry on the thread list.  */
-
-  i = pthread_mutex_lock (&__go_thread_ids_lock);
-  __go_assert (i == 0);
-
-  list_entry->id = pthread_self ();
-  list_entry->pfn = NULL;
-  list_entry->arg = NULL;
-  list_entry->tentative = 0;
-
-  i = pthread_mutex_unlock (&__go_thread_ids_lock);
-  __go_assert (i == 0);
-
-  (*pfn) (arg);
-
-  pthread_cleanup_pop (1);
-
-  return NULL;
-}
-
-/* The runtime.Goexit function.  */
-
-void Goexit (void) asm ("libgo_runtime.runtime.Goexit");
-
-void
-Goexit (void)
-{
-  pthread_exit (NULL);
-  abort ();
-}
-
-/* Count of threads created.  */
-
-static volatile int mcount;
-
-/* Implement the go statement.  */
-
-void
-__go_go (void (*pfn) (void*), void *arg)
-{
-  int i;
-  pthread_attr_t attr;
-  struct M *newm;
-  struct __go_thread_id *list_entry;
-  pthread_t tid;
-
-  i = pthread_attr_init (&attr);
-  __go_assert (i == 0);
-  i = pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
-  __go_assert (i == 0);
-
-#ifdef LINKER_SUPPORTS_SPLIT_STACK
-  /* The linker knows how to handle calls between code which uses
-     -fsplit-stack and code which does not.  That means that we can
-     run with a smaller stack and rely on the -fsplit-stack support to
-     save us.  The GNU/Linux glibc library won't let us have a very
-     small stack, but we make it as small as we can.  */
-#ifndef PTHREAD_STACK_MIN
-#define PTHREAD_STACK_MIN 8192
-#endif
-  i = pthread_attr_setstacksize (&attr, PTHREAD_STACK_MIN);
-  __go_assert (i == 0);
-#endif
-
-  newm = __go_alloc (sizeof (M));
-
-  list_entry = malloc (sizeof (struct __go_thread_id));
-  list_entry->prev = NULL;
-  list_entry->next = NULL;
-  list_entry->tentative = 1;
-  list_entry->m = newm;
-  list_entry->pfn = pfn;
-  list_entry->arg = arg;
-
-  newm->list_entry = list_entry;
-
-  newm->curg = __go_alloc (sizeof (G));
-  newm->curg->m = newm;
-
-  newm->id = __sync_fetch_and_add (&mcount, 1);
-  newm->fastrand = 0x49f6428aUL + newm->id;
-
-  newm->mcache = runtime_allocmcache ();
-
-  /* Add the thread to the list of all threads, marked as tentative
-     since it is not yet ready to go.  */
-  i = pthread_mutex_lock (&__go_thread_ids_lock);
-  __go_assert (i == 0);
-
-  if (__go_all_thread_ids != NULL)
-    __go_all_thread_ids->prev = list_entry;
-  list_entry->next = __go_all_thread_ids;
-  __go_all_thread_ids = list_entry;
-
-  i = pthread_mutex_unlock (&__go_thread_ids_lock);
-  __go_assert (i == 0);
-
-  /* Start the thread.  */
-  i = pthread_create (&tid, &attr, start_go_thread, newm);
-  __go_assert (i == 0);
-
-  i = pthread_attr_destroy (&attr);
-  __go_assert (i == 0);
-}
-
-/* This is the signal handler for GO_SIG_START.  The garbage collector
-   will send this signal to a thread when it wants the thread to
-   start.  We don't have to actually do anything here, but we need a
-   signal handler since ignoring the signal will mean that the
-   sigsuspend will never see it.  */
-
-static void
-gc_start_handler (int sig __attribute__ ((unused)))
-{
-}
-
-/* Tell the garbage collector that we are ready, and wait for the
-   garbage collector to tell us that it is done.  This may be called
-   by a signal handler, so it is restricted to using functions which
-   are async cancel safe.  */
-
-static void
-stop_for_gc (void)
-{
-  int i;
-
-  /* Tell the garbage collector about our stack.  */
-#ifdef USING_SPLIT_STACK
-  m->gc_sp = __splitstack_find (NULL, NULL, &m->gc_len,
-				&m->gc_next_segment, &m->gc_next_sp,
-				&m->gc_initial_sp);
-#else
-  {
-    uintptr_t top = (uintptr_t) m->gc_sp;
-    uintptr_t bottom = (uintptr_t) &top;
-    if (top < bottom)
-      {
-	m->gc_next_sp = m->gc_sp;
-	m->gc_len = bottom - top;
-      }
-    else
-      {
-	m->gc_next_sp = (void *) bottom;
-	m->gc_len = top - bottom;
-      }
-  }
-#endif
-
-  /* Tell the garbage collector that we are ready by posting to the
-     semaphore.  */
-  i = sem_post (&__go_thread_ready_sem);
-  __go_assert (i == 0);
-
-  /* Wait for the garbage collector to tell us to continue.  */
-  sigsuspend (&__go_thread_wait_sigset);
-}
-
-/* This is the signal handler for GO_SIG_STOP.  The garbage collector
-   will send this signal to a thread when it wants the thread to
-   stop.  */
-
-static void
-gc_stop_handler (int sig __attribute__ ((unused)))
-{
-  struct M *pm = m;
-
-  if (__sync_bool_compare_and_swap (&pm->holds_finlock, 1, 1))
-    {
-      /* We can't interrupt the thread while it holds the finalizer
-	 lock.  Otherwise we can get into a deadlock when mark calls
-	 runtime_walkfintab.  */
-      __sync_bool_compare_and_swap (&pm->gcing_for_finlock, 0, 1);
-      return;
-    }
-
-  if (__sync_bool_compare_and_swap (&pm->mallocing, 1, 1))
-    {
-      /* m->mallocing was already non-zero.  We can't interrupt the
-	 thread while it is running an malloc.  Instead, tell it to
-	 call back to us when done.  */
-      __sync_bool_compare_and_swap (&pm->gcing, 0, 1);
-      return;
-    }
-
-  if (__sync_bool_compare_and_swap (&pm->nomemprof, 1, 1))
-    {
-      /* Similarly, we can't interrupt the thread while it is building
-	 profiling information.  Otherwise we can get into a deadlock
-	 when sweepspan calls MProf_Free.  */
-      __sync_bool_compare_and_swap (&pm->gcing_for_prof, 0, 1);
-      return;
-    }
-
-  stop_for_gc ();
-}
-
-/* This is called by malloc when it gets a signal during the malloc
-   call itself.  */
-
-int
-__go_run_goroutine_gc (int r)
-{
-  /* Force callee-saved registers to be saved on the stack.  This is
-     not needed if we are invoked from the signal handler, but it is
-     needed if we are called directly, since otherwise we might miss
-     something that a function somewhere up the call stack is holding
-     in a register.  */
-  __builtin_unwind_init ();
-
-  stop_for_gc ();
-
-  /* This avoids tail recursion, to make sure that the saved registers
-     are on the stack.  */
-  return r;
-}
-
-/* Stop all the other threads for garbage collection.  */
-
-void
-runtime_stoptheworld (void)
-{
-  int i;
-  pthread_t me;
-  int c;
-  struct __go_thread_id *p;
-
-  i = pthread_mutex_lock (&__go_thread_ids_lock);
-  __go_assert (i == 0);
-
-  me = pthread_self ();
-  c = 0;
-  p = __go_all_thread_ids;
-  while (p != NULL)
-    {
-      if (p->tentative || pthread_equal (me, p->id))
-	p = p->next;
-      else
-	{
-	  i = pthread_kill (p->id, GO_SIG_STOP);
-	  if (i == 0)
-	    {
-	      ++c;
-	      p = p->next;
-	    }
-	  else if (i == ESRCH)
-	    {
-	      struct __go_thread_id *next;
-
-	      /* This thread died somehow.  Remove it from the
-		 list.  */
-	      next = p->next;
-	      if (p->prev != NULL)
-		p->prev->next = next;
-	      else
-		__go_all_thread_ids = next;
-	      if (next != NULL)
-		next->prev = p->prev;
-	      free (p);
-	      p = next;
-	    }
-	  else
-	    abort ();
-	}
-    }
-
-  /* Wait for each thread to receive the signal and post to the
-     semaphore.  If a thread receives the signal but contrives to die
-     before it posts to the semaphore, then we will hang forever
-     here.  */
-
-  while (c > 0)
-    {
-      i = sem_wait (&__go_thread_ready_sem);
-      if (i < 0 && errno == EINTR)
-	continue;
-      __go_assert (i == 0);
-      --c;
-    }
-
-  /* Leave with __go_thread_ids_lock held.  */
-}
-
-/* Scan all the stacks for garbage collection.  This should be called
-   with __go_thread_ids_lock held.  */
-
-void
-__go_scanstacks (void (*scan) (byte *, int64))
-{
-  pthread_t me;
-  struct __go_thread_id *p;
-
-  /* Make sure all the registers for this thread are on the stack.  */
-  __builtin_unwind_init ();
-
-  me = pthread_self ();
-  for (p = __go_all_thread_ids; p != NULL; p = p->next)
-    {
-      if (p->tentative)
-	{
-	  /* The goroutine function and argument can be allocated on
-	     the heap, so we have to scan them for a thread that has
-	     not yet started.  */
-	  scan ((void *) &p->pfn, sizeof (void *));
-	  scan ((void *) &p->arg, sizeof (void *));
-	  scan ((void *) &p->m, sizeof (void *));
-	  continue;
-	}
-
-#ifdef USING_SPLIT_STACK
-
-      void *sp;
-      size_t len;
-      void *next_segment;
-      void *next_sp;
-      void *initial_sp;
-
-      if (pthread_equal (me, p->id))
-	{
-	  next_segment = NULL;
-	  next_sp = NULL;
-	  initial_sp = NULL;
-	  sp = __splitstack_find (NULL, NULL, &len, &next_segment,
-				  &next_sp, &initial_sp);
-	}
-      else
-	{
-	  sp = p->m->gc_sp;
-	  len = p->m->gc_len;
-	  next_segment = p->m->gc_next_segment;
-	  next_sp = p->m->gc_next_sp;
-	  initial_sp = p->m->gc_initial_sp;
-	}
-
-      while (sp != NULL)
-	{
-	  scan (sp, len);
-	  sp = __splitstack_find (next_segment, next_sp, &len,
-				  &next_segment, &next_sp, &initial_sp);
-	}
-
-#else /* !defined(USING_SPLIT_STACK) */
-
-      if (pthread_equal (me, p->id))
-	{
-	  uintptr_t top = (uintptr_t) m->gc_sp;
-	  uintptr_t bottom = (uintptr_t) &top;
-	  if (top < bottom)
-	    scan (m->gc_sp, bottom - top);
-	  else
-	    scan ((void *) bottom, top - bottom);
-	}
-      else
-	{
-	  scan (p->m->gc_next_sp, p->m->gc_len);
-	}
-	
-#endif /* !defined(USING_SPLIT_STACK) */
-
-      /* Also scan the M structure while we're at it.  */
-
-      scan ((void *) &p->m, sizeof (void *));
-    }
-}
-
-/* Release all the memory caches.  This is called with
-   __go_thread_ids_lock held.  */
-
-void
-__go_stealcache (void)
-{
-  struct __go_thread_id *p;
-
-  for (p = __go_all_thread_ids; p != NULL; p = p->next)
-    runtime_MCache_ReleaseAll (p->m->mcache);
-}
-
-/* Gather memory cache statistics.  This is called with
-   __go_thread_ids_lock held.  */
-
-void
-__go_cachestats (void)
-{
-  struct __go_thread_id *p;
-
-  for (p = __go_all_thread_ids; p != NULL; p = p->next)
-    {
-      MCache *c;
-      int i;
-
-      runtime_purgecachedstats(p->m);
-      c = p->m->mcache;
-      for (i = 0; i < NumSizeClasses; ++i)
-	{
-	  mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
-	  c->local_by_size[i].nmalloc = 0;
-	  mstats.by_size[i].nfree += c->local_by_size[i].nfree;
-	  c->local_by_size[i].nfree = 0;
-	}
-    }
-}
-
-/* Start the other threads after garbage collection.  */
-
-void
-runtime_starttheworld (bool extra __attribute__ ((unused)))
-{
-  int i;
-  pthread_t me;
-  struct __go_thread_id *p;
-
-  /* Here __go_thread_ids_lock should be held.  */
-
-  me = pthread_self ();
-  p = __go_all_thread_ids;
-  while (p != NULL)
-    {
-      if (p->tentative || pthread_equal (me, p->id))
-	p = p->next;
-      else
-	{
-	  i = pthread_kill (p->id, GO_SIG_START);
-	  if (i == 0)
-	    p = p->next;
-	  else
-	    abort ();
-	}
-    }
-
-  i = pthread_mutex_unlock (&__go_thread_ids_lock);
-  __go_assert (i == 0);
-}
-
-/* Initialize the interaction between goroutines and the garbage
-   collector.  */
-
-void
-__go_gc_goroutine_init (void *sp __attribute__ ((unused)))
-{
-  struct __go_thread_id *list_entry;
-  int i;
-  sigset_t sset;
-  struct sigaction act;
-
-  /* Add the initial thread to the list of all threads.  */
-
-  list_entry = malloc (sizeof (struct __go_thread_id));
-  list_entry->prev = NULL;
-  list_entry->next = NULL;
-  list_entry->tentative = 0;
-  list_entry->id = pthread_self ();
-  list_entry->m = m;
-  list_entry->pfn = NULL;
-  list_entry->arg = NULL;
-  __go_all_thread_ids = list_entry;
-
-  /* Initialize the semaphore which signals when threads are ready for
-     GC.  */
-
-  i = sem_init (&__go_thread_ready_sem, 0, 0);
-  __go_assert (i == 0);
-
-  /* Fetch the current signal mask.  */
-
-  i = sigemptyset (&sset);
-  __go_assert (i == 0);
-  i = sigprocmask (SIG_BLOCK, NULL, &sset);
-  __go_assert (i == 0);
-
-  /* Make sure that GO_SIG_START is not blocked and GO_SIG_STOP is
-     blocked, and save that set for use with later calls to sigsuspend
-     while waiting for GC to complete.  */
-
-  i = sigdelset (&sset, GO_SIG_START);
-  __go_assert (i == 0);
-  i = sigaddset (&sset, GO_SIG_STOP);
-  __go_assert (i == 0);
-  __go_thread_wait_sigset = sset;
-
-  /* Block GO_SIG_START and unblock GO_SIG_STOP, and use that for
-     the process signal mask.  */
-
-  i = sigaddset (&sset, GO_SIG_START);
-  __go_assert (i == 0);
-  i = sigdelset (&sset, GO_SIG_STOP);
-  __go_assert (i == 0);
-  i = sigprocmask (SIG_SETMASK, &sset, NULL);
-  __go_assert (i == 0);
-
-  /* Install the signal handlers.  */
-  memset (&act, 0, sizeof act);
-  i = sigemptyset (&act.sa_mask);
-  __go_assert (i == 0);
-
-  act.sa_handler = gc_start_handler;
-  act.sa_flags = SA_RESTART;
-  i = sigaction (GO_SIG_START, &act, NULL);
-  __go_assert (i == 0);
-
-  /* We could consider using an alternate signal stack for this.  The
-     function does not use much stack space, so it may be OK.  */
-  act.sa_handler = gc_stop_handler;
-  i = sigaction (GO_SIG_STOP, &act, NULL);
-  __go_assert (i == 0);
-
-#ifndef USING_SPLIT_STACK
-  /* If we don't support split stack, record the current stack as the
-     top of the stack.  */
-  m->gc_sp = sp;
-#endif
-}
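
go-go.c implemented each goroutine as its own pthread: stop-the-world worked by signalling every thread (GO_SIG_STOP/GO_SIG_START), and stack scanning walked a hand-maintained thread list. All of that is superseded by the ported scheduler in runtime/proc.c (not shown in this diff), which multiplexes goroutines onto OS threads. The compiler-facing entry point keeps its shape: gccgo lowers a go statement to a __go_go call with a thunk, roughly as below. The thunk and argument struct are illustrative, __go_alloc's parameter type is assumed per go-alloc.h, and __go_go is shown with its old void signature (the new scheduler may well return the new G instead):

```c
extern void *__go_alloc (unsigned int);   /* assumed per go-alloc.h */
extern void __go_go (void (*pfn) (void *), void *arg);
extern void f (int);                      /* illustrative target */

struct go_f_args { int x; };

static void
go_f_thunk (void *p)
{
  struct go_f_args *a = (struct go_f_args *) p;
  f (a->x);                 /* the goroutine body */
}

void
spawn (void)
{
  struct go_f_args *a = __go_alloc (sizeof *a);
  a->x = 42;
  __go_go (go_f_thunk, a);  /* schedule the equivalent of go f(42) */
}
```
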
diff --git a/libgo/runtime/go-gomaxprocs.c b/libgo/runtime/go-gomaxprocs.c
deleted file mode 100644
index 65146c501208ce5f7528e536ef46fcb636f2b245..0000000000000000000000000000000000000000
--- a/libgo/runtime/go-gomaxprocs.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/* go-gomaxprocs.c -- runtime.GOMAXPROCS.
-
-   Copyright 2009 The Go Authors. All rights reserved.
-   Use of this source code is governed by a BSD-style
-   license that can be found in the LICENSE file.  */
-
-/* This is the runtime.GOMAXPROCS function.  This currently does
-   nothing, since each goroutine runs in a separate thread anyhow.  */
-
-extern int GOMAXPROCS (int) asm ("libgo_runtime.runtime.GOMAXPROCS");
-
-static int set = 1;
-
-int
-GOMAXPROCS (int n)
-{
-  int ret;
-
-  ret = set;
-  if (n > 0)
-    set = n;
-  return ret;
-}
diff --git a/libgo/runtime/go-lock-os-thread.c b/libgo/runtime/go-lock-os-thread.c
deleted file mode 100644
index 204f11dce7cde7a8246a01919be0a75ce490625c..0000000000000000000000000000000000000000
--- a/libgo/runtime/go-lock-os-thread.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/* go-lock-os-thread.c -- the LockOSThread and UnlockOSThread functions.
-
-   Copyright 2009 The Go Authors. All rights reserved.
-   Use of this source code is governed by a BSD-style
-   license that can be found in the LICENSE file.  */
-
-/* The runtime.LockOSThread and runtime.UnlockOSThread functions are
-   meaningless in the current implementation, since for us a goroutine
-   always stays on a single OS thread.  */
-
-extern void LockOSThread (void) __asm__ ("libgo_runtime.runtime.LockOSThread");
-
-void
-LockOSThread (void)
-{
-}
-
-extern void UnlockOSThread (void)
-  __asm__ ("libgo_runtime.runtime.UnlockOSThread");
-
-void
-UnlockOSThread (void)
-{
-}
diff --git a/libgo/runtime/go-main.c b/libgo/runtime/go-main.c
index 6fa6a1fa717f1c81d685a08c72a8bfeae8650d67..8047eaea93f75ae97df52a8cd676fe28f554554f 100644
--- a/libgo/runtime/go-main.c
+++ b/libgo/runtime/go-main.c
@@ -8,6 +8,7 @@
 
 #include <stdlib.h>
 #include <time.h>
+#include <unistd.h>
 
 #ifdef HAVE_FPU_CONTROL_H
 #include <fpu_control.h>
@@ -15,7 +16,6 @@
 
 #include "go-alloc.h"
 #include "array.h"
-#include "go-signal.h"
 #include "go-string.h"
 
 #include "runtime.h"
@@ -36,36 +36,39 @@ extern char **environ;
 extern void __go_init_main (void);
 extern void real_main (void) asm ("main.main");
 
+static void mainstart (void *);
+
 /* The main function.  */
 
 int
 main (int argc, char **argv)
 {
+  runtime_initsig (0);
   runtime_args (argc, (byte **) argv);
-
-  m = &runtime_m0;
-  g = &runtime_g0;
-  m->curg = g;
-  g->m = m;
-  runtime_mallocinit ();
-  __go_gc_goroutine_init (&argc);
-
-  runtime_osinit();
-  runtime_goargs();
-  runtime_goenvs();
-
-  __initsig ();
+  runtime_osinit ();
+  runtime_schedinit ();
 
 #if defined(HAVE_SRANDOM)
   srandom ((unsigned int) time (NULL));
 #else
   srand ((unsigned int) time (NULL));
 #endif
+
+  __go_go (mainstart, NULL);
+  runtime_mstart (runtime_m ());
+  abort ();
+}
+
+static void
+mainstart (void *arg __attribute__ ((unused)))
+{
   __go_init_main ();
 
-  __go_enable_gc ();
+  mstats.enablegc = 1;
 
   real_main ();
 
-  return 0;
+  runtime_exit (0);
+
+  abort ();
 }
diff --git a/libgo/runtime/go-panic.c b/libgo/runtime/go-panic.c
index 9eae5527636e22fde364b1484aba3ccfb7fae202..23df57930b73fec5eb91a5271f55a9dfe6d8959b 100644
--- a/libgo/runtime/go-panic.c
+++ b/libgo/runtime/go-panic.c
@@ -39,8 +39,11 @@ __printpanics (struct __go_panic_stack *p)
 void
 __go_panic (struct __go_empty_interface arg)
 {
+  G *g;
   struct __go_panic_stack *n;
 
+  g = runtime_g ();
+
   n = (struct __go_panic_stack *) __go_alloc (sizeof (struct __go_panic_stack));
   n->__arg = arg;
   n->__next = g->panic;
diff --git a/libgo/runtime/go-rec-nb-small.c b/libgo/runtime/go-rec-nb-small.c
index 054392009c311a5612a849033a2ead6e534a7c69..c21878ce131b247a395d64613dfcd3374328d719 100644
--- a/libgo/runtime/go-rec-nb-small.c
+++ b/libgo/runtime/go-rec-nb-small.c
@@ -6,6 +6,7 @@
 
 #include <stdint.h>
 
+#include "runtime.h"
 #include "go-assert.h"
 #include "go-panic.h"
 #include "channel.h"
@@ -22,10 +23,7 @@ __go_receive_nonblocking_acquire (struct __go_channel *channel)
   __go_assert (i == 0);
 
   while (channel->selected_for_receive)
-    {
-      i = pthread_cond_wait (&channel->cond, &channel->lock);
-      __go_assert (i == 0);
-    }
+    runtime_cond_wait (&channel->cond, &channel->lock);
 
   if (channel->is_closed
       && (channel->num_entries == 0
@@ -59,10 +57,7 @@ __go_receive_nonblocking_acquire (struct __go_channel *channel)
 	  __go_broadcast_to_select (channel);
 
 	  while (channel->next_store == 0)
-	    {
-	      i = pthread_cond_wait (&channel->cond, &channel->lock);
-	      __go_assert (i == 0);
-	    }
+	    runtime_cond_wait (&channel->cond, &channel->lock);
 
 	  has_data = 1;
 	}
diff --git a/libgo/runtime/go-rec-small.c b/libgo/runtime/go-rec-small.c
index d94763296941b631aa9e214f64cbc7575f7d283e..f26dbcdd99388644a3de66625e23a0024596025b 100644
--- a/libgo/runtime/go-rec-small.c
+++ b/libgo/runtime/go-rec-small.c
@@ -6,6 +6,7 @@
 
 #include <stdint.h>
 
+#include "runtime.h"
 #include "go-assert.h"
 #include "go-panic.h"
 #include "channel.h"
@@ -198,8 +199,7 @@ __go_receive_acquire (struct __go_channel *channel, _Bool for_select)
       /* Wait for something to change, then loop around and try
 	 again.  */
 
-      i = pthread_cond_wait (&channel->cond, &channel->lock);
-      __go_assert (i == 0);
+      runtime_cond_wait (&channel->cond, &channel->lock);
     }
 }
 
diff --git a/libgo/runtime/go-recover.c b/libgo/runtime/go-recover.c
index fe6031c100f18cb51beccff8abe930882e7f36c0..7101d518ade9bfb4eb4e29ccd946a03d29dba3c4 100644
--- a/libgo/runtime/go-recover.c
+++ b/libgo/runtime/go-recover.c
@@ -18,10 +18,13 @@
 _Bool
 __go_can_recover (const void* retaddr)
 {
+  G *g;
   struct __go_defer_stack *d;
   const char* ret;
   const char* dret;
 
+  g = runtime_g ();
+
   d = g->defer;
   if (d == NULL)
     return 0;
@@ -50,8 +53,11 @@ __go_can_recover (const void* retaddr)
 struct __go_empty_interface
 __go_recover ()
 {
+  G *g;
   struct __go_panic_stack *p;
 
+  g = runtime_g ();
+
   if (g->panic == NULL || g->panic->__was_recovered)
     {
       struct __go_empty_interface ret;
diff --git a/libgo/runtime/go-sched.c b/libgo/runtime/go-sched.c
deleted file mode 100644
index 2e36d31a5dc94949b50b7fa00dc7cf5e6e677d3a..0000000000000000000000000000000000000000
--- a/libgo/runtime/go-sched.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/* go-sched.c -- the runtime.Gosched function.
-
-   Copyright 2009 The Go Authors. All rights reserved.
-   Use of this source code is governed by a BSD-style
-   license that can be found in the LICENSE file.  */
-
-#include <sched.h>
-
-void Gosched (void) asm ("libgo_runtime.runtime.Gosched");
-
-void
-Gosched (void)
-{
-  sched_yield ();
-}
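
With goroutines multiplexed onto OS threads, sched_yield() is no longer a meaningful implementation of runtime.Gosched: it yields the thread without rescheduling the goroutine. The replacement presumably lives in the new proc.c, along the lines of:

```c
/* Assumed replacement; runtime/proc.c is not shown in this diff, and
   runtime_gosched is an assumed entry-point name.  */
extern void runtime_gosched (void);

void Gosched (void) __asm__ ("libgo_runtime.runtime.Gosched");

void
Gosched (void)
{
  runtime_gosched ();   /* requeue this goroutine, run another */
}
```
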
diff --git a/libgo/runtime/go-select.c b/libgo/runtime/go-select.c
index e425aae24c7f34a7b0ba72930d0400d59a0e992e..677c699b52c39c168fd69dd32fbd28dcd5ae67c8 100644
--- a/libgo/runtime/go-select.c
+++ b/libgo/runtime/go-select.c
@@ -11,6 +11,7 @@
 #include <stdlib.h>
 #include <unistd.h>
 
+#include "runtime.h"
 #include "config.h"
 #include "go-assert.h"
 #include "channel.h"
@@ -746,10 +747,7 @@ __go_select (uintptr_t count, _Bool has_default,
 					  (is_queued
 					   ? NULL
 					   : &selected_for_read)))
-	    {
-	      x = pthread_cond_wait (&__go_select_cond, &__go_select_mutex);
-	      __go_assert (x == 0);
-	    }
+	    runtime_cond_wait (&__go_select_cond, &__go_select_mutex);
 
 	  is_queued = 1;
 	}
diff --git a/libgo/runtime/go-semacquire.c b/libgo/runtime/go-semacquire.c
deleted file mode 100644
index 7c77c0b418bfa4e03512cdcfa2d1a5d00b5c89b0..0000000000000000000000000000000000000000
--- a/libgo/runtime/go-semacquire.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/* go-semacquire.c -- implement runtime.Semacquire and runtime.Semrelease.
-
-   Copyright 2009 The Go Authors. All rights reserved.
-   Use of this source code is governed by a BSD-style
-   license that can be found in the LICENSE file.  */
-
-#include <stdint.h>
-
-#include <pthread.h>
-
-#include "go-assert.h"
-#include "runtime.h"
-
-/* We use a single global lock and condition variable.  This is
-   painful, since it will cause unnecessary contention, but is hard to
-   avoid in a portable manner.  On GNU/Linux we can use futexes, but
-   they are unfortunately not exposed by libc and are thus also hard
-   to use portably.  */
-
-static pthread_mutex_t sem_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t sem_cond = PTHREAD_COND_INITIALIZER;
-
-/* If the value in *ADDR is positive, and we are able to atomically
-   decrement it, return true.  Otherwise do nothing and return
-   false.  */
-
-static _Bool
-acquire (uint32 *addr)
-{
-  while (1)
-    {
-      uint32 val;
-
-      val = *addr;
-      if (val == 0)
-	return 0;
-      if (__sync_bool_compare_and_swap (addr, val, val - 1))
-	return 1;
-    }
-}
-
-/* Implement runtime.Semacquire.  ADDR points to a semaphore count.
-   We have acquired the semaphore when we have decremented the count
-   and it remains nonnegative.  */
-
-void
-runtime_semacquire (uint32 *addr)
-{
-  while (1)
-    {
-      int i;
-
-      /* If the current count is positive, and we are able to atomically
-	 decrement it, then we have acquired the semaphore.  */
-      if (acquire (addr))
-	return;
-
-      /* Lock the mutex.  */
-      i = pthread_mutex_lock (&sem_lock);
-      __go_assert (i == 0);
-
-      /* Check the count again with the mutex locked.  */
-      if (acquire (addr))
-	{
-	  i = pthread_mutex_unlock (&sem_lock);
-	  __go_assert (i == 0);
-	  return;
-	}
-
-      /* The count is zero.  Even if a call to runtime.Semrelease
-	 increments it to become positive, that call will try to
-	 acquire the mutex and block, so we are sure to see the signal
-	 of the condition variable.  */
-      i = pthread_cond_wait (&sem_cond, &sem_lock);
-      __go_assert (i == 0);
-
-      /* Unlock the mutex and try again.  */
-      i = pthread_mutex_unlock (&sem_lock);
-      __go_assert (i == 0);
-    }
-}
-
-/* Implement runtime.Semrelease.  ADDR points to a semaphore count.  We
-   must atomically increment the count.  If the count becomes
-   positive, we signal the condition variable to wake up another
-   process.  */
-
-void
-runtime_semrelease (uint32 *addr)
-{
-  int32_t val;
-
-  val = __sync_fetch_and_add (addr, 1);
-
-  /* VAL is the old value.  It should never be negative.  If it is
-     negative, that implies that Semacquire somehow decremented a zero
-     value, or that the count has overflowed.  */
-  __go_assert (val >= 0);
-
-  /* If the old value was zero, then we have now released a count, and
-     we signal the condition variable.  If the old value was positive,
-     then nobody can be waiting.  We have to use
-     pthread_cond_broadcast, not pthread_cond_signal, because
-     otherwise there would be a race condition when the count is
-     incremented twice before any locker manages to decrement it.  */
-  if (val == 0)
-    {
-      int i;
-
-      i = pthread_mutex_lock (&sem_lock);
-      __go_assert (i == 0);
-
-      i = pthread_cond_broadcast (&sem_cond);
-      __go_assert (i == 0);
-
-      i = pthread_mutex_unlock (&sem_lock);
-      __go_assert (i == 0);
-    }
-}
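
The pthread-based semaphore, with its single global mutex and condition variable, is replaced by runtime/sema.goc, built by the new Makefile rules above. In the upstream runtime that file keeps the same atomic fast path but parks the waiting goroutine instead of blocking a whole OS thread. The fast path, sketched with the upstream helper name (the libgo copy itself is not shown in this diff):

```c
#include "runtime.h"   /* uint32 */

/* Lock-free fast path: decrement the count if it is positive.  */
static _Bool
cansemacquire (uint32 *addr)
{
  uint32 v;

  while ((v = *addr) > 0)
    if (__sync_bool_compare_and_swap (addr, v, v - 1))
      return 1;
  return 0;
}
```
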
diff --git a/libgo/runtime/go-send-nb-small.c b/libgo/runtime/go-send-nb-small.c
index 51914dbfa3c4591fb039af6f6e6f3de730025201..f3336099bf7e626700f7271f89ea7f3d3f6fb0d2 100644
--- a/libgo/runtime/go-send-nb-small.c
+++ b/libgo/runtime/go-send-nb-small.c
@@ -6,6 +6,7 @@
 
 #include <stdint.h>
 
+#include "runtime.h"
 #include "go-assert.h"
 #include "go-panic.h"
 #include "channel.h"
@@ -24,10 +25,7 @@ __go_send_nonblocking_acquire (struct __go_channel *channel)
   __go_assert (i == 0);
 
   while (channel->selected_for_send)
-    {
-      i = pthread_cond_wait (&channel->cond, &channel->lock);
-      __go_assert (i == 0);
-    }
+    runtime_cond_wait (&channel->cond, &channel->lock);
 
   if (channel->is_closed)
     {
diff --git a/libgo/runtime/go-send-small.c b/libgo/runtime/go-send-small.c
index 25e3c82bd89e41b92d415980a89f41287b91b2b5..89a7032756da9b0c41128655d9e8d1e0094c17f4 100644
--- a/libgo/runtime/go-send-small.c
+++ b/libgo/runtime/go-send-small.c
@@ -6,6 +6,7 @@
 
 #include <stdint.h>
 
+#include "runtime.h"
 #include "go-assert.h"
 #include "go-panic.h"
 #include "channel.h"
@@ -62,8 +63,7 @@ __go_send_acquire (struct __go_channel *channel, _Bool for_select)
       /* Wait for something to change, then loop around and try
 	 again.  */
 
-      i = pthread_cond_wait (&channel->cond, &channel->lock);
-      __go_assert (i == 0);
+      runtime_cond_wait (&channel->cond, &channel->lock);
     }
 }
 
@@ -118,8 +118,7 @@ __go_send_release (struct __go_channel *channel)
 		}
 	    }
 
-	  i = pthread_cond_wait (&channel->cond, &channel->lock);
-	  __go_assert (i == 0);
+	  runtime_cond_wait (&channel->cond, &channel->lock);
 	}
 
       channel->waiting_to_send = 0;
diff --git a/libgo/runtime/go-signal.c b/libgo/runtime/go-signal.c
index c16b058b79e4bd85f0ecc2ae2a2c728398a72e9c..468235ddf4e51e7df4c413447c7c86468e8c8c38 100644
--- a/libgo/runtime/go-signal.c
+++ b/libgo/runtime/go-signal.c
@@ -6,13 +6,12 @@
 
 #include <signal.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <sys/time.h>
 
+#include "runtime.h"
 #include "go-assert.h"
 #include "go-panic.h"
-#include "go-signal.h"
-
-#include "runtime.h"
 
 #ifndef SA_RESTART
   #define SA_RESTART 0
@@ -24,6 +23,10 @@ struct sigtab
 {
   /* Signal number.  */
   int sig;
+  /* Nonzero if the signal should be caught.  */
+  _Bool catch;
+  /* Nonzero if the signal should be queued.  */
+  _Bool queue;
   /* Nonzero if the signal should be ignored.  */
   _Bool ignore;
   /* Nonzero if we should restart system calls.  */
@@ -34,62 +37,81 @@ struct sigtab
 
 static struct sigtab signals[] =
 {
-  { SIGHUP, 0, 1 },
-  { SIGINT, 0, 1 },
-  { SIGALRM, 1, 1 },
-  { SIGTERM, 0, 1 },
+  { SIGHUP, 0, 1, 0, 1 },
+  { SIGINT, 0, 1, 0, 1 },
+  { SIGQUIT, 0, 1, 0, 1 },
+  { SIGALRM, 0, 1, 1, 1 },
+  { SIGTERM, 0, 1, 0, 1 },
+#ifdef SIGILL
+  { SIGILL, 1, 0, 0, 0 },
+#endif
+#ifdef SIGTRAP
+  { SIGTRAP, 1, 0, 0, 0 },
+#endif
+#ifdef SIGABRT
+  { SIGABRT, 1, 0, 0, 0 },
+#endif
 #ifdef SIGBUS
-  { SIGBUS, 0, 0 },
+  { SIGBUS, 1, 0, 0, 0 },
 #endif
 #ifdef SIGFPE
-  { SIGFPE, 0, 0 },
+  { SIGFPE, 1, 0, 0, 0 },
 #endif
 #ifdef SIGUSR1
-  { SIGUSR1, 1, 1 },
+  { SIGUSR1, 0, 1, 1, 1 },
 #endif
 #ifdef SIGSEGV
-  { SIGSEGV, 0, 0 },
+  { SIGSEGV, 1, 0, 0, 0 },
 #endif
 #ifdef SIGUSR2
-  { SIGUSR2, 1, 1 },
+  { SIGUSR2, 0, 1, 1, 1 },
 #endif
 #ifdef SIGPIPE
-  { SIGPIPE, 1, 0 },
+  { SIGPIPE, 0, 0, 1, 0 },
+#endif
+#ifdef SIGSTKFLT
+  { SIGSTKFLT, 1, 0, 0, 0 },
 #endif
 #ifdef SIGCHLD
-  { SIGCHLD, 1, 1 },
+  { SIGCHLD, 0, 1, 1, 1 },
 #endif
 #ifdef SIGTSTP
-  { SIGTSTP, 1, 1 },
+  { SIGTSTP, 0, 1, 1, 1 },
 #endif
 #ifdef SIGTTIN
-  { SIGTTIN, 1, 1 },
+  { SIGTTIN, 0, 1, 1, 1 },
 #endif
 #ifdef SIGTTOU
-  { SIGTTOU, 1, 1 },
+  { SIGTTOU, 0, 1, 1, 1 },
 #endif
 #ifdef SIGURG
-  { SIGURG, 1, 1 },
+  { SIGURG, 0, 1, 1, 1 },
 #endif
 #ifdef SIGXCPU
-  { SIGXCPU, 1, 1 },
+  { SIGXCPU, 0, 1, 1, 1 },
 #endif
 #ifdef SIGXFSZ
-  { SIGXFSZ, 1, 1 },
+  { SIGXFSZ, 0, 1, 1, 1 },
 #endif
 #ifdef SIGVTALRM
-  { SIGVTALRM, 1, 1 },
+  { SIGVTALRM, 0, 1, 1, 1 },
+#endif
+#ifdef SIGPROF
+  { SIGPROF, 0, 1, 1, 1 },
 #endif
 #ifdef SIGWINCH
-  { SIGWINCH, 1, 1 },
+  { SIGWINCH, 0, 1, 1, 1 },
 #endif
 #ifdef SIGIO
-  { SIGIO, 1, 1 },
+  { SIGIO, 0, 1, 1, 1 },
 #endif
 #ifdef SIGPWR
-  { SIGPWR, 1, 1 },
+  { SIGPWR, 0, 1, 1, 1 },
+#endif
+#ifdef SIGSYS
+  { SIGSYS, 1, 0, 0, 0 },
 #endif
-  { -1, 0, 0 }
+  { -1, 0, 0, 0, 0 }
 };
 
 /* The Go signal handler.  */
@@ -103,7 +125,7 @@ sighandler (int sig)
   if (sig == SIGPROF)
     {
       /* FIXME.  */
-      runtime_sigprof (0, 0, nil);
+      runtime_sigprof (0, 0, nil, nil);
       return;
     }
 
@@ -112,6 +134,12 @@ sighandler (int sig)
   msg = NULL;
   switch (sig)
     {
+#ifdef SIGILL
+    case SIGILL:
+      msg = "illegal instruction";
+      break;
+#endif
+
 #ifdef SIGBUS
     case SIGBUS:
       msg = "invalid memory address or nil pointer dereference";
@@ -138,7 +166,7 @@ sighandler (int sig)
     {
       sigset_t clear;
 
-      if (__sync_bool_compare_and_swap (&m->mallocing, 1, 1))
+      if (runtime_m()->mallocing)
 	{
 	  fprintf (stderr, "caught signal while mallocing: %s\n", msg);
 	  __go_assert (0);
@@ -153,16 +181,22 @@ sighandler (int sig)
       __go_panic_msg (msg);
     }
 
-  if (__go_sigsend (sig))
-    return;
   for (i = 0; signals[i].sig != -1; ++i)
     {
       if (signals[i].sig == sig)
 	{
 	  struct sigaction sa;
 
-	  if (signals[i].ignore)
-	    return;
+	  if (signals[i].queue)
+	    {
+	      if (__go_sigsend (sig) || signals[i].ignore)
+		return;
+	      runtime_exit (2);		// SIGINT, SIGTERM, etc
+	    }
+
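+	  /* A caught, non-queued signal such as SIGSEGV falls through
+	     to the panic path below; if a panic is already in progress,
+	     exit rather than recurse.  */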
+	  if (runtime_panicking)
+	    runtime_exit (2);
+	  runtime_panicking = 1;
 
 	  memset (&sa, 0, sizeof sa);
 
@@ -181,11 +215,18 @@ sighandler (int sig)
   abort ();
 }
 
+/* Ignore a signal.  */
+
+static void
+sigignore (int sig __attribute__ ((unused)))
+{
+}
+
 /* Initialize signal handling for Go.  This is called when the program
    starts.  */
 
 void
-__initsig ()
+runtime_initsig (int32 queue)
 {
   struct sigaction sa;
   int i;
@@ -201,6 +242,12 @@ __initsig ()
 
   for (i = 0; signals[i].sig != -1; ++i)
     {
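+      /* Install only the table entries that match QUEUE.  The runtime
+	 presumably calls this twice: once with queue == 0 at program
+	 startup, and again with queue == 1 when signal queueing is
+	 enabled, so each call leaves the other handlers alone.  */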
+      if (signals[i].queue != (queue ? 1 : 0))
+	continue;
+      if (signals[i].catch || signals[i].queue)
+	sa.sa_handler = sighandler;
+      else
+	sa.sa_handler = sigignore;
       sa.sa_flags = signals[i].restart ? SA_RESTART : 0;
       if (sigaction (signals[i].sig, &sa, NULL) != 0)
 	__go_assert (0);
@@ -243,7 +290,7 @@ runtime_resetcpuprofiler(int32 hz)
       __go_assert (i == 0);
     }
 
-  m->profilehz = hz;
+  runtime_m()->profilehz = hz;
 }
 
 /* Used by the os package to raise SIGPIPE.  */
diff --git a/libgo/runtime/go-signal.h b/libgo/runtime/go-signal.h
deleted file mode 100644
index a30173a34de10b609a480080d323b098fd30e2ea..0000000000000000000000000000000000000000
--- a/libgo/runtime/go-signal.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* go-signal.h -- signal handling for Go.
-
-   Copyright 2009 The Go Authors. All rights reserved.
-   Use of this source code is governed by a BSD-style
-   license that can be found in the LICENSE file.  */
-
-extern void __initsig (void);
diff --git a/libgo/runtime/go-unwind.c b/libgo/runtime/go-unwind.c
index 58c675a2dc8669d4500e358a17095fe365d2e988..c669a3ce889f543cc157069e5568e5afac384126 100644
--- a/libgo/runtime/go-unwind.c
+++ b/libgo/runtime/go-unwind.c
@@ -47,8 +47,11 @@ static const _Unwind_Exception_Class __go_exception_class =
 void
 __go_check_defer (_Bool *frame)
 {
+  G *g;
   struct _Unwind_Exception *hdr;
 
+  g = runtime_g ();
+
   if (g == NULL)
     {
       /* Some other language has thrown an exception.  We know there
@@ -164,7 +167,7 @@ __go_unwind_stack ()
 		    sizeof hdr->exception_class);
   hdr->exception_cleanup = NULL;
 
-  g->exception = hdr;
+  runtime_g ()->exception = hdr;
 
 #ifdef __USING_SJLJ_EXCEPTIONS__
   _Unwind_SjLj_RaiseException (hdr);
@@ -280,6 +283,7 @@ PERSONALITY_FUNCTION (int version,
   _Unwind_Ptr landing_pad, ip;
   int ip_before_insn = 0;
   _Bool is_foreign;
+  G *g;
 
 #ifdef __ARM_EABI_UNWINDER__
   _Unwind_Action actions;
@@ -416,6 +420,7 @@ PERSONALITY_FUNCTION (int version,
 
   /* It's possible for g to be NULL here for an exception thrown by a
      language other than Go.  */
+  g = runtime_g ();
   if (g == NULL)
     {
       if (!is_foreign)
diff --git a/libgo/runtime/lock_futex.c b/libgo/runtime/lock_futex.c
index e3b4a25bb7528347717eec08f22e0c44f94be470..4f3d507726d9e014eb4593e17b402c604e21967b 100644
--- a/libgo/runtime/lock_futex.c
+++ b/libgo/runtime/lock_futex.c
@@ -35,7 +35,7 @@ runtime_lock(Lock *l)
 {
 	uint32 i, v, wait, spin;
 
-	if(m->locks++ < 0)
+	if(runtime_m()->locks++ < 0)
 		runtime_throw("runtime_lock: lock count");
 
 	// Speculative grab for lock.
@@ -89,7 +89,7 @@ runtime_unlock(Lock *l)
 {
 	uint32 v;
 
-	if(--m->locks < 0)
+	if(--runtime_m()->locks < 0)
 		runtime_throw("runtime_unlock: lock count");
 
 	v = runtime_xchg(&l->key, MUTEX_UNLOCKED);
diff --git a/libgo/runtime/malloc.goc b/libgo/runtime/malloc.goc
index abf020dab45589615f4ebbca89b709940e1ab42b..73446bf83478ae8664ef5ae453dc43275cc8bc14 100644
--- a/libgo/runtime/malloc.goc
+++ b/libgo/runtime/malloc.goc
@@ -33,14 +33,25 @@ extern volatile int32 runtime_MemProfileRate
 void*
 runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
 {
+	M *m;
+	G *g;
 	int32 sizeclass, rate;
 	MCache *c;
 	uintptr npages;
 	MSpan *s;
 	void *v;
 
-	if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1))
+	m = runtime_m();
+	g = runtime_g();
+	if(g->status == Gsyscall)
+		dogc = 0;
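+	// If the collector is waiting to stop the world, yield first
+	// (unless we hold locks, run on g0, or sit in a syscall), then
+	// reload m: we may resume on a different thread.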
+	if(runtime_gcwaiting && g != m->g0 && m->locks == 0 && g->status != Gsyscall) {
+		runtime_gosched();
+		m = runtime_m();
+	}
+	if(m->mallocing)
 		runtime_throw("malloc/free - deadlock");
+	m->mallocing = 1;
 	if(size == 0)
 		size = 1;
 
@@ -63,7 +74,7 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
 		npages = size >> PageShift;
 		if((size & PageMask) != 0)
 			npages++;
-		s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1);
+		s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, !(flag & FlagNoGC));
 		if(s == nil)
 			runtime_throw("out of memory");
 		size = npages<<PageShift;
@@ -77,18 +88,7 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
 	if(!(flag & FlagNoGC))
 		runtime_markallocated(v, size, (flag&FlagNoPointers) != 0);
 
-	__sync_bool_compare_and_swap(&m->mallocing, 1, 0);
-
-	if(__sync_bool_compare_and_swap(&m->gcing, 1, 0)) {
-		if(!(flag & FlagNoProfiling))
-			__go_run_goroutine_gc(0);
-		else {
-			// We are being called from the profiler.  Tell it
-			// to invoke the garbage collector when it is
-			// done.  No need to use a sync function here.
-			m->gcing_for_prof = 1;
-		}
-	}
+	m->mallocing = 0;
 
 	if(!(flag & FlagNoProfiling) && (rate = runtime_MemProfileRate) > 0) {
 		if(size >= (uint32) rate)
@@ -122,6 +122,7 @@ __go_alloc(uintptr size)
 void
 __go_free(void *v)
 {
+	M *m;
 	int32 sizeclass;
 	MSpan *s;
 	MCache *c;
@@ -134,8 +135,10 @@ __go_free(void *v)
 	// If you change this also change mgc0.c:/^sweepspan,
 	// which has a copy of the guts of free.
 
-	if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1))
+	m = runtime_m();
+	if(m->mallocing)
 		runtime_throw("malloc/free - deadlock");
+	m->mallocing = 1;
 
 	if(!runtime_mlookup(v, nil, nil, &s)) {
 		// runtime_printf("free %p: not an allocated block\n", v);
@@ -170,11 +173,7 @@ __go_free(void *v)
 	c->local_alloc -= size;
 	if(prof)
 		runtime_MProf_Free(v, size);
-
-	__sync_bool_compare_and_swap(&m->mallocing, 1, 0);
-
-	if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
-		__go_run_goroutine_gc(1);
+	m->mallocing = 0;
 }
 
 int32
@@ -184,7 +183,7 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
 	byte *p;
 	MSpan *s;
 
-	m->mcache->local_nlookup++;
+	runtime_m()->mcache->local_nlookup++;
 	s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
 	if(sp)
 		*sp = s;
@@ -229,15 +228,8 @@ runtime_allocmcache(void)
 	int32 rate;
 	MCache *c;
 
-	if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1))
-		runtime_throw("allocmcache - deadlock");
-
 	runtime_lock(&runtime_mheap);
 	c = runtime_FixAlloc_Alloc(&runtime_mheap.cachealloc);
-
-	// Clear the free list used by FixAlloc; assume the rest is zeroed.
-	c->list[0].list = nil;
-
 	mstats.mcache_inuse = runtime_mheap.cachealloc.inuse;
 	mstats.mcache_sys = runtime_mheap.cachealloc.sys;
 	runtime_unlock(&runtime_mheap);
@@ -249,10 +241,6 @@ runtime_allocmcache(void)
 	if(rate != 0)
 		c->next_sample = runtime_fastrand1() % (2*rate);
 
-	__sync_bool_compare_and_swap(&m->mallocing, 1, 0);
-	if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
-		__go_run_goroutine_gc(2);
-
 	return c;
 }
 
@@ -374,7 +362,7 @@ runtime_mallocinit(void)
 
 	// Initialize the rest of the allocator.	
 	runtime_MHeap_Init(&runtime_mheap, runtime_SysAlloc);
-	m->mcache = runtime_allocmcache();
+	runtime_m()->mcache = runtime_allocmcache();
 
 	// See if it works.
 	runtime_free(runtime_malloc(1));
diff --git a/libgo/runtime/malloc.h b/libgo/runtime/malloc.h
index aad9a65f76a0f94362763987ab1f10dd03f971ed..da0c0f85766ae83fa084bf8dbc93f25f68d25a0e 100644
--- a/libgo/runtime/malloc.h
+++ b/libgo/runtime/malloc.h
@@ -422,4 +422,4 @@ extern int32 runtime_malloc_profile;
 
 struct __go_func_type;
 bool	runtime_getfinalizer(void *p, bool del, void (**fn)(void*), const struct __go_func_type **ft);
-void	runtime_walkfintab(void (*fn)(void*), void (*scan)(byte*, int64));
+void	runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64));
diff --git a/libgo/runtime/mfinal.c b/libgo/runtime/mfinal.c
index 349425190bd2fb47d19737ad635c0311892dd6e9..a89003716794d477eb9a53d1cea19a3692eafeae 100644
--- a/libgo/runtime/mfinal.c
+++ b/libgo/runtime/mfinal.c
@@ -141,28 +141,24 @@ runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft)
 {
 	Fintab *tab;
 	byte *base;
-	bool ret = false;
 	
 	if(debug) {
 		if(!runtime_mlookup(p, &base, nil, nil) || p != base)
 			runtime_throw("addfinalizer on invalid pointer");
 	}
 	
-	if(!__sync_bool_compare_and_swap(&m->holds_finlock, 0, 1))
-		runtime_throw("finalizer deadlock");
-
 	tab = TAB(p);
 	runtime_lock(tab);
 	if(f == nil) {
 		if(lookfintab(tab, p, true, nil))
 			runtime_setblockspecial(p, false);
-		ret = true;
-		goto unlock;
+		runtime_unlock(tab);
+		return true;
 	}
 
 	if(lookfintab(tab, p, false, nil)) {
-		ret = false;
-		goto unlock;
+		runtime_unlock(tab);
+		return false;
 	}
 
 	if(tab->nkey >= tab->max/2+tab->max/4) {
@@ -173,18 +169,8 @@ runtime_addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft)
 
 	addfintab(tab, p, f, ft);
 	runtime_setblockspecial(p, true);
-	ret = true;
-
- unlock:
 	runtime_unlock(tab);
-
-	__sync_bool_compare_and_swap(&m->holds_finlock, 1, 0);
-
-	if(__sync_bool_compare_and_swap(&m->gcing_for_finlock, 1, 0)) {
-		__go_run_goroutine_gc(200);
-	}
-
-	return ret;
+	return true;
 }
 
 // get finalizer; if del, delete finalizer.
@@ -196,19 +182,10 @@ runtime_getfinalizer(void *p, bool del, void (**fn)(void*), const struct __go_fu
 	bool res;
 	Fin f;
 	
-	if(!__sync_bool_compare_and_swap(&m->holds_finlock, 0, 1))
-		runtime_throw("finalizer deadlock");
-
 	tab = TAB(p);
 	runtime_lock(tab);
 	res = lookfintab(tab, p, del, &f);
 	runtime_unlock(tab);
-
-	__sync_bool_compare_and_swap(&m->holds_finlock, 1, 0);
-	if(__sync_bool_compare_and_swap(&m->gcing_for_finlock, 1, 0)) {
-		__go_run_goroutine_gc(201);
-	}
-
 	if(res==false)
 		return false;
 	*fn = f.fn;
@@ -223,9 +200,6 @@ runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64))
 	void **ekey;
 	int32 i;
 
-	if(!__sync_bool_compare_and_swap(&m->holds_finlock, 0, 1))
-		runtime_throw("finalizer deadlock");
-
 	for(i=0; i<TABSZ; i++) {
 		runtime_lock(&fintab[i]);
 		key = fintab[i].fkey;
@@ -237,9 +211,4 @@ runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64))
 		scan((byte*)&fintab[i].val, sizeof(void*));
 		runtime_unlock(&fintab[i]);
 	}
-
-	__sync_bool_compare_and_swap(&m->holds_finlock, 1, 0);
-	if(__sync_bool_compare_and_swap(&m->gcing_for_finlock, 1, 0)) {
-		runtime_throw("walkfintab not called from gc");
-	}
 }
diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c
index 5fd9368c35a6c346bf6197f91b0e4ee46d0bca82..0f1cb49e40f4addf83c17ba16c76cfbfb34c9662 100644
--- a/libgo/runtime/mgc0.c
+++ b/libgo/runtime/mgc0.c
@@ -8,6 +8,16 @@
 #include "arch.h"
 #include "malloc.h"
 
+#ifdef USING_SPLIT_STACK
+
+extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
+				 void **);
+
+extern void * __splitstack_find_context (void *context[10], size_t *, void **,
+					 void **, void **);
+
+#endif
+
 enum {
 	Debug = 0,
 	PtrSize = sizeof(void*),
@@ -85,9 +95,8 @@ struct FinBlock
 	Finalizer fin[1];
 };
 
-static bool finstarted;
-static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER;
+
+static G *fing;
 static FinBlock *finq; // list of finalizers that are to be executed
 static FinBlock *finc; // cache of free blocks
 static FinBlock *allfin; // list of all blocks
@@ -590,6 +599,79 @@ handoff(Workbuf *b)
 	return b1;
 }
 
+// Scanstack calls scanblock on each of gp's stack segments.
+static void
+scanstack(void (*scanblock)(byte*, int64), G *gp)
+{
+#ifdef USING_SPLIT_STACK
+	M *mp;
+	void* sp;
+	size_t spsize;
+	void* next_segment;
+	void* next_sp;
+	void* initial_sp;
+
+	if(gp == runtime_g()) {
+		// Scanning our own stack.
+		sp = __splitstack_find(nil, nil, &spsize, &next_segment,
+				       &next_sp, &initial_sp);
+	} else if((mp = gp->m) != nil && mp->helpgc) {
+		// gchelper's stack is in active use and has no interesting pointers.
+		return;
+	} else {
+		// Scanning another goroutine's stack.
+		// The goroutine is usually asleep (the world is stopped).
+
+		// The exception is that if the goroutine is about to enter or might
+		// have just exited a system call, it may be executing code such
+		// as schedlock and may have needed to start a new stack segment.
+		// Use the stack segment and stack pointer at the time of
+		// the system call instead, since that won't change underfoot.
+		if(gp->gcstack != nil) {
+			sp = gp->gcstack;
+			spsize = gp->gcstack_size;
+			next_segment = gp->gcnext_segment;
+			next_sp = gp->gcnext_sp;
+			initial_sp = gp->gcinitial_sp;
+		} else {
+			sp = __splitstack_find_context(&gp->stack_context[0],
+						       &spsize, &next_segment,
+						       &next_sp, &initial_sp);
+		}
+	}
+	if(sp != nil) {
+		scanblock(sp, spsize);
+		while((sp = __splitstack_find(next_segment, next_sp,
+					      &spsize, &next_segment,
+					      &next_sp, &initial_sp)) != nil)
+			scanblock(sp, spsize);
+	}
+#else
+	M *mp;
+	byte* bottom;
+	byte* top;
+
+	if(gp == runtime_g()) {
+		// Scanning our own stack.
+		bottom = (byte*)&gp;
+	} else if((mp = gp->m) != nil && mp->helpgc) {
+		// gchelper's stack is in active use and has no interesting pointers.
+		return;
+	} else {
+		// Scanning another goroutine's stack.
+		// The goroutine is usually asleep (the world is stopped).
+		bottom = (byte*)gp->gcnext_sp;
+		if(bottom == nil)
+			return;
+	}
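+	// gcinitial_sp records the base of the stack; scan the region
+	// between it and the current bottom, whichever way it grows.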
+	top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
+	if(top > bottom)
+		scanblock(bottom, top - bottom);
+	else
+		scanblock(top, bottom - top);
+#endif
+}
+
 // Markfin calls scanblock on the blocks that have finalizers:
 // the things pointed at cannot be freed until the finalizers have run.
 static void
@@ -639,8 +721,10 @@ static void
 mark(void (*scan)(byte*, int64))
 {
 	struct root_list *pl;
+	G *gp;
 	FinBlock *fb;
 
+	// mark data+bss.
 	for(pl = roots; pl != nil; pl = pl->next) {
 		struct root* pr = &pl->roots[0];
 		while(1) {
@@ -654,11 +738,30 @@ mark(void (*scan)(byte*, int64))
 
 	scan((byte*)&runtime_m0, sizeof runtime_m0);
 	scan((byte*)&runtime_g0, sizeof runtime_g0);
-	scan((byte*)&finq, sizeof finq);
+	scan((byte*)&runtime_allg, sizeof runtime_allg);
+	scan((byte*)&runtime_allm, sizeof runtime_allm);
 	runtime_MProf_Mark(scan);
 
 	// mark stacks
-	__go_scanstacks(scan);
+	for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
+		switch(gp->status){
+		default:
+			runtime_printf("unexpected G.status %d\n", gp->status);
+			runtime_throw("mark - bad status");
+		case Gdead:
+			break;
+		case Grunning:
+			if(gp != runtime_g())
+				runtime_throw("mark - world not stopped");
+			scanstack(scan, gp);
+			break;
+		case Grunnable:
+		case Gsyscall:
+		case Gwaiting:
+			scanstack(scan, gp);
+			break;
+		}
+	}
 
 	// mark things pointed at by objects with finalizers
 	if(scan == debug_scanblock)
@@ -714,6 +817,7 @@ handlespecial(byte *p, uintptr size)
 static void
 sweep(void)
 {
+	M *m;
 	MSpan *s;
 	int32 cl, n, npages;
 	uintptr size;
@@ -721,6 +825,7 @@ sweep(void)
 	MCache *c;
 	byte *arena_start;
 
+	m = runtime_m();
 	arena_start = runtime_mheap.arena_start;
 
 	for(;;) {
@@ -799,8 +904,6 @@ sweep(void)
 	}
 }
 
-static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER;
-
 void
 runtime_gchelper(void)
 {
@@ -818,6 +921,11 @@ runtime_gchelper(void)
 		runtime_notewakeup(&work.alldone);
 }
 
+// Semaphore, not Lock, so that the goroutine
+// reschedules when there is contention rather
+// than spinning.
+static uint32 gcsema = 1;
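+// (gcsema starts at 1, so the first runtime_semacquire(&gcsema)
+// succeeds immediately; later collectors queue in the semaphore
+// rather than spin.)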
+
 // Initialized from $GOGC.  GOGC=off means no gc.
 //
 // Next gc is after we've allocated an extra amount of
@@ -829,9 +937,46 @@ runtime_gchelper(void)
 // extra memory used).
 static int32 gcpercent = -2;
 
+static void
+stealcache(void)
+{
+	M *m;
+
+	for(m=runtime_allm; m; m=m->alllink)
+		runtime_MCache_ReleaseAll(m->mcache);
+}
+
+static void
+cachestats(void)
+{
+	M *m;
+	MCache *c;
+	uint32 i;
+	uint64 stacks_inuse;
+	uint64 stacks_sys;
+
+	stacks_inuse = 0;
+	stacks_sys = 0;
+	for(m=runtime_allm; m; m=m->alllink) {
+		runtime_purgecachedstats(m);
+		// stacks_inuse += m->stackalloc->inuse;
+		// stacks_sys += m->stackalloc->sys;
+		c = m->mcache;
+		for(i=0; i<nelem(c->local_by_size); i++) {
+			mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
+			c->local_by_size[i].nmalloc = 0;
+			mstats.by_size[i].nfree += c->local_by_size[i].nfree;
+			c->local_by_size[i].nfree = 0;
+		}
+	}
+	mstats.stacks_inuse = stacks_inuse;
+	mstats.stacks_sys = stacks_sys;
+}
+
 void
-runtime_gc(int32 force __attribute__ ((unused)))
+runtime_gc(int32 force)
 {
+	M *m;
 	int64 t0, t1, t2, t3;
 	uint64 heap0, heap1, obj0, obj1;
 	const byte *p;
@@ -845,7 +990,8 @@ runtime_gc(int32 force __attribute__ ((unused)))
 	// problems, don't bother trying to run gc
 	// while holding a lock.  The next mallocgc
 	// without a lock will do the gc instead.
-	if(!mstats.enablegc || m->locks > 0 /* || runtime_panicking */)
+	m = runtime_m();
+	if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
 		return;
 
 	if(gcpercent == -2) {	// first time through
@@ -864,11 +1010,9 @@ runtime_gc(int32 force __attribute__ ((unused)))
 	if(gcpercent < 0)
 		return;
 
-	pthread_mutex_lock(&finqlock);
-	pthread_mutex_lock(&gcsema);
+	runtime_semacquire(&gcsema);
 	if(!force && mstats.heap_alloc < mstats.next_gc) {
-		pthread_mutex_unlock(&gcsema);
-		pthread_mutex_unlock(&finqlock);
+		runtime_semrelease(&gcsema);
 		return;
 	}
 
@@ -881,7 +1025,7 @@ runtime_gc(int32 force __attribute__ ((unused)))
 	m->gcing = 1;
 	runtime_stoptheworld();
 
-	__go_cachestats();
+	cachestats();
 	heap0 = mstats.heap_alloc;
 	obj0 = mstats.nmalloc - mstats.nfree;
 
@@ -890,12 +1034,10 @@ runtime_gc(int32 force __attribute__ ((unused)))
 
 	extra = false;
 	work.nproc = 1;
-#if 0
 	if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
 		runtime_noteclear(&work.alldone);
 		work.nproc += runtime_helpgc(&extra);
 	}
-#endif
 	work.nwait = 0;
 	work.ndone = 0;
 
@@ -912,14 +1054,25 @@ runtime_gc(int32 force __attribute__ ((unused)))
 		runtime_notesleep(&work.alldone);
 	t2 = runtime_nanotime();
 
-	__go_stealcache();
-	__go_cachestats();
+	stealcache();
+	cachestats();
 
 	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
 	m->gcing = 0;
 
+	m->locks++;	// disable gc during the mallocs in __go_go
+	if(finq != nil) {
+		// kick off or wake up goroutine to run queued finalizers
+		if(fing == nil)
+			fing = __go_go(runfinq, nil);
+		else if(fingwait) {
+			fingwait = 0;
+			runtime_ready(fing);
+		}
+	}
+	m->locks--;
 
+	cachestats();
 	heap1 = mstats.heap_alloc;
 	obj1 = mstats.nmalloc - mstats.nfree;
 
@@ -938,7 +1091,7 @@ runtime_gc(int32 force __attribute__ ((unused)))
 			(unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup, (unsigned long long) nhandoff);
 	}
 
-	pthread_mutex_unlock(&gcsema);
+	runtime_semrelease(&gcsema);
 
 	// If we could have used another helper proc, start one now,
 	// in the hope that it will be available next time.
@@ -949,20 +1102,9 @@ runtime_gc(int32 force __attribute__ ((unused)))
 	// the maximum number of procs.
 	runtime_starttheworld(extra);
 
-	// finqlock is still held.
-	if(finq != nil) {
-		// kick off or wake up goroutine to run queued finalizers
-		if(!finstarted) {
-			__go_go(runfinq, nil);
-			finstarted = 1;
-		}
-		else if(fingwait) {
-			fingwait = 0;
-			pthread_cond_signal(&finqcond);
-		}
-	}
-	m->locks--;
-	pthread_mutex_unlock(&finqlock);
+	// give the queued finalizers, if any, a chance to run
+	if(finq != nil)
+		runtime_gosched();
 
 	if(gctrace > 1 && !force)
 		runtime_gc(1);
@@ -974,39 +1116,47 @@ void runtime_UpdateMemStats(void)
 void
 runtime_UpdateMemStats(void)
 {
+	M *m;
+
 	// Have to acquire gcsema to stop the world,
 	// because stoptheworld can only be used by
 	// one goroutine at a time, and there might be
 	// a pending garbage collection already calling it.
-	pthread_mutex_lock(&gcsema);
+	runtime_semacquire(&gcsema);
+	m = runtime_m();
 	m->gcing = 1;
 	runtime_stoptheworld();
-	__go_cachestats();
+	cachestats();
 	m->gcing = 0;
-	pthread_mutex_unlock(&gcsema);
+	runtime_semrelease(&gcsema);
 	runtime_starttheworld(false);
 }
 
 static void
-runfinq(void* dummy)
+runfinq(void* dummy __attribute__ ((unused)))
 {
+	G* gp;
 	Finalizer *f;
 	FinBlock *fb, *next;
 	uint32 i;
 
-	USED(dummy);
-
+	gp = runtime_g();
 	for(;;) {
-		pthread_mutex_lock(&finqlock);
+		// There's no need for a lock in this section
+		// because it only conflicts with the garbage
+		// collector, and the garbage collector only
+		// runs when everyone else is stopped, and
+		// runfinq only stops at the gosched() or
+		// during the calls in the for loop.
 		fb = finq;
 		finq = nil;
 		if(fb == nil) {
 			fingwait = 1;
-			pthread_cond_wait(&finqcond, &finqlock);
-			pthread_mutex_unlock(&finqlock);
+			gp->status = Gwaiting;
+			gp->waitreason = "finalizer wait";
+			runtime_gosched();
 			continue;
 		}
-		pthread_mutex_unlock(&finqlock);
 		for(; fb; fb=next) {
 			next = fb->next;
 			for(i=0; i<(uint32)fb->cnt; i++) {
@@ -1027,8 +1177,6 @@ runfinq(void* dummy)
 	}
 }
 
-#define runtime_singleproc 0
-
 // mark the block at v of size n as allocated.
 // If noptr is true, mark it as having no pointers.
 void
@@ -1231,9 +1379,3 @@ runtime_MHeap_MapBits(MHeap *h)
 	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
 	h->bitmap_mapped = n;
 }
-
-void
-__go_enable_gc()
-{
-  mstats.enablegc = 1;
-}
diff --git a/libgo/runtime/mheap.c b/libgo/runtime/mheap.c
index 4d02bfebd8ee02899ce215ff4d72c5c972d39229..5a5a1e71a1233e24dbbf2e73c0c8f2f670b26fa4 100644
--- a/libgo/runtime/mheap.c
+++ b/libgo/runtime/mheap.c
@@ -58,7 +58,7 @@ runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct)
 	MSpan *s;
 
 	runtime_lock(h);
-	runtime_purgecachedstats(m);
+	runtime_purgecachedstats(runtime_m());
 	s = MHeap_AllocLocked(h, npage, sizeclass);
 	if(s != nil) {
 		mstats.heap_inuse += npage<<PageShift;
@@ -257,7 +257,7 @@ void
 runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct)
 {
 	runtime_lock(h);
-	runtime_purgecachedstats(m);
+	runtime_purgecachedstats(runtime_m());
 	mstats.heap_inuse -= s->npages<<PageShift;
 	if(acct) {
 		mstats.heap_alloc -= s->npages<<PageShift;
diff --git a/libgo/runtime/mprof.goc b/libgo/runtime/mprof.goc
index 600d8a8a9d2c542b76180050ab576dab7426b05f..f44f45083f7244410a177056ba1dcfb966133aa5 100644
--- a/libgo/runtime/mprof.goc
+++ b/libgo/runtime/mprof.goc
@@ -190,12 +190,16 @@ found:
 void
 runtime_MProf_Malloc(void *p, uintptr size)
 {
+	M *m;
 	int32 nstk;
 	uintptr stk[32];
 	Bucket *b;
 
-	if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
+	m = runtime_m();
+	if(m->nomemprof > 0)
 		return;
+
+	m->nomemprof++;
 #if 0
 	nstk = runtime_callers(1, stk, 32);
 #else
@@ -207,21 +211,22 @@ runtime_MProf_Malloc(void *p, uintptr size)
 	b->alloc_bytes += size;
 	setaddrbucket((uintptr)p, b);
 	runtime_unlock(&proflock);
-	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
-
-	if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
-		__go_run_goroutine_gc(100);
+	m = runtime_m();
+	m->nomemprof--;
 }
 
 // Called when freeing a profiled block.
 void
 runtime_MProf_Free(void *p, uintptr size)
 {
+	M *m;
 	Bucket *b;
 
-	if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
+	m = runtime_m();
+	if(m->nomemprof > 0)
 		return;
 
+	m->nomemprof++;
 	runtime_lock(&proflock);
 	b = getaddrbucket((uintptr)p);
 	if(b != nil) {
@@ -229,10 +234,8 @@ runtime_MProf_Free(void *p, uintptr size)
 		b->free_bytes += size;
 	}
 	runtime_unlock(&proflock);
-	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
-
-	if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
-		__go_run_goroutine_gc(101);
+	m = runtime_m();
+	m->nomemprof--;
 }
 
 
@@ -267,8 +270,6 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
 	Bucket *b;
 	Record *r;
 
-	__sync_bool_compare_and_swap(&m->nomemprof, 0, 1);
-
 	runtime_lock(&proflock);
 	n = 0;
 	for(b=buckets; b; b=b->allnext)
@@ -283,11 +284,6 @@ func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
 				record(r++, b);
 	}
 	runtime_unlock(&proflock);
-
-	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
-
-	if(__sync_bool_compare_and_swap(&m->gcing_for_prof, 1, 0))
-		__go_run_goroutine_gc(102);
 }
 
 void
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c
index e9b7c9083ca294ab9a109f152c6074971e88be59..b243de2424ec9ea05c63ec3d7e4a97e8743ddbda 100644
--- a/libgo/runtime/proc.c
+++ b/libgo/runtime/proc.c
@@ -2,21 +2,1323 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+#include <limits.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <unistd.h>
+
+#include "config.h"
 #include "runtime.h"
 #include "arch.h"
-#include "malloc.h"	/* so that acid generated from proc.c includes malloc data structures */
+#include "defs.h"
+#include "malloc.h"
+#include "go-defer.h"
+
+#ifdef USING_SPLIT_STACK
+
+/* FIXME: These are not declared anywhere.  */
+
+extern void __splitstack_getcontext(void *context[10]);
+
+extern void __splitstack_setcontext(void *context[10]);
+
+extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
+
+extern void * __splitstack_resetcontext(void *context[10], size_t *);
+
+extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
+			       void **);
+
+#endif
+
+#if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
+# ifdef PTHREAD_STACK_MIN
+#  define StackMin PTHREAD_STACK_MIN
+# else
+#  define StackMin 8192
+# endif
+#else
+# define StackMin (2 * 1024 * 1024)
+#endif
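+
+// With split stacks a new goroutine needs only a small initial
+// segment, so StackMin can be tiny; without them the entire stack is
+// allocated up front, hence the 2M fallback.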
+
+static void schedule(G*);
+static M *startm(void);
 
 typedef struct Sched Sched;
 
-G	runtime_g0;
 M	runtime_m0;
+G	runtime_g0;	// idle goroutine for m0
 
 #ifdef __rtems__
 #define __thread
 #endif
 
-__thread G *g;
-__thread M *m;
+static __thread G *g;
+static __thread M *m;
+
+// We can not always refer to the TLS variables directly.  The
+// compiler will call tls_get_addr to get the address of the variable,
+// and it may hold it in a register across a call to schedule.  When
+// we get back from the call we may be running in a different thread,
+// in which case the register now points to the TLS variable for a
+// different thread.  We use non-inlinable functions to avoid this
+// when necessary.
+
+G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
+
+G*
+runtime_g(void)
+{
+	return g;
+}
+
+M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
+
+M*
+runtime_m(void)
+{
+	return m;
+}
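+
+// (noinline enforces the out-of-line call described above;
+// no_split_stack is presumably there so these accessors stay safe at
+// points where growing the stack would not be.)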
+
+int32	runtime_gcwaiting;
+
+// Go scheduler
+//
+// The go scheduler's job is to match ready-to-run goroutines (`g's)
+// with waiting-for-work schedulers (`m's).  If there are ready g's
+// and no waiting m's, ready() will start a new m running in a new
+// OS thread, so that all ready g's can run simultaneously, up to a limit.
+// For now, m's never go away.
+//
+// By default, Go keeps only one kernel thread (m) running user code
+// at a single time; other threads may be blocked in the operating system.
+// Setting the environment variable $GOMAXPROCS or calling
+// runtime.GOMAXPROCS() will change the number of user threads
+// allowed to execute simultaneously.  $GOMAXPROCS is thus an
+// approximation of the maximum number of cores to use.
+//
+// Even a program that can run without deadlock in a single process
+// might use more m's if given the chance.  For example, the prime
+// sieve will use as many m's as there are primes (up to the scheduler's limit),
+// allowing different stages of the pipeline to execute in parallel.
+// We could revisit this choice, only kicking off new m's for blocking
+// system calls, but that would limit the amount of parallel computation
+// that go would try to do.
+//
+// In general, one could imagine all sorts of refinements to the
+// scheduler, but the goal now is just to get something working on
+// Linux and OS X.
+
+struct Sched {
+	Lock;
+
+	G *gfree;	// available g's (status == Gdead)
+	int32 goidgen;
+
+	G *ghead;	// g's waiting to run
+	G *gtail;
+	int32 gwait;	// number of g's waiting to run
+	int32 gcount;	// number of g's that are alive
+	int32 grunning;	// number of g's running on cpu or in syscall
+
+	M *mhead;	// m's waiting for work
+	int32 mwait;	// number of m's waiting for work
+	int32 mcount;	// number of m's that have been created
+
+	volatile uint32 atomic;	// atomic scheduling word (see below)
+
+	int32 profilehz;	// cpu profiling rate
+
+	Note	stopped;	// one g can set waitstop and wait here for m's to stop
+};
+
+// The atomic word in sched is an atomic uint32 that
+// holds these fields.
+//
+//	[15 bits] mcpu		number of m's executing on cpu
+//	[15 bits] mcpumax	max number of m's allowed on cpu
+//	[1 bit] waitstop	some g is waiting on stopped
+//	[1 bit] gwaiting	gwait != 0
+//
+// These fields are the information needed by entersyscall
+// and exitsyscall to decide whether to coordinate with the
+// scheduler.  Packing them into a single machine word lets
+// them use a fast path with a single atomic read/write and
+// no lock/unlock.  This greatly reduces contention in
+// syscall- or cgo-heavy multithreaded programs.
+//
+// Except for entersyscall and exitsyscall, the manipulations
+// to these fields only happen while holding the schedlock,
+// so the routines holding schedlock only need to worry about
+// what entersyscall and exitsyscall do, not the other routines
+// (which also use the schedlock).
+//
+// In particular, entersyscall and exitsyscall only read mcpumax,
+// waitstop, and gwaiting.  They never write them.  Thus, writes to those
+// fields can be done (holding schedlock) without fear of write conflicts.
+// There may still be logic conflicts: for example, the set of waitstop must
+// be conditioned on mcpu >= mcpumax or else the wait may be a
+// spurious sleep.  The Promela model in proc.p verifies these accesses.
+enum {
+	mcpuWidth = 15,
+	mcpuMask = (1<<mcpuWidth) - 1,
+	mcpuShift = 0,
+	mcpumaxShift = mcpuShift + mcpuWidth,
+	waitstopShift = mcpumaxShift + mcpuWidth,
+	gwaitingShift = waitstopShift+1,
+
+	// The max value of GOMAXPROCS is constrained
+	// by the max value we can store in the bit fields
+	// of the atomic word.  Reserve a few high values
+	// so that we can detect accidental decrement
+	// beyond zero.
+	maxgomaxprocs = mcpuMask - 10,
+};
+
+#define atomic_mcpu(v)		(((v)>>mcpuShift)&mcpuMask)
+#define atomic_mcpumax(v)	(((v)>>mcpumaxShift)&mcpuMask)
+#define atomic_waitstop(v)	(((v)>>waitstopShift)&1)
+#define atomic_gwaiting(v)	(((v)>>gwaitingShift)&1)
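+
+// Worked example (illustrative only): with the shifts above, the word
+// v = (3<<mcpuShift) | (4<<mcpumaxShift) decodes as atomic_mcpu(v) == 3
+// and atomic_mcpumax(v) == 4: three m's running Go code, four allowed.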
+
+Sched runtime_sched;
+int32 runtime_gomaxprocs;
+bool runtime_singleproc;
+
+static bool canaddmcpu(void);
+
+// An m that is waiting for notewakeup(&m->havenextg).  This may
+// only be accessed while the scheduler lock is held.  This is used to
+// minimize the number of times we call notewakeup while the scheduler
+// lock is held, since the m will normally move quickly to lock the
+// scheduler itself, producing lock contention.
+static M* mwakeup;
+
+// Scheduling helpers.  Sched must be locked.
+static void gput(G*);	// put/get on ghead/gtail
+static G* gget(void);
+static void mput(M*);	// put/get on mhead
+static M* mget(G*);
+static void gfput(G*);	// put/get on gfree
+static G* gfget(void);
+static void matchmg(void);	// match m's to g's
+static void readylocked(G*);	// ready, but sched is locked
+static void mnextg(M*, G*);
+static void mcommoninit(M*);
+
+void
+setmcpumax(uint32 n)
+{
+	uint32 v, w;
+
+	for(;;) {
+		v = runtime_sched.atomic;
+		w = v;
+		w &= ~(mcpuMask<<mcpumaxShift);
+		w |= n<<mcpumaxShift;
+		if(runtime_cas(&runtime_sched.atomic, v, w))
+			break;
+	}
+}
+
+// First function run by a new goroutine.  This replaces gogocall.
+static void
+kickoff(void)
+{
+	void (*fn)(void*);
+
+	fn = (void (*)(void*))(g->entry);
+	fn(g->param);
+	runtime_goexit();
+}
+
+// Switch context to a different goroutine.  This is like longjmp.
+static void runtime_gogo(G*) __attribute__ ((noinline));
+static void
+runtime_gogo(G* newg)
+{
+#ifdef USING_SPLIT_STACK
+	__splitstack_setcontext(&newg->stack_context[0]);
+#endif
+	g = newg;
+	newg->fromgogo = true;
+	setcontext(&newg->context);
+}
+
+// Save context and call fn passing g as a parameter.  This is like
+// setjmp.  Because getcontext always returns 0, unlike setjmp, we use
+// g->fromgogo as a code.  It will be true if we got here via
+// setcontext.  g == nil the first time this is called in a new m.
+static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
+static void
+runtime_mcall(void (*pfn)(G*))
+{
+#ifndef USING_SPLIT_STACK
+	int i;
+#endif
+
+	// Ensure that all registers are on the stack for the garbage
+	// collector.
+	__builtin_unwind_init();
+
+	if(g == m->g0)
+		runtime_throw("runtime: mcall called on m->g0 stack");
+
+	if(g != nil) {
+
+#ifdef USING_SPLIT_STACK
+		__splitstack_getcontext(&g->stack_context[0]);
+#else
+		g->gcnext_sp = &i;
+#endif
+		g->fromgogo = false;
+		getcontext(&g->context);
+	}
+	if (g == nil || !g->fromgogo) {
+#ifdef USING_SPLIT_STACK
+		__splitstack_setcontext(&m->g0->stack_context[0]);
+#endif
+		m->g0->entry = (byte*)pfn;
+		m->g0->param = g;
+		g = m->g0;
+		setcontext(&m->g0->context);
+		runtime_throw("runtime: mcall function returned");
+	}
+}
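+
+// To illustrate the round trip: a goroutine calling
+// runtime_mcall(schedule) saves its context above, switches to m->g0,
+// and runs schedule there; when the scheduler later gogo's back to
+// that goroutine, it resumes at the getcontext above with fromgogo
+// set and simply returns to its caller.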
+
+// The bootstrap sequence is:
+//
+//	call osinit
+//	call schedinit
+//	make & queue new G
+//	call runtime_mstart
+//
+// The new G does:
+//
+//	call main_init_function
+//	call initdone
+//	call main_main
+void
+runtime_schedinit(void)
+{
+	int32 n;
+	const byte *p;
+
+	m = &runtime_m0;
+	g = &runtime_g0;
+	m->g0 = g;
+	m->curg = g;
+	g->m = m;
+
+	m->nomemprof++;
+	runtime_mallocinit();
+	mcommoninit(m);
+
+	runtime_goargs();
+	runtime_goenvs();
+
+	// For debugging:
+	// Allocate internal symbol table representation now,
+	// so that we don't need to call malloc when we crash.
+	// runtime_findfunc(0);
+
+	runtime_gomaxprocs = 1;
+	p = runtime_getenv("GOMAXPROCS");
+	if(p != nil && (n = runtime_atoi(p)) != 0) {
+		if(n > maxgomaxprocs)
+			n = maxgomaxprocs;
+		runtime_gomaxprocs = n;
+	}
+	setmcpumax(runtime_gomaxprocs);
+	runtime_singleproc = runtime_gomaxprocs == 1;
+
+	canaddmcpu();	// mcpu++ to account for bootstrap m
+	m->helpgc = 1;	// flag to tell schedule() to mcpu--
+	runtime_sched.grunning++;
+
+	// Can not enable GC until all roots are registered.
+	// mstats.enablegc = 1;
+	m->nomemprof--;
+}
+
+// Lock the scheduler.
+static void
+schedlock(void)
+{
+	runtime_lock(&runtime_sched);
+}
+
+// Unlock the scheduler.
+static void
+schedunlock(void)
+{
+	M *m;
+
+	m = mwakeup;
+	mwakeup = nil;
+	runtime_unlock(&runtime_sched);
+	if(m != nil)
+		runtime_notewakeup(&m->havenextg);
+}
+
+void
+runtime_goexit(void)
+{
+	g->status = Gmoribund;
+	runtime_gosched();
+}
+
+void
+runtime_goroutineheader(G *g)
+{
+	const char *status;
+
+	switch(g->status) {
+	case Gidle:
+		status = "idle";
+		break;
+	case Grunnable:
+		status = "runnable";
+		break;
+	case Grunning:
+		status = "running";
+		break;
+	case Gsyscall:
+		status = "syscall";
+		break;
+	case Gwaiting:
+		if(g->waitreason)
+			status = g->waitreason;
+		else
+			status = "waiting";
+		break;
+	case Gmoribund:
+		status = "moribund";
+		break;
+	default:
+		status = "???";
+		break;
+	}
+	runtime_printf("goroutine %d [%s]:\n", g->goid, status);
+}
+
+void
+runtime_tracebackothers(G *me)
+{
+	G *g;
+
+	for(g = runtime_allg; g != nil; g = g->alllink) {
+		if(g == me || g->status == Gdead)
+			continue;
+		runtime_printf("\n");
+		runtime_goroutineheader(g);
+		// runtime_traceback(g->sched.pc, g->sched.sp, 0, g);
+	}
+}
+
+// Mark this g as m's idle goroutine.
+// This functionality might be used in environments where programs
+// are limited to a single thread, to simulate a select-driven
+// network server.  It is not exposed via the standard runtime API.
+void
+runtime_idlegoroutine(void)
+{
+	if(g->idlem != nil)
+		runtime_throw("g is already an idle goroutine");
+	g->idlem = m;
+}
+
+static void
+mcommoninit(M *m)
+{
+	// Add to runtime_allm so garbage collector doesn't free m
+	// when it is just in a register or thread-local storage.
+	m->alllink = runtime_allm;
+	// runtime_Cgocalls() iterates over allm w/o schedlock,
+	// so we need to publish it safely.
+	runtime_atomicstorep((void**)&runtime_allm, m);
+
+	m->id = runtime_sched.mcount++;
+	m->fastrand = 0x49f6428aUL + m->id;
+
+	if(m->mcache == nil)
+		m->mcache = runtime_allocmcache();
+}
+
+// Try to increment mcpu.  Report whether succeeded.
+static bool
+canaddmcpu(void)
+{
+	uint32 v;
+
+	for(;;) {
+		v = runtime_sched.atomic;
+		if(atomic_mcpu(v) >= atomic_mcpumax(v))
+			return 0;
+		if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift)))
+			return 1;
+	}
+}
+
+// Put on `g' queue.  Sched must be locked.
+static void
+gput(G *g)
+{
+	M *m;
+
+	// If g is wired, hand it off directly.
+	if((m = g->lockedm) != nil && canaddmcpu()) {
+		mnextg(m, g);
+		return;
+	}
+
+	// If g is the idle goroutine for an m, hand it off.
+	if(g->idlem != nil) {
+		if(g->idlem->idleg != nil) {
+			runtime_printf("m%d idle out of sync: g%d g%d\n",
+				g->idlem->id,
+				g->idlem->idleg->goid, g->goid);
+			runtime_throw("runtime: double idle");
+		}
+		g->idlem->idleg = g;
+		return;
+	}
+
+	g->schedlink = nil;
+	if(runtime_sched.ghead == nil)
+		runtime_sched.ghead = g;
+	else
+		runtime_sched.gtail->schedlink = g;
+	runtime_sched.gtail = g;
+
+	// increment gwait.
+	// if it transitions to nonzero, set atomic gwaiting bit.
+	if(runtime_sched.gwait++ == 0)
+		runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift);
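+	// (The unconditional xadd works as a bit-set here: the
+	// 0 -> 1 transition of gwait guarantees the bit was clear.)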
+}
+
+// Report whether gget would return something.
+static bool
+haveg(void)
+{
+	return runtime_sched.ghead != nil || m->idleg != nil;
+}
+
+// Get from `g' queue.  Sched must be locked.
+static G*
+gget(void)
+{
+	G *g;
+
+	g = runtime_sched.ghead;
+	if(g){
+		runtime_sched.ghead = g->schedlink;
+		if(runtime_sched.ghead == nil)
+			runtime_sched.gtail = nil;
+		// decrement gwait.
+		// if it transitions to zero, clear atomic gwaiting bit.
+		if(--runtime_sched.gwait == 0)
+			runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift);
+	} else if(m->idleg != nil) {
+		g = m->idleg;
+		m->idleg = nil;
+	}
+	return g;
+}
+
+// Put on `m' list.  Sched must be locked.
+static void
+mput(M *m)
+{
+	m->schedlink = runtime_sched.mhead;
+	runtime_sched.mhead = m;
+	runtime_sched.mwait++;
+}
+
+// Get an `m' to run `g'.  Sched must be locked.
+static M*
+mget(G *g)
+{
+	M *m;
+
+	// if g has its own m, use it.
+	if(g && (m = g->lockedm) != nil)
+		return m;
+
+	// otherwise use general m pool.
+	if((m = runtime_sched.mhead) != nil){
+		runtime_sched.mhead = m->schedlink;
+		runtime_sched.mwait--;
+	}
+	return m;
+}
+
+// Mark g ready to run.
+void
+runtime_ready(G *g)
+{
+	schedlock();
+	readylocked(g);
+	schedunlock();
+}
+
+// Mark g ready to run.  Sched is already locked.
+// G might be running already and about to stop.
+// The sched lock protects g->status from changing underfoot.
+static void
+readylocked(G *g)
+{
+	if(g->m){
+		// Running on another machine.
+		// Ready it when it stops.
+		g->readyonstop = 1;
+		return;
+	}
+
+	// Mark runnable.
+	if(g->status == Grunnable || g->status == Grunning) {
+		runtime_printf("goroutine %d has status %d\n", g->goid, g->status);
+		runtime_throw("bad g->status in ready");
+	}
+	g->status = Grunnable;
+
+	gput(g);
+	matchmg();
+}
+
+// Same as readylocked but a different symbol so that
+// debuggers can set a breakpoint here and catch all
+// new goroutines.
+static void
+newprocreadylocked(G *g)
+{
+	readylocked(g);
+}
+
+// Pass g to m for running.
+// Caller has already incremented mcpu.
+static void
+mnextg(M *m, G *g)
+{
+	runtime_sched.grunning++;
+	m->nextg = g;
+	if(m->waitnextg) {
+		m->waitnextg = 0;
+		if(mwakeup != nil)
+			runtime_notewakeup(&mwakeup->havenextg);
+		mwakeup = m;
+	}
+}
+
+// Get the next goroutine that m should run.
+// Sched must be locked on entry, is unlocked on exit.
+// Makes sure that at most $GOMAXPROCS g's are
+// running on cpus (not in system calls) at any given time.
+static G*
+nextgandunlock(void)
+{
+	G *gp;
+	uint32 v;
+
+top:
+	if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs)
+		runtime_throw("negative mcpu");
+
+	// If there is a g waiting as m->nextg, the mcpu++
+	// happened before it was passed to mnextg.
+	if(m->nextg != nil) {
+		gp = m->nextg;
+		m->nextg = nil;
+		schedunlock();
+		return gp;
+	}
+
+	if(m->lockedg != nil) {
+		// We can only run one g, and it's not available.
+		// Make sure some other cpu is running to handle
+		// the ordinary run queue.
+		if(runtime_sched.gwait != 0) {
+			matchmg();
+			// m->lockedg might have been on the queue.
+			if(m->nextg != nil) {
+				gp = m->nextg;
+				m->nextg = nil;
+				schedunlock();
+				return gp;
+			}
+		}
+	} else {
+		// Look for work on global queue.
+		while(haveg() && canaddmcpu()) {
+			gp = gget();
+			if(gp == nil)
+				runtime_throw("gget inconsistency");
+
+			if(gp->lockedm) {
+				mnextg(gp->lockedm, gp);
+				continue;
+			}
+			runtime_sched.grunning++;
+			schedunlock();
+			return gp;
+		}
+
+		// The while loop ended either because the g queue is empty
+		// or because we have maxed out our m procs running go
+		// code (mcpu >= mcpumax).  We need to check that
+		// concurrent actions by entersyscall/exitsyscall cannot
+		// invalidate the decision to end the loop.
+		//
+		// We hold the sched lock, so no one else is manipulating the
+		// g queue or changing mcpumax.  Entersyscall can decrement
+		// mcpu, but if it does so when there is something on the g queue,
+		// the gwait bit will be set, so entersyscall will take the slow path
+		// and use the sched lock.  So it cannot invalidate our decision.
+		//
+		// Wait on global m queue.
+		mput(m);
+	}
+
+	v = runtime_atomicload(&runtime_sched.atomic);
+	if(runtime_sched.grunning == 0)
+		runtime_throw("all goroutines are asleep - deadlock!");
+	m->nextg = nil;
+	m->waitnextg = 1;
+	runtime_noteclear(&m->havenextg);
+
+	// Stoptheworld is waiting for all but its cpu to go to stop.
+	// Entersyscall might have decremented mcpu too, but if so
+	// it will see the waitstop and take the slow path.
+	// Exitsyscall never increments mcpu beyond mcpumax.
+	if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
+		// set waitstop = 0 (known to be 1)
+		runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
+		runtime_notewakeup(&runtime_sched.stopped);
+	}
+	schedunlock();
+
+	runtime_notesleep(&m->havenextg);
+	if(m->helpgc) {
+		runtime_gchelper();
+		m->helpgc = 0;
+		runtime_lock(&runtime_sched);
+		goto top;
+	}
+	if((gp = m->nextg) == nil)
+		runtime_throw("bad m->nextg in nextgoroutine");
+	m->nextg = nil;
+	return gp;
+}
+
+int32
+runtime_helpgc(bool *extra)
+{
+	M *mp;
+	int32 n, max;
+
+	// Figure out how many CPUs to use.
+	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
+	max = runtime_gomaxprocs;
+	if(max > runtime_ncpu)
+		max = runtime_ncpu > 0 ? runtime_ncpu : 1;
+	if(max > MaxGcproc)
+		max = MaxGcproc;
+
+	// We're going to use one CPU no matter what.
+	// Figure out the max number of additional CPUs.
+	max--;
+
+	runtime_lock(&runtime_sched);
+	n = 0;
+	while(n < max && (mp = mget(nil)) != nil) {
+		n++;
+		mp->helpgc = 1;
+		mp->waitnextg = 0;
+		runtime_notewakeup(&mp->havenextg);
+	}
+	runtime_unlock(&runtime_sched);
+	if(extra)
+		*extra = n != max;
+	return n;
+}
+
+void
+runtime_stoptheworld(void)
+{
+	uint32 v;
+
+	schedlock();
+	runtime_gcwaiting = 1;
+
+	setmcpumax(1);
+
+	// while mcpu > 1
+	for(;;) {
+		v = runtime_sched.atomic;
+		if(atomic_mcpu(v) <= 1)
+			break;
+
+		// It would be unsafe for multiple threads to be using
+		// the stopped note at once, but there is only
+		// ever one thread doing garbage collection.
+		runtime_noteclear(&runtime_sched.stopped);
+		if(atomic_waitstop(v))
+			runtime_throw("invalid waitstop");
+
+		// atomic { waitstop = 1 }, predicated on mcpu <= 1 check above
+		// still being true.
+		if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift)))
+			continue;
+
+		schedunlock();
+		runtime_notesleep(&runtime_sched.stopped);
+		schedlock();
+	}
+	runtime_singleproc = runtime_gomaxprocs == 1;
+	schedunlock();
+}
+
+void
+runtime_starttheworld(bool extra)
+{
+	M *m;
+
+	schedlock();
+	runtime_gcwaiting = 0;
+	setmcpumax(runtime_gomaxprocs);
+	matchmg();
+	if(extra && canaddmcpu()) {
+		// Start a new m that will (we hope) be idle
+		// and so available to help when the next
+		// garbage collection happens.
+		// canaddmcpu above did mcpu++
+		// (necessary, because m will be doing various
+		// initialization work so is definitely running),
+		// but m is not running a specific goroutine,
+		// so set the helpgc flag as a signal to m's
+		// first schedule(nil) to mcpu-- and grunning--.
+		m = startm();
+		m->helpgc = 1;
+		runtime_sched.grunning++;
+	}
+	schedunlock();
+}
+
+// Called to start an M.
+void*
+runtime_mstart(void* mp)
+{
+	m = (M*)mp;
+	g = m->g0;
+
+	g->entry = nil;
+	g->param = nil;
+
+	// Record top of stack for use by mcall.
+	// Once we call schedule we're never coming back,
+	// so other calls can reuse this stack space.
+#ifdef USING_SPLIT_STACK
+	__splitstack_getcontext(&g->stack_context[0]);
+#else
+	g->gcinitial_sp = &mp;
+	g->gcstack_size = StackMin;
+	g->gcnext_sp = &mp;
+#endif
+	getcontext(&g->context);
+
+	if(g->entry != nil) {
+		// Got here from mcall.
+		void (*pfn)(G*) = (void (*)(G*))g->entry;
+		G* gp = (G*)g->param;
+		pfn(gp);
+		*(int*)0x21 = 0x21;	// the mcall function never returns; crash if it does.
+	}
+	runtime_minit();
+	schedule(nil);
+	return nil;
+}
+
+typedef struct CgoThreadStart CgoThreadStart;
+struct CgoThreadStart
+{
+	M *m;
+	G *g;
+	void (*fn)(void);
+};
+
+// Kick off new m's as needed (up to mcpumax).
+// There are already `other' other cpus that will
+// start looking for goroutines shortly.
+// Sched is locked.
+static void
+matchmg(void)
+{
+	G *gp;
+	M *mp;
+
+	if(m->mallocing || m->gcing)
+		return;
+
+	while(haveg() && canaddmcpu()) {
+		gp = gget();
+		if(gp == nil)
+			runtime_throw("gget inconsistency");
+
+		// Find the m that will run gp.
+		if((mp = mget(gp)) == nil)
+			mp = startm();
+		mnextg(mp, gp);
+	}
+}
+
+static M*
+startm(void)
+{
+	M *m;
+	pthread_attr_t attr;
+	pthread_t tid;
+
+	m = runtime_malloc(sizeof(M));
+	mcommoninit(m);
+	m->g0 = runtime_malg(-1, nil, nil);
+
+	if(pthread_attr_init(&attr) != 0)
+		runtime_throw("pthread_attr_init");
+	if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
+		runtime_throw("pthread_attr_setdetachstate");
+
+#ifndef PTHREAD_STACK_MIN
+#define PTHREAD_STACK_MIN 8192
+#endif
+	if(pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
+		runtime_throw("pthread_attr_setstacksize");
+
+	if(pthread_create(&tid, &attr, runtime_mstart, m) != 0)
+		runtime_throw("pthread_create");
+
+	return m;
+}
+
+// One round of scheduler: find a goroutine and run it.
+// The argument is the goroutine that was running before
+// schedule was called, or nil if this is the first call.
+// Never returns.
+static void
+schedule(G *gp)
+{
+	int32 hz;
+	uint32 v;
+
+	schedlock();
+	if(gp != nil) {
+		// Just finished running gp.
+		gp->m = nil;
+		runtime_sched.grunning--;
+
+		// atomic { mcpu-- }
+		v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
+		if(atomic_mcpu(v) > maxgomaxprocs)
+			runtime_throw("negative mcpu in scheduler");
+
+		switch(gp->status){
+		case Grunnable:
+		case Gdead:
+			// Shouldn't have been running!
+			runtime_throw("bad gp->status in sched");
+		case Grunning:
+			gp->status = Grunnable;
+			gput(gp);
+			break;
+		case Gmoribund:
+			gp->status = Gdead;
+			if(gp->lockedm) {
+				gp->lockedm = nil;
+				m->lockedg = nil;
+			}
+			gp->idlem = nil;
+			gfput(gp);
+			if(--runtime_sched.gcount == 0)
+				runtime_exit(0);
+			break;
+		}
+		if(gp->readyonstop){
+			gp->readyonstop = 0;
+			readylocked(gp);
+		}
+	} else if(m->helpgc) {
+		// Bootstrap m or new m started by starttheworld.
+		// atomic { mcpu-- }
+		v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
+		if(atomic_mcpu(v) > maxgomaxprocs)
+			runtime_throw("negative mcpu in scheduler");
+		// Compensate for increment in starttheworld().
+		runtime_sched.grunning--;
+		m->helpgc = 0;
+	} else if(m->nextg != nil) {
+		// New m started by matchmg.
+	} else {
+		runtime_throw("invalid m state in scheduler");
+	}
+
+	// Find (or wait for) g to run.  Unlocks runtime_sched.
+	gp = nextgandunlock();
+	gp->readyonstop = 0;
+	gp->status = Grunning;
+	m->curg = gp;
+	gp->m = m;
+
+	// Check whether the profiler needs to be turned on or off.
+	hz = runtime_sched.profilehz;
+	if(m->profilehz != hz)
+		runtime_resetcpuprofiler(hz);
+
+	runtime_gogo(gp);
+}
+
+// Enter scheduler.  If g->status is Grunning,
+// re-queues g and runs everyone else who is waiting
+// before running g again.  If g->status is Gmoribund,
+// kills off g.
+void
+runtime_gosched(void)
+{
+	if(m->locks != 0)
+		runtime_throw("gosched holding locks");
+	if(g == m->g0)
+		runtime_throw("gosched of g0");
+	runtime_mcall(schedule);
+}
+
+// The goroutine g is about to enter a system call.
+// Record that it's not using the cpu anymore.
+// This is called only from the go syscall library and cgocall,
+// not from the low-level system calls used by the runtime.
+//
+// Entersyscall cannot split the stack: the runtime_gosave must
+// make g->sched refer to the caller's stack segment, because
+// entersyscall is going to return immediately after.
+// It's okay to call matchmg and notewakeup even after
+// decrementing mcpu, because we haven't released the
+// sched lock yet, so the garbage collector cannot be running.
+
+void runtime_entersyscall(void) __attribute__ ((no_split_stack));
+
+void
+runtime_entersyscall(void)
+{
+	uint32 v;
+
+	// Leave SP around for gc and traceback.
+#ifdef USING_SPLIT_STACK
+	g->gcstack = __splitstack_find(NULL, NULL, &g->gcstack_size,
+				       &g->gcnext_segment, &g->gcnext_sp,
+				       &g->gcinitial_sp);
+#else
+	g->gcnext_sp = (byte *) &v;
+#endif
+
+	// Save the registers in the g structure so that any pointers
+	// held in registers will be seen by the garbage collector.
+	// We could use getcontext here, but setjmp is more efficient
+	// because it doesn't need to save the signal mask.
+	setjmp(g->gcregs);
+
+	g->status = Gsyscall;
+
+	// Fast path.
+	// The slow path inside the schedlock/schedunlock will get
+	// through without stopping if it does:
+	//	mcpu--
+	//	gwait not true
+	//	waitstop && mcpu <= mcpumax not true
+	// If we can do the same with a single atomic add,
+	// then we can skip the locks.
+	v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
+	if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
+		return;
+
+	schedlock();
+	v = runtime_atomicload(&runtime_sched.atomic);
+	if(atomic_gwaiting(v)) {
+		matchmg();
+		v = runtime_atomicload(&runtime_sched.atomic);
+	}
+	if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
+		runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
+		runtime_notewakeup(&runtime_sched.stopped);
+	}
+
+	schedunlock();
+}
+
+// The goroutine g exited its system call.
+// Arrange for it to run on a cpu again.
+// This is called only from the go syscall library, not
+// from the low-level system calls used by the runtime.
+void
+runtime_exitsyscall(void)
+{
+	G *gp;
+	uint32 v;
+
+	// Fast path.
+	// If we can do the mcpu++ bookkeeping and
+	// find that we still have mcpu <= mcpumax, then we can
+	// start executing Go code immediately, without having to
+	// schedlock/schedunlock.
+	gp = g;
+	v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift));
+	if(m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) {
+		// There's a cpu for us, so we can run.
+		gp->status = Grunning;
+		// Garbage collector isn't running (since we are),
+		// so okay to clear gcstack.
+#ifdef USING_SPLIT_STACK
+		gp->gcstack = nil;
+#endif
+		gp->gcnext_sp = nil;
+		runtime_memclr(gp->gcregs, sizeof gp->gcregs);
+		return;
+	}
+
+	// Tell scheduler to put g back on the run queue:
+	// mostly equivalent to g->status = Grunning,
+	// but keeps the garbage collector from thinking
+	// that g is running right now, which it's not.
+	gp->readyonstop = 1;
+
+	// All the cpus are taken.
+	// The scheduler will ready g and put this m to sleep.
+	// When the scheduler takes g away from m,
+	// it will undo the runtime_sched.mcpu++ above.
+	runtime_gosched();
+
+	// Gosched returned, so we're allowed to run now.
+	// Delete the gcstack information that we left for
+	// the garbage collector during the system call.
+	// Must wait until now because until gosched returns
+	// we don't know for sure that the garbage collector
+	// is not running.
+#ifdef USING_SPLIT_STACK
+	gp->gcstack = nil;
+#endif
+	gp->gcnext_sp = nil;
+	runtime_memclr(gp->gcregs, sizeof gp->gcregs);
+}
+
+G*
+runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
+{
+	G *newg;
+
+	newg = runtime_malloc(sizeof(G));
+	if(stacksize >= 0) {
+#ifdef USING_SPLIT_STACK
+		*ret_stack = __splitstack_makecontext(stacksize,
+						      &newg->stack_context[0],
+						      ret_stacksize);
+#else
+		*ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0);
+		*ret_stacksize = stacksize;
+		newg->gcinitial_sp = *ret_stack;
+		newg->gcstack_size = stacksize;
+#endif
+	}
+	return newg;
+}
+
+G*
+__go_go(void (*fn)(void*), void* arg)
+{
+	byte *sp;
+	size_t spsize;
+	G * volatile newg;	// volatile to avoid longjmp warning
+
+	schedlock();
+
+	if((newg = gfget()) != nil){
+#ifdef USING_SPLIT_STACK
+		sp = __splitstack_resetcontext(&newg->stack_context[0],
+					       &spsize);
+#else
+		sp = newg->gcinitial_sp;
+		spsize = newg->gcstack_size;
+		newg->gcnext_sp = sp;
+#endif
+	} else {
+		newg = runtime_malg(StackMin, &sp, &spsize);
+		if(runtime_lastg == nil)
+			runtime_allg = newg;
+		else
+			runtime_lastg->alllink = newg;
+		runtime_lastg = newg;
+	}
+	newg->status = Gwaiting;
+	newg->waitreason = "new goroutine";
+
+	newg->entry = (byte*)fn;
+	newg->param = arg;
+	newg->gopc = (uintptr)__builtin_return_address(0);
+
+	runtime_sched.gcount++;
+	runtime_sched.goidgen++;
+	newg->goid = runtime_sched.goidgen;
+
+	if(sp == nil)
+		runtime_throw("nil g->stack0");
+
+	getcontext(&newg->context);
+	newg->context.uc_stack.ss_sp = sp;
+	newg->context.uc_stack.ss_size = spsize;
+	makecontext(&newg->context, kickoff, 0);
+
+	newprocreadylocked(newg);
+	schedunlock();
+
+	return newg;
+}
+
+// Put on gfree list.  Sched must be locked.
+static void
+gfput(G *g)
+{
+	g->schedlink = runtime_sched.gfree;
+	runtime_sched.gfree = g;
+}
+
+// Get from gfree list.  Sched must be locked.
+static G*
+gfget(void)
+{
+	G *g;
+
+	g = runtime_sched.gfree;
+	if(g)
+		runtime_sched.gfree = g->schedlink;
+	return g;
+}
+
+// Run all deferred functions for the current goroutine.
+static void
+rundefer(void)
+{
+	Defer *d;
+
+	while((d = g->defer) != nil) {
+		void (*pfn)(void*);
+
+		pfn = d->__pfn;
+		d->__pfn = nil;
+		if (pfn != nil)
+			(*pfn)(d->__arg);
+		g->defer = d->__next;
+		runtime_free(d);
+	}
+}
+
+void runtime_Goexit (void) __asm__ ("libgo_runtime.runtime.Goexit");
+
+void
+runtime_Goexit(void)
+{
+	rundefer();
+	runtime_goexit();
+}
+
+void runtime_Gosched (void) __asm__ ("libgo_runtime.runtime.Gosched");
+
+void
+runtime_Gosched(void)
+{
+	runtime_gosched();
+}
+
+void runtime_LockOSThread (void)
+  __asm__ ("libgo_runtime.runtime.LockOSThread");
+
+void
+runtime_LockOSThread(void)
+{
+	m->lockedg = g;
+	g->lockedm = m;
+}
+
+// delete when scheduler is stronger
+int32
+runtime_gomaxprocsfunc(int32 n)
+{
+	int32 ret;
+	uint32 v;
+
+	schedlock();
+	ret = runtime_gomaxprocs;
+	if(n <= 0)
+		n = ret;
+	if(n > maxgomaxprocs)
+		n = maxgomaxprocs;
+	runtime_gomaxprocs = n;
+	if(runtime_gomaxprocs > 1)
+		runtime_singleproc = false;
+	if(runtime_gcwaiting != 0) {
+		if(atomic_mcpumax(runtime_sched.atomic) != 1)
+			runtime_throw("invalid mcpumax during gc");
+		schedunlock();
+		return ret;
+	}
+
+	setmcpumax(n);
+
+	// If there are now fewer allowed procs
+	// than procs running, stop.
+	v = runtime_atomicload(&runtime_sched.atomic);
+	if((int32)atomic_mcpu(v) > n) {
+		schedunlock();
+		runtime_gosched();
+		return ret;
+	}
+	// handle more procs
+	matchmg();
+	schedunlock();
+	return ret;
+}
+
+void runtime_UnlockOSThread (void)
+  __asm__ ("libgo_runtime.runtime.UnlockOSThread");
+
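+// UnlockOSThread undoes a previous call to LockOSThread, allowing the
+// calling goroutine to run on any OS thread again.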
+void
+runtime_UnlockOSThread(void)
+{
+	m->lockedg = nil;
+	g->lockedm = nil;
+}
+
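+// lockedOSThread reports whether the calling goroutine is locked to
+// its OS thread.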
+bool
+runtime_lockedOSThread(void)
+{
+	return g->lockedm != nil && m->lockedg != nil;
+}
+
+// for testing of wire, unwire
+uint32
+runtime_mid(void)
+{
+	return m->id;
+}
+
+int32 runtime_Goroutines (void)
+  __asm__ ("libgo_runtime.runtime.Goroutines");
+
+int32
+runtime_Goroutines(void)
+{
+	return runtime_sched.gcount;
+}
+
+int32
+runtime_mcount(void)
+{
+	return runtime_sched.mcount;
+}
 
 static struct {
 	Lock;
@@ -28,22 +1330,22 @@ static struct {
 void
 runtime_sigprof(uint8 *pc __attribute__ ((unused)),
 		uint8 *sp __attribute__ ((unused)),
-		uint8 *lr __attribute__ ((unused)))
+		uint8 *lr __attribute__ ((unused)),
+		G *gp __attribute__ ((unused)))
 {
-	int32 n;
-	
+	// int32 n;
+
 	if(prof.fn == nil || prof.hz == 0)
 		return;
-	
+
 	runtime_lock(&prof);
 	if(prof.fn == nil) {
 		runtime_unlock(&prof);
 		return;
 	}
-	n = 0;
-	// n = runtime·gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf));
-	if(n > 0)
-		prof.fn(prof.pcbuf, n);
+	// n = runtime_gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf));
+	// if(n > 0)
+	// 	prof.fn(prof.pcbuf, n);
 	runtime_unlock(&prof);
 }
 
@@ -67,28 +1369,10 @@ runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
 	prof.fn = fn;
 	prof.hz = hz;
 	runtime_unlock(&prof);
-	// runtime_lock(&runtime_sched);
-	// runtime_sched.profilehz = hz;
-	// runtime_unlock(&runtime_sched);
-	
+	runtime_lock(&runtime_sched);
+	runtime_sched.profilehz = hz;
+	runtime_unlock(&runtime_sched);
+
 	if(hz != 0)
 		runtime_resetcpuprofiler(hz);
 }
-
-/* The entersyscall and exitsyscall functions aren't used for anything
-   yet.  Eventually they will be used to switch to a new OS thread
-   when making a potentially-blocking library call.  */
-
-void runtime_entersyscall() __asm__("libgo_syscall.syscall.entersyscall");
-
-void
-runtime_entersyscall()
-{
-}
-
-void runtime_exitsyscall() __asm__("libgo_syscall.syscall.exitsyscall");
-
-void
-runtime_exitsyscall()
-{
-}
diff --git a/libgo/runtime/runtime.c b/libgo/runtime/runtime.c
index 182ec0e3257727999c4335798966db2556025b81..de4c982620f42c64cb96b8c190f82eebc4e48473 100644
--- a/libgo/runtime/runtime.c
+++ b/libgo/runtime/runtime.c
@@ -16,6 +16,9 @@ static Lock paniclk;
 void
 runtime_startpanic(void)
 {
+	M *m;
+
+	m = runtime_m();
 	if(m->dying) {
 		runtime_printf("panic during panic\n");
 		runtime_exit(3);
@@ -156,8 +159,10 @@ runtime_atoi(const byte *p)
 uint32
 runtime_fastrand1(void)
 {
+	M *m;
 	uint32 x;
 
+	m = runtime_m();
 	x = m->fastrand;
 	x += x;
 	if(x & 0x80000000L)
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index 8a4153091e0c856340fb6e99165defc8ab9511f7..818465cb897f2e40a6210de0ae9958c533fabb31 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -8,6 +8,7 @@
 
 #define _GNU_SOURCE
 #include "go-assert.h"
+#include <setjmp.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -17,6 +18,7 @@
 #include <fcntl.h>
 #include <pthread.h>
 #include <semaphore.h>
+#include <ucontext.h>
 
 #ifdef HAVE_SYS_MMAN_H
 #include <sys/mman.h>
@@ -59,24 +61,33 @@ typedef	struct	__go_panic_stack	Panic;
 typedef	struct	__go_open_array		Slice;
 typedef	struct	__go_string		String;
 
-/* Per CPU declarations.  */
-
-#ifdef __rtems__
-#define __thread
-#endif
-
-extern __thread		G*	g;
-extern __thread		M* 	m;
+/*
+ * per-cpu declarations.
+ */
+extern M*	runtime_m(void);
+extern G*	runtime_g(void);
 
 extern M	runtime_m0;
 extern G	runtime_g0;
 
-#ifdef __rtems__
-#undef __thread
-#endif
-
-/* Constants.  */
-
+/*
+ * defined constants
+ */
+enum
+{
+	// G status
+	//
+	// If you add to this list, add to the list
+	// of "okay during garbage collection" status
+	// in mgc0.c too.
+	Gidle,
+	Grunnable,
+	Grunning,
+	Gsyscall,
+	Gwaiting,
+	Gmoribund,
+	Gdead,
+};
 enum
 {
 	true	= 1,
@@ -102,12 +113,19 @@ struct	G
 	Panic*	panic;
 	void*	exception;	// current exception being thrown
 	bool	is_foreign;	// whether current exception from other language
+	void	*gcstack;	// if status==Gsyscall, gcstack = stackbase to use during gc
+	uintptr	gcstack_size;
+	void*	gcnext_segment;
+	void*	gcnext_sp;
+	void*	gcinitial_sp;
+	jmp_buf	gcregs;
 	byte*	entry;		// initial function
 	G*	alllink;	// on allg
 	void*	param;		// passed parameter on wakeup
+	bool	fromgogo;	// reached from gogo
 	int16	status;
 	int32	goid;
-	int8*	waitreason;	// if status==Gwaiting
+	const char*	waitreason;	// if status==Gwaiting
 	G*	schedlink;
 	bool	readyonstop;
 	bool	ispanic;
@@ -118,38 +136,38 @@ struct	G
 	// uintptr	sigcode0;
 	// uintptr	sigcode1;
 	// uintptr	sigpc;
-	// uintptr	gopc;	// pc of go statement that created this goroutine
+	uintptr	gopc;	// pc of go statement that created this goroutine
+
+	ucontext_t	context;
+	void*		stack_context[10];
 };
 
 struct	M
 {
+	G*	g0;		// goroutine with scheduling stack
+	G*	gsignal;	// signal-handling G
 	G*	curg;		// current running goroutine
 	int32	id;
 	int32	mallocing;
 	int32	gcing;
 	int32	locks;
 	int32	nomemprof;
-	int32	gcing_for_prof;
-	int32	holds_finlock;
-	int32	gcing_for_finlock;
+	int32	waitnextg;
 	int32	dying;
 	int32	profilehz;
+	int32	helpgc;
 	uint32	fastrand;
+	Note	havenextg;
+	G*	nextg;
+	M*	alllink;	// on allm
+	M*	schedlink;
 	MCache	*mcache;
+	G*	lockedg;
+	G*	idleg;
 	M*	nextwaitm;	// next M waiting for lock
 	uintptr	waitsema;	// semaphore for parking on locks
 	uint32	waitsemacount;
 	uint32	waitsemalock;
-
-	/* For the list of all threads.  */
-	struct __go_thread_id *list_entry;
-
-	/* For the garbage collector.  */
-	void	*gc_sp;
-	size_t	gc_len;
-	void	*gc_next_segment;
-	void	*gc_next_sp;
-	void	*gc_initial_sp;
 };
 
 /* Macros.  */
@@ -171,7 +189,13 @@ enum {
 /*
  * external data
  */
+G*	runtime_allg;
+G*	runtime_lastg;
+M*	runtime_allm;
+extern	int32	runtime_gomaxprocs;
+extern	bool	runtime_singleproc;
 extern	uint32	runtime_panicking;
+extern	int32	runtime_gcwaiting;		// gc is waiting to run
 int32	runtime_ncpu;
 
 /*
@@ -188,21 +212,24 @@ void	runtime_goargs(void);
 void	runtime_goenvs(void);
 void	runtime_throw(const char*);
 void*	runtime_mal(uintptr);
+void	runtime_schedinit(void);
+void	runtime_initsig(int32);
 String	runtime_gostringnocopy(byte*);
+void*	runtime_mstart(void*);
+G*	runtime_malg(int32, byte**, size_t*);
+void	runtime_minit(void);
 void	runtime_mallocinit(void);
+void	runtime_gosched(void);
+void	runtime_goexit(void);
+void	runtime_entersyscall(void) __asm__("libgo_syscall.syscall.entersyscall");
+void	runtime_exitsyscall(void) __asm__("libgo_syscall.syscall.exitsyscall");
 void	siginit(void);
 bool	__go_sigsend(int32 sig);
 int64	runtime_nanotime(void);
 
 void	runtime_stoptheworld(void);
 void	runtime_starttheworld(bool);
-void	__go_go(void (*pfn)(void*), void*);
-void	__go_gc_goroutine_init(void*);
-void	__go_enable_gc(void);
-int	__go_run_goroutine_gc(int);
-void	__go_scanstacks(void (*scan)(byte *, int64));
-void	__go_stealcache(void);
-void	__go_cachestats(void);
+G*	__go_go(void (*pfn)(void*), void*);
 
 /*
  * mutual exclusion locks.  in the uncontended case,
@@ -274,14 +301,16 @@ bool	runtime_addfinalizer(void*, void(*fn)(void*), const struct __go_func_type *
 
 void	runtime_dopanic(int32) __attribute__ ((noreturn));
 void	runtime_startpanic(void);
+void	runtime_ready(G*);
 const byte*	runtime_getenv(const char*);
 int32	runtime_atoi(const byte*);
-void	runtime_sigprof(uint8 *pc, uint8 *sp, uint8 *lr);
+void	runtime_sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp);
 void	runtime_resetcpuprofiler(int32);
 void	runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32);
 uint32	runtime_fastrand1(void);
-void	runtime_semacquire (uint32 *) asm ("libgo_runtime.runtime.Semacquire");
-void	runtime_semrelease (uint32 *) asm ("libgo_runtime.runtime.Semrelease");
+void	runtime_semacquire(uint32 volatile *);
+void	runtime_semrelease(uint32 volatile *);
+int32	runtime_gomaxprocsfunc(int32 n);
 void	runtime_procyield(uint32);
 void	runtime_osyield(void);
 void	runtime_usleep(uint32);
@@ -294,3 +323,6 @@ void reflect_call(const struct __go_func_type *, const void *, _Bool, _Bool,
 #ifdef __rtems__
 void __wrap_rtems_task_variable_add(void **);
 #endif
+
+/* Temporary.  */
+void	runtime_cond_wait(pthread_cond_t*, pthread_mutex_t*);
diff --git a/libgo/runtime/runtime1.goc b/libgo/runtime/runtime1.goc
new file mode 100644
index 0000000000000000000000000000000000000000..4cd98041717f3137352e74a42a6ec9d4ec5c000c
--- /dev/null
+++ b/libgo/runtime/runtime1.goc
@@ -0,0 +1,10 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+#include "runtime.h"
+
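+// GOMAXPROCS sets the maximum number of CPUs that can be executing
+// simultaneously and returns the previous setting.  If n < 1 the
+// current setting is left unchanged.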
+func GOMAXPROCS(n int32) (ret int32) {
+	ret = runtime_gomaxprocsfunc(n);
+}
diff --git a/libgo/runtime/sema.goc b/libgo/runtime/sema.goc
new file mode 100644
index 0000000000000000000000000000000000000000..dd58cf38fb88f6e4893242ea6a8f479673348052
--- /dev/null
+++ b/libgo/runtime/sema.goc
@@ -0,0 +1,181 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Semaphore implementation exposed to Go.
+// Intended use is to provide a sleep and wakeup
+// primitive that can be used in the contended case
+// of other synchronization primitives.
+// Thus it targets the same goal as Linux's futex,
+// but it has much simpler semantics.
+//
+// That is, don't think of these as semaphores.
+// Think of them as a way to implement sleep and wakeup
+// such that every sleep is paired with a single wakeup,
+// even if, due to races, the wakeup happens before the sleep.
+//
+// See Mullender and Cox, ``Semaphores in Plan 9,''
+// http://swtch.com/semaphore.pdf
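+//
+// A rough usage sketch (illustrative only, not part of this change):
+//
+//	static uint32 sem;		// starts at 0: no wakeup pending
+//
+//	runtime_semacquire(&sem);	// sleeper: blocks until a wakeup arrives
+//	runtime_semrelease(&sem);	// waker: wakes (or pre-wakes) one sleeper
+//
+// Each acquire consumes exactly one release, even when the release
+// happens before the matching acquire.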
+
+package runtime
+#include "runtime.h"
+#include "arch.h"
+
+typedef struct Sema Sema;
+struct Sema
+{
+	uint32 volatile *addr;
+	G *g;
+	Sema *prev;
+	Sema *next;
+};
+
+typedef struct SemaRoot SemaRoot;
+struct SemaRoot
+{
+	Lock;
+	Sema *head;
+	Sema *tail;
+	// Number of waiters. Read w/o the lock.
+	uint32 volatile nwait;
+};
+
+// Prime to not correlate with any user patterns.
+#define SEMTABLESZ 251
+
+static union
+{
+	SemaRoot;
+	uint8 pad[CacheLineSize];
+} semtable[SEMTABLESZ];
+
+static SemaRoot*
+semroot(uint32 volatile *addr)
+{
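+	// The low bits of addr are just alignment, so shift them off to
+	// spread nearby addresses across the table.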
+	return &semtable[((uintptr)addr >> 3) % SEMTABLESZ];
+}
+
+static void
+semqueue(SemaRoot *root, uint32 volatile *addr, Sema *s)
+{
+	s->g = runtime_g();
+	s->addr = addr;
+	s->next = nil;
+	s->prev = root->tail;
+	if(root->tail)
+		root->tail->next = s;
+	else
+		root->head = s;
+	root->tail = s;
+}
+
+static void
+semdequeue(SemaRoot *root, Sema *s)
+{
+	if(s->next)
+		s->next->prev = s->prev;
+	else
+		root->tail = s->prev;
+	if(s->prev)
+		s->prev->next = s->next;
+	else
+		root->head = s->next;
+	s->prev = nil;
+	s->next = nil;
+}
+
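+// cansemacquire consumes one unit from *addr if the count is positive,
+// without blocking.  It reports whether it succeeded.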
+static int32
+cansemacquire(uint32 volatile *addr)
+{
+	uint32 v;
+
+	while((v = runtime_atomicload(addr)) > 0)
+		if(runtime_cas(addr, v, v-1))
+			return 1;
+	return 0;
+}
+
+void
+runtime_semacquire(uint32 volatile *addr)
+{
+	G *g;
+	Sema s;
+	SemaRoot *root;
+
+	// Easy case.
+	if(cansemacquire(addr))
+		return;
+
+	// Harder case:
+	//	increment waiter count
+	//	try cansemacquire one more time, return if succeeded
+	//	enqueue itself as a waiter
+	//	sleep
+	//	(waiter descriptor is dequeued by signaler)
+	g = runtime_g();
+	root = semroot(addr);
+	for(;;) {
+		runtime_lock(root);
+		// Add ourselves to nwait to disable "easy case" in semrelease.
+		runtime_xadd(&root->nwait, 1);
+		// Check cansemacquire to avoid missed wakeup.
+		if(cansemacquire(addr)) {
+			runtime_xadd(&root->nwait, -1);
+			runtime_unlock(root);
+			return;
+		}
+		// Any semrelease after the cansemacquire knows we're waiting
+		// (we set nwait above), so go to sleep.
+		semqueue(root, addr, &s);
+		g->status = Gwaiting;
+		g->waitreason = "semacquire";
+		runtime_unlock(root);
+		runtime_gosched();
+		if(cansemacquire(addr))
+			return;
+	}
+}
+
+void
+runtime_semrelease(uint32 volatile *addr)
+{
+	Sema *s;
+	SemaRoot *root;
+
+	root = semroot(addr);
+	runtime_xadd(addr, 1);
+
+	// Easy case: no waiters?
+	// This check must happen after the xadd, to avoid a missed wakeup
+	// (see loop in semacquire).
+	if(runtime_atomicload(&root->nwait) == 0)
+		return;
+
+	// Harder case: search for a waiter and wake it.
+	runtime_lock(root);
+	if(runtime_atomicload(&root->nwait) == 0) {
+		// The count is already consumed by another goroutine,
+		// so no need to wake up another goroutine.
+		runtime_unlock(root);
+		return;
+	}
+	for(s = root->head; s; s = s->next) {
+		if(s->addr == addr) {
+			runtime_xadd(&root->nwait, -1);
+			semdequeue(root, s);
+			break;
+		}
+	}
+	runtime_unlock(root);
+	if(s)
+		runtime_ready(s->g);
+}
+
+func Semacquire(addr *uint32) {
+	runtime_semacquire(addr);
+}
+
+func Semrelease(addr *uint32) {
+	runtime_semrelease(addr);
+}
diff --git a/libgo/runtime/sigqueue.goc b/libgo/runtime/sigqueue.goc
index 3a908680ecec2e206bf2067a73914d44b65becf2..502dc442c8379bc70e2236373e42e954f12c24d4 100644
--- a/libgo/runtime/sigqueue.goc
+++ b/libgo/runtime/sigqueue.goc
@@ -81,9 +81,9 @@ __go_sigsend(int32 s)
 
 // Called to receive a bitmask of queued signals.
 func Sigrecv() (m uint32) {
-	// runtime·entersyscall();
+	runtime_entersyscall();
 	runtime_notesleep(&sig);
-	// runtime·exitsyscall();
+	runtime_exitsyscall();
 	runtime_noteclear(&sig);
 	for(;;) {
 		m = sig.mask;
@@ -110,5 +110,6 @@ func Signame(sig int32) (name String) {
 }
 
 func Siginit() {
+	runtime_initsig(1);
 	sig.inuse = true;	// enable reception of signals; cannot disable
 }
diff --git a/libgo/runtime/thread-linux.c b/libgo/runtime/thread-linux.c
index dde4e707a2dd3b34f96964b095e2c66b951f045c..b51f4970d258ec84162a4cd8cbe8863325712179 100644
--- a/libgo/runtime/thread-linux.c
+++ b/libgo/runtime/thread-linux.c
@@ -66,7 +66,8 @@ static int32
 getproccount(void)
 {
 	int32 fd, rd, cnt, cpustrlen;
-	const byte *cpustr, *pos;
+	const char *cpustr;
+	const byte *pos;
 	byte *bufpos;
 	byte buf[256];
 
@@ -75,14 +76,14 @@ getproccount(void)
 		return 1;
 	cnt = 0;
 	bufpos = buf;
-	cpustr = (const byte*)"\ncpu";
-	cpustrlen = runtime_findnull((const byte*)cpustr);
+	cpustr = "\ncpu";
+	cpustrlen = strlen(cpustr);
 	for(;;) {
 		rd = read(fd, bufpos, sizeof(buf)-cpustrlen);
 		if(rd == -1)
 			break;
 		bufpos[rd] = 0;
-		for(pos=buf; (pos=(const byte*)strstr((const char*)pos, (const char*)cpustr)) != nil; cnt++, pos++) {
+		for(pos=buf; (pos=(const byte*)strstr((const char*)pos, cpustr)) != nil; cnt++, pos++) {
 		}
 		if(rd < cpustrlen)
 			break;
diff --git a/libgo/runtime/thread.c b/libgo/runtime/thread.c
index 821668bc17af6e26ace086042e0433963579e08a..459fc85c7802c945fbf31b78619bf399422ab650 100644
--- a/libgo/runtime/thread.c
+++ b/libgo/runtime/thread.c
@@ -3,6 +3,8 @@
 // license that can be found in the LICENSE file.
 
 #include <errno.h>
+#include <signal.h>
+
 #include "runtime.h"
 #include "go-assert.h"
 
@@ -71,3 +73,44 @@ __sync_fetch_and_add_4 (uint32* ptr, uint32 add)
 }
 
 #endif
+
+// Called to initialize a new m (including the bootstrap m).
+void
+runtime_minit(void)
+{
+	byte* stack;
+	size_t stacksize;
+	stack_t ss;
+
+	// Initialize signal handling.
+	runtime_m()->gsignal = runtime_malg(32*1024, &stack, &stacksize);	// OS X wants >=8K, Linux >=2K
+	ss.ss_sp = stack;
+	ss.ss_flags = 0;
+	ss.ss_size = stacksize;
+	if(sigaltstack(&ss, nil) < 0)
+		*(int *)0xf1 = 0xf1;	// deliberate crash: cannot run without a signal stack
+}
+
+// Temporary functions, which will be removed when we stop using
+// condition variables.
+
+void
+runtime_cond_wait(pthread_cond_t* cond, pthread_mutex_t* mutex)
+{
+	int i;
+
+	runtime_entersyscall();
+
+	i = pthread_cond_wait(cond, mutex);
+	if(i != 0)
+		runtime_throw("pthread_cond_wait");
+	i = pthread_mutex_unlock(mutex);
+	if(i != 0)
+		runtime_throw("pthread_mutex_unlock");
+
+	runtime_exitsyscall();
+
+	i = pthread_mutex_lock(mutex);
+	if(i != 0)
+		runtime_throw("pthread_mutex_lock");
+}