Skip to content

Commit 8bb8eaf

Browse files
committed
Revert "runtime: use fixed TLS offsets on darwin/amd64 and darwin/386"
This reverts commit 76e92d1. Reason for revert: Seems to have broken the darwin/386 builder, the toolchain is barfing on the new inline assembly. Change-Id: Ic83fa3c85148946529c5fd47d1e1669898031ace Reviewed-on: https://go-review.googlesource.com/106155 Reviewed-by: Keith Randall <khr@golang.org>
1 parent c4a3a9c commit 8bb8eaf

File tree

5 files changed

+167
-28
lines changed

5 files changed

+167
-28
lines changed

src/cmd/link/internal/ld/sym.go

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -113,30 +113,24 @@ func (ctxt *Link) computeTLSOffset() {
113113

114114
/*
115115
* OS X system constants - offset from 0(GS) to our TLS.
116+
* Explained in src/runtime/cgo/gcc_darwin_*.c.
116117
*/
117118
case objabi.Hdarwin:
118119
switch ctxt.Arch.Family {
119120
default:
120121
log.Fatalf("unknown thread-local storage offset for darwin/%s", ctxt.Arch.Name)
121122

122-
/*
123-
* For x86, Apple has reserved a slot in the TLS for Go. See issue 23617.
124-
* That slot is at offset 0x30 on amd64, and 0x18 on 386.
125-
* The slot will hold the G pointer.
126-
* These constants should match those in runtime/sys_darwin_{386,amd64}.s
127-
* and runtime/cgo/gcc_darwin_{386,amd64}.c.
128-
*/
129-
case sys.I386:
130-
ctxt.Tlsoffset = 0x18
131-
132-
case sys.AMD64:
133-
ctxt.Tlsoffset = 0x30
134-
135123
case sys.ARM:
136124
ctxt.Tlsoffset = 0 // dummy value, not needed
137125

126+
case sys.AMD64:
127+
ctxt.Tlsoffset = 0x8a0
128+
138129
case sys.ARM64:
139130
ctxt.Tlsoffset = 0 // dummy value, not needed
131+
132+
case sys.I386:
133+
ctxt.Tlsoffset = 0x468
140134
}
141135
}
142136

src/runtime/cgo/gcc_darwin_386.c

Lines changed: 89 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,89 @@
99
#include "libcgo_unix.h"
1010

1111
static void* threadentry(void*);
12+
static pthread_key_t k1;
13+
14+
#define magic1 (0x23581321U)
15+
16+
static void
17+
inittls(void)
18+
{
19+
uint32 x;
20+
pthread_key_t tofree[128], k;
21+
int i, ntofree;
22+
23+
/*
24+
* Allocate thread-local storage slot for g.
25+
* The key numbers start at 0x100, and we expect to be
26+
* one of the early calls to pthread_key_create, so we
27+
* should be able to get a pretty low number.
28+
*
29+
* In Darwin/386 pthreads, %gs points at the thread
30+
* structure, and each key is an index into the thread-local
31+
* storage array that begins at offset 0x48 within that structure.
32+
* It may happen that we are not quite the first function to try
33+
* to allocate thread-local storage keys, so instead of depending
34+
* on getting 0x100, we try for 0x108, allocating keys until
35+
* we get the one we want and then freeing the ones we didn't want.
36+
*
37+
* Thus the final offset to use in %gs references is
38+
* 0x48+4*0x108 = 0x468.
39+
*
40+
* The linker and runtime hard-code this constant offset
41+
* from %gs where we expect to find g.
42+
* Known to src/cmd/link/internal/ld/sym.go:/0x468
43+
* and to src/runtime/sys_darwin_386.s:/0x468
44+
*
45+
* This is truly disgusting and a bit fragile, but taking care
46+
* of it here protects the rest of the system from damage.
47+
* The alternative would be to use a global variable that
48+
* held the offset and refer to that variable each time we
49+
* need a %gs variable (g). That approach would
50+
* require an extra instruction and memory reference in
51+
* every stack growth prolog and would also require
52+
* rewriting the code that 8c generates for extern registers.
53+
*
54+
* Things get more disgusting on OS X 10.7 Lion.
55+
* The 0x48 base mentioned above is the offset of the tsd
56+
* array within the per-thread structure on Leopard and Snow Leopard.
57+
* On Lion, the base moved a little, so while the math above
58+
* still applies, the base is different. Thus, we cannot
59+
* look for specific key values if we want to build binaries
60+
* that run on both systems. Instead, forget about the
61+
* specific key values and just allocate and initialize per-thread
62+
* storage until we find a key that writes to the memory location
63+
* we want. Then keep that key.
64+
*/
65+
ntofree = 0;
66+
for(;;) {
67+
if(pthread_key_create(&k, nil) != 0) {
68+
fprintf(stderr, "runtime/cgo: pthread_key_create failed\n");
69+
abort();
70+
}
71+
pthread_setspecific(k, (void*)magic1);
72+
asm volatile("movl %%gs:0x468, %0" : "=r"(x));
73+
pthread_setspecific(k, 0);
74+
if(x == magic1) {
75+
k1 = k;
76+
break;
77+
}
78+
if(ntofree >= nelem(tofree)) {
79+
fprintf(stderr, "runtime/cgo: could not obtain pthread_keys\n");
80+
fprintf(stderr, "\ttried");
81+
for(i=0; i<ntofree; i++)
82+
fprintf(stderr, " %#x", (unsigned)tofree[i]);
83+
fprintf(stderr, "\n");
84+
abort();
85+
}
86+
tofree[ntofree++] = k;
87+
}
88+
89+
/*
90+
* We got the key we wanted. Free the others.
91+
*/
92+
for(i=0; i<ntofree; i++)
93+
pthread_key_delete(tofree[i]);
94+
}
1295

1396
void
1497
x_cgo_init(G *g)
@@ -20,6 +103,8 @@ x_cgo_init(G *g)
20103
pthread_attr_getstacksize(&attr, &size);
21104
g->stacklo = (uintptr)&attr - size + 4096;
22105
pthread_attr_destroy(&attr);
106+
107+
inittls();
23108
}
24109

25110

@@ -57,9 +142,10 @@ threadentry(void *v)
57142
ts = *(ThreadStart*)v;
58143
free(v);
59144

60-
// Move the g pointer into the slot reserved in thread local storage.
61-
// Constant must match the one in cmd/link/internal/ld/sym.go.
62-
asm volatile("movq %0, %%gs:0x18" :: "r"(ts.g));
145+
if (pthread_setspecific(k1, (void*)ts.g) != 0) {
146+
fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n");
147+
abort();
148+
}
63149

64150
crosscall_386(ts.fn);
65151
return nil;

src/runtime/cgo/gcc_darwin_amd64.c

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,60 @@
99
#include "libcgo_unix.h"
1010

1111
static void* threadentry(void*);
12+
static pthread_key_t k1;
13+
14+
#define magic1 (0x23581321345589ULL)
15+
16+
static void
17+
inittls(void)
18+
{
19+
uint64 x;
20+
pthread_key_t tofree[128], k;
21+
int i, ntofree;
22+
23+
/*
24+
* Same logic and code as gcc_darwin_386.c:/inittls, except that words
25+
* are 8 bytes long now, and the thread-local storage starts
26+
* at 0x60 on Leopard / Snow Leopard. So the offset is
27+
* 0x60+8*0x108 = 0x8a0.
28+
*
29+
* The linker and runtime hard-code this constant offset
30+
* from %gs where we expect to find g.
31+
* Known to src/cmd/link/internal/ld/sym.go:/0x8a0
32+
* and to src/runtime/sys_darwin_amd64.s:/0x8a0
33+
*
34+
* As disgusting as on the 386; same justification.
35+
*/
36+
ntofree = 0;
37+
for(;;) {
38+
if(pthread_key_create(&k, nil) != 0) {
39+
fprintf(stderr, "runtime/cgo: pthread_key_create failed\n");
40+
abort();
41+
}
42+
pthread_setspecific(k, (void*)magic1);
43+
asm volatile("movq %%gs:0x8a0, %0" : "=r"(x));
44+
pthread_setspecific(k, 0);
45+
if(x == magic1) {
46+
k1 = k;
47+
break;
48+
}
49+
if(ntofree >= nelem(tofree)) {
50+
fprintf(stderr, "runtime/cgo: could not obtain pthread_keys\n");
51+
fprintf(stderr, "\ttried");
52+
for(i=0; i<ntofree; i++)
53+
fprintf(stderr, " %#x", (unsigned)tofree[i]);
54+
fprintf(stderr, "\n");
55+
abort();
56+
}
57+
tofree[ntofree++] = k;
58+
}
59+
60+
/*
61+
* We got the key we wanted. Free the others.
62+
*/
63+
for(i=0; i<ntofree; i++)
64+
pthread_key_delete(tofree[i]);
65+
}
1266

1367
void
1468
x_cgo_init(G *g)
@@ -20,6 +74,8 @@ x_cgo_init(G *g)
2074
pthread_attr_getstacksize(&attr, &size);
2175
g->stacklo = (uintptr)&attr - size + 4096;
2276
pthread_attr_destroy(&attr);
77+
78+
inittls();
2379
}
2480

2581

@@ -57,9 +113,10 @@ threadentry(void *v)
57113
ts = *(ThreadStart*)v;
58114
free(v);
59115

60-
// Move the g pointer into the slot reserved in thread local storage.
61-
// Constant must match the one in cmd/link/internal/ld/sym.go.
62-
asm volatile("movq %0, %%gs:0x30" :: "r"(ts.g));
116+
if (pthread_setspecific(k1, (void*)ts.g) != 0) {
117+
fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n");
118+
abort();
119+
}
63120

64121
crosscall_amd64(ts.fn);
65122
return nil;

src/runtime/sys_darwin_386.s

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -525,14 +525,15 @@ TEXT runtime·setldt(SB),NOSPLIT,$32
525525
* When linking against the system libraries,
526526
* we use its pthread_create and let it set up %gs
527527
* for us. When we do that, the private storage
528-
* we get is not at 0(GS) but at 0x18(GS).
529-
* The linker rewrites 0(TLS) into 0x18(GS) for us.
528+
* we get is not at 0(GS) but at 0x468(GS).
529+
* The linker (formerly 8l) rewrites 0(TLS) into 0x468(GS) for us.
530530
* To accommodate that rewrite, we translate the
531-
* address here so that 0x18(GS) maps to 0(address).
531+
* address and limit here so that 0x468(GS) maps to 0(address).
532532
*
533-
* Constant must match the one in cmd/link/internal/ld/sym.go.
533+
* See cgo/gcc_darwin_386.c:/468 for the derivation
534+
* of the constant.
534535
*/
535-
SUBL $0x18, BX
536+
SUBL $0x468, BX
536537

537538
/*
538539
* Must set up as USER_CTHREAD segment because

src/runtime/sys_darwin_amd64.s

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -569,10 +569,11 @@ TEXT runtime·mach_semaphore_signal_all(SB),NOSPLIT,$0
569569
// set tls base to DI
570570
TEXT runtime·settls(SB),NOSPLIT,$32
571571
/*
572-
* Same as in sys_darwin_386.s, but a different constant.
573-
* Constant must match the one in cmd/link/internal/ld/sym.go.
574-
*/
575-
SUBQ $0x30, DI
572+
* Same as in sys_darwin_386.s:/ugliness, different constant.
573+
* See cgo/gcc_darwin_amd64.c for the derivation
574+
* of the constant.
575+
*/
576+
SUBQ $0x8a0, DI
576577

577578
MOVL $(0x3000000+3), AX // thread_fast_set_cthread_self - machdep call #3
578579
SYSCALL

0 commit comments

Comments
 (0)