Skip to content

Commit f16f128

Browse files
committed
Merge pull request #441 from dawgfoto/SharedRuntime_6
SectionGroup support for TLS
2 parents 53480d9 + 448edfe commit f16f128

13 files changed

244 additions and 279 deletions (+244 -279 lines changed)

src/core/sys/windows/threadaux.d

+8 -2
Original file line number | Diff line number | Diff line change
@@ -266,10 +266,15 @@ private:
266266
// execute function on the TLS for the given thread
267267
alias extern(C) void function() externCVoidFunc;
268268
static void impersonate_thread( uint id, externCVoidFunc fn )
269+
{
270+
impersonate_thread(id, () => fn());
271+
}
272+
273+
static void impersonate_thread( uint id, scope void delegate() dg)
269274
{
270275
if( id == GetCurrentThreadId() )
271276
{
272-
fn();
277+
dg();
273278
return;
274279
}
275280

@@ -284,7 +289,7 @@ private:
284289
return;
285290

286291
curteb[11] = tlsarray;
287-
fn();
292+
dg();
288293
curteb[11] = curtlsarray;
289294
}
290295
}
@@ -295,6 +300,7 @@ public:
295300
alias thread_aux.getThreadStackBottom getThreadStackBottom;
296301
alias thread_aux.OpenThreadHandle OpenThreadHandle;
297302
alias thread_aux.enumProcessThreads enumProcessThreads;
303+
alias thread_aux.impersonate_thread impersonate_thread;
298304

299305
// get the start of the TLS memory of the thread with the given handle
300306
void* GetTlsDataAddress( HANDLE hnd ) nothrow

src/core/thread.d

+17 -193
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ module core.thread;
1414

1515
public import core.time; // for Duration
1616
static import rt.tlsgc;
17+
import rt.sections;
1718

1819
// this should be true for most architectures
1920
version = StackGrowsDown;
@@ -124,40 +125,6 @@ version( Windows )
124125
extern (Windows) alias uint function(void*) btex_fptr;
125126
extern (C) uintptr_t _beginthreadex(void*, uint, btex_fptr, void*, uint, uint*);
126127

127-
version( DigitalMars )
128-
{
129-
version (Win32)
130-
{
131-
// NOTE: The memory between the addresses of _tlsstart and _tlsend
132-
// is the storage for thread-local data in D 2.0. Both of
133-
// these are defined in dm\src\win32\tlsseg.asm by DMC.
134-
extern (C)
135-
{
136-
extern int _tlsstart;
137-
extern int _tlsend;
138-
}
139-
}
140-
version (Win64)
141-
{
142-
// NOTE: The memory between the addresses of _tls_start and _tls_end
143-
// is the storage for thread-local data in D 2.0. Both of
144-
// these are defined in LIBCMT:tlssub.obj
145-
extern (C)
146-
{
147-
extern int _tls_start;
148-
extern int _tls_end;
149-
}
150-
alias _tls_start _tlsstart;
151-
alias _tls_end _tlsend;
152-
}
153-
}
154-
else
155-
{
156-
__gshared int _tlsstart;
157-
alias _tlsstart _tlsend;
158-
}
159-
160-
161128
//
162129
// Entry point for Windows threads
163130
//
@@ -169,10 +136,7 @@ version( Windows )
169136
assert( obj.m_curr is &obj.m_main );
170137
obj.m_main.bstack = getStackBottom();
171138
obj.m_main.tstack = obj.m_main.bstack;
172-
173-
void* pstart = cast(void*) &_tlsstart;
174-
void* pend = cast(void*) &_tlsend;
175-
obj.m_tls = pstart[0 .. pend - pstart];
139+
obj.m_tlsgcdata = rt.tlsgc.init();
176140

177141
Thread.setThis( obj );
178142
//Thread.add( obj );
@@ -181,7 +145,6 @@ version( Windows )
181145
Thread.remove( obj );
182146
}
183147
Thread.add( &obj.m_main );
184-
obj.m_tlsgcdata = rt.tlsgc.init();
185148

186149
// NOTE: No GC allocations may occur until the stack pointers have
187150
// been set and Thread.getThis returns a valid reference to
@@ -267,44 +230,6 @@ else version( Posix )
267230
import gcc.builtins;
268231
}
269232

270-
version( DigitalMars )
271-
{
272-
version( linux )
273-
{
274-
extern (C)
275-
{
276-
extern int _tlsstart;
277-
extern int _tlsend;
278-
}
279-
}
280-
else version( OSX )
281-
{
282-
extern (C)
283-
{
284-
__gshared void[][2] _tls_data_array;
285-
}
286-
}
287-
else version( FreeBSD )
288-
{
289-
extern (C)
290-
{
291-
extern void* _tlsstart;
292-
extern void* _tlsend;
293-
}
294-
}
295-
else
296-
{
297-
__gshared int _tlsstart;
298-
alias _tlsstart _tlsend;
299-
}
300-
}
301-
else
302-
{
303-
__gshared int _tlsstart;
304-
alias _tlsstart _tlsend;
305-
}
306-
307-
308233
//
309234
// Entry point for POSIX threads
310235
//
@@ -316,27 +241,7 @@ else version( Posix )
316241
assert( obj.m_curr is &obj.m_main );
317242
obj.m_main.bstack = getStackBottom();
318243
obj.m_main.tstack = obj.m_main.bstack;
319-
320-
version (OSX)
321-
{
322-
// NOTE: OSX does not support TLS, so we do it ourselves. The TLS
323-
// data output by the compiler is bracketed by _tls_data_array[2],
324-
// so make a copy of it for each thread.
325-
const sz0 = (_tls_data_array[0].length + 15) & ~cast(size_t)15;
326-
const sz2 = sz0 + _tls_data_array[1].length;
327-
auto p = malloc( sz2 );
328-
assert( p );
329-
obj.m_tls = p[0 .. sz2];
330-
memcpy( p, _tls_data_array[0].ptr, _tls_data_array[0].length );
331-
memcpy( p + sz0, _tls_data_array[1].ptr, _tls_data_array[1].length );
332-
scope (exit) { free( p ); obj.m_tls = null; }
333-
}
334-
else
335-
{
336-
auto pstart = cast(void*) &_tlsstart;
337-
auto pend = cast(void*) &_tlsend;
338-
obj.m_tls = pstart[0 .. pend - pstart];
339-
}
244+
obj.m_tlsgcdata = rt.tlsgc.init();
340245

341246
obj.m_isRunning = true;
342247
Thread.setThis( obj );
@@ -350,7 +255,6 @@ else version( Posix )
350255
obj.m_isRunning = false;
351256
}
352257
Thread.add( &obj.m_main );
353-
obj.m_tlsgcdata = rt.tlsgc.init();
354258

355259
static extern (C) void thread_cleanupHandler( void* arg ) nothrow
356260
{
@@ -1388,7 +1292,6 @@ private:
13881292
Context m_main;
13891293
Context* m_curr;
13901294
bool m_lock;
1391-
void[] m_tls; // spans implicit thread local storage
13921295
rt.tlsgc.Data* m_tlsgcdata;
13931296

13941297
version( Windows )
@@ -1678,13 +1581,13 @@ private:
16781581
version (D_LP64)
16791582
{
16801583
version (Windows)
1681-
static assert(__traits(classInstanceSize, Thread) == 312);
1584+
static assert(__traits(classInstanceSize, Thread) == 296);
16821585
else version (OSX)
1683-
static assert(__traits(classInstanceSize, Thread) == 320);
1586+
static assert(__traits(classInstanceSize, Thread) == 304);
16841587
else version (Solaris)
1685-
static assert(__traits(classInstanceSize, Thread) == 176);
1588+
static assert(__traits(classInstanceSize, Thread) == 160);
16861589
else version (Posix)
1687-
static assert(__traits(classInstanceSize, Thread) == 184);
1590+
static assert(__traits(classInstanceSize, Thread) == 168);
16881591
else
16891592
static assert(0, "Platform not supported.");
16901593
}
@@ -1693,11 +1596,11 @@ else
16931596
static assert((void*).sizeof == 4); // 32-bit
16941597

16951598
version (Windows)
1696-
static assert(__traits(classInstanceSize, Thread) == 128);
1599+
static assert(__traits(classInstanceSize, Thread) == 120);
16971600
else version (OSX)
1698-
static assert(__traits(classInstanceSize, Thread) == 128);
1601+
static assert(__traits(classInstanceSize, Thread) == 120);
16991602
else version (Posix)
1700-
static assert(__traits(classInstanceSize, Thread) == 92);
1603+
static assert(__traits(classInstanceSize, Thread) == 84);
17011604
else
17021605
static assert(0, "Platform not supported.");
17031606
}
@@ -1857,6 +1760,7 @@ extern (C) Thread thread_attachThis()
18571760
thisThread.m_isRunning = true;
18581761
}
18591762
thisThread.m_isDaemon = true;
1763+
thisThread.m_tlsgcdata = rt.tlsgc.init();
18601764
Thread.setThis( thisThread );
18611765

18621766
version( OSX )
@@ -1865,34 +1769,10 @@ extern (C) Thread thread_attachThis()
18651769
assert( thisThread.m_tmach != thisThread.m_tmach.init );
18661770
}
18671771

1868-
version (OSX)
1869-
{
1870-
//printf("test3 %p %p\n", _tls_data_array[0].ptr, &_tls_data_array[1][length]);
1871-
//printf("test3 %p %p\n", &_tls_beg, &_tls_end);
1872-
// NOTE: OSX does not support TLS, so we do it ourselves. The TLS
1873-
// data output by the compiler is bracketed by _tls_data_array[2],
1874-
// so make a copy of it for each thread.
1875-
const sz0 = (_tls_data_array[0].length + 15) & ~cast(size_t)15;
1876-
const sz2 = sz0 + _tls_data_array[1].length;
1877-
auto p = gc_malloc( sz2 );
1878-
assert( p );
1879-
thisThread.m_tls = p[0 .. sz2];
1880-
memcpy( p, _tls_data_array[0].ptr, _tls_data_array[0].length );
1881-
memcpy( p + sz0, _tls_data_array[1].ptr, _tls_data_array[1].length );
1882-
// used gc_malloc so no need to free
1883-
}
1884-
else
1885-
{
1886-
auto pstart = cast(void*) &_tlsstart;
1887-
auto pend = cast(void*) &_tlsend;
1888-
thisThread.m_tls = pstart[0 .. pend - pstart];
1889-
}
1890-
18911772
Thread.add( thisThread );
18921773
Thread.add( thisContext );
18931774
if( Thread.sm_main !is null )
18941775
multiThreadedFlag = true;
1895-
thisThread.m_tlsgcdata = rt.tlsgc.init();
18961776
return thisThread;
18971777
}
18981778

@@ -1932,44 +1812,24 @@ version( Windows )
19321812
thisContext.bstack = bstack;
19331813
thisContext.tstack = thisContext.bstack;
19341814

1935-
if( addr == GetCurrentThreadId() )
1936-
{
1937-
thisThread.m_hndl = GetCurrentThreadHandle();
1938-
}
1939-
else
1940-
{
1941-
thisThread.m_hndl = OpenThreadHandle( addr );
1942-
}
1943-
19441815
thisThread.m_isDaemon = true;
19451816

19461817
if( addr == GetCurrentThreadId() )
19471818
{
1948-
auto pstart = cast(void*) &_tlsstart;
1949-
auto pend = cast(void*) &_tlsend;
1950-
thisThread.m_tls = pstart[0 .. pend - pstart];
1819+
thisThread.m_hndl = GetCurrentThreadHandle();
1820+
thisThread.m_tlsgcdata = rt.tlsgc.init();
19511821
Thread.setThis( thisThread );
19521822
}
19531823
else
19541824
{
1955-
// TODO: This seems wrong. If we're binding threads from
1956-
// a DLL, will they always have space reserved for
1957-
// the TLS chunk we expect? I don't know Windows
1958-
// well enough to say.
1959-
auto pstart = cast(void*) &_tlsstart;
1960-
auto pend = cast(void*) &_tlsend;
1961-
auto pos = GetTlsDataAddress( thisThread.m_hndl );
1962-
if( pos ) // on x64, threads without TLS happen to exist
1963-
thisThread.m_tls = pos[0 .. pend - pstart];
1964-
else
1965-
thisThread.m_tls = [];
1825+
thisThread.m_hndl = OpenThreadHandle( addr );
1826+
impersonate_thread(addr, { thisThread.m_tlsgcdata = rt.tlsgc.init(); });
19661827
}
19671828

19681829
Thread.add( thisThread );
19691830
Thread.add( thisContext );
19701831
if( Thread.sm_main !is null )
19711832
multiThreadedFlag = true;
1972-
thisThread.m_tlsgcdata = rt.tlsgc.init();
19731833
return thisThread;
19741834
}
19751835
}
@@ -2632,16 +2492,15 @@ private void scanAllTypeImpl( scope ScanAllThreadsTypeFn scan, void* curStackTop
26322492

26332493
for( Thread t = Thread.sm_tbeg; t; t = t.next )
26342494
{
2635-
scan( ScanType.tls, t.m_tls.ptr, t.m_tls.ptr + t.m_tls.length );
2636-
26372495
version( Windows )
26382496
{
26392497
// Ideally, we'd pass ScanType.regs or something like that, but this
26402498
// would make portability annoying because it only makes sense on Windows.
26412499
scan( ScanType.stack, t.m_reg.ptr, t.m_reg.ptr + t.m_reg.length );
26422500
}
26432501

2644-
rt.tlsgc.scan(t.m_tlsgcdata, (p1, p2) => scan(ScanType.tls, p1, p2));
2502+
if (t.m_tlsgcdata !is null)
2503+
rt.tlsgc.scan(t.m_tlsgcdata, (p1, p2) => scan(ScanType.tls, p1, p2));
26452504
}
26462505
}
26472506

@@ -4534,38 +4393,3 @@ version( AsmX86_64_Posix )
45344393
fib.call();
45354394
}
45364395
}
4537-
4538-
4539-
version( OSX )
4540-
{
4541-
// NOTE: The Mach-O object file format does not allow for thread local
4542-
// storage declarations. So instead we roll our own by putting tls
4543-
// into the sections bracketed by _tls_beg and _tls_end.
4544-
//
4545-
// This function is called by the code emitted by the compiler. It
4546-
// is expected to translate an address into the TLS static data to
4547-
// the corresponding address in the TLS dynamic per-thread data.
4548-
extern (D) void* ___tls_get_addr( void* p )
4549-
{
4550-
// NOTE: p is an address in the TLS static data emitted by the
4551-
// compiler. If it isn't, something is disastrously wrong.
4552-
auto obj = Thread.getThis();
4553-
4554-
immutable off0 = cast(size_t)(p - _tls_data_array[0].ptr);
4555-
if (off0 < _tls_data_array[0].length)
4556-
{
4557-
return obj.m_tls.ptr + off0;
4558-
}
4559-
immutable off1 = cast(size_t)(p - _tls_data_array[1].ptr);
4560-
if (off1 < _tls_data_array[1].length)
4561-
{
4562-
size_t sz = (_tls_data_array[0].length + 15) & ~cast(size_t)15;
4563-
return obj.m_tls.ptr + sz + off1;
4564-
}
4565-
else
4566-
assert(0);
4567-
4568-
//assert( p >= cast(void*) &_tls_beg && p < cast(void*) &_tls_end );
4569-
//return obj.m_tls.ptr + (p - cast(void*) &_tls_beg);
4570-
}
4571-
}

0 commit comments

Comments
 (0)