/*
 * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers
 * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
 * Copyright (c) 2000 by Hewlett-Packard Company.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */
#include "private/gc_pmark.h"

#if defined(MSWIN32) && defined(__GNUC__)
# include <excpt.h>
#endif

/* We put this here to minimize the risk of inlining. */
void GC_noop(void *p, ...) {}

/* Single argument version, robust against whole program analysis. */
void GC_noop1(word x)
{
    static volatile word sink;

    sink = x;
}
/* mark_proc GC_mark_procs[MAX_MARK_PROCS] = {0} -- declared in gc_priv.h */

unsigned GC_n_mark_procs = GC_RESERVED_MARK_PROCS;

/* Initialize GC_obj_kinds properly and standard free lists properly.  */
/* This must be done statically since they may be accessed before      */
/* GC_init is called.                                                   */
/* It's done here, since we need to deal with mark descriptors.        */
struct obj_kind GC_obj_kinds[MAXOBJKINDS] = {
/* PTRFREE */ { &GC_aobjfreelist[0], 0 /* filled in dynamically */,
                0 | GC_DS_LENGTH, FALSE, FALSE },
/* NORMAL  */ { &GC_objfreelist[0], 0,
                0 | GC_DS_LENGTH,  /* Adjusted in GC_init_inner for EXTRA_BYTES */
                TRUE /* add length to descr */, TRUE },
/* UNCOLLECTABLE */
              { &GC_uobjfreelist[0], 0,
                0 | GC_DS_LENGTH, TRUE /* add length to descr */, TRUE },
# ifdef ATOMIC_UNCOLLECTABLE
   /* AUNCOLLECTABLE */
              { &GC_auobjfreelist[0], 0,
                0 | GC_DS_LENGTH, FALSE /* add length to descr */, FALSE },
# endif
# ifdef STUBBORN_ALLOC
/*STUBBORN*/ { &GC_sobjfreelist[0], 0,
                0 | GC_DS_LENGTH, TRUE /* add length to descr */, TRUE },
# endif
};

# ifdef ATOMIC_UNCOLLECTABLE
#   ifdef STUBBORN_ALLOC
      unsigned GC_n_kinds = 5;
#   else
      unsigned GC_n_kinds = 4;
#   endif
# else
#   ifdef STUBBORN_ALLOC
      unsigned GC_n_kinds = 4;
#   else
      unsigned GC_n_kinds = 3;
#   endif
# endif
#ifndef INITIAL_MARK_STACK_SIZE
#   define INITIAL_MARK_STACK_SIZE (1*HBLKSIZE)
                /* INITIAL_MARK_STACK_SIZE * sizeof(mse) should be a  */
                /* multiple of HBLKSIZE.                              */
                /* The incremental collector actually likes a larger  */
                /* size, since it wants to push all marked dirty objs */
                /* before marking anything new.  Currently we let it  */
                /* grow dynamically.                                  */
#endif
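
/* For orientation: a mark stack entry (mse, i.e. struct GC_ms_entry,  */
/* declared in gc_pmark.h) pairs the start address of a candidate      */
/* region with a mark descriptor, roughly:                             */
/*     typedef struct GC_ms_entry { ptr_t mse_start; word mse_descr; } */
/*     mse;                                                            */
/* The exact field types live in gc_pmark.h; this is only a reminder   */
/* of the layout assumed by the code below.                            */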
/*
 * Limits of stack for GC_mark routine.
 * All ranges between GC_mark_stack (incl.) and GC_mark_stack_top (incl.) still
 * need to be marked from.
 */

word GC_n_rescuing_pages;       /* Number of dirty pages we marked from */
                                /* excludes ptrfree pages, etc.         */

mse * GC_mark_stack;

mse * GC_mark_stack_limit;

size_t GC_mark_stack_size = 0;

#ifdef PARALLEL_MARK
# include "atomic_ops.h"

  mse * volatile GC_mark_stack_top;
        /* Updated only with mark lock held, but read asynchronously. */
  volatile AO_t GC_first_nonempty;
        /* Lowest entry on mark stack  */
        /* that may be nonempty.       */
        /* Updated only by initiating  */
        /* thread.                     */
#else
  mse * GC_mark_stack_top;
#endif

static struct hblk * scan_ptr;

mark_state_t GC_mark_state = MS_NONE;

GC_bool GC_mark_stack_too_small = FALSE;

GC_bool GC_objects_are_marked = FALSE;  /* Are there collectable marked */
                                        /* objects in the heap?         */
/* Is a collection in progress?  Note that this can return true in the */
/* nonincremental case, if a collection has been abandoned and the     */
/* mark state is now MS_INVALID.                                       */
GC_bool GC_collection_in_progress(void)
{
    return(GC_mark_state != MS_NONE);
}
/* clear all mark bits in the header */
void GC_clear_hdr_marks(hdr *hhdr)
{
    size_t last_bit = FINAL_MARK_BIT(hhdr -> hb_sz);

#   ifdef USE_MARK_BYTES
      BZERO(hhdr -> hb_marks, MARK_BITS_SZ);
      hhdr -> hb_marks[last_bit] = 1;
#   else
      BZERO(hhdr -> hb_marks, MARK_BITS_SZ*sizeof(word));
      set_mark_bit_from_hdr(hhdr, last_bit);
#   endif
    hhdr -> hb_n_marks = 0;
}
/* Set all mark bits in the header.  Used for uncollectable blocks. */
void GC_set_hdr_marks(hdr *hhdr)
{
    unsigned i;
    size_t sz = hhdr -> hb_sz;
    size_t n_marks = FINAL_MARK_BIT(sz);

#   ifdef USE_MARK_BYTES
      for (i = 0; i <= n_marks; i += MARK_BIT_OFFSET(sz)) {
        hhdr -> hb_marks[i] = 1;
      }
#   else
      for (i = 0; i < divWORDSZ(n_marks + WORDSZ); ++i) {
        hhdr -> hb_marks[i] = ONES;
      }
#   endif
#   ifdef MARK_BIT_PER_OBJ
      hhdr -> hb_n_marks = n_marks - 1;
#   else
      hhdr -> hb_n_marks = HBLK_OBJS(sz);
#   endif
}
/*
 * Clear all mark bits associated with block h.
 */
static void clear_marks_for_block(struct hblk *h, word dummy)
{
    register hdr * hhdr = HDR(h);

    if (IS_UNCOLLECTABLE(hhdr -> hb_obj_kind)) return;
        /* Mark bit for these is cleared only once the object is    */
        /* explicitly deallocated.  This either frees the block, or */
        /* the bit is cleared once the object is on the free list.  */
    GC_clear_hdr_marks(hhdr);
}
/* Slow but general routines for setting/clearing/asking about mark bits */
void GC_set_mark_bit(ptr_t p)
{
    struct hblk *h = HBLKPTR(p);
    hdr * hhdr = HDR(h);
    word bit_no = MARK_BIT_NO(p - (ptr_t)h, hhdr -> hb_sz);

    if (!mark_bit_from_hdr(hhdr, bit_no)) {
      set_mark_bit_from_hdr(hhdr, bit_no);
      ++hhdr -> hb_n_marks;
    }
}
void GC_clear_mark_bit(ptr_t p)
{
    struct hblk *h = HBLKPTR(p);
    hdr * hhdr = HDR(h);
    word bit_no = MARK_BIT_NO(p - (ptr_t)h, hhdr -> hb_sz);

    if (mark_bit_from_hdr(hhdr, bit_no)) {
      size_t n_marks;
      clear_mark_bit_from_hdr(hhdr, bit_no);
      n_marks = hhdr -> hb_n_marks - 1;
#     ifdef PARALLEL_MARK
        if (n_marks != 0)
          hhdr -> hb_n_marks = n_marks;
        /* Don't decrement to zero.  The counts are approximate due to */
        /* concurrency issues, but we need to ensure that a count of   */
        /* zero implies an empty block.                                */
#     else
        hhdr -> hb_n_marks = n_marks;
#     endif
    }
}
GC_bool GC_is_marked(ptr_t p)
{
    struct hblk *h = HBLKPTR(p);
    hdr * hhdr = HDR(h);
    word bit_no = MARK_BIT_NO(p - (ptr_t)h, hhdr -> hb_sz);

    return((GC_bool)mark_bit_from_hdr(hhdr, bit_no));
}
/*
 * Clear mark bits in all allocated heap blocks.  This invalidates
 * the marker invariant, and sets GC_mark_state to reflect this.
 * (This implicitly starts marking to reestablish the invariant.)
 */
void GC_clear_marks(void)
{
    GC_apply_to_all_blocks(clear_marks_for_block, (word)0);
    GC_objects_are_marked = FALSE;
    GC_mark_state = MS_INVALID;
}
/* Initiate a garbage collection.  Initiates a full collection if the */
/* mark state is invalid.                                             */
void GC_initiate_gc(void)
{
    if (GC_dirty_maintained) GC_read_dirty();
#   ifdef STUBBORN_ALLOC
        GC_read_changed();
#   endif
#   ifdef CHECKSUMS
    {
        extern void GC_check_dirty();

        if (GC_dirty_maintained) GC_check_dirty();
    }
#   endif
    GC_n_rescuing_pages = 0;
    if (GC_mark_state == MS_NONE) {
        GC_mark_state = MS_PUSH_RESCUERS;
    } else if (GC_mark_state != MS_INVALID) {
        ABORT("unexpected state");
    } /* else this is really a full collection, and mark */
      /* bits are invalid.                               */
    scan_ptr = 0;
}
static void alloc_mark_stack(size_t);

#if defined(MSWIN32) || defined(USE_PROC_FOR_LIBRARIES) && defined(THREADS)
    /* Under rare conditions, we may end up marking from nonexistent memory. */
    /* Hence we need to be prepared to recover by running GC_mark_some       */
    /* with a suitable handler in place.                                     */
#   define WRAP_MARK_SOME
#endif
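
/* Reading aid (not normative; the case analysis in GC_mark_some below  */
/* is the truth): GC_initiate_gc() moves MS_NONE to MS_PUSH_RESCUERS;   */
/* a full collection starts from MS_INVALID and reaches MS_ROOTS_PUSHED */
/* via MS_PARTIALLY_INVALID and/or MS_PUSH_UNCOLLECTABLE.  All paths    */
/* return to MS_NONE once the mark stack has been drained; MS_INVALID   */
/* is re-entered whenever the mark invariant is broken (mark stack      */
/* overflow, bad roots) and forces a restart.                           */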
/* Perform a small amount of marking.                    */
/* We try to touch roughly a page of memory.             */
/* Return TRUE if we just finished a mark phase.         */
/* Cold_gc_frame is an address inside a GC frame that    */
/* remains valid until all marking is complete.          */
/* A zero value indicates that it's OK to miss some      */
/* register values.                                      */
/* We hold the allocation lock.  In the case of          */
/* incremental collection, the world may not be stopped. */
#ifdef WRAP_MARK_SOME
  /* For win32, this is called after we establish a structured */
  /* exception handler, in case Windows unmaps one of our root */
  /* segments.  See below.  In either case, we acquire the     */
  /* allocator lock long before we get here.                   */
  GC_bool GC_mark_some_inner(ptr_t cold_gc_frame)
#else
  GC_bool GC_mark_some(ptr_t cold_gc_frame)
#endif
{
    switch(GC_mark_state) {
        case MS_NONE:
            return(FALSE);

        case MS_PUSH_RESCUERS:
            if (GC_mark_stack_top
                >= GC_mark_stack_limit - INITIAL_MARK_STACK_SIZE/2) {
                /* Go ahead and mark, even though that might cause us to */
                /* see more marked dirty objects later on.  Avoid this   */
                /* in the future.                                        */
                GC_mark_stack_too_small = TRUE;
                MARK_FROM_MARK_STACK();
                return(FALSE);
            } else {
                scan_ptr = GC_push_next_marked_dirty(scan_ptr);
                if (scan_ptr == 0) {
                    if (GC_print_stats) {
                        GC_log_printf("Marked from %u dirty pages\n",
                                      GC_n_rescuing_pages);
                    }
                    GC_push_roots(FALSE, cold_gc_frame);
                    GC_objects_are_marked = TRUE;
                    if (GC_mark_state != MS_INVALID) {
                        GC_mark_state = MS_ROOTS_PUSHED;
                    }
                }
            }
            return(FALSE);

        case MS_PUSH_UNCOLLECTABLE:
            if (GC_mark_stack_top
                >= GC_mark_stack + GC_mark_stack_size/4) {
#               ifdef PARALLEL_MARK
                  /* Avoid this, since we don't parallelize the marker */
                  /* from nested mark stacks.                          */
                  if (GC_parallel) GC_mark_stack_too_small = TRUE;
#               endif
                MARK_FROM_MARK_STACK();
                return(FALSE);
            } else {
                scan_ptr = GC_push_next_marked_uncollectable(scan_ptr);
                if (scan_ptr == 0) {
                    GC_push_roots(TRUE, cold_gc_frame);
                    GC_objects_are_marked = TRUE;
                    if (GC_mark_state != MS_INVALID) {
                        GC_mark_state = MS_ROOTS_PUSHED;
                    }
                }
            }
            return(FALSE);

        case MS_ROOTS_PUSHED:
#           ifdef PARALLEL_MARK
              /* In the incremental GC case, this currently doesn't   */
              /* quite do the right thing, since it runs to           */
              /* completion.  On the other hand, starting a           */
              /* parallel marker is expensive, so perhaps it is       */
              /* the right thing?                                     */
              /* Eventually, incremental marking should run           */
              /* asynchronously in multiple threads, without grabbing */
              /* the allocation lock.                                 */
              if (GC_parallel) {
                  GC_do_parallel_mark();
                  GC_ASSERT(GC_mark_stack_top < (mse *)GC_first_nonempty);
                  GC_mark_stack_top = GC_mark_stack - 1;
                  if (GC_mark_stack_too_small) {
                      alloc_mark_stack(2*GC_mark_stack_size);
                  }
                  if (GC_mark_state == MS_ROOTS_PUSHED) {
                      GC_mark_state = MS_NONE;
                      return(TRUE);
                  } else {
                      return(FALSE);
                  }
              }
#           endif
            if (GC_mark_stack_top >= GC_mark_stack) {
                MARK_FROM_MARK_STACK();
                return(FALSE);
            } else {
                GC_mark_state = MS_NONE;
                if (GC_mark_stack_too_small) {
                    alloc_mark_stack(2*GC_mark_stack_size);
                }
                return(TRUE);
            }

        case MS_INVALID:
        case MS_PARTIALLY_INVALID:
            if (!GC_objects_are_marked) {
                GC_mark_state = MS_PUSH_UNCOLLECTABLE;
                return(FALSE);
            }
            if (GC_mark_stack_top >= GC_mark_stack) {
                MARK_FROM_MARK_STACK();
                return(FALSE);
            }
            if (scan_ptr == 0 && GC_mark_state == MS_INVALID) {
                /* About to start a heap scan for marked objects. */
                /* Mark stack is empty.  OK to reallocate.        */
                if (GC_mark_stack_too_small) {
                    alloc_mark_stack(2*GC_mark_stack_size);
                }
                GC_mark_state = MS_PARTIALLY_INVALID;
            }
            scan_ptr = GC_push_next_marked(scan_ptr);
            if (scan_ptr == 0 && GC_mark_state == MS_PARTIALLY_INVALID) {
                GC_push_roots(TRUE, cold_gc_frame);
                GC_objects_are_marked = TRUE;
                if (GC_mark_state != MS_INVALID) {
                    GC_mark_state = MS_ROOTS_PUSHED;
                }
            }
            return(FALSE);

        default:
            ABORT("GC_mark_some: bad state");
            return(FALSE);
    }
}
#if defined(MSWIN32) && defined(__GNUC__)

    typedef struct {
      EXCEPTION_REGISTRATION ex_reg;
      void *alt_path;
    } ext_ex_regn;

    static EXCEPTION_DISPOSITION mark_ex_handler(
        struct _EXCEPTION_RECORD *ex_rec,
        void *est_frame,
        struct _CONTEXT *context,
        void *disp_ctxt)
    {
        if (ex_rec->ExceptionCode == STATUS_ACCESS_VIOLATION) {

          ext_ex_regn *xer = (ext_ex_regn *)est_frame;

          /* Unwind from the inner function assuming the standard */
          /* function prologue.                                    */
          /* Assumes code has not been compiled with               */
          /* -fomit-frame-pointer.                                 */
          context->Esp = context->Ebp;
          context->Ebp = *((DWORD *)context->Esp);
          context->Esp = context->Esp - 8;

          /* Resume execution at the "real" handler within the */
          /* wrapper function.                                  */
          context->Eip = (DWORD )(xer->alt_path);

          return ExceptionContinueExecution;

        } else {
          return ExceptionContinueSearch;
        }
    }
# endif /* __GNUC__ && MSWIN32 */
#ifdef GC_WIN32_THREADS
  extern GC_bool GC_started_thread_while_stopped(void);
  /* In win32_threads.c.  Did we invalidate mark phase with an */
  /* unexpected thread start?                                  */
#endif
# ifdef WRAP_MARK_SOME
  GC_bool GC_mark_some(ptr_t cold_gc_frame)
  {
      GC_bool ret_val;

#   ifdef MSWIN32
#    ifndef __GNUC__
      /* Windows 98 appears to asynchronously create and remove */
      /* writable memory mappings, for reasons we haven't yet   */
      /* understood.  Since we look for writable regions to     */
      /* determine the root set, we may try to mark from an     */
      /* address range that disappeared since we started the    */
      /* collection.  Thus we have to recover from faults here. */
      /* This code does not appear to be necessary for Windows  */
      /* 95/NT/2000.  Note that this code should never generate */
      /* an incremental GC write fault.                         */
      /* It's conceivable that this is the same issue with      */
      /* terminating threads that we see with Linux and         */
      /* USE_PROC_FOR_LIBRARIES.                                */

      __try {
          ret_val = GC_mark_some_inner(cold_gc_frame);
      } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
                EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
          goto handle_ex;
      }
#     ifdef GC_WIN32_THREADS
      /* With DllMain-based thread tracking, a thread may have        */
      /* started while we were marking.  This is logically equivalent */
      /* to the exception case; our results are invalid and we have   */
      /* to start over.  This cannot be prevented since we can't      */
      /* block in DllMain.                                            */
      if (GC_started_thread_while_stopped()) goto handle_ex;
#     endif
     rm_handler:
      return ret_val;

#    else /* __GNUC__ */

      /* Manually install an exception handler since GCC does   */
      /* not yet support Structured Exception Handling (SEH) on */
      /* Win32.                                                  */

      ext_ex_regn er;

      er.alt_path = &&handle_ex;
      er.ex_reg.handler = mark_ex_handler;
      asm volatile ("movl %%fs:0, %0" : "=r" (er.ex_reg.prev));
      asm volatile ("movl %0, %%fs:0" : : "r" (&er));
      ret_val = GC_mark_some_inner(cold_gc_frame);
      /* Prevent GCC from considering the following code unreachable */
      /* and thus eliminating it.                                    */
      if (er.alt_path == 0)
          goto handle_ex;

    rm_handler:
      /* Uninstall the exception handler */
      asm volatile ("mov %0, %%fs:0" : : "r" (er.ex_reg.prev));
      return ret_val;

#    endif /* __GNUC__ */
#   else /* !MSWIN32 */
      /* Here we are handling the case in which /proc is used for root */
      /* finding, and we have threads.  We may find a stack for a      */
      /* thread that is in the process of exiting, and disappears      */
      /* while we are marking it.  This seems extremely difficult to   */
      /* avoid otherwise.                                              */
      if (GC_incremental)
          WARN("Incremental GC incompatible with /proc roots\n", 0);
          /* I'm not sure if this could still work ... */
      GC_setup_temporary_fault_handler();
      if(SETJMP(GC_jmp_buf) != 0) goto handle_ex;
      ret_val = GC_mark_some_inner(cold_gc_frame);
    rm_handler:
      GC_reset_fault_handler();
      return ret_val;

#   endif /* !MSWIN32 */

handle_ex:
    /* Exception handler starts here for all cases. */
      if (GC_print_stats) {
        GC_log_printf("Caught ACCESS_VIOLATION in marker. "
                      "Memory mapping disappeared.\n");
      }

      /* We have bad roots on the stack.  Discard mark stack. */
      /* Rescan from marked objects.  Redetermine roots.      */
      GC_invalidate_mark_state();
      scan_ptr = 0;

      ret_val = FALSE;
      goto rm_handler;  /* Back to platform-specific code. */
  }
#endif /* WRAP_MARK_SOME */
GC_bool GC_mark_stack_empty(void)
{
    return(GC_mark_stack_top < GC_mark_stack);
}

void GC_invalidate_mark_state(void)
{
    GC_mark_state = MS_INVALID;
    GC_mark_stack_top = GC_mark_stack-1;
}
mse * GC_signal_mark_stack_overflow(mse *msp)
{
    GC_mark_state = MS_INVALID;
    GC_mark_stack_too_small = TRUE;
    if (GC_print_stats) {
        GC_log_printf("Mark stack overflow; current size = %lu entries\n",
                      GC_mark_stack_size);
    }
    return(msp - GC_MARK_STACK_DISCARDS);
}
/*
 * Mark objects pointed to by the regions described by
 * mark stack entries between mark_stack and mark_stack_top,
 * inclusive.  Assumes the upper limit of a mark stack entry
 * is never 0.  A mark stack entry never has size 0.
 * We try to traverse on the order of a hblk of memory before we return.
 * Caller is responsible for calling this until the mark stack is empty.
 * Note that this is the most performance critical routine in the
 * collector.  Hence it contains all sorts of ugly hacks to speed
 * things up.  In particular, we avoid procedure calls on the common
 * path, we take advantage of peculiarities of the mark descriptor
 * encoding, we optionally maintain a cache for the block address to
 * header mapping, we prefetch when an object is "grayed", etc.
 */
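/* Reminder of the mark descriptor encoding relied on below (see     */
/* gc_mark.h for the authoritative definitions): the low two bits of */
/* mse_descr are a tag, masked by GC_DS_TAGS, with                   */
/*   GC_DS_LENGTH     -- descriptor is simply the object length,     */
/*   GC_DS_BITMAP     -- high-order bits select the words to scan,   */
/*   GC_DS_PROC       -- call the mark procedure PROC(descr),        */
/*   GC_DS_PER_OBJECT -- descriptor is stored in (or via) the object.*/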
mse * GC_mark_from(mse *mark_stack_top, mse *mark_stack, mse *mark_stack_limit)
{
  signed_word credit = HBLKSIZE;  /* Remaining credit for marking work */
  ptr_t current_p;      /* Pointer to current candidate ptr.          */
  word current;         /* Candidate pointer.                         */
  ptr_t limit;          /* (Incl) limit of current candidate range.   */
  word descr;
  ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
  ptr_t least_ha = GC_least_plausible_heap_addr;

# define SPLIT_RANGE_WORDS 128  /* Must be power of 2. */

  GC_objects_are_marked = TRUE;
# ifdef OS2 /* Use untweaked version to circumvent compiler problem */
  while (mark_stack_top >= mark_stack && credit >= 0) {
# else
  while ((((ptr_t)mark_stack_top - (ptr_t)mark_stack) | credit) >= 0) {
# endif
    current_p = mark_stack_top -> mse_start;
    descr = mark_stack_top -> mse_descr;
  retry:
    /* current_p and descr describe the current object.                */
    /* *mark_stack_top is vacant.                                      */
    /* The following is 0 only for small objects described by a simple */
    /* length descriptor.  For many applications this is the common    */
    /* case, so we try to detect it quickly.                           */
    if (descr & ((~(WORDS_TO_BYTES(SPLIT_RANGE_WORDS) - 1)) | GC_DS_TAGS)) {
      word tag = descr & GC_DS_TAGS;

      switch(tag) {
        case GC_DS_LENGTH:
          /* Process part of the range to avoid pushing too much on the */
          /* stack.                                                     */
          GC_ASSERT(descr < (word)GC_greatest_plausible_heap_addr
                            - (word)GC_least_plausible_heap_addr);
#         ifdef ENABLE_TRACE
            if (GC_trace_addr >= current_p
                && GC_trace_addr < current_p + descr) {
                GC_log_printf("GC:%d Large section; start %p len %lu\n",
                              GC_gc_no, current_p, (unsigned long) descr);
            }
#         endif /* ENABLE_TRACE */
#         ifdef PARALLEL_MARK
#           define SHARE_BYTES 2048
            if (descr > SHARE_BYTES && GC_parallel
                && mark_stack_top < mark_stack_limit - 1) {
              int new_size = (descr/2) & ~(sizeof(word)-1);
              mark_stack_top -> mse_start = current_p;
              mark_stack_top -> mse_descr = new_size + sizeof(word);
                                        /* makes sure we handle */
                                        /* misaligned pointers. */
              mark_stack_top++;
#             ifdef ENABLE_TRACE
                if (GC_trace_addr >= current_p
                    && GC_trace_addr < current_p + descr) {
                    GC_log_printf("GC:%d splitting (parallel) %p at %p\n",
                                  GC_gc_no, current_p, current_p + new_size);
                }
#             endif /* ENABLE_TRACE */
              current_p += new_size;
              descr -= new_size;
              goto retry;
            }
#         endif /* PARALLEL_MARK */
          mark_stack_top -> mse_start =
                limit = current_p + WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
          mark_stack_top -> mse_descr =
                        descr - WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1);
#         ifdef ENABLE_TRACE
            if (GC_trace_addr >= current_p
                && GC_trace_addr < current_p + descr) {
                GC_log_printf("GC:%d splitting %p at %p\n",
                              GC_gc_no, current_p, limit);
            }
#         endif /* ENABLE_TRACE */
          /* Make sure that pointers overlapping the two ranges are */
          /* considered.                                            */
          limit += sizeof(word) - ALIGNMENT;
          break;
        case GC_DS_BITMAP:
          mark_stack_top--;
#         ifdef ENABLE_TRACE
            if (GC_trace_addr >= current_p
                && GC_trace_addr < current_p + WORDS_TO_BYTES(WORDSZ-2)) {
                GC_log_printf("GC:%d Tracing from %p bitmap descr %lu\n",
                              GC_gc_no, current_p, (unsigned long) descr);
            }
#         endif /* ENABLE_TRACE */
          descr &= ~GC_DS_TAGS;
          credit -= WORDS_TO_BYTES(WORDSZ/2); /* guess */
          while (descr != 0) {
            if ((signed_word)descr < 0) {
              current = *(word *)current_p;
              FIXUP_POINTER(current);
              if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) {
                PREFETCH((ptr_t)current);
#               ifdef ENABLE_TRACE
                  if (GC_trace_addr == current_p) {
                    GC_log_printf("GC:%d Considering(3) %p -> %p\n",
                                  GC_gc_no, current_p, (ptr_t) current);
                  }
#               endif /* ENABLE_TRACE */
                PUSH_CONTENTS((ptr_t)current, mark_stack_top,
                              mark_stack_limit, current_p, exit1);
              }
            }
            descr <<= 1;
            current_p += sizeof(word);
          }
          continue;
        case GC_DS_PROC:
          mark_stack_top--;
#         ifdef ENABLE_TRACE
            if (GC_trace_addr >= current_p
                && GC_base(current_p) != 0
                && GC_base(current_p) == GC_base(GC_trace_addr)) {
                GC_log_printf("GC:%d Tracing from %p proc descr %lu\n",
                              GC_gc_no, current_p, (unsigned long) descr);
            }
#         endif /* ENABLE_TRACE */
          credit -= GC_PROC_BYTES;
          mark_stack_top =
              (*PROC(descr))
                    ((word *)current_p, mark_stack_top,
                     mark_stack_limit, ENV(descr));
          break;
        case GC_DS_PER_OBJECT:
          if ((signed_word)descr >= 0) {
            /* Descriptor is in the object. */
            descr = *(word *)(current_p + descr - GC_DS_PER_OBJECT);
          } else {
            /* Descriptor is in type descriptor pointed to by first    */
            /* word in object.                                         */
            ptr_t type_descr = *(ptr_t *)current_p;
            /* type_descr is either a valid pointer to the descriptor  */
            /* structure, or this object was on a free list.  If it    */
            /* was anything but the last object on the free list, we   */
            /* will misinterpret the next object on the free list as   */
            /* the type descriptor, and get a 0 GC descriptor, which   */
            /* is ideal.  Unfortunately, we need to check for the last */
            /* object case explicitly.                                 */
            if (0 == type_descr) {
                /* Rarely executed. */
                mark_stack_top--;
                continue;
            }
            descr = *(word *)(type_descr
                              - (descr - (GC_DS_PER_OBJECT
                                          - GC_INDIR_PER_OBJ_BIAS)));
          }
          if (0 == descr) {
              /* Can happen either because we generated a 0 descriptor */
              /* or we saw a pointer to a free object.                 */
              mark_stack_top--;
              continue;
          }
          goto retry;
      }
    } else /* Small object with length descriptor */ {
      mark_stack_top--;
      limit = current_p + (word)descr;
    }
#   ifdef ENABLE_TRACE
      if (GC_trace_addr >= current_p
          && GC_trace_addr < limit) {
          GC_log_printf("GC:%d Tracing from %p len %lu\n",
                        GC_gc_no, current_p, (unsigned long) descr);
      }
#   endif /* ENABLE_TRACE */
    /* The simple case in which we're scanning a range. */
    GC_ASSERT(!((word)current_p & (ALIGNMENT-1)));
    credit -= limit - current_p;
    limit -= sizeof(word);
    {
#     define PREF_DIST 4
#     ifndef SMALL_CONFIG
        word deferred;

        /* Try to prefetch the next pointer to be examined asap.      */
        /* Empirically, this also seems to help slightly without      */
        /* prefetches, at least on linux/X86.  Presumably this loop   */
        /* ends up with less register pressure, and gcc thus ends up  */
        /* generating slightly better code.  Overall gcc code quality */
        /* for this loop is still not great.                          */
        for(;;) {
          PREFETCH(limit - PREF_DIST*CACHE_LINE_SIZE);
          GC_ASSERT(limit >= current_p);
          deferred = *(word *)limit;
          FIXUP_POINTER(deferred);
          limit -= ALIGNMENT;
          if ((ptr_t)deferred >= least_ha && (ptr_t)deferred < greatest_ha) {
            PREFETCH((ptr_t)deferred);
            break;
          }
          if (current_p > limit) goto next_object;
          /* Unroll once, so we don't do too many of the prefetches */
          /* based on limit.                                        */
          deferred = *(word *)limit;
          FIXUP_POINTER(deferred);
          limit -= ALIGNMENT;
          if ((ptr_t)deferred >= least_ha && (ptr_t)deferred < greatest_ha) {
            PREFETCH((ptr_t)deferred);
            break;
          }
          if (current_p > limit) goto next_object;
        }
#     endif
      while (current_p <= limit) {
        /* Empirically, unrolling this loop doesn't help a lot. */
        /* Since PUSH_CONTENTS expands to a lot of code,        */
        /* we don't.                                            */
        current = *(word *)current_p;
        FIXUP_POINTER(current);
        PREFETCH(current_p + PREF_DIST*CACHE_LINE_SIZE);
        if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) {
          /* Prefetch the contents of the object we just pushed.  It's */
          /* likely we will need them soon.                            */
          PREFETCH((ptr_t)current);
#         ifdef ENABLE_TRACE
            if (GC_trace_addr == current_p) {
                GC_log_printf("GC:%d Considering(1) %p -> %p\n",
                              GC_gc_no, current_p, (ptr_t) current);
            }
#         endif /* ENABLE_TRACE */
          PUSH_CONTENTS((ptr_t)current, mark_stack_top,
                        mark_stack_limit, current_p, exit2);
        }
        current_p += ALIGNMENT;
      }
#     ifndef SMALL_CONFIG
        /* We still need to mark the entry we previously prefetched. */
        /* We already know that it passes the preliminary pointer    */
        /* validity test.                                            */
#       ifdef ENABLE_TRACE
          if (GC_trace_addr == current_p) {
              GC_log_printf("GC:%d Considering(2) %p -> %p\n",
                            GC_gc_no, current_p, (ptr_t) deferred);
          }
#       endif /* ENABLE_TRACE */
        PUSH_CONTENTS((ptr_t)deferred, mark_stack_top,
                      mark_stack_limit, current_p, exit4);
      next_object:;
#     endif
    }
  }
  return mark_stack_top;
}
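
/* Callers normally reach GC_mark_from() through the MARK_FROM_MARK_STACK() */
/* macro (defined in gc_pmark.h), which is expected to be roughly           */
/*   GC_mark_stack_top = GC_mark_from(GC_mark_stack_top, GC_mark_stack,     */
/*                                    GC_mark_stack_limit);                 */
/* i.e. it stores the returned value back into the global stack top.        */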
#ifdef PARALLEL_MARK

/* We assume we have an ANSI C Compiler. */
GC_bool GC_help_wanted = FALSE;
unsigned GC_helper_count = 0;
unsigned GC_active_count = 0;
word GC_mark_no = 0;

#define LOCAL_MARK_STACK_SIZE HBLKSIZE
        /* Under normal circumstances, this is big enough to guarantee */
        /* we don't overflow half of it in a single call to            */
        /* GC_mark_from.                                               */
/* Steal mark stack entries starting at mse low into mark stack local */
/* until we either steal mse high, or we have max entries.            */
/* Return a pointer to the top of the local mark stack.               */
/* *next is replaced by a pointer to the next unscanned mark stack    */
/* entry.                                                              */
mse * GC_steal_mark_stack(mse * low, mse * high, mse * local,
                          unsigned max, mse **next)
{
    mse *p;
    mse *top = local - 1;
    unsigned i = 0;

    GC_ASSERT(high >= low-1 && high - low + 1 <= GC_mark_stack_size);
    for (p = low; p <= high && i <= max; ++p) {
        word descr = AO_load((volatile AO_t *) &(p -> mse_descr));
        if (descr != 0) {
            /* Must be ordered after read of descr: */
            AO_store_release_write((volatile AO_t *) &(p -> mse_descr), 0);
            /* More than one thread may get this entry, but that's only */
            /* a minor performance problem.                             */
            ++top;
            top -> mse_descr = descr;
            top -> mse_start = p -> mse_start;
            GC_ASSERT((top -> mse_descr & GC_DS_TAGS) != GC_DS_LENGTH ||
                      top -> mse_descr < (ptr_t)GC_greatest_plausible_heap_addr
                                         - (ptr_t)GC_least_plausible_heap_addr);
            /* If this is a big object, count it as */
            /* size/256 + 1 objects.                */
            ++i;
            if ((descr & GC_DS_TAGS) == GC_DS_LENGTH) i += (descr >> 8);
        }
    }
    *next = p;
    return top;
}
910 /* low and high are inclusive bounds. */
911 void GC_return_mark_stack(mse
* low
, mse
* high
)
917 if (high
< low
) return;
918 stack_size
= high
- low
+ 1;
919 GC_acquire_mark_lock();
920 my_top
= GC_mark_stack_top
; /* Concurrent modification impossible. */
921 my_start
= my_top
+ 1;
922 if (my_start
- GC_mark_stack
+ stack_size
> GC_mark_stack_size
) {
923 if (GC_print_stats
) {
924 GC_log_printf("No room to copy back mark stack.");
926 GC_mark_state
= MS_INVALID
;
927 GC_mark_stack_too_small
= TRUE
;
928 /* We drop the local mark stack. We'll fix things later. */
930 BCOPY(low
, my_start
, stack_size
* sizeof(mse
));
931 GC_ASSERT((mse
*)AO_load((volatile AO_t
*)(&GC_mark_stack_top
))
933 AO_store_release_write((volatile AO_t
*)(&GC_mark_stack_top
),
934 (AO_t
)(my_top
+ stack_size
));
935 /* Ensures visibility of previously written stack contents. */
937 GC_release_mark_lock();
938 GC_notify_all_marker();
/* Mark from the local mark stack.           */
/* On return, the local mark stack is empty. */
/* But this may be achieved by copying the   */
/* local mark stack back into the global one. */
void GC_do_local_mark(mse *local_mark_stack, mse *local_top)
{
    unsigned n;
#   define N_LOCAL_ITERS 1

#   ifdef GC_ASSERTIONS
      /* Make sure we don't hold mark lock. */
      GC_acquire_mark_lock();
      GC_release_mark_lock();
#   endif
    for (;;) {
        for (n = 0; n < N_LOCAL_ITERS; ++n) {
            local_top = GC_mark_from(local_top, local_mark_stack,
                                     local_mark_stack + LOCAL_MARK_STACK_SIZE);
            if (local_top < local_mark_stack) return;
            if (local_top - local_mark_stack >= LOCAL_MARK_STACK_SIZE/2) {
                GC_return_mark_stack(local_mark_stack, local_top);
                return;
            }
        }
        if ((mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top))
            < (mse *)AO_load(&GC_first_nonempty)
            && GC_active_count < GC_helper_count
            && local_top > local_mark_stack + 1) {
            /* Try to share the load, since the main stack is empty, */
            /* and helper threads are waiting for a refill.          */
            /* The entries near the bottom of the stack are likely   */
            /* to require more work.  Thus we return those, even     */
            /* though it's harder.                                   */
            mse * new_bottom = local_mark_stack
                               + (local_top - local_mark_stack)/2;
            GC_ASSERT(new_bottom > local_mark_stack
                      && new_bottom < local_top);
            GC_return_mark_stack(local_mark_stack, new_bottom - 1);
            memmove(local_mark_stack, new_bottom,
                    (local_top - new_bottom + 1) * sizeof(mse));
            local_top -= (new_bottom - local_mark_stack);
        }
    }
}
#define ENTRIES_TO_GET 5

long GC_markers = 2;            /* Normally changed by thread-library- */
                                /* -specific code.                     */
/* Mark using the local mark stack until the global mark stack is empty  */
/* and there are no active workers.  Update GC_first_nonempty to reflect */
/* progress.                                                             */
/* Caller does not hold mark lock.                                       */
/* Caller has already incremented GC_helper_count.  We decrement it,     */
/* and maintain GC_active_count.                                         */
void GC_mark_local(mse *local_mark_stack, int id)
{
    mse * my_first_nonempty;

    GC_acquire_mark_lock();
    GC_active_count++;
    my_first_nonempty = (mse *)AO_load(&GC_first_nonempty);
    GC_ASSERT((mse *)AO_load(&GC_first_nonempty) >= GC_mark_stack &&
              (mse *)AO_load(&GC_first_nonempty) <=
              (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top)) + 1);
    if (GC_print_stats == VERBOSE)
        GC_log_printf("Starting mark helper %lu\n", (unsigned long)id);
    GC_release_mark_lock();
    for (;;) {
        size_t n_on_stack;
        size_t n_to_get;
        mse * my_top;
        mse * local_top;
        mse * global_first_nonempty = (mse *)AO_load(&GC_first_nonempty);

        GC_ASSERT(my_first_nonempty >= GC_mark_stack &&
                  my_first_nonempty <=
                  (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top)) + 1);
        GC_ASSERT(global_first_nonempty >= GC_mark_stack &&
                  global_first_nonempty <=
                  (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top)) + 1);
        if (my_first_nonempty < global_first_nonempty) {
            my_first_nonempty = global_first_nonempty;
        } else if (global_first_nonempty < my_first_nonempty) {
            AO_compare_and_swap(&GC_first_nonempty,
                                (AO_t) global_first_nonempty,
                                (AO_t) my_first_nonempty);
            /* If this fails, we just go ahead, without updating */
            /* GC_first_nonempty.                                */
        }
        /* Perhaps we should also update GC_first_nonempty, if it */
        /* is less.  But that would require using atomic updates. */
        my_top = (mse *)AO_load_acquire((volatile AO_t *)(&GC_mark_stack_top));
        n_on_stack = my_top - my_first_nonempty + 1;
        if (0 == n_on_stack) {
            GC_acquire_mark_lock();
            my_top = GC_mark_stack_top;
                /* Asynchronous modification impossible here, */
                /* since we hold mark lock.                   */
            n_on_stack = my_top - my_first_nonempty + 1;
            if (0 == n_on_stack) {
                GC_active_count--;
                GC_ASSERT(GC_active_count <= GC_helper_count);
                /* Other markers may redeposit objects */
                /* on the stack.                       */
                if (0 == GC_active_count) GC_notify_all_marker();
                while (GC_active_count > 0
                       && (mse *)AO_load(&GC_first_nonempty)
                          > GC_mark_stack_top) {
                    /* We will be notified if either GC_active_count  */
                    /* reaches zero, or if more objects are pushed on */
                    /* the global mark stack.                         */
                    GC_wait_marker();
                }
                if (GC_active_count == 0 &&
                    (mse *)AO_load(&GC_first_nonempty) > GC_mark_stack_top) {
                    GC_bool need_to_notify = FALSE;
                    /* The above conditions can't be falsified while we */
                    /* hold the mark lock, since neither                */
                    /* GC_active_count nor GC_mark_stack_top can        */
                    /* change.  GC_first_nonempty can only be           */
                    /* incremented asynchronously.  Thus we know that   */
                    /* both conditions actually held simultaneously.    */
                    GC_helper_count--;
                    if (0 == GC_helper_count) need_to_notify = TRUE;
                    if (GC_print_stats == VERBOSE)
                        GC_log_printf(
                          "Finished mark helper %lu\n", (unsigned long)id);
                    GC_release_mark_lock();
                    if (need_to_notify) GC_notify_all_marker();
                    return;
                }
                /* else there's something on the stack again, or */
                /* another helper may push something.            */
                GC_active_count++;
                GC_ASSERT(GC_active_count > 0);
                GC_release_mark_lock();
                continue;
            } else {
                GC_release_mark_lock();
            }
        }
        n_to_get = ENTRIES_TO_GET;
        if (n_on_stack < 2 * ENTRIES_TO_GET) n_to_get = 1;
        local_top = GC_steal_mark_stack(my_first_nonempty, my_top,
                                        local_mark_stack, n_to_get,
                                        &my_first_nonempty);
        GC_ASSERT(my_first_nonempty >= GC_mark_stack &&
                  my_first_nonempty <=
                  (mse *)AO_load((volatile AO_t *)(&GC_mark_stack_top)) + 1);
        GC_do_local_mark(local_mark_stack, local_top);
    }
}
/* Perform Parallel mark.                  */
/* We hold the GC lock, not the mark lock. */
/* Currently runs until the mark stack is  */
/* empty.                                  */
void GC_do_parallel_mark()
{
    mse local_mark_stack[LOCAL_MARK_STACK_SIZE];

    GC_acquire_mark_lock();
    GC_ASSERT(I_HOLD_LOCK());
    /* This could be a GC_ASSERT, but it seems safer to keep it on */
    /* all the time, especially since it's cheap.                  */
    if (GC_help_wanted || GC_active_count != 0 || GC_helper_count != 0)
        ABORT("Tried to start parallel mark in bad state");
    if (GC_print_stats == VERBOSE)
        GC_log_printf("Starting marking for mark phase number %lu\n",
                      (unsigned long)GC_mark_no);
    GC_first_nonempty = (AO_t)GC_mark_stack;
    GC_active_count = 0;
    GC_helper_count = 1;
    GC_help_wanted = TRUE;
    GC_release_mark_lock();
    GC_notify_all_marker();
        /* Wake up potential helpers. */
    GC_mark_local(local_mark_stack, 0);
    GC_acquire_mark_lock();
    GC_help_wanted = FALSE;
    /* Done; clean up. */
    while (GC_helper_count > 0) GC_wait_marker();
    /* GC_helper_count cannot be incremented while GC_help_wanted == FALSE */
    if (GC_print_stats == VERBOSE)
        GC_log_printf(
            "Finished marking for mark phase number %lu\n",
            (unsigned long)GC_mark_no);
    GC_mark_no++;
    GC_release_mark_lock();
    GC_notify_all_marker();
}
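
/* Informal summary of the parallel mark handshake (the code above and  */
/* in GC_mark_local()/GC_help_marker() is authoritative):               */
/*   - GC_help_wanted tells idle threads that a mark phase is running;  */
/*   - GC_helper_count counts threads (including the initiator) that    */
/*     joined this phase; the initiator waits for it to drop to zero;   */
/*   - GC_active_count counts helpers currently holding work, so an     */
/*     empty global stack plus GC_active_count == 0 means termination.  */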
/* Try to help out the marker, if it's running.        */
/* We do not hold the GC lock, but the requestor does. */
void GC_help_marker(word my_mark_no)
{
    mse local_mark_stack[LOCAL_MARK_STACK_SIZE];
    unsigned my_id;

    if (!GC_parallel) return;
    GC_acquire_mark_lock();
    while (GC_mark_no < my_mark_no
           || (!GC_help_wanted && GC_mark_no == my_mark_no)) {
      GC_wait_marker();
    }
    my_id = GC_helper_count;
    if (GC_mark_no != my_mark_no || my_id >= GC_markers) {
      /* Second test is useful only if original threads can also */
      /* act as helpers.  Under Linux they can't.                */
      GC_release_mark_lock();
      return;
    }
    GC_helper_count = my_id + 1;
    GC_release_mark_lock();
    GC_mark_local(local_mark_stack, my_id);
    /* GC_mark_local decrements GC_helper_count. */
}

#endif /* PARALLEL_MARK */
/* Allocate or reallocate space for mark stack of size n entries. */
/* May silently fail.                                              */
static void alloc_mark_stack(size_t n)
{
    mse * new_stack = (mse *)GC_scratch_alloc(n * sizeof(struct GC_ms_entry));
#   ifdef GWW_VDB
      /* Don't recycle a stack segment obtained with the wrong flags. */
      /* Win32 GetWriteWatch requires the right kind of memory.       */
      static GC_bool GC_incremental_at_stack_alloc = 0;
      GC_bool recycle_old = (!GC_incremental || GC_incremental_at_stack_alloc);

      GC_incremental_at_stack_alloc = GC_incremental;
#   else
#     define recycle_old 1
#   endif

    GC_mark_stack_too_small = FALSE;
    if (GC_mark_stack_size != 0) {
        if (new_stack != 0) {
          if (recycle_old) {
            /* Recycle old space */
              size_t page_offset = (word)GC_mark_stack & (GC_page_size - 1);
              size_t size = GC_mark_stack_size * sizeof(struct GC_ms_entry);
              size_t displ = 0;

              if (0 != page_offset) displ = GC_page_size - page_offset;
              size = (size - displ) & ~(GC_page_size - 1);
              if (size > 0) {
                GC_add_to_heap((struct hblk *)
                        ((word)GC_mark_stack + displ), (word)size);
              }
          }
          GC_mark_stack = new_stack;
          GC_mark_stack_size = n;
          GC_mark_stack_limit = new_stack + n;
          if (GC_print_stats) {
              GC_log_printf("Grew mark stack to %lu frames\n",
                            (unsigned long) GC_mark_stack_size);
          }
        } else {
          if (GC_print_stats) {
              GC_log_printf("Failed to grow mark stack to %lu frames\n",
                            (unsigned long) n);
          }
        }
    } else {
        if (new_stack == 0) {
            GC_err_printf("No space for mark stack\n");
            EXIT();
        }
        GC_mark_stack = new_stack;
        GC_mark_stack_size = n;
        GC_mark_stack_limit = new_stack + n;
    }
    GC_mark_stack_top = GC_mark_stack-1;
}

void GC_mark_init(void)
{
    alloc_mark_stack(INITIAL_MARK_STACK_SIZE);
}
/*
 * Push all locations between b and t onto the mark stack.
 * b is the first location to be checked. t is one past the last
 * location to be checked.
 * Should only be used if there is no possibility of mark stack
 * overflow.
 */
void GC_push_all(ptr_t bottom, ptr_t top)
{
    register word length;

    bottom = (ptr_t)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    top = (ptr_t)(((word) top) & ~(ALIGNMENT-1));
    if (top == 0 || bottom == top) return;
    GC_mark_stack_top++;
    if (GC_mark_stack_top >= GC_mark_stack_limit) {
        ABORT("unexpected mark stack overflow");
    }
    length = top - bottom;
#   if GC_DS_TAGS > ALIGNMENT - 1
        length += GC_DS_TAGS;
        length &= ~GC_DS_TAGS;
#   endif
    GC_mark_stack_top -> mse_start = bottom;
    GC_mark_stack_top -> mse_descr = length;
}
/*
 * Analogous to the above, but push only those pages h with dirty_fn(h) != 0.
 * We use push_fn to actually push the block.
 * Used both to selectively push dirty pages, or to push a block
 * in piecemeal fashion, to allow for more marking concurrency.
 * Will not overflow mark stack if push_fn pushes a small fixed number
 * of entries.  (This is invoked only if push_fn pushes a single entry,
 * or if it marks each object before pushing it, thus ensuring progress
 * in the event of a stack overflow.)
 */
void GC_push_selected(ptr_t bottom, ptr_t top,
                      int (*dirty_fn) (struct hblk *),
                      void (*push_fn) (ptr_t, ptr_t))
{
    struct hblk * h;

    bottom = (ptr_t)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    top = (ptr_t)(((word) top) & ~(ALIGNMENT-1));
    if (top == 0 || bottom == top) return;
    h = HBLKPTR(bottom + HBLKSIZE);
    if (top <= (ptr_t) h) {
        if ((*dirty_fn)(h-1)) {
            (*push_fn)(bottom, top);
        }
        return;
    }
    if ((*dirty_fn)(h-1)) {
        (*push_fn)(bottom, (ptr_t)h);
    }
    while ((ptr_t)(h+1) <= top) {
        if ((*dirty_fn)(h)) {
            if ((word)(GC_mark_stack_top - GC_mark_stack)
                > 3 * GC_mark_stack_size / 4) {
                /* Danger of mark stack overflow */
                (*push_fn)((ptr_t)h, top);
                return;
            } else {
                (*push_fn)((ptr_t)h, (ptr_t)(h+1));
            }
        }
        h++;
    }
    if ((ptr_t)h != top) {
        if ((*dirty_fn)(h)) {
            (*push_fn)((ptr_t)h, top);
        }
    }
    if (GC_mark_stack_top >= GC_mark_stack_limit) {
        ABORT("unexpected mark stack overflow");
    }
}
# ifndef SMALL_CONFIG

#ifdef PARALLEL_MARK
    /* Break up root sections into page size chunks to better spread */
    /* out work.                                                     */
    GC_bool GC_true_func(struct hblk *h) { return TRUE; }
#   define GC_PUSH_ALL(b,t) GC_push_selected(b,t,GC_true_func,GC_push_all);
#else
#   define GC_PUSH_ALL(b,t) GC_push_all(b,t);
#endif

void GC_push_conditional(ptr_t bottom, ptr_t top, GC_bool all)
{
    if (all) {
      if (GC_dirty_maintained) {
#       ifdef PROC_VDB
            /* Pages that were never dirtied cannot contain pointers */
            GC_push_selected(bottom, top, GC_page_was_ever_dirty, GC_push_all);
#       else
            GC_push_all(bottom, top);
#       endif
      } else {
        GC_push_all(bottom, top);
      }
    } else {
        GC_push_selected(bottom, top, GC_page_was_dirty, GC_push_all);
    }
}
# endif /* !SMALL_CONFIG */
# if defined(MSWIN32) || defined(MSWINCE)
  void __cdecl GC_push_one(word p)
# else
  void GC_push_one(word p)
# endif
{
    GC_PUSH_ONE_STACK((ptr_t)p, MARKED_FROM_REGISTER);
}
struct GC_ms_entry *GC_mark_and_push(void *obj,
                                     mse *mark_stack_ptr,
                                     mse *mark_stack_limit,
                                     void **src)
{
    hdr * hhdr;

    PREFETCH(obj);
    GET_HDR(obj, hhdr);
    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr),FALSE)) {
      if (GC_all_interior_pointers) {
        hhdr = GC_find_header(GC_base(obj));
        if (hhdr == 0) {
          GC_ADD_TO_BLACK_LIST_NORMAL(obj, src);
          return mark_stack_ptr;
        }
      } else {
        GC_ADD_TO_BLACK_LIST_NORMAL(obj, src);
        return mark_stack_ptr;
      }
    }
    if (EXPECT(HBLK_IS_FREE(hhdr),0)) {
        GC_ADD_TO_BLACK_LIST_NORMAL(obj, src);
        return mark_stack_ptr;
    }

    PUSH_CONTENTS_HDR(obj, mark_stack_ptr /* modified */, mark_stack_limit,
                      src, was_marked, hhdr, TRUE);
 was_marked:
    return mark_stack_ptr;
}
/* Mark and push (i.e. gray) a single object p onto the main    */
/* mark stack.  Consider p to be valid if it is an interior     */
/* pointer.                                                     */
/* The object p has passed a preliminary pointer validity       */
/* test, but we do not definitely know whether it is valid.     */
/* Mark bits are NOT atomically updated.  Thus this must be the */
/* only thread setting them.                                    */
# if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS)
    void GC_mark_and_push_stack(ptr_t p, ptr_t source)
# else
    void GC_mark_and_push_stack(ptr_t p)
#   define source 0
# endif
{
    hdr * hhdr;
    ptr_t r = p;

    PREFETCH(p);
    GET_HDR(p, hhdr);
    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr),FALSE)) {
        if (hhdr != 0) {
          r = GC_base(p);
          hhdr = HDR(r);
        }
        if (hhdr == 0) {
            GC_ADD_TO_BLACK_LIST_STACK(p, source);
            return;
        }
    }
    if (EXPECT(HBLK_IS_FREE(hhdr),0)) {
        GC_ADD_TO_BLACK_LIST_NORMAL(p, source);
        return;
    }
#   if defined(MANUAL_VDB) && defined(THREADS)
      /* Pointer is on the stack.  We may have dirtied the object */
      /* it points to, but not yet have called GC_dirty();        */
      GC_dirty(p);      /* Implicitly affects entire object. */
#   endif
    PUSH_CONTENTS_HDR(r, GC_mark_stack_top, GC_mark_stack_limit,
                      source, mark_and_push_exit, hhdr, FALSE);
  mark_and_push_exit: ;
    /* We silently ignore pointers to near the end of a block, */
    /* which is very mildly suboptimal.                        */
    /* FIXME: We should probably add a header word to address  */
    /* this.                                                   */
}
# undef source
#ifdef TRACE_BUF

# define TRACE_ENTRIES 1000

struct trace_entry {
    char * kind;
    word gc_no;
    word bytes_allocd;
    word arg1;
    word arg2;
} GC_trace_buf[TRACE_ENTRIES];

int GC_trace_buf_ptr = 0;

void GC_add_trace_entry(char *kind, word arg1, word arg2)
{
    GC_trace_buf[GC_trace_buf_ptr].kind = kind;
    GC_trace_buf[GC_trace_buf_ptr].gc_no = GC_gc_no;
    GC_trace_buf[GC_trace_buf_ptr].bytes_allocd = GC_bytes_allocd;
    GC_trace_buf[GC_trace_buf_ptr].arg1 = arg1 ^ 0x80000000;
    GC_trace_buf[GC_trace_buf_ptr].arg2 = arg2 ^ 0x80000000;
    GC_trace_buf_ptr++;
    if (GC_trace_buf_ptr >= TRACE_ENTRIES) GC_trace_buf_ptr = 0;
}

void GC_print_trace(word gc_no, GC_bool lock)
{
    int i;
    struct trace_entry *p;

    if (lock) LOCK();
    for (i = GC_trace_buf_ptr-1; i != GC_trace_buf_ptr; i--) {
        if (i < 0) i = TRACE_ENTRIES-1;
        p = GC_trace_buf + i;
        if (p -> gc_no < gc_no || p -> kind == 0) return;
        printf("Trace:%s (gc:%d,bytes:%d) 0x%X, 0x%X\n",
               p -> kind, p -> gc_no, p -> bytes_allocd,
               (p -> arg1) ^ 0x80000000, (p -> arg2) ^ 0x80000000);
    }
    printf("Trace incomplete\n");
    if (lock) UNLOCK();
}

# endif /* TRACE_BUF */
/*
 * A version of GC_push_all that treats all interior pointers as valid
 * and scans the entire region immediately, in case the contents
 * change.
 */
void GC_push_all_eager(ptr_t bottom, ptr_t top)
{
    word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
    word * t = (word *)(((word) top) & ~(ALIGNMENT-1));
    register word *p;
    register word q;
    register word *lim;
    register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
    register ptr_t least_ha = GC_least_plausible_heap_addr;
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    if (top == 0) return;
    /* check all pointers in range and push if they appear */
    /* to be valid.                                        */
      lim = t - 1 /* longword */;
      for (p = b; p <= lim; p = (word *)(((ptr_t)p) + ALIGNMENT)) {
        q = *p;
        GC_PUSH_ONE_STACK((ptr_t)q, p);
      }
#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
}
#ifndef THREADS
/*
 * A version of GC_push_all that treats all interior pointers as valid
 * and scans part of the area immediately, to make sure that saved
 * register values are not lost.
 * Cold_gc_frame delimits the stack section that must be scanned
 * eagerly.  A zero value indicates that no eager scanning is needed.
 * We don't need to worry about the MANUAL_VDB case here, since this
 * is only called in the single-threaded case.  We assume that we
 * cannot collect between an assignment and the corresponding
 * GC_dirty() call.
 */
void GC_push_all_stack_partially_eager(ptr_t bottom, ptr_t top,
                                       ptr_t cold_gc_frame)
{
  if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
    /* Push the hot end of the stack eagerly, so that register values */
    /* saved inside GC frames are marked before they disappear.       */
    /* The rest of the marking can be deferred until later.           */
    if (0 == cold_gc_frame) {
        GC_push_all_stack(bottom, top);
        return;
    }
    GC_ASSERT(bottom <= cold_gc_frame && cold_gc_frame <= top);
#   ifdef STACK_GROWS_DOWN
        GC_push_all(cold_gc_frame - sizeof(ptr_t), top);
        GC_push_all_eager(bottom, cold_gc_frame);
#   else /* STACK_GROWS_UP */
        GC_push_all(bottom, cold_gc_frame + sizeof(ptr_t));
        GC_push_all_eager(cold_gc_frame, top);
#   endif /* STACK_GROWS_UP */
  } else {
    GC_push_all_eager(bottom, top);
  }
# ifdef TRACE_BUF
      GC_add_trace_entry("GC_push_all_stack", bottom, top);
# endif
}
#endif /* !THREADS */
void GC_push_all_stack(ptr_t bottom, ptr_t top)
{
# if defined(THREADS) && defined(MPROTECT_VDB)
    GC_push_all_eager(bottom, top);
# else
    if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
        GC_push_all(bottom, top);
    } else {
        GC_push_all_eager(bottom, top);
    }
# endif
}
#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) && \
    defined(MARK_BIT_PER_GRANULE)
# if GC_GRANULE_WORDS == 1
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
      { ptr_t qcontents = (ptr_t)((q)[0]); \
        GC_PUSH_ONE_HEAP(qcontents, (q)); }
# elif GC_GRANULE_WORDS == 2
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
      { ptr_t qcontents = (ptr_t)((q)[0]); \
        GC_PUSH_ONE_HEAP(qcontents, (q)); \
        qcontents = (ptr_t)((q)[1]); \
        GC_PUSH_ONE_HEAP(qcontents, (q)+1); }
# elif GC_GRANULE_WORDS == 4
#   define USE_PUSH_MARKED_ACCELERATORS
#   define PUSH_GRANULE(q) \
      { ptr_t qcontents = (ptr_t)((q)[0]); \
        GC_PUSH_ONE_HEAP(qcontents, (q)); \
        qcontents = (ptr_t)((q)[1]); \
        GC_PUSH_ONE_HEAP(qcontents, (q)+1); \
        qcontents = (ptr_t)((q)[2]); \
        GC_PUSH_ONE_HEAP(qcontents, (q)+2); \
        qcontents = (ptr_t)((q)[3]); \
        GC_PUSH_ONE_HEAP(qcontents, (q)+3); }
# endif
#endif
#ifdef USE_PUSH_MARKED_ACCELERATORS
/* Push all objects reachable from marked objects in the given block */
/* containing objects of size 1 granule.                             */
void GC_push_marked1(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;
    word *q;
    word mark_word;

    /* Allow registers to be used for some frequently accessed */
    /* global variables.  Otherwise aliasing issues are likely */
    /* to prevent that.                                        */
    ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
    ptr_t least_ha = GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;
#   define GC_mark_stack_top mark_stack_top
#   define GC_mark_stack_limit mark_stack_limit
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* go through all words in block */
        while( p < plim )  {
            mark_word = *mark_word_addr++;
            q = p;
            while(mark_word != 0) {
              if (mark_word & 1) {
                  PUSH_GRANULE(q);
              }
              q += GC_GRANULE_WORDS;
              mark_word >>= 1;
            }
            p += WORDSZ*GC_GRANULE_WORDS;
        }
#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit

    GC_mark_stack_top = mark_stack_top;
}
#ifndef UNALIGNED

/* Push all objects reachable from marked objects in the given block */
/* of size 2 (granules) objects.                                     */
void GC_push_marked2(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;
    word *q;
    word mark_word;

    ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
    ptr_t least_ha = GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;

#   define GC_mark_stack_top mark_stack_top
#   define GC_mark_stack_limit mark_stack_limit
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* go through all words in block */
        while( p < plim )  {
            mark_word = *mark_word_addr++;
            q = p;
            while(mark_word != 0) {
              if (mark_word & 1) {
                  PUSH_GRANULE(q);
                  PUSH_GRANULE(q + GC_GRANULE_WORDS);
              }
              q += 2 * GC_GRANULE_WORDS;
              mark_word >>= 1;
            }
            p += WORDSZ*GC_GRANULE_WORDS;
        }
#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit

    GC_mark_stack_top = mark_stack_top;
}
# if GC_GRANULE_WORDS < 4
/* Push all objects reachable from marked objects in the given block */
/* of size 4 (granules) objects.                                     */
/* There is a risk of mark stack overflow here.  But we handle that. */
/* And only unmarked objects get pushed, so it's not very likely.    */
void GC_push_marked4(struct hblk *h, hdr *hhdr)
{
    word * mark_word_addr = &(hhdr->hb_marks[0]);
    word *p;
    word *plim;
    word *q;
    word mark_word;

    ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
    ptr_t least_ha = GC_least_plausible_heap_addr;
    mse * mark_stack_top = GC_mark_stack_top;
    mse * mark_stack_limit = GC_mark_stack_limit;
#   define GC_mark_stack_top mark_stack_top
#   define GC_mark_stack_limit mark_stack_limit
#   define GC_greatest_plausible_heap_addr greatest_ha
#   define GC_least_plausible_heap_addr least_ha

    p = (word *)(h->hb_body);
    plim = (word *)(((word)h) + HBLKSIZE);

    /* go through all words in block */
        while( p < plim )  {
            mark_word = *mark_word_addr++;
            q = p;
            while(mark_word != 0) {
              if (mark_word & 1) {
                  PUSH_GRANULE(q);
                  PUSH_GRANULE(q + GC_GRANULE_WORDS);
                  PUSH_GRANULE(q + 2*GC_GRANULE_WORDS);
                  PUSH_GRANULE(q + 3*GC_GRANULE_WORDS);
              }
              q += 4 * GC_GRANULE_WORDS;
              mark_word >>= 1;
            }
            p += WORDSZ*GC_GRANULE_WORDS;
        }
#   undef GC_greatest_plausible_heap_addr
#   undef GC_least_plausible_heap_addr
#   undef GC_mark_stack_top
#   undef GC_mark_stack_limit
    GC_mark_stack_top = mark_stack_top;
}

#endif /* GC_GRANULE_WORDS < 4 */

#endif /* UNALIGNED */

#endif /* USE_PUSH_MARKED_ACCELERATORS */
/* Push all objects reachable from marked objects in the given block */
void GC_push_marked(struct hblk *h, hdr *hhdr)
{
    size_t sz = hhdr -> hb_sz;
    word descr = hhdr -> hb_descr;
    ptr_t p;
    word bit_no;
    ptr_t lim;
    mse * GC_mark_stack_top_reg;
    mse * mark_stack_limit = GC_mark_stack_limit;

    /* Some quick shortcuts: */
        if ((0 | GC_DS_LENGTH) == descr) return;
        if (GC_block_empty(hhdr)/* nothing marked */) return;
    GC_n_rescuing_pages++;
    GC_objects_are_marked = TRUE;
    if (sz > MAXOBJBYTES) {
        lim = h -> hb_body;
    } else {
        lim = (h + 1)->hb_body - sz;
    }

    switch(BYTES_TO_GRANULES(sz)) {
#   if defined(USE_PUSH_MARKED_ACCELERATORS)
     case 1:
       GC_push_marked1(h, hhdr);
       break;
#    if !defined(UNALIGNED)
       case 2:
         GC_push_marked2(h, hhdr);
         break;
#     if GC_GRANULE_WORDS < 4
       case 4:
         GC_push_marked4(h, hhdr);
         break;
#     endif
#    endif
#   endif
     default:
      GC_mark_stack_top_reg = GC_mark_stack_top;
      for (p = h -> hb_body, bit_no = 0; p <= lim;
           p += sz, bit_no += MARK_BIT_OFFSET(sz)) {
         if (mark_bit_from_hdr(hhdr, bit_no)) {
           /* Mark from fields inside the object */
             PUSH_OBJ(p, hhdr, GC_mark_stack_top_reg, mark_stack_limit);
         }
      }
      GC_mark_stack_top = GC_mark_stack_top_reg;
    }
}
#ifndef SMALL_CONFIG
/* Test whether any page in the given block is dirty */
GC_bool GC_block_was_dirty(struct hblk *h, hdr *hhdr)
{
    size_t sz = hhdr -> hb_sz;

    if (sz <= MAXOBJBYTES) {
         return(GC_page_was_dirty(h));
    } else {
         ptr_t p = (ptr_t)h;
         while (p < (ptr_t)h + sz) {
             if (GC_page_was_dirty((struct hblk *)p)) return(TRUE);
             p += HBLKSIZE;
         }
         return(FALSE);
    }
}
#endif /* SMALL_CONFIG */
/* Similar to GC_push_marked, but skip over unallocated blocks and */
/* return the address of the next block.                           */
struct hblk * GC_push_next_marked(struct hblk *h)
{
    hdr * hhdr = HDR(h);

    if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)) {
      h = GC_next_used_block(h);
      if (h == 0) return(0);
      hhdr = GC_find_header((ptr_t)h);
    }
    GC_push_marked(h, hhdr);
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
}
#ifndef SMALL_CONFIG
/* Identical to above, but mark only from dirty pages */
struct hblk * GC_push_next_marked_dirty(struct hblk *h)
{
    hdr * hhdr = HDR(h);

    if (!GC_dirty_maintained) { ABORT("dirty bits not set up"); }
    for (;;) {
        if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)) {
          h = GC_next_used_block(h);
          if (h == 0) return(0);
          hhdr = GC_find_header((ptr_t)h);
        }
#       ifdef STUBBORN_ALLOC
          if (hhdr -> hb_obj_kind == STUBBORN) {
            if (GC_page_was_changed(h) && GC_block_was_dirty(h, hhdr)) {
                break;
            }
          } else {
            if (GC_block_was_dirty(h, hhdr)) break;
          }
#       else
          if (GC_block_was_dirty(h, hhdr)) break;
#       endif
        h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
        hhdr = HDR(h);
    }
    GC_push_marked(h, hhdr);
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
}
#endif
/* Similar to above, but for uncollectable pages.  Needed since we */
/* do not clear marks for such pages, even for full collections.   */
struct hblk * GC_push_next_marked_uncollectable(struct hblk *h)
{
    hdr * hhdr = HDR(h);

    for (;;) {
        if (EXPECT(IS_FORWARDING_ADDR_OR_NIL(hhdr), FALSE)) {
          h = GC_next_used_block(h);
          if (h == 0) return(0);
          hhdr = GC_find_header((ptr_t)h);
        }
        if (hhdr -> hb_obj_kind == UNCOLLECTABLE) break;
        h += OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz);
        hhdr = HDR(h);
    }
    GC_push_marked(h, hhdr);
    return(h + OBJ_SZ_TO_BLOCKS(hhdr -> hb_sz));
}