Lua GC 之 Ephemeron

lua 在 5.2 中对弱键表(table with weak keys)引入了 ephemeron 机制,来解决弱表的循环引用问题。我们先来解释下几个基本概念:

弱引用(weak reference):可以访问对象,但不会阻止对象被收集。

弱表(weak table):键或(和)值是弱引用。

ephemeron:指的是键值对(pair),键是弱引用,但键对值的 mark 有如下影响。如果键可达(reachable),则 mark 其值;如果键不可达,则不必 mark 其值。

先来看看一个循环引用的例子:

et = {}
mt = {}
setmetatable(et, mt)
-- sets the table to be an ephemerons table,
-- in the new implementation.
mt.__mode = "k"
a = {}
b = {}
et[a] = b
et[b] = a
a = nil
b = nil

collectgarbage()

count = 0
for k,v in pairs(et) do
    count = count + 1
end
print(count) -- 0
在5.1下运行输出: 2
在5.2下运行输出: 0

也就是 5.1 下表et的2组键值对永远不会被收集了。

先来简单看看5.1的实现:

static int traversetable (global_State *g, Table *h) {
  int i;
  int weakkey = 0;
  int weakvalue = 0;
  const TValue *mode;
  if (h->metatable)
    markobject(g, h->metatable);
  mode = gfasttm(g, h->metatable, TM_MODE);
  if (mode && ttisstring(mode)) {  /* is there a weak mode? */
    weakkey = (strchr(svalue(mode), 'k') != NULL);
    weakvalue = (strchr(svalue(mode), 'v') != NULL);
    if (weakkey || weakvalue) {  /* is really weak? */
      h->marked &= ~(KEYWEAK | VALUEWEAK);  /* clear bits */
      h->marked |= cast_byte((weakkey << KEYWEAKBIT) |
                             (weakvalue << VALUEWEAKBIT));
      // 将弱表挂入weak链表,等待 atomic 阶段重新扫描
      h->gclist = g->weak;  /* must be cleared after GC, ... */
      g->weak = obj2gco(h);  /* ... so put in the appropriate list */
    }
  }
  if (weakkey && weakvalue) return 1; // 弱键弱值表,无需 mark
  if (!weakvalue) { // mark 弱键强值表的数组部分
    i = h->sizearray;
    while (i--)
      markvalue(g, &h->array[i]);
  }
  i = sizenode(h);
  while (i--) {
    Node *n = gnode(h, i);
    lua_assert(ttype(gkey(n)) != LUA_TDEADKEY || ttisnil(gval(n)));
    if (ttisnil(gval(n)))
      removeentry(n);  /* remove empty entries */ // 移除空键值对
    else {
      lua_assert(!ttisnil(gkey(n)));
      if (!weakkey) markvalue(g, gkey(n)); // 强键直接mark
      if (!weakvalue) markvalue(g, gval(n)); // 强值直接mark
    }
  }
  return weakkey || weakvalue;
}
static void atomic (lua_State *L) {
  global_State *g = G(L);

  propagateall(g);
  /* remark weak tables */ // weak 链 放入 gray 链,为 mark 弱表作准备
  g->gray = g->weak;
  g->weak = NULL;

  markobject(g, L);  /* mark running thread */
  markmt(g);  /* mark basic metatables (again) */
  propagateall(g); // 重新 mark 弱表

  cleartable(g->weak);  /* remove collected objects from weak tables */
}

扫描(mark) 弱表时很简单,将弱表放入 weak 链(因为 atomic 阶段,才可以真正断定一个弱引用是否还有效。即扫描阶段的 unreachable 不是真正的unreachable,扫描完成后的 unreachable 才是真正的 unreachable),然后 mark 掉强引用的键或值。

二、Lua 5.2 实现

相比之下 5.2 的实现就复杂不少。weak 链被分成了3个单独的链 allweak,ephemeron,weak。但和 5.1 一样依然是分为扫描阶段(propagate)和 atomic 阶段

1. 扫描阶段

扫描弱键表的时候,有条件的去 mark 它的值——如果键可达,则 mark 其值,否则不 mark 其值。

static int traverseephemeron (global_State *g, Table *h) {
  int marked = 0;  /* true if an object is marked in this traversal */
  int hasclears = 0;  /* true if table has white keys */
  int prop = 0;  /* true if table has entry "white-key -> white-value" */
  Node *n, *limit = gnodelast(h);
  int i;
  /* traverse array part (numeric keys are 'strong') */
  for (i = 0; i < h->sizearray; i++) { // mark 数组部分(弱键强值表)
    if (valiswhite(&h->array[i])) {
      marked = 1;
      reallymarkobject(g, gcvalue(&h->array[i]));
    }
  }
  /* traverse hash part */
  for (n = gnode(h, 0); n < limit; n++) {
    checkdeadkey(n);
    if (ttisnil(gval(n)))  /* entry is empty? */
      removeentry(n);  /* remove it */
    else if (iscleared(g, gkey(n))) {  /* key is not marked (yet)? */
      hasclears = 1;  /* table must be cleared */
      if (valiswhite(gval(n)))  /* value not marked yet? */
        prop = 1;  /* must propagate again */ // 键无效,值同时也没mark,则放入ephemeron链(ephemeron链用途:如果键在后面的 atomic 阶段发现是有效的,则需 mark 其值)
    }
    else if (valiswhite(gval(n))) {  /* value not marked yet? */ // 键有效,但值还没mark,则mark值(原则:键有效则值也有效)
      marked = 1;
      reallymarkobject(g, gcvalue(gval(n)));  /* mark it now */
    }
  }
  if (prop)
    linktable(h, &g->ephemeron);  /* have to propagate again */ // 键值均不可达,则需要重新扫描
  else if (hasclears)  /* does table have white keys? */
    linktable(h, &g->allweak);  /* may have to clean white keys */ // 键不可达,值可达,后期需要清理掉不可达的键
  else  /* no white keys */
    linktable(h, &g->grayagain);  /* no need to clean */
  return marked;
}

2. atomic阶段

ephemeron 链上的表的值是可以被连锁复活的,例如:有键值对 a->b 和 b->c,如果 a 在后来确认是可达的,则 b 应该被 mark,而 b 的mark,又会导致 c 需要被 mark ,这就需要反复遍历 ephemeron 链作值的 mark 操作,直到ephemeron 链上的无对象可 mark 为止。见 convergeephemerons 函数。

static l_mem atomic (lua_State *L) {
  global_State *g = G(L);

  /* traverse objects caught by write barrier and by 'remarkupvals' */
  retraversegrays(g);
  work -= g->GCmemtrav;  /* restart counting */
  convergeephemerons(g); // 键是否可达已最终确定,mark 掉其可达键所关联的值

  /* at this point, all strongly accessible objects are marked. */
  /* clear values from weak tables, before checking finalizers */
  clearvalues(g, g->weak, NULL);
  clearvalues(g, g->allweak, NULL);
  origweak = g->weak; origall = g->allweak;
  work += g->GCmemtrav;  /* stop counting (objects being finalized) */

  separatetobefnz(L, 0);  /* separate objects to be finalized */
  markbeingfnz(g);  /* mark objects that will be finalized */ // 复活需 finalizer 的对象
  propagateall(g);  /* remark, to propagate `preserveness' */

  convergeephemerons(g); // 复活需 finalizer 的对象及这些对象所关联的对象后,重新mark ephemeron
  /* at this point, all resurrected objects are marked. */
  /* remove dead objects from weak tables */
  clearkeys(g, g->ephemeron, NULL);  /* clear keys from all ephemeron tables */
  clearkeys(g, g->allweak, NULL);  /* clear keys from all allweak tables */
  /* clear values from resurrected weak tables */
  clearvalues(g, g->weak, origweak);
  clearvalues(g, g->allweak, origall);
  g->currentwhite = cast_byte(otherwhite(g));  /* flip current white */
  work += g->GCmemtrav;  /* complete counting */
  return work;  /* estimate of memory marked by 'atomic' */
}


static void convergeephemerons (global_State *g) {
  int changed; // 关键值,指示对 g->ephemeron 的一次遍历中是否有对象被重新mark
  do {
    GCObject *w;
    GCObject *next = g->ephemeron;  /* get ephemeron list */
    g->ephemeron = NULL;  /* tables will return to this list when traversed */
    changed = 0;
    while ((w = next) != NULL) {
      next = gco2t(w)->gclist;
      if (traverseephemeron(g, gco2t(w))) {  /* traverse marked some value? */
        propagateall(g);  /* propagate changes */
        changed = 1;  /* will have to revisit all ephemeron tables */ 
      }
    }
  } while (changed); // 无对象可 mark 时,退出
}

参考资料:

Alexandra Barros 和 Roberto Ierusalimschy 编写的 Eliminating Cycles inWeak Tables