apisix 中的 DNS 解析

基于 3.10.0 版本

入口

在 apisix 的 ngx_tpl.lua 中相关的配置

            access_by_lua_block {
                apisix.http_access_phase()
            }

即: 请求在 access 阶段会进入 http_access_phase 函数

在 apisix/init.lua 的 http_access_phase 中确定匹配到的 route之后, 调用了 _M.handle_upstream(api_ctx, route, enable_websocket)

主入口: handle_upstream

_M.handle_upstream

function _M.handle_upstream(api_ctx, route, enable_websocket)
    local up_id = route.value.upstream_id

    if up_id then
        local upstream = apisix_upstream.get_by_id(up_id)
        if not upstream then
            if is_http then
                return core.response.exit(502)
            end

            return ngx_exit(1)
        end

        api_ctx.matched_upstream = upstream

    else
        if route.has_domain then
            local err
            route, err = parse_domain_in_route(route)
            if err then
                core.log.error("failed to get resolved route: ", err)
                return core.response.exit(500)
            end


        end

        local route_val = route.value

        api_ctx.matched_upstream = (route.dns_value and
                                    route.dns_value.upstream)
                                   or route_val.upstream
    end

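这里的逻辑是: 如果 route 配置了 upstream_id, 则通过 apisix_upstream.get_by_id(up_id) 获取 upstream 作为 matched_upstream; 否则使用 route 自身内嵌的 upstream。在后一种情况下, 如果 route 包含域名 (route.has_domain), 则先调用 parse_domain_in_route(route) 解析域名, 并优先使用 route.dns_value.upstream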

调用链路:

if up_id:

  apisix/init.lua: apisix_upstream.get_by_id(up_id)
  ->
  apisix/upstream.lua: get_by_id(up_id) 
                       -> upstream_util.parse_domain_in_up(upstream)
  ->
  apisix/utils/upstream.lua: parse_domain_in_up(up) 
                           -> parse_domain_for_nodes(nodes) 
                           -> core.resolver.parse_domain(host)
                           
else:
  if route.has_domain:
  apisix/init.lua: parse_domain_in_route(route) 
                   -> upstream_util.parse_domain_for_nodes(nodes)
  ->
  apisix/utils/upstream.lua: parse_domain_for_nodes(nodes) 
                           -> core.resolver.parse_domain(host)

公共部分:

->
apisix/core/resolver.lua: parse_domain(host) -> utils.dns_parse(host)
-> 
apisix/core/utils.lua: dns_client.new(opts) ->  current_dns_client:resolve(domain, selector)
->
lib: resty.dns.client

1. parse_domain_for_nodes

parse_domain_for_nodes

-- Build a new node list in which every node whose host is a domain name
-- has been resolved to an IP address.
--
-- @param nodes  array of node tables; each node's `host` field is read
-- @return       a new array of nodes (only one value is returned; no error
--               value is ever returned, even if every lookup failed)
local function parse_domain_for_nodes(nodes)
    -- presumably pre-sizes the array part for #nodes entries -- TODO confirm
    local new_nodes = core.table.new(#nodes, 0)
    for _, node in ipairs(nodes) do
        local host = node.host
        -- a host that parses as neither IPv4 nor IPv6 is treated as a domain
        if not ipmatcher.parse_ipv4(host) and
                not ipmatcher.parse_ipv6(host) then
            local ip, err = core.resolver.parse_domain(host)
            if ip then
                -- clone so the original node is left untouched; the clone
                -- carries the resolved IP in `host` and keeps the original
                -- name in `domain`
                local new_node = core.table.clone(node)
                new_node.host = ip
                new_node.domain = host
                core.table.insert(new_nodes, new_node)
            end

            -- a failed lookup is only logged; the node is dropped from the
            -- result and the error is not propagated to the caller
            if err then
                core.log.error("dns resolver domain: ", host, " error: ", err)
            end
        else
            -- host is already an IP literal: reuse the node as-is (not cloned)
            core.table.insert(new_nodes, node)
        end
    end
    -- NOTE(review): there is no emptiness check here, so when every lookup
    -- fails the caller receives an empty table rather than nil plus an error
    return new_nodes
end
_M.parse_domain_for_nodes = parse_domain_for_nodes

注意: 这里没有检测最终结果是否为空。当所有域名都解析失败时, 函数仍然会返回一个空 table, 并且不会返回 err; 因此调用方 local new_nodes, err = parse_domain_for_nodes(nodes) 拿到的 new_nodes 是空 table, err 始终为 nil, 无法感知解析失败

这会带来一个问题: 当 DNS server 短时间不可用之后又恢复了, 此时 APISIX 中的 DNS 解析并没有随之恢复, 仍然会持续失败, 相关的报错日志如下

dns server 异常时

[lua] upstream.lua:79: parse_domain_for_nodes(): dns resolver domain: test.com error: failed to query the DNS server: dns client error: 101 empty record received
[lua] resolver.lua:80: parse_domain(): failed to parse domain: test.com, error: failed to query the DNS server: dns client error: 101 empty record received

并且, 在dns server恢复后

[lua] init.lua:486: handle_upstream(): failed to set upstream: no valid upstream node

上述错误会一直持续, 直到重启 APISIX, 或者执行 apisix reload

之前提过一个 issue: 「bug: dns resolution did not resume immediately after the dns server resume」, 后来由于开发人员没有复现, 该 issue 已经被关闭 (实际上是可以复现的)。目前的修复方式是打 patch: 在 return new_nodes 前面加上判断, 具体可以参考「fix(build/patches): add patch for parse_domain_for_nodes in apisix/utils/upstream.lua」

2. parse_domain(host)

parse_domain

function _M.parse_domain(host)

    local ip_info, err = utils.dns_parse(host)
    if not ip_info then
        log.error("failed to parse domain: ", host, ", error: ",err) -- line 80
        return nil, err
    end

3. dns_parse(domain, selector)

dns_parse

local function dns_parse(domain, selector)
    ....
    return current_dns_client:resolve(domain, selector)

4. resolve(domain)

resolve

-- Resolve `domain` through the DNS client stored in `self.client`.
--
-- @param self      object holding the underlying client in `self.client`
-- @param domain    name to resolve
-- @param selector  when equal to _M.RETURN_ALL, all answers are returned;
--                  otherwise a single randomly chosen A/AAAA answer is returned
-- @return          the answer(s) on success, or nil plus an error string
function _M.resolve(self, domain, selector)
    local client = self.client

    -- this function will dereference the CNAME records
    local answers, err = client.resolve(domain)
    if not answers then
        return nil, "failed to query the DNS server: " .. err
    end

    -- the client returned a result table that carries an error code
    if answers.errcode then
        return nil, "server returned error code: " .. answers.errcode
                    .. ": " .. answers.errstr
    end

    if selector == _M.RETURN_ALL then
        log.info("dns resolve ", domain, ", result: ", json.delay_encode(answers))
        -- if any answer is an SRV record, hand the whole answer set over to
        -- resolve_srv and return whatever it returns
        for _, answer in ipairs(answers) do
            if answer.type == client.TYPE_SRV then
                return resolve_srv(client, answers)
            end
        end
        -- a copy is returned, presumably so callers cannot mutate records
        -- held by the client -- TODO confirm
        return table.deepcopy(answers)
    end

    -- pick one answer at random
    -- NOTE(review): if `answers` can be an empty table, math_random(1, 0)
    -- would likely raise an error (assuming math_random is math.random) --
    -- confirm whether the client can return an empty list
    local idx = math_random(1, #answers)
    local answer = answers[idx]
    local dns_type = answer.type
    -- only A and AAAA records are accepted in single-answer mode
    if dns_type == client.TYPE_A or dns_type == client.TYPE_AAAA then
        log.info("dns resolve ", domain, ", result: ", json.delay_encode(answer))
        return table.deepcopy(answer)
    end

    return nil, "unsupported DNS answer"
end

依赖库

api7/lua-resty-dns-client

https://github.com/Kong/lua-resty-dns-client fork 过来,改了什么: