apisix 中的 DNS 解析
基于 3.10.0 版本
入口
在 apisix 的 ngx_tpl.lua 中相关的配置
access_by_lua_block {
apisix.http_access_phase()
}
即, 请求在 access phase 会走到 http_access_phase 函数
在 apisix/init.lua 的 http_access_phase 中确定匹配到的 route 之后, 调用了 _M.handle_upstream(api_ctx, route, enable_websocket)
主入口: handle_upstream
function _M.handle_upstream(api_ctx, route, enable_websocket)
local up_id = route.value.upstream_id
if up_id then
local upstream = apisix_upstream.get_by_id(up_id)
if not upstream then
if is_http then
return core.response.exit(502)
end
return ngx_exit(1)
end
api_ctx.matched_upstream = upstream
else
if route.has_domain then
local err
route, err = parse_domain_in_route(route)
if err then
core.log.error("failed to get resolved route: ", err)
return core.response.exit(500)
end
end
local route_val = route.value
api_ctx.matched_upstream = (route.dns_value and
route.dns_value.upstream)
or route_val.upstream
end
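这里, 如果 route 配置了 upstream_id, 则通过 apisix_upstream.get_by_id(up_id) 获取 upstream; 否则使用 route 自身内联的 upstream, 此时若 route 中包含域名 (route.has_domain), 则先调用 parse_domain_in_route(route) 解析域名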
调用链路:
if up_id:
apisix/init.lua: apisix_upstream.get_by_id(up_id)
->
apisix/upstream.lua: get_by_id(up_id)
-> upstream_util.parse_domain_in_up(upstream)
->
apisix/utils/upstream.lua: parse_domain_in_up(up)
-> parse_domain_for_nodes(nodes)
-> core.resolver.parse_domain(host)
else:
if route.has_domain:
apisix/init.lua: parse_domain_in_route(route)
-> upstream_util.parse_domain_for_nodes(nodes)
->
apisix/utils/upstream.lua: parse_domain_for_nodes(nodes)
-> core.resolver.parse_domain(host)
公共部分:
->
apisix/core/resolver.lua: parse_domain(host) -> utils.dns_parse(host)
->
apisix/core/utils.lua: dns_client.new(opts) -> current_dns_client:resolve(domain, selector)
->
lib: resty.dns.client
1. parse_domain_for_nodes
-- Build a new node list in which every node whose host is a domain name
-- has that host replaced by a resolved IP address.
--
-- @param nodes  sequence of node tables; each node's `host` field is read
-- @return a new table of nodes. Nodes whose resolution yields no IP are
--         left out, so the result can be an empty table. Only this single
--         value is returned; no error value is passed back to the caller.
local function parse_domain_for_nodes(nodes)
-- presumably pre-sizes the array part to the input length -- confirm
-- against core.table.new
local new_nodes = core.table.new(#nodes, 0)
for _, node in ipairs(nodes) do
local host = node.host
-- a host accepted by neither parse_ipv4 nor parse_ipv6 is treated as a
-- domain name and goes through the resolver
if not ipmatcher.parse_ipv4(host) and
not ipmatcher.parse_ipv6(host) then
local ip, err = core.resolver.parse_domain(host)
if ip then
-- work on a clone so the input node itself is not modified
local new_node = core.table.clone(node)
new_node.host = ip
-- keep the original name next to the resolved address
new_node.domain = host
core.table.insert(new_nodes, new_node)
end
-- a failed lookup is only logged; the node is dropped from the result
if err then
core.log.error("dns resolver domain: ", host, " error: ", err)
end
else
-- host already parses as an IP address: keep the node unchanged
core.table.insert(new_nodes, node)
end
end
-- returned unconditionally, even when no node was inserted
return new_nodes
end
_M.parse_domain_for_nodes = parse_domain_for_nodes
注意, 这里没有检测最终结果是否为空: 当所有域名都解析失败时, 函数仍然返回一个空 table, 且不返回 err。因此调用方 local new_nodes, err = parse_domain_for_nodes(nodes) 拿到的 new_nodes 不是 nil, err 为 nil, 无法感知解析失败。
这会带来一个问题: 当 dns server 短时间不可用之后又恢复了, 此时 apisix 中的 dns 解析并没有恢复, 还是会持续失败, 报错如下:
dns server 异常时
[lua] upstream.lua:79: parse_domain_for_nodes(): dns resolver domain: test.com error: failed to query the DNS server: dns client error: 101 empty record received
[lua] resolver.lua:80: parse_domain(): failed to parse domain: test.com, error: failed to query the DNS server: dns client error: 101 empty record received
并且, 在dns server恢复后
[lua] init.lua:486: handle_upstream(): failed to set upstream: no valid upstream node
直到重启apisix, 或者执行apisix reload
之前提过一个 issue「bug: dns resolution did not resume immediately after the dns server resume」, 后来由于开发没有复现, 该 issue 已经被关闭 (实际上是可以复现的); 目前的修复方式是打 patch, 在 return new_nodes 前面加上判断, 具体可以参考「fix(build/patches): add patch for parse_domain_for_nodes in apisix/utils/upstream.lua」
2. parse_domain(host)
function _M.parse_domain(host)
local ip_info, err = utils.dns_parse(host)
if not ip_info then
log.error("failed to parse domain: ", host, ", error: ",err) -- line 80
return nil, err
end
3. dns_parse(domain, selector)
local function dns_parse(domain, selector)
....
return current_dns_client:resolve(domain, selector)
4. resolve(domain)
-- Resolve `domain` with the DNS client stored on `self` and return either
-- all answers or one randomly chosen A/AAAA answer.
--
-- @param self      object whose `client` field holds the DNS client
-- @param domain    name handed to client.resolve
-- @param selector  when equal to _M.RETURN_ALL, the full answer list is
--                  returned instead of a single record
-- @return on success: a deep copy of the answer(s), or the result of
--         resolve_srv when an SRV record is present in RETURN_ALL mode;
--         on failure: nil plus an error string
function _M.resolve(self, domain, selector)
local client = self.client
-- this function will dereference the CNAME records
local answers, err = client.resolve(domain)
if not answers then
return nil, "failed to query the DNS server: " .. err
end
-- an answers table carrying `errcode` is reported as a server-side error
if answers.errcode then
return nil, "server returned error code: " .. answers.errcode
.. ": " .. answers.errstr
end
if selector == _M.RETURN_ALL then
log.info("dns resolve ", domain, ", result: ", json.delay_encode(answers))
-- the first SRV record found hands the whole answer list to resolve_srv
for _, answer in ipairs(answers) do
if answer.type == client.TYPE_SRV then
return resolve_srv(client, answers)
end
end
-- a deep copy is returned rather than the table obtained from the client
return table.deepcopy(answers)
end
-- single-record mode: pick one answer at random
-- NOTE(review): if `answers` can be an empty table here, the range
-- (1, 0) is empty and `answer` would not be usable -- confirm that
-- client.resolve never returns an empty list on success
local idx = math_random(1, #answers)
local answer = answers[idx]
local dns_type = answer.type
-- only A and AAAA records are accepted in single-record mode
if dns_type == client.TYPE_A or dns_type == client.TYPE_AAAA then
log.info("dns resolve ", domain, ", result: ", json.delay_encode(answer))
return table.deepcopy(answer)
end
return nil, "unsupported DNS answer"
end
依赖库
从 https://github.com/Kong/lua-resty-dns-client fork 过来,改了什么:
- feat: support run in stream subsystem https://github.com/api7/lua-resty-dns-client/pull/1