#!/bin/bash

# Names of the custom chains this script creates through the firewalld direct interface.
aembit_raw_prerouting_chain_name=aembit_raw_prerouting_chain
aembit_raw_output_chain_name=aembit_raw_output_chain
aembit_nat_output_chain_name=aembit_nat_output_chain
aembit_filter_input_chain_name=aembit_filter_input_chain

# User whose primary group is used by the "owner" match rules below to recognise
# (and exempt) traffic generated by the Agent Proxy itself.
aembit_agent_proxy_user_name=aembit_agent_proxy

# Resolve the primary group ID up front and stop if it cannot be determined.
# With an empty GID every `--gid-owner` rule below would be rejected while the
# REDIRECT rules would still be installed, so the Agent Proxy's own outbound
# traffic would be redirected straight back to the Agent Proxy.
if ! aembit_group_id=$(id --group "${aembit_agent_proxy_user_name}") || [[ -z "${aembit_group_id}" ]]; then
    printf 'error: unable to resolve the primary group of user "%s"; not installing firewall rules\n' \
        "${aembit_agent_proxy_user_name}" >&2
    exit 1
fi

# Local port that new TCP connections are redirected to.
agent_proxy_port=38080
# Local port that outbound DNS (UDP/53) is redirected to.
agent_dns_port=8053
# https://learn.microsoft.com/en-us/azure/virtual-network/what-is-ip-address-168-63-129-16
azure_platform_resources_ip="168.63.129.16"

# Intentionally a plain string that is expanded unquoted (word-split) at each
# call site, so that `--quiet` is passed as a separate argument.
FIREWALL_CMD="firewall-cmd --quiet"

# ----- aembit_raw_output_chain ------

# Create the chain in the "raw" table.
${FIREWALL_CMD} --direct --add-chain ipv4 raw "${aembit_raw_output_chain_name}"

# Argument prefix shared by every rule added to this chain (priority 0).
raw_output_rule_args=(--direct --add-rule ipv4 raw "${aembit_raw_output_chain_name}" 0)

# Rule 1: do not track outgoing DNS packets sent by the Agent Proxy.
#
# We encountered a bug where egress traffic from the Agent Proxy's DNS proxy was
# routed back to the Agent Proxy itself. It happened for the following reasons:
#
# 1. The Client Workload and the Agent Proxy operate on shared networking
#    infrastructure, which means that they have the same IP address.
# 2. When the Client Workload initially connects, the DNS NAT rule in this file
#    rewrites the destination IP and port to point at the Agent Proxy.
# 3. Once an iptables NAT rule has been evaluated, the result is saved as a
#    "connection" by conntrack and the rule is not evaluated again until that
#    conntrack entry expires. The connection is therefore saved as:
#        - Original direction: <CW/AP IP>:<EPHEMERAL PORT X> -> <EXTERNAL DNS IP>:53
#        - Reply direction: 127.0.0.1:8053 -> <CW/AP IP>:<EPHEMERAL PORT X>
# 4. The Agent Proxy requests an ephemeral port from the host for its own outgoing
#    DNS request, and is sometimes assigned a port that was previously used and
#    released by the Client Workload. Because the mapping from step 3 is still in
#    conntrack, the Agent Proxy's outbound DNS request is sent to the Agent Proxy,
#    forming a loop.
#
# To avoid this loop, we set the NOTRACK target on outgoing DNS packets in the
# "raw" table, which is evaluated prior to conntrack. This causes conntrack to not
# try to associate the packet with any connection, even if one already exists.
#
# See https://www.frozentux.net/iptables-tutorial/iptables-tutorial.html#STATEMACHINE
# for more information on conntrack.
${FIREWALL_CMD} "${raw_output_rule_args[@]}" \
    -p udp --dport 53 \
    -m owner --gid-owner "${aembit_group_id}" \
    -j CT --notrack

# Rule 2: do not track DNS replies sent by systemd-resolved (127.0.0.53:53).
#
# The story for this rule is very similar to the one above (it was only observed
# in the VM environment):
#
# 1. The CW uses an ephemeral port to talk to resolved (listening on
#    127.0.0.53:53), and the exchange is added to conntrack.
# 2. The CW releases this port, and it becomes immediately available.
# 3. The AP happens to be given the same port by the system when it performs DNS
#    resolution as part of connecting to a server workload.
# 4. The AP request to 127.0.0.53:53 is NOTRACK (per the rule above).
# 5. The reply from resolved (127.0.0.53:53) to the AP hits the stale conntrack
#    entry and is NATed (for some reason the source port of the reply is
#    modified), so the packet never reaches the AP.
#
# As a result, we add this rule to make sure that traffic from resolved is not NATed.
${FIREWALL_CMD} "${raw_output_rule_args[@]}" \
    -p udp -s 127.0.0.53 --sport 53 \
    -j CT --notrack

# Hook the chain into the built-in raw OUTPUT chain.
${FIREWALL_CMD} --direct --add-rule ipv4 raw OUTPUT 0 -j "${aembit_raw_output_chain_name}"

# ----- aembit_raw_prerouting_chain -----

# Create the chain in the "raw" table.
${FIREWALL_CMD} --direct --add-chain ipv4 raw "${aembit_raw_prerouting_chain_name}"

# Exclude incoming UDP packets with source port 53 (i.e. DNS replies) from
# connection tracking.
# NOTE(review): presumably the inbound counterpart of the NOTRACK rules in the raw
# OUTPUT chain, so that replies are not matched against stale conntrack entries -
# confirm.
${FIREWALL_CMD} --direct --add-rule ipv4 raw \
    "${aembit_raw_prerouting_chain_name}" 0 \
    -p udp --sport 53 \
    -j CT --notrack

# By default firewalld rejects packets of an untracked connection in its filter
# INPUT chain. Trace log for such a reject (see the last rule of filter_INPUT below):
#   "trace id c124d79a inet firewalld filter_INPUT rule reject with icmpx type admin-prohibited (verdict drop)"
# Snippet of the default filter_INPUT rules:
# chain filter_INPUT {
#                type filter hook input priority filter + 10; policy accept;
#                ct state { established, related } accept
#                ct status dnat accept
#                iifname "lo" accept
#                jump filter_INPUT_POLICIES_pre
#                jump filter_INPUT_ZONES_SOURCE
#                jump filter_INPUT_ZONES
#                jump filter_INPUT_POLICIES_post
#                ct state { invalid } drop
#                reject with icmpx type admin-prohibited
# }
# The rich rule below is added to the `filter_IN_public_allow` chain (when the
# default zone is `public`), which is reached from filter_INPUT through
# filter_INPUT_ZONES. The equivalent iptables rule would be
# `iptables -t filter -A INPUT -p udp --sport 53 -m conntrack --ctstate UNTRACKED -j ACCEPT`.
#
# NOTE(review): the rich rule matches on the UDP source port only - it restricts
# neither the source address nor the destination port - so any inbound UDP packet
# sent from port 53 is accepted. Consider narrowing it if that is broader than
# intended.
${FIREWALL_CMD} \
    --add-rich-rule='rule source-port port=53 protocol=udp accept'

# Hook the chain into the built-in raw PREROUTING chain.
${FIREWALL_CMD} --direct --add-rule ipv4 raw PREROUTING 0 \
    -j "${aembit_raw_prerouting_chain_name}"

# ----- aembit_nat_output_chain ------

# firewalld does not guarantee the relative order of direct rules that share the
# same priority, and this chain is only correct if every RETURN (exemption) rule
# is evaluated before the REDIRECT rules. The exemptions therefore stay at
# priority 0 and the redirects are placed at priority 1. The order within each
# group does not matter.
# NOTE: anything that removes these rules one by one must use the same
# priorities and arguments.
nat_exempt_priority=0
nat_redirect_priority=1

# Create chain
${FIREWALL_CMD} --direct --add-chain ipv4 nat "${aembit_nat_output_chain_name}"

# --- Exemptions (priority 0) ---

# Ignore all packets from the Agent Proxy
${FIREWALL_CMD} --direct --add-rule ipv4 nat "${aembit_nat_output_chain_name}" "${nat_exempt_priority}" \
    -m owner --gid-owner "${aembit_group_id}" -j RETURN

# Ignore TCP loopback
${FIREWALL_CMD} --direct --add-rule ipv4 nat "${aembit_nat_output_chain_name}" "${nat_exempt_priority}" \
    -p tcp -o lo -j RETURN

# Ignore outbound traffic to the Azure communication channel.
# See https://learn.microsoft.com/en-us/azure/virtual-network/what-is-ip-address-168-63-129-16.
${FIREWALL_CMD} --direct --add-rule ipv4 nat "${aembit_nat_output_chain_name}" "${nat_exempt_priority}" \
    -p tcp --destination "${azure_platform_resources_ip}" -j RETURN

# Do not redirect DNS queries sent by systemd-resolved itself. The user does not
# exist on every host, so a failed lookup is expected and its error output is
# deliberately discarded; the exemption is simply skipped in that case.
if resolve_d_user_id=$(id --user systemd-resolve 2>/dev/null) && [[ -n "${resolve_d_user_id}" ]]; then
    ${FIREWALL_CMD} --direct --add-rule ipv4 nat "${aembit_nat_output_chain_name}" "${nat_exempt_priority}" \
        -p udp --dport 53 -m owner --uid-owner "${resolve_d_user_id}" -j RETURN
fi

# --- Redirects (priority 1) ---

# Redirect all new TCP connections to the Agent Proxy.
# We use the "--syn" flag to forward new connections only, since,
# prior to adding NAT rules, conntrack is not loaded. Once we add NAT rules,
# conntrack is loaded, but mistakenly identifies existing connections as new
# connections, redirecting them erroneously to the Agent Proxy mid-connection.
${FIREWALL_CMD} --direct --add-rule ipv4 nat "${aembit_nat_output_chain_name}" "${nat_redirect_priority}" \
    -p tcp --syn -j REDIRECT --to-ports "${agent_proxy_port}"

# Redirect the remaining DNS traffic to the Agent Proxy DNS port.
${FIREWALL_CMD} --direct --add-rule ipv4 nat "${aembit_nat_output_chain_name}" "${nat_redirect_priority}" \
    -p udp --dport 53 -j REDIRECT --to-ports "${agent_dns_port}"

# Hook the chain into the built-in nat OUTPUT chain.
${FIREWALL_CMD} --direct --add-rule ipv4 nat OUTPUT 0 -j "${aembit_nat_output_chain_name}"

# ----- aembit_filter_input_chain ------

# Create chain
${FIREWALL_CMD} --direct --add-chain ipv4 filter "${aembit_filter_input_chain_name}"

# firewalld does not guarantee the relative order of direct rules that share the
# same priority. The loopback ACCEPT must be evaluated before the catch-all
# REJECT, so the two rules are given increasing priorities (0, then 1).
# NOTE: anything that removes these rules one by one must use the same priorities.

# Allow local traffic to be sent to the Agent Proxy traffic port.
${FIREWALL_CMD} --direct --add-rule ipv4 filter "${aembit_filter_input_chain_name}" 0 \
    -p tcp --dport "${agent_proxy_port}" -i lo -j ACCEPT

# Disallow the rest of the traffic (coming from external interfaces) to be sent to the Agent Proxy traffic port.
${FIREWALL_CMD} --direct --add-rule ipv4 filter "${aembit_filter_input_chain_name}" 1 \
    -p tcp --dport "${agent_proxy_port}" -j REJECT

# Hook the chain into the built-in filter INPUT chain.
${FIREWALL_CMD} --direct --add-rule ipv4 filter INPUT 0 -j "${aembit_filter_input_chain_name}"

# Persist the runtime configuration built above, then reload firewalld from the
# permanent configuration. A reload discards runtime-only changes, so it must
# only happen after the rules were saved successfully; otherwise everything this
# script just added would be silently thrown away.
if ! ${FIREWALL_CMD} --runtime-to-permanent; then
    printf 'error: firewall-cmd --runtime-to-permanent failed; skipping reload to keep the runtime rules\n' >&2
    exit 1
fi

${FIREWALL_CMD} --reload
