1 Journey of a received ACK in response to our SYN-ACK as seen by a TCP server in IPv4 TCP/IP
5 -----------------------
7 Where does ethereal (& presumably tcpdump) hook into the new Linux kernel 2.4.16 using netfilter & iptables?
9 Nowhere. They just call libpcap, and let *it* do the hooking. [:-)]
11 Now, the next question would then be "where does libpcap hook into the
12 new Linux kernel 2.4.16 using netfilter & iptables?"
14 The answer to that question is "the same place it hooks into any other
15 Linux 2.2[.x] or 2.4[.x] kernel - through a PF_PACKET socket."
17 The next question would be "where do PF_PACKET sockets tap into the network
18 data stream in a 2.4.16 system using netfilter & iptables?"
20 I don't know the answer offhand, and don't have time to search for it
21 (either in documentation or, as I fear would be required, the code); I'd
22 suggest asking on, say, the linux-net mailing list, if nobody else on
23 this list happens to know the answer.
26 net/core/sock.c::sk_alloc()
29 net/core/sock.c::sk_receive_skb()
30 include/net/sock.h::sock_put()
33 net/core/sock.c::sock_alloc_send_skb()
34 net/core/sock.c::sock_alloc_send_pskb()
36 ------------------------
38 Where does Netfilter hook into the kernel for IPv4?
40 net/ipv4/arp.c:954: return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
41 net/ipv4/ip_output.c:158: return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
42 net/ipv4/ip_output.c:364: return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
43 net/ipv4/ip_input.c:275: return NF_HOOK(PF_INET, NF_IP_LOCAL_IN, skb, skb->dev, NULL,
44 net/ipv4/ip_input.c:434: return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
45 net/ipv4/igmp.c:357: return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev,
46 net/ipv4/igmp.c:692: return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
47 net/ipv4/ip_forward.c:106: return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, rt->u.dst.dev,
50 -------------------------
55 -------------------------
56 Socket changing state to TCP_ESTABLISHED
58 //=include/net/tcp.h::tcp_set_state(sock *, int) {
62 if (oldstate != TCP_ESTABLISHED)
63 TCP_INC_STATS(TCP_MIB_CURRESTAB);
66 /* Change state AFTER socket is unhashed to avoid closed
67 * socket sitting in hash tables.
71 } //=include/net/tcp.h::tcp_set_state()
73 -------------------------
74 Socket promotion from SYN_RECEIVED to TCP_ESTABLISHED
76 //=include/linux/net.h::struct proto_ops->accept
77 //=net/ipv4/af_inet.c::const struct proto_ops inet_stream_ops->accept
78 //=net/ipv4/af_inet.c::inet_accept(socket*, socket*, int) {
80 struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);
82 //=include/net/sock.h::proto->accept (sock, int, int*)
83 //=net/ipv4/af_inet.c::struct inet_protosw inetsw_array[] = {
87 //=net/ipv4/tcp_ipv4.c::struct proto tcp_prot = {
89 .accept = inet_csk_accept,
90 //=net/ipv4/inet_connection_sock.c::inet_csk_accept(sock*, int, int*) {
92 newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
93 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
98 } //=net/ipv4/inet_connection_sock.c::inet_csk_accept()
100 } //=net/ipv4/tcp_ipv4.c::struct proto tcp_prot
102 } //=net/ipv4/af_inet.c::struct inet_protosw inetsw_array
104 } //=net/ipv4/af_inet.c::inet_accept()
105 -------------------------
107 // Socket in state SYN_RECEIVED
111 //=net/core/dev.c::net_rx_action() {
112 //=net/core/dev.c::netif_receive_skb() {
114 //=net/ipv4/ip_input.c:ip_rcv() {
116 //=net/ipv4/ip_input.c:ip_rcv_finish() {
118 //=net/ipv4/ip_input.c:ip_local_deliver() {
120 //=net/ipv4/ip_input.c:ip_local_deliver_finish() {
121 ret = ipprot->handler(skb);
122 //=net/ipv4/af_inet.c::struct net_protocol tcp_protocol->handler
124 //=net/ipv4/tcp_ipv4.c::tcp_v4_rcv() {
126 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff * 4);
128 if (!sock_owned_by_user(sk)) {
130 if (!tcp_prequeue(sk, skb))
131 ret = tcp_v4_do_rcv(sk, skb);
134 sk_add_backlog(sk, skb);
136 //=net/ipv4/tcp_ipv4.c::tcp_v4_do_rcv() {
138 if (sk->sk_state == TCP_LISTEN) {
139 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
145 // ******* TCP_ESTABLISHED is set at the end of the function, so skip down to avoid getting a headache! *****
147 //=net/ipv4/tcp_ipv4.c::tcp_v4_hnd_req() {
149 /* Find possible connection requests. */
150 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,iph->saddr, iph->daddr);
152 return tcp_check_req(sk, skb, req, prev);
154 //=net/ipv4/tcp_minisocks.c::tcp_check_req() {
156 * Process an incoming packet for SYN_RECV sockets represented
160 /* ACK sequence verified above, just make sure ACK is
161 * set. If ACK not set, just silently drop the packet.
163 if (!(flg & TCP_FLAG_ACK))
166 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
167 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
168 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
169 inet_rsk(req)->acked = 1;
173 /* OK, ACK is valid, create big socket and
174 * feed this segment to it. It will repeat all
175 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
176 * ESTABLISHED STATE. If it will be dropped after
177 * socket is created, wait for troubles.
179 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
181 //=net/ipv4/tcp_ipv4.c::struct inet_connection_sock_af_ops ipv4_specific->syn_recv_sock
182 //=net/ipv4/tcp_ipv4.c::tcp_v4_syn_recv_sock() {
184 * The three way handshake has completed - we got a valid synack -
185 * now create the new socket.
188 if (sk_acceptq_is_full(sk))
191 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
194 // **************************** PROMOTION to FULL SOCKET ************************
195 newsk = tcp_create_openreq_child(sk, req, skb);
197 //=net/ipv4/tcp_minisocks.c::tcp_create_openreq_child() {
199 tcp_set_ca_state(newsk, TCP_CA_Open);
200 //=include/net/tcp.h::tcp_set_ca_state() {
202 //=include/linux/tcp.h::enum tcp_ca_state {
204 #define TCPF_CA_Open (1<<TCP_CA_Open)
206 #define TCPF_CA_Disorder (1<<TCP_CA_Disorder)
208 #define TCPF_CA_CWR (1<<TCP_CA_CWR)
210 #define TCPF_CA_Recovery (1<<TCP_CA_Recovery)
212 #define TCPF_CA_Loss (1<<TCP_CA_Loss)
214 }; // =include/linux/tcp.h::enum tcp_ca_state
216 struct inet_connection_sock *icsk = inet_csk(sk);
218 //=include/net/inet_connection_sock.h::struct inet_connection_sock->icsk_ca_ops
220 //=include/net/tcp.h::struct tcp_congestion_ops
222 * Interface for adding new TCP congestion control handlers
225 /* call before changing ca_state (optional) */
226 void (*set_state)(struct sock *sk, u8 new_state);
229 }//=include/net/tcp.h::struct tcp_congestion_ops
231 }//=include/net/inet_connection_sock.h::struct inet_connection_sock
233 if (icsk->icsk_ca_ops->set_state)
234 icsk->icsk_ca_ops->set_state(sk, ca_state);
236 //=net/ipv4/tcp_bic.c::bictcp_state() {
238 if (new_state == TCP_CA_Loss)
239 bictcp_reset(inet_csk_ca(sk));
241 } //=net/ipv4/tcp_bic.c::bictcp_state()
243 icsk->icsk_ca_state = ca_state;
245 } //=include/net/tcp.h::tcp_set_ca_state()
247 } //=net/ipv4/tcp_minisocks.c::tcp_create_openreq_child()
252 newsk->sk_gso_type = SKB_GSO_TCPV4;
253 sk_setup_caps(newsk, dst);
255 __inet_hash(&tcp_hashinfo, newsk, 0);
256 __inet_inherit_port(&tcp_hashinfo, sk, newsk);
261 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
263 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
267 } //=net/ipv4/tcp_ipv4.c::tcp_v4_syn_recv_sock()
270 goto listen_overflow;
272 inet_csk_reqsk_queue_unlink(sk, req, prev);
273 inet_csk_reqsk_queue_removed(sk, req);
275 inet_csk_reqsk_queue_add(sk, req, child);
279 if (!sysctl_tcp_abort_on_overflow) {
280 inet_rsk(req)->acked = 1;
285 NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
286 if (!(flg & TCP_FLAG_RST))
287 req->rsk_ops->send_reset(sk, skb);
289 inet_csk_reqsk_queue_drop(sk, req, prev);
292 } //=net/ipv4/tcp_minisocks.c::tcp_check_req()
293 } //=net/ipv4/tcp_ipv4.c::tcp_v4_hnd_req()
296 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
298 //=net/ipv4/tcp_input.c::tcp_rcv_state_process(sock*, sk_buff*, tcphdr*, unsigned) {
300 /* step 5: check the ACK field */
302 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
304 switch(sk->sk_state) {
307 tp->copied_seq = tp->rcv_nxt;
310 // **************************** FINALLY TCP_ESTABLISHED ************************
311 tcp_set_state(sk, TCP_ESTABLISHED);
312 // *****************************************************************************
314 sk->sk_state_change(sk);
324 } //=net/ipv4/tcp_input.c::tcp_rcv_state_process()
330 } //=net/ipv4/tcp_ipv4.c::tcp_v4_do_rcv()
331 } //=net/ipv4/tcp_ipv4.c::tcp_v4_rcv()
333 } //=net/ipv4/ip_input.c:ip_local_deliver_finish()
335 return NF_HOOK(PF_INET, NF_IP_LOCAL_IN, skb, skb->dev, NULL, ip_local_deliver_finish);
337 } //=net/ipv4/ip_input.c:ip_local_deliver()
338 } //=net/ipv4/ip_input.c:ip_rcv_finish()
340 return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish);
342 } //=net/ipv4/ip_input.c:ip_rcv()
343 // Socket in state ESTABLISHED
348 See Apache directive for TCP_DEFER_ACCEPT http://httpd.apache.org/docs/trunk/mod/core.html#acceptfilter
352 To set or get a TCP socket option, call getsockopt(2) to read or setsockopt(2) to write
355 Allows a listener to be awakened only when data arrives on the socket. Takes an integer value (seconds); this can bound the
356 maximum number of attempts TCP will make to complete the connection. This option should not be used in code intended to be