pfd_nat.c

Go to the documentation of this file.
00001 /*
00002  * $Id: pfd_nat.c 346460 2009-11-14 05:06:47Z ssiano $
00003  *
00004  * This code is provided as is by Juniper Networks SDK Developer Support.
00005  * It is provided with no warranties or guarantees, and Juniper Networks
00006  * will not provide support or maintenance of this code in any fashion.
00007  * The code is provided only to help a developer better understand how
00008  * the SDK can be used.
00009  *
00010  * Copyright (c) 2007-2008, Juniper Networks, Inc.
00011  * All rights reserved.
00012  */
00013 
00024 #include <string.h>
00025 #include <sys/types.h>
00026 #include <time.h>
00027 #include <pthread.h>
00028 #include <errno.h>
00029 #include <isc/eventlib.h>
00030 #include <jnx/aux_types.h>
00031 #include <jnx/vrf_util_pub.h>
00032 #include <jnx/rt_shared_pub.h>
00033 #include <jnx/jnx_types.h>
00034 #include <jnx/mpsdk.h>
00035 #include <sys/jnx/jbuf.h>
00036 #include <sys/socket.h>
00037 #include <netinet/in_systm.h>
00038 #include <netinet/in.h>
00039 #include <netinet/ip.h>
00040 #include <netinet/tcp.h>
00041 #include <arpa/inet.h>
00042 #include <hashtable.h>
00043 #include "pfd_logging.h"
00044 #include "pfd_config.h"
00045 #include "pfd_nat.h"
00046 
00047 /*** Constants ***/
00048 
/**
 * Compile-time tunables for the NAT table.
 *
 * They are plain integer constants, so they are expressed as one anonymous
 * enum instead of separate preprocessor macros.
 */
enum {
    /** Seconds added to the current time whenever an entry is (re)armed. */
    NAT_ENTRY_LIFETIME  = 60,

    /** Number of slots in nat_table; one local port is reserved per slot. */
    NAT_MAX_ENTRIES     = 1000,

    /** Local port that corresponds to nat_table index 0. */
    NAT_LPORT_RANGE_MIN = 50000,

    /** Local port that corresponds to the last nat_table index. */
    NAT_LPORT_RANGE_MAX = NAT_LPORT_RANGE_MIN + NAT_MAX_ENTRIES - 1,

    /** Destination port written into forward-NAT'ed packets (host order). */
    CPD_HTTP_PORT       = 80
};
00076 
00077 /*** Data Structures ***/
00078 
00079 
/**
 * One slot of the NAT table.
 *
 * The slot's index plus NAT_LPORT_RANGE_MIN is the local source port given to
 * the flow. Addresses and ports are stored exactly as copied from the packet
 * headers (no byte-order conversion is applied when they are saved).
 *
 * Field order matters: reverse_nat_packet() hands &ipsrc and &srcport to
 * checksum_adjust() as the start of a multi-field byte run, so ipsrc/ipdst and
 * srcport/dstport must stay adjacent and in this order.
 */
typedef struct nat_entry_s {
    in_addr_t       ipsrc;    /**< source address of the original packet */
    in_addr_t       ipdst;    /**< destination address of the original packet */
    uint16_t        srcport;  /**< TCP source port of the original packet */
    uint16_t        dstport;  /**< TCP destination port of the original packet */
    time_t          exp_time; /**< time after which the slot may be reused */
    pthread_mutex_t lock;     /**< guards this slot */
} nat_table_t;
00091 
00096 static nat_table_t nat_table[NAT_MAX_ENTRIES];
00097 
00102 static struct hashtable * lookup_table = NULL;
00103 static uint16_t next_lport_num; 
00104 static pthread_mutex_t lookup_table_lock; 
00105 
00106 /*
00107  * Note: next_lport_num is one entry past the last free index found in nat_table
00108  * It is only ever accessed when holding the lookup_table_lock so we don't need
00109  * yet another lock for it too.
00110  */
00111 
00112 static uint16_t cpd_port; 
00113 
/**
 * Key used in lookup_table to identify a flow.
 *
 * Values are copied straight from the packet headers, so they stay in the
 * byte order found there. The structure may contain trailing padding;
 * equalKeys() compares the whole structure, which is why every key is zeroed
 * before its fields are filled in.
 */
typedef struct key_s {
    in_addr_t ipsrc;   /**< packet source address */
    in_addr_t ipdst;   /**< packet destination address */
    uint16_t  srcport; /**< TCP source port */
} hash_key_t;
00122 
00123 
00124 /*** STATIC/INTERNAL Functions ***/
00125 
00126 
00127 // Define functions ht_insert, ht_get, and ht_remove
00128 
00132 static DEFINE_HASHTABLE_INSERT(insert_entry, hash_key_t, uint16_t);
00133 
00137 static DEFINE_HASHTABLE_SEARCH(get_entry, hash_key_t, uint16_t);
00138 
00142 static DEFINE_HASHTABLE_REMOVE(remove_entry, hash_key_t, uint16_t);
00143 
00144 
/**
 * Incrementally update a 16-bit one's-complement (Internet) checksum after a
 * run of covered bytes has been replaced, without re-summing the whole packet.
 *
 * The old data is subtracted from, and the new data added to, the complement
 * of the stored checksum using end-around borrow/carry; the result is written
 * back in place, high byte first.
 *
 * Lengths are consumed two bytes at a time. An odd length is handled by
 * treating the final byte as the high byte of a word whose low byte is zero.
 * A zero or negative length contributes nothing. (Previously an odd or
 * negative length made the loop counter step past zero, so the loop never
 * terminated and read beyond the buffers.)
 *
 * @param[in,out] chksum  the two checksum bytes inside the packet
 * @param[in]     optr    old data that the checksum currently covers
 * @param[in]     olen    length of the old data in bytes
 * @param[in]     nptr    new data that replaces the old data
 * @param[in]     nlen    length of the new data in bytes
 */
static void
checksum_adjust(
    unsigned char * chksum,
    const unsigned char * optr,
    int olen,
    const unsigned char * nptr,
    int nlen)
{
    long x, old, new_;

    x = chksum[0] * 256 + chksum[1];
    x = ~x & 0xFFFF;

    // take the old data out of the sum
    while (olen > 0) {
        old = optr[0] * 256;
        if (olen > 1) {
            old += optr[1];
        }
        x -= old & 0xffff;
        if (x <= 0) { x--; x &= 0xffff; } // end-around borrow
        if (olen < 2) {
            break; // trailing odd byte consumed; do not step past the buffer
        }
        optr += 2;
        olen -= 2;
    }

    // put the new data into the sum
    while (nlen > 0) {
        new_ = nptr[0] * 256;
        if (nlen > 1) {
            new_ += nptr[1];
        }
        x += new_ & 0xffff;
        if (x & 0x10000) { x++; x &= 0xffff; } // end-around carry
        if (nlen < 2) {
            break;
        }
        nptr += 2;
        nlen -= 2;
    }

    x = ~x & 0xFFFF;
    chksum[0] = (unsigned char)(x / 256);
    chksum[1] = (unsigned char)(x & 0xff);
}
00192 
00193 
00202 static unsigned int
00203 hashFromKey(void * key)
00204 {
00205     static unsigned int key_length = (2 * sizeof(in_addr_t)) + sizeof(uint16_t);
00206     unsigned int hash, i;
00207     uint8_t * k = (uint8_t *)key;
00208 
00209     for (i = 0, hash = sizeof(hash_key_t); i < key_length; ++i) {
00210         hash = (hash<<4) ^ (hash>>28) ^ (k[i]);
00211     }
00212     
00213     hash += (hash << 3);
00214     hash ^= (hash >> 11);
00215     hash += (hash << 15);
00216    
00217     return hash;
00218 }
00219 
00220 
00232 static int
00233 equalKeys(void * k1, void * k2)
00234 {
00235   return (memcmp(k1, k2, sizeof(hash_key_t)) == 0);
00236 }
00237 
00238 
00239 /*** GLOBAL/EXTERNAL Functions ***/
00240 
00241 
00245 void 
00246 init_nat(void)
00247 {
00248     int i;
00249     
00250     bzero(&nat_table, sizeof(nat_table));
00251     
00252     for(i = 0; i < NAT_MAX_ENTRIES; ++i) {
00253         pthread_mutex_init(&nat_table[i].lock, NULL);
00254     }
00255     
00256     next_lport_num = 0;
00257     
00258     lookup_table = create_hashtable(2*NAT_MAX_ENTRIES, hashFromKey, equalKeys);
00259     INSIST_ERR(lookup_table != NULL);
00260     
00261     pthread_mutex_init(&lookup_table_lock, NULL);
00262     
00263     cpd_port = htons(CPD_HTTP_PORT);
00264 }
00265 
00266 
00270 void 
00271 terminate_nat(void)
00272 {
00273     int i;
00274     
00275     pthread_mutex_destroy(&lookup_table_lock);
00276     
00277     for(i = 0; i < NAT_MAX_ENTRIES; ++i) {
00278         pthread_mutex_destroy(&nat_table[i].lock);
00279     }
00280     
00281     if(lookup_table) {
00282         hashtable_destroy(lookup_table, TRUE); // TRUE to free values
00283         lookup_table = NULL;
00284     }
00285 }
00286 
00287 
00302 boolean
00303 nat_packet(struct ip * ip_pkt, address_bundle_t * addresses)
00304 {
00305     uint16_t * entry_index, start, port;
00306     hash_key_t * key, tmp_key;
00307     struct tcphdr * tcp_hdr = 
00308         (struct tcphdr *)((uint32_t *)ip_pkt + ip_pkt->ip_hl);
00309     int rc;
00310     time_t current_time;
00311     boolean found_free_port = FALSE;
00312     
00313     struct ports_s {
00314         uint16_t src_port;
00315         uint16_t dst_port;
00316     } port_bundle; // bundle ports to speed up checksum calc
00317 
00318     // Lookup into hashtable (lookup_table) with src IP, dst IP, and src port
00319     // Keep network byte order to speed this up (no need to switch) 
00320     
00321     bzero(&tmp_key, sizeof(tmp_key));
00322     tmp_key.ipdst = ip_pkt->ip_dst.s_addr;
00323     tmp_key.ipsrc = ip_pkt->ip_src.s_addr;
00324     tmp_key.srcport = tcp_hdr->th_sport;
00325     
00326     /*
00327      * We need to lock access to the lookup table until we are guaranteed to 
00328      * have a lock on the NAT table entry that it is associated with. Otherwise
00329      * we could lookup an entry and get the local port number, but then another
00330      * thread could take over the processing and find the same port number free 
00331      * (expired) and overwrite the entry.
00332      */
00333     LOCK_MUTEX(&lookup_table_lock);
00334     
00335     entry_index = get_entry(lookup_table, &tmp_key);
00336     
00337     if(entry_index == NULL) { // nothing found in lookup table
00338         current_time = get_current_time();
00339         start = port = next_lport_num;
00340         
00341         /*
00342          * We start searching for a free local port and spot in the nat_table,
00343          * then create a new lookup entry. We start looking at next_lport_num
00344          * which was set by the last thread that found a free spot/port.
00345          */
00346         
00347         do {
00348             if(nat_table[port].exp_time < current_time) {
00349                 // the entry in this spot/for this local port # is expired
00350 
00351                 rc = pthread_mutex_trylock(&nat_table[port].lock);
00352                 
00353                 if(rc == 0) { // we got the lock for this free spot/port
00354                     
00355                     // test an unlikely case (that it is no longer expired)
00356                     if(nat_table[port].exp_time > current_time) {
00357                         /*
00358                          * Somebody overwrote the expiry before we got the lock
00359                          * and after we checked that it was expired, so this
00360                          * entry is still in use. Keep searching ...
00361                          */
00362                         UNLOCK_MUTEX(&nat_table[port].lock);
00363 
00364                     } else { // We're safe to overwrite the entry in this spot
00365                         
00366                         /*
00367                          * Setup next_lport_num better for the next thread that
00368                          * needs to search for a free spot/port. Hopefully it
00369                          * will have "better" luck starting its search from here
00370                          * because (we assume) it is more likely that it is free
00371                          */
00372                         next_lport_num = port + 1;
00373 
00374                         // remove the entry in the hashtable for the entry that
00375                         // was previously in this spot, and insert a new one
00376     
00377                         key = (hash_key_t *)malloc(sizeof(hash_key_t));
00378                         INSIST_ERR(key != NULL);
00379                         bzero(key, sizeof(hash_key_t));
00380     
00381                         key->ipdst = tmp_key.ipdst; // note: tmp_key has values
00382                         key->ipsrc = tmp_key.ipsrc; // from the current packet
00383                         key->srcport = tmp_key.srcport;
00384                         
00385                         entry_index = (uint16_t *)malloc(sizeof(uint16_t));
00386                         INSIST_ERR(entry_index != NULL);
00387                         
00388                         *entry_index = port;
00389                         
00390                         insert_entry(lookup_table, key, entry_index);
00391     
00392                         tmp_key.ipdst = nat_table[port].ipdst;
00393                         tmp_key.ipsrc = nat_table[port].ipsrc;
00394                         tmp_key.srcport = nat_table[port].srcport;
00395                         
00396                         // if the entry was never used freshly init'd these will
00397                         // all be zero and this remove will fail, but it's ok
00398                         entry_index = remove_entry(lookup_table, &tmp_key);
00399                         
00400                         if(entry_index) {
00401                             free(entry_index);
00402                         }
00403                         
00404                         /*
00405                          * Because this is round-robin packet delivery we can 
00406                          * only release this lock here. If it was flow-based, 
00407                          * then there would be no potential race between two 
00408                          * thread doing a get_entry on the lookup_table with the
00409                          * same key.
00410                          * 
00411                          * If this was flow-based we could release the lock at 
00412                          * the top of this block i.e. when we know we have 
00413                          * safely locked nat_table[port].lock 
00414                          */
00415                         UNLOCK_MUTEX(&lookup_table_lock);
00416                         
00417     
00418                         // Setup the new NAT table entry
00419                         nat_table[port].ipdst = ip_pkt->ip_dst.s_addr;
00420                         nat_table[port].ipsrc = ip_pkt->ip_src.s_addr;
00421                         // Destination port should be HTTP/80
00422                         nat_table[port].dstport = tcp_hdr->th_dport;
00423                         nat_table[port].srcport = tcp_hdr->th_sport;
00424                         nat_table[port].exp_time = 
00425                             get_current_time() + NAT_ENTRY_LIFETIME;
00426                         
00427                         UNLOCK_MUTEX(&nat_table[port].lock);
00428     
00429                         found_free_port = TRUE;
00430                         
00431                         break;
00432                     }
00433                     
00434                 } else if(rc != EBUSY) {
00435                     // this should never happen, so abort.
00436                     LOG(LOG_EMERG, "%s:%s: pthread_mutex_trylock failed with "
00437                         "EINVAL (Mutex is invalid for local port %d)",
00438                         __FILE__, __func__, port + NAT_LPORT_RANGE_MIN);
00439                 }
00440                 // if rc == EBUSY, then another thread beat us to it
00441             }
00442 
00443             // increment port being careful to wrap around range of local ports
00444             if(++port == NAT_MAX_ENTRIES) {
00445                 port = 0;
00446             }
00447 
00448         } while(port != start);
00449         
00450         if(!found_free_port) {
00451             UNLOCK_MUTEX(&lookup_table_lock);
00452             return FALSE; // the NAT table is full
00453         }
00454     } else { // Entry exists, so use it
00455         port = *entry_index;
00456         
00457         LOCK_MUTEX(&nat_table[port].lock);
00458         UNLOCK_MUTEX(&lookup_table_lock);
00459             
00460         // refresh expiry time:
00461         nat_table[port].exp_time = get_current_time() + NAT_ENTRY_LIFETIME;
00462         
00463         UNLOCK_MUTEX(&nat_table[port].lock);
00464     }
00465     port += NAT_LPORT_RANGE_MIN; // shift the value into the local port range
00466     
00467     port = htons(port);
00468     
00469     port_bundle.src_port = port;
00470     port_bundle.dst_port = cpd_port;
00471     
00472     // adjust IP checksum taking IP addresses into account
00473     checksum_adjust((unsigned char *)&ip_pkt->ip_sum,
00474         (unsigned char *)&ip_pkt->ip_src, sizeof(address_bundle_t),
00475         (unsigned char *)addresses, sizeof(address_bundle_t));
00476     
00477     // adjust TCP checksum taking IP addresses into account
00478     checksum_adjust((unsigned char *)&tcp_hdr->th_sum,
00479         (unsigned char *)&ip_pkt->ip_src, sizeof(address_bundle_t),
00480         (unsigned char *)addresses, sizeof(address_bundle_t));
00481     
00482     // adjust TCP checksum taking TCP ports into account
00483     checksum_adjust((unsigned char *)&tcp_hdr->th_sum,
00484         (unsigned char *)&tcp_hdr->th_sport, sizeof(struct ports_s),
00485         (unsigned char *)&port_bundle.src_port, sizeof(struct ports_s));
00486     
00487     ip_pkt->ip_dst.s_addr = addresses->cpd_addr;
00488     ip_pkt->ip_src.s_addr = addresses->pfd_addr;
00489     tcp_hdr->th_dport = cpd_port;
00490     tcp_hdr->th_sport = port;
00491     
00492     return TRUE;
00493 }
00494 
00495 
00506 boolean
00507 reverse_nat_packet(struct ip * ip_pkt)
00508 {
00509     uint16_t port; // local port
00510     struct tcphdr * tcp_hdr = 
00511         (struct tcphdr *)((uint32_t *)ip_pkt + ip_pkt->ip_hl);
00512     time_t current_time;
00513     
00514     port = ntohs(tcp_hdr->th_dport);
00515     
00516     if(port < NAT_LPORT_RANGE_MIN || port > NAT_LPORT_RANGE_MAX) {
00517         return FALSE;
00518     }
00519     
00520     port -= NAT_LPORT_RANGE_MIN; // shift into the range for nat_table array
00521     
00522     current_time = get_current_time();
00523     
00524     LOCK_MUTEX(&nat_table[port].lock);
00525     
00526     if(nat_table[port].exp_time < current_time) { // if entry is expired
00527         UNLOCK_MUTEX(&nat_table[port].lock);
00528         return FALSE;
00529     }
00530     
00531     // refresh expiry time
00532     nat_table[port].exp_time = current_time + NAT_ENTRY_LIFETIME;
00533 
00534     UNLOCK_MUTEX(&nat_table[port].lock);
00535     
00536     /*
00537      * It's safe to unlock before reading these values because the entry won't 
00538      * be modified for at least NAT_ENTRY_LIFETIME seconds (other than possibly
00539      * the expiry time
00540      */
00541 
00542     // adjust IP checksum taking IP addresses into account
00543     checksum_adjust((unsigned char *)&ip_pkt->ip_sum,
00544         (unsigned char *)&ip_pkt->ip_src, sizeof(address_bundle_t),
00545         (unsigned char *)&nat_table[port].ipsrc, sizeof(address_bundle_t));
00546     
00547     // adjust TCP checksum taking IP addresses into account
00548     checksum_adjust((unsigned char *)&tcp_hdr->th_sum,
00549         (unsigned char *)&ip_pkt->ip_src, sizeof(address_bundle_t),
00550         (unsigned char *)&nat_table[port].ipsrc, sizeof(address_bundle_t));
00551     
00552     // adjust TCP checksum taking TCP ports into account
00553     checksum_adjust((unsigned char *)&tcp_hdr->th_sum,
00554         (unsigned char *)&tcp_hdr->th_sport, sizeof(u_short) * 2,
00555         (unsigned char *)&nat_table[port].srcport, sizeof(u_short) * 2);
00556     
00557     // rewrite IP addresses and ports
00558     
00559     ip_pkt->ip_dst.s_addr = nat_table[port].ipsrc;
00560     ip_pkt->ip_src.s_addr = nat_table[port].ipdst;
00561     tcp_hdr->th_dport = nat_table[port].srcport;
00562     tcp_hdr->th_sport = nat_table[port].dstport; // Should be HTTP / 80
00563     
00564     return TRUE;
00565 }
00566 
00567 
00579 void
00580 nat_fragment(struct ip * ip_pkt, address_bundle_t * addresses)
00581 {
00582     // adjust IP checksum taking IP addresses into account
00583     checksum_adjust((unsigned char *)&ip_pkt->ip_sum,
00584         (unsigned char *)&ip_pkt->ip_src, sizeof(address_bundle_t),
00585         (unsigned char *)addresses, sizeof(address_bundle_t));
00586     
00587     ip_pkt->ip_src.s_addr = addresses->pfd_addr;
00588     ip_pkt->ip_dst.s_addr = addresses->cpd_addr;
00589 }
00590 

2007-2009 Juniper Networks, Inc. All rights reserved. The information contained herein is confidential information of Juniper Networks, Inc., and may not be used, disclosed, distributed, modified, or copied without the prior written consent of Juniper Networks, Inc. in an express license. This information is subject to change by Juniper Networks, Inc. Juniper Networks, the Juniper Networks logo, and JUNOS are registered trademarks of Juniper Networks, Inc. in the United States and other countries. All other trademarks, service marks, registered trademarks, or registered service marks are the property of their respective owners.
Generated on Sun May 30 20:27:07 2010 for SDK Your Net Corporation Policy Manager Example: Packet Filtering Daemon (pfd) 1.0 by Doxygen 1.5.1