diff -Nru iproute-20111117/debian/changelog iproute-20111117/debian/changelog --- iproute-20111117/debian/changelog 2013-10-15 16:00:29.000000000 +0000 +++ iproute-20111117/debian/changelog 2014-04-15 19:46:08.000000000 +0000 @@ -1,3 +1,9 @@ +iproute (20111117-1ubuntu2.3) precise; urgency=medium + + * Fix ip netns scaling issues for 1000s of routers. (LP: #1281366) + + -- Rafael David Tinoco Tue, 15 Apr 2014 14:05:33 -0500 + iproute (20111117-1ubuntu2.1) precise; urgency=low * Fix ip netns delete failures. (LP: #1238981) diff -Nru iproute-20111117/debian/patches/ip-remove-unnecessary-ll_ini_map.patch iproute-20111117/debian/patches/ip-remove-unnecessary-ll_ini_map.patch --- iproute-20111117/debian/patches/ip-remove-unnecessary-ll_ini_map.patch 1970-01-01 00:00:00.000000000 +0000 +++ iproute-20111117/debian/patches/ip-remove-unnecessary-ll_ini_map.patch 2014-04-15 19:05:31.000000000 +0000 @@ -0,0 +1,112 @@ +Description: iproute2: Remove unnecessary calls to ll_init_map. + +In the iproute code, ll_init_map is the function responsible for populating +the ll_cache structure. This involves sending netlink requests to get all +interface names (and their indexes) and populating them into the cache +structure. + +Removing some of these calls gets rid of some overhead and makes +iproute scale better. 
The call to ll_init_map inside iplink_modify ("ip link add" +cmd) was also removed, improving performance in the case of several +hundred existing interfaces (specific to bug #1281366). + +Author: Stephen Hemminger + +Origin: upstream, commit: f0124b0f0aa0e5b9288114eb8e6ff9b4f8c33ec8 +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1281366 +Last-Update: 2014-04-14 + +--- + ip/ipaddress.c | 2 -- + ip/ipaddrlabel.c | 2 -- + ip/iplink.c | 2 -- + ip/iproute.c | 6 ------ + ip/xfrm_monitor.c | 2 -- + 5 files changed, 14 deletions(-) + +diff --git a/ip/ipaddress.c b/ip/ipaddress.c +index 85f05a2..40b6515 100644 +--- a/ip/ipaddress.c ++++ b/ip/ipaddress.c +@@ -1171,8 +1171,6 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv) + if (!scoped && cmd != RTM_DELADDR) + req.ifa.ifa_scope = default_scope(&lcl); + +- ll_init_map(&rth); +- + if ((req.ifa.ifa_index = ll_name_to_index(d)) == 0) { + fprintf(stderr, "Cannot find device \"%s\"\n", d); + return -1; +diff --git a/ip/ipaddrlabel.c b/ip/ipaddrlabel.c +index a71855d..173e794 100644 +--- a/ip/ipaddrlabel.c ++++ b/ip/ipaddrlabel.c +@@ -246,8 +246,6 @@ static int ipaddrlabel_flush(int argc, char **argv) + + int do_ipaddrlabel(int argc, char **argv) + { +- ll_init_map(&rth); +- + if (argc < 1) { + return ipaddrlabel_list(0, NULL); + } else if (matches(argv[0], "list") == 0 || +diff --git a/ip/iplink.c b/ip/iplink.c +index 35e6dc6..256c487 100644 +--- a/ip/iplink.c ++++ b/ip/iplink.c +@@ -473,8 +473,6 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv) + } + } + +- ll_init_map(&rth); +- + if (type) { + struct rtattr *linkinfo = NLMSG_TAIL(&req.n); + addattr_l(&req.n, sizeof(req), IFLA_LINKINFO, NULL, 0); +diff --git a/ip/iproute.c b/ip/iproute.c +index 15b36e8..7a78f84 100644 +--- a/ip/iproute.c ++++ b/ip/iproute.c +@@ -964,8 +964,6 @@ int iproute_modify(int cmd, unsigned flags, int argc, char **argv) + if (d || nhs_ok) { + int idx; + +- ll_init_map(&rth); +- + if (d) { + if ((idx = 
ll_name_to_index(d)) == 0) { + fprintf(stderr, "Cannot find device \"%s\"\n", d); +@@ -1240,8 +1238,6 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action) + if (do_ipv6 == AF_UNSPEC && filter.tb) + do_ipv6 = AF_INET; + +- ll_init_map(&rth); +- + if (id || od) { + int idx; + +@@ -1427,8 +1423,6 @@ int iproute_get(int argc, char **argv) + exit(1); + } + +- ll_init_map(&rth); +- + if (idev || odev) { + int idx; + +diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c +index 6a5b331..e27fd2a 100644 +--- a/ip/xfrm_monitor.c ++++ b/ip/xfrm_monitor.c +@@ -366,8 +366,6 @@ int do_xfrm_monitor(int argc, char **argv) + return rtnl_from_file(fp, xfrm_accept_msg, (void*)stdout); + } + +- //ll_init_map(&rth); +- + if (rtnl_open_byproto(&rth, groups, NETLINK_XFRM) < 0) + exit(1); + +-- +1.9.1 + diff -Nru iproute-20111117/debian/patches/libnetlink-add-attribute-access-inline-functions.patch iproute-20111117/debian/patches/libnetlink-add-attribute-access-inline-functions.patch --- iproute-20111117/debian/patches/libnetlink-add-attribute-access-inline-functions.patch 1970-01-01 00:00:00.000000000 +0000 +++ iproute-20111117/debian/patches/libnetlink-add-attribute-access-inline-functions.patch 2014-04-15 19:05:31.000000000 +0000 @@ -0,0 +1,55 @@ +Description: add netlink attribute inline functions. + +These functions are needed by patch ll_map-add-name-and-index-hash.patch. + +These 3 patches: + +ip-remove-unnecessary-ll_ini_map.patch +ll_map-add-name-and-index-hash.patch +libnetlink-add-attribute-access-inline-functions.patch + +solve the performance issues seen in bug #1281366. 
+ +Author: Stephen Hemminger + +Origin: upstream, commit: 46c5d64d6967da4401122159a8d9488c1878be65 +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1281366 +Last-Update: 2013-04-14 + +--- + include/libnetlink.h | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/include/libnetlink.h b/include/libnetlink.h +index 07bd707..b890ce6 100644 +--- a/include/libnetlink.h ++++ b/include/libnetlink.h +@@ -71,6 +71,25 @@ extern int __parse_rtattr_nested_compat(struct rtattr *tb[], int max, struct rta + ({ data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \ + __parse_rtattr_nested_compat(tb, max, rta, len); }) + ++static inline __u16 rta_getattr_u16(const struct rtattr *rta) ++{ ++ return *(__u16 *)RTA_DATA(rta); ++} ++static inline __u16 rta_getattr_u32(const struct rtattr *rta) ++{ ++ return *(__u32 *)RTA_DATA(rta); ++} ++static inline __u64 rta_getattr_u64(const struct rtattr *rta) ++{ ++ __u64 tmp; ++ memcpy(&tmp, RTA_DATA(rta), sizeof(__u64)); ++ return tmp; ++} ++static inline const char *rta_getattr_str(const struct rtattr *rta) ++{ ++ return RTA_DATA(rta); ++} ++ + extern int rtnl_listen(struct rtnl_handle *, rtnl_filter_t handler, + void *jarg); + extern int rtnl_from_file(FILE *, rtnl_filter_t handler, +-- +1.9.1 + diff -Nru iproute-20111117/debian/patches/ll_map-add-name-and-index-hash.patch iproute-20111117/debian/patches/ll_map-add-name-and-index-hash.patch --- iproute-20111117/debian/patches/ll_map-add-name-and-index-hash.patch 1970-01-01 00:00:00.000000000 +0000 +++ iproute-20111117/debian/patches/ll_map-add-name-and-index-hash.patch 2014-04-15 19:05:31.000000000 +0000 @@ -0,0 +1,385 @@ +Description: add a name hash list to ll_map. + +The old ll_cache list is replaced by a new hash list. This makes iproute +perform better in situations with a large number of interfaces. This +also allows ll_* functions to work without calling ll_init_map (our +main objective, solving bug #1281366). 
+ +Author: Stephen Hemminger + +Origin: upstream, commit: 0025e5d63d5d1598ab622867834a3bcb9f518f9f +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1281366 +Last-Update: 2013-04-14 + +--- + include/hlist.h | 56 +++++++++++++++++ + include/ll_map.h | 5 +- + lib/ll_map.c | 181 +++++++++++++++++++++++++++++++------------------------ + 3 files changed, 163 insertions(+), 79 deletions(-) + create mode 100644 include/hlist.h + +diff --git a/include/hlist.h b/include/hlist.h +new file mode 100644 +index 0000000..2cbb1c1 +--- /dev/null ++++ b/include/hlist.h +@@ -0,0 +1,56 @@ ++#ifndef __HLIST_H__ ++#define __HLIST_H__ 1 ++/* Hash list stuff from kernel */ ++ ++#include ++ ++#define container_of(ptr, type, member) ({ \ ++ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ ++ (type *)( (char *)__mptr - offsetof(type,member) );}) ++ ++struct hlist_head { ++ struct hlist_node *first; ++}; ++ ++struct hlist_node { ++ struct hlist_node *next, **pprev; ++}; ++ ++static inline void hlist_del(struct hlist_node *n) ++{ ++ struct hlist_node *next = n->next; ++ struct hlist_node **pprev = n->pprev; ++ *pprev = next; ++ if (next) ++ next->pprev = pprev; ++} ++ ++static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) ++{ ++ struct hlist_node *first = h->first; ++ n->next = first; ++ if (first) ++ first->pprev = &n->next; ++ h->first = n; ++ n->pprev = &h->first; ++} ++ ++#define hlist_for_each(pos, head) \ ++ for (pos = (head)->first; pos ; pos = pos->next) ++ ++ ++#define hlist_for_each_safe(pos, n, head) \ ++ for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ ++ pos = n) ++ ++#define hlist_entry_safe(ptr, type, member) \ ++ ({ typeof(ptr) ____ptr = (ptr); \ ++ ____ptr ? 
hlist_entry(____ptr, type, member) : NULL; \ ++ }) ++ ++#define hlist_for_each_entry(pos, head, member) \ ++ for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\ ++ pos; \ ++ pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) ++ ++#endif /* __HLIST_H__ */ +\ No newline at end of file +diff --git a/include/ll_map.h b/include/ll_map.h +index 752b827..0fb5076 100644 +--- a/include/ll_map.h ++++ b/include/ll_map.h +@@ -1,9 +1,12 @@ + #ifndef __LL_MAP_H__ + #define __LL_MAP_H__ 1 + ++extern char *if_indextoname (unsigned int __ifindex, char *__ifname) __THROW; ++ + extern int ll_remember_index(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg); +-extern int ll_init_map(struct rtnl_handle *rth); ++ ++extern void ll_init_map(struct rtnl_handle *rth); + extern unsigned ll_name_to_index(const char *name); + extern const char *ll_index_to_name(unsigned idx); + extern const char *ll_idx_n2a(unsigned idx, char *buf); +diff --git a/lib/ll_map.c b/lib/ll_map.c +index d3cde22..0dd5468 100644 +--- a/lib/ll_map.c ++++ b/lib/ll_map.c +@@ -18,16 +18,18 @@ + #include + #include + #include +-#include ++//#include ++#include + + #include "libnetlink.h" + #include "ll_map.h" ++#include "hlist.h" + + extern unsigned int if_nametoindex (const char *); + +-struct ll_cache +-{ +- struct ll_cache *idx_next; ++struct ll_cache { ++ struct hlist_node idx_hash; ++ struct hlist_node name_hash; + unsigned flags; + int index; + unsigned short type; +@@ -37,46 +39,99 @@ struct ll_cache + }; + + #define IDXMAP_SIZE 1024 +-static struct ll_cache *idx_head[IDXMAP_SIZE]; ++static struct hlist_head idx_head[IDXMAP_SIZE]; ++static struct hlist_head name_head[IDXMAP_SIZE]; + +-static inline struct ll_cache *idxhead(int idx) ++static struct ll_cache *ll_get_by_index(unsigned index) + { +- return idx_head[idx & (IDXMAP_SIZE - 1)]; ++ struct hlist_node *n; ++ unsigned h = index & (IDXMAP_SIZE - 1); ++ ++ hlist_for_each(n, &idx_head[h]) { ++ struct ll_cache *im ++ = 
container_of(n, struct ll_cache, idx_hash); ++ if (im->index == index) ++ return im; ++ } ++ ++ return NULL; ++} ++ ++static unsigned namehash(const char *str) ++{ ++ unsigned hash = 5381; ++ ++ while (*str) ++ hash = ((hash << 5) + hash) + *str++; /* hash * 33 + c */ ++ ++ return hash; ++} ++ ++static struct ll_cache *ll_get_by_name(const char *name) ++{ ++ struct hlist_node *n; ++ unsigned h = namehash(name) & (IDXMAP_SIZE - 1); ++ ++ hlist_for_each(n, &name_head[h]) { ++ struct ll_cache *im ++ = container_of(n, struct ll_cache, name_hash); ++ ++ if (strncmp(im->name, name, IFNAMSIZ) == 0) ++ return im; ++ } ++ ++ return NULL; + } + + int ll_remember_index(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg) + { +- int h; ++ unsigned int h; ++ const char *ifname; + struct ifinfomsg *ifi = NLMSG_DATA(n); +- struct ll_cache *im, **imp; ++ struct ll_cache *im; + struct rtattr *tb[IFLA_MAX+1]; + +- if (n->nlmsg_type != RTM_NEWLINK) +- return 0; ++ if (n->nlmsg_type != RTM_NEWLINK && n->nlmsg_type != RTM_DELLINK) ++ return 0; + + if (n->nlmsg_len < NLMSG_LENGTH(sizeof(ifi))) + return -1; + ++ im = ll_get_by_index(ifi->ifi_index); ++ if (n->nlmsg_type == RTM_DELLINK) { ++ if (im) { ++ hlist_del(&im->name_hash); ++ hlist_del(&im->idx_hash); ++ free(im); ++ } ++ return 0; ++ } ++ + memset(tb, 0, sizeof(tb)); + parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), IFLA_PAYLOAD(n)); +- if (tb[IFLA_IFNAME] == NULL) ++ ifname = rta_getattr_str(tb[IFLA_IFNAME]); ++ if (ifname == NULL) + return 0; + +- h = ifi->ifi_index & (IDXMAP_SIZE - 1); +- for (imp = &idx_head[h]; (im=*imp)!=NULL; imp = &im->idx_next) +- if (im->index == ifi->ifi_index) +- break; +- +- if (im == NULL) { +- im = malloc(sizeof(*im)); +- if (im == NULL) +- return 0; +- im->idx_next = *imp; +- im->index = ifi->ifi_index; +- *imp = im; ++ if (im) { ++ /* change to existing entry */ ++ if (strcmp(im->name, ifname) != 0) { ++ hlist_del(&im->name_hash); ++ h = namehash(ifname) & (IDXMAP_SIZE - 1); ++ 
hlist_add_head(&im->name_hash, &name_head[h]); ++ } ++ ++ im->flags = ifi->ifi_flags; ++ return 0; + } + ++ ++ im = malloc(sizeof(*im)); ++ if (im == NULL) ++ return 0; ++ im->index = ifi->ifi_index; ++ strcpy(im->name, ifname); + im->type = ifi->ifi_type; + im->flags = ifi->ifi_flags; + if (tb[IFLA_ADDRESS]) { +@@ -89,7 +144,13 @@ int ll_remember_index(const struct sockaddr_nl *who, + im->alen = 0; + memset(im->addr, 0, sizeof(im->addr)); + } +- strcpy(im->name, RTA_DATA(tb[IFLA_IFNAME])); ++ ++ h = ifi->ifi_index & (IDXMAP_SIZE - 1); ++ hlist_add_head(&im->idx_hash, &idx_head[h]); ++ ++ h = namehash(ifname) & (IDXMAP_SIZE - 1); ++ hlist_add_head(&im->name_hash, &name_head[h]); ++ + return 0; + } + +@@ -100,11 +161,13 @@ const char *ll_idx_n2a(unsigned idx, char *buf) + if (idx == 0) + return "*"; + +- for (im = idxhead(idx); im; im = im->idx_next) +- if (im->index == idx) +- return im->name; ++ im = ll_get_by_index(idx); ++ if (im) ++ return im->name; ++ ++ if (if_indextoname(idx, buf) == NULL) ++ snprintf(buf, IFNAMSIZ, "if%d", idx); + +- snprintf(buf, IFNAMSIZ, "if%d", idx); + return buf; + } + +@@ -122,69 +185,33 @@ int ll_index_to_type(unsigned idx) + + if (idx == 0) + return -1; +- for (im = idxhead(idx); im; im = im->idx_next) +- if (im->index == idx) +- return im->type; +- return -1; +-} +- +-unsigned ll_index_to_flags(unsigned idx) +-{ +- const struct ll_cache *im; +- +- if (idx == 0) +- return 0; + +- for (im = idxhead(idx); im; im = im->idx_next) +- if (im->index == idx) +- return im->flags; +- return 0; ++ im = ll_get_by_index(idx); ++ return im ? 
im->type : -1; + } + +-unsigned ll_index_to_addr(unsigned idx, unsigned char *addr, +- unsigned alen) ++unsigned ll_index_to_flags(unsigned idx) + { + const struct ll_cache *im; + + if (idx == 0) + return 0; + +- for (im = idxhead(idx); im; im = im->idx_next) { +- if (im->index == idx) { +- if (alen > sizeof(im->addr)) +- alen = sizeof(im->addr); +- if (alen > im->alen) +- alen = im->alen; +- memcpy(addr, im->addr, alen); +- return alen; +- } +- } +- return 0; ++ im = ll_get_by_index(idx); ++ return im ? im->flags : -1; + } + + unsigned ll_name_to_index(const char *name) + { +- static char ncache[IFNAMSIZ]; +- static int icache; +- struct ll_cache *im; +- int i; ++ const struct ll_cache *im; + unsigned idx; + + if (name == NULL) + return 0; + +- if (icache && strcmp(name, ncache) == 0) +- return icache; +- +- for (i=0; iidx_next) { +- if (strcmp(im->name, name) == 0) { +- icache = im->index; +- strcpy(ncache, name); +- return im->index; +- } +- } +- } ++ im = ll_get_by_name(name); ++ if (im) ++ return im->index; + + idx = if_nametoindex(name); + if (idx == 0) +@@ -192,12 +219,12 @@ unsigned ll_name_to_index(const char *name) + return idx; + } + +-int ll_init_map(struct rtnl_handle *rth) ++void ll_init_map(struct rtnl_handle *rth) + { + static int initialized; + + if (initialized) +- return 0; ++ return; + + if (rtnl_wilddump_request(rth, AF_UNSPEC, RTM_GETLINK) < 0) { + perror("Cannot send dump request"); +@@ -210,6 +237,4 @@ int ll_init_map(struct rtnl_handle *rth) + } + + initialized = 1; +- +- return 0; + } +-- +1.9.1 + diff -Nru iproute-20111117/debian/patches/series iproute-20111117/debian/patches/series --- iproute-20111117/debian/patches/series 2013-10-15 15:57:34.000000000 +0000 +++ iproute-20111117/debian/patches/series 2014-04-15 19:05:31.000000000 +0000 @@ -2,3 +2,6 @@ txtdocs.diff cross.diff make-ip-netns-delete-more-likely-to-succeed.patch +libnetlink-add-attribute-access-inline-functions.patch +ll_map-add-name-and-index-hash.patch 
+ip-remove-unnecessary-ll_ini_map.patch