- 论坛徽章:
- 0
|
本帖最后由 狼族狼心 于 2010-07-01 14:26 编辑
工作中碰到netlink相关的问题,请教大侠们,用户态和Kernel之间netlink通讯,使用的场景如下:
1. 局端nd系统,终端CPE系统(ND和CPE都是linux系统,中间采用无线通讯,采用802.11n协议,内核版本2.6.15);
2. ND下可挂载多个CPE,ND内核用一个链表记录CPE相关信息,链表中一个节点记录一个CPE的属性特征;
3. ND用户态各进程(如shell)可以通过netlink查看内核下边挂载的CPE情况;
4. ND用户态还有个特殊进程mib,对外与远端网管系统通讯(SNMP协议),在ND内部也通过netlink获取内核态挂载的CPE信息;
5. ND中netlink的实现:在用户态以库的形式做成了统一接口,供用户态各个进程调用,每一个调用过程都是: 初始化netlink(创建,绑定),发送,接收,关闭netlink
网络结构如下:
                 ND
           ------------
           |   user   |<----------> 网管
 (netlink) |     |    |
           |  kernel  |
           ------------
                 |
                 |
          -------|-------
          |      |      |
          |      |      |
          V      V      V
         CPE    CPE    CPE
***问题:
网管定时(时间比较短,秒级别)去ND获取CPE的Topo信息(即ND中kernel挂载的CPE信息),开始时可以取到(netlink通讯正常),过一段时间之后(几秒钟之后),失败,跟踪原因是Netlink(ND的mib进程)绑定失败; 而此时ND本身用户态进程(shell)通过netlink去获取仍然是成功的;我还做了这样的实验,在shell中起定时器,每隔一秒便调用接口去获取topo信息,这是成功的;
*** 为什么会出现这样的现象,mib进程读取为什么会绑定失败,而shell却一直成功?
附ND 用户态netlink接口代码:
1. 获取TOP信息接口:
/*
 * Fetch the topology information from the kernel over netlink.
 *
 * One complete transaction is performed per call: a netlink socket is
 * created and bound to the caller's pid, an empty UNL_GET_TOPO_INFO request
 * is sent, the reply is read into *ti, and the socket is closed again.
 *
 * send_param  request context; pid and nl_fd are overwritten by this call.
 * ti          receives the reply; a reply shorter than
 *             sizeof(l2mng_topo_info_t) is rejected.
 *
 * Returns L2MNG_SUCCESS when a full-sized reply was received, otherwise
 * L2MNG_ERROR.
 */
int l2mng_get_topo(l2mng_send_param_t *send_param, l2mng_topo_info_t *ti)
{
    const int expected = sizeof(l2mng_topo_info_t);
    int status;
    int count;

    if (!send_param || !ti) {
        l2mng_print("%s[%d]: param error\n",__FUNCTION__,__LINE__);
        return L2MNG_ERROR;
    }

    /* Open the netlink socket, bound to this process' pid. */
    send_param->pid = getpid();
    status = l2mng_init(send_param->pid, &send_param->nl_fd);
    if (status != L2MNG_SUCCESS) {
        l2mng_print("%s[%d]: l2mng_init, ret=%d\n",__FUNCTION__,__LINE__,status);
        return L2MNG_ERROR;
    }

    /* Pessimistic default; only a complete reply flips it to success. */
    status = L2MNG_ERROR;

    /* The request carries no payload (length 0). */
    count = l2mng_sento(send_param, UNL_GET_TOPO_INFO, (char *)ti, 0);
    if (count <= 0) {
        l2mng_print("%s[%d]: send failed, ret=%d\n",__FUNCTION__,__LINE__,count);
    } else {
        count = l2mng_recv(send_param->nl_fd, (char *)ti, expected, NL_RECV_TIMEOUT);
        if (count <= 0) {
            l2mng_print("%s[%d]: recv failed, ret=%d\n",__FUNCTION__,__LINE__,count);
        } else if (count < expected) {
            l2mng_print("%s[%d]: recv data error, ret=%d\n",__FUNCTION__,__LINE__,count);
        } else {
            status = L2MNG_SUCCESS;
        }
    }

    /* The socket is closed on every path once it has been opened. */
    l2mng_fini(send_param->nl_fd);
    return status;
}
2. 其中结构l2mng_send_param_t如下:
/*
 * Request context handed to the l2mng_* netlink helpers.
 *
 * l2mng_sento() copies the first 16 bytes of this structure into the
 * message right after the netlink header; its comment names them as
 * inode, port, type and pid (which assumes a 4-byte int, leaving nl_fd
 * out of the message).
 */
struct _l2mng_send_param
{
    int inode;  /* forwarded to the kernel as-is */
    int port;   /* forwarded to the kernel as-is */
    int type;   /* forwarded to the kernel as-is */
    int pid;    /* caller's pid; used as the netlink bind address and nlmsg_pid */
    int nl_fd;  /* netlink socket descriptor, filled in by l2mng_init() */
};

typedef struct _l2mng_send_param l2mng_send_param_t;
说明:这里只用到pid,即用户态调用该接口传入的pid;
3. 初始化函数l2mng_init如下:
/*
 * Create a NETLINK_RMTMNG raw netlink socket and bind it to `pid`.
 *
 * pid    netlink address (nl_pid) to bind to. bind() fails if another
 *        netlink socket of the same protocol already owns this address,
 *        e.g. a second socket opened with the same pid before the first
 *        one was closed.
 * nl_fd  out: the bound socket descriptor on success; a negative value
 *        on failure.
 *
 * Returns L2MNG_SUCCESS or L2MNG_ERROR. On failure no descriptor is left
 * open, so the caller has nothing to clean up.
 */
int l2mng_init(int pid, int *nl_fd)
{
    struct sockaddr_nl local;

    if (nl_fd == NULL) {
        l2mng_print("%s[%d]: param error\n",__FUNCTION__,__LINE__);
        return L2MNG_ERROR;
    }

    /* Zero the whole address so no field is left indeterminate. */
    memset(&local, 0, sizeof(local));
    local.nl_family = AF_NETLINK;
    local.nl_pid = pid;
    local.nl_groups = 0;

    /*--create socket--*/
    *nl_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_RMTMNG);
    if (*nl_fd < 0) {
        l2mng_print("%s[%d]: can not create a netlink socket\n",__FUNCTION__,__LINE__);
        return L2MNG_ERROR;
    }

    if (bind(*nl_fd, (struct sockaddr *)&local, sizeof(local)) != 0) {
        l2mng_print("%s[%d]: bind netlink socket failed\n",__FUNCTION__,__LINE__);
        /*
         * Release the socket here: callers return immediately on an init
         * failure without calling l2mng_fini(), so every failed bind
         * would otherwise leak one descriptor.
         */
        close(*nl_fd);
        *nl_fd = -1;
        return L2MNG_ERROR;
    }

    return L2MNG_SUCCESS;
}
4. 发送函数如下:
/*
 * Build one netlink message and send it to the kernel (nl_pid 0).
 *
 * Message layout: struct nlmsghdr | first 16 bytes of *send_param
 * (inode, port, type, pid) | msg_len bytes taken from msgbuf.
 *
 * send_param  request context; supplies pid, nl_fd and the user-info bytes.
 * nl_type     value stored in nlmsg_type.
 * msgbuf      payload; must be non-NULL even when msg_len is 0.
 * msg_len     payload length in bytes, >= 0.
 *
 * Returns the sendto() result (bytes sent, or -1 on a send failure), or
 * L2MNG_ERROR when an argument is invalid or memory cannot be allocated.
 */
static int l2mng_sento(l2mng_send_param_t *send_param, int nl_type, char *msgbuf, int msg_len)
{
    /* Size of the user-info prefix: inode, port, type, pid. */
    enum { USERINFO_LEN = 16 };

    int ret;
    size_t total;
    char *buf;
    char *cursor;
    struct nlmsghdr nl_hdr;
    struct sockaddr_nl kpeer;

    if (send_param == NULL || msgbuf == NULL || msg_len < 0) {
        l2mng_print("%s[%d]: param error\n",__FUNCTION__,__LINE__);
        return L2MNG_ERROR;
    }

    /* Allocate exactly what is sent, so no separate bounds check is needed. */
    total = NLMSG_LENGTH(USERINFO_LEN + msg_len);
    buf = malloc(total);
    if (buf == NULL) {
        l2mng_print("%s[%d]: malloc failed\n",__FUNCTION__,__LINE__);
        return L2MNG_ERROR;
    }
    memset(buf, 0, total);

    /* Destination: the kernel. */
    memset(&kpeer, 0, sizeof(kpeer));
    kpeer.nl_family = AF_NETLINK;
    kpeer.nl_pid = 0;
    kpeer.nl_groups = 0;

    memset(&nl_hdr, 0, sizeof(nl_hdr));
    nl_hdr.nlmsg_len = total;
    nl_hdr.nlmsg_type = nl_type;
    nl_hdr.nlmsg_flags = 0;
    nl_hdr.nlmsg_seq = 0;
    nl_hdr.nlmsg_pid = send_param->pid;

    /*--header, then userinfo (inode,port,type,pid), then payload--*/
    cursor = buf;
    memcpy(cursor, &nl_hdr, sizeof(nl_hdr));
    cursor += sizeof(nl_hdr);
    memcpy(cursor, send_param, USERINFO_LEN);
    cursor += USERINFO_LEN;
    memcpy(cursor, msgbuf, (size_t)msg_len);

    /*--send to kernel, tell the pid--*/
    ret = sendto(send_param->nl_fd, buf, total, 0,
                 (struct sockaddr *)&kpeer, sizeof(kpeer));

    free(buf);
    return ret;
}
5. 接收函数如下:
/*
 * Wait for one netlink message on nl_fd and copy its payload to buff.
 *
 * nl_fd        bound netlink socket.
 * buff         destination for the payload (the bytes after the nlmsghdr).
 * maxlen       capacity of buff in bytes, > 0.
 * timeout_val  how long to wait for data, in seconds.
 *
 * Returns the payload length (0 for a header-only message, in which case
 * buff is untouched), L2MNG_TIMEOUT when nothing arrived in time, or
 * L2MNG_ERROR on bad arguments, allocation failure, a failing
 * select()/recvfrom(), or a message whose received size does not match
 * the nlmsg_len announced in its header.
 */
static int l2mng_recv(int nl_fd, char *buff, int maxlen, int timeout_val)
{
    /* Upper bound on consecutive select() failures before giving up. */
    enum { MAX_SELECT_FAILURES = 5 };

    const size_t hdr_len = sizeof(struct nlmsghdr);
    int ret = L2MNG_ERROR;
    int selret;
    int select_failures = 0;
    ssize_t rcvlen;
    size_t payload_len;
    size_t tmplen;
    char *tmpbuf;
    socklen_t kpeerlen;
    struct timeval timeout;
    fd_set fds;
    struct nlmsghdr nl_hdr;
    struct sockaddr_nl kpeer;

    if ((buff == NULL) || (maxlen <= 0)) {
        l2mng_print("%s[%d]: param error\n",__FUNCTION__,__LINE__);
        return L2MNG_ERROR;
    }
    /* FD_SET() on a descriptor outside [0, FD_SETSIZE) is undefined. */
    if ((nl_fd < 0) || (nl_fd >= FD_SETSIZE)) {
        l2mng_print("%s[%d]: param error\n",__FUNCTION__,__LINE__);
        return L2MNG_ERROR;
    }

    /* Room for the netlink header plus the largest payload we accept. */
    tmplen = (size_t)maxlen + hdr_len;
    tmpbuf = malloc(tmplen);
    if (tmpbuf == NULL) {
        l2mng_print("%s[%d]: malloc error\n",__FUNCTION__,__LINE__);
        return L2MNG_ERROR;
    }

    memset(&kpeer, 0, sizeof(kpeer));
    timeout.tv_sec = timeout_val;
    timeout.tv_usec = 0;

    /*--select, recvfrom--*/
    for (;;) {
        FD_ZERO(&fds);
        FD_SET(nl_fd, &fds);

        selret = select(nl_fd + 1, &fds, NULL, NULL, &timeout);
        if (selret == 0) {
            ret = L2MNG_TIMEOUT; /*--timeout--*/
            l2mng_print("%s[%d]\n",__FUNCTION__,__LINE__);
            break;
        }
        if (selret < 0) {
            /*
             * Retry a few times so an interrupted wait is tolerated, but
             * give up on a persistent failure instead of spinning forever.
             * NOTE(review): reusing `timeout` after a failed select()
             * relies on Linux leaving the remaining time in it - confirm
             * if this is ever built for another platform.
             */
            if (++select_failures >= MAX_SELECT_FAILURES) {
                l2mng_print("%s[%d]: select failed\n",__FUNCTION__,__LINE__);
                ret = L2MNG_ERROR;
                break;
            }
            continue;
        }
        if (!FD_ISSET(nl_fd, &fds)) {
            continue;
        }

        kpeerlen = sizeof(kpeer);
        rcvlen = recvfrom(nl_fd, tmpbuf, tmplen, 0,
                          (struct sockaddr *)&kpeer, &kpeerlen);

        /* Do not read a header that was never (fully) received. */
        if ((rcvlen < 0) || ((size_t)rcvlen < hdr_len)) {
            l2mng_print("%s[%d]: recv is not compelet\n",__FUNCTION__,__LINE__);
            ret = L2MNG_ERROR;
            break;
        }
        memcpy(&nl_hdr, tmpbuf, hdr_len);

        /*--check that the message was received completely--*/
        if ((size_t)rcvlen != (size_t)nl_hdr.nlmsg_len) {
            l2mng_print("%s[%d]: recv is not compelet\n",__FUNCTION__,__LINE__);
            ret = L2MNG_ERROR; /*--incomplete message--*/
            break;
        }

        payload_len = (size_t)rcvlen - hdr_len;
        if (payload_len > (size_t)maxlen) {
            /* Defensive: cannot occur while tmplen == maxlen + hdr_len. */
            l2mng_print("%s[%d]: recv datalen is bigger than maxlen\n",__FUNCTION__,__LINE__);
            ret = L2MNG_ERROR;
            break;
        }
        if (payload_len > 0) {
            memcpy(buff, tmpbuf + hdr_len, payload_len);
        }
        ret = (int)payload_len;
        break;
    }

    free(tmpbuf);
    return ret;
}
6. 关闭netlink函数如下:
/*
 * Close the netlink socket opened by l2mng_init().
 * The result of close() is deliberately discarded.
 */
void l2mng_fini(int nl_fd)
{
    (void)close(nl_fd);
}
|