论坛徽章:: 0

电梯直达

1楼 [收藏(0)] [报告]

发表于 2011-07-13 13:15 |只看该作者 |倒序浏览

本帖最后由 leonwang202 于 2011-07-13 19:49 编辑

小弟最近写了个测试程序
1.通过dev_add_pack() 加入了自己的钩子函数，
2.软中断里调用钩子函数，把从eth1网卡上来的skb放入到一个接收队列中，
3.当用户调read的时候，从队列上取出skb，将skb->data传给用户空间。

模块程序跑在 linux-2.6.18 smp 上，总是panic 死在skb_queue_tail() 这个函数上。。。
各位大牛看一下，哪里出的问题，下面是主要代码

#define IF_NAME "eth1"
struct net_device *netdev; //eth1所对应的网卡
int hack_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
if(netdev != dev){ //只接受eth1网卡上的skb
kfree_skb(skb);
return 0;
}
nskb = skb_clone(skb, GFP_ATOMIC);
kfree_skb(skb);
skb_queue_tail(&rcv_queue, nskb); //将skb加入到接收队列
if( waitqueue_active(&rcv_waitqueue) ) //唤醒阻塞的进程
wake_up_interruptible(&rcv_waitqueue);
return 0;
}
static struct packet_type net_hack_type =
{
.type = __constant_htons(ETH_P_IP), //只把IP包传给用户空间
.func = hack_recv, //钩子函数
};
int my_open(struct inode *inode, struct file *filp) //字符设备对应的open操作
{
netdev = dev_get_by_name(IF_NAME); //找到eth1对应的netdev
skb_queue_head_init(&rcv_queue); //初始化接收队列
dev_add_pack(&net_hack_type); //添加自己的钩子函数
return 0;
}
int my_release(struct inode * inode, struct file * filp) //字符设备对应的release操作
{
dev_remove_pack(&net_hack_type);
skb_queue_purge(&rcv_queue);
return 0;
}
ssize_t my_read(struct file *filp, char __user *buf, size_t len, loff_t *f_pos)//字符设备对应的read操作
{
ssize_t copied = 0;
ssize_t to_copy = 0;
struct sk_buff *skb = NULL;
char *tmp = buf;
do{
wait_event_interruptible(rcv_waitqueue,!skb_queue_empty(&rcv_queue)); //接收队列没包的话，阻塞在这里
skb = skb_dequeue(&rcv_queue); //取包
if(!skb)
goto out;
to_copy = skb->len;
if( to_copy > (len - copied)) { //用户空间buf不足
skb_queue_head(&rcv_queue, skb);
goto out;
}
copy_to_user(tmp, skb->data, skb->len);
kfree_skb(skb);
copied += to_copy;
tmp = buf + copied;
} while(copied < len);
out:
return copied;
}

复制代码

模块程序运行后，总是随机panic在 skb_queue_tail()
BUG: unable to handle kernel NULL pointer dereference at virtual address 00000000
EIP is at skb_queue_tail+0x17/0x2d
初步怀疑是同步没做好，
接收队列rcv_queue 只在软中断和用户进程上下文中共享
我看了下skb_queue_tail() 和skb_dequeue()的实现，操作接收队列时，用spin_lock_irqsave()和spin_unlock_irqrestore()保护了啊，为什么还会出现上面的panic 呢？

panic, 软中断, 同步, panic, 软中断, 同步

文库|博客

leonwang202

白手起家

论坛徽章:: 0

2楼 [报告]

发表于 2011-07-13 20:13 |只看该作者

回复 1# leonwang202

完整的panic信息如下：

BUG: unable to handle kernel NULL pointer dereference at virtual address 00000000
printing eip:
c05b57db
*pde = 1b3db001
Oops: 0002 [#1]
SMP
last sysfs file: /devices/pci0000:00/0000:00:1c.2/0000:04:00.1/irq
Modules linked in: net(U) autofs4 hidp rfcomm l2cap bluetooth lockd sunrpc dm_mu ltipath scsi_dh video hwmon backlight sbs i2c_ec i2c_core button battery asus_ac pi ac ipv6 xfrm_nalgo crypto_api parport_pc lp parport joydev sr_mod cdrom sg hp ilo pcspkr serio_raw bnx2 dm_raid45 dm_message dm_region_hash dm_mem_cache dm_sn apshot dm_zero dm_mirror dm_log dm_mod ata_piix libata cciss sd_mod scsi_mod ext 3 jbd uhci_hcd ohci_hcd ehci_hcd
CPU: 3
EIP: 0060:[<c05b57db>] Tainted: G VLI
EFLAGS: 00010006 (2.6.18-164.el5PAE #1)
EIP is at [color=Red]skb_queue_tail+0x17/0x2d[/color]
eax: 00000246 ebx: f8a7ba70 ecx: 00000007 edx: c08850c0
esi: 00000000 edi: f8a7ba7c ebp: f3926000 esp: c0740eb0
ds: 007b es: 007b ss: 0068
Process net_main (pid: 8001, ti=c0740000 task=f1174000 task.ti=daa39000)
Stack: 00000000 f0dd0e00 00000008 f8a79337 c06b4b60 f8a7a760 c05ba65f f3926000
c07e1720 00000000 f0dd0e00 00000000 00000000 f3926400 f0dd0e00 f8986b56
c0740f8c 00000000 f392656c f3926400 f2a3c550 c041df20 006b26ab 0000a307
Call Trace:
[<f8a79337>] hack_recv+0xc1/0xe9 [net]
[<c05ba65f>] netif_receive_skb+0x323/0x370
[<f8986b56>] bnx2_poll_work+0xc22/0xd2c [bnx2]
[<c041df20>] enqueue_task+0x29/0x39
[<c0405946>] common_interrupt+0x1a/0x20
[<c041df20>] enqueue_task+0x29/0x39
[<c041df7a>] __activate_task+0x4a/0x59
[<c041e83d>] try_to_wake_up+0x3e8/0x3f2
[<c04074ba>] do_IRQ+0xb5/0xc3
[<f898a08a>] bnx2_poll_msix+0x29/0xbf [bnx2]
[<c05bc4c4>] net_rx_action+0x9c/0x1a7
[<c04292fb>] __do_softirq+0x87/0x114
[<c04073bb>] do_softirq+0x52/0x9c
[<c044b5b0>] __do_IRQ+0x0/0xd6
[<c04074ba>] do_IRQ+0xb5/0xc3
[<c0405946>] common_interrupt+0x1a/0x20
[<c0617378>] _spin_unlock_irqrestore+0x8/0x9
[<c05b589f>] skb_dequeue+0x39/0x3f
[<f8a79409>] get_one_skb+0x15/0x31 [net]
[<f8a7943c>] net_hack_read+0x17/0x103 [net]
[<c044840a>] audit_syscall_entry+0x15a/0x18c
[<f8a79425>] net_hack_read+0x0/0x103 [net]
[<c047331c>] vfs_read+0x9f/0x141
[<c047376a>] sys_read+0x3c/0x63
[<c0404f17>] syscall_call+0x7/0xb
=======================
Code: 00 00 00 89 51 04 89 0a 89 c2 89 f8 5b 5e 5f e9 ac 1b 06 00 57 56 89 d6 8d 78 0c 53 89 c3 89 f8 e8 3b 1b 06 00 8b 53 04 ff 43 08 <89> 1e 89 56 04 89 32 89 73 04 89 c2 89 f8 5b 5e 5f e9 7f 1b 06
EIP: [<c05b57db>] [color=Red]skb_queue_tail+0x17/0x2d[/color] SS:ESP 0068:c0740eb0
<0>Kernel panic - not syncing: Fatal exception in interrupt
BUG: warning at arch/i386/kernel/smp.c:550/smp_call_function() (Tainted: G )
[<c0415ae0>] stop_this_cpu+0x0/0x33
[<c04158cf>] smp_call_function+0x57/0xc3
[<c0424f2b>] printk+0x18/0x8e
[<c041594e>] smp_send_stop+0x13/0x1c
[<c04244c3>] panic+0x4c/0x16d
[<c04064ce>] die+0x240/0x274
[<c0618716>] do_page_fault+0x52a/0x607
[<c04596c4>] __alloc_pages+0xd8/0x297
[<c06181ec>] do_page_fault+0x0/0x607
[<c0405a89>] error_code+0x39/0x40
[<c05b57db>] skb_queue_tail+0x17/0x2d
[<f8a79337>] hack_recv+0xc1/0xe9 [net]
[<c05ba65f>] netif_receive_skb+0x323/0x370
[<f8986b56>] bnx2_poll_work+0xc22/0xd2c [bnx2]
[<c041df20>] enqueue_task+0x29/0x39
[<c0405946>] common_interrupt+0x1a/0x20
[<c041df20>] enqueue_task+0x29/0x39
[<c041df7a>] __activate_task+0x4a/0x59
[<c041e83d>] try_to_wake_up+0x3e8/0x3f2
[<c04074ba>] do_IRQ+0xb5/0xc3
[<f898a08a>] bnx2_poll_msix+0x29/0xbf [bnx2]
[<c05bc4c4>] net_rx_action+0x9c/0x1a7
[<c04292fb>] __do_softirq+0x87/0x114
[<c04073bb>] do_softirq+0x52/0x9c
[<c044b5b0>] __do_IRQ+0x0/0xd6
[<c04074ba>] do_IRQ+0xb5/0xc3
[<c0405946>] common_interrupt+0x1a/0x20
[<c0617378>] _spin_unlock_irqrestore+0x8/0x9
[<c05b589f>] skb_dequeue+0x39/0x3f
[<f8a79409>] get_one_skb+0x15/0x31 [net]
[<f8a7943c>] net_hack_read+0x17/0x103 [net]
[<c044840a>] audit_syscall_entry+0x15a/0x18c
[<f8a79425>] net_hack_read+0x0/0x103 [net]
[<c047331c>] vfs_read+0x9f/0x141
[<c047376a>] sys_read+0x3c/0x63
[<c0404f17>] syscall_call+0x7/0xb
======================