9.1. 阻塞进程

当有人向你请求你无法立即完成的事情时,你会怎么做? 如果你是人类,并且你被另一个人打扰,你唯一能说的就是:“现在不行,我很忙。 走开!”。 但是,如果你是一个内核模块,并且你被一个进程打扰,你还有另一种可能性。 你可以让进程进入睡眠状态,直到你可以为它服务。 毕竟,进程一直在被内核置于睡眠状态并唤醒(这就是多个进程看起来像在同一时间在单个 CPU 上运行的方式)。

此内核模块是这方面的一个例子。 该文件(名为/proc/sleep)一次只能被一个进程打开。 如果文件已被打开,则内核模块会调用wait_event_interruptible[1]。 此函数将任务的状态(任务是内核数据结构,用于保存有关进程及其所处系统调用的信息,如果有的话)更改为TASK_INTERRUPTIBLE,这意味着该任务在被唤醒之前不会运行,并将其添加到 WaitQ,即等待访问文件的任务队列。 然后,该函数调用调度器以上下文切换到另一个进程,即对 CPU 有些用处的进程。

当一个进程完成文件操作后,它会关闭文件,并module_close被调用。 该函数会唤醒队列中的所有进程(没有机制只唤醒其中一个)。 然后它返回,刚刚关闭文件的进程可以继续运行。 稍后,调度器决定该进程已运行足够时间,并将 CPU 控制权交给另一个进程。 最终,队列中的一个进程将由调度器赋予 CPU 控制权。 它从调用module_interruptible_sleep_on[2] 之后的位置开始。 然后它可以继续设置一个全局变量,以告知所有其他进程该文件仍处于打开状态,并继续执行。 当其他进程获得 CPU 时间片时,它们将看到该全局变量并返回睡眠状态。

因此,我们将使用 tail -f 在后台保持文件打开,同时尝试使用另一个进程访问它(同样在后台,这样我们就不需要切换到不同的 vt)。 一旦第一个后台进程被 kill %1 杀死,第二个进程就会被唤醒,能够访问该文件并最终终止。

为了让我们的生活更有趣,module_close并非只有 才能唤醒等待访问文件的进程。 信号,例如 Ctrl+c (SIGINT)也可以唤醒进程。 [3] 在这种情况下,我们希望立即返回-EINTR。 这很重要,这样用户就可以例如在进程接收文件之前终止它。

还有一点需要记住。 有时进程不想睡眠,它们要么想立即获得它们想要的东西,要么想被告知无法完成。 这样的进程在打开文件时使用O_NONBLOCK标志。 内核应该通过返回错误代码-EAGAIN来响应那些原本会阻塞的操作,例如在本例中打开文件。 程序 cat_noblock(在本章的源代码目录中可用)可用于打开文件,使用O_NONBLOCK.

hostname:~/lkmpg-examples/09-BlockingProcesses# insmod sleep.ko
hostname:~/lkmpg-examples/09-BlockingProcesses# cat_noblock /proc/sleep
Last input:
hostname:~/lkmpg-examples/09-BlockingProcesses# tail -f /proc/sleep &
Last input:
Last input:
Last input:
Last input:
Last input:
Last input:
Last input:
tail: /proc/sleep: file truncated
[1] 6540
hostname:~/lkmpg-examples/09-BlockingProcesses# cat_noblock /proc/sleep
Open would block
hostname:~/lkmpg-examples/09-BlockingProcesses# kill %1
[1]+  Terminated              tail -f /proc/sleep
hostname:~/lkmpg-examples/09-BlockingProcesses# cat_noblock /proc/sleep
Last input:
hostname:~/lkmpg-examples/09-BlockingProcesses#

示例 9-1. sleep.c

/*
 *  sleep.c - create a /proc file, and if several processes try to open it at
 *  the same time, put all but one to sleep
 */

#include <linux/kernel.h>	/* We're doing kernel work */
#include <linux/module.h>	/* Specifically, a module */
#include <linux/proc_fs.h>	/* Necessary because we use proc fs */
#include <linux/sched.h>	/* For putting processes to sleep and 
				   waking them up */
#include <asm/uaccess.h>	/* for get_user and put_user */

/* 
 * The module's file functions 
 */

/* 
 * Here we keep the last message received, to prove that we can process our
 * input
 */
#define MESSAGE_LENGTH 80
static char Message[MESSAGE_LENGTH];

static struct proc_dir_entry *Our_Proc_File;
#define PROC_ENTRY_FILENAME "sleep"

/* 
 * Since we use the file operations struct, we can't use the special proc
 * output provisions - we have to use a standard read function, which is this
 * function
 */
static ssize_t module_output(struct file *file,	/* see include/linux/fs.h   */
			     char *buf,	/* The buffer to put data to 
					   (in the user segment)    */
			     size_t len,	/* The length of the buffer */
			     loff_t * offset)
{
	static int finished = 0;
	int i;
	char message[MESSAGE_LENGTH + 30];

	/* 
	 * Return 0 to signify end of file - that we have nothing 
	 * more to say at this point.
	 */
	if (finished) {
		finished = 0;
		return 0;
	}

	/* 
	 * If you don't understand this by now, you're hopeless as a kernel
	 * programmer.
	 */
	sprintf(message, "Last input:%s\n", Message);
	for (i = 0; i < len && message[i]; i++)
		put_user(message[i], buf + i);

	finished = 1;
	return i;		/* Return the number of bytes "read" */
}

/* 
 * This function receives input from the user when the user writes to the /proc
 * file.
 */
static ssize_t module_input(struct file *file,	/* The file itself */
			    const char *buf,	/* The buffer with input */
			    size_t length,	/* The buffer's length */
			    loff_t * offset)
{				/* offset to file - ignore */
	int i;

	/* 
	 * Put the input into Message, where module_output will later be 
	 * able to use it
	 */
	for (i = 0; i < MESSAGE_LENGTH - 1 && i < length; i++)
		get_user(Message[i], buf + i);
	/* 
	 * we want a standard, zero terminated string 
	 */
	Message[i] = '\0';

	/* 
	 * We need to return the number of input characters used 
	 */
	return i;
}

/* 
 * 1 if the file is currently open by somebody 
 */
int Already_Open = 0;

/* 
 * Queue of processes who want our file 
 */
DECLARE_WAIT_QUEUE_HEAD(WaitQ);
/* 
 * Called when the /proc file is opened 
 */
static int module_open(struct inode *inode, struct file *file)
{
	/* 
	 * If the file's flags include O_NONBLOCK, it means the process doesn't
	 * want to wait for the file.  In this case, if the file is already 
	 * open, we should fail with -EAGAIN, meaning "you'll have to try 
	 * again", instead of blocking a process which would rather stay awake.
	 */
	if ((file->f_flags & O_NONBLOCK) && Already_Open)
		return -EAGAIN;

	/* 
	 * This is the correct place for try_module_get(THIS_MODULE) because 
	 * if a process is in the loop, which is within the kernel module,
	 * the kernel module must not be removed.
	 */
	try_module_get(THIS_MODULE);

	/* 
	 * If the file is already open, wait until it isn't 
	 */

	while (Already_Open) {
		int i, is_sig = 0;

		/* 
		 * This function puts the current process, including any system
		 * calls, such as us, to sleep.  Execution will be resumed right
		 * after the function call, either because somebody called 
		 * wake_up(&WaitQ) (only module_close does that, when the file 
		 * is closed) or when a signal, such as Ctrl-C, is sent 
		 * to the process
		 */
		wait_event_interruptible(WaitQ, !Already_Open);

		/* 
		 * If we woke up because we got a signal we're not blocking, 
		 * return -EINTR (fail the system call).  This allows processes
		 * to be killed or stopped.
		 */

/*
 * Emmanuel Papirakis:
 *
 * This is a little update to work with 2.2.*.  Signals now are contained in
 * two words (64 bits) and are stored in a structure that contains an array of
 * two unsigned longs.  We now have to make 2 checks in our if.
 *
 * Ori Pomerantz:
 *
 * Nobody promised me they'll never use more than 64 bits, or that this book
 * won't be used for a version of Linux with a word size of 16 bits.  This code
 * would work in any case.
 */
		for (i = 0; i < _NSIG_WORDS && !is_sig; i++)
			is_sig =
			    current->pending.signal.sig[i] & ~current->
			    blocked.sig[i];

		if (is_sig) {
			/* 
			 * It's important to put module_put(THIS_MODULE) here,
			 * because for processes where the open is interrupted
			 * there will never be a corresponding close. If we 
			 * don't decrement the usage count here, we will be 
			 * left with a positive usage count which we'll have no
			 * way to bring down to zero, giving us an immortal 
			 * module, which can only be killed by rebooting 
			 * the machine.
			 */
			module_put(THIS_MODULE);
			return -EINTR;
		}
	}

	/* 
	 * If we got here, Already_Open must be zero 
	 */

	/* 
	 * Open the file 
	 */
	Already_Open = 1;
	return 0;		/* Allow the access */
}

/* 
 * Called when the /proc file is closed 
 */
int module_close(struct inode *inode, struct file *file)
{
	/* 
	 * Set Already_Open to zero, so one of the processes in the WaitQ will
	 * be able to set Already_Open back to one and to open the file. All 
	 * the other processes will be called when Already_Open is back to one,
	 * so they'll go back to sleep.
	 */
	Already_Open = 0;

	/* 
	 * Wake up all the processes in WaitQ, so if anybody is waiting for the
	 * file, they can have it.
	 */
	wake_up(&WaitQ);

	module_put(THIS_MODULE);

	return 0;		/* success */
}

/*
 * This function decides whether to allow an operation (return zero) or not
 * allow it (return a non-zero which indicates why it is not allowed).
 *
 * The operation can be one of the following values:
 * 0 - Execute (run the "file" - meaningless in our case)
 * 2 - Write (input to the kernel module)
 * 4 - Read (output from the kernel module)
 *
 * This is the real function that checks file permissions. The permissions
 * returned by ls -l are for reference only, and can be overridden here.
 */
static int module_permission(struct inode *inode, int op, struct nameidata *nd)
{
	/* 
	 * We allow everybody to read from our module, but only root (uid 0)
	 * may write to it
	 */
	if (op == 4 || (op == 2 && current->euid == 0))
		return 0;

	/* 
	 * If it's anything else, access is denied 
	 */
	return -EACCES;
}

/* 
 * Structures to register as the /proc file, with pointers to all the relevant
 * functions.
 */

/* 
 * File operations for our proc file. This is where we place pointers to all
 * the functions called when somebody tries to do something to our file. NULL
 * means we don't want to deal with something.
 */
static struct file_operations File_Ops_4_Our_Proc_File = {
	.read = module_output,	/* "read" from the file */
	.write = module_input,	/* "write" to the file */
	.open = module_open,	/* called when the /proc file is opened */
	.release = module_close,	/* called when it's closed */
};

/* 
 * Inode operations for our proc file.  We need it so we'll have somewhere to
 * specify the file operations structure we want to use, and the function we
 * use for permissions. It's also possible to specify functions to be called
 * for anything else which could be done to an inode (although we don't bother,
 * we just put NULL).
 */

static struct inode_operations Inode_Ops_4_Our_Proc_File = {
	.permission = module_permission,	/* check for permissions */
};

/* 
 * Module initialization and cleanup 
 */

/* 
 * Initialize the module - register the proc file 
 */

int init_module()
{

	Our_Proc_File = create_proc_entry(PROC_ENTRY_FILENAME, 0644, NULL);
	
	if (Our_Proc_File == NULL) {
		remove_proc_entry(PROC_ENTRY_FILENAME, &proc_root);
		printk(KERN_ALERT "Error: Could not initialize /proc/test\n");
		return -ENOMEM;
	}
	
	Our_Proc_File->owner = THIS_MODULE;
	Our_Proc_File->proc_iops = &Inode_Ops_4_Our_Proc_File;
	Our_Proc_File->proc_fops = &File_Ops_4_Our_Proc_File;
	Our_Proc_File->mode = S_IFREG | S_IRUGO | S_IWUSR;
	Our_Proc_File->uid = 0;
	Our_Proc_File->gid = 0;
	Our_Proc_File->size = 80;
	
	printk(KERN_INFO "/proc/test created\n");
	
	return 0;
}

/* 
 * Cleanup - unregister our file from /proc.  This could get dangerous if
 * there are still processes waiting in WaitQ, because they are inside our
 * open function, which will get unloaded. I'll explain how to avoid removal
 * of a kernel module in such a case in chapter 10.
 */
void cleanup_module()
{
	remove_proc_entry(PROC_ENTRY_FILENAME, &proc_root);
	
	printk(KERN_INFO "/proc/test removed\n");
}

示例 9-2. cat_noblock.c

/* cat_noblock.c - open a file and display its contents, but exit rather than
 * wait for input */


/* Copyright (C) 1998 by Ori Pomerantz */



#include <stdio.h>    /* standard I/O */
#include <fcntl.h>    /* for open */
#include <unistd.h>   /* for read */ 
#include <stdlib.h>   /* for exit */
#include <errno.h>    /* for errno */

#define MAX_BYTES 1024*4


main(int argc, char *argv[])
{
  int    fd;  /* The file descriptor for the file to read */
  size_t bytes; /* The number of bytes read */
  char   buffer[MAX_BYTES]; /* The buffer for the bytes */  


  /* Usage */
  if (argc != 2) {
    printf("Usage: %s <filename>\n", argv[0]);
    puts("Reads the content of a file, but doesn't wait for input");
    exit(-1);
  }

  /* Open the file for reading in non blocking mode */ 
  fd = open(argv[1], O_RDONLY | O_NONBLOCK);

  /* If open failed */
  if (fd == -1) {
    if (errno = EAGAIN)
      puts("Open would block");
    else
      puts("Open failed");
    exit(-1);
  }

  /* Read the file and output its contents */
  do {
    int i;

    /* Read characters from the file */
    bytes = read(fd, buffer, MAX_BYTES);

    /* If there's an error, report it and die */
    if (bytes == -1) {
      if (errno = EAGAIN)
	puts("Normally I'd block, but you told me not to");
      else
	puts("Another read error");
      exit(-1);
    }

    /* Print the characters */
    if (bytes > 0) {
      for(i=0; i<bytes; i++)
	putchar(buffer[i]);
    }

    /* While there are no errors and the file isn't over */
  } while (bytes > 0);
}

注释

[1]

保持文件打开的最简单方法是使用 tail -f 打开它。

[2]

这意味着进程仍然处于内核模式 —— 就进程而言,它发出了open系统调用,并且系统调用尚未返回。 进程不知道在它发出调用和返回的时刻之间的大部分时间里,其他人使用了 CPU。

[3]

这是因为我们使用了module_interruptible_sleep_onwait_event_interruptible。 我们可以使用module_sleep_on