我们现在位于 bvmlinux 中!在 misc.c:decompress_kernel() 的帮助下,我们将要解压缩 piggy.o 以获得驻留内核镜像 linux/vmlinux。
此文件是纯 32 位启动代码。与之前的两个文件不同,它在源文件中没有 ".code16" 语句。有关详细信息,请参阅 Using as: Writing 16-bit Code。
段描述符(对应于段选择符 __KERNEL_CS 和 __KERNEL_DS)中的段基址等于 0;因此,如果使用这些段选择符中的任何一个,则逻辑地址偏移(以段:偏移格式)将等于其线性地址。对于 zImage,CS:EIP 现在的逻辑地址为 10:1000(线性地址 0x1000);对于 bzImage,逻辑地址为 10:100000(线性地址 0x100000)。
由于分页未启用,线性地址与物理地址相同。有关地址问题,请查看 IA-32 手册(卷 1 第 3.3 章 内存组织,以及卷 3 第 3 章 保护模式内存管理)和 Linux 设备驱动程序:Linux 中的内存管理。
从 setup.S 跳转到此处时,BX=0 且 ESI=INITSEG<<4。
.text
///////////////////////////////////////////////////////////////////////////////
startup_32()
{
cld;
cli;
DS = ES = FS = GS = __KERNEL_DS;
SS:ESP = *stack_start; // end of user_stack[], defined in misc.c
// all segment registers are reloaded after protected mode is enabled
// check that A20 really IS enabled
EAX = 0;
do {
1: DS:[0] = ++EAX;
} while (DS:[0x100000]==EAX);
EFLAGS = 0;
clear BSS; // from _edata to _end
struct moveparams mp; // subl $16,%esp
if (!decompress_kernel(&mp, ESI)) { // return value in AX
restore ESI from stack;
EBX = 0;
goto __KERNEL_CS:100000;
// see linux/arch/i386/kernel/head.S:startup_32
}
/*
* We come here, if we were loaded high.
* We need to move the move-in-place routine down to 0x1000
* and then start it with the buffer addresses in registers,
* which we got from the stack.
*/
3: move move_routine_start..move_routine_end to 0x1000;
// move_routine_start & move_routine_end are defined below
// prepare move_routine_start() parameters
EBX = real mode pointer; // ESI value passed from setup.S
ESI = mp.low_buffer_start;
ECX = mp.lcount;
EDX = mp.high_buffer_start;
EAX = mp.hcount;
EDI = 0x100000;
cli; // make sure we don't get interrupted
goto __KERNEL_CS:1000; // move_routine_start();
}
/* Routine (template) for moving the decompressed kernel in place,
* if we were high loaded. This _must_ PIC-code ! */
///////////////////////////////////////////////////////////////////////////////
move_routine_start()
{
move mp.low_buffer_start to 0x100000, mp.lcount bytes,
in two steps: (lcount >> 2) words + (lcount & 3) bytes;
move/append mp.high_buffer_start, ((mp.hcount + 3) >> 2) words
// 1 word == 4 bytes, as I mean 32-bit code/data.
ESI = EBX; // real mode pointer, as that from setup.S
EBX = 0;
goto __KERNEL_CS:100000;
// see linux/arch/i386/kernel/head.S:startup_32()
move_routine_end:
}
没有找到 _edata 和 _end 的定义?没问题,它们在“内部链接脚本”中定义。在没有指定 -T (--script=) 选项的情况下,ld 使用此内建脚本来链接 compressed/bvmlinux。使用“ld --verbose”显示此脚本,或查看附录 B“内部链接脚本”。
有关 -T (--script=)、-L (--library-path=) 和 --verbose 选项的描述,请参阅 Using LD, the GNU linker: Command Line Options。“man ld”和“info ld”也可能有所帮助。
piggy.o 已解压缩,控制权已传递给 __KERNEL_CS:100000,即 linux/arch/i386/kernel/head.S:startup_32()。请参阅 第 6 节。
#define LOW_BUFFER_START 0x2000
#define LOW_BUFFER_MAX 0x90000
#define HEAP_SIZE 0x3000
///////////////////////////////////////////////////////////////////////////////
asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode)
|-- setup real_mode(=rmode), vidmem, vidport, lines and cols;
|-- if (is_zImage) setup_normal_output_buffer() {
| output_data = 0x100000;
| free_mem_end_ptr = real_mode;
| } else (is_bzImage) setup_output_buffer_if_we_run_high(mv) {
| output_data = LOW_BUFFER_START;
| low_buffer_end = MIN(real_mode, LOW_BUFFER_MAX) & ~0xfff;
| low_buffer_size = low_buffer_end - LOW_BUFFER_START;
| free_mem_end_ptr = &end + HEAP_SIZE;
| // get mv->low_buffer_start and mv->high_buffer_start
| mv->low_buffer_start = LOW_BUFFER_START;
| /* To make this program work, we must have
| * high_buffer_start > &end+HEAP_SIZE;
| * As we will move low_buffer from LOW_BUFFER_START to 0x100000
| * (max low_buffer_size bytes) finally, we should have
| * high_buffer_start > 0x100000+low_buffer_size; */
| mv->high_buffer_start = high_buffer_start
| = MAX(&end+HEAP_SIZE, 0x100000+low_buffer_size);
| mv->hcount = 0 if (0x100000+low_buffer_size > &end+HEAP_SIZE);
| = -1 if (0x100000+low_buffer_size <= &end+HEAP_SIZE);
| /* mv->hcount==0 : we need not move high_buffer later,
| * as it is already at 0x100000+low_buffer_size.
| * Used by close_output_buffer_if_we_run_high() below. */
| }
|-- makecrc(); // create crc_32_tab[]
| puts("Uncompressing Linux... ");
|-- gunzip();
| puts("Ok, booting the kernel.\n");
|-- if (is_bzImage) close_output_buffer_if_we_run_high(mv) {
| // get mv->lcount and mv->hcount
| if (bytes_out > low_buffer_size) {
| mv->lcount = low_buffer_size;
| if (mv->hcount)
| mv->hcount = bytes_out - low_buffer_size;
| } else {
| mv->lcount = bytes_out;
| mv->hcount = 0;
| }
| }
`-- return is_bzImage; // return value in AX
decompress_kernel() 具有 "asmlinkage" 修饰符。在 linux/include/linux/linkage.h 中:
#ifdef __cplusplus
#define CPP_ASMLINKAGE extern "C"
#else
#define CPP_ASMLINKAGE
#endif

#if defined __i386__
#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
#elif defined __ia64__
#define asmlinkage CPP_ASMLINKAGE __attribute__((syscall_linkage))
#else
#define asmlinkage CPP_ASMLINKAGE
#endif
decompress_kernel() 调用 gunzip() -> inflate(),它们定义在 linux/lib/inflate.c 中,以将驻留内核镜像解压缩到低缓冲区(由 output_data 指向)和高缓冲区(仅适用于 bzImage,由 high_buffer_start 指向)。
gzip 文件格式在 RFC 1952 中指定。
表 6. gzip 文件格式
| 组件 | 含义 | 字节 | 注释 |
|---|---|---|---|
| ID1 | 标识符 1 | 1 | 31 (0x1f, \037) |
| ID2 | 标识符 2 | 1 | 139 (0x8b, \213) [a] |
| CM | 压缩方法 | 1 | 8 - 表示 “deflate” 压缩方法 |
| FLG | 标志 | 1 | 大多数情况下为 0 |
| MTIME | 修改时间 | 4 | 原始文件的修改时间 |
| XFL | 额外标志 | 1 | 2 - 压缩器使用最大压缩率,最慢的算法 [b] |
| OS | 操作系统 | 1 | 3 - Unix |
| 额外字段 | - | - | 可变长度,由 FLG 指示的字段 [c] |
| 压缩块 | - | - | 可变长度 |
| CRC32 | - | 4 | 未压缩数据的 CRC 值 |
| ISIZE | 输入大小 | 4 | 未压缩输入数据的大小模 2^32 |

注释:a. 对于 gzip 0.5,ID2 值可以为 158 (0x9e, \236);b. XFL 值为 4 表示压缩器使用最快的算法;c. FLG 位 0(FTEXT)不指示任何“额外字段”。
我们可以利用这些文件格式知识,找出经 gzip 压缩的 linux/vmlinux 的起始位置。
[root@localhost boot]# hexdump -C /boot/vmlinuz-2.4.20-28.9 | grep '1f 8b 08 00'
00004c50 1f 8b 08 00 01 f6 e1 3f 02 03 ec 5d 7d 74 14 55 |.......?...]}t.U|
[root@localhost boot]# hexdump -C /boot/vmlinuz-2.4.20-28.9 -s 0x4c40 -n 64
00004c40 00 80 0b 00 00 fc 21 00 68 00 00 00 1e 01 11 00 |......!.h.......|
00004c50 1f 8b 08 00 01 f6 e1 3f 02 03 ec 5d 7d 74 14 55 |.......?...]}t.U|
00004c60 96 7f d5 a9 d0 1d 4d ac 56 93 35 ac 01 3a 9c 6a |......M.V.5..:.j|
00004c70 4d 46 5c d3 7b f8 48 36 c9 6c 84 f0 25 88 20 9f |MF\.{.H6.l..%. .|
00004c80
[root@localhost boot]# hexdump -C /boot/vmlinuz-2.4.20-28.9 | tail -n 4
00114d40 bd 77 66 da ce 6f 3d d6 33 5c 14 a2 9f 7e fa e9 |.wf..o=.3\...~..|
00114d50 a7 9f 7e fa ff 57 3f 00 00 00 00 00 d8 bc ab ea |..~..W?.........|
00114d60 44 5d 76 d1 fd 03 33 58 c2 f0 00 51 27 00 |D]v...3X...Q'.|
00114d6e
static uch *inbuf; /* input buffer */
static unsigned insize = 0; /* valid bytes in inbuf */
static unsigned inptr = 0; /* index of next byte to be processed in inbuf */
///////////////////////////////////////////////////////////////////////////////
static int gunzip(void)
{
Check input buffer for {ID1, ID2, CM}, must be
{0x1f, 0x8b, 0x08} (normal case), or
{0x1f, 0x9e, 0x08} (for gzip 0.5);
Check FLG (flag byte), must not set bit 1, 5, 6 and 7;
Ignore {MTIME, XFL, OS};
Handle optional structures, which correspond to FLG bit 2, 3 and 4;
inflate(); // handle compressed blocks
Validate {CRC32, ISIZE};
}
// some important definitions in misc.c
#define WSIZE 0x8000 /* Window size must be at least 32k,
* and a power of two */
static uch window[WSIZE]; /* Sliding window buffer */
static unsigned outcnt = 0; /* bytes in output buffer */
// linux/lib/inflate.c
#define wp outcnt
#define flush_output(w) (wp=(w),flush_window())
STATIC unsigned long bb; /* bit buffer */
STATIC unsigned bk; /* bits in bit buffer */
STATIC unsigned hufts; /* track memory usage */
static long free_mem_ptr = (long)&end;
///////////////////////////////////////////////////////////////////////////////
STATIC int inflate()
{
int e; /* last block flag */
int r; /* result code */
unsigned h; /* maximum struct huft's malloc'ed */
void *ptr;
wp = bb = bk = 0;
// inflate compressed blocks one by one
do {
hufts = 0;
gzip_mark() { ptr = free_mem_ptr; };
if ((r = inflate_block(&e)) != 0) {
gzip_release() { free_mem_ptr = ptr; };
return r;
}
gzip_release() { free_mem_ptr = ptr; };
if (hufts > h)
h = hufts;
} while (!e);
/* Undo too much lookahead. The next read will be byte aligned so we
* can discard unused bits in the last meaningful byte. */
while (bk >= 8) {
bk -= 8;
inptr--;
}
/* write the output window window[0..outcnt-1] to output_data,
* update output_ptr/output_data, crc and bytes_out accordingly, and
* reset outcnt to 0. */
flush_output(wp);
/* return success */
return 0;
}
Gzip 使用 Lempel-Ziv 编码 (LZ77) 来压缩文件。压缩数据格式在 RFC 1951 中指定。inflate_block() 将解压缩压缩块,这些压缩块可以被视为位序列。
每个压缩块的数据结构概述如下
BFINAL (1 bit)
0 - not the last block
1 - the last block
BTYPE (2 bits)
00 - no compression
remaining bits until the byte boundary;
LEN (2 bytes);
NLEN (2 bytes, the one's complement of LEN);
data (LEN bytes);
01 - compressed with fixed Huffman codes
{
literal (7-9 bits, represent code 0..287, excluding 256);
// See RFC 1951, table in Paragraph 3.2.6.
length (0-5 bits if literal > 256, represent length 3..258);
// See RFC 1951, 1st alphabet table in Paragraph 3.2.5.
data (of literal bytes if literal < 256);
distance (5 plus 0-13 extra bits if literal == 257..285, represent
distance 1..32768);
/* See RFC 1951, 2nd alphabet table in Paragraph 3.2.5,
* but statement in Paragraph 3.2.6. */
/* Move backward "distance" bytes in the output stream,
* and copy "length" bytes */
}* // can be of multiple instances
literal (7 bits, all 0, literal == 256, means end of block);
10 - compressed with dynamic Huffman codes
HLIT (5 bits, # of Literal/Length codes - 257, 257-286);
HDIST (5 bits, # of Distance codes - 1, 1-32);
HCLEN (4 bits, # of Code Length codes - 4, 4 - 19);
Code Length sequence ((HCLEN+4)*3 bits)
/* The following two alphabet tables will be decoded using
* the Huffman decoding table which is generated from
* the preceding Code Length sequence. */
Literal/Length alphabet (HLIT+257 codes)
Distance alphabet (HDIST+1 codes)
// Decoding tables will be built from these alphabet tables.
/* The following is similar to that of fixed Huffman codes portion,
* except that they use different decoding tables. */
{
literal/length
(variable length, depending on Literal/Length alphabet);
data (of literal bytes if literal < 256);
distance (variable length if literal == 257..285, depending on
Distance alphabet);
}* // can be of multiple instances
literal (literal value 256, which means end of block);
11 - reserved (error)
记住上述数据结构并手边备有 RFC 1951,理解 inflate_block() 并不太难。有关 Huffman 编码和字母表生成的更多信息,请参阅 RFC 1951 中的相关段落。
有关更多详细信息,请参阅 linux/lib/inflate.c、gzip 源代码(含许多内联注释)和相关参考资料。