首页 研究所简介 规章制度 开放指南 仪器设备 技术支持 资源中心 数据知识 对外服务 联系我们
 
 
最新推荐
大型数据库起死回生
数据恢复优秀论文下载
数据恢复教程——PC-3000的工
《数据恢复高级技术》已经出
洁净间的运行管理规范
研究所创造数据恢复速度的新
《数据恢复高级技术》前言
《数据恢复高级技术》一书即
科技日报:关于数据恢复研究
中国计算机报: 北信创建“数
 
热点新闻
研究所成员
关于下达2005年教委资助项目
2005年度第三次研究所学术活
人事处通知《高校基础课任课
关于申报2005年国家留学基金
关于2005年度申请国家留学基
2006年度国家自然科学基金项
关于在北京市开展推荐和初选
各课题小组提交四月活动计划
关于在研究所的研究生中开展
当前位置:首页-> 资源中心-> 技术资料-> 新闻
 
数据恢复教程—Office文档修复原理
 

数据恢复—Office 文档修复原理

 

上节介绍的劳拉文件格式,对修复损坏的Office 文档有什么帮助呢?其实Office 文档的修复过程,与硬盘FAT 文件系统的数据恢复过程十分相似。以修复一个损坏的Word 文件为例,其过程如表7-4 所示:

 

 

1.

通过文件块头确定目录链根的开始块序号;

2.

提取文件根目录结构;

3.

通过目录链表定位要挽救的OLE 对象的位置;

4.

拷贝文件未损坏的OLE 对象;

5.

对得到的OLE 对象进行组合,重新构造一个新的Word 文档。

                 表7-4 Word 文档的修复过程

 

得到的未损坏的OLE 对象越多, 被修复的Word 文档就越理想。如果想自己编写文件修复程序,这里提供一个参考程序ole.c ,能够给出很多有参考价值的信息。

 

 

程序:ole.c

.

编程语言: Borland C++

.

功能:列出一个 Word 文件的内部目录结构

 

 

#include <stdio.h>

#include <stdarg.h>

#include <stdlib.h>

#include <string.h>

#include <malloc.h>

#include <ctype.h>

#include <sys/types.h>

#include <assert.h>

 

/* Smaller of two values. Each argument may be evaluated twice, so do not
   pass expressions with side effects. */
#define MIN(a,b) ((a)<(b) ? (a) : (b))

/* Capacity of the block-number lists (root_list, sbd_list) built in main(). */
#define MAXBLOCKS 64

 

/*
 * One record of the OLE directory ("pps" list) as held in memory.
 * main() fills these from consecutive 0x80-byte records of the root
 * stream; unravel() walks them through the three link pointers.
 */
typedef struct pps_block
{
  char name[64];                /* entry name; main() stores every second byte of the on-disk name */
  int nsize;                    /* 16-bit value read at record offset 0x40; 0 marks an unused record */
  char type;                    /* byte at record offset 0x42; used as index into pps_type (5 = root) */
  struct pps_block *previous;   /* entry selected by the index at record offset 0x44, or NULL */
  struct pps_block *next;       /* entry selected by the index at record offset 0x48, or NULL */
  struct pps_block *directory;  /* entry selected by the index at record offset 0x4c, or NULL */
  long int start;               /* first block of the entry's data (record offset 0x74) */
  long int size;                /* length of the entry's data in bytes (record offset 0x78) */
  int level;                    /* nesting depth assigned by unravel(); main() presets it to -1 */
  int index;                    /* position of this record in the pps list */
} pps_entry;

 

/* Printable labels indexed by the pps type byte (1 = DIR, 2 = FILE,
   5 = ROOT); the remaining slots are empty strings. */
char *pps_type[]={"","DIR ","FILE","","","ROOT"};

 

/* Routine prototypes: helpers that assemble an integer from consecutive
   bytes of a buffer (2 bytes for ShortInt, 4 bytes for LongInt). */

unsigned short int ShortInt(unsigned char* array);

unsigned long int LongInt(unsigned char* array);

 

/*
 * ShortInt - decode a 16-bit unsigned integer stored least-significant
 * byte first.
 *
 * array: points at (at least) two readable bytes; array[0] is the low
 *        byte and array[1] the high byte.
 *
 * Returns the decoded value.
 *
 * The value is assembled with shifts, so the result is the same on any
 * host byte order and no build-time macro has to be set to match the
 * machine.
 */
unsigned short int ShortInt(unsigned char* array)
{
  unsigned int low  = array[0];
  unsigned int high = array[1];

  return (unsigned short int)(low | (high << 8));
}

 

/*
 * LongInt - decode a 32-bit unsigned integer stored least-significant
 * byte first.
 *
 * array: points at (at least) four readable bytes; array[0] is the
 *        lowest-order byte.
 *
 * Returns the decoded value in the range 0 .. 0xFFFFFFFF.
 *
 * The value is assembled with shifts, so the result does not depend on
 * the host byte order or on how wide "unsigned long" is; every bit above
 * bit 31 of the result is zero.  Callers that compare the result with
 * negative markers such as -1 or -2 must take into account that these
 * arrive as 0xFFFFFFFF and 0xFFFFFFFE when long is wider than 32 bits.
 */
unsigned long int LongInt(unsigned char* array)
{
  unsigned long int value;

  value  = (unsigned long int)array[0];
  value |= (unsigned long int)array[1] << 8;
  value |= (unsigned long int)array[2] << 16;
  value |= (unsigned long int)array[3] << 24;
  return value;
}

 

/*
 * unravel - print the directory tree reachable from pps_node and record
 * each entry's nesting depth.
 *
 * pps_node: entry to start from; NULL and unused entries (nsize == 0)
 *           are ignored.
 * level:    depth to assign to pps_node and to the entries reached
 *           through its previous/next links; entries reached through
 *           the directory link get level + 1.
 *
 * Entries are printed in the order previous, self, directory, next.
 *
 * Precondition: every entry's level is negative before the first call
 * (main() sets it to -1).  A non-negative level is taken to mean "already
 * visited", which stops the recursion when a damaged file contains links
 * that form a cycle.
 */
void unravel(pps_entry *pps_node, int level)
{
  const char *label = "";
  int type;

  if(pps_node == NULL) return;
  if(pps_node->nsize == 0) return;
  if(pps_node->level >= 0) return;      /* already visited: cyclic links */

  /* mark before recursing so that a loop through any link terminates */
  pps_node->level = level;

  if(pps_node->previous != NULL) unravel(pps_node->previous,level);

  /* the type byte comes straight from the file: only index the label
     table when it is inside the table, otherwise print no label */
  type = pps_node->type;
  if(type >= 0 && type < (int)(sizeof pps_type / sizeof pps_type[0]))
    label = pps_type[type];

  printf("PPS %s: %*x:  ->%s\n",label,level*3,pps_node->index,pps_node->name);

  if(pps_node->directory != NULL) unravel(pps_node->directory,level+1);
  if(pps_node->next != NULL) unravel(pps_node->next,level);
}

 

 

int main(int argc, char **argv)

{

  FILE *input = NULL;

  FILE *OLEfile = NULL;

  FILE *sbfile = NULL;

  FILE *infile = NULL;

  char Target[64];

  int debug = 0, BlockSize = 0, Offset = 0;

  int c, i, j, k, len, bytes;

  char *s, *p, *t;

  char *Block, *BDepot, *SDepot, *Depot, *Root;

  char Name[64];

  unsigned long int FilePos=0x00000000;

  long int num_bbd_blocks;

  long int root_list[MAXBLOCKS], sbd_list[MAXBLOCKS];

  long int pps_size, pps_start = -1;

  long int linkto;

  int root_entry;

  pps_entry **pps_list;

 

  if(argc < 2) {

    fprintf(stderr,"No input file name\n");

    exit (12);

  }

  fprintf(stderr,"File given was %s\n",argv[1]);

  input = fopen(argv[1], "rb");

  if(input==NULL) {

    fprintf(stderr,"Error opening file %s\n",argv[1]);

    exit (12);

  }

  if(argc < 3) {

    fprintf(stderr,"Listing contents\n");

    strncpy(Target,"UnLiKeLy",8);

  } else {

    strncpy(Target, argv[2], 64);

    fprintf(stderr, "Extracting %s...\n", Target);

  }

 

  /* peek into file to guess file type */

  c = getc (input);

  ungetc(c,input);

 

  if(isprint(c)) {

     fprintf(stderr,"File looks like a plain text file.\n");

     return 8;

  /* check for MS OLE wrapper */

  } else if(c == 0xd0) {

     Block =(char *) malloc(512);

     /* read header block */

     if(fread(Block,512,1,input) != 1 ) {

       fprintf(stderr,"1 =========> Input file has faulty OLE format\n");

    exit (5);

     }

     num_bbd_blocks=LongInt(Block+0x2c);

     BDepot =(char *) malloc(512*num_bbd_blocks);

     s = BDepot;

     root_list[0]= LongInt(Block+0x30);

     sbd_list[0] = LongInt(Block+0x3c);

     if(debug) fprintf(stderr,"num_bbd_blocks %ld, root start %ld, sbd start %ld\n",num_bbd_blocks,root_list[0],sbd_list[0]);

 

     /* read big block Depot */

     for(i=0; i<(int)num_bbd_blocks; i++) {

       FilePos = 512*(LongInt(Block+0x4c+(i*4))+1);

       fseek(input, FilePos, SEEK_SET);

       if(fread(s,512,1,input) != 1) {

         fprintf(stderr,"2 =========> Input file has faulty bbd\n");

         exit (5);

       }

       s += 0x200;

     }

 

     /* Extract the sbd block list */

     for(len = 1; len < MAXBLOCKS; len++){

       sbd_list[len] = LongInt(BDepot+(sbd_list[len-1]*4));

       if(sbd_list[len] == -2) break;

     }

     if(len >= MAXBLOCKS) fprintf(stderr,"Help too many sbd blocks\n");

     SDepot =(char *) malloc(512*len);

     s = SDepot;

   

     /* Read in Small Block Depot */

     for(i=0; i<len; i++) {

       FilePos = 512 *(sbd_list[i]+1);

       fseek(input, FilePos, SEEK_SET);

       if(fread(s, 512, 1, input) != 1 ) {

     fprintf(stderr,"3 =========> Input file has faulty OLE format\n");

         return 5;

       }

       s += 0x200;

     }

     /* Extract the root block list */

     for(len = 1; len < MAXBLOCKS; len++){

       root_list[len] = LongInt(BDepot+(root_list[len-1]*4));

       fprintf(stderr,"root block %d\n",len);

       if(root_list[len] == -2) break;

     }

     if(len >= MAXBLOCKS) fprintf(stderr,"Help too many root blocks\n");

     Root =(char *) malloc(512*len);

     s = Root;

     /* Read in Root stream data */

     for(i=0; i<len; i++) {

       FilePos = 512 *(root_list[i]+1);

       fseek(input,FilePos,SEEK_SET);

       if(fread(s,512,1,input) != 1) {

     fprintf(stderr,"4 =========> Input file has faulty OLE format\n");

     return 5;

       }

       s += 0x200;

     }

 

     /* assign space for pps list */

     pps_list = (pps_entry **)malloc(len*4*sizeof(pps_entry *));

     for(j=0; j<len*4; j++) pps_list[j] =(pps_entry *)malloc(sizeof(pps_entry));

     /* Store pss entry details and look out for Root Entry */

     for(j=0; j<len*4; j++) {

       pps_list[j]->level = -1;

       pps_list[j]->index = j;

       s = Root+(j*0x80);

       /* some pps names have first byte as an integer !!

          so we make it visible so you can extract a named pps */

       if(!isprint(*s)) *s = *s + 48;

     

       pps_list[j]->nsize=ShortInt(s+0x40);

       if(pps_list[j]->nsize == 0) continue;

       for(p=pps_list[j]->name,t=s; t<s+pps_list[j]->nsize; t++) *p++ = *t++;

       s+=0x42;

       pps_list[j]->type = *s;

       if(pps_list[j]->type == 5) {

     root_entry = j; /* this is root */

       }

       s+=0x02;

       linkto = LongInt(s);

       if(linkto != -1) pps_list[j]->previous = pps_list[linkto];

       else pps_list[j]->previous = NULL;

       s+=0x04;

       linkto = LongInt(s);

       if(linkto != -1) pps_list[j]->next = pps_list[linkto];

       else pps_list[j]->next = NULL;

       s+=0x04;

       linkto = LongInt(s);

       if(linkto != -1) pps_list[j]->directory = pps_list[linkto];

       else pps_list[j]->directory = NULL;

       s+=0x28;

       pps_list[j]->start = LongInt(s);

       s+=0x04;

       pps_list[j]->size = LongInt(s);

     }

 

     /* go through the pps entries, tagging them with level number

    use recursive routine to follow list starting at root entry */

   

        unravel(pps_list[root_entry],0);

 

     /* go through the level 0 list looking for named entries */

     for(j=0; j<len*4; j++) {

       if(pps_list[j]->nsize == 0) continue; /* skip empty pps */

       /* we mostly only want the top level (level 1) stuff, so

      here we skip anything more deeply nested. */

       if(pps_list[j]->level > 1) continue;

       pps_start = pps_list[j]->start;

       pps_size  = pps_list[j]->size;

       OLEfile = NULL;

       if(pps_list[j]->type == 5) {  /* Root entry */

     OLEfile = tmpfile();

     sbfile = OLEfile;

     if(debug) fprintf(stderr,"Reading sbFile %ld\n",pps_start);

       }

       else if(!strcmp(pps_list[j]->name, Target)) {

     OLEfile=fopen("OLE.tmp","w+b");  /* try and open */

     printf("Reading Target %s\n", Target);

       }

       if(pps_size<=0) OLEfile = NULL;

       if(OLEfile == NULL) continue;

       if(pps_size>=4096 | OLEfile==sbfile) {

     Offset = 1;

     BlockSize = 512;

         infile = input;

         Depot = BDepot;

       } else {

     Offset = 0;

         BlockSize = 64;

         infile = sbfile;

         Depot = SDepot;

       }

       while(pps_start != -2) {

         if(debug) fprintf(stderr,"Reading block %ld\n",pps_start);

         FilePos = (pps_start+Offset)* BlockSize;

         bytes = MIN(BlockSize,pps_size);

         fseek(infile,FilePos,SEEK_SET);

         if(fread(Block,bytes,1,infile) != 1) {

       fprintf(stderr,"5 =========> Input file has faulty OLE format\n");

           exit (5);

     }

         fwrite(Block,bytes,1,OLEfile);

     pps_start = LongInt(Depot+(pps_start*4));

     pps_size -= BlockSize;

         if(pps_size <= 0) pps_start=-2;

       }

       rewind(OLEfile);

     }

    for(j=0; j<len*4; j++) free(pps_list[j]);

    free(pps_list);

    free(Root);

    free(BDepot);

    free(Block);

    fclose(input);

    return 0;

  } else {

    /* not a OLE file! */

    fprintf(stderr,"7 =========> Input file is not an OLE file\n");

    exit (8);

  }

 return 0;

}

 

对上述 ole.c 程序编译和连接之后,生成 ole.exe 程序。

 

例子:列出Word 文档 a.doc 的目录结构。

 

DOS 提示符下执行命令: ole a.doc 

 

本节介绍的Office 文档的数据恢复的原理和程序设计方法,都来源于对Office 文档结构的分析。如果对Office 文档的结构进行更深入的研究,将会发现更好的修复方法,从而挽救更多的数据,并有可能开发出能对各类受损Office 文档自动进行全面恢复的工具软件。

《数据恢复高级技术》汪中夏老师

北京信息工程学院数据恢复实验室

 
发表日期:2007-1-23 10:32:44【关闭

·相关新闻· ·评 论·
北京信息科技大学数据恢复研究所
地址: 北京信息科技大学图书馆405 (100101)
电话:010-51659019 / 64842878 传真: 0086-010-64843842
电子邮件:wzx@biti.edu.cn