pio应用excel,word转html

love398146779

浏览: 711244 次
性别:
来自: 天津

最近访客更多访客>>

bbwang8088

u012363178

独孤不求败

johntsu

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

package com.acdm.util;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;

public class WordExcelToHtml {

/**
* 回车符ASCII码
*/
private static final short ENTER_ASCII = 13;

/**
* 空格符ASCII码
*/
private static final short SPACE_ASCII = 32;

/**
* 水平制表符ASCII码
*/
private static final short TABULATION_ASCII = 9;

public static String htmlText = "";
public static String htmlTextTbl = "";
public static int counter=0;
public static int beginPosi=0;
public static int endPosi=0;
public static int beginArray[];
public static int endArray[];
public static String htmlTextArray[];
public static boolean tblExist=false;

public static final String inputFile="f://22.doc";
public static void main(String argv[])
{
  try {
   //getWordAndStyle(inputFile);
  } catch (Exception e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
}

/**
* 读取每个文字样式
*
* @param fileName
* @throws Exception
*/

public static void getWordAndStyle(String fileName,String path,String path2) throws Exception {
  FileInputStream in = new FileInputStream(new File(fileName));
  HWPFDocument doc = new HWPFDocument(in);

    Range rangetbl = doc.getRange();//得到文档的读取范围
   TableIterator it = new TableIterator(rangetbl);
   int num=100;


   beginArray=new int[num];
   endArray=new int[num];
   htmlTextArray=new String[num];

  // 取得文档中字符的总数
  int length = doc.characterLength();
  // 创建图片容器
  PicturesTable pTable = doc.getPicturesTable();

  htmlText = "<html><head><title>" + doc.getSummaryInformation().getTitle() + "</title></head><body>";
  // 创建临时字符串,好加以判断一串字符是否存在相同格式

   if(it.hasNext())
   {
    readTable(it,rangetbl);
   }

   int cur=0;

  String tempString = "";
  for (int i = 0; i < length - 1; i++) {
   // 整篇文章的字符通过一个个字符的来判断,range为得到文档的范围
   Range range = new Range(i, i + 1, doc);



   CharacterRun cr = range.getCharacterRun(0);
   //beginArray=new int[num];
    //endArray=new int[num];
    //htmlTextArray=new String[num];
   if(tblExist)
   {
    if(i==beginArray[cur])
    {
     htmlText+=tempString+htmlTextArray[cur];
     tempString="";
     i=endArray[cur]-1;
     cur++;
     continue;
    }
   }
   if (pTable.hasPicture(cr)) {
    htmlText += tempString ;
    // 读写图片
    readPicture(pTable, cr,path2);
    tempString = "";
   }
   else {

    Range range2 = new Range(i + 1, i + 2, doc);
    // 第二个字符
    CharacterRun cr2 = range2.getCharacterRun(0);
    char c = cr.text().charAt(0);

    System.out.println(i+"::"+range.getEndOffset()+"::"+range.getStartOffset()+"::"+c);

    // 判断是否为回车符
    if (c == ENTER_ASCII)
     {
     tempString += "<br/>";

     }
    // 判断是否为空格符
    else if (c == SPACE_ASCII)
     tempString += " ";
    // 判断是否为水平制表符
    else if (c == TABULATION_ASCII)
     tempString += "    ";
    // 比较前后2个字符是否具有相同的格式
    boolean flag = compareCharStyle(cr, cr2);
    if (flag)
     tempString += cr.text();
    else {
     String fontStyle = "<span style="+"font-family:"+" + cr.getFontName() + "+";font-size:" + cr.getFontSize() / 2 + "pt;";

     if (cr.isBold())
      fontStyle += "font-weight:bold;";
     if (cr.isItalic())
      fontStyle += "font-style:italic;";

       htmlText += fontStyle + "mce_style="+"font-family:"+" + cr.getFontName() + "+";font-size:" + cr.getFontSize() / 2 + "pt;";
     if (cr.isBold())
      fontStyle += "font-weight:bold;";
     if (cr.isItalic())
      fontStyle += "font-style:italic;";

     htmlText += fontStyle + ">" + tempString + cr.text()+"</span> " ;

     tempString = "";
    }
   }
  }

htmlText += tempString+"</body></html>";
writeFile(htmlText,path);
}

/**
* 读写文档中的表格
*
* @param pTable
* @param cr
* @throws Exception
*/
public static void readTable(TableIterator it, Range rangetbl) throws Exception {

  htmlTextTbl="";
   //迭代文档中的表格

        counter=-1;
        while (it.hasNext())
        {
        tblExist=true;
          htmlTextTbl="";
        Table tb = (Table) it.next();
        beginPosi=tb.getStartOffset() ;
        endPosi=tb.getEndOffset();

        System.out.println("............"+beginPosi+"...."+endPosi);
        counter=counter+1;
        //迭代行，默认从0开始
        beginArray[counter]=beginPosi;
        endArray[counter]=endPosi;

        htmlTextTbl+="<table border>";
       for (int i = 0; i < tb.numRows(); i++) {
    TableRow tr = tb.getRow(i);

    htmlTextTbl+="<tr>";
    //迭代列，默认从0开始
    for (int j = 0; j < tr.numCells(); j++) {
     TableCell td = tr.getCell(j);//取得单元格
     int cellWidth=td.getWidth();

     //取得单元格的内容
     for(int k=0;k<td.numParagraphs();k++){
                Paragraph para =td.getParagraph(k);
                String s = para.text().toString().trim();
                if(s=="")
                {
               s=" ";
                }
                System.out.println(s);
                htmlTextTbl += "<td width="+cellWidth+ ">"+s+"</td>";
                System.out.println(i+":"+j+":"+cellWidth+":"+s);
           } //end for
        }   //end for
     }   //end for
      htmlTextTbl+="</table>" ;
      htmlTextArray[counter]=htmlTextTbl;

        } //end while
}

/**
* 读写文档中的图片
*
* @param pTable
* @param cr
* @throws Exception
*/
public static void readPicture(PicturesTable pTable, CharacterRun cr,String path2) throws Exception {
  // 提取图片
  Picture pic = pTable.extractPicture(cr, false);
  // 返回POI建议的图片文件名
  String afileName = pic.suggestFullFileName();
  OutputStream out = new FileOutputStream(new File(path2 + File.separator + afileName));
  pic.writeImageContent(out);
  htmlText += "<img src='"+path2 +"\\"+afileName + "' mce_src='"+path2 +"\\"+ afileName + "'/>";
}
public static boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2)
{
  boolean flag = false;
  if (cr1.isBold() == cr2.isBold() && cr1.isItalic() == cr2.isItalic() && cr1.getFontName().equals(cr2.getFontName()) && cr1.getFontSize() == cr2.getFontSize())
  {
   flag = true;
  }
  return flag;
}

/**
* 写文件
*
* @param s
*/
public static void writeFile(String s,String path) {
  FileOutputStream fos = null;
  BufferedWriter bw = null;
  try {
   File file = new File(path);
   fos = new FileOutputStream(file);
   bw = new BufferedWriter(new OutputStreamWriter(fos));
   bw.write(s);
  } catch (FileNotFoundException fnfe) {
   fnfe.printStackTrace();
  } catch (IOException ioe) {
   ioe.printStackTrace();
  } finally {
   try {
    if (bw != null)
     bw.close();
    if (fos != null)
     fos.close();
   } catch (IOException ie) {
   }
  }
}

}

分享到：

jacob转html | clob用在dao层

2011-08-04 17:58
浏览 3128
评论(1)
分类:行业应用
查看更多

1 楼 room2007126 2011-10-10

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论