word域代码转换html丢失解决办法

更新时间:2023-05-04 07:42:01 阅读量: 实用文档 文档下载

说明:文章内容仅供预览,部分内容可能不全。下载后的文档,内容与下面显示的完全一致。下载之前请确认下面内容是否您想要的,是否完整无缺。

Word转html存在域代码丢失。

Aspose、jacob、poi 都无法解决。

在使用jacob转换成html时,域代码会被

private void processFormula(List no des ) throws

Un supportedE ncod in gExcepti

on{ for

(int i = no des .size()-1; i >=0; i --){

Node node = no des .get( i );

if ( node instanceof Element){

Element e = (Element) node ;

processFormula( e.childNodes());

} else if ( node instanceof Comment){

String commentText = node .toString();

if ( commentText .contains( "","") .replace((char) 10 +

"","") .replace((char) 13 + "","");

Docume nt doc = Jsoup.parse(html);

Eleme nts sups = doc.select("sup");

Eleme nts subs = doc.select("sub");

//纠正上标

for(Eleme nt sup:sups){ String text = sup.text(); sup.tagName("spa n");

sup.text("\\s("+text+", )");

//纠正下标

for(Eleme nt sub:subs){ String text = sub.text(); sub.tagName("spa n"); sub.text("\\s(,"+text+")");

String eqtext = doc.body().text();

eqtext = mergeSubSup(eqtext);

//无法被StringUtil判断为空白的空白

eqtext = eqtext.replace((char) 8203 + "","");

eqtext = eqtext.replace((char) 160 + "", " ").replace("\\ ", "");

eqtext = eqtext.trim();

// System.out.println(eqtext+" ------------->");

if(eqtext.startsWith("eq")){

eqtext = eqtext.replaceFirst("eq", "").trim();

eqtext = eqtext.replace("\\"+LEFTBRACKET, "\\"+LEFTBRACKETRE).replace("\\"+RIGHTBRACKET,

"\\"+RIGHTBRACKETRE).replace("\\"+COMMA, "\\"+COMMARE);

Stri ng latex = parserEleme nts(eqtext);

return latex.replace("\\"+LEFTBRACKETRE, "\\"+LEFTBRACKET).replace("\\"+RIGHTBRACKETRE,

"\\"+RIGHTBRACKET).replace("\\"+COMMARE, "\\"+COMMA);

}

return "";

}

private static String mergeSubSup(String latex){

// 这里需要合并上下标例如:eq \i\su(\s(i, )\s( = , )\s(1, ),\s( ,3),x)

〃--->eq \i\su(\s(i = 1, ),\s( ,3),x)

//TODO

return latex;

}

/**

*

* @param eqtext

* @return

*/

private static String parserEleme nts(Stri ng eqtext){

Stri ng latex ="";

if(eqtext.co ntai ns("\\")&&eqtext.co nta in s(LEFTBRACKET)&&eqtext.co ntai ns(RIGHTBR ACKET)){

String preText = eqtext.substri ng(0,eqtext.i ndexOf("\\"));

int startI ndex = eqtext.i ndexOf("\\");

int endln dex= getNextLeftBra(startI ndex,'(',eqtext);

int n extBra = getNextBra In dex(e ndln dex+1,eqtext);

String name = eqtext.substring(startIndex,endlndex);

String text = eqtext.substring(endlndex+1,nextBra);

String suffText= eqtext.substring(nextBra+1,eqtext.length());

latex = parserEleme nts(preText) + parserToLatex (n ame.trim(),text)

+parserEleme nts(suffText);

}else{

latex = eqtext;

}

return latex;

}

private static int getNextLeftBra(int start,char sym,String eqtext){ for(i nt i =start;i

char leftbra = eqtext.charAt(i);

if(leftbra == sym){

return i;

}

}

return eqtext .len gth();

/**

获取配对括号的位置

* @param text 文本

* @return 位置

*/

private static int getNextBraIndex(int start,String text){

int leftbra = 0;

int rightbra = 0;

for(i nt i = start-1 ; i

char c = text.charAt(i);

if(c == ')'){

leftbra ++;

}

if(c =='('){ rightbra++;

} if(rightbra!=O&&leftbra==rightbra&&>=start){ return i;

}

}

return 0;

}

private static String parserToLatex(String name,String text){

Stri ng latex ="";

n ame = n ame.toLowerCase();

//分式

if(n ame.equals("\\f')){

latex +=getFLatex (n ame, text);

//根式

}else if(n ame.equals("\\r")){

latex +=getRLatex (n ame, text);

//上下标

}else if(n ame.startsWith("\\s")){

latex+=getSLatex (n ame, text);

//\a矩阵\al左对齐;\ac居中;\ar右对齐;\con元素排成n列;\vsn行间增加n磅;\hsn 列间增加n磅

}else if(n ame.startsWith("\\a")){

latex += getALatex (n ame, text);

}else if(n ame.startsWith("\\b")){

latex+=getBLatex (n ame, text);

//平移

}else if(n ame.startsWith("\\d")){

latex+=getDLatex (n ame, text);

//积分

}else if(n ame.startsWith("\\i")){

latex+=getILatex (n ame, text);

//列表

}else if(n ame.startsWith("\\l")){

latex+=getLLatex (n ame, text);

//重叠

}else if(n ame.startsWith("\\o")){

latex+=getOLatex (n ame, text);

//框

}else if(n ame.startsWith("\\x")){

latex+=getXLatex (n ame, text);

//空白

}else if(Stri ngUtils.isBla nk(n ame)){

latex+=parserEleme nts(text);

}else{

System.err.pri ntln ("error parserToLatex");

}

return latex;

}

/**

*根式多次根式

* @param n ame n ame

* @param text text

* @return latex

*/

private static String getRLatex(String name,String text){

Stri ng latex ="";

List args = getArgs(text); if(args.size()==2){ latex +=" \\sqrt[";

latex +=parserEleme nts(args.get(0)); latex +="]{";

latex +=parserEleme nts(args.get(1)); latex +="}";

}else if(args.size()==1){

latex +=" \\sqrt{";

latex +=parserEleme nts(args.get(0)); latex +="}";

}else{

System.err.pri ntln ("error getRLatex"); } return latex;

}

*分式

* @param n ame n ame

* @param text text

* @return latex

*/

private static String getFLatex(String name,String text){

Stri ng latex ="";

List args = getArgs(text); if(args.size()==2){ latex +=" \\frac{";

latex +=parserEleme nts(args.get(0));

latex +="}{";

latex +=parserEleme nts(args.get(1)); latex +="}";

}else{

System.err.pri ntln ("error getFLatex");

}

return latex;

}

* //\a矩阵\al左对齐;\ac居中;\ar右对齐;\con元素排成n列;\vsn行间增加n 磅;\hsn列间增加n磅

* @param n ame n ame

* @param text text

* @return latex

*/

private static String getALatex(String name,String text){

Stri ng latex ="";

List args =getArgs(text);

Stri ng n = name.replaceAII("A[\\S\\s]*\\\\co([0-9]*)[\\S\\s]*$", "$1");

//列数

int col = 1;

if(n.matches("[0-9]*")) col = Integer.valueOf(n);

for(i nt i = 0 ;i

if(i!=0&&i %col==0){

latex += " \\\\ ";

}

latex += parserEleme nts(args.get(i));

}

本文来源:https://www.bwwdw.com/article/rtee.html

Top