package com.yeejoin.amos.fas.core.util;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;

/**
 * Utility for converting Word documents ({@code .doc} and {@code .docx}) to HTML.
 *
 * <p>Pictures embedded in a document are extracted into an {@code image} sub-directory
 * next to the source file. Each picture file name is prefixed with the source document's
 * base name followed by an underscore. The {@code src} written into the generated HTML is
 * {@code readUrl} followed by the part of the image directory path that starts at the
 * first occurrence of {@code "docs"}; a source path without that segment cannot be
 * converted.
 *
 * <p>All conversions are best-effort: failures are printed to standard error and are
 * never propagated to the caller.
 *
 * @author nihuanshan
 */
public class WordConverterUtils {

	/**
	 * Picture storage directory, relative to the directory of the source document.
	 * Starts and ends with the platform file separator.
	 */
	private static final String IMG_PATH = File.separator + "image" + File.separator;

	/**
	 * Path fragment marking where the publicly served part of the image path begins.
	 */
	private static final String DOCS_MARKER = "docs";

	/**
	 * Converts a Word document to an HTML file.
	 *
	 * <p>Nothing happens when an argument path is {@code null}, the source does not exist,
	 * the target already exists, the target name does not end with {@code .html}, or the
	 * source name ends with neither {@code .doc} nor {@code .docx}.
	 *
	 * @param srcFile    path of the source Word document
	 * @param targetFile path of the HTML file to create
	 * @param readUrl    URL prefix used for the {@code img} tags in the generated HTML
	 */
	public static void wordToHtml(String srcFile, String targetFile, String readUrl) {
		if (srcFile == null || targetFile == null || !targetFile.endsWith(".html")) {
			return;
		}
		File doc = new File(srcFile);
		File html = new File(targetFile);
		if (!doc.exists() || html.exists()) {
			return;
		}
		if (srcFile.endsWith(".doc")) {
			docToHtml(doc, html, readUrl);
		} else if (srcFile.endsWith(".docx")) {
			docxToHtml(doc, html, readUrl);
		}
	}

	/**
	 * Converts a Word document to an HTML string.
	 *
	 * @param srcFile path of the source Word document
	 * @param readUrl URL prefix used for the {@code img} tags in the generated HTML
	 * @return the generated HTML, or {@code null} when the path is {@code null}, the source
	 *         does not exist, its name ends with neither {@code .doc} nor {@code .docx},
	 *         or the conversion fails
	 */
	public static String wordToHtmlString(String srcFile, String readUrl) {
		if (srcFile == null) {
			return null;
		}
		File doc = new File(srcFile);
		if (!doc.exists()) {
			// Bail out before any image directory is created for a missing source.
			return null;
		}
		if (srcFile.endsWith(".doc")) {
			return docToHtmlString(doc, readUrl);
		} else if (srcFile.endsWith(".docx")) {
			return docxToHtmlString(doc, readUrl);
		}
		return null;
	}

	/**
	 * Converts a {@code .doc} document to an HTML file.
	 *
	 * <p>On failure the stack trace is printed and any partially written target is removed.
	 *
	 * @param srcFile    source document
	 * @param targetFile HTML file to create; the only caller guarantees it does not exist yet
	 * @param readUrl    URL prefix used for the {@code img} tags in the generated HTML
	 */
	private static void docToHtml(File srcFile, File targetFile, String readUrl) {
		try {
			// Build the whole DOM first so a broken source never creates the target file.
			org.w3c.dom.Document htmlDocument = convertDocToDom(srcFile, readUrl);
			try (FileOutputStream out = new FileOutputStream(targetFile)) {
				serializeHtml(htmlDocument, new StreamResult(out));
			}
		} catch (Exception e) {
			e.printStackTrace();
			discardPartialOutput(targetFile);
		}
	}

	/**
	 * Converts a {@code .doc} document to an HTML string.
	 *
	 * @param srcFile source document
	 * @param readUrl URL prefix used for the {@code img} tags in the generated HTML
	 * @return the generated HTML, or {@code null} if the conversion fails
	 */
	private static String docToHtmlString(File srcFile, String readUrl) {
		try {
			org.w3c.dom.Document htmlDocument = convertDocToDom(srcFile, readUrl);
			StringWriter stringWriter = new StringWriter();
			serializeHtml(htmlDocument, new StreamResult(stringWriter));
			return stringWriter.toString();
		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Converts a {@code .docx} document to an HTML file.
	 *
	 * <p>On failure the stack trace is printed and any partially written target is removed.
	 *
	 * @param srcFile    source document
	 * @param targetFile HTML file to create; the only caller guarantees it does not exist yet
	 * @param readUrl    URL prefix used for the {@code img} tags in the generated HTML
	 */
	private static void docxToHtml(File srcFile, File targetFile, String readUrl) {
		try {
			// Everything that can reject the input runs before the target file is opened.
			String imageDirPath = imageDirPath(srcFile);
			String imageUri = buildImageUri(readUrl, imageDirPath);
			File imageDir = new File(imageDirPath);
			ensureDirectory(imageDir);
			XHTMLOptions options = XHTMLOptions.create();
			options.setExtractor(new MyImageExtractor(imageDir, imagePrefix(srcFile)));
			options.URIResolver(new MyURIResolver(imageUri));
			try (
					FileInputStream inputStream = new FileInputStream(srcFile);
					XWPFDocument document = new XWPFDocument(inputStream);
					FileOutputStream fileOutputStream = new FileOutputStream(targetFile);
					OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOutputStream,
							java.nio.charset.StandardCharsets.UTF_8);
			) {
				XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
				xhtmlConverter.convert(document, outputStreamWriter, options);
			}
		} catch (Exception e) {
			e.printStackTrace();
			discardPartialOutput(targetFile);
		}
	}

	/**
	 * Converts a {@code .docx} document to an HTML string.
	 *
	 * @param srcFile source document
	 * @param readUrl URL prefix used for the {@code img} tags in the generated HTML
	 * @return the generated HTML, or {@code null} if the conversion fails
	 */
	private static String docxToHtmlString(File srcFile, String readUrl) {
		try {
			String imageDirPath = imageDirPath(srcFile);
			String imageUri = buildImageUri(readUrl, imageDirPath);
			File imageDir = new File(imageDirPath);
			ensureDirectory(imageDir);
			XHTMLOptions options = XHTMLOptions.create();
			options.setExtractor(new MyImageExtractor(imageDir, imagePrefix(srcFile)));
			options.URIResolver(new MyURIResolver(imageUri));
			try (
					FileInputStream inputStream = new FileInputStream(srcFile);
					XWPFDocument document = new XWPFDocument(inputStream);
			) {
				XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
				StringWriter stringWriter = new StringWriter();
				xhtmlConverter.convert(document, stringWriter, options);
				return stringWriter.toString();
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Reads a {@code .doc} file and converts it to an HTML DOM, writing every embedded
	 * picture into the image directory on the way.
	 *
	 * @param srcFile source document
	 * @param readUrl URL prefix used for the {@code img} tags in the generated HTML
	 * @return the HTML document produced by the converter
	 * @throws IOException if the source cannot be read or the image directory cannot be created
	 * @throws javax.xml.parsers.ParserConfigurationException if no DOM builder is available
	 * @throws IllegalArgumentException if the image directory path contains no {@code "docs"} segment
	 */
	private static org.w3c.dom.Document convertDocToDom(File srcFile, String readUrl)
			throws IOException, javax.xml.parsers.ParserConfigurationException {
		String imageDirPath = imageDirPath(srcFile);
		String imageUri = buildImageUri(readUrl, imageDirPath);
		String prefix = imagePrefix(srcFile);
		try (
				FileInputStream inputStream = new FileInputStream(srcFile);
				HWPFDocument wordDocument = new HWPFDocument(inputStream);
		) {
			ensureDirectory(new File(imageDirPath));
			org.w3c.dom.Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
			WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
			wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
				// try-with-resources: one stream is opened per picture and must be closed each time.
				try (
						FileOutputStream out = new FileOutputStream(imageDirPath + prefix + name);
				) {
					out.write(content);
					return imageUri + prefix + name;
				} catch (Exception e) {
					// Best-effort: a picture that cannot be stored gets an empty src
					// instead of aborting the whole conversion.
					e.printStackTrace();
					return "";
				}
			});
			wordToHtmlConverter.processDocument(wordDocument);
			return wordToHtmlConverter.getDocument();
		}
	}

	/**
	 * Serializes an HTML DOM as indented UTF-8 HTML into the given result.
	 *
	 * @param htmlDocument document to serialize
	 * @param streamResult destination of the serialized HTML
	 * @throws javax.xml.transform.TransformerException if the transformer cannot be created or fails
	 */
	private static void serializeHtml(org.w3c.dom.Document htmlDocument, StreamResult streamResult)
			throws javax.xml.transform.TransformerException {
		TransformerFactory tf = TransformerFactory.newInstance();
		// Harden the transformer: secure processing on, no external DTD access.
		tf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
		tf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
		Transformer serializer = tf.newTransformer();
		serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
		serializer.setOutputProperty(OutputKeys.INDENT, "yes");
		serializer.setOutputProperty(OutputKeys.METHOD, "html");
		serializer.transform(new DOMSource(htmlDocument), streamResult);
	}

	/**
	 * Returns the absolute path of the picture directory for a source document, ending
	 * with the platform file separator.
	 *
	 * @param srcFile source document
	 * @return absolute image directory path with a trailing separator
	 */
	private static String imageDirPath(File srcFile) {
		// getAbsoluteFile() gives a non-null parent even for a bare relative file name.
		return srcFile.getAbsoluteFile().getParentFile().getAbsolutePath() + IMG_PATH;
	}

	/**
	 * Returns the file name prefix for extracted pictures: the source file name without
	 * its extension, followed by an underscore.
	 *
	 * @param srcFile source document
	 * @return picture file name prefix
	 */
	private static String imagePrefix(File srcFile) {
		String fileName = srcFile.getName();
		int dot = fileName.lastIndexOf('.');
		String baseName = dot < 0 ? fileName : fileName.substring(0, dot);
		return baseName + "_";
	}

	/**
	 * Builds the URI prefix under which extracted pictures are referenced from the HTML:
	 * {@code readUrl} followed by the image directory path from its first {@code "docs"}
	 * occurrence onwards.
	 *
	 * @param readUrl      URL prefix supplied by the caller
	 * @param imageDirPath absolute image directory path
	 * @return URI prefix for picture references
	 * @throws IllegalArgumentException if {@code imageDirPath} contains no {@code "docs"} segment
	 */
	private static String buildImageUri(String readUrl, String imageDirPath) {
		int docsIndex = imageDirPath.indexOf(DOCS_MARKER);
		if (docsIndex < 0) {
			throw new IllegalArgumentException(
					"Image path does not contain \"" + DOCS_MARKER + "\": " + imageDirPath);
		}
		return readUrl + imageDirPath.substring(docsIndex);
	}

	/**
	 * Creates the directory (including missing parents) if it does not exist yet.
	 *
	 * @param dir directory to create
	 * @throws IOException if the directory does not exist and cannot be created
	 */
	private static void ensureDirectory(File dir) throws IOException {
		// The second isDirectory() check tolerates another thread or process creating it first.
		if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
			throw new IOException("Unable to create image directory: " + dir);
		}
	}

	/**
	 * Removes an incomplete target file after a failed conversion. Because
	 * {@link #wordToHtml(String, String, String)} skips targets that already exist, a
	 * leftover file would block every later attempt to convert the same document.
	 *
	 * @param targetFile HTML file that may have been partially written
	 */
	private static void discardPartialOutput(File targetFile) {
		if (targetFile.isFile() && !targetFile.delete()) {
			System.err.println("Unable to remove incomplete HTML output: " + targetFile);
		}
	}
}
