package com.yeejoin.amos.knowledgebase.face.util.word;

import com.yeejoin.amos.feign.systemctl.Systemctl;
import fr.opensagres.poi.xwpf.converter.core.BasicURIResolver;
import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileItemFactory;
import org.apache.commons.fileupload.disk.DiskFileItem;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.xmlbeans.XmlException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.commons.CommonsMultipartFile;
import org.typroject.tyboot.core.foundation.utils.ValidationUtil;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Converts uploaded Word documents to HTML and rewrites their heading paragraphs into
 * {@code <div class="clib-writefile-tag-text title-hN" data-level="N"><hN>...</hN></div>} blocks.
 *
 * <p>Files whose name ends with {@code .docx} (any letter case) are read with XWPF; every other
 * file is handed to the HWPF (.doc) converter. Embedded pictures are uploaded through
 * {@code Systemctl.fileStorageClient} and the HTML links to the first key of the returned map.
 *
 * <p><b>Thread-safety:</b> {@link #wordParase(MultipartFile)} stores the heading levels of the
 * last parsed document in a static field that {@link #wordReplaceTag(String, String)} reads
 * later. Two documents converted concurrently can therefore see each other's heading levels;
 * callers have to run the parse/replace pair of one document without interleaving another.
 */
public class wordToHtml {

    private static final java.util.logging.Logger LOGGER =
            java.util.logging.Logger.getLogger(wordToHtml.class.getName());

    /** An optional sign followed by at least one decimal digit. */
    private static final Pattern INTEGER_PATTERN = Pattern.compile("^[-+]?\\d+$");

    /** Captures the class name and the declarations of every {@code .name{...}} rule. */
    private static final Pattern CSS_CLASS_RULE = Pattern.compile("\\.(\\w+)\\{(?s)(.+?)\\}");

    /** Marker appended to .doc heading paragraphs: {@code #标题<styleIndex>#}. */
    private static final String DOC_MARKER_PREFIX = "#标题";
    private static final String DOC_MARKER_SUFFIX = "#";

    private static final String TITLE_CLASS_PREFIX = "clib-writefile-tag-text title-h";
    private static final String ROOT_TITLE_ID = "1";

    /** Number of consecutive heading levels, starting at the smallest one found, that get rewritten. */
    private static final int TITLE_LEVEL_SPAN = 10;

    /**
     * Heading levels collected by the most recent parse, in document order. Writers publish a
     * fully built list and readers work on a copy, so a reader never sees a half-filled list.
     */
    private static volatile List<Integer> titleList = new ArrayList<>();

    /**
     * Converts an uploaded Word file to an HTML string and records its heading levels for a
     * following {@link #wordReplaceTag(String, String)} call.
     *
     * @param file uploaded .doc or .docx file
     * @return the converted HTML
     * @throws IOException if the upload cannot be read or converted
     */
    public static String wordParase(MultipartFile file) throws IOException, XmlException, ParserConfigurationException, TransformerException {
        if (isDocx(file.getOriginalFilename())) {
            return uploadDocXFile(file);
        }
        return uploadDocFile(file);
    }

    /**
     * Rewrites the heading paragraphs of HTML produced by {@link #wordParase(MultipartFile)}.
     *
     * @param htmlString HTML returned by {@code wordParase}
     * @param fileName   original file name; its extension selects the .docx or .doc rewrite and
     *                   the part before the last dot (the whole name when there is no dot) is
     *                   used as the text of a generated root title
     * @return the rewritten HTML with line breaks removed
     */
    public static String wordReplaceTag(String htmlString, String fileName) {
        int dot = fileName.lastIndexOf('.');
        String baseName = dot >= 0 ? fileName.substring(0, dot) : fileName;
        if (isDocx(fileName)) {
            return docXReplaceTag(htmlString, baseName);
        }
        return docReplaceTag(htmlString, baseName);
    }

    /** Returns whether the name ends with ".docx", ignoring letter case; null is not a .docx. */
    private static boolean isDocx(String fileName) {
        return fileName != null && fileName.toLowerCase(Locale.ROOT).endsWith(".docx");
    }

    /** Converts a .docx upload to HTML, uploading the extracted pictures and linking to them. */
    private static String uploadDocXFile(MultipartFile file) throws IOException {
        // A dedicated temp directory per conversion: no clashes between conversions, and it is
        // always created inside java.io.tmpdir regardless of a trailing separator.
        File imageRoot = Files.createTempDirectory("wordToHtml").toFile();
        try (InputStream inputStream = file.getInputStream();
             XWPFDocument wordDocument = new XWPFDocument(inputStream);
             ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {

            // Paragraphs whose style id is an integer below 6 are treated as headings.
            List<Integer> levels = new ArrayList<>();
            for (XWPFParagraph para : wordDocument.getParagraphs()) {
                Integer level = parseTitleLevel(para.getStyleID());
                if (level != null && level < 6) {
                    levels.add(level);
                }
            }
            titleList = levels;

            // Pictures are extracted below imageRoot and referenced in the HTML by that path.
            String imageRootPath = imageRoot.getAbsolutePath();
            XHTMLOptions options = XHTMLOptions.create();
            options.setExtractor(new FileImageExtractor(imageRoot));
            options.URIResolver(new BasicURIResolver(imageRootPath));

            XHTMLConverter.getInstance().convert(wordDocument, outputStream, options);
            String html = new String(outputStream.toByteArray(), StandardCharsets.UTF_8);

            // Upload every extracted picture and swap its local path for the returned URL.
            String imageDirPath = imageRootPath + "/word/media";
            String[] imageNames = new File(imageDirPath).list();
            if (imageNames != null) {
                for (String imageName : imageNames) {
                    String imagePath = imageDirPath + "/" + imageName;
                    String imageUrl = uploadImage(getMulFileByPath(imagePath));
                    html = html.replace(imagePath, imageUrl);
                }
            }
            return html;
        } finally {
            // Remove the extracted pictures even when the conversion failed.
            FileUtils.deleteQuietly(imageRoot);
        }
    }

    /** Parses a style id as a heading level; returns null when it is not an int value. */
    private static Integer parseTitleLevel(String styleId) {
        if (!isInteger(styleId)) {
            return null;
        }
        try {
            return Integer.parseInt(styleId);
        } catch (NumberFormatException outOfRange) {
            // All digits, but too large for an int: not a heading level.
            return null;
        }
    }

    /** Uploads one picture and returns the first key of the result map, or "" when there is none. */
    private static String uploadImage(MultipartFile image) {
        Map<String, String> result = Systemctl.fileStorageClient.updateCommonFile(image).getResult();
        if (ValidationUtil.isEmpty(result)) {
            return "";
        }
        Iterator<String> keys = result.keySet().iterator();
        if (!keys.hasNext()) {
            return "";
        }
        String url = keys.next();
        return url == null ? "" : url;
    }

    /** Converts a .doc upload to HTML, uploading embedded pictures while the document is processed. */
    private static String uploadDocFile(MultipartFile file) throws IOException, ParserConfigurationException, TransformerException {
        try (InputStream inputStream = file.getInputStream();
             HWPFDocument wordDocument = new HWPFDocument(inputStream);
             ByteArrayOutputStream out = new ByteArrayOutputStream()) {

            Range range = wordDocument.getRange();
            List<Integer> levels = new ArrayList<>();
            for (int i = 0; i < range.numParagraphs(); i++) {
                Paragraph para = range.getParagraph(i);
                int level = para.getStyleIndex();
                String text = para.text();
                if (level > 0 && level <= 6) {
                    // Tag the heading text with a marker so the paragraph can be found again in
                    // the generated HTML, and remember its outline level.
                    para.replaceText(text + DOC_MARKER_PREFIX + level + DOC_MARKER_SUFFIX, true);
                    levels.add(para.getLvl() + 1);
                }
            }
            titleList = levels;

            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            // Upload each embedded picture and use the returned URL as its link.
            wordToHtmlConverter.setPicturesManager((imageStream, pictureType, name, width, height) -> {
                try {
                    return uploadImage(encodeToMultipartFile(imageStream));
                } catch (Exception e) {
                    // Best effort: one picture that cannot be uploaded must not abort the
                    // conversion; its link is left empty.
                    LOGGER.log(java.util.logging.Level.WARNING,
                            "Failed to upload a picture embedded in the .doc file", e);
                    return "";
                }
            });
            wordToHtmlConverter.processDocument(wordDocument);
            org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();

            // Serialise the DOM as indented UTF-8 HTML into the byte buffer.
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.METHOD, "html");
            transformer.transform(new DOMSource(htmlDocument), new StreamResult(out));

            return new String(out.toByteArray(), StandardCharsets.UTF_8);
        }
    }

    /** Selects the {@code <p>} elements below the body that carry the given CSS class. */
    private static Elements findTitels(Document doc, String className) {
        return doc.body().select("p." + className);
    }

    /** Returns a sorted copy of the recorded heading levels; the shared list is left untouched. */
    private static List<Integer> sortedTitleLevels() {
        List<Integer> snapshot = new ArrayList<>(titleList);
        Collections.sort(snapshot);
        return snapshot;
    }

    /** Returns whether the smallest heading level occurs more than once in the sorted list. */
    private static boolean hasRepeatedTopLevel(List<Integer> sortedLevels) {
        return sortedLevels.size() > 1 && sortedLevels.get(0).equals(sortedLevels.get(1));
    }

    /** Turns an already renamed block into a title of the given level holding a single hN child. */
    private static void renderTitle(Element block, int level, String titleText) {
        String eleId = level == 1 ? ROOT_TITLE_ID : UUID.randomUUID().toString().replace("-", "");
        block.attr("class", TITLE_CLASS_PREFIX + level);
        block.attr("data-level", String.valueOf(level));
        block.attr("id", eleId);
        block.removeAttr("style");

        Element titelH = new Element("h" + level).attr("id", eleId);
        titelH.text(titleText);
        block.html("").appendChild(titelH);
    }

    /** Inserts a level-1 title showing the file name before the first child element of the container. */
    private static void insertRootTitle(Element container, String fileName) {
        Element root = new Element("div")
                .attr("class", TITLE_CLASS_PREFIX + 1)
                .attr("data-level", "1")
                .attr("id", ROOT_TITLE_ID);
        root.appendChild(new Element("h1").attr("id", ROOT_TITLE_ID).text(fileName));

        Element first = container.children().first();
        if (first == null) {
            container.prependChild(root);
        } else {
            first.before(root);
        }
    }

    /**
     * Rewrites the heading paragraphs of HTML converted from a .docx file.
     *
     * <p>A root title carrying {@code fileName} is inserted when no heading levels were recorded,
     * when the smallest level occurs more than once, or when the first paragraph of that level is
     * not the first element of the first body child. In that case the document's own headings
     * start at level 2, otherwise at level 1.
     *
     * @param htmlString HTML produced from the .docx file
     * @param fileName   text of the generated root title
     * @return the inner HTML of the body's children with line breaks removed
     */
    public static String docXReplaceTag(String htmlString, String fileName) {
        Document docx = Jsoup.parse(htmlString);
        List<Integer> levels = sortedTitleLevels();

        // The first child of the body wraps the paragraphs; create one if the body is empty so
        // the root title still ends up inside a wrapper whose inner HTML is returned.
        Element container = docx.body().children().first();
        if (container == null) {
            container = docx.body().appendElement("div");
        }

        boolean isAddTitle;
        int start = 1;
        if (levels.isEmpty()) {
            isAddTitle = true;
        } else {
            start = levels.get(0);
            Element topHeading = findTitels(docx, "X" + start).first();
            Element firstEle = container.children().first();
            boolean headingIsNotFirst = topHeading != null && !topHeading.equals(firstEle);
            isAddTitle = headingIsNotFirst || hasRepeatedTopLevel(levels);

            for (int i = start; i < start + TITLE_LEVEL_SPAN; i++) {
                int level = i - start + (isAddTitle ? 2 : 1);
                for (Element heading : findTitels(docx, "X" + i)) {
                    heading.tagName("div");
                    if (!heading.children().isEmpty()) {
                        renderTitle(heading, level, heading.children().text());
                    }
                }
            }
        }

        // Add the root node.
        if (isAddTitle) {
            insertRootTitle(container, fileName);
        }
        return docx.body().children().html().replace("\n", "");
    }

    /** Returns the span's parent when that is a {@code <p>}, otherwise the span's grandparent. */
    private static Element enclosingBlock(Element span) {
        Element parent = span.parent();
        if (parent == null) {
            return null;
        }
        return parent.tag().getName().equals("p") ? parent : parent.parent();
    }

    /**
     * Rewrites the heading paragraphs of HTML converted from a .doc file. Headings are located
     * through the {@code #标题N#} markers appended during parsing, because the class attributes
     * are removed when the style sheet is inlined.
     */
    private static String docReplaceTag(String htmlString, String fileName) {
        // Inline the style-sheet rules first; this also removes every class attribute.
        Document doc = formatHtmlRowCss(Jsoup.parse(htmlString));
        List<Integer> levels = sortedTitleLevels();

        boolean isAddTitle;
        int start = 1;
        if (levels.isEmpty()) {
            isAddTitle = true;
        } else {
            start = levels.get(0);
            String topMarker = DOC_MARKER_PREFIX + start + DOC_MARKER_SUFFIX;
            Element topSpan = doc.body().select("span:contains(" + topMarker + ")").first();
            Element topHeading = topSpan == null ? null : enclosingBlock(topSpan);
            Element firstEle = doc.body().children().first();
            boolean headingIsNotFirst = topHeading != null && !topHeading.equals(firstEle);
            isAddTitle = headingIsNotFirst || hasRepeatedTopLevel(levels);

            for (int i = start; i < start + TITLE_LEVEL_SPAN; i++) {
                int level = i - start + (isAddTitle ? 2 : 1);
                String marker = DOC_MARKER_PREFIX + i + DOC_MARKER_SUFFIX;
                for (Element heading : doc.body().select("span:contains(" + marker + ")")) {
                    Element parent = enclosingBlock(heading);
                    if (parent == null) {
                        continue;
                    }
                    parent.tagName("div");
                    String titleText = parent.children().text().replaceFirst(Pattern.quote(marker), "");
                    renderTitle(parent, level, titleText);
                }
            }
        }

        // Add the root node.
        if (isAddTitle) {
            insertRootTitle(doc.body(), fileName);
        }
        return doc.body().html().replace("\n", "");
    }

    /**
     * Wraps the file at the given path in a MultipartFile.
     *
     * @param picPath path of the file to wrap
     * @return a multipart file holding the file's content
     * @throws IOException if the file cannot be read
     */
    private static MultipartFile getMulFileByPath(String picPath) throws IOException {
        return new CommonsMultipartFile(createFileItem(picPath));
    }

    /**
     * Copies the file into a FileItem named "MyFileName" plus the file's extension.
     *
     * @throws IOException if the file cannot be read, so a truncated item is never returned
     */
    private static FileItem createFileItem(String filePath) throws IOException {
        File source = new File(filePath);
        String sourceName = source.getName();
        int dot = sourceName.lastIndexOf('.');
        String extFile = dot >= 0 ? sourceName.substring(dot) : "";

        FileItemFactory factory = new DiskFileItemFactory(16, null);
        FileItem item = factory.createItem("textField", "text/plain", true, "MyFileName" + extFile);
        try (OutputStream os = item.getOutputStream()) {
            Files.copy(source.toPath(), os);
        }
        return item;
    }

    /**
     * Wraps picture bytes in a MultipartFile with field name "uploadFile" and a generated
     * ".png" file name.
     *
     * @param bytes picture content
     * @return a multipart file holding the bytes
     * @throws IOException if the bytes cannot be written into the file item
     */
    private static MultipartFile encodeToMultipartFile(byte[] bytes) throws IOException {
        String fileName = UUID.randomUUID().toString().replace("-", "") + ".png";
        // The size threshold equals the content length, so the item is never expected to
        // spill to a temporary file and nothing has to be cleaned up afterwards.
        FileItem fileItem = new DiskFileItem("uploadFile", "image/png", false, fileName, bytes.length, null);
        try (OutputStream os = fileItem.getOutputStream()) {
            os.write(bytes);
        }
        return new CommonsMultipartFile(fileItem);
    }

    /** Collects the class rules of the document's style elements and inlines them. */
    private static Document formatHtmlRowCss(Document doc) {
        String allcss = doc.getElementsByTag("style").html();
        Map<String, String> styleMap = new HashMap<>();
        Matcher matcher = CSS_CLASS_RULE.matcher(allcss);
        while (matcher.find()) {
            styleMap.put(matcher.group(1), matcher.group(2));
        }
        return replaceClass(styleMap, doc);
    }

    /**
     * Appends the declarations of each known class to the element's style attribute and removes
     * the class attribute. Classes without a rule in the map are skipped.
     */
    private static Document replaceClass(Map<String, String> styMap, Document doc) {
        for (Element element : doc.getElementsByAttribute("class")) {
            StringBuilder style = new StringBuilder(element.attr("style"));
            for (String classStr : element.attr("class").trim().split("\\s+")) {
                String declarations = styMap.get(classStr);
                if (declarations == null) {
                    continue;
                }
                // Keep the previous declaration terminated before appending more.
                String current = style.toString().trim();
                if (!current.isEmpty() && !current.endsWith(";")) {
                    style.append(';');
                }
                style.append(declarations);
            }
            if (style.length() > 0) {
                element.attr("style", style.toString());
            }
            element.removeAttr("class");
        }
        return doc;
    }

    /**
     * Checks whether the text is an optionally signed sequence of decimal digits.
     *
     * @param str text to check, may be null
     * @return true when the text has at least one digit and nothing else but a leading sign;
     *         false for null, empty text or a sign on its own
     */
    public static boolean isInteger(String str) {
        return str != null && INTEGER_PATTERN.matcher(str).matches();
    }
}
