Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consider adding a feature to convert HTML back to Markdown #60

Open
amlan-sw opened this issue Sep 15, 2024 · 0 comments
Open

Consider adding a feature to convert HTML back to Markdown #60

amlan-sw opened this issue Sep 15, 2024 · 0 comments

Comments

@amlan-sw
Copy link

amlan-sw commented Sep 15, 2024

This ChatGPT-generated code, which has already been tuned and tested, converts the default HTML output of markdown-live-preview back into Markdown.

file: html_to_markdown.html

<!DOCTYPE html>
<head>
    <meta charset="utf-8">
    <title>HTML to MD</title>
    <meta name="description" content="">
    <meta name="author" content="">
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
</head>
<body>

<script type="text/javascript">

function html_to_markdown(html) {
    var tempDiv = document.createElement('div');
    tempDiv.innerHTML = html;

    function handleStyle(element) {
        const style = element.getAttribute('style') || '';
        const fontWeight = style.includes('font-weight: bold') ? '**' : '';
        const textDecoration = style.includes('text-decoration: underline') ? '__' : '';
        return fontWeight + textDecoration;
    }

    function parseNode(node) {
        let result = '';

        if (node.nodeType === Node.TEXT_NODE) {
            return node.nodeValue;
        }

        if (node.nodeType === Node.ELEMENT_NODE) {
            const tag = node.tagName.toLowerCase();
            const stylePrefix = handleStyle(node);

            switch (tag) {
                case 'b':
                case 'strong':
                    result = '**' + parseChildren(node) + '**';
                    break;
                case 'i':
                case 'em':
                    result = '*' + parseChildren(node) + '*';
                    break;
                case 'u':
                    result = '__' + parseChildren(node) + '__';
                    break;
                case 'a':
                    const href = node.getAttribute('href');
                    result = '[' + parseChildren(node) + '](' + href + ')';
                    break;
                case 'img':
                    const src = node.getAttribute('src');
                    const alt = node.getAttribute('alt') || '';
                    const title = node.getAttribute('title') ? ` "${node.getAttribute('title')}"` : '';
                    result = '![' + alt + '](' + src + title + ')';
                    break;
                case 'table':
                    result = parseTable(node);
                    break;
                case 'tr':
                    result = '| ' + parseChildren(node).trim() + ' |\n';
                    break;
                case 'th':
                    result = '**' + parseChildren(node) + '**';
                    break;
                case 'td':
                    result = parseTableData(node);
                    break;
                case 'br':
                    result = '\n';
                    break;
                case 'p':
                    result = '\n\n' + parseChildren(node) + '\n\n';
                    break;
                case 'h1':
                    result = '\n# ' + parseChildren(node) + '\n\n';
                    break;
                case 'h2':
                    result = '\n## ' + parseChildren(node) + '\n\n';
                    break;
                case 'h3':
                    result = '\n### ' + parseChildren(node) + '\n\n';
                    break;
                case 'h4':
                    result = '\n#### ' + parseChildren(node) + '\n\n';
                    break;
                case 'h5':
                    result = '\n##### ' + parseChildren(node) + '\n\n';
                    break;
                case 'h6':
                    result = '\n###### ' + parseChildren(node) + '\n\n';
                    break;
                case 'ul':
                    result = '\n' + parseList(node, '* ') + '\n';
                    break;
                case 'ol':
                    result = '\n' + parseList(node, '1. ') + '\n';
                    break;
                case 'blockquote':
                    result = '\n> ' + parseChildren(node).trim().replace(/\n/g, '\n> ') + '\n';
                    break;
                case 'code':
                    result = '`' + parseChildren(node) + '`';
                    break;
                case 'pre':
                    result = '\n```\n' + parseChildren(node) + '\n```\n';
                    break;
                default:
                    result = stylePrefix + parseChildren(node) + stylePrefix;
                    break;
            }
        }

        return result;
    }

    function parseChildren(node) {
        let result = '';
        node.childNodes.forEach(child => {
            result += parseNode(child);
        });
        return result;
    }

    function parseTable(node) {
        let rows = node.getElementsByTagName('tr');
        let tableData = [];

        // Gather all rows and cells, and calculate max column widths
        let columnWidths = [];

        for (let i = 0; i < rows.length; i++) {
            let row = [];
            let cells = rows[i].getElementsByTagName('th').length > 0 ? rows[i].getElementsByTagName('th') : rows[i].getElementsByTagName('td');

            for (let j = 0; j < cells.length; j++) {
                let cellContent = parseNode(cells[j]).trim();

                // Ensure that columnWidths array has an entry for this column
                columnWidths[j] = Math.max(columnWidths[j] || 0, cellContent.length);

                row.push(cellContent);
            }
            tableData.push(row);
        }

        // Build the markdown table with aligned columns
        let result = '\n';
        for (let i = 0; i < tableData.length; i++) {
            let row = tableData[i];

            // Format each row with the appropriate column widths
            let formattedRow = '| ';
            for (let j = 0; j < row.length; j++) {
                let cell = row[j];
                let paddedCell = cell.padEnd(columnWidths[j], ' ');  // Pad the cell to the max column width
                formattedRow += paddedCell + ' | ';
            }

            result += formattedRow + '\n';

            // Add separator after header row
            if (i === 0) {
                let separator = '| ';
                for (let j = 0; j < row.length; j++) {
                    separator += '-'.repeat(columnWidths[j]) + ' | ';
                }
                result += separator + '\n';
            }
        }

        return result + '\n';
    }

    function parseTableData(node) {
        const align = node.getAttribute('align') || 'left';
        const content = parseChildren(node);
        if (align === 'center') {
            return ':' + content + ':';
        } else if (align === 'right') {
            return content + ':';
        }
        return content;
    }

    function parseList(node, bullet, level = 0) {
        let result = '';
        let counter = 1;  

        node.childNodes.forEach(item => {
            if (item.tagName && item.tagName.toLowerCase() === 'li') {
                
                let listBullet = (bullet === '1. ') ? (counter++ + '. ') : bullet;

                let indentation = ' '.repeat(level * 4);  

                let nestedList = item.querySelector('ul, ol');

                if (nestedList) {
                    item.removeChild(nestedList);
                }

                result += indentation + listBullet + parseChildren(item).trim() + '\n';

                if (nestedList) {
                    let nestedBullet = (nestedList.tagName.toLowerCase() === 'ol') ? '1. ' : '* ';
                    result += parseList(nestedList, nestedBullet, level + 1);  // Rekursi untuk nested list
                }
            }
        });

        return result;
    }

    return parseChildren(tempDiv)
        .replace(/\s*\n\s*\n+/gs, '\n\n')  // max  \n\n
        .replace(/\n```\n\s*`\n*/gs, '\n```\n')  //handling <code> and <pre>  
        .replace(/\n*\s*`\s*\n```\n/gs, '\n```\n')  
        .trim() + '\n';
}

/**
 * Demo entry point wired to the "Test and Copy" button: converts a fixed
 * sample HTML document with html_to_markdown(), copies the Markdown to the
 * clipboard and shows it in an alert.
 *
 * The success message is shown only after the clipboard write has actually
 * completed; if the write is rejected or the Clipboard API is unavailable,
 * a failure message is shown instead of leaving an unhandled rejection.
 *
 * @returns {Promise<void>} Settles once the alert has been dismissed.
 */
async function run_test() {

// Example

    const html = `
<div>

    <h1>Markdown syntax guide</h1>

    <h2>Headers</h2>
    <h1>This is a Heading h1</h1>
    <h2>This is a Heading h2</h2>
    <h6>This is a Heading h6</h6>

    <h2>Emphasis</h2>
    <p><em>This text will be italic</em><br><em>This will also be italic</em></p>
    <p><strong>This text will be bold</strong><br><strong>This will also be bold</strong></p>
    <p><em>You <strong>can</strong> combine them</em></p>

    <h2>Lists</h2>

    <h3>Unordered</h3>
    <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        <li>Item 2a</li>
        <li>Item 2b</li>
    </ul>

    <h3>Ordered</h3>
    <ol>
        <li>Item 1</li>
        <li>Item 2</li>
        <li>
            Item 3
            <ol>
                <li>Item 3a</li>
                <li>Item 3b</li>
            </ol>
        </li>
    </ol>

    <h2>Images</h2>

    <p><img title="This is a sample image." alt="This is an alt text." src="image/sample.webp"> </p>

    <h2>Links</h2>

    <p>You may be using <a href="https://markdownlivepreview.com/">Markdown Live Preview</a>.</p>

    <h2>Blockquotes</h2>

    <blockquote>
        <p>Markdown is a lightweight markup language with plain-text-formatting syntax, created in 2004 by John Gruber with Aaron Swartz.</p>
        <blockquote>
            <p>Markdown is often used to format readme files, for writing messages in online discussion forums, and to create rich text using a plain text editor.</p>
        </blockquote>
    </blockquote>

    <h2>Tables</h2>

    <table>
        <thead>
            <tr>
                <th>Left columns</th>
                <th align="center">Right columns</th>
            </tr>
        </thead>
        <tbody>
            <tr>
                <td>left foo</td>
                <td align="center">right foo</td>
            </tr>
            <tr>
                <td>left bar</td>
                <td align="center">right bar</td>
            </tr>
            <tr>
                <td>left baz</td>
                <td align="center">right baz</td>
            </tr>
        </tbody>
    </table>

    <h2>Blocks of code</h2>

    <pre>
        <code>
let message = 'Hello world';
alert(message);
        </code>
    </pre>

    <h2>Inline code</h2>

    <p>This web site is using <code>markedjs/marked</code>.</p>
</div>

    `;

    const md = html_to_markdown(html);

    try {
        // Copy first, while the click's user activation is still fresh, and
        // only then report success.
        await navigator.clipboard.writeText(md);
        alert('COPIED TO CLIPBOARD:\n\n' + md);
    } catch (err) {
        // Covers both a rejected write and a missing navigator.clipboard.
        alert('COULD NOT COPY TO CLIPBOARD (' + err.message + '):\n\n' + md);
    }
}

</script>

<button onclick="run_test()"> Test and Copy </button>

</body>
</html>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants