Map HTML to JSON [closed]

I just wrote this function that does what you want; try it out let me know if it doesn’t work correctly for you:

// Test with an element.
var initElement = document.getElementsByTagName("html")[0];
var json = mapDOM(initElement, true);
console.log(json);

// Test with a string.
initElement = "<div><span>text</span>Text2</div>";
json = mapDOM(initElement, true);
console.log(json);

function mapDOM(element, json) {
    var treeObject = {};
    
    // If string convert to document Node
    if (typeof element === "string") {
        if (window.DOMParser) {
              parser = new DOMParser();
              docNode = parser.parseFromString(element,"text/xml");
        } else { // Microsoft strikes again
              docNode = new ActiveXObject("Microsoft.XMLDOM");
              docNode.async = false;
              docNode.loadXML(element); 
        } 
        element = docNode.firstChild;
    }
    
    //Recursively loop through DOM elements and assign properties to object
    function treeHTML(element, object) {
        object["type"] = element.nodeName;
        var nodeList = element.childNodes;
        if (nodeList != null) {
            if (nodeList.length) {
                object["content"] = [];
                for (var i = 0; i < nodeList.length; i++) {
                    if (nodeList[i].nodeType == 3) {
                        object["content"].push(nodeList[i].nodeValue);
                    } else {
                        object["content"].push({});
                        treeHTML(nodeList[i], object["content"][object["content"].length -1]);
                    }
                }
            }
        }
        if (element.attributes != null) {
            if (element.attributes.length) {
                object["attributes"] = {};
                for (var i = 0; i < element.attributes.length; i++) {
                    object["attributes"][element.attributes[i].nodeName] = element.attributes[i].nodeValue;
                }
            }
        }
    }
    treeHTML(element, treeObject);
    
    return (json) ? JSON.stringify(treeObject) : treeObject;
}

Working example: http://jsfiddle.net/JUSsf/ (Tested in Chrome, I can’t guarantee full browser support – you will have to test this).

​It creates an object that contains the tree structure of the HTML page in the format you requested and then uses JSON.stringify() which is included in most modern browsers (IE8+, Firefox 3+ .etc); If you need to support older browsers you can include json2.js.

It can take either a DOM element or a string containing valid XHTML as an argument (I believe, I’m not sure whether the DOMParser() will choke in certain situations as it is set to "text/xml" or whether it just doesn’t provide error handling. Unfortunately "text/html" has poor browser support).

You can easily change the range of this function by passing a different value as element. Whatever value you pass will be the root of your JSON map.

Leave a Comment