Split text into pages and present separately (HTML5)

SVG might be a good fit for your text pagination

  • SVG text is actually text — unlike canvas which displays just a picture of text.

  • SVG text is readable, selectable, searchable.

  • SVG text does not auto-wrap natively, but this is easily remedied using javascript.

  • Flexible page sizes are possible because page formatting is done in javascript.

  • Pagination does not rely on browser dependent formatting.

  • Text downloads are small and efficient. Only the text for the current page needs to be downloaded.

Here are the details of how SVG pagination can be done and a Demo:

http://jsfiddle.net/m1erickson/Lf4Vt/

enter image description here

Part 1: Efficiently fetch about a page worth of words from a database on the server

Store the entire text in a database with 1 word per row.

Each row (word) is sequentially indexed by the word’s order (word#1 has index==1, word#2 has index==2, etc).

For example this would fetch the entire text in proper word order:

// select the entire text of Romeo and Juliet
// “order by wordIndex” causes the words to be in proper order

Select word from RomeoAndJuliet order by wordIndex

If you assume that any page contains about 250 words when formatted, then this database query will fetch the first 250 words of text for page#1

// select the first 250 words for page#1

Select top 250 word from RomeoAndJuliet order by wordIndex

Now the good part!

Let’s say page#1 used 212 words after formatting. Then when you’re ready to process page#2 you can fetch 250 more words starting at word#213. This results in quick and efficient data fetches.

// select 250 more words for page#2
// “where wordIndex>212” causes the fetched words
// to begin with the 213th word in the text
// (the “where” clause must come before “order by”)

Select top 250 word from RomeoAndJuliet where wordIndex>212 order by wordIndex

Part 2: Format the fetched words into lines of text that fit into the specified page width

Each line of text must contain enough words to fill the specified page width, but not more.

Start line#1 with a single word and then add words 1-at-a-time until the next word would no longer fit in the specified page width.

After the first line is fitted, we move down by a line-height and begin line#2.

Fitting the words on the line requires measuring each additional word added on a line. When the next word would exceed the line width, that extra word is moved to the next line.

A word can be measured using the HTML canvas context.measureText method.

This code will take a set of words (like the 250 words fetched from the database) and will format as many words as possible to fill the page size.

maxWidth is the maximum pixel width of a line of text.

maxLines is the maximum number of lines that will fit on a page.

/**
 * Word-wraps a list of words into at most `maxLines` lines for one page.
 *
 * Each line is produced by getOneLineOfText(); the words it consumed are
 * dropped from a private working copy, so the caller's `words` array is
 * left untouched.
 *
 * Side effect: the number of words placed on the page is added to the
 * outer `wordCount` variable, so the caller knows where the next page starts.
 *
 * @param {string[]} words    - candidate words for this page, in reading order
 * @param {number}   maxWidth - maximum pixel width of a line of text
 * @param {number}   maxLines - maximum number of lines that fit on a page
 * @param {number}   x        - accepted but not used by this function
 * @param {number}   y        - accepted but not used by this function
 * @returns {{index:number,text:string}[]} one entry per line; `index` is the
 *          position (within the words remaining at that point) of the last
 *          word on the line, `text` is the line's content
 */
function textToLines(words,maxWidth,maxLines,x,y){

    var lines=[];
    // work on a copy so the caller's array is never truncated
    var remaining=words.slice();

    // strictly "<": never produce more than maxLines lines
    while(remaining.length>0 && lines.length<maxLines){
        var line=getOneLineOfText(remaining,maxWidth);
        var used=line.index+1;
        if(used<1){
            // The helper placed no word (a single word wider than the line).
            // Put that word on a line by itself so pagination always advances.
            line={index:0,text:remaining[0]};
            used=1;
        }
        remaining=remaining.slice(used);
        lines.push(line);
        wordCount+=used;
    }

    return(lines);
}

/**
 * Greedily takes words from the front of `words` to build one line of text
 * that is no wider than `maxWidth`.
 *
 * Widths are measured with the outer canvas 2D context `ctx`
 * (ctx.measureText), so `ctx.font` determines the result.
 *
 * The first word is always accepted, even if it alone is wider than
 * `maxWidth`; otherwise the function would consume nothing and the caller
 * could never advance past that word.
 *
 * @param {string[]} words    - remaining words, in reading order (not modified)
 * @param {number}   maxWidth - maximum pixel width of a line of text
 * @returns {{index:number,text:string}} `index` is the position in `words` of
 *          the last word placed on the line (-1 only when `words` is empty);
 *          `text` is the line with words joined by single spaces
 */
function getOneLineOfText(words,maxWidth){
    var line="";
    var space="";
    for(var i=0;i<words.length;i++){
        // measure exactly what would be drawn: no leading space before the first word
        var candidate=line+space+words[i];
        var testWidth=ctx.measureText(candidate).width;
        // i>0: an over-wide first word still gets a line of its own
        if(i>0 && testWidth>maxWidth){return({index:i-1,text:line});}
        line=candidate;
        space=" ";
    }
    return({index:words.length-1,text:line});
}

Part 3: Display the lines of text using SVG

The SVG Text element is a true html element that can be read, selected and searched.

Each individual line of text in the SVG Text element is displayed using an SVG Tspan element.

This code takes the lines of text which were formatted in Part#2 and displays the lines as a page of text using SVG.

/**
 * Builds an <svg> containing a single <text> element with one <tspan> per
 * formatted line, and appends it to the outer `$page` element.
 *
 * Reads the outer variables `lineHeight` (vertical advance per line, in px)
 * and `$page` (the container that receives the svg).
 *
 * @param {{text:string}[]} lines - formatted lines; only `.text` is used
 * @param {number} x - value for the `x` attribute of every line
 */
function drawSvg(lines,x){
    var SVG_NS='http://www.w3.org/2000/svg';

    var root = document.createElementNS(SVG_NS, 'svg');
    var textEl = document.createElementNS(SVG_NS, 'text');

    // font settings shared by every line
    textEl.setAttributeNS(null, 'font-family', 'verdana');
    textEl.setAttributeNS(null, 'font-size', "14px");
    textEl.setAttributeNS(null, 'fill', '#000000');

    var idx=0;
    while(idx<lines.length){
        // each tspan resets x and moves down one line height
        var span = document.createElementNS(SVG_NS, 'tspan');
        span.setAttributeNS(null, 'x', x);
        span.setAttributeNS(null, 'dy', lineHeight+"px");
        span.appendChild(document.createTextNode(lines[idx].text));
        textEl.appendChild(span);
        idx+=1;
    }

    root.appendChild(textEl);
    $page.append(root);
}

Here is complete code just in case the Demo link breaks:

<!doctype html>
<html>
<head>
<link rel="stylesheet" type="text/css" media="all" href="https://stackoverflow.com/questions/12202324/css/reset.css" /> <!-- reset css -->
<script type="text/javascript" src="http://code.jquery.com/jquery.min.js"></script>
<style>
    body{ background-color: ivory; }
    .page{border:1px solid red;}
</style>
<script>
$(function(){

    // Off-screen canvas used only to measure text widths.
    // Its font is set to the same family and size that drawSvg gives the SVG text.
    var canvas=document.createElement("canvas");
    var ctx=canvas.getContext("2d");
    ctx.font="14px verdana";

    // page geometry (pixels)
    var pageWidth=250;
    var pageHeight=150;
    var pagePaddingLeft=10;
    var pagePaddingRight=10;
    // how many words getNextWords fetches per request
    var approxWordsPerPage=500;
    var lineHeight=18;
    // whole line-heights contained in the page height, less one
    var maxLinesPerPage=Math.floor(pageHeight/lineHeight)-1;
    var x=pagePaddingLeft;
    var y=lineHeight;
    // usable line width once both paddings are removed
    var maxWidth=pageWidth-pagePaddingLeft-pagePaddingRight;

    // # words that have been displayed
    // (used when ordering a new page of words; advanced by textToLines)
    var wordCount=0;

    // size the divs to the desired page size
    var $pages=$(".page");
    $pages.width(pageWidth);
    $pages.height(pageHeight);

    // the page div currently being filled; drawSvg appends to this variable
    var $page;

    // Test: render three consecutive pages
    // (just testing...normally there's only 1 full-screen page)
    ["#page","#page2","#page3"].forEach(function(selector){
        // get a reference to the page div
        $page=$(selector);
        // use html canvas to word-wrap this page, starting at the first undisplayed word
        var lines=textToLines(getNextWords(wordCount),maxWidth,maxLinesPerPage,x,y);
        // create svg elements for each line of text on the page
        drawSvg(lines,x);
    });


    // fetch the next page of words from the server database
    // (since we've specified the starting point in the entire text
    //  we only have to download 1 page of text as needed
    /**
     * Returns the next batch of words, starting at position `nextWordIndex`
     * (0-based) of the text and containing at most `approxWordsPerPage` words.
     *
     * A real implementation would query the server, e.g.
     *     select top 500 word from romeoAndJuliet
     *     where wordIndex>=nextwordIndex
     *     order by wordIndex
     * For testing, the entire text is hardcoded here and split on spaces.
     *
     * @param {number} nextWordIndex - index of the first word to return
     * @returns {string[]} the requested words, in reading order
     */
    function getNextWords(nextWordIndex){
        var sampleText="Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.";

        // splice on the freshly split array hands back just the requested window
        return(sampleText.split(" ").splice(nextWordIndex,approxWordsPerPage));
    }


    /**
     * Word-wraps a list of words into at most `maxLines` lines for one page.
     *
     * Each line is produced by getLineOfText(); the words it consumed are
     * dropped from a private working copy, so the caller's `words` array is
     * left untouched.
     *
     * Side effect: the number of words placed on the page is added to the
     * enclosing `wordCount` variable, so the next page can start there.
     *
     * @param {string[]} words    - candidate words for this page, in reading order
     * @param {number}   maxWidth - maximum pixel width of a line of text
     * @param {number}   maxLines - maximum number of lines to produce
     * @param {number}   x        - accepted but not used by this function
     * @param {number}   y        - accepted but not used by this function
     * @returns {{index:number,text:string}[]} one entry per line; `index` is the
     *          position (within the words remaining at that point) of the last
     *          word on the line, `text` is the line's content
     */
    function textToLines(words,maxWidth,maxLines,x,y){

        var lines=[];
        // work on a copy so the caller's array is never truncated
        var remaining=words.slice();

        // strictly "<": never produce more than maxLines lines
        while(remaining.length>0 && lines.length<maxLines){
            var line=getLineOfText(remaining,maxWidth);
            var used=line.index+1;
            if(used<1){
                // The helper placed no word (a single word wider than the line).
                // Put that word on a line by itself so pagination always advances.
                line={index:0,text:remaining[0]};
                used=1;
            }
            remaining=remaining.slice(used);
            lines.push(line);
            wordCount+=used;
        }

        return(lines);
    }

    /**
     * Greedily takes words from the front of `words` to build one line of text
     * that is no wider than `maxWidth`.
     *
     * Widths are measured with the enclosing canvas 2D context `ctx`
     * (ctx.measureText), so `ctx.font` determines the result.
     *
     * The first word is always accepted, even if it alone is wider than
     * `maxWidth`; otherwise the function would consume nothing and the caller
     * could never advance past that word.
     *
     * @param {string[]} words    - remaining words, in reading order (not modified)
     * @param {number}   maxWidth - maximum pixel width of a line of text
     * @returns {{index:number,text:string}} `index` is the position in `words` of
     *          the last word placed on the line (-1 only when `words` is empty);
     *          `text` is the line with words joined by single spaces
     */
    function getLineOfText(words,maxWidth){
        var line="";
        var space="";
        for(var i=0;i<words.length;i++){
            // measure exactly what would be drawn: no leading space before the first word
            var candidate=line+space+words[i];
            var testWidth=ctx.measureText(candidate).width;
            // i>0: an over-wide first word still gets a line of its own
            if(i>0 && testWidth>maxWidth){return({index:i-1,text:line});}
            line=candidate;
            space=" ";
        }
        return({index:words.length-1,text:line});
    }

    /**
     * Builds an <svg> containing a single <text> element with one <tspan> per
     * formatted line, and appends it to the enclosing `$page` element.
     *
     * Reads the enclosing variables `lineHeight` (vertical advance per line,
     * in px) and `$page` (the container that receives the svg).
     *
     * @param {{text:string}[]} lines - formatted lines; only `.text` is used
     * @param {number} x - value for the `x` attribute of every line
     */
    function drawSvg(lines,x){
        var SVG_NS='http://www.w3.org/2000/svg';

        var root = document.createElementNS(SVG_NS, 'svg');
        var textEl = document.createElementNS(SVG_NS, 'text');

        // font settings shared by every line
        textEl.setAttributeNS(null, 'font-family', 'verdana');
        textEl.setAttributeNS(null, 'font-size', "14px");
        textEl.setAttributeNS(null, 'fill', '#000000');

        var idx=0;
        while(idx<lines.length){
            // each tspan resets x and moves down one line height
            var span = document.createElementNS(SVG_NS, 'tspan');
            span.setAttributeNS(null, 'x', x);
            span.setAttributeNS(null, 'dy', lineHeight+"px");
            span.appendChild(document.createTextNode(lines[idx].text));
            textEl.appendChild(span);
            idx+=1;
        }

        root.appendChild(textEl);
        $page.append(root);
    }

}); // end $(function(){});
</script>
</head>
<body>
    <h4>Text split into "pages"<br>(Selectable & Searchable)</h4>
    <div id="page" class="page"></div>
    <h4>Page 2</h4>
    <div id="page2" class="page"></div>
    <h4>Page 3</h4>
    <div id="page3" class="page"></div>
</body>
</html>

Leave a Comment