/* 
  totxt: A program that transforms HTML elements to plain text
  Copyright (C) 2009  Niels Serup

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/*
  Version: 0.9
  You can contact Niels at <ns@metanohi.org>.
  Latest version is always available at <http://metanohi.org/projects/totxt/>,
  along with documentation.
*/



/* To avoid overuse of global variables, all functions and actions have been
   stuffed into the 'load_totxt_generator' function. */
function load_totxt_generator(action, var_a) {
    /* Normally, this function will load a bunch of functions and then generate
       PRE elements. This isn't the case if a special action is defined. */
    
    function count(haystack, needle) {
        /* Returns the number of pieces haystack falls into when it is cut at
           every occurrence of needle, i.e. <occurrences of needle> + 1. With
           the default needle (a newline) that is the number of lines in
           haystack.

           haystack: string to search in
           needle:   string to look for; '\n' is used when it is undefined
                     or null
           Returns:  occurrences + 1. Matches may overlap ('aaa' contains
                     'aa' twice, so the result is 3). */
        if (needle == undefined)
            needle = '\n'
        // An empty needle matches at every position, which would keep the
        // search below from ever reaching "not found". Treat it as a
        // separator that never occurs.
        if (('' + needle) === '')
            return 1
        var c, offset
        c = 1
        offset = haystack.indexOf(needle)
        while (offset != -1) {
            c++
            // Continue one character after the last hit (allows overlaps)
            offset = haystack.indexOf(needle, offset + 1)
        }
        return c
    }
    
    if (action == 'show_or_hide') {
        /* Used by dynamic boxes that utilize sliding functionality. To open an
           article box, it is sufficient to click on it, but to close one, one
           must doubleclick on it. This is partly why there are some
           differences in what type of state the box is in. */
        var obj = document.totxt_preElements[var_a]
        if (obj.state) {
            // If open, COLLAPSE
            obj.state = 0
            if (obj.quickdyn) {
                // Don't slide
                obj.innerHTML = obj.header
                obj.className = obj.className.replace(obj.dynclass + '1', obj.dynclass + '0')
                obj.onclick = function(){load_totxt_generator('show_or_hide', this.number)}
                obj.ondblclick = undefined
            }
            else {
                // Do slide
                obj.onclick = undefined
                obj.ondblclick = undefined
                obj.slide_expand = 0
                obj.line = undefined
                load_totxt_generator('slide', var_a)
            }
        }
        else {
            // If closed, EXPAND
            obj.state = 1
            obj.className = obj.className.replace(obj.dynclass + '0', obj.dynclass + '1')
            if (obj.quickdyn) {
                // Don't slide
                obj.innerHTML = obj.fulltext
                obj.onclick = undefined
                obj.ondblclick = function(){load_totxt_generator('show_or_hide', this.number)}
            }
            else {
                // Do slide
                obj.onclick = undefined
                obj.ondblclick = undefined
                obj.slide_expand = 1
                obj.line = undefined
                load_totxt_generator('slide', var_a)
            }
        }
        return true
    }
    else if (action == 'slide') {
        /* Using the setTimeout function, this action is used by 'show_or_hide'
           actions to either expand or collapse a box slidingly. */
        var obj, hh, th, obdl
        obj = document.totxt_preElements[var_a]
        hh = obj.headerheight
        th = obj.fulltextheight
        if (obj.line == undefined) {
            if (obj.slide_expand)
                obj.line = hh
            else
                obj.line = th
        }
        
        obj.innerHTML = obj.fulltext.split('\n', obj.line).join('\n')
        
        // Add lines when expanding, remove lines when collapsing
        if (obj.slide_expand) {
            obdl = obj.line
            if (obdl == th)
                obdl++
            else {
                obdl = obj.line + obj.dynlines
                if (obdl > th)
                    obdl = th
            }
            obj.line = obdl
        }
        else {
            obdl = obj.line
            if (obdl == hh)
                obdl--
            else {
                obdl = obj.line - obj.dynlines
                if (obdl < hh)
                    obdl = hh
            }
            obj.line = obdl
        }
        if (obj.line >= hh && obj.line <= th)
            setTimeout('load_totxt_generator(\'slide\', ' + var_a + ')', obj.dynspeed)
        else {
            // End 'loop'
            if (obj.slide_expand) {
                obj.onclick = undefined
                obj.ondblclick = function(){load_totxt_generator('show_or_hide', this.number)}
            }
            else {
                obj.onclick = function(){load_totxt_generator('show_or_hide', this.number)}
                obj.ondblclick = undefined
                obj.className = obj.className.replace(obj.dynclass + '1', obj.dynclass + '0')
            }
            // Not necessary. Has no noticable effect.
            delete document.slide_object
            delete obj.slide_expand
            delete obj.line
        }
        return true
    }
    
    /*
       No more special actions exist. If neither of the two actions above was
       requested, code execution continues here.
    */
    
    // Various functions
    function isin(ref, str, end, defau) {
        /* Looks for str among the entries of the array ref.

           ref:   array of strings (here: the class names of an element)
           str:   string to look for
           end:   if false, an entry must equal str and true is returned.
                  If true, an entry only has to BEGIN with str, and the rest
                  of that entry is returned ('width40' with str 'width' gives
                  '40'; an entry equal to str gives the empty string).
           defau: returned when nothing matches, provided it is truthy

           Returns: see 'end'. Without a match: defau if truthy, else false. */
        var i, c
        // Walk the indices only. A for...in loop would also visit enumerable
        // properties that other scripts have added to Array.prototype.
        for (i = 0; i < ref.length; i++) {
            if (!(i in ref))
                continue // Hole in a sparse array
            c = ref[i]
            if (end) {
                if (c.indexOf(str) == 0)
                    return c.substr(str.length)
            }
            else if (c == str)
                return true
        }
        if (defau)
            return defau
        else
            return false
    }

    function ctimes(cha, times) {
        // Returns cha repeated 'times' times ('a', 3 gives 'aaa').
        // A count of zero or less gives the empty string.
        var repeated = ''
        var remaining = times
        while (remaining > 0) {
            repeated += cha
            remaining--
        }
        return repeated
    }

    function transform_text(str, w, align, del_xws) {
        /* This function transforms text seen in ordinary HTML tags into text
           that fits simple monospace character boxes. */
        
        // Prepare text
        str = str.replace(new RegExp('^\\s+'), '').replace(new RegExp('\\s+$'), '')
        str = str.replace(/<br \/>/gi, '<br>').replace(/<br>\n/gi, '<br>')
        //str = str.replace(/&nbsp;/g, ' ')
        str = str.replace(/\n/g, ' ')
        if (del_xws) {
            // Delete excessive whitespace
            str = str.replace(new RegExp('\\s+', 'g'), ' ')
        }
        str = str.replace(/<br>/gi, '\n')
        if (del_xws) {
            // Delete excessive whitespace caused by line break transformation
            str = str.replace(/\n /g, '\n')
        }
        
        var tags, tlen, intag, start, i, c, x, sstr, ntxt, ctxt, txt, cw, clen
        var entities, inentity, filter, tminus, l, spaces
        
        // Store and remove HTML tags in text (will be readded in text later)
        // Also look for HTML entities, such as &lt; (<)
        tagent = [] // To store HTML tags and HTML entities
        tlen = str.length
        start = 0
        intag = false
        inentity = false
        filter = ' <>!"\'\\/*-+,.:;'
        
        ntxt = ''
        for (i = 0; i < tlen; i++) {
            c = str.substr(i, 1)
            if (c == '<' && !intag) {
                intag = true
                start = i
            }
            else if (c == '>' && intag) {
                intag = false
                tagent[tagent.length] = ['tag', start, str.substr(start, i - start + 1)]
            }
            else if (!intag) {
                if (c == '&' && !inentity) {
                    if (filter.indexOf(str.substr(i+1, 1)) == -1) {
                        inentity = true
                        start = i
                    }
                    ntxt += c
                }
                else if (c == ';' && inentity) {
                    inentity = false
                    tagent[tagent.length] = ['ent', start, str.substr(start, i - start + 1)]
                }
                else if (!inentity)
                    ntxt += c
            }
        }
        str = ntxt
                
        // Change line width of text. When inserting a hyphen and a newline, check
        // if positions of HTML tags and HTML entities need to be changed.
        sstr = str.split('\n')
        ntxt = ''
        for (ctxt in sstr) {
            txt = sstr[ctxt]
            while (txt) {
                if (txt.length <= w) {
                    ntxt += txt
                    break
                }
                if (txt.substr(0, w).indexOf(' ') == -1) {
                    ntxt += txt.substr(0, w - 1) + '-\n'
                    txt = txt.substr(w - 1)
                    clen = ntxt.length
                    tminus = 0
                    for (x in tagent) {
                        if (tagent[x][1] - tminus > clen)
                            tagent[x][1] += 2
                        l = tagent[x][2].length
                        if (tagent[x][0] == 'ent')
                            l--
                        tminus += l
                    }
                    continue
                }
                cw = w
                while (txt.substr(cw, 1) != ' ')
                    cw -= 1
                ntxt += txt.substr(0, cw) + '\n'
                txt = txt.substr(cw + 1)
                // No need to readjust positions of HTML stuff here, as the
                // new newline replaces a space character
            }
            ntxt += '\n'
        }
        while (ntxt.substr(ntxt.length - 1) == '\n')
            ntxt = ntxt.substr(0, ntxt.length - 1)
        
        str = ntxt
        
        // Readd removed HTML tags and HTML entities
        for (x in tagent) {
            c = tagent[x]
            if (c[0] == 'tag')
                str = str.substr(0, c[1]) + c[2] + str.substr(c[1])
            else if (c[0] == 'ent')
                str = str.substr(0, c[1]) + c[2] + str.substr(c[1]+1)
        }

        // Align (0 = left, 1 = center, 2 = right)
        if (align != 0 && align != 1 && align != 2)
            align = 0
        
        sstr = str.split('\n')
        ntxt = ''
        for (ctxt in sstr) {
            txt = sstr[ctxt]
            tlen = txt.length
            clen = 0
            intag = false
            inentity = false
            for (i = 0; i < tlen; i++) {
                c = txt.substr(i, 1)
                if (c == '<' && !intag)
                    intag = true
                else if (c == '>' && intag)
                    intag = false
                else if (c == '&' && !inentity) {
                    if (filter.indexOf(txt.substr(i+1, 1)) == -1)
                        inentity = true
                    clen++
                }
                else if (c == ';' && inentity)
                    inentity = false
                else if (!intag && !inentity)
                    clen++
            }
            spaces = w - clen
            ntxt += '\n'
            if (align == 0) // Left
                ntxt += txt + ctimes(' ', spaces)
            else if (align == 2) // Right
                ntxt += ctimes(' ', spaces) + txt
            else if (align == 1) // Center
                ntxt += ctimes(' ', Math.floor(spaces / 2)) + txt + ctimes(' ', Math.ceil(spaces / 2))
        }
        ntxt = ntxt.substr(1)
        if (ntxt)
            str = ntxt
        return str
    }

    function add_borders(txt, border, padding) {
        /* Puts a border around every line of txt.

           txt:     text, lines separated by '\n'
           border:  string placed at both ends of each line (default '|')
           padding: string placed between border and text (default ' ')

           Returns: the text with each line turned into
                    border + padding + line + padding + border. */
        if (border == undefined)
            border = '|'
        if (padding == undefined)
            padding = ' '
        var rows, i, n
        rows = txt.split('\n')
        n = ''
        // Index loop: for...in would also pick up enumerable properties that
        // other scripts have added to Array.prototype and frame them as rows.
        for (i = 0; i < rows.length; i++) {
            n += '\n' + border + padding + rows[i] + padding + border
        }
        return n.substr(1) // Drop the newline in front of the first row
    }

    function transform_text_in_list(arr, w, del_xws, borders, padding, align) {
        /* Runs transform_text on every entry of arr and joins the results
           with newlines.

           arr:     array of HTML strings
           w:       line width, passed on to transform_text
           del_xws: passed on to transform_text
           borders: if truthy, each transformed entry is framed with
                    add_borders(entry, borders, padding)
           padding: passed on to add_borders
           align:   passed on to transform_text; 0 (left) if undefined

           Returns: the joined text; '' for an empty array. */
        var i, t, f
        if (align == undefined)
            align = 0 // Left
        f = ''
        // Index loop: for...in would also visit enumerable properties that
        // other scripts have added to Array.prototype.
        for (i = 0; i < arr.length; i++) {
            t = transform_text(arr[i], w, align, del_xws)
            if (borders)
                t = add_borders(t, borders, padding)
            f += '\n' + t
        }
        return f.substr(1) // Drop the newline in front of the first entry
    }
    // No more functions
    
        
    // Begin load + search + generate
    //
    // This first part only collects settings. Every setting starts out with a
    // built-in default and may be overridden by an optional global variable
    // (totxt_...). Each override is read inside its own try statement: if the
    // global was never declared, the reference throws, the empty catch
    // swallows the error and the default stays in place.
    
    // Local variables
    var h1s, x, z, y, ok, cur, classes, par, chs, cur_found, ccur,
        header, subheaders, texts, elems, subheader, content,
        stdclass, dynclass, widclass, xwsclass, quiclass, speclass, linclass,
        stdwidth, width, stddynamic, dynamic, isdynamic, delxws,
        dynspeed, c_speed, dynlines, c_lines, quickdyn, c_quidyn, alwdelxws,
        stdhalign, stdshalign, stdtalign, halclass, shalclass, talclass,
        c_halign, c_shalign, c_talign
    
    var h_elmnm, sh_elmnm, txt_elmnm
    var tmps, text, preElements, elmminus, prelms, plen, celm
    var /*poss_classes, n_classes,*/ pid, tmp
    
    // Default classnames for recognition purposes.
    // Can be changed using global variables.
    // An empty or otherwise falsy override keeps the default name.

    // Class a header element must carry to be transformed at all
    stdclass = 'totxt'
    try {
        stdclass = totxt_CLASSNAME ? totxt_CLASSNAME : stdclass
    }
    catch(e) {}
    // Class name prefix; whatever follows it in the class is read as the
    // initial state of a dynamic box
    dynclass = 'dynamic'
    try {
        dynclass = totxt_DYNAMIC_CLASSNAME ? totxt_DYNAMIC_CLASSNAME : dynclass
    }
    catch(e) {}
    // Class name prefix; whatever follows it is read as the box width
    widclass = 'width'
    try {
        widclass = totxt_WIDTH_CLASSNAME ? totxt_WIDTH_CLASSNAME : widclass
    }
    catch(e) {}
    // Class that switches on deletion of excessive whitespace for one box
    xwsclass = 'del_excess_whitespace'
    try {
        xwsclass = totxt_DELXWS_CLASSNAME ? totxt_DELXWS_CLASSNAME : xwsclass
    }
    catch(e) {}
    // Class name prefix; whatever follows it is read as the sliding speed
    speclass = 'speed'
    try {
        speclass = totxt_DYNSPEED_CLASSNAME ? totxt_DYNSPEED_CLASSNAME : speclass
    }
    catch(e) {}
    // Class name prefix; whatever follows it is read as the lines per step
    linclass = 'lines'
    try {
        linclass = totxt_DYNLINES_CLASSNAME ? totxt_DYNLINES_CLASSNAME : linclass
    }
    catch(e) {}
    // Class that makes one box open and close without sliding
    quiclass = 'quick_dynamic'
    try {
        quiclass = totxt_QUICK_CLASSNAME ? totxt_QUICK_CLASSNAME : quiclass
    }
    catch(e) {}
    // Class name prefixes; whatever follows them is read as the alignment of
    // the header, the subheaders and the text respectively
    halclass = 'halign'
    try {
        halclass = totxt_HALIGN_CLASSNAME ? totxt_HALIGN_CLASSNAME : halclass
    }
    catch(e) {}
    shalclass = 'shalign'
    try {
        shalclass = totxt_SHALIGN_CLASSNAME ? totxt_SHALIGN_CLASSNAME : shalclass
    }
    catch(e) {}
    talclass = 'talign'
    try {
        talclass = totxt_TALIGN_CLASSNAME ? totxt_TALIGN_CLASSNAME : talclass
    }
    catch(e) {}
    // Element names for header, subheader and text. They are upper-cased
    // because the subheader and text names are compared with tagName below.
    h_elmnm = 'H1'
    try {
        h_elmnm = totxt_HEADER_ELEMENT ? totxt_HEADER_ELEMENT.toUpperCase() : h_elmnm
    }
    catch(e) {}
    sh_elmnm = 'H2'
    try {
        sh_elmnm = totxt_SUBHEADER_ELEMENT ? totxt_SUBHEADER_ELEMENT.toUpperCase() : sh_elmnm
    }
    catch(e) {}
    txt_elmnm = 'P'
    try {
        txt_elmnm = totxt_TEXT_ELEMENT ? totxt_TEXT_ELEMENT.toUpperCase() : txt_elmnm
    }
    catch(e) {}
    
    // All header candidates in the document
    h1s = document.getElementsByTagName(h_elmnm)
    if (!h1s)
        return false
    
    
    // Default values
    // Line width
    stdwidth = 80
    try {
        // A value that is 0 or not a number keeps the default
        stdwidth = totxt_WIDTH * 1 ? totxt_WIDTH : stdwidth
    }
    catch(e) {}
    if (stdwidth < 10)
        stdwidth = 10
    
    // Should articles be allowed to collapse and expand with a mouseclick?
    // Only the values 0 and 1 make a box dynamic, so the default of -1
    // means "not dynamic".
    stddynamic = -1
    try {
        stddynamic = totxt_DYNAMIC
    }
    catch(e) {}
    
    // Sliding speed (used as the setTimeout delay between two slide steps)
    // NOTE(review): unlike the width and the line count there is no fallback
    // here; a non-numeric totxt_DYNAMIC_SPEED makes this NaN.
    dynspeed = 50
    try {
        dynspeed = totxt_DYNAMIC_SPEED * 1
    }
    catch(e) {}
    
    // Line(s) to (dis)appear per function call
    dynlines = 1
    try {
        // A value that is 0 or not a number keeps the default
        dynlines = totxt_DYNAMIC_LINES * 1 ? totxt_DYNAMIC_LINES * 1 : dynlines
    }
    catch(e) {}
    
    // Don't slide?
    quickdyn = false
    try {
        quickdyn = totxt_QUICK_DYNAMIC
    }
    catch(e) {}
    
    // Delete excessive whitespace?
    alwdelxws = false
    try {
        alwdelxws = totxt_DEL_XWS
    }
    catch(e) {}
    
    // Alignments: 0 = left, 1 = center, 2 = right

    // Header alignment
    stdhalign = 1 // Center
    try {
        stdhalign = totxt_HEADER_ALIGN * 1
    }
    catch(e) {}
    
    // Subheader alignment
    stdshalign = 0 // Left
    try {
        stdshalign = totxt_SUBHEADER_ALIGN * 1
    }
    catch(e) {}
    
    // Text alignment
    stdtalign = 0 // Left
    try {
        stdtalign = totxt_TEXT_ALIGN * 1
    }
    catch(e) {}
    // Checking for impossible alignments: values are clamped to 0..2.
    // NaN passes both comparisons unchanged; transform_text treats any
    // alignment other than 0, 1 or 2 as left.
    if (stdhalign > 2) stdhalign = 2
    else if (stdhalign < 0) stdhalign = 0
    if (stdshalign > 2) stdshalign = 2
    else if (stdshalign < 0) stdshalign = 0
    if (stdtalign > 2) stdtalign = 2
    else if (stdtalign < 0) stdtalign = 0
    
    elems = [] // Elements that contain info to be used in final PRE elements.
    prelms = [] // PRE elements
    
    /*
    // Special classes
    poss_classes = [stdclass, dynclass, widclass, xwsclass, speclass, linclass, quiclass, halclass, shalclass, talclass]
    */
    
    // document.totxt_preElements is the variable in which to store the PRE elements.
    // elmminus is the number of PRE elements registered by earlier calls; it
    // is the offset at which the elements of this call will be numbered.
    try {
        /* document.totxt_preElements likely doesn't exist, which could result in an
           error if not called inside a try statement. */
        preElements = document.totxt_preElements
        elmminus = preElements.length
    }
    catch(e) {
        // First call: reading .length of undefined threw
        document.totxt_preElements = []
        preElements = []
        elmminus = 0
    }
    
    /* Look up header elements (H1 by default). Every header that carries the
       totxt class is turned, together with the subheader and text elements
       that directly follow it in the same parent, into one PRE element that
       is inserted (still hidden) in front of the header. The source elements
       are only collected in elems here; they are removed afterwards. */

    // Work on a snapshot: h1s is a live collection and this loop inserts new
    // elements into the document.
    var hlist = []
    for (x = 0; x < h1s.length; x++)
        hlist[hlist.length] = h1s[x]

    for (x = 0; x < hlist.length; x++) {
        cur = hlist[x]
        if (!cur || !cur.className)
            continue
        // Filters
        classes = cur.className.split(' ')
        if (!isin(classes, stdclass))
            continue
        
        par = cur.parentNode
        if (!par)
            continue
        
        // Find H2 and P elements that are "below" the current H1 element.
        // Collection stops at the first element that is neither of them;
        // nodes without a tagName (text, comments) are skipped.
        chs = par.childNodes
        cur_found = 0
        header = cur.innerHTML
        subheaders = []
        texts = []
        elems[elems.length] = cur
        for (z = 0; z < chs.length; z++) {
            ccur = chs[z]
            if (ccur == cur) {
                cur_found = 1
                continue
            }
            if (!cur_found || !ccur.tagName)
                continue
            
            if (ccur.tagName == sh_elmnm)
                subheaders[subheaders.length] = ccur.innerHTML
            else if (ccur.tagName == txt_elmnm)
                texts[texts.length] = ccur.innerHTML
            else
                break
            // If either H2 or P element, add object to elems var
            elems[elems.length] = ccur
        }
        
        // Per-box settings, read from the classes of the header element
        width = isin(classes, widclass, 1, stdwidth) * 1
        if (!(width >= 10)) // Too small, or not a number at all
            width = stdwidth
        dynamic = isin(classes, dynclass, 1, stddynamic) * 1
        isdynamic = dynamic == 0 || dynamic == 1
        
        delxws = isin(classes, xwsclass, 0, alwdelxws)
        
        c_halign = isin(classes, halclass, 1, stdhalign) * 1
        c_shalign = isin(classes, shalclass, 1, stdshalign) * 1
        c_talign = isin(classes, talclass, 1, stdtalign) * 1
        
        // Horizontal rules: top/bottom of the header, bottom of the box,
        // separator between subheaders and text
        tmps = [' ' + ctimes('-', width - 2),
            '|' + ctimes('_', width - 2) + '|',
            '| ' + ctimes('~', width - 4) + ' |\n']
                
        // Now generate the content. The text is 4 characters narrower than
        // the box: one border and one padding character on each side.
        header = add_borders(transform_text(header,  width - 4, c_halign, delxws), '|')
        subheader = transform_text_in_list(subheaders, width - 4, delxws, '|', ' ', c_shalign)
        content = transform_text_in_list(texts, width - 4, delxws, '|', ' ', c_talign)
        
        header = tmps[0] + '\n' + header + '\n' + tmps[0]
        text = header + '\n'
        if (subheader)
            text += subheader + '\n' + tmps[2]
        if (content)
            text += content + '\n'
        text += tmps[1]
        
        // Move an eventual id from the header to the PRE element. The
        // attribute is removed from the header right away, to avoid two
        // elements with alike ids while both are in the document.
        pid = cur.id
        if (pid)
            cur.removeAttribute('id')
        
        plen = prelms.length
        prelms[plen] = document.createElement('pre')
        celm = prelms[plen]
        celm.number = plen + elmminus // preElements may not be empty
        celm.className = cur.className
        if (pid)
            celm.id = pid
        
        // Apply eventual inline styles of the header to the PRE element.
        // cssText holds the complete inline declaration block, so all
        // declarations are carried over in one go.
        if (cur.style && cur.style.cssText)
            celm.style.cssText = cur.style.cssText
        
        // Stay hidden until the source elements have been removed
        celm.style.display = 'none'
        celm.innerHTML = text
        if (isdynamic) {
            c_speed = isin(classes, speclass, 1, dynspeed) * 1
            if (isNaN(c_speed)) c_speed = dynspeed
            c_lines = isin(classes, linclass, 1, dynlines) * 1
            if (!c_lines) c_lines = dynlines
            c_quidyn = isin(classes, quiclass, 0, quickdyn)
            // Everything the 'show_or_hide' and 'slide' actions need is
            // stored on the element itself
            celm.fulltext = text
            celm.header = header
            celm.headerheight = count(header)
            celm.fulltextheight = count(text)
            celm.state = dynamic // 0 = collapsed, 1 = expanded
            if (!isin(classes, dynclass))
                celm.className += ' ' + dynclass
            if (!isin(classes, dynclass + dynamic))
                celm.className += ' ' + dynclass + dynamic
            celm.dynclass = dynclass
            celm.dynspeed = c_speed
            celm.dynlines = c_lines
            celm.quickdyn = c_quidyn
            
            // A collapsed box opens on click, an open box closes on
            // doubleclick
            if (dynamic == 0)
                celm.onclick = function(){load_totxt_generator('show_or_hide', this.number)}
            else
                celm.ondblclick = function(){load_totxt_generator('show_or_hide', this.number)}
            if (!dynamic)
                celm.innerHTML = header
        }
        
        par.insertBefore(prelms[plen], cur)
    }
    
    // Remove used elements
    for (x in elems) {
        elems[x].parentNode.removeChild(elems[x])
    }
    
    // Show new elements
    for (x in prelms) {
        prelms[x].style.display = 'block'
    }
    
    for (x in prelms) {
        preElements[preElements.length] = prelms[x]
    }
    document.totxt_preElements = preElements
    
    return true
}
