<?php
/**
 * The IMP_MIME_Viewer_html class renders out HTML message parts after
 * attempting to strip scripting and other potentially unsafe markup.
 *
 * $Horde: imp/lib/MIME/Viewer/html.php,v 1.48 2003/08/06 21:35:55 slusarz Exp $
 *
 * Copyright 1999-2003 Anil Madhavapeddy <anil@recoil.org>
 * Copyright 1999-2003 Jon Parise <jon@recoil.org>
 *
 * See the enclosed file COPYING for license information (GPL). If you
 * did not receive this file, see http://www.fsf.org/copyleft/gpl.html.
 *
 * @author  Anil Madhavapeddy <anil@recoil.org>
 * @author  Jon Parise <jon@horde.org>
 * @version $Revision: 1.48 $
 * @since   IMP 3.0
 * @package horde.mime.viewer
 */
class IMP_MIME_Viewer_html extends MIME_Viewer {

    /**
     * Render out the currently set contents.
     *
     * The HTML of the current MIME part is passed through a series of
     * regular expression filters that try to neutralise scripting, event
     * handlers, inline styles and troublesome tags. Root-relative links
     * are rewritten against the host named by a <base> tag, "cid:"
     * references are pointed at the related MIME parts, links are made to
     * open in a new window and - depending on the user's preferences -
     * images are replaced with a spacer graphic.
     *
     * @access public
     *
     * @param array $params  An array with a reference to a MIME_Contents
     *                       object as its first element.
     *
     * @return string  The rendered text in HTML.
     */
    function render($params)
    {
        $contents = &$params[0];

        global $prefs;

        $data = $this->mime_part->getContents();

        /* These regular expressions attempt to make html safe for
           viewing. THEY ARE NOT PERFECT. If you enable html viewing,
           you are opening a security hole. With the current state of
           the web, I believe that the best we can do is to make sure
           that people _know_ html is a security hole, clean up what
           we can, and leave it at that. */

        /* Deal with <base> tags in the HTML, since they will screw up our
           own relative paths. Only the scheme and host are kept, because
           the base is later used for root-relative ("/...") paths only.
           The search is confined to the <base> tag itself and the host
           stops at the first slash, quote, whitespace or '>' so that a
           closing quote can never end up inside $base. */
        if (preg_match('|<base\s[^>]*?(https?://[^/"\'\s>]+)|i', $data, $base_match)) {
            $base = $base_match[1] . '/';
        }

        /* Change space entities to space characters. */
        $data = preg_replace('/&#(x0*20|0*32);?/i', ' ', $data);

        /* Nuke non-printable characters (a play in three acts). */
        /* Rule #1: If we have a semicolon, it's deterministically
           detectable and fixable, without introducing collateral damage. */
        $data = preg_replace('/&#x?0*([9A-D]|1[0-3]);/i', '&nbsp;', $data);

        /* Rule #2: Hex numbers (usually having an x prefix) are also
           deterministic, even if we don't have the semi. Note that
           some browsers will treat &#a or &#0a as a hex number even
           without the x prefix; hence /x?/ which will cover those
           cases in this rule. */
        $data = preg_replace('/&#x?0*[9A-D]([^0-9A-F]|$)/i', '&nbsp\\1', $data);

        /* Rule #3: Decimal numbers without semi. The problem is that
           some browsers will interpret &#10a as "\na", some as
           "&#x10a" so we have to clean the &#10 to be safe for the
           "\na" case at the expense of mangling a valid entity in
           other cases. (Solution for valid HTML authors: always use
           the semicolon.) */
        $data = preg_replace('/&#0*(9|1[0-3])([^0-9]|$)/i', '&nbsp\\2', $data);

        /* Remove overly long numeric entities. */
        $data = preg_replace('/&#x?0*[0-9A-F]{6,};?/i', '&nbsp;', $data);

        /* Get all attribute="javascript:foo()" tags. This is essentially
           the regex /=("?)[^>]*script:/ but expanded to catch camouflage
           with spaces and entities. */
        $preg = '/(&#0*61;?|&#x0*3D;?|=)\s*'.
                '(&#0*34;?|&#x0*22;?|")?'.
                '[^>]*\s*'.
                '(s|&#0*83;?|&#x0*53;?|&#0*115;?|&#x0*73;?)\s*'.
                '(c|&#0*67;?|&#x0*43;?|&#0*99;?|&#x0*63;?)\s*'.
                '(r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*'.
                '(i|&#0*73;?|&#x0*49;?|&#0*105;?|&#x0*69;?)\s*'.
                '(p|&#0*80;?|&#x0*50;?|&#0*112;?|&#x0*70;?)\s*'.
                '(t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)\s*'.
                '(:|&#0*58;?|&#x0*3a;?)/i';
        $data = preg_replace($preg, '=\2cleaned', $data);

        /* Get all on<foo>="bar()" and style="" tags. */
        $data = preg_replace('/(\s+[Oo][Nn]\w+)\s*=/', '\1Cleaned=', $data);
        $data = preg_replace('/\s+style\s*=/i', ' Cleaned=', $data);

        /* Get all tags that might cause trouble - <object>, <embed>,
           <base>, etc. Meta refreshes and iframes, too.
           NOTE(review): "[^line]" is a character class (any single
           character other than l, i, n or e), presumably meant to spare
           "baseline" - confirm before changing. */
        $malicious = array('|<([^>]*)s\s*c\s*r\s*i\s*p\s*t|i',
                           '|<([^>]*)embed|i',
                           '|<([^>]*)base[^line]|i',
                           '|<([^>]*)meta|i',
                           '|<([^>]*)j\sa\sv\sa|i',
                           '|<([^>]*)object|i',
                           '|<([^>]*)iframe|i');
        $data = preg_replace($malicious, '<cleaned_tag', $data);

        /* Comment out style tags. */
        $data = preg_replace('|<style[^>]*>(?:\s*<\!--)*|i', '<!--', $data);
        $data = preg_replace('|(?:-->\s*)*</style>|i', '-->', $data);

        /* A few other matches.
           The first pattern has only two groups, so the "\3" in its
           replacement expands to an empty string and the whole tag
           collapses to "<&{;}>". It is deliberately left unchanged:
           dropping the rest of the tag is the more conservative result. */
        $data = preg_replace('|<([^>]*)&{.*}([^>]*)>|', '<&{;}\3>', $data);
        $data = preg_replace('|<([^>]*)mocha:([^>]*)>|i', '<cleaned\2>', $data);

        /* Attempt to fix paths that were relying on a <base> tag. Quoted
           and unquoted attribute values are handled alike. Paths starting
           with "//" already name their own host and are left alone. */
        if (!empty($base)) {
            /* $base comes from the message itself, so escape the
               characters preg_replace() treats specially in a
               replacement string. */
            $safe_base = addcslashes($base, '\\$');

            $data = preg_replace('|(src=["\']?)/(?!/)|i', '${1}' . $safe_base, $data);
            $data = preg_replace('|(href= *["\']?)/(?!/)|i', '${1}' . $safe_base, $data);
        }

        /* Search for inlined images that we can display. */
        $related = $this->mime_part->getInformation('related_part');
        if ($related !== false) {
            $relatedPart = $contents->getMIMEPart($related);
            foreach ($relatedPart->getCIDList() as $ref => $id) {
                $id = trim($id, '<>');
                $cid_part = $contents->getDecodedMIMEPart($ref);
                $data = str_replace("cid:$id", $contents->urlView($cid_part, 'view_attach'), $data);
            }
        }

        /* Convert links to open in new windows, except for links that
           have an "#xyz" anchor. The tag name must be followed by
           whitespace or '>' so that other tags starting with "a"
           (<abbr>, <address>, <area>, ...) are not mangled. */
        $data = preg_replace('/<a(?=[\s>])(?![^>]*href=["\']?#)/i', '<a target="_blank"', $data);

        /* Filter bad language. */
        $data = IMP::filterText($data);

        /* Only display images if specifically allowed by user. */
        if (!$contents->viewAsAttachment() &&
            !$contents->getPrintMode() &&
            $prefs->getValue('html_image_replacement')) {

            global $registry;

            /* Check to see if images exist. */
            $img_regex = '/(<img[^>]*src=|<input[^>]*src=|<body[^>]*background=|<td[^>]*background=|<table[^>]*background=|style=[^>]*background-image:.*url\()\"?[^\s\"\>]*\"?(\s+|>|\))/iUs';
            if (preg_match($img_regex, $data)) {
                $msg = array(_("This HTML message has images embedded in it."));

                /* Make sure the URL parameters are correct for the current
                   message. */
                $url = Horde::removeParameter(Horde::selfUrl(true), array('index', 'actionID'));
                $base_ob = &$contents->getBaseObjectPtr();
                $url = Horde::addParameter($url, 'index', $base_ob->getMessageIndex());

                $view_img = Horde::getFormData('view_html_images');
                $addr_check = ($prefs->getValue('html_image_addrbook') && $this->_inAddressBook($contents));

                /* Swap every image source for a spacer graphic unless the
                   user asked to see the images or the sender is known. */
                if (!$view_img && !$addr_check) {
                    $newSrc = Horde::url($registry->getParam('webroot', 'imp') . '/graphics/folders/spacer.gif');
                    $data = preg_replace($img_regex, '\\1"' . $newSrc . '"\\2', $data);
                }

                /* Print link to view/hide images now. */
                if (!$addr_check) {
                    if ($view_img) {
                        $url = Horde::removeParameter($url, array('view_html_images'));
                        $msg[] = Horde::link($url, _("Hide the Images"), null, null, null, _("Hide the Images"), null) . _("Click here to HIDE the Images") . '</a>.';
                    } else {
                        $url = Horde::addParameter($url, 'view_html_images', 1);
                        $msg[] = Horde::link($url, _("Show the Images"), null, null, null, _("Show the Images"), null) . _("Click here to SHOW the Images") . '</a>.';
                    }
                }

                $data = $contents->formatStatusMsg($msg, Horde::img('image.gif', _("View the Images"), null, 'graphics/mime'), false) . $data;
            }
        }

        return $data;
    }

    /**
     * Return text/html as the content-type
     *
     * @access public
     *
     * @return string  'text/html' constant.
     */
    function getType()
    {
        return 'text/html';
    }

    /**
     * Determine whether the sender appears in an available addressbook.
     *
     * Returns false when no sender address is available, when the list
     * of address books cannot be obtained, or when the lookup returns a
     * PEAR_Error.
     *
     * @access private
     *
     * @param object MIME_Contents &$contents  The MIME_Contents object.
     *
     * @return boolean  Does the sender appear in an addressbook?
     */
    function _inAddressBook(&$contents)
    {
        global $registry;

        /* If we don't have access to the sender information, return false. */
        $base_ob = &$contents->getBaseObjectPtr();
        $from = $base_ob->getFromAddress();
        if (empty($from)) {
            return false;
        }

        /* Get the lists of available address books. Anything other than
           an array (e.g. a PEAR_Error) means there is nothing to search. */
        $sources = $registry->call('contacts/sources');
        if (!is_array($sources)) {
            return false;
        }

        /* Try to get back a result from the search. */
        $result = $registry->call('contacts/getField', array($from, '__key', array_keys($sources)));

        if (is_a($result, 'PEAR_Error')) {
            return false;
        } else {
            return ((count($result)) ? true : false);
        }
    }

}
