/**************************************************************************
 Microsoft Translator Machine Translation plugin for OmegaT(http://www.omegat.org/)

 Copyright (C) 2012 Yu Tang

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 **************************************************************************/

package org.omegat.plugin.machinetranslators;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;

import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.TreeMap;

import org.omegat.core.machinetranslators.BaseTranslate;

import org.omegat.util.Language;
import org.omegat.util.Log;
import org.omegat.util.OConsts;
import org.omegat.util.PatternConsts;
import org.omegat.util.Preferences;
import org.omegat.util.StringUtil;
import org.omegat.util.WikiGet;

/**
 * Support of Microsoft Translator machine translation.
 *
 * Microsoft Translator
 * http://msdn.microsoft.com/en-us/library/dd576287.aspx
 * 
 * Microsoft Translator API - Frequently Asked Questions
 * http://social.msdn.microsoft.com/Forums/en-US/microsofttranslator/thread/c71aeddd-cc90-4228-93cc-51fb969fde09
 *
 * Configuration is read from Java system properties when an instance is created:
 * <ul>
 * <li><code>azure.application.client.id</code> - client id (required)</li>
 * <li><code>azure.application.client.secret</code> - client secret (required)</li>
 * <li><code>microsoft.translator.striptags</code> - "true" to remove OmegaT tags
 *     from the text before it is sent (optional, defaults to false)</li>
 * </ul>
 *
 * The access token and its expiration time are kept in static fields and are
 * therefore shared by all instances of this class.
 *
 * @author Yu Tang
 */
public class MicrosoftTranslator extends BaseTranslate {
    protected static final String PLUGIN_VERSION = "1.1.1";
    protected static final String CHARSET_MARK = "charset=";
    protected static final String AUTH_URL = "https://datamarket.accesscontrol.windows.net/v2/OAuth2-13";
    protected static final String TRAN_URL = "http://api.microsofttranslator.com/v2/Http.svc/Translate";
    /** Extracts the access token value from the auth JSON; tolerates whitespace around the colon. */
    protected static final Pattern RE_TOKEN = Pattern.compile("\"access_token\"\\s*:\\s*\"([^\"]+)\"");
    /** Matches an empty element pair such as "&lt;br1&gt;&lt;/br1&gt;". */
    protected static final Pattern RE_SPLITED_TAG = Pattern.compile("<([a-zA-Z]+\\d+)></\\1>");
    protected static String clientId = "";
    protected static String clientSecret = "";
    protected static String accessToken = "";
    protected static Date accessTokenExpiration = new Date();
    protected static Boolean stripTags = false;

    /** Maximum number of characters sent to the Translate method. */
    private static final int MAX_INPUT_LENGTH = 10000;
    /** Marker appended to truncated input. */
    private static final String ELLIPSIS = "...";
    /** Charset used when the response does not declare one. */
    private static final String DEFAULT_CHARSET = "ISO8859-1";
    /** Country codes mapped to Chinese (Traditional); every other "zh" variant is Simplified. */
    private static final String[] TRADITIONAL_CHINESE_COUNTRIES = {"HK", "TW", "CHT"};

    /**
     * Reads the client credentials and the strip-tags option from system
     * properties and logs the resulting configuration.
     */
    public MicrosoftTranslator() {
        // Set your Windows Azure Marketplace client info
        // See http://msdn.microsoft.com/en-us/library/hh454950.aspx
        clientId = System.getProperty("azure.application.client.id");
        clientSecret = System.getProperty("azure.application.client.secret");
        String stripTagsProperty = System.getProperty("microsoft.translator.striptags");

        log("version = " + PLUGIN_VERSION);

        if ( StringUtil.isEmpty(clientId) ) {
            log("Your ClientId is omitted. This plugin is not working without it.");
        } else {
            log("Your ClientId is '" + clientId + "'.");
        }

        if ( StringUtil.isEmpty(clientSecret) ) {
            log("Your ClientSecret is omitted. This plugin is not working without it.");
        } else {
            // Never write the secret itself to the log file.
            log("Your ClientSecret is set.");
        }

        if ( !StringUtil.isEmpty(stripTagsProperty) ) {
            stripTags = stripTagsProperty.equalsIgnoreCase("true");
        }

        log("microsoft.translator.striptags = " + stripTags.toString());
    }

    /**
     * @return the preference key "allow_microsoft_translator"
     */
    @Override
    protected String getPreferenceName()
    {
        return "allow_microsoft_translator";
    }

    /**
     * @return the display name "Microsoft Translator"
     */
    public String getName() {
        return "Microsoft Translator";
    }

    /**
     * Translates <code>text</code> from <code>sLang</code> to <code>tLang</code>.
     *
     * When the credentials are missing, or when no access token can be
     * obtained, the explanatory message (or the raw auth response) is returned
     * in place of a translation.
     *
     * @param sLang
     *            source language
     * @param tLang
     *            target language
     * @param text
     *            text to translate; input longer than 10,000 chars is truncated
     * @return the translation, or a message describing why none was obtained
     * @throws Exception
     *             if the translation request fails
     */
    @Override
    protected String translate(Language sLang, Language tLang, String text) throws Exception {
        // Client Id or Client Secret is missing
        if ( StringUtil.isEmpty(clientId) || StringUtil.isEmpty(clientSecret) ) {
            return "Client Id and/or Client Secret is missing.\n"
                + "Command-line syntax:\n"
                + "java -Dazure.application.client.id=YourClientId "
                + "-Dazure.application.client.secret=YourClientSecret "
                + "-jar OmegaT.jar";
        }

        // get AccessToken
        if ( !isValidAccessToken(accessToken, accessTokenExpiration) ) {
            String json = getAuthJson(clientId, clientSecret);
            if ( !setAccessToken(json) ) {
                // Probably variant json has auth error message.
                log(json);
                return json;
            }
        }

        // Input length limit is 10,000 characters
        // See followed thread;
        //   API will enforce the input length limit for all methods
        //   http://social.msdn.microsoft.com/Forums/en-US/microsofttranslator/thread/98ec3467-2198-4b31-b11f-8f864dffe62f/
        //
        //   Translate Method
        //   http://msdn.microsoft.com/en-us/library/ff512421.aspx
        //
        // I didn't try to put nearly 10k chars by myself. If somebody did, let me know the result.
        String trText = text;
        if ( text.length() > MAX_INPUT_LENGTH ) {
            log("Input characters length is over the limit. Text is truncated to 10,000 chars.");
            int cut = MAX_INPUT_LENGTH - ELLIPSIS.length();
            // Do not cut a surrogate pair in half; drop the dangling high surrogate instead.
            if ( Character.isHighSurrogate(text.charAt(cut - 1)) ) {
                cut--;
            }
            trText = text.substring(0, cut) + ELLIPSIS;
        }
        String langFrom = getMSTLanguage(sLang);
        String langTo = getMSTLanguage(tLang);

        return getTranslation(trText, accessToken, langFrom, langTo);
    }

    /**
     * Writes a message to the OmegaT log, prefixed with the plugin name.
     */
    private void log( String message ){
        Log.log("Microsoft Translator plugin: " + message);
    }

    /**
     * @return true if the token has the "Bearer " prefix and has not expired
     */
    private boolean isValidAccessToken( String accessToken, Date accessTokenExpiration ){
        return accessToken != null
                && accessToken.startsWith("Bearer ")
                && !isAccessTokenExpired(accessTokenExpiration);
    }

    /**
     * Requests an access token from {@link #AUTH_URL}.
     *
     * @return the response body, or a description of the I/O error when the
     *         request fails; never null
     */
    private String getAuthJson( String clientId, String clientSecret ){
        Map<String, String> params = new TreeMap<String, String>();

        params.put( "grant_type", "client_credentials" );
        params.put( "client_id", clientId );
        params.put( "client_secret", clientSecret );
        params.put( "scope", "http://api.microsofttranslator.com" );

        try {
            String response = WikiGet.post(AUTH_URL, params);
            return response != null ? response : "";
        } catch (IOException e) {
            // An exception may carry no message; fall back to its class name
            // so that the caller never has to deal with null.
            String message = e.getLocalizedMessage();
            return message != null ? message : e.toString();
        }
    }

    /**
     * @return the point in time 590 seconds from now
     */
    private Date getExpiration() {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.SECOND, 590);  // 10 minutes - 10 seconds
        return cal.getTime();
    }

    /**
     * Extracts the access token from the auth response and stores it,
     * together with a fresh expiration time, in the static fields.
     *
     * @param json
     *            auth response body; may be null
     * @return true if a token was found and stored, false otherwise
     */
    private boolean setAccessToken( String json ){
        if ( json == null ) {
            return false;
        }
        Matcher m = RE_TOKEN.matcher(json);
        if ( !m.find() ) {
            return false;
        }
        accessToken = "Bearer " + m.group(1);
        accessTokenExpiration = getExpiration();
        return true;
    }

    /**
     * @return true if the current time is after the given expiration time
     */
    private boolean isAccessTokenExpired( Date accessTokenExpiration ) {
        return (new Date()).after(accessTokenExpiration);
    }

    /**
     * Calls the Translate method and returns the translated text.
     * Copy from WikiGet.put() and modified.
     *
     * @param text
     *            text to translate
     * @param token
     *            value of the Authorization header
     * @param langFrom
     *            source language code
     * @param langTo
     *            target language code
     * @return the translated text; the raw response body if it does not have
     *         the expected &lt;string xmlns=...&gt; form
     * @throws IOException
     *             if the request fails or the response status is not 200
     */
    private String getTranslation( String text, String token,
            String langFrom, String langTo) throws IOException {
        Map<String, String> params = new TreeMap<String, String>();
        String sourceText = stripTags ? PatternConsts.OMEGAT_TAG.matcher(text).replaceAll("").trim() : text;

        // Translate Method
        // http://msdn.microsoft.com/en-us/library/ff512421.aspx
        params.put( "text", sourceText );
        params.put( "from", langFrom );
        params.put( "to", langTo );
        params.put( "contentType", stripTags ? "text/plain" : "text/html" );

        URL url = new URL(TRAN_URL + '?' + buildQuery(params));
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();

        try {
            conn.setRequestMethod("GET");
            conn.setRequestProperty("Authorization", token); // sent as-is, no further encoding
            applyProxyAuthorization(conn);
            conn.setDoInput(true);

            if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
                throw new IOException(conn.getResponseMessage());
            }
            String charset = extractCharset(conn.getHeaderField("Content-Type"));
            String xml = extractStringElement(
                    convertInputStreamToString( conn.getInputStream(), charset ));

            if (!stripTags) {
                xml = restoreOmtTag(xml);   // "<br1></br1>" => "<br1/>"
            }

            return xml;
        } finally {
            conn.disconnect();
        }
    }

    /**
     * Joins the parameters into a URL query string ("k1=v1&amp;k2=v2"),
     * URL-encoding every value as UTF-8.
     */
    private String buildQuery( Map<String, String> params ) throws IOException {
        StringBuilder query = new StringBuilder();
        for (Map.Entry<String, String> p : params.entrySet()) {
            if (query.length() > 0) {
                query.append('&');
            }
            query.append(p.getKey()).append('=')
                 .append(URLEncoder.encode(p.getValue(), OConsts.UTF8));
        }
        return query.toString();
    }

    /**
     * Adds a Basic Proxy-Authorization header when a proxy user name is stored
     * in the preferences. Added to pass through authenticated proxy.
     */
    private void applyProxyAuthorization( HttpURLConnection conn ) {
        String encodedUser = Preferences.getPreference(Preferences.PROXY_USER_NAME);
        if (StringUtil.isEmpty(encodedUser)) {
            return; // There is no proxy user
        }
        String encodedPassword = Preferences.getPreference(Preferences.PROXY_PASSWORD);
        try {
            // The stored values are Base64-encoded; rebuild "user:password".
            String pass = new String(org.omegat.util.Base64.decode(encodedUser));
            pass += ":" + new String(org.omegat.util.Base64.decode(encodedPassword));
            encodedPassword = org.omegat.util.Base64.encodeBytes(pass.getBytes());
            conn.setRequestProperty("Proxy-Authorization", "Basic " + encodedPassword);
        } catch (IOException ex) {
            Log.logErrorRB("LOG_DECODING_ERROR");
            Log.log(ex);
        }
    }

    /**
     * Picks the charset name out of a Content-Type header value.
     *
     * @param contentType
     *            header value; may be null
     * @return the declared charset without trailing parameters or quotes,
     *         or "ISO8859-1" when none is declared
     */
    private String extractCharset( String contentType ) {
        int cp = contentType != null ? contentType.indexOf(CHARSET_MARK) : -1;
        if (cp < 0) {
            return DEFAULT_CHARSET;
        }
        String charset = contentType.substring(cp + CHARSET_MARK.length());
        // Further parameters may follow, e.g. "charset=utf-8; boundary=x".
        int semicolon = charset.indexOf(';');
        if (semicolon >= 0) {
            charset = charset.substring(0, semicolon);
        }
        charset = charset.trim();
        // The value may be a quoted string, e.g. charset="utf-8".
        if (charset.length() >= 2 && charset.startsWith("\"") && charset.endsWith("\"")) {
            charset = charset.substring(1, charset.length() - 1).trim();
        }
        return charset.isEmpty() ? DEFAULT_CHARSET : charset;
    }

    /**
     * Extracts and unescapes the content of a &lt;string xmlns="..."&gt;
     * element. Input that does not have this form is returned unchanged.
     */
    private String extractStringElement( String xml ) {
        // xml is like this.
        // <string xmlns="http://...">...</string>
        // So we extract simply between first '>' and last '<'.
        if ( !xml.startsWith("<string xmlns=") || !xml.endsWith("</string>") ) {
            return xml;
        }
        String content = xml.substring(xml.indexOf(">") + 1, xml.lastIndexOf("<"));

        // unescape xml special chars (minimal)
        // "&amp;" must stay last, otherwise "&amp;lt;" would end up as "<".
        content = content.replace("&lt;", "<");
        content = content.replace("&gt;", ">");
        content = content.replace("&quot;", "\"");
        content = content.replace("&apos;", "'");
        content = content.replace("&amp;", "&");
        return content;
    }

    /**
     * Reads the whole stream into a string and closes it.
     *
     * @param is
     *            stream to read
     * @param charset
     *            name of the charset used to decode the stream
     * @return the decoded content
     * @throws IOException
     *             if reading fails or the charset is not supported
     */
    private String convertInputStreamToString( InputStream is, String charset ) throws IOException {
        InputStreamReader reader = new InputStreamReader(is, charset);
        try {
            StringBuilder builder = new StringBuilder();
            char[] buf = new char[1024];
            int numRead;
            while (0 <= (numRead = reader.read(buf))) {
                builder.append(buf, 0, numRead);
            }
            return builder.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Modify some country codes to fit with Microsoft Translator
     * 
     * @param language
     *            An OmegaT language
     * @return A code modified for some Microsoft Translator languages
     */
    private String getMSTLanguage( Language language ) {
        // convert for Chinese
        // see What's New in Globalization and Localization
        //     http://msdn.microsoft.com/en-us/library/vstudio/dd997383(v=vs.100).aspx
        // and Translator Language Codes
        //     http://msdn.microsoft.com/en-us/library/hh456380.aspx
        if ( !"zh".equalsIgnoreCase(language.getLanguageCode()) ) {
            // return Microsoft Translator Language
            return language.getLanguage();
        }

        String countryCode = language.getCountryCode();
        for (String traditional : TRADITIONAL_CHINESE_COUNTRIES) {
            if (traditional.equalsIgnoreCase(countryCode)) {
                return "zh-CHT"; // Chinese (Traditional) Legacy
            }
        }
        return "zh-CHS"; // Chinese (Simplified) Legacy
    }

    /**
     * convert back OmegaT Tag splitted by Microsoft Translator API.
     * 
     * @param source
     *            Translated string.
     * @return Tag restored source string.
     */
    private String restoreOmtTag( String source ) {
        // Microsoft Translator API converts OmT tag like "<br1/>" to "<br1></br1>".
        // So we need to convert back to original.
        // See below for details.
        //   How to keep independent tag
        //   http://social.msdn.microsoft.com/Forums/en-US/microsofttranslator/thread/448b6717-a9f4-4365-b6ca-13abbb0b6404
        return RE_SPLITED_TAG.matcher(source).replaceAll("<$1/>");
    }
}