package com.limegroup.gnutella.spam;

import java.util.Locale;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.limegroup.gnutella.RemoteFileDesc;
import com.limegroup.gnutella.messages.QueryRequest;
import com.limegroup.gnutella.settings.SearchSettings;
import com.limegroup.gnutella.xml.LimeXMLNames;

/**
 * Singleton that rates search results ({@link RemoteFileDesc}s) as spam or
 * not spam. A result is first rated by a handful of fixed rules (missing
 * SHA1 urn, incomplete-file names, a blacklist of file sizes and file name
 * patterns) and the outcome is then combined with the rating obtained from
 * the {@link RatingTable}.
 * <p>
 * Ratings are probabilities in the range 0 (not spam) to 1 (spam). The rules
 * are expressed as <em>inverse</em> ratings (1 - probability) and are folded
 * into the running rating with <code>1 - (1 - rating) * inverse</code>.
 */
public class SpamManager {
	private static final Log LOG = LogFactory.getLog(SpamManager.class);

	/**
	 * If an RFD's spam rating is greater than MAX_THRESHOLD, the rating is not
	 * remembered for the tokens of the RFD; otherwise a spammer who very
	 * frequently sends a bad UrnToken with varying keywords, sizes and
	 * addresses might be able to pollute the filter data.
	 * <p>
	 * Not referenced inside this class; presumably consulted by the rating
	 * code elsewhere.
	 */
	public static final float MAX_THRESHOLD = 0.995f;

	/**
	 * Inverse rating (1 - probability) for an RFD without SHA1 urn whose XML
	 * document has an empty action. With the current value of 0 such a result
	 * is rated as certain spam; the value used previously was 0.5f.
	 */
	private static final float NO_SHA1_URN_RATING = 0.f;

	/**
	 * Inverse rating for an incomplete file: save the user the trouble of
	 * downloading it if he has his spam filter enabled. With the current value
	 * of 0 such a result is rated as certain spam; the value used previously
	 * was 0.2f.
	 */
	private static final float INCOMPLETE_FILE_RATING = 0.f;

	/**
	 * Inverse rating for a result that matches the built-in blacklist of file
	 * sizes, file names or audio titles. 0 means certain spam.
	 */
	private static final float KNOWN_SPAM_RATING = 0.f;

	/**
	 * File sizes of blacklisted results (marked "cabos" in the original
	 * source).
	 */
	private static final int[] SPAM_FILE_SIZES = {
		11354,
		48139,
		50327,
		53379,
		56993,
		76233,
		76235,
		76249,
		76251,
		77282,
		77284,
		95927,
		96191,
		96268,
		121836,
		200706,
		283692,
		822182,
		863798,
		938820,
		1058868,
		1105392,
		1155729,
		1167481,
		1216226,
		1251562,
		1252087,
		1253906,
		1279141,
		1279142,
		1411929,
		1431124,
		3515161,
		3545425,
		3877629,
		5745425,
		12101663
	};

	/**
	 * Blacklisted endings of the lower-cased file name.
	 */
	private static final String[] SPAM_NAME_SUFFIXES = {
		" (2).zip",
		" (album.edition).wma",
		" (cover).wma",
		" (divx).zip",
		" (front).wma",
		" (intro).wma",
		" (latest).wma",
		" (mix).wma",
		" (music).zip",
		" (new.album).wma",
		" (new.single).wma",
		" (original).zip",
		" (porn).zip",
		" (radio.edit).wma",
		" (radio.version).zip",
		" (remix).wma",
		" (rock).wma",
		" (single).zip",
		" (special).wma",
		" (special.edition).wma",
		" (svcd).zip",
		" (tool).wma",
		" (uncensored).html",
		" (uncensored).mov",
		" (uncensored).zip",
		" (unreleased).zip",
		" (very hot).mpg",
		" (vocal).wma",
		" (working) album.wma",
		" (working) divx.zip",
		" *better version* [key].zip",
		" *uncensored* single.wma",
		" *working* (full).zip",
		" *working* dirty.zip",
		" - best music.zip",
		" - gives blowjob.html",
		" - great blowjob.html",
		" - greatest hits.mp3",
		" - greatest hits.wma",
		" - pics.html",
		" 2006.zip",
		" 2007.zip",
		" [!].zip",
		" [album.edition].wma",
		" [banned.version].zip",
		" [bonus].wma",
		" [cd].zip",
		" [dirty].zip",
		" [divx].zip",
		" [explicit].zip",
		" [extended.edition].wma",
		" [full].zip",
		" [mix].zip",
		" [new.single].wma",
		" [porn].zip",
		" [radio.version].zip",
		" [registered].wma",
		" [rock].wma",
		" [single].wma",
		" [soundtrack].zip",
		" [svcd].zip",
		" [techno.remix].zip",
		" [techno].zip",
		" [theme].wma",
		" [tool].wma",
		" [ultimate.edition].wma",
		" [vcd].zip",
		" [video].wma",
		" [wyzo].zip",
		" [x].zip",
		" album.edition.zip",
		" back.wma",
		" bdsm.html",
		" bearshare download accelerator.zip",
		" bittorrent downloader.exe",
		" bittorrent downloader.zip",
		" bonus.zip",
		" by cdz intro.wma",
		" by cdz theme.zip",
		" by incite (1).zip",
		" by pr0nstars key.wma",
		" clip.zip",
		" cover.zip",
		" crack by frost.zip",
		" cute girl has orgasm on webcam.mp3",
		" cute girl has orgasm on webcam.mpg",
		" dd breasts.zip",
		" download free adult videos.zip",
		" dream of jenna cd1 jenna.wma",
		" dvdrip.zip",
		" extended.edition.zip",
		" extended.wma",
		" free access 150 adult porn sites.zip",
		" free access to 150 adult porn sites.zip",
		" fucking.html",
		" full.zip",
		" get hacked passwords adult porn sites for free.zip",
		" get hacked passwords to adult porn sites for free.zip",
		" hentai anime.zip",
		" keygen [ssg].zip",
		" limewire download accelerator.zip",
		" mp3.html",
		" music.html",
		" naked.html",
		" naked.zip",
		" new.zip",
		" nocd patch.zip",
		" nodvd patch.zip",
		" online installer.zip",
		" pictures.html",
		" radio.edition.zip",
		" remix.zip",
		" ringtones.html",
		" securely with new secured browser.zip",
		" sexy girl has shaking orgasm during sex.mpg",
		" share accelerator.zip",
		" song.zip",
		" soul.zip",
		" special.edition.wma",
		" special.zip",
		" techno.zip",
		" ultimate.edition.zip",
		" using emule multimedia toolbar.zip",
		" using multimedia toolbar.zip",
		" using the best ultimate player.zip",
		" video (extended).wma",
		" video.zip",
		" videos.html",
		" vocal.wma",
		" web hottest videos personal player.exe",
		" webcam.html",
		" wet and wild.zip",
		" with bonus.zip",
		" with new secured emule.zip",
		" with the fastest bittorrent downloader.zip",
		" with the ultimate player.zip",
		" xvid.zip",
		" young and cute.zip",
		"-vids.html",
		"-xcd.wma",
		"-xxx.wmv"
	};

	/**
	 * Blacklisted beginnings of the lower-cased file name.
	 */
	private static final String[] SPAM_NAME_PREFIXES = {
		"!!!!!!!! ",
		"###### ",
		"(cinemaniacs) ",
		"(corp) ",
		"(deviance) ",
		"(fusion) ",
		"(genius) ",
		"(h2o) ",
		"*better version* ",
		"*myth* ",
		"*naked* ",
		"*strike* ",
		"*subnet* ",
		"- presented by ash - ",
		"---===== ",
		"curly haired cutie with size ",
		"download music ",
		"free 1000 gas gift card www.efreeclub.com ",
		"free apple computer system www.efreeclub.com ",
		"get music ",
		"mp3 escape great the girls like ",
		"warning, visit "
	};

	/**
	 * Audio titles starting with this text mark a result as spam. Unlike the
	 * file name, the title is compared as is (it is not lower-cased).
	 */
	private static final String SPAM_AUDIO_TITLE_PREFIX = "not related : ";

	/**
	 * Beginnings of the lower-cased file name that mark a file as incomplete.
	 */
	private static final String[] INCOMPLETE_NAME_PREFIXES = {
		"incomplete_",
		"incomplete~",
		"inacheve_",
		"in_",
		"___arestra",
		"__incomplete",
		"___incompleted",
		"preview-t-"
	};

	private static final SpamManager INSTANCE = new SpamManager();

	/**
	 * @return the single shared SpamManager
	 */
	public static SpamManager instance() {
		return INSTANCE;
	}

	private SpamManager() {
	}

	/**
	 * informs the SpamManager of any query that was started and clears bad
	 * ratings for the keywords in the query. Does nothing if the spam filter
	 * is disabled.
	 * 
	 * @param qr
	 *            the QueryRequest for the query.
	 */
	public void startedQuery(QueryRequest qr) {
		if (SearchSettings.ENABLE_SPAM_FILTER.getValue())
			RatingTable.instance().mark(qr, Token.RATING_CLEARED);
	}

	/**
	 * This method will rate a given rfd and return whether or not the
	 * SpamManager believes this is spam. If the spam filter is disabled the
	 * rfd is left untouched and the method returns false.
	 * 
	 * @param rfd
	 *            the RemoteFileDesc to rate
	 * @modifies rfd (its spam rating is set to the computed rating)
	 * @return true if the SpamManager internally rated it as spam and false if
	 *         the SpamManager did not rate it as spam
	 */
	public boolean isSpam(RemoteFileDesc rfd) {
		if (!SearchSettings.ENABLE_SPAM_FILTER.getValue())
			return false;

		// rate simple spam...
		float rating = 0.f;
		if (rfd.getSHA1Urn() == null &&
				rfd.getXMLDocument() != null &&
				rfd.getXMLDocument().getAction().length() == 0)
			rating = 1 - (1 - rating) * NO_SHA1_URN_RATING;

		// all name based rules work on the lower-cased file name
		String name = rfd.getFileName().toLowerCase(Locale.US);

		if (isIncompleteFile(name))
			rating = 1 - (1 - rating) * INCOMPLETE_FILE_RATING;

		if (isKnownSpam(rfd, name))
			rating = 1 - (1 - rating) * KNOWN_SPAM_RATING;

		// apply bayesian filter
		rating = 1 - (1 - rating) * (1 - RatingTable.instance().getRating(rfd));
		rfd.setSpamRating(rating);

		// the stricter (larger) of the two configured thresholds decides
		return rating >= Math.max(SearchSettings.FILTER_SPAM_RESULTS.getValue(),
				SearchSettings.QUERY_SPAM_CUTOFF.getValue());
	}

	/**
	 * this method is called if the user marked some RFDs as being spam
	 * 
	 * @param rfds
	 *            an array of RemoteFileDesc that should be marked as spam
	 */
	public void handleUserMarkedSpam(RemoteFileDesc[] rfds) {
		for (int i = 0; i < rfds.length; i++)
			rfds[i].setSpamRating(1.f);

		RatingTable.instance().mark(rfds, Token.RATING_USER_MARKED_SPAM);
	}

	/**
	 * this method is called if the user marked some RFDs as not being spam
	 * 
	 * @param rfds
	 *            an array of RemoteFileDesc that should be marked as good
	 */
	public void handleUserMarkedGood(RemoteFileDesc[] rfds) {
		for (int i = 0; i < rfds.length; i++)
			rfds[i].setSpamRating(0.f);

		RatingTable.instance().mark(rfds, Token.RATING_USER_MARKED_GOOD);
	}

	/**
	 * clears all collected filter data
	 */
	public void clearFilterData() {
		RatingTable.instance().clear();
	}

	/**
	 * checks a result against the built-in blacklist: file size first, then
	 * file name, then the audio title of the XML document.
	 * 
	 * @param rfd
	 *            the RemoteFileDesc to check
	 * @param name
	 *            the lower-cased file name of rfd
	 * @return true if any blacklist rule matches
	 */
	private static boolean isKnownSpam(RemoteFileDesc rfd, String name) {
		return hasSpamSize(rfd.getSize())
				|| hasSpamName(name)
				|| hasSpamAudioTitle(rfd);
	}

	/**
	 * @param size
	 *            the file size of a search result
	 * @return true if size is one of the blacklisted file sizes
	 */
	private static boolean hasSpamSize(int size) {
		for (int i = 0; i < SPAM_FILE_SIZES.length; i++) {
			if (SPAM_FILE_SIZES[i] == size)
				return true;
		}
		return false;
	}

	/**
	 * @param name
	 *            the lower-cased file name of a search result
	 * @return true if name matches one of the blacklisted name patterns
	 */
	private static boolean hasSpamName(String name) {
		// "cracked" only counts together with a suspicious ending or with
		// "special edition" somewhere in the name
		if (name.indexOf("cracked") != -1 &&
				(name.endsWith(".html") || name.endsWith(".wma") ||
				 name.endsWith(".xml") || name.indexOf("special edition") != -1))
			return true;
		if (name.startsWith("horny ") &&
				(name.endsWith(".html") || name.endsWith(".zip")))
			return true;
		if (name.startsWith("sexy ") && name.endsWith(".zip"))
			return true;

		for (int i = 0; i < SPAM_NAME_SUFFIXES.length; i++) {
			if (name.endsWith(SPAM_NAME_SUFFIXES[i]))
				return true;
		}

		if (name.equals("official limewire pro available here.com"))
			return true;
		if (name.indexOf("www.efreeclub.com") != -1)
			return true;

		for (int i = 0; i < SPAM_NAME_PREFIXES.length; i++) {
			if (name.startsWith(SPAM_NAME_PREFIXES[i]))
				return true;
		}
		return false;
	}

	/**
	 * @param rfd
	 *            the RemoteFileDesc to check
	 * @return true if rfd has an XML document whose audio title starts with
	 *         the blacklisted title prefix
	 */
	private static boolean hasSpamAudioTitle(RemoteFileDesc rfd) {
		if (rfd.getXMLDocument() == null)
			return false;
		String title = rfd.getXMLDocument().getValue(LimeXMLNames.AUDIO_TITLE);
		return title != null && title.startsWith(SPAM_AUDIO_TITLE_PREFIX);
	}

	/**
	 * look for a file name that starts with one of
	 * <ul>
	 * <li>INCOMPLETE_</li>
	 * <li>INCOMPLETE~</li>
	 * <li>INACHEVE_</li>
	 * <li>IN_</li>
	 * <li>___ARESTRA</li>
	 * <li>__INCOMPLETE</li>
	 * <li>___INCOMPLETED</li>
	 * <li>PREVIEW-T-</li>
	 * <li>T-(number)-</li>
	 * <li>CORRUPT-(number)-</li>
	 * </ul>
	 * 
	 * @param name
	 *            the lower-cased name of the file from a search result
	 * @return true if we think that this is an incomplete file
	 */
	private boolean isIncompleteFile(String name) {
		for (int i = 0; i < INCOMPLETE_NAME_PREFIXES.length; i++) {
			if (name.startsWith(INCOMPLETE_NAME_PREFIXES[i]))
				return true;
		}
		return hasNumberedPrefix(name, "t-")
				|| hasNumberedPrefix(name, "corrupt-");
	}

	/**
	 * checks for a name of the form prefix, digits, '-'. The run of digits
	 * may be empty, i.e. a '-' directly after the prefix matches as well.
	 * 
	 * @param name
	 *            the lower-cased file name
	 * @param prefix
	 *            the prefix the name has to start with
	 * @return true if name starts with prefix and the first non-digit
	 *         character after the prefix is a '-'
	 */
	private static boolean hasNumberedPrefix(String name, String prefix) {
		if (!name.startsWith(prefix))
			return false;
		for (int i = prefix.length(); i < name.length(); i++) {
			char c = name.charAt(i);
			if (!Character.isDigit(c))
				return c == '-';
		}
		// only digits (or nothing at all) after the prefix
		return false;
	}
}
