package jp.ac.dendai.cdl.mori.wikie.util;

import static org.junit.Assert.*;

import java.io.*;

import org.junit.*;

/**
 * JUnit tests for {@link WikipediaNormalizer}.
 *
 * <p>Covers {@code normalize}, the static {@code decode}, {@code getNamespaceNumber}
 * and {@code getPageName}. All tests share one normalizer instance that is created
 * once before any test runs.
 */
public class NormalizerTest {
    /** Instance under test, shared by every test method in this class. */
    private static WikipediaNormalizer normalizer;

    /**
     * Creates the shared normalizer, handing it the XML file at the relative path
     * {@code input/jawiki-latest-pages-meta-current.xml}.
     *
     * @throws Exception if constructing the normalizer fails
     */
    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        File dumpFile = new File("input/jawiki-latest-pages-meta-current.xml");
        normalizer = new WikipediaNormalizer(dumpFile);
    }

    /** Asserts that {@code normalizer.normalize(input)} yields {@code expected}. */
    private static void assertNormalized(String expected, String input) {
        assertEquals(expected, normalizer.normalize(input));
    }

    /** Asserts that {@code normalizer.getNamespaceNumber(title)} yields {@code expected}. */
    private static void assertNamespace(int expected, String title) {
        assertEquals(expected, normalizer.getNamespaceNumber(title));
    }

    /** Asserts that {@code normalizer.getPageName(title)} yields {@code expected}. */
    private static void assertPageName(String expected, String title) {
        assertEquals(expected, normalizer.getPageName(title));
    }

    /**
     * Checks the expected output of {@code normalize} (and, for two cases, the static
     * {@code decode}) over a list of sample titles.
     */
    @Test
    public void testNormalize() {
        // Inputs expected to come back unchanged, including stray '%' and '+'.
        assertNormalized("", "");
        assertNormalized("%", "%");
        assertNormalized("%1", "%1");
        assertNormalized("ニュース速報+板", "ニュース速報+板");

        // Static decode: percent escapes become characters, '_' becomes a space.
        assertEquals(")2", WikipediaNormalizer.decode("%292"));
        assertEquals("ジャガー (曖昧さ回避)", WikipediaNormalizer.decode("ジャガー_%28曖昧さ回避%29"));

        // A bare separator or a bare namespace prefix is expected to normalize to "".
        assertNormalized("", ":");
        assertNormalized("", "category:");

        // First letter is expected to be upper-cased.
        assertNormalized("Abc", "abc");
        assertNormalized("+", "+");

        // HTML entity and percent-encoded forms are expected to be resolved.
        assertNormalized("&", "&amp;");
        assertNormalized("&", "%26amp%3B");
        assertNormalized("バッシング (映画)", "%E3%83%90%E3%83%83%E3%82%B7%E3%83%B3%E3%82%B0 %28%E6%98%A0%E7%94%BB%29");

        // Namespace prefix is expected lower-cased, page part capitalized,
        // whitespace/underscores around the separator and inside the name collapsed.
        assertNormalized("category:Abc", "Category:abc");
        assertNormalized("category:Abc", "Category : abc");
        assertNormalized("category:A b c", " Category _:_ a _ b _ c ");
        assertNormalized("category:Magic : ザ・ギャザリング", "Category:magic : ザ・ギャザリング");

        // NOTE(review): compares a literal with itself; the normalizer is not exercised here.
        assertEquals("'''", "'''");

        // Titles with a leading ':' are expected to be returned as given.
        assertNormalized(":en:Richard M. Daley|リチャード・デイリー", ":en:Richard M. Daley|リチャード・デイリー");
        assertNormalized(":あいう", ":あいう");
    }

    /** Checks the namespace number reported by {@code getNamespaceNumber} for sample titles. */
    @Test
    public void testGetNamespace() {
        // No prefix (or a known prefix word without ':') is expected to give 0.
        assertNamespace(0, "abc");
        assertNamespace(14, "Category:abc");
        assertNamespace(14, "category : abc");
        assertNamespace(0, "wikipedia");
        assertNamespace(4, "wikipedia:リンクについて");
        // Full image link text, including pipe-separated options and a caption.
        assertNamespace(6, "Image:Knowledge French EU map.png|right|thumb|200px|'''EU加盟国および各自治体の住民におけるフランス語への理解度'''&lt;br /&gt;濃紺色が母語地域、以下50%以上、20-49%、10-19%、5-9%、5%未満（灰色はEU非加盟国・地域）");
    }

    /** Checks that {@code getPageName} returns the title with any namespace prefix removed. */
    @Test
    public void testGetPageName() {
        assertPageName("abc", "abc");
        assertPageName("abc", "category:abc");
        assertPageName("abc", "category : abc ");
        // NOTE(review): compares a literal with itself; the normalizer is not exercised here.
        assertEquals("ß", "ß");
    }
}
