package jp.ac.dendai.cdl.mori.wikie.mapred;

import java.io.*;
import java.util.*;

import jp.ac.dendai.cdl.mori.wikie.main.*;
import jp.ac.dendai.cdl.mori.wikie.parser.*;
import jp.ac.dendai.cdl.mori.wikie.util.*;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

/**
 * Mapper for pageLink processing.
 *
 * <p>For every input page it emits one record keyed by the normalized page
 * title with the value {@code WikIE.ENTRY + "\t" + id}. When the page's kind
 * equals {@code WikIE.LEAF}, it additionally emits one record per distinct
 * link found in the page text, with the value
 * {@code id + "\t" + section + "\t" + anchorText}.
 *
 * @author Mori
 *
 */
public class PageLinkMapper extends WMapper {

    /**
     * Emits the entry record for a page and, for leaf pages, one record per
     * distinct link contained in the page text.
     *
     * <p>{@code normalizer}, {@code linkUtils} and {@code createPageHandler}
     * are not declared in this class; presumably they are inherited from
     * {@code WMapper} (confirm there).
     *
     * @param key      input key; not read by this method
     * @param value    input record from which the page handler is created
     * @param output   collector receiving the entry and link records
     * @param reporter Hadoop reporter; not used by this method
     * @throws IOException declared by the signature; note that every exception
     *                     raised in the body is currently caught below
     */
    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
        try {
            WPageElementHandler page = createPageHandler(value);
            WEntry entry = normalizer.normalize(page.getTitle());
            String title = entry.toString();
            String id  = page.getId();
            String text = WNormalizer.deleteNonPrintingChar(page.getText());
            String kind = linkUtils.getKind(entry, text);

            // The entry record is emitted for every page, whatever its kind.
            output.collect(new Text(title), new Text(WikIE.ENTRY + "\t" + id));

            if (kind.equals(WikIE.LEAF)) {
                // String forms of the links already emitted for this page.
                // A hash set keeps the duplicate check constant-time instead of
                // scanning a list once per link.
                Set<String> seen = new HashSet<String>();
                for (Iterator<WLink> itr = linkUtils.getLink(text).iterator(); itr.hasNext();) {
                    WLink link = itr.next();
                    // add() returns false when the link was already recorded.
                    if (!seen.add(link.toString())) {
                        continue;
                    }
                    String target = link.getEntry().toString();
                    // A link whose target entry is empty is keyed by the current
                    // page's title (presumably a same-page section link).
                    String outKey = (target.length() == 0) ? title : target;
                    output.collect(new Text(outKey), new Text(id + "\t" + link.getSection() + "\t" + link.getAnchorText()));
                }
            }
        }
        catch (Exception e) {
            // Any failure abandons the rest of this page; records already
            // collected for it stay emitted.
            // NOTE(review): this also hides IOExceptions thrown by
            // output.collect - confirm that is intended.
            e.printStackTrace();
        }
    }

}
