package jp.ac.dendai.cdl.mori.wikie.mapper;

import java.io.*;
import java.util.*;

import jp.ac.dendai.cdl.mori.wikie.main.*;
import jp.ac.dendai.cdl.mori.wikie.util.*;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
/**
 * Mapper for ISBN processing.
 * <p>
 * For every input page it extracts the page id and the page text, collects the
 * ISBN codes found in the text, and emits one {@code (page id, ISBN code)} pair
 * per code.
 *
 * @author Mori
 */
public class ISBNMapper extends WikipediaMapper implements Mapper<LongWritable, Text, Text, Text>{
    /**
     * Header value handed to {@link WikipediaLinkChecker#getISBNCode}; read from
     * {@link Driver#PROP_ISBN_HEADER} and defaulting to {@code "978"}.
     * Kept per instance (not static) so that mappers sharing a JVM cannot
     * overwrite each other's configuration.
     * NOTE(review): presumably the prefix that accepted ISBN codes must carry —
     * confirm against WikipediaLinkChecker.
     */
    private String isbnHeader;

    /**
     * Runs the superclass configuration and then loads the ISBN header from the job.
     *
     * @param job job configuration supplying {@link Driver#PROP_ISBN_HEADER}
     */
    @Override
    public void configure(JobConf job) {
        super.configure(job);
        isbnHeader = job.get(Driver.PROP_ISBN_HEADER, "978");
    }

    /**
     * Emits one {@code (page id, ISBN code)} record for every ISBN code found in the page text.
     * Pages for which either the id or the text element cannot be extracted produce no output.
     *
     * @param key      input key (unused)
     * @param value    raw page content
     * @param output   collector receiving the {@code (page id, ISBN code)} pairs
     * @param reporter progress reporter (unused)
     * @throws IOException if the collector fails to accept a record
     */
    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
        String page = WikipediaNormalizer.removeNonPrintingCharacter(value.toString());
        String id  = WikipediaMapper.getValueFromElementName(WikIE.ID_ELEMENT, page, 0, 1);
        String text = WikipediaMapper.getValueFromElementName(WikIE.TEXT_ELEMENT, page, 0, 1);
        // Text cannot wrap a null string, so a page without an id would otherwise
        // abort the task with a NullPointerException; skip it like a page without text.
        if (id == null || text == null) {
            return;
        }

        List<String> isbnCodes = WikipediaLinkChecker.getISBNCode(text, isbnHeader);
        // The key is the same for every code on this page and is never mutated,
        // so a single instance is built once and shared.
        Text pageId = new Text(id);
        for (String isbnCode : isbnCodes) {
            output.collect(pageId, new Text(isbnCode));
        }
    }
}
