package jp.ac.dendai.cdl.mori.wikie.io;

import java.io.*;

import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;

/**
 * Reads a byte stream one line at a time.
 * <p>
 * This is simply <code>org.apache.hadoop.mapred.LineRecordReader.LineReader</code>
 * promoted to a top-level class. A line is terminated by <code>\n</code>,
 * by <code>\r\n</code>, by a lone <code>\r</code>, or by the end of the stream.
 * <p>
 * The reader keeps its buffer state in plain, unsynchronized fields.
 * It implements {@link Closeable} so it can be managed like any other
 * stream-owning resource.
 * @author Mori
 */
public class LineReader implements Closeable {
    /** Buffer size, in bytes, used when the configuration does not specify one. */
    public static final int DEFAULT_BUFFER_SIZE = 64 * 1024;
    private final InputStream in;
    private final byte[] buffer;
    // the number of bytes of real data in the buffer
    private int bufferLength = 0;
    // the current position in the buffer
    private int bufferPosn = 0;

    /**
     * Create a line reader that reads from the given stream using the
     * given buffer-size.
     * @param in the stream to read lines from
     * @param bufferSize size of the internal read buffer in bytes; must be positive
     * @throws IllegalArgumentException if <code>bufferSize</code> is not positive
     */
    public LineReader(InputStream in, int bufferSize) {
        // A zero-length buffer would make every read return 0 bytes, which
        // backfill() reports as "no more data"; reject it up front instead
        // of silently treating every input as empty.
        if (bufferSize <= 0) {
            throw new IllegalArgumentException(
                    "bufferSize must be positive: " + bufferSize);
        }
        this.in = in;
        this.buffer = new byte[bufferSize];
    }

    /**
     * Create a line reader that reads from the given stream using the
     * <code>io.file.buffer.size</code> specified in the given
     * <code>Configuration</code>, falling back to
     * {@link #DEFAULT_BUFFER_SIZE} when the key is not set.
     * @param in input stream
     * @param conf configuration
     * @throws IOException declared for callers' compatibility
     * @throws IllegalArgumentException if the configured buffer size is not positive
     */
    public LineReader(InputStream in, Configuration conf) throws IOException {
        this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE));
    }

    /**
     * Fill the buffer with more data, discarding whatever it held before
     * and resetting the read position to the start of the buffer.
     * @return was there more data?
     * @throws IOException if the underlying stream throws
     */
    boolean backfill() throws IOException {
        bufferPosn = 0;
        bufferLength = in.read(buffer);
        return bufferLength > 0;
    }

    /**
     * Close the underlying stream.
     * @throws IOException if the underlying stream throws
     */
    public void close() throws IOException {
        in.close();
    }

    /**
     * Read one line from the InputStream into the given Text.
     * The line terminator is consumed but not stored in <code>str</code>.
     * @param str the object to store the given line; it is cleared first
     * @return the number of bytes read including the newline; 0 when the
     *         stream was already at its end
     * @throws IOException if the underlying stream throws
     */
    public int readLine(Text str) throws IOException {
        str.clear();
        boolean hadFinalNewline = false;
        boolean hadFinalReturn = false;
        boolean hitEndOfFile = false;
        int startPosn = bufferPosn;
        outerLoop: while (true) {
            if (bufferPosn >= bufferLength) {
                if (!backfill()) {
                    hitEndOfFile = true;
                    break;
                }
            }
            // start of the part of this line that lives in the current buffer
            startPosn = bufferPosn;
            for(; bufferPosn < bufferLength; ++bufferPosn) {
                switch (buffer[bufferPosn]) {
                case '\n':
                    // end of line: consume the \n and stop
                    hadFinalNewline = true;
                    bufferPosn += 1;
                    break outerLoop;
                case '\r':
                    if (hadFinalReturn) {
                        // a second \r belongs to the next line; leave it in
                        // the buffer so we'll get it next time
                        break outerLoop;
                    }
                    // might be a lone \r or the first half of \r\n; the next
                    // byte decides
                    hadFinalReturn = true;
                    break;
                default:
                    if (hadFinalReturn) {
                        // the previous \r ended the line; this byte starts
                        // the next one, so leave it unread
                        break outerLoop;
                    }
                }
            }
            // The buffer ran out before the line ended. Keep what was scanned
            // and go around for a refill. If a \r was seen it is necessarily
            // the last byte of this buffer (any byte after it would have left
            // the loop), so drop it from the stored text.
            int length = bufferPosn - startPosn - (hadFinalReturn ? 1 : 0);
            if (length >= 0) {
                str.append(buffer, startPosn, length);
            }
        }
        int newlineLength = (hadFinalNewline ? 1 : 0) + (hadFinalReturn ? 1 : 0);
        if (!hitEndOfFile) {
            // Append the tail of the line from the current buffer, excluding
            // terminator bytes. The length goes negative when the \r was in
            // the previous buffer, hence the > 0 guard.
            int length = bufferPosn - startPosn - newlineLength;
            if (length > 0) {
                str.append(buffer, startPosn, length);
            }
        }
        return str.getLength() + newlineLength;
    }
}