package sej;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Random;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import map.Const;

/**
 * Scrapes the Seven-Eleven store locator to collect store addresses.
 * <p>
 * The locator is read as three levels of pages: the top page (links to prefecture
 * pages), a prefecture page (links to city pages) and a city page (store names and
 * addresses). Each page is decoded with {@code Const.Sej.ENCODING}, read line by line
 * and matched against regular expressions; no HTML parser is involved.
 * <p>
 * Every page that is opened is closed again, and a read error reported by the
 * underlying stream is rethrown instead of being treated as a normal end of page.
 *
 * @author Kumano Tatsuo
 * 2005/11/27
 */
public class SEJ {
	/**
	 * Store-name line on a city page. Group 1 is everything on the line that precedes
	 * the last {@code </B></A></TD>}.
	 */
	private static final Pattern CAPTION_PATTERN = Pattern.compile("(.+)</B></A></TD>");

	/** FONT element on a city page. Group 1 is its text, which is taken as the address. */
	private static final Pattern ADDRESS_PATTERN = Pattern.compile("<FONT .+>(.+)</FONT>");

	/** Prefecture image on a prefecture page. Group 1 is the ALT text without trailing spaces. */
	private static final Pattern PREFECTURE_NAME_PATTERN = Pattern
			.compile("<IMG SRC=\"img/ken/[0-9]+.gif\" WIDTH=\"[0-9]+\" ALT=\"([^> ]+) +\">");

	/** Link to a city page. Group 1 is the relative URL, group 2 the link text. */
	private static final Pattern CITY_LINK_PATTERN = Pattern
			.compile("<A HREF=\"(ssactl.htm\\?ENC=.+)\">(.+)");

	/** Link to a prefecture page. Group 1 is the relative URL, group 2 the text up to the first space. */
	private static final Pattern PREFECTURE_LINK_PATTERN = Pattern
			.compile("<A HREF=\"(ssactl.htm\\?ENC=.+)\">([^ ]+)");

	/** Exclusive upper bound, in milliseconds, of the random pause before each request of the crawl. */
	private static final int MAX_DELAY_MILLIS = 10000;

	/** Receives each store found on a city page, in page order. */
	private interface StoreSink {
		/**
		 * @param address text of the FONT element that follows the store name
		 * @param caption store name
		 */
		void store(String address, String caption);
	}

	/**
	 * Parses a city page and returns the stores listed on it.
	 * <p>
	 * For each store-name line, only the first FONT element found on that line or on a
	 * later line is used as the address. If the same address occurs more than once, the
	 * later store name replaces the earlier one.
	 *
	 * @param url URL of the city page
	 * @return map from address to store name, in page order
	 * @throws IOException if the page cannot be opened or read
	 * @throws UnsupportedEncodingException if the page encoding is not supported
	 */
	public static Map<String, String> getAddresses(final URL url)
			throws UnsupportedEncodingException, IOException {
		final Map<String, String> ret = new LinkedHashMap<String, String>();
		scanStores(url, new StoreSink() {
			public void store(final String address, final String caption) {
				ret.put(address, caption);
			}
		});
		return ret;
	}

	/**
	 * Parses a prefecture page and returns the URLs of its city pages.
	 * <p>
	 * Keys have the form {@code prefecture,city}. The prefecture part is taken from the
	 * most recent prefecture image seen on the page; if a city link appears before any
	 * such image, the key starts with the literal text {@code null}. Links whose
	 * fragment contains {@code #} are skipped.
	 *
	 * @param url URL of the prefecture page
	 * @return map from {@code prefecture,city} to the city page URL, in page order
	 * @throws IOException if the page cannot be opened or read
	 * @throws MalformedURLException if a city link does not form a valid URL
	 * @throws UnsupportedEncodingException if the page encoding is not supported
	 */
	public static Map<String, URL> getCities(final URL url) throws UnsupportedEncodingException,
			MalformedURLException, IOException {
		final Map<String, URL> ret = new LinkedHashMap<String, URL>();
		final Scanner scanner = open(url);
		try {
			String prefecture = null;
			while (scanner.hasNextLine()) {
				final String line = scanner.nextLine();
				final Matcher nameMatcher = PREFECTURE_NAME_PATTERN.matcher(line);
				if (nameMatcher.find()) {
					prefecture = nameMatcher.group(1);
				}
				// Splitting on </A> leaves at most one anchor per fragment.
				for (final String fragment : line.split("</A>")) {
					if (fragment.contains("#")) {
						continue;
					}
					final Matcher linkMatcher = CITY_LINK_PATTERN.matcher(fragment);
					if (linkMatcher.find()) {
						final String city = linkMatcher.group(2);
						final URL cityUrl = new URL(Const.Sej.BASE_URL + linkMatcher.group(1));
						ret.put(prefecture + "," + city, cityUrl);
					}
				}
			}
			failOnReadError(scanner);
		} finally {
			scanner.close();
		}
		return ret;
	}

	/**
	 * Parses the top page and returns the URLs of the prefecture pages.
	 * <p>
	 * If the same link text occurs more than once, the later URL replaces the earlier one.
	 *
	 * @return map from prefecture link text to the prefecture page URL, in page order
	 * @throws IOException if the page cannot be opened or read
	 * @throws MalformedURLException if a prefecture link does not form a valid URL
	 * @throws UnsupportedEncodingException if the page encoding is not supported
	 */
	public static Map<String, URL> getPrefectures() throws UnsupportedEncodingException,
			MalformedURLException, IOException {
		final Map<String, URL> ret = new LinkedHashMap<String, URL>();
		final Scanner scanner = open(new URL(Const.Sej.TOP_PAGE));
		try {
			while (scanner.hasNextLine()) {
				for (final String fragment : scanner.nextLine().split("</A>")) {
					final Matcher linkMatcher = PREFECTURE_LINK_PATTERN.matcher(fragment);
					if (linkMatcher.find()) {
						final URL url = new URL(Const.Sej.BASE_URL + linkMatcher.group(1));
						ret.put(linkMatcher.group(2), url);
					}
				}
			}
			failOnReadError(scanner);
		} finally {
			scanner.close();
		}
		return ret;
	}

	/**
	 * Prints the prefecture map and exits.
	 * <p>
	 * The full crawl that follows {@code System.exit(0)} is never reached at runtime;
	 * remove the test lines to run it.
	 *
	 * @param args ignored
	 * @throws IOException if a page cannot be opened or read
	 * @throws MalformedURLException if a link does not form a valid URL
	 * @throws UnsupportedEncodingException if the page encoding is not supported
	 * @throws InterruptedException if the thread is interrupted during a pause of the crawl
	 */
	public static void main(String[] args) throws UnsupportedEncodingException,
			MalformedURLException, IOException, InterruptedException {
		// test
		System.out.println(getPrefectures());
		System.exit(0);
		printAllStores();
	}

	/** Opens a page as a line scanner decoded with {@code Const.Sej.ENCODING}; the caller must close it. */
	private static Scanner open(final URL url) throws IOException {
		return new Scanner(new InputStreamReader(url.openStream(), Const.Sej.ENCODING));
	}

	/**
	 * Rethrows a read error that the scanner swallowed. {@link Scanner} reports an
	 * {@link IOException} from its source as end of input, which would otherwise make a
	 * truncated page look complete.
	 */
	private static void failOnReadError(final Scanner scanner) throws IOException {
		final IOException failure = scanner.ioException();
		if (failure != null) {
			throw failure;
		}
	}

	/** Reads a city page and hands each (address, store name) pair to the sink in page order. */
	private static void scanStores(final URL url, final StoreSink sink) throws IOException {
		final Scanner scanner = open(url);
		try {
			String caption = null;
			// True from a store-name line until its first address has been consumed,
			// so caption is never null when the sink is called.
			boolean awaitingAddress = false;
			while (scanner.hasNextLine()) {
				final String line = scanner.nextLine();
				final Matcher captionMatcher = CAPTION_PATTERN.matcher(line);
				if (captionMatcher.find()) {
					caption = captionMatcher.group(1);
					awaitingAddress = true;
				}
				final Matcher addressMatcher = ADDRESS_PATTERN.matcher(line);
				if (awaitingAddress && addressMatcher.find()) {
					sink.store(addressMatcher.group(1), caption);
					awaitingAddress = false;
				}
			}
			failOnReadError(scanner);
		} finally {
			scanner.close();
		}
	}

	/** Sleeps for a random time below {@link #MAX_DELAY_MILLIS} to space out requests. */
	private static void pause(final Random random) throws InterruptedException {
		Thread.sleep(random.nextInt(MAX_DELAY_MILLIS));
	}

	/**
	 * Crawls the top page, every prefecture page and every city page, printing one
	 * {@code prefecture,city,address,storeName} line per store.
	 */
	private static void printAllStores() throws IOException, InterruptedException {
		final Random random = new Random();
		final Scanner scanner = open(new URL(Const.Sej.TOP_PAGE));
		try {
			while (scanner.hasNextLine()) {
				for (final String fragment : scanner.nextLine().split("</A>")) {
					final Matcher linkMatcher = PREFECTURE_LINK_PATTERN.matcher(fragment);
					if (linkMatcher.find()) {
						pause(random);
						final URL url = new URL(Const.Sej.BASE_URL + linkMatcher.group(1));
						printPrefectureStores(url, linkMatcher.group(2), random);
					}
				}
			}
			failOnReadError(scanner);
		} finally {
			scanner.close();
		}
	}

	/**
	 * Crawls one prefecture page and prints the stores of each city linked from it.
	 * The link text from the top page is used as the prefecture name until the page's
	 * own prefecture image supplies one.
	 */
	private static void printPrefectureStores(final URL url, final String linkText,
			final Random random) throws IOException, InterruptedException {
		final Scanner scanner = open(url);
		try {
			String prefecture = linkText;
			while (scanner.hasNextLine()) {
				final String line = scanner.nextLine();
				final Matcher nameMatcher = PREFECTURE_NAME_PATTERN.matcher(line);
				if (nameMatcher.find()) {
					prefecture = nameMatcher.group(1);
				}
				for (final String fragment : line.split("</A>")) {
					if (fragment.contains("#")) {
						continue;
					}
					final Matcher linkMatcher = CITY_LINK_PATTERN.matcher(fragment);
					if (linkMatcher.find()) {
						final String city = linkMatcher.group(2);
						pause(random);
						final URL cityUrl = new URL(Const.Sej.BASE_URL + linkMatcher.group(1));
						printCityStores(cityUrl, prefecture, city);
					}
				}
			}
			failOnReadError(scanner);
		} finally {
			scanner.close();
		}
	}

	/** Prints one {@code prefecture,city,address,storeName} line per store on a city page. */
	private static void printCityStores(final URL url, final String prefecture,
			final String city) throws IOException {
		scanStores(url, new StoreSink() {
			public void store(final String address, final String caption) {
				System.out.println(prefecture + "," + city + "," + address + "," + caption);
			}
		});
	}
}
