#-*- coding: utf-8; -*-

# 2009-01-10 katoy 
#   Show the tree of imported files for XBRL instance data.
#
# ruby 1.8.7
# libxml-ruby (0.9.8)

require 'rubygems'
require 'xml/libxml'
require 'open-uri'
require 'pathname'
require 'pp'
require 'yaml'
require 'benchmark'

# Builds and prints the tree of files referenced (directly or transitively)
# by an XBRL instance or XML schema.
#
# The discovered references are kept in a hash and can be persisted to
# YAML_NAME so that later runs skip files that were already analysed.
class FileTree
  # Cache file, read from and written to the current working directory.
  YAML_NAME = 'links.yaml'

  # Qualified element name ("namespace-uri:local-name") => name of the
  # attribute on that element that holds the reference to another file.
  REFERENCE_ATTRIBUTES = {
    'http://www.w3.org/2001/XMLSchema:import'       => 'schemaLocation',
    'http://www.xbrl.org/2003/linkbase:schemaRef'   => 'xlink:href',
    'http://www.xbrl.org/2003/linkbase:linkbaseRef' => 'xlink:href'
  }.freeze

  # Hash of from_url => [to_url, ...].
  attr_reader :links

  # Starts with an empty link map, or with the map stored in YAML_NAME when
  # that file exists and contains a hash.
  def initialize
    @links = {}
    return unless File.exist?(YAML_NAME)

    # An empty or malformed cache file does not yield a Hash; ignore it
    # instead of replacing @links with something that cannot be indexed.
    cached = YAML.load_file(YAML_NAME)
    @links = cached if cached.is_a?(Hash)
  end

  # Writes the entries whose key starts with "http" to YAML_NAME.
  # Entries for local files are not persisted, but they stay available in
  # memory (the in-memory map is not modified).
  def save_links
    remote_links = @links.reject { |url, _targets| !url.start_with?('http') }
    File.open(YAML_NAME, 'w') do |f|
      YAML.dump(remote_links, f)
    end
  end

  # Reads the file at +path+ (resolved against +dir+), records every file it
  # references in #links and recurses into each referenced file.
  #
  # Files already present in #links are skipped. On any read/parse error a
  # message is printed and the process exits with status 1.
  def generate(path, dir = '')
    from_path = Utils::normalize_path(path, dir)

    if @links[from_path]
      puts "------- skip #{from_path}"
      return
    end

    parent = Utils::parent(from_path)

    # Register the file before descending into its references: this stops
    # circular imports from recursing forever and keeps files without any
    # reference from being read again each time they are referenced.
    @links[from_path] = []

    reader = nil
    begin
      reader = XML::Reader.file(from_path)
      while reader.read
        href = reference_href(reader)
        next if href.nil?

        to_path = Utils::normalize_path(href, parent)
        generate(to_path, parent)
        @links[from_path] << to_path
      end
    rescue StandardError => e
      puts "---- error reading #{from_path}"
      # Report the cause on stderr so the failure can be diagnosed.
      warn "     #{e.class}: #{e.message}"
      exit 1
    ensure
      reader.close if reader
    end
  end

  # Prints the files reachable from +path+, one per line, indented by four
  # spaces per nesting level (starting at +indent+ levels).
  #
  # +ancestors+ is the chain of files leading to +path+; a file that is
  # already part of that chain is printed but not expanded again, so a
  # circular reference cannot cause endless output.
  def print_tree(path, indent = 0, ancestors = [])
    children = @links[path]
    return if children.nil?

    trail = ancestors + [path]
    children.each do |child|
      puts "#{' ' * (4 * indent)}#{child}"
      print_tree(child, indent + 1, trail) unless trail.include?(child)
    end
  end

  private

  # Returns the reference stored on the reader's current node, or nil when
  # the node is not one of the elements listed in REFERENCE_ATTRIBUTES (or
  # does not carry the attribute).
  def reference_href(reader)
    attribute = REFERENCE_ATTRIBUTES["#{reader.namespace_uri}:#{reader.local_name}"]
    attribute && reader[attribute]
  end
end

# Path and URL helpers used to turn the references found in a document into
# absolute, normalized locations.
class Utils
  # Matches locations that are fetched over HTTP or HTTPS.
  REMOTE_PATTERN = %r{\Ahttps?://}

  # Returns +path+ as a Pathname with "." / ".." segments and duplicate
  # slashes removed (purely lexical, the file system is not consulted).
  def self.cleanpath(path)
    Pathname.new(path).cleanpath
  end

  # Returns everything before the last "/" of +full_path+ (no trailing
  # slash), e.g. "http://host/a/b.xml" => "http://host/a".
  #
  # A path directly under the root ("/b.xml") yields "/", and a path without
  # any "/" yields "" (which normalize_path treats as the current directory).
  def self.parent(full_path)
    slash = full_path.rindex('/')
    return '' if slash.nil?
    return '/' if slash.zero?

    full_path[0...slash]
  end

  # Resolves +path+ against +dir+ and returns the normalized absolute
  # location as a String.
  #
  # * +path+ is an http(s) URL: it is normalized on its own.
  # * +dir+ is an http(s) URL: +path+ is resolved against it as a URI
  #   reference, so "../x", "x" and root-relative "/x" all work.
  # * otherwise: +path+ (with an optional leading "file:" removed) is
  #   expanded against +dir+, or against the current directory when +dir+
  #   is empty.
  #
  # The query and fragment of a URL are not part of the result.
  def self.normalize_path(path, dir = '')
    if remote?(path)
      remote_url(URI.parse(path))
    elsif remote?(dir)
      # The trailing slash makes URI.join treat dir as a directory rather
      # than as a file whose last segment would be replaced.
      remote_url(URI.join("#{dir.chomp('/')}/", path))
    else
      local = path.start_with?('file:') ? path[5..-1] : path
      base = dir == '' ? File.expand_path('.') : dir
      cleanpath(File.expand_path(local, base)).to_s
    end
  end

  # True when +location+ starts with "http://" or "https://".
  def self.remote?(location)
    !(location =~ REMOTE_PATTERN).nil?
  end

  # Rebuilds "scheme://host[:port]/clean/path" from a parsed URI.
  def self.remote_url(uri)
    # An empty path would be cleaned to ".", producing "http://host."
    path = uri.path.to_s
    path = '/' if path.empty?

    authority = uri.host.to_s
    # Keep an explicit port only when it differs from the scheme default.
    authority = "#{authority}:#{uri.port}" if uri.port && uri.port != uri.default_port

    "#{uri.scheme}://#{authority}#{cleanpath(path)}"
  end

  private_class_method :remote?, :remote_url
end

# Entry point: prints the import tree of one or more XBRL instances.
#
# The instance is taken from the first command-line argument; without an
# argument the Microsoft sample published by the SEC is used.
# An argument starting with "http:" is treated as a single URL, anything
# else as a glob pattern for local files.
#
# Sample instances (see http://www.sec.gov):
#   SEC - adobe
#     http://www.sec.gov/Archives/edgar/data/796343/000079634308000007/adbe-20080916.xml
#   SEC - microsoft
#     http://www.sec.gov/Archives/edgar/data/789019/000119312508215214/msft-20080930.xml
#
# Examples of local arguments:
#   file:/Users/youichikato/work/www/xbrl.org/XBRL-CONF/Common/instance/397-00-ConsistentInstance-valid.xbrl
#   /Users/youichikato/work/www/xbrl.org/XBRL-CONF/**/*.xbrl
#   /Users/youichikato/NetBeansProjects/ruby-xbrl/Edinet/sample/**/*.xbrl
#   ../data/X99001-000/jpfr-asr-X99001-000-2008-03-31-01-2008-06-27.xbrl
pat = ARGV[0] || "http://www.sec.gov/Archives/edgar/data/789019/000119312508215214/msft-20080930.xml"

# The whole run is timed and the Benchmark result is printed at the end.
puts Benchmark.measure {
  ftree = FileTree.new

  if pat.nil? || pat.empty?
    puts "--No specified XBRL instance."
    exit 1
  end

  # A URL is processed as is; a local pattern may expand to several files.
  targets = pat.index('http:') == 0 ? [pat] : Dir.glob(pat)

  targets.each do |target|
    full_path = Utils::normalize_path(target)
    ftree.generate(full_path)
    ftree.print_tree(full_path)
  end

  ftree.save_links
}
