
# 2009-01-10 katoy

require 'rubygems'
require 'xml/libxml'
require 'open-uri'
require 'pp'
require 'benchmark'

require 'parse_linkbase'

# One top-level <xs:element> declaration read from an XSD.
#
# Every attribute is stored as a Symbol, or left nil when the attribute is
# absent from the node.
class Element
  attr_reader :name
  attr_reader :id
  attr_reader :type
  attr_reader :abstract
  attr_reader :substitutiongroup
  attr_reader :nillable
  attr_reader :periodtype
  attr_reader :balance

  # XML attribute name => instance variable that receives its value.
  ATTRIBUTE_MAP = [
    ['name',              :@name],
    ['id',                :@id],
    ['type',              :@type],
    ['abstract',          :@abstract],
    ['substitutionGroup', :@substitutiongroup],
    ['nillable',          :@nillable],
    ['xbrli:periodType',  :@periodtype],
    ['balance',           :@balance]
  ].freeze

  # Copies the attributes of the node the reader is positioned on.
  #
  # reader:: any object answering +[]+ with an attribute name and returning a
  #          String or nil.
  # file, dir:: accepted for signature parity with the other parsers; unused.
  #
  # Attributes missing from the node leave the current value untouched.
  # Returns self.
  def parse(reader, file='', dir='')
    ATTRIBUTE_MAP.each do |attribute, ivar|
      value = reader[attribute]
      instance_variable_set(ivar, value.to_sym) unless value.nil?
    end

    self
  end

  # One-line, human readable dump of all attributes.
  def to_s
    "#{@name} id='#{@id}' type='#{@type}' abstract='#{@abstract}' " \
      "subs='#{@substitutiongroup}' nillable='#{@nillable}' " \
      "period='#{@periodtype}' balance='#{@balance}'"
  end
end

# Streaming parser for one XSD file.
#
# After #parse:
# * +elements+  maps "<file>#<element id>" to Element objects (plus whatever
#   referenced schemas contributed),
# * +schemas+   lists the Schema objects created for schemaRef nodes,
# * +linkbases+ lists the Linkbase objects created for linkbaseRef nodes.
#
# Every top-level element additionally becomes a singleton accessor on the
# instance, so +xsd.SomeElementName+ returns its Element.
class Xsd
  attr_reader :elements
  attr_reader :schemas
  attr_reader :linkbases

  # Node names ("<namespace uri>:<local name>") that are recognised but
  # deliberately skipped.
  IGNORED_NODES = [
    'http://www.w3.org/2001/XMLSchema:schema',
    'http://www.w3.org/2001/XMLSchema:import',
    'http://www.w3.org/2001/XMLSchema:annotation',
    'http://www.w3.org/2001/XMLSchema:appinfo',
    'http://www.w3.org/2001/XMLSchema:complexType',
    'http://www.w3.org/2001/XMLSchema:attribute',
    'http://www.w3.org/2001/XMLSchema:sequence',
    'http://www.xbrl.org/2003/linkbase:roleType',
    'http://www.xbrl.org/2003/linkbase:definition',
    'http://www.xbrl.org/2003/linkbase:usedOn',
    ':#comment',
    ':#text'
  ].freeze

  # Reads +file+ node by node and fills elements / schemas / linkbases.
  #
  # file:: path handed to XML::Reader.file.
  # dir::  base directory forwarded to the nested parsers.
  #
  # A file that cannot be opened is reported on stdout and leaves the
  # collections empty. Returns self in every case.
  def parse(file, dir='')
    @elements = {}
    @schemas = []
    @linkbases = []

    reader = begin
      XML::Reader.file file
    rescue
      nil
    end

    # nil covers both a raised error and a reader that could not be created.
    if reader.nil?
      puts "---- error reading #{file}"
      return self
    end

    begin
      while reader.read
        handle_node(reader, "#{reader.namespace_uri}:#{reader.local_name}", file, dir)
      end
    ensure
      # Release the underlying reader even when a nested parser raises.
      reader.close
    end

    self
  end

  # True when this object responds to a method called +element+, which
  # includes the accessors created by #add_method (and, as before, the
  # regular methods of the object).
  def has_element?(element)
    return false if element.nil?

    key = element.to_s
    return false if key.empty?

    # ruby 1.8: methods() is an Array of String.
    # ruby 1.9: methods() is an Array of Symbol.
    known = methods
    known.include?(key.to_sym) || known.include?(key)
  end

  # Defines a singleton accessor named after +elem.name+ that returns +elem+.
  #
  # The accessor returns the element directly, so names that are valid in XML
  # but not as Ruby instance variable names (e.g. "foo-bar") still work. The
  # matching instance variable is also set whenever the name allows it.
  # Elements without a name are skipped.
  #
  # NOTE(review): an element named like an existing method or instance
  # variable of this class (e.g. "elements") would shadow it - confirm that
  # such names cannot occur in the input.
  def add_method(elem)
    name = elem.name.to_s
    return if name.empty?

    singleton = class << self; self end
    singleton.class_eval do
      define_method(name) { elem }
    end

    begin
      instance_variable_set("@#{name}", elem)
    rescue NameError
      # Not a legal instance variable name; the accessor above is enough.
    end

    elem
  end

  private

  # Handles the single node the reader is currently positioned on.
  def handle_node(reader, name, file, dir)
    case name
    when 'http://www.xbrl.org/2003/linkbase:schemaRef'
      schema = Schema.new.parse(reader, file, dir)
      @elements.merge!(schema.elements)
      @schemas << schema

    when 'http://www.xbrl.org/2003/linkbase:linkbaseRef'
      # NOTE(review): the node's xlink:href is not used here; the linkbase
      # parser receives this XSD's own path. Confirm against Linkbase#parse.
      @linkbases << Linkbase.new.parse(file, dir)

    when 'http://www.w3.org/2001/XMLSchema:element'
      # Only declarations directly below the root are registered.
      return if reader.depth != 1
      elem = Element.new.parse(reader, file, dir)
      @elements["#{file}##{elem.id}"] = elem
      add_method(elem)

    when *IGNORED_NODES
      # known node, nothing to do

    else
      puts "******* ignore(0) #{name} in xsd at #{file} line:#{reader.line_number}"
    end
  end

end

# A schema referenced from another document through xlink attributes.
# Parsing loads the referenced XSD and gathers its elements.
class Schema
  attr_reader :href, :type

  # Only +elements+ is assigned in this class; the other readers have no
  # writer here.
  attr_reader :elements, :labels, :presents, :calcs, :refs

  # Resolves the node's xlink:href relative to +file+ / +dir+, parses that
  # XSD and merges its elements into +elements+.
  #
  # reader:: reader positioned on the referencing node; it is advanced with
  #          +next+ once the referenced file has been processed.
  # Returns self.
  def parse(reader, file='', dir='')
    @href = reader['xlink:href'].to_sym
    @type = reader['xlink:type'].to_sym

    # Directory of the referencing file, then the target inside it.
    base_dir = Util.get_fullpath(file, dir).last.to_s
    target = Util.get_fullpath(@href.to_s, base_dir).first.to_s

    loaded = Xsd.new.parse(target)
    (@elements ||= {}).merge!(loaded.elements)

    reader.next
    self
  end

end

# A schema pulled in through a schemaLocation attribute instead of an xlink
# reference. Unlike Schema#parse, +type+ is not assigned here.
class Import < Schema
  # Resolves schemaLocation relative to +file+ / +dir+, parses that XSD and
  # merges its elements into +elements+. Advances +reader+ with +next+ and
  # returns self.
  def parse(reader, file='', dir='')
    @href = reader['schemaLocation'].to_sym

    # Directory of the importing file, then the target inside it.
    base_dir = Util.get_fullpath(file, dir).last.to_s
    target = Util.get_fullpath(@href.to_s, base_dir).first.to_s

    loaded = Xsd.new.parse(target)
    (@elements ||= {}).merge!(loaded.elements)

    reader.next
    self
  end

end

# Path helpers shared by the parsers.
class Util
  # Matches locations that have to be treated as remote URLs.
  REMOTE_URL = %r{\Ahttps?://}

  # Resolves +path+ against +dir+.
  #
  # path:: http(s) URL, "file:" prefixed path, or plain absolute/relative path.
  # dir::  base location (URL or directory); '' means "derive it from path".
  #
  # Returns a two element Array [location, directory]:
  # * remote: location is a URI object, directory is a String of the form
  #   "scheme://host:port/parent/path" without a trailing slash;
  # * local:  both are Strings; location is the expanded absolute path.
  def self.get_fullpath(path, dir)
    path_is_remote = !(path =~ REMOTE_URL).nil?

    if path_is_remote || dir =~ REMOTE_URL
      # Trailing slashes on dir are dropped so joining never yields "//".
      location = path_is_remote ? path : "#{dir.sub(%r{/+\z}, '')}/#{path}"
      uri = URI.parse(location)

      # uri.path already starts with "/", so it is appended without another
      # separator; a top-level resource has no parent segment at all.
      parent = File.dirname(uri.path.to_s)
      parent = '' if parent == '/' || parent == '.'

      [uri, "#{uri.scheme}://#{uri.host}:#{uri.port}#{parent}"]
    else
      # Strip the scheme only when it really is a prefix of the path.
      local = path.index('file:') == 0 ? path[5..-1] : path
      base = dir == '' ? File.dirname(File.expand_path(local)) : dir

      [File.expand_path(local, base), base]
    end
  end

end

if __FILE__ == $0

  # Alternative inputs used while developing:
  #   xml = '../data/msft/msft-20080930.xsd'
  #   xml = ARGV[0]
  xml = '../data/td-net/081220090203088072/tdnet-qcedjpfr-33500-2008-11-30-01-2009-02-20.xsd'

  # Element names to look up once the schema has been parsed.
  probes = [
    'ReversalOfAccumulatedImpairmentLossOnLeasedAssetsOpeCF',
    'ResearchAndDevelopmentExpense_Div_OperatingRevenue_OneYearDelta'
  ]

  # Time the parse plus the lookups and print the measurement.
  timing = Benchmark.measure do
    xsd = Xsd.new.parse(xml)

    # Handy while debugging:
    #   pp xsd.elements.size
    #   xsd.elements.each { |elem| pp elem }
    #   xsd.schemas.each { |sc| pp sc.labels.size }
    #   pp xsd.methods

    probes.each do |element_name|
      # Each parsed element is exposed as a method of the same name.
      pp xsd.__send__(element_name) if xsd.has_element?(element_name)
    end
  end
  puts timing

end
