#
# Cookbook Name:: hadoop
# Recipe:: default
#
# Copyright 2013, whitestar
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

require 'digest/sha2'

# Service accounts keyed by role. The :uid value is also used as the gid of
# the same-named group when the accounts are created further down.
users = Hash[
  [['hadoop', 10001], ['hdfs', 10002], ['mapred', 10003]].map { |account_name, account_id|
    [account_name.to_sym, {:name => account_name, :uid => account_id}]
  }
]

# Templates rendered into /etc/security/limits.d.
limits_files = %w[hdfs.conf yarn.conf mapreduce.conf]

# Templates rendered into the Hadoop conf directory.
conf_files = %w[
  capacity-scheduler.xml
  configuration.xsl
  core-site.xml
  fair-scheduler.xml
  hadoop-env.sh
  hadoop-metrics2.properties
  hadoop-policy.xml
  hdfs-site.xml
  hosts.include
  hosts.exclude
  log4j.properties
  mapred-queue-acls.xml
  mapred-site.xml
  masters
  slaves
  ssl-client.xml.example
  ssl-server.xml.example
]

# Extra Kerberos templates, rendered only for secured 1.0.x installations.
krb5_conf_files = %w[krb5-strong.conf krb5-weak.conf]

# Declares one template resource for every entry of conf_files.
#
# conf_dir       - directory the rendered files are placed in.
# middle_version - version string used to pick the "conf-<middle_version>"
#                  template source directory.
# conf_files     - list of file names; each is both the target file name and
#                  the template name inside the source directory.
# tpl_vars       - value handed to each template's `variables`.
#
# Every rendered file is owned by root:root with mode 0644.
# Returns conf_files.
def conf_template(conf_dir, middle_version, conf_files, tpl_vars)
  source_dir = "conf-#{middle_version}"

  conf_files.each do |file_name|
    target_path = "#{conf_dir}/#{file_name}"

    template target_path do
      source "#{source_dir}/#{file_name}"
      owner 'root'
      group 'root'
      mode '0644'
      variables(tpl_vars)
    end
  end
end

# Parse and validate the requested Hadoop version.
#
# Sets:
#   version        - the raw node attribute, e.g. "1.1.2"
#   major_version  - first component as a string, e.g. "1"
#   middle_version - "<major>.<minor>", e.g. "1.1" (selects the conf-* templates)
#
# The run is aborted via Chef::Application.fatal! when the version is
# malformed or outside the supported range 1.0.0 <= version < 1.2.
version = node['hadoop']['version']
major_version = nil
middle_version = nil

# \A and \z anchor the whole string; ^ and $ only anchor individual lines and
# would let a multi-line value slip through.
version_match = /\A(\d+)\.(\d+)\.(\d+)\.?(\d*)\z/.match(version.to_s)
if version_match
  major_version = version_match[1]
  middle_version = "#{version_match[1]}.#{version_match[2]}"
else
  Chef::Application.fatal!("Invalid Hadoop version: #{version}")
end

# Compare the numeric components rather than the raw strings: a string
# comparison is lexicographic, so e.g. '1.10.0' < '1.2' would be true and an
# unsupported release would be accepted.
major_minor = [version_match[1].to_i, version_match[2].to_i]
unless (major_minor <=> [1, 0]) >= 0 && (major_minor <=> [1, 2]) < 0
  Chef::Application.fatal!("Non supported version: #{version}")
end

# Create a private group and a login-less account for every service user
# except :hadoop; that one is declared afterwards because its group carries
# the other service users as members.
users.each do |key, account|
  next if key == :hadoop

  account_name = account[:name]
  account_id = account[:uid]

  group account_name do
    gid account_id
    members []
    action :create
    # Leave an already existing group untouched.
    not_if "getent group #{account_name}"
  end

  user account_name do
    uid account_id
    gid account_id
    home "/home/#{account_name}"
    shell '/bin/sh'
    password nil
    supports :manage_home => false
    # Leave an already existing account untouched.
    not_if "getent passwd #{account_name}"
  end
end

hadoop_account = users[:hadoop]
hadoop_name = hadoop_account[:name]
hadoop_id = hadoop_account[:uid]

# Shared group: hdfs and mapred are appended as members.
group hadoop_name do
  gid hadoop_id
  members %w[hdfs mapred]
  append true
  action :create
  not_if "getent group #{hadoop_name}"
end

user hadoop_name do
  uid hadoop_id
  gid hadoop_id
  home "/home/#{hadoop_name}"
  shell '/bin/sh'
  password nil
  supports :manage_home => false
  not_if "getent passwd #{hadoop_name}"
end

# Prepare the per-volume directory layout under <vol_root>/<n>.
#
# Volume 0 is always set up. Higher-numbered volumes are set up only while
# their directory already exists; the scan stops at the first missing one.
# active_vol_nums ends up as the number of volumes that were set up.
active_vol_nums = 0
node['grid']['max_vol_nums'].to_i.times do |vol_num|
  target_vol_dir = "#{node['grid']['vol_root']}/#{vol_num}"
  first_volume = (vol_num == 0)

  break unless first_volume || File.directory?(target_vol_dir)

  # [path below the volume, owner, group, mode], in declaration order.
  dir_specs = [
    ['var',     'root', 'root',   '0755'],
    ['var/lib', 'root', 'hadoop', '0775'],
    ['var/log', 'root', 'hadoop', '0775'],
    ['tmp',     'root', 'root',   '1777']
  ]

  # Only volume 0 additionally gets var/run and the hdfs log directory.
  if first_volume
    dir_specs << ['var/run',      'root', 'hadoop', '0775']
    dir_specs << ['var/log/hdfs', 'hdfs', 'hdfs',   '0755']
  end

  dir_specs.each do |sub_path, dir_owner, dir_group, dir_mode|
    directory "#{target_vol_dir}/#{sub_path}" do
      owner dir_owner
      group dir_group
      mode dir_mode
      action :create
      recursive true
    end
  end

  active_vol_nums = vol_num + 1
end
log "This node active volumes: #{active_vol_nums}"

# Locations of the release tarball and its checksum (.mds) file, both
# downloaded into Chef's file cache.
file_cache_path = Chef::Config[:file_cache_path]
install_root = "#{node['grid']['app_root']}/hadoop-#{version}"
tarball = "hadoop-#{version}-bin.tar.gz"
tarball_mds = "#{tarball}.mds"
downloaded_tarball = "#{file_cache_path}/#{tarball}"
downloaded_tarball_mds = "#{file_cache_path}/#{tarball_mds}"

archive_url = node['hadoop']['archive_url']

# Download, verify and unpack the release only when install_root does not
# exist yet. The check runs while the recipe is being compiled.
unless File.directory?(install_root)
  remote_file downloaded_tarball_mds do
    source "#{archive_url}/hadoop-#{version}/#{tarball_mds}"
    action :create_if_missing
  end

  remote_file downloaded_tarball do
    source "#{archive_url}/hadoop-#{version}/#{tarball}"
    action :create_if_missing
  end

  ruby_block "sha256 checksum #{downloaded_tarball}" do
    block do
      # Example of the .mds file format:
      #
      #   hadoop-1.1.2-bin.tar.gz:    MD5 = 4B 59 F4 81 A7 52 D2 A9  20 3D D7 D0 A9 50 5C
      #                                     18
      #   hadoop-1.1.2-bin.tar.gz:   SHA1 = DCCC 01A0 4C42 587D 9DF1  83CA 7DC8 83F7 A6A4
      #                                     8D80
      #   ...
      #
      # All whitespace is stripped first so that digests wrapped over several
      # lines become one contiguous hex string; the scan then yields
      # [algorithm, digest] pairs.
      digests = File.read(downloaded_tarball_mds).
        gsub(/\s+/, '').
        scan(/#{Regexp.escape(tarball)}:(.+?)=([0-9A-Z]+)/)

      sha256_entry = digests.assoc('SHA256')
      if sha256_entry.nil?
        # Fail with a clear message instead of a NoMethodError on nil.
        Chef::Application.fatal!("SHA256 checksum not found in #{downloaded_tarball_mds}")
      end

      expected_checksum = sha256_entry[1]
      Chef::Log.info "#{tarball}: SHA256 = #{expected_checksum}"
      actual_checksum = Digest::SHA256.file(downloaded_tarball).to_s
      Chef::Log.info "#{tarball}: actual SHA256 = #{actual_checksum}"

      # String#casecmp returns -1, 0 or 1, all of which are truthy, so the
      # result must be compared with 0 explicitly; negating it with `!` can
      # never detect a mismatch.
      if expected_checksum.casecmp(actual_checksum) != 0
        Chef::Application.fatal!("Invalid SHA256 checksum of #{downloaded_tarball}, expected: #{expected_checksum}")
      end
    end
    action :create
  end

  # Declare the tar package only if no package[tar] resource is defined yet.
  pkg = 'tar'
  resources(:package => pkg) rescue package pkg do
    action :install
  end

  bash "install_hadoop-#{version}" do
    code <<-EOC
      tar xvzf #{downloaded_tarball} -C #{node['grid']['app_root']}
    EOC
    creates install_root
  end
end

# Point the HADOOP_PREFIX symlink at the installed version. The link is
# deleted and created again on every run.
link node['hadoop']['HADOOP_PREFIX'] do
  to install_root
  action [:delete, :create]
end

# Install the ulimit settings; yarn.conf is skipped for major versions below 2.
applicable_limits = limits_files.reject do |file_name|
  file_name == 'yarn.conf' && major_version.to_i < 2
end

applicable_limits.each do |limits_file|
  template "/etc/security/limits.d/#{limits_file}" do
    source "etc/security/limits.d/#{limits_file}"
    owner 'root'
    group 'root'
    mode '0644'
  end
end

# Render the Hadoop configuration files, passing the number of active
# volumes to the templates.
conf_dir = "#{node['grid']['app_root']}/hadoop-#{version}/conf"
tpl_vars = {:active_vol_nums => active_vol_nums}
conf_template(conf_dir, middle_version, conf_files, tpl_vars)

# Resources that are only needed when security is enabled.
if node['hadoop']['with_security']
  hadoop_home = "#{node['grid']['app_root']}/hadoop-#{version}"

  directory node['hadoop']['this.keytab.dir'] do
    owner 'root'
    group 'root'
    mode '0755'
    action :create
    recursive true
  end

  # task-controller: setuid/setgid, owned by root, executable by the mapred
  # group only.
  file "#{hadoop_home}/bin/task-controller" do
    owner 'root'
    group 'mapred'
    mode '6050'
  end

  template "#{hadoop_home}/conf/taskcontroller.cfg" do
    source "conf-#{middle_version}/taskcontroller.cfg"
    owner 'root'
    group 'root'
    mode '0400'
    variables(:active_vol_nums => active_vol_nums)
  end

  # On machines other than x86_64, install the distribution's jsvc package
  # and link it in as libexec/jsvc.i386.
  unless node[:kernel][:machine] == 'x86_64'
    jsvc_pkg =
      case node[:platform_family]
      when 'rhel' then 'jakarta-commons-daemon-jsvc'
      else 'jsvc'
      end

    package jsvc_pkg do
      action :install
    end

    link "#{install_root}/libexec/jsvc.i386" do
      to '/usr/bin/jsvc'
    end
  end

  # The krb5 example configurations are rendered for 1.0.x only.
  if middle_version == '1.0'
    tpl_vars = nil
    conf_template(conf_dir, middle_version, krb5_conf_files, tpl_vars)
  end
end

# Operator notes emitted through log resources: first the one-time HDFS
# initialization steps, then an example job matching the security setting.
hdfs_init_note = <<-EOM
Note:
You must initialize HDFS in the first installation:
  $ cd #{node['grid']['app_root']}/hadoop
  $ sudo -u hdfs ./bin/hadoop namenode -format
  $ sudo -u hdfs ./bin/hadoop-daemon.sh start namenode
  $ sudo -u hdfs ./bin/hadoop-daemon.sh start datanode
  $ sudo -u hdfs ./bin/hadoop fs -chown hdfs:hdfs /
  $ sudo -u hdfs ./bin/hadoop fs -chmod 755 /
  $ sudo -u hdfs ./bin/hadoop fs -mkdir /user
  $ sudo -u hdfs ./bin/hadoop fs -mkdir #{node['grid']['vol_root']}/0/var/lib/mapred
  $ sudo -u hdfs ./bin/hadoop fs -chown mapred:mapred #{node['grid']['vol_root']}/0/var/lib/mapred
EOM
log hdfs_init_note

# Example for a Kerberos-secured cluster.
secure_job_note = <<-EOM
Note:
Example MapReduce job execution:
  $ sudo -u alice kinit
  Password for alice@LOCALDOMAIN: 
  $ sudo -u alice bin/hadoop jar hadoop-examples-#{version}.jar pi \\
  > -D mapreduce.job.acl-view-job=* -D mapreduce.job.acl-modify-job=alice 5 10
  EOM

# Example for a cluster without security.
plain_job_note = <<-EOM
Note:
Example MapReduce job execution:
  $ sudo adduser alice
  $ sudo -u hdfs ./bin/hadoop fs -mkdir /user/alice
  $ sudo -u hdfs ./bin/hadoop fs -chown alice:alice /user/alice
  $ sudo -u alice ./bin/hadoop jar hadoop-examples-#{version}.jar pi 5 10
  EOM

log(node['hadoop']['with_security'] ? secure_job_note : plain_job_note)

