#!/usr/bin/env python
#
# Script to scrape files from different media sites
# (C) 2008 Kushal Das.  This script is licensed under the
# GNU General Public License, version 2 or any later version.
#
#
# ChangeLog:
# * 0.1 Kushal Das <kushal@fedoraproject.org>
# - Initial version


import urllib2
import urllib
from BeautifulSoup import BeautifulSoup
import re
import sys
import os

def getPage(page):
    """To get the page"""
    f = urllib2.urlopen(page)
    html = f.read()
    soup = BeautifulSoup(html)
    songs = soup.findAll('a', href=re.compile('^http://download.apunkabollywood.com/songs/Audio.*'))
    finalsoup = BeautifulSoup(str(songs[0]))
    link = str(finalsoup('a')[0]['href'])
    filename = link.split('/')[-1]
    link = urllib.quote(link)
    link = link.replace('%3A', ':')
    download(link, filename)

def download(link, filename):
    """Download the file"""
    print "Downloading %s" % (filename)
    f = urllib2.urlopen(link)
    mp3 = f.read()
    f.close()
    f = open(filename,'wb')
    f.write(mp3)
    f.close() 

def movie(link):
    f = urllib2.urlopen(link)
    html = f.read()
    soup = BeautifulSoup(html)
    songs = soup.findAll('a', href=re.compile('^http://www.apunkabollywood.net/browser/download/get/.*'))
    for song in songs[::2]:
        small = BeautifulSoup(str(song))
        page = str(small('a')[0]['href'])
        page = urllib.quote(page)
        page = page.replace('%3A',':')
        print "Getting %s" % (page)
        getPage(page.replace('%3A',':'))


if __name__ == '__main__':
    try:
        config = open('movie.txt')
    except:
        print "Please put the links to a movie.txt in the current directory"
        sys.exit(-1)
    names = config.readlines()
    config.close()
    for name in names:
        dirname = name.split('/')[-1]
        dirname = dirname[:-1]
        print "Creating ", dirname
        os.mkdir(dirname)
        os.chdir(dirname)
        print "Current Directory ", dirname
        movie(name)
        os.chdir('..')
