#!/usr/bin/env python
# ----------------------------------------------------------------------------
#         File:           fanbox_down.py
#         Contains:       Main application code for fanbox_down
#         Copyright:      (C) 2022, 2023 supercell <stigma@disroot.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, version 3 only.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# ----------------------------------------------------------------------------
from __future__ import print_function

import json
import os
import sys
from time import sleep

from argparse import ArgumentParser
from datetime import datetime
from datetime import timedelta
from datetime import tzinfo
from math import pow

# File that log() appends to.  The FANBOX_DOWN_LOG_FILE environment variable
# overrides the default name "fanbox_down.log".
LOG_FILENAME = os.getenv("FANBOX_DOWN_LOG_FILE", "fanbox_down.log")

def log(msg, section="debug"):
    """
    Append the line "[section]: msg" to the file named by LOG_FILENAME.

    msg     -- text to record.
    section -- tag written in square brackets at the start of the line.

    Errors raised while opening or writing the log file propagate to the
    caller unchanged.
    """
    # Python 2: unicode objects must be encoded before being formatted into
    # a byte string.  Plain str objects are already bytes there, and calling
    # encode() on them would first attempt an implicit ASCII decode.
    if sys.version_info[0] == 2 and not isinstance(msg, str):
        msg = msg.encode("utf-8")

    # The with-statement closes the file on every path and has nothing to
    # clean up when open() itself fails, so the original I/O error is what
    # the caller sees.
    with open(LOG_FILENAME, 'a') as log_file:
        print("[{}]: {}".format(section, msg), file=log_file)

class JST(tzinfo):
    """Fixed-offset tzinfo reporting UTC+09:00 under the name 'JST'."""

    # timedelta objects are immutable, so one shared instance of each
    # value is enough for every call.
    _UTC_OFFSET = timedelta(hours=9)
    _NO_DST = timedelta(0)

    def utcoffset(self, dt):
        """Return the constant nine hour offset from UTC."""
        return self._UTC_OFFSET

    def tzname(self, dt):
        """Return the zone abbreviation."""
        return 'JST'

    def dst(self, dt):
        """Return a zero adjustment; no daylight saving is applied."""
        return self._NO_DST

class Config():
    """
    Runtime settings for fanbox_down.

    output_dir -- directory below which downloads are stored; defaults to
                  <home>/Pictures/fanbox.
    end_date   -- JST-aware datetime set by set_end_date(), or None when
                  no valid end date has been set.
    """

    def __init__(self):
        # HOME keeps precedence when it is set.  When it is missing, fall
        # back to expanduser() instead of raising KeyError.
        home = os.environ.get('HOME') or os.path.expanduser('~')
        self.output_dir = os.path.join(home, 'Pictures', 'fanbox')
        self.end_date = None

    def load(self, file_object):
        """
        Read settings from a JSON document.

        file_object -- readable text file holding a JSON object.  Only the
                       "outputDirectory" key is consulted; when present it
                       replaces output_dir.

        Raises ValueError when the content is not valid JSON.
        """
        json_obj = json.loads(file_object.read())

        # A document whose top level is not an object carries no settings;
        # ignore it rather than failing on the membership test.
        if isinstance(json_obj, dict) and 'outputDirectory' in json_obj:
            self.output_dir = json_obj['outputDirectory']

    def set_end_date(self, end_date):
        """
        Parse `end_date` (YYYY-mm-dd) and store it as a JST-aware datetime.

        On a parse failure end_date is reset to None and an error message
        is printed; no exception is raised.
        """
        try:
            self.end_date = datetime.strptime(end_date, '%Y-%m-%d')
            self.end_date = self.end_date.replace(tzinfo=JST())
        except ValueError:
            self.end_date = None
            print('error: failed to correctly parse end date %s in format YYYY-mm-dd' % end_date)


def xdg_config(subdir = None):
    """
    Return the value of XDG_CONFIG_HOME, or the fallback, plus `subdir`

    subdir -- optional path component appended to the configuration
              directory; when None the directory itself is returned.

    The fallback is <home>/.config and is used when XDG_CONFIG_HOME is
    unset or empty.
    """
    cfg_dir = os.environ.get('XDG_CONFIG_HOME')

    if not cfg_dir:
        # Only look up the home directory when it is needed, so a missing
        # HOME does not matter while XDG_CONFIG_HOME is set.
        home = os.environ.get('HOME') or os.path.expanduser('~')
        cfg_dir = os.path.join(home, '.config')

    if subdir is None:
        return cfg_dir

    return os.path.join(cfg_dir, subdir)

def set_id(id):
    """
    Store `id` (the FANBOXSESSID value) as the first line of the 'id' file
    inside the fanbox_down configuration directory.

    The configuration directory is created when it does not exist yet.
    Errors from creating the directory or writing the file propagate.
    """
    config_dir = xdg_config('fanbox_down')

    # On a first run the directory may be missing, in which case open()
    # below would fail.  Python2 compat: no exist_ok parameter.
    if not os.path.isdir(config_dir):
        os.makedirs(config_dir)

    id_file_path = os.path.join(config_dir, 'id')

    # Leaving the with-block flushes and closes the file.
    with open(id_file_path, 'w') as id_file:
        id_file.write(id)
        id_file.write('\n')

def get_id():
    """
    Return the FANBOXSESSID to use, or None when none is available.

    The FANBOX_DOWN_SESSID environment variable wins when it is non-empty.
    Otherwise the first line of the 'id' file in the fanbox_down
    configuration directory is used, stripped of surrounding whitespace.
    """
    envid = os.environ.get("FANBOX_DOWN_SESSID", "")
    if envid:
        return envid

    id_file_path = os.path.join(xdg_config('fanbox_down'), 'id')

    if not os.path.exists(id_file_path):
        return None

    with open(id_file_path) as id_file:
        stored = id_file.readline().strip()

    # A blank first line means "no ID".  Returning "" would pass the
    # callers' None check and produce an empty session cookie.
    return stored or None

def make_http_request(uri, headers = None):
    """
    Open `uri` and return the response object produced by urlopen().

    uri     -- address to request.
    headers -- sequence of (name, value) pairs added to the request; when
               omitted or None a single "Accept: */*" header is sent.

    Exceptions raised by urlopen() propagate to the caller, who is also
    responsible for reading and closing the returned response.
    """
    # The module providing Request/urlopen has a different name on
    # Python 2 and Python 3.
    if sys.version_info[0] == 2:
        import urllib2 as urllib
    else:
        import urllib.request as urllib

    # None stands in for the default so that no mutable list is shared
    # between calls through the signature.
    if headers is None:
        headers = [('Accept', '*/*')]

    req = urllib.Request(uri)
    for header in headers:
        req.add_header(header[0], header[1])

    return urllib.urlopen(req)

def make_fanbox_request(uri):
    """
    Request `uri` with the FANBOXSESSID cookie and the browser-like headers
    used for fanbox, returning the response from make_http_request().

    Prints an error to stderr and exits with status 1 when get_id()
    yields no ID.
    """
    session_id = get_id()

    if session_id is None:
        print("[error]: make_fanbox_request failed to retrieve ID.",
              file=sys.stderr)
        sys.exit(1)

    cookie = "FANBOXSESSID={}".format(session_id)
    user_agent = "Mozilla/5.0 (Windows NT 10.0; rv:99.0) Gecko/20100101 Firefox/99.0"

    return make_http_request(uri, [
        ("Accept", "application/json, text/plain, */*"),
        ("Cookie", cookie),
        ("Referer", "https://www.fanbox.cc"),
        ("Origin", "https://www.fanbox.cc"),
        ("User-Agent", user_agent),
    ])

def make_api_request(path):
    """
    Fetch https://api.fanbox.cc/<path> through make_fanbox_request() and
    return the response body decoded as UTF-8 and parsed as JSON.
    """
    import json

    endpoint = "https://api.fanbox.cc/{}".format(path)
    response = make_fanbox_request(endpoint)
    payload = response.read().decode("utf-8")

    return json.loads(payload)

def fdmkdir(config, info):
    """
    Create (if necessary) and return the directory for a post:
    <config.output_dir>/<user name>/<post title>.

    config -- object with an `output_dir` attribute.
    info   -- post dictionary providing info['user']['name'] and
              info['title'].

    The user name and title come from the remote API, so each is reduced
    to a single path component: path separators are replaced with '_' and
    the names '.' and '..' become '_'.  This keeps the result below
    output_dir.
    """
    def _component(text):
        # os.altsep is None on platforms with a single separator.
        for separator in (os.sep, os.altsep):
            if separator:
                text = text.replace(separator, '_')
        if text in ('.', '..'):
            text = '_'
        return text

    full_path = os.path.join(config.output_dir,
                             _component(info['user']['name']),
                             _component(info['title']))

    # Python2 compat: exist_ok isn't a parameter in os.makedirs, so
    # tolerate the directory already being there ourselves.
    try:
        os.makedirs(full_path)
    except OSError:
        if not os.path.isdir(full_path):
            raise

    return full_path

def py2compat_datetime_strptime(datestr):
    """Parse ISO 8601 formatted strings to JST time.

    datestr -- text shaped like YYYY-mm-ddTHH:MM:SS+HH:MM (or -HH:MM).

    Returns a datetime carrying the JST tzinfo that denotes the same
    instant as `datestr`, or None when the string is too short or does not
    end in a +HH:MM / -HH:MM offset.  Raises ValueError when the date/time
    part or the offset digits cannot be parsed.
    """
    # More specifically, python2's %z modifier doesn't support
    # the timezone being formatted as +00:00, only +0000.
    if len(datestr) < 19:
        return None

    sign = datestr[-6]
    if sign not in ('+', '-') or datestr[-3] != ':':
        return None

    offset = timedelta(hours=int(datestr[-5:-3]), minutes=int(datestr[-2:]))
    if sign == '-':
        offset = -offset

    date_obj = datetime.strptime(datestr[:-6], "%Y-%m-%dT%H:%M:%S")

    # Move the wall-clock time from the offset given in the string to JST
    # before attaching the JST tzinfo.  For +09:00 input this is a no-op.
    jst = JST()
    date_obj = date_obj - offset + jst.utcoffset(None)
    return date_obj.replace(tzinfo=jst)


def set_file_date(path, date):
    """
    Set the access and modification time of `path` to the instant
    described by `date`.

    path -- file whose timestamps are updated.
    date -- timestamp text as accepted by py2compat_datetime_strptime().

    When `date` cannot be parsed a warning is printed to stderr and the
    file is left untouched.
    """
    import calendar

    date_obj = py2compat_datetime_strptime(date)

    if date_obj is None:
        print("[warning]: could not parse date '{}'; timestamp of {} left unchanged".format(
            date, path), file=sys.stderr)
        return

    # utctimetuple() applies the datetime's UTC offset and timegm() reads
    # the tuple as UTC, so the result does not depend on the time zone of
    # the machine (unlike time.mktime on the local-looking tuple).
    posix_time = calendar.timegm(date_obj.utctimetuple())
    os.utime(path, (posix_time, posix_time))

def _article_emit(text, out):
    """Print `text` to the file `out`, encoding unicode first on Python 2."""
    if sys.version_info[0] == 2 and not isinstance(text, str):
        text = text.encode('utf-8')
    print(text, file=out)


def _article_fetch_assets(directory, assets, url_key, noun, headers,
                          published, delay):
    """Download every entry of an article's imageMap/fileMap into `directory`."""
    total = len(assets)
    for number, asset in enumerate(assets.values(), 1):
        print('[article]: Downloading {} {} of {}.'.format(noun, number, total))
        # The "id" is used for the file name because it matches the
        # [image:...] / [file:...] markers written to content.txt.
        target = os.path.join(directory,
                              '{}.{}'.format(asset['id'], asset['extension']))
        response = make_http_request(asset[url_key], headers)
        try:
            with open(target, 'wb') as output:
                output.write(response.read())
        finally:
            response.close()
        set_file_date(target, published)
        # Pause between requests.
        sleep(delay)


def download_article(config, info):
    """
    Download an "article" post: its text, embeds, images and attachments.

    config -- object passed on to fdmkdir() to locate the output directory.
    info   -- post dictionary; info['body']['blocks'] lists the content
              blocks, with optional 'urlEmbedMap', 'imageMap' and 'fileMap'
              entries beside it.

    Text blocks are written to content.txt, embeds to url_embed.html (which
    is removed again when it stays empty), and images/files are saved as
    <id>.<extension>.  Unsupported block types are reported and logged.

    Returns True on success, False when the post has no blocks or no
    FANBOXSESSID is available.
    """
    body = info.get('body')
    if not body or 'blocks' not in body:
        print('[article]: Failed: No blocks to download from.')
        return False

    # Create directory
    print('[article]: Creating directory')
    directory = fdmkdir(config, info)

    # Create article text file
    print('[article]: Downloading article content')
    url_embeds_file_path = os.path.join(directory, 'url_embed.html')
    content_file_path = os.path.join(directory, 'content.txt')

    # Both files are closed even when a block raises part-way through.
    with open(url_embeds_file_path, 'w') as url_embeds_file:
        with open(content_file_path, 'w') as content_file:
            for block in body['blocks']:
                kind = block['type']

                if kind == 'p':
                    _article_emit(block['text'], content_file)
                elif kind == 'header':
                    # Header text underlined with one '*' per character.
                    header = block['text']
                    _article_emit(header + '\n' + '*' * len(header),
                                  content_file)
                elif kind == 'file':
                    _article_emit('[file:{}]'.format(block['fileId']),
                                  content_file)
                elif kind == 'image':
                    _article_emit('[image:{}]'.format(block['imageId']),
                                  content_file)
                elif kind == 'url_embed':
                    embed_id = block['urlEmbedId']
                    # Print embed to HTML file
                    _article_emit('<h2>' + embed_id + '</h2>', url_embeds_file)
                    _article_emit(body['urlEmbedMap'][embed_id]['html'] + '\n',
                                  url_embeds_file)
                    # Print embedID to content.txt
                    _article_emit('[embed:{}]'.format(embed_id), content_file)
                else:
                    detail = "%s:\n\t%s \"%s\"." % (
                        "Failed to store article content in file",
                        "Unsupported content type",
                        kind)
                    print("[article]: " + detail)
                    # Goes through log() so the configured log file is used.
                    log(detail, "article")

    if os.stat(url_embeds_file_path).st_size == 0:
        os.remove(url_embeds_file_path)

    published = info['publishedDatetime']
    set_file_date(content_file_path, published)

    session_id = get_id()
    if session_id is None:
        print('[article]: Error: Failed to get FANBOXSESSID')
        return False

    headers = [
        ('Accept', '*/*'),
        ('Cookie', 'FANBOXSESSID={}'.format(session_id)),
        ('Referer', 'https://www.fanbox.cc'),
        ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; rv:98.0) Gecko/20100101 Firefox/98.0')
    ]

    # Download images.
    if body.get('imageMap'):
        print('[article]: Downloading article images')
        _article_fetch_assets(directory, body['imageMap'], 'originalUrl',
                              'image', headers, published, 2)

    # Download attached files.
    if body.get('fileMap'):
        print("[article]: Downloading article files")
        _article_fetch_assets(directory, body['fileMap'], 'url',
                              'file', headers, published, 5)

    return True

def download_image(config, item):
    """
    Download every image of an "image" post.

    config -- object passed on to fdmkdir() to locate the output directory.
    item   -- post dictionary; item['body']['images'] lists the images,
              each with 'id', 'extension' and 'originalUrl'.

    Images are saved as <id>.<extension> and stamped with the post's
    'publishedDatetime'.  The post directory is only created once it is
    known that there is something to store.

    Returns True on success, False when the post has no images or no
    FANBOXSESSID is available.
    """
    body = item.get('body')
    images = body.get('images') if body else None

    if not images:
        print('[image]: No content to download!')
        return False

    session_id = get_id()

    if session_id is None:
        print('[image]: Error: Failed to retrieve user FANBOXSESSID')
        return False

    # Make directory
    directory = fdmkdir(config, item)

    headers = [
        ('Accept', '*/*'),
        ('Cookie', 'FANBOXSESSID={}'.format(session_id)),
        ('Referer', 'https://www.fanbox.cc'),
        ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; rv:98.0) Gecko/20100101 Firefox/98.0')
    ]

    # Download images.
    length = len(images)
    for number, image in enumerate(images, 1):
        print('[image]: Downloading image {} of {}.'.format(number, length))
        image_file_path = os.path.join(
            directory, '{}.{}'.format(image['id'], image['extension']))
        raw_image = make_http_request(image['originalUrl'], headers)
        try:
            with open(image_file_path, 'wb') as image_file:
                image_file.write(raw_image.read())
        finally:
            raw_image.close()

        set_file_date(image_file_path, item['publishedDatetime'])
        # Pause between requests.
        sleep(2)

    return True



def download_file(config, item):
    """
    Download every attachment of a "file" post, plus its text if present.

    config -- object passed on to fdmkdir() to locate the output directory.
    item   -- post dictionary; item['body']['files'] lists the attachments
              (each with 'name', 'extension', 'url' and 'size'), and
              item['body']['text'] optionally holds the post text.

    The text is written to text.txt and each attachment to
    <name>.<extension>, stamped with the post's 'publishedDatetime'.  Path
    separators in an attachment's file name are replaced with '_' so the
    file always lands inside the post directory.

    Returns True on success, False when the post has no files or no
    FANBOXSESSID is available.
    """
    body = item.get('body')
    post_files = body.get('files') if body else None

    if not post_files:
        print("[file]: No files to download")
        return False

    # A u"" literal is unicode on Python 2 and plain str on Python 3, so
    # one call serves both.
    log(u"Downloading post '{}' by {}".format(item['title'], item['creatorId']),
        "download_file")

    session_id = get_id()

    if session_id is None:
        print('[file]: Error: Failed to retrieve user FANBOXSESSID')
        return False

    directory = fdmkdir(config, item)

    headers = [
        ('Accept', '*/*'),
        ('Cookie', 'FANBOXSESSID={}'.format(session_id)),
        ('Referer', 'https://www.fanbox.cc'),
        ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; rv:98.0) Gecko/20100101 Firefox/98.0')
    ]

    if 'text' in body:
        print("[file]: Saving text to file...")
        with open(os.path.join(directory, "text.txt"), 'w') as f:
            post_text = body['text']
            if sys.version_info.major == 2:
                print(post_text.encode('utf-8'), file=f)
            else:
                print(post_text, file=f)

    # Download files.
    numfiles = len(post_files)
    for number, post_file in enumerate(post_files, 1):
        log("Downloading a .{} file".format(post_file['extension']), "download_file")
        print("[file]: Downloading file {} of {} ({} MB)".format(number, numfiles,
            post_file['size'] / pow(1000, 2)))

        # The name is supplied by the remote side; keep it to a single
        # path component.  os.altsep is None where only one separator exists.
        file_name = post_file['name'] + '.' + post_file['extension']
        for separator in (os.sep, os.altsep):
            if separator:
                file_name = file_name.replace(separator, '_')
        path_file = os.path.join(directory, file_name)

        raw_file = make_http_request(post_file['url'], headers)
        try:
            with open(path_file, 'wb') as output:
                output.write(raw_file.read())
        finally:
            raw_file.close()

        set_file_date(path_file, item['publishedDatetime'])
        # Pause between requests.
        sleep(2)

    return True

def download_item(config, item):
    """
    Download one post from a listing.

    config -- object handed to the type-specific download function.
    item   -- listing entry with 'title', 'id' and 'isRestricted'.

    Restricted posts are skipped and logged.  Otherwise the full post is
    requested via the post.info API call and passed to download_article(),
    download_image() or download_file() according to its 'type'; any other
    type is logged and reported as unsupported.
    """
    if sys.version_info[0] == 2:
        item_title = item["title"].encode("UTF-8")
    else:
        item_title = item["title"]

    print("[item]: Downloading {}".format(item_title))

    if item['isRestricted']:
        print('[item]: Skipped! Restricted content.')
        log("Skipped {} (isRestricted == True)".format(item["id"]),
            "download_item")
        return

    post_info = make_api_request('post.info?postId={}'.format(item['id']))
    post = post_info['body']
    post_type = post['type']

    if post_type == 'article':
        success = download_article(config, post)
    elif post_type == 'image':
        success = download_image(config, post)
    elif post_type == 'file':
        success = download_file(config, post)
    else:
        success = False
        # Written through log() so the configured log file is used.
        log("Skipped {} -- unsupported post type \"{}\"".format(
            post["id"], post_type), "item")

        print('[item]: Failed! Unsupported post type "{}"'.format(post_type))

    if success:
        print("[item]: Completed {}".format(item_title))



def download_user(config, username):
    """
    Download the posts of the creator `username`, page by page.

    config   -- settings object; when config.end_date is set, downloading
                stops at the first post published before it.
    username -- creator ID used in the post.paginateCreator API call.

    Exits with status 1 when the pagination response has no 'body', and
    with status 0 when it lists no pages.  A page whose response has no
    'body' is reported, logged and skipped.
    """
    log("Requesting pages for {}".format(username), "download_user")

    pages_res = make_api_request('post.paginateCreator?creatorId={}'.format(
        username))

    if 'body' not in pages_res:
        print('error: no body in request for pagination (user: {})'.format(
            username), file=sys.stderr)
        sys.exit(1)

    pages = pages_res['body']
    number_of_pages = len(pages)

    if 0 == number_of_pages:
        print('warning: no pages found! not downloading anything.')
        sys.exit(0)

    # page_number is 1-based and shared by the progress message, the error
    # message and the "is there another page" test, so a skipped page
    # cannot make them disagree.
    for page_number, page_uri in enumerate(pages, 1):
        print('[user]: Downloading page {} of {} for {}'.format(
            page_number, number_of_pages, username))
        page_res = make_fanbox_request(page_uri)
        page_json = json.loads(page_res.read().decode("utf-8"))

        if 'body' not in page_json:
            print('error: no body in request for page {} for user {}'.format(
                page_number, username), file=sys.stderr)
            log("ERROR: No 'body' in page_res object", "download_user")
            continue

        for item in page_json['body']['items']:
            date_obj = py2compat_datetime_strptime(item['publishedDatetime'])
            # An unparseable date (None) cannot be compared; such a post is
            # treated as not being past the end date.
            if (config.end_date and date_obj is not None
                    and date_obj < config.end_date):
                print("[user]: Finished downloading -- past end date.")
                return

            download_item(config, item)
            # Pause between posts.
            sleep(4)

        if page_number < number_of_pages:
            print('[user]: Preparing next page...')
            sleep(10)

def download_supported(config):
    """
    Download the posts returned by the post.listSupporting API call
    (requested with limit=10).

    config -- settings object; when config.end_date is set, downloading
              stops at the first post published before it.

    Exits with status 1 when the response carries no 'items'.
    """
    print("[supported]: Fetching recent supported posts...")
    list_supported = make_api_request("post.listSupporting?limit=10")

    body = list_supported.get("body")
    if not body or "items" not in body:
        print("[supported]: Error occurred: no items returned in response",
              file=sys.stderr)
        # Written through log() so the configured log file is used.
        log("Failed \"items\" not in response.", "supported")
        sys.exit(1)

    for item in body["items"]:
        date_obj = py2compat_datetime_strptime(item["publishedDatetime"])
        # An unparseable date (None) cannot be compared; such a post is
        # treated as not being past the end date.
        if (config.end_date and date_obj is not None
                and date_obj < config.end_date):
            print("[supported]: Finished downloading -- past end date.")
            return

        download_item(config, item)
        # Pause between posts.
        sleep(10)


def main(args):
    """
    Command-line entry point.

    args -- full argument vector; args[0] (the program name) is skipped.

    Loads config.json from the fanbox_down configuration directory when it
    exists, applies --set-id and --end-date, then downloads either the
    posts of --user or the recent supported posts.
    """
    parser = ArgumentParser(prog='fanbox_down')
    parser.add_argument('-e', '--end-date', metavar='DATE',
                        help='Specify the end date.')
    parser.add_argument('--set-id', metavar='ID',
                        help='Update the FANBOXSESSID used.')
    parser.add_argument('-u', '--user',
                        help='Download content from a specific username.')
    parser.add_argument('-v', '--version', action='version',
                        version='%(prog)s 1.0.0')
    options = parser.parse_args(args[1:])

    config = Config()
    config_file_path = os.path.join(xdg_config('fanbox_down'), 'config.json')

    if os.path.exists(config_file_path):
        with open(config_file_path) as config_file:
            config.load(config_file)

    if options.set_id:
        set_id(options.set_id)
        print('Successfully changed the FANBOXSESSID!')

        question = 'Do you want to continue downloading new posts? [yes/(no)] '
        # Python 2 reads a line of text with raw_input(), Python 3 with input().
        if sys.version_info[0] == 2:
            answer = raw_input(question)
        else:
            answer = input(question)

        # Anything other than y/yes (in any case) means stop here.
        if answer.lower() not in ('y', 'yes'):
            sys.exit(0)

    if options.end_date:
        config.set_end_date(options.end_date)
        # set_end_date() leaves None behind when the date could not be parsed.
        if config.end_date is None:
            sys.exit(1)

    if not options.user:
        download_supported(config)
        return

    download_user(config, options.user)
    sys.exit(0)


# Run main() only when this file is executed as a script.
if __name__ == '__main__':
    try:
        main(sys.argv)
    except KeyboardInterrupt:
        # Interrupting the program (Ctrl-C) ends it with exit status 0
        # instead of a traceback.
        sys.exit(0)
