Source code for onix.scrapers

"""Functionality for pulling information from PS and converting to JSON"""
from __future__ import print_function

import copy
import datetime
import json
import os

import pkg_resources

from future.moves.urllib.request import urlopen
from github import Github
from py_mini_racer.py_mini_racer import MiniRacer

from onix.utilities import sanitize_string


def get_commit_from_timestamp(timestamp):
    """
    Get the PS commit hash corresponding to the last commit to master as of
    the specified timestamp

    Args:
        timestamp (datetime.datetime) : The date and time (in UTC) desired

    Returns:
        str : The commit hash corresponding to the last commit to master as
            of the specified timestamp

    Examples:
        >>> from datetime import datetime
        >>> from onix.scrapers import get_commit_from_timestamp
        >>> print(get_commit_from_timestamp(datetime(2016, 12, 1, 5, 0)))
        841d4c9e135d07d7affb725bf42f81df376b2de1
    """
    g = Github()
    ps = g.get_repo('Zarel/Pokemon-Showdown')
    return ps.get_commits(until=timestamp).get_page(0)[0].sha


def _write(data, destination_filename):
    """
    Helper method to write data to file

    Args:
        data (str) : the data to be written
        destination_filename (str) : filename to save to
    """
    directory = os.path.dirname(destination_filename)
    try:
        os.makedirs(directory)
    except OSError:
        if not os.path.isdir(directory):
            raise  # pragma: no cover
    json.dump(json.loads(data), open(destination_filename, 'w+'))


def _scrape(url, entry, commit=None, destination_filename=None):
    """
    Pulls javascript from the specified URL, extracts the requested entry
    and returns it as a JSON string. Optionally writes said JSON string to
    the requested file

    Args:
        url (str) : the location of the javascript file on the PS Github
            (url following ``master/``) or the full url to the file
        entry (str) : the ``exports`` entry we seek to extract
        commit (:obj:`str`, optional): if specified, will pull the version
            of the file as of the commit specified by this full hash.

            .. note::
               If a full url is specified in the `url` field, this argument
               will be ignored.

        destination_filename (:obj:`str`, optional): if specified, the JSON
            string will be written to this file

    Returns:
        str : the JSON string representation of the requested data
    """
    url_prefix = \
        "https://raw.githubusercontent.com/Zarel/Pokemon-Showdown/"
    if commit:
        url_prefix += '{}/'.format(commit)
    else:
        url_prefix += 'master/'
    if not url.startswith('https'):
        url = url_prefix + url
    prerun = 'exports={}; '
    postrun = 'JSON.stringify(exports.' + entry + ', null, 2)'
    javascript = urlopen(url).read().decode('utf-8')
    json_string = MiniRacer().eval(prerun + javascript + postrun)
    if destination_filename:
        _write(json_string, destination_filename)
    return json_string


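# A minimal sketch (not part of the original module) of how the private
# helpers compose: ``_scrape`` stitches "exports={}; " onto the downloaded
# javascript, evaluates it in an embedded V8 context (MiniRacer) and
# serializes a single ``exports`` entry, so any PS data file that populates
# ``exports`` can be pulled the same way. The file and entry names below are
# illustrative assumptions:
#
#     json_string = _scrape('data/abilities.js', 'BattleAbilities',
#                           destination_filename='.psdata/abilities.json')
#     abilities = json.loads(json_string)

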
def scrape_battle_formats_data(commit=None):
    """
    Grabs data including tier information for Pokemon. Useful for
    extracting banlists for the standard tiers.

    Args:
        commit (:obj:`str`, optional): if specified, will pull the version
            of the file as of the commit specified by this full hash.

    Returns:
        dict : the data encoded in `formats-data.js`. The keys are the
            species / forme names

    Examples:
        >>> from onix import scrapers
        >>> commit = '5c14138b54dddf8bc034433eaef950a1c6eaf734'
        >>> battle_formats = scrapers.scrape_battle_formats_data(commit=commit)
        >>> print(battle_formats['bulbasaur']['tier'])
        LC
    """
    url = 'data/formats-data.js'
    entry = 'BattleFormatsData'
    folder = '.psdata/'
    if commit:
        folder += '{}/'.format(commit)
    filename = folder + 'formats_data.json'
    return json.loads(_scrape(url, entry, commit, filename))


def scrape_battle_pokedex(commit=None):
    """
    Grabs data including base stats, types and appearance-only form info,
    then does a little bit of post-processing to unlink Pokemon that cannot
    move between formes during battle (e.g.: Rotom-Wash)

    Args:
        commit (:obj:`str`, optional): if specified, will pull the version
            of the file as of the commit specified by this full hash.

            .. note::
               In most cases, one shouldn't need to use old versions of the
               Pokedex, as data very rarely gets overwritten or removed. If
               one is looking to pull information from a previous
               generation, use a mod, not an old commit.

    Returns:
        dict : the data encoded in `pokedex.js`. The keys are the species /
            forme names

    Examples:
        >>> from onix import scrapers
        >>> pokedex = scrapers.scrape_battle_pokedex()
        >>> print(pokedex['bulbasaur']['baseStats']['hp'])
        45
    """
    url = 'data/pokedex.js'
    entry = 'BattlePokedex'
    folder = '.psdata/'
    if commit:
        folder += '{}/'.format(commit)
    filename = folder + 'pokedex.json'
    pokedex = json.loads(_scrape(url, entry, commit))
    baseable_formes = pkg_resources.resource_string(
        'onix.resources', 'baseable_formes.txt').decode('utf-8').splitlines()
    for species in pokedex.keys():
        if 'baseSpecies' not in pokedex[species]:
            continue
        if species.endswith(('mega', 'megax', 'megay')):
            # intentionally left off primal
            continue
        if species not in baseable_formes:
            del pokedex[species]['baseSpecies']
    _write(json.dumps(pokedex, indent=4), filename)
    return pokedex


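# An illustrative check (not from the original module) of the forme
# post-processing above, using the docstring's own Rotom-Wash example; the
# mega behavior follows from the ``endswith`` guard. Exact dex keys are
# assumptions:
#
#     pokedex = scrape_battle_pokedex()
#     'baseSpecies' in pokedex['rotomwash']       # unlinked: battle-locked
#     'baseSpecies' in pokedex['charizardmegax']  # kept: megas are skipped

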
def scrape_battle_aliases(commit=None):
    """
    Grabs Pokemon aliases.

    Args:
        commit (:obj:`str`, optional): if specified, will pull the version
            of the file as of the commit specified by this full hash.

    Returns:
        dict : the data encoded in `aliases.js`. The keys are the alternate
            names, the values are the correct names.

    Examples:
        >>> from onix import scrapers
        >>> aliases = scrapers.scrape_battle_aliases()
        >>> print(aliases['forry'])
        Forretress
    """
    url = 'data/aliases.js'
    entry = 'BattleAliases'
    folder = '.psdata/'
    if commit:
        folder += '{}/'.format(commit)
    filename = folder + 'aliases.json'
    return json.loads(_scrape(url, entry, commit, filename))


def scrape_battle_items(commit=None):
    """
    Grabs items. Used for determining mega evolutions and for pretty-print
    lookups.

    Args:
        commit (:obj:`str`, optional): if specified, will pull the version
            of the file as of the commit specified by this full hash.

            .. note::
               In most cases, one shouldn't need to use old versions of the
               Item dex, as data very rarely gets overwritten or removed. If
               one is looking to pull information from a previous
               generation, use a mod, not an old commit.

    Returns:
        dict : the data encoded in `items.js`

    Examples:
        >>> from onix import scrapers
        >>> items = scrapers.scrape_battle_items()
        >>> print(items['gardevoirite']['megaEvolves'])
        Gardevoir
    """
    url = 'data/items.js'
    entry = 'BattleItems'
    folder = '.psdata/'
    if commit:
        folder += '{}/'.format(commit)
    filename = folder + 'items.json'
    return json.loads(_scrape(url, entry, commit, filename))


def scrape_battle_movedex(commit=None):
    """
    Grabs move metadata.

    Args:
        commit (:obj:`str`, optional): if specified, will pull the version
            of the file as of the commit specified by this full hash.

            .. note::
               In most cases, one shouldn't need to use old versions of the
               Move dex, as data very rarely gets overwritten or removed. If
               one is looking to pull information from a previous
               generation, use a mod, not an old commit.

    Returns:
        dict : the data encoded in `moves.js`

    Examples:
        >>> from onix import scrapers
        >>> moves = scrapers.scrape_battle_movedex()
        >>> print(moves['scald']['name'])
        Scald
    """
    url = 'data/moves.js'
    entry = 'BattleMovedex'
    folder = '.psdata/'
    if commit:
        folder += '{}/'.format(commit)
    filename = folder + 'moves.json'
    return json.loads(_scrape(url, entry, commit, filename))


def scrape_formats(commit=None):
    """
    Grabs rulesets for the various metagames and saves them as
    `formats.json`. Useful for extracting, say, banlists for non-standard
    tiers. Does a bit of post-processing to transform the data from a list
    to a dict and to expand out any inherited rulesets

    Args:
        commit (:obj:`str`, optional): if specified, will pull the version
            of the file as of the commit specified by this full hash.

    Returns:
        dict : the data encoded in `formats.js`, post-processed for
            increased utility

    Examples:
        >>> from onix import scrapers
        >>> commit = '5c14138b54dddf8bc034433eaef950a1c6eaf734'
        >>> formats = scrapers.scrape_formats(commit=commit)
        >>> print(formats['lc']['maxLevel'])
        5
    """
    url = 'config/formats.js'
    entry = 'Formats'
    folder = '.psdata/'
    if commit:
        folder += '{}/'.format(commit)
    filename = folder + 'formats.json'
    raw_data = json.loads(_scrape(url, entry, commit))

    formats = dict()
    for metagame in raw_data:
        if 'name' not in metagame.keys():
            continue

        # expand out rulesets
        if 'ruleset' in metagame.keys():  # I think this is always True
            for rule in copy.deepcopy(metagame['ruleset']):
                rule_sanitized = sanitize_string(rule)
                if rule_sanitized in formats.keys():
                    metagame['ruleset'].remove(rule)
                    metagame['ruleset'] += formats[rule_sanitized].get(
                        'ruleset', [])
        formats[sanitize_string(metagame['name'])] = metagame

    _write(json.dumps(formats, indent=4), filename)
    return formats


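# An illustrative end-to-end sketch (not part of the original module):
# resolving a timestamp to a commit and pinning every scrape to it keeps the
# pulled data files mutually consistent. Requires network access to the
# Github API and raw.githubusercontent.com.
#
#     from datetime import datetime
#     from onix import scrapers
#
#     commit = scrapers.get_commit_from_timestamp(
#         datetime(2016, 12, 1, 5, 0))
#     formats_data = scrapers.scrape_battle_formats_data(commit=commit)
#     formats = scrapers.scrape_formats(commit=commit)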