Magic-Spoiler/wizards_scraper.py

270 lines
11 KiB
Python

# -*- coding: utf-8 -*-
import requests
from lxml import html
from PIL import Image
import os
def scrape_fullspoil(
        url="http://magic.wizards.com/en/articles/archive/card-image-gallery/hour-devastation",
        setinfo={"code": "HOU"}, showRarityColors=False,
        showFrameColors=False, manual_cards=[], delete_cards=[]):
    """Scrape a Wizards card image gallery page and annotate the cards found.

    Every ``<img>`` matched on the page becomes a card dict with ``name``
    (taken from the ``alt`` text), ``img`` and ``url`` (both the ``src``).
    The images are then passed to ``download_images`` and the card list is
    run through ``get_rarities_by_symbol`` and ``get_mana_symbols``.

    Args:
        url: Gallery page to fetch. Ignored when ``setinfo`` has a ``name``.
        setinfo: Set description. ``code`` is required; when ``name`` is
            present the gallery URL is derived from it.
        showRarityColors: Unused.
        showFrameColors: Unused.
        manual_cards: Unused.
        delete_cards: Unused.

    Returns:
        ``{"cards": [...]}`` as returned by ``get_mana_symbols``.
    """
    # The list/dict defaults above are kept for interface compatibility;
    # they are never mutated here.
    if 'name' in setinfo:
        # Build the slug word by word: drop the standalone word "of" and
        # join with single dashes. Chained str.replace() calls also removed
        # "of" from inside words and left a double dash where it had been.
        slugWords = [word for word in setinfo['name'].lower().split()
                     if word != 'of']
        url = ('http://magic.wizards.com/en/articles/archive/'
               'card-image-gallery/' + '-'.join(slugWords))
    # A timeout keeps an unresponsive server from hanging the scrape forever.
    page = requests.get(url, timeout=30)
    tree = html.fromstring(page.content)
    cards = []
    for child in tree.xpath('//*[@id="content-detail-page-of-an-article"]'):
        # NOTE(review): this expression starts with '//', so it appears to be
        # evaluated against the whole document rather than only `child` --
        # confirm against the page layout before making it relative.
        for cardElement in child.xpath('//*/p/img'):
            altText = cardElement.attrib.get('alt')
            imageUrl = cardElement.attrib.get('src')
            if not altText or not imageUrl:
                # Not a card image (no name or no source to download).
                continue
            cards.append({
                # Normalise the typographic apostrophe and keep only the
                # first half of names written as "A /// B".
                "name": altText.replace(u"\u2019", '\'').split(' /// ')[0],
                "img": imageUrl,
                "url": imageUrl,
            })
    fullspoil = {"cards": cards}
    # len(cards) is always defined, unlike a counter bound inside the loop.
    print("Spoil Gallery has " + str(len(cards)) + " cards.")
    download_images(fullspoil['cards'], setinfo['code'])
    fullspoil = get_rarities_by_symbol(fullspoil, setinfo['code'])
    fullspoil = get_mana_symbols(fullspoil, setinfo['code'])
    # Frame colour detection (get_colors_by_frame) is intentionally not run.
    return fullspoil
def get_rarities_by_symbol(fullspoil, setcode):
    """Guess each card's rarity from the colour of its set symbol.

    For every card whose image exists at ``images/<setcode>/<name>.jpg`` a
    2x2 pixel region is cropped, its mean red/green/blue values are compared
    with a reference colour per rarity, and the closest rarity is stored in
    ``card['rarity']``. When even the closest reference is further away than
    ``highVariance`` the card is reported and marked "Mythic Rare". Cards
    without a readable image are left untouched.

    The crops of the first ten processed cards are saved as
    ``images/<name>.symbol.jpg``.

    Args:
        fullspoil: Dict with a ``cards`` list of card dicts (each needs
            ``name``).
        setcode: Set code naming the image sub-directory.

    Returns:
        The same ``fullspoil`` object, with cards updated in place.
    """
    symbolPixels = (240, 219, 242, 221)
    # Cards with '//' in their name are sampled at a different position.
    splitSymbolPixels = (240, 138, 242, 140)
    highVariance = 15
    colorAverages = {
        "Common": [30, 27, 28],
        "Uncommon": [121, 155, 169],
        "Rare": [166, 143, 80],
        "Mythic Rare": [201, 85, 14]
    }
    symbolCount = 0
    for card in fullspoil['cards']:
        imageName = card['name'].replace(' // ', '')
        imagePath = 'images/' + setcode + '/' + imageName + '.jpg'
        if not os.path.isfile(imagePath):
            # No downloaded image for this card: nothing to analyse.
            continue
        try:
            # Force three bands so the histogram slices below always line up
            # (greyscale/palette/CMYK files would otherwise give the wrong
            # bands or an empty slice and a ZeroDivisionError).
            cardImage = Image.open(imagePath).convert('RGB')
        except (IOError, OSError):
            # Unreadable or corrupt image: skip the card, keep going.
            continue
        if '//' in card['name']:
            setSymbol = cardImage.crop(splitSymbolPixels)
        else:
            setSymbol = cardImage.crop(symbolPixels)
        cardHistogram = setSymbol.histogram()
        means = []
        for band in range(3):
            counts = cardHistogram[band * 256:(band + 1) * 256]
            # Floor division keeps the integer means produced by `/` on
            # Python 2, which the thresholds are compared against.
            means.append(
                sum(i * w for i, w in enumerate(counts)) // sum(counts))
        variance = 768  # larger than any possible distance (3 * 255)
        closestRarity = None
        for color in colorAverages:
            colorVariance = sum(
                abs(expected - actual)
                for expected, actual in zip(colorAverages[color], means))
            if colorVariance < variance:
                variance = colorVariance
                closestRarity = color
        card['rarity'] = closestRarity
        if variance > highVariance:
            # if a card isn't close to any of the colors, it's probably a
            # planeswalker? make it mythic.
            print(card['name'] + ' has high variance of ' + str(variance) +
                  ' , closest rarity is ' + closestRarity)
            card['rarity'] = "Mythic Rare"
        if symbolCount < 10:
            # Keep a few sample crops on disk for manual inspection.
            setSymbol.save('images/' + imageName + '.symbol.jpg')
            symbolCount += 1
    return fullspoil
def get_colors_by_frame(fullspoil, setcode):
    """Guess each card's colour from a strip of its frame.

    For every card whose image exists at ``images/<setcode>/<name>.jpg`` the
    ``framePixels`` region is cropped, its mean red/green/blue values are
    compared with one reference colour per frame type, and the closest match
    is stored as a one-element list in ``card['colors']``. Cards without a
    readable image are left untouched.

    Args:
        fullspoil: Dict with a ``cards`` list of card dicts (each needs
            ``name``).
        setcode: Set code naming the image sub-directory.

    Returns:
        The same ``fullspoil`` object, with cards updated in place.
    """
    framePixels = (20, 11, 76, 16)
    colorAverages = {
        "White": [231, 225, 200],
        "Blue": [103, 193, 230],
        "Black": [58, 61, 54],
        "Red": [221, 122, 101],
        "Green": [118, 165, 131],
        "Multicolor": [219, 200, 138],
        "Artifact": [141, 165, 173],
        "Colorless": [216, 197, 176],
    }
    for card in fullspoil['cards']:
        imagePath = ('images/' + setcode + '/' +
                     card['name'].replace(' // ', '') + '.jpg')
        if not os.path.isfile(imagePath):
            # No downloaded image for this card: nothing to analyse.
            continue
        try:
            # Force three bands so the histogram slices below always line up
            # (greyscale/palette/CMYK files would otherwise give the wrong
            # bands or an empty slice and a ZeroDivisionError).
            cardImage = Image.open(imagePath).convert('RGB')
        except (IOError, OSError):
            # Unreadable or corrupt image: skip the card, keep going.
            continue
        cardHistogram = cardImage.crop(framePixels).histogram()
        means = []
        for band in range(3):
            counts = cardHistogram[band * 256:(band + 1) * 256]
            # Floor division keeps the integer means produced by `/` on
            # Python 2.
            means.append(
                sum(i * w for i, w in enumerate(counts)) // sum(counts))
        variance = 768  # larger than any possible distance (3 * 255)
        for color in colorAverages:
            colorVariance = sum(
                abs(expected - actual)
                for expected, actual in zip(colorAverages[color], means))
            if colorVariance < variance:
                variance = colorVariance
                card['colors'] = [color]
    return fullspoil
def get_mana_symbols(fullspoil=None, setcode="HOU"):
    """Guess each card's mana cost from fixed boxes on its image.

    For every card whose image exists at ``images/<setcode>/<name>.jpg``
    ``card['manaCost']`` is reset to ``""`` and each box in ``manaBoxes`` is
    sampled. A box whose mean colour is within 10 of a reference symbol
    colour contributes that symbol; "2" and "5" are told apart with a second,
    smaller crop and dropped when that check is inconclusive. The boxes are
    listed with decreasing x coordinates and every recognised symbol is
    prepended, so the resulting string follows increasing x order. Cards
    without a readable image are left untouched.

    Args:
        fullspoil: Dict with a ``cards`` list of card dicts (each needs
            ``name``). ``None`` or a dict without ``cards`` is returned
            unchanged (``None`` becomes ``{}``).
        setcode: Set code naming the image sub-directory.

    Returns:
        The same ``fullspoil`` object, with cards updated in place.
    """
    if fullspoil is None:
        fullspoil = {}
    manaBoxes = [(234, 23, 244, 33), (220, 23, 230, 33),
                 (206, 23, 216, 33), (192, 23, 202, 33), (178, 23, 188, 33)]
    colorAverages = {
        "W": [126, 123, 110],
        "U": [115, 140, 151],
        "B": [105, 99, 98],
        "R": [120, 89, 77],
        "G": [65, 78, 69],
        "1": [162, 156, 154],
        "2": [155, 148, 147],
        "3": [160, 153, 152],
        "4": [149, 143, 141],
        "5": [155, 149, 147],
        "6": [151, 145, 143],
        "7": [169, 163, 161],
        "X": [160, 154, 152]
    }
    # Reference colour of the inner crop used to separate "2" from "5".
    twoReference = (175, 168, 166)

    def mean_rgb(region):
        # Mean value of each of the three bands of an RGB image region.
        histogram = region.histogram()
        means = []
        for band in range(3):
            counts = histogram[band * 256:(band + 1) * 256]
            # Floor division keeps the integer means produced by `/` on
            # Python 2, which the thresholds are compared against.
            means.append(
                sum(i * w for i, w in enumerate(counts)) // sum(counts))
        return means

    def distance(expected, actual):
        # Sum of absolute per-band differences between two colours.
        return sum(abs(e - a) for e, a in zip(expected, actual))

    for card in fullspoil.get('cards', []):
        imagePath = ('images/' + setcode + '/' +
                     card['name'].replace(' // ', '') + '.jpg')
        if not os.path.isfile(imagePath):
            # No downloaded image for this card: nothing to analyse.
            continue
        try:
            # Force three bands so the histogram slices always line up
            # (greyscale/palette/CMYK files would otherwise give the wrong
            # bands or an empty slice and a ZeroDivisionError).
            cardImage = Image.open(imagePath).convert('RGB')
        except (IOError, OSError):
            # Unreadable or corrupt image: skip the card, keep going.
            continue
        card['manaCost'] = ""
        for manaBox in manaBoxes:
            means = mean_rgb(cardImage.crop(manaBox))
            variance = 768  # larger than any possible distance (3 * 255)
            closestColor = None
            for color in colorAverages:
                colorVariance = distance(colorAverages[color], means)
                if colorVariance < variance:
                    variance = colorVariance
                    closestColor = color
            if variance >= 10:
                # Nothing close enough to a known symbol in this box.
                continue
            if closestColor in ("2", "5"):
                # The references for "2" and "5" are nearly identical, so
                # look at a smaller crop inside the box to decide.
                twoVSfive = (
                    manaBox[0] + 1, manaBox[1] + 4,
                    manaBox[2] - 5, manaBox[3] - 2)
                innerVariance = distance(
                    twoReference, mean_rgb(cardImage.crop(twoVSfive)))
                if innerVariance < 10:
                    closestColor = "2"
                elif 110 < innerVariance < 120:
                    closestColor = "5"
                else:
                    # Neither pattern matched: do not record a symbol.
                    continue
            card['manaCost'] = closestColor + card['manaCost']
    return fullspoil
def smash_fullspoil(mtgjson, fullspoil):
    """Print how the scraped gallery data differs from the mtgjson data.

    Two reports are printed:

    * "WOTC only cards" -- names present in ``fullspoil`` but not in
      ``mtgjson`` (printed only when there are any);
    * a dict mapping card name to the ``fullspoil`` values of every key that
      exists in both sources with different values (``colors`` is ignored).

    Args:
        mtgjson: Dict with a ``cards`` list of card dicts.
        fullspoil: Dict with a ``cards`` list of card dicts.

    Returns:
        None. The function only prints.
    """
    different_keys = {}
    for mtgjson_card in mtgjson['cards']:
        for fullspoil_card in fullspoil['cards']:
            if mtgjson_card['name'] != fullspoil_card['name']:
                continue
            for key in fullspoil_card:
                if key == 'colors' or key not in mtgjson_card:
                    continue
                if mtgjson_card[key] != fullspoil_card[key]:
                    different_keys.setdefault(
                        fullspoil_card['name'], {})[key] = fullspoil_card[key]
    # Collect every unmatched name. The list is built once, outside any
    # per-card loop, so earlier misses are not discarded and the name is
    # bound even when fullspoil has no cards.
    known_names = set(card['name'] for card in mtgjson['cards'])
    WOTC_only = [card['name'] for card in fullspoil['cards']
                 if card['name'] not in known_names]
    if WOTC_only:
        print("WOTC only cards: ")
        print(WOTC_only)
    print(different_keys)
def download_images(mtgjson, setcode):
    """Download card images into ``images/<setcode>/``.

    The directory is created when missing. Each card with a non-empty
    ``url`` is saved as ``images/<setcode>/<name>.jpg`` (with any ' // '
    removed from the name); files that already exist are not downloaded
    again. A failed download is reported and skipped so one bad URL does not
    stop the rest.

    Args:
        mtgjson: Either a dict with a ``cards`` list or the list of card
            dicts itself.
        setcode: Set code naming the image sub-directory.
    """
    imageDir = 'images/' + setcode
    if not os.path.isdir(imageDir):
        os.makedirs(imageDir)
    if 'cards' in mtgjson:
        jsoncards = mtgjson['cards']
    else:
        jsoncards = mtgjson
    for card in jsoncards:
        cardUrl = card.get('url')
        if not cardUrl:
            continue
        imagePath = imageDir + '/' + card['name'].replace(' // ', '') + '.jpg'
        if os.path.isfile(imagePath):
            continue
        try:
            response = requests.get(cardUrl, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as error:
            print('Could not download ' + cardUrl + ': ' + str(error))
            continue
        # requests.get() only fetches the data (its second positional
        # argument is the query parameters, not a destination path), so the
        # image bytes have to be written to disk explicitly.
        with open(imagePath, 'wb') as imageFile:
            imageFile.write(response.content)