From 1dd538d5a132e5b9af30a3fc1a0e898eb4450016 Mon Sep 17 00:00:00 2001 From: Lee Matos Date: Wed, 5 Jul 2017 21:44:45 -0400 Subject: [PATCH] First pass refactoring scrapers into separate modules (#98) Splits off the respective scrapers into submodules (mtgs_scraper.py, scryfall_scraper.py, mythic_scraper.py, wizards_scraper.py) --- main.py | 119 +++--- mtgs_scraper.py | 269 +++++++++++++ mythic_scraper.py | 87 ++++ scryfall_scraper.py | 170 ++++++++ spoilers.py | 960 +++++++------------------------------------- wizards_scraper.py | 252 ++++++++++++ 6 files changed, 1002 insertions(+), 855 deletions(-) create mode 100644 mtgs_scraper.py create mode 100644 mythic_scraper.py create mode 100644 scryfall_scraper.py create mode 100644 wizards_scraper.py diff --git a/main.py b/main.py index 85b5348e..abe932b0 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- import spoilers +import mtgs_scraper +import scryfall_scraper +import mythic_scraper +import wizards_scraper import os import commentjson import json @@ -7,17 +11,20 @@ import io import sys presets = { - "isfullspoil": False, # when full spoil comes around, we only want to use WOTC images - "includeMasterpieces": True, # if the set has masterpieces, let's get those too - "oldRSS": False, # maybe MTGS hasn't updated their spoiler.rss but new cards have leaked + "isfullspoil": False, # when full spoil comes around, we only want to use WOTC images + "includeMasterpieces": True, # if the set has masterpieces, let's get those too + "oldRSS": False, # maybe MTGS hasn't updated their spoiler.rss but new cards have leaked "split_cards": { }, - "siteorder": ['scryfall','mtgs','mythicspoiler'], # if we want to use one site before another for card data TODO - "imageorder": ['wotc','scryfall','mtgs','mythicspoiler'], # prioritize images from certain sources - "useexclusively": '', # if we *only* want to use one site TODO - "dumpXML": False, # let travis print XML for testing - "scryfallComparison": False, #if we want to debug compare scryfall to other sources, enable - "dumpErrors": True # print the error log from out/errors.json + # if we want to use one site before another for card data TODO + "siteorder": ['scryfall', 'mtgs', 'mythicspoiler'], + # prioritize images from certain sources + "imageorder": ['wotc', 'scryfall', 'mtgs', 'mythicspoiler'], + "useexclusively": '', # if we *only* want to use one site TODO + "dumpXML": False, # let travis print XML for testing + # if we want to debug compare scryfall to other sources, enable + "scryfallComparison": False, + "dumpErrors": True # print the error log from out/errors.json } @@ -30,94 +37,114 @@ def load_json(json_file, lib_to_use): output_file = json.load(data_file) return output_file except Exception as ex: - print "Unable to load file: " +json_file+ "\nException information:\n" + str(ex.args) - sys.exit("Unable to load file: "+json_file) + print "Unable to load file: " + json_file + "\nException information:\n" + str(ex.args) + sys.exit("Unable to load file: " + json_file) -setinfos = load_json('set_info','commentjson') -manual_sets = load_json('cards_manual','json') -card_corrections = load_json('cards_corrections','commentjson') -delete_cards = load_json('cards_delete','commentjson') +setinfos = load_json('set_info', 'commentjson') +manual_sets = load_json('cards_manual', 'json') +card_corrections = load_json('cards_corrections', 'commentjson') +delete_cards = load_json('cards_delete', 'commentjson') errorlog = [] -#TODO insert configparser to add config.ini file +# TODO insert configparser to add config.ini file + def parseargs(): for argument in sys.argv: for preset in presets: - if argument.split('=')[0].lower().replace('-','') == preset.lower(): + if argument.split('=')[0].lower().replace('-', '') == preset.lower(): argvalue = argument.split('=')[1] - if argvalue in ['true','True','T','t']: + if argvalue in ['true', 'True', 'T', 't']: argvalue = True - elif argvalue in ['false','False','F','f']: + elif argvalue in ['false', 'False', 'F', 'f']: argvalue = False presets[preset] = argvalue print "Setting preset " + preset + " to value " + str(argvalue) + def save_allsets(AllSets): with io.open('out/AllSets.json', 'w', encoding='utf8') as json_file: - data = json.dumps(AllSets, ensure_ascii=False, encoding='utf8', indent=2, sort_keys=True, separators=(',',':')) + data = json.dumps(AllSets, ensure_ascii=False, encoding='utf8', + indent=2, sort_keys=True, separators=(',', ':')) json_file.write(unicode(data)) + def save_masterpieces(masterpieces, setinfo): with open('out/' + setinfo['masterpieces']['setname'] + '.json', 'w') as outfile: - json.dump(masterpieces, outfile, sort_keys=True, indent=2, separators=(',', ': ')) + json.dump(masterpieces, outfile, sort_keys=True, + indent=2, separators=(',', ': ')) + def save_setjson(mtgs, filename): with io.open('out/' + filename + '.json', 'w', encoding='utf8') as json_file: - data = json.dumps(mtgs, ensure_ascii=False, encoding='utf8', indent=2, sort_keys=True, separators=(',',':')) + data = json.dumps(mtgs, ensure_ascii=False, encoding='utf8', + indent=2, sort_keys=True, separators=(',', ':')) json_file.write(unicode(data)) + def save_errorlog(errorlog): with open('out/errors.json', 'w') as outfile: - json.dump(errorlog, outfile, sort_keys=True, indent=2, separators=(',', ': ')) + json.dump(errorlog, outfile, sort_keys=True, + indent=2, separators=(',', ': ')) + def save_xml(xmlstring, outfile): if os.path.exists(outfile): append_or_write = 'w' else: append_or_write = 'w' - with open(outfile,append_or_write) as xmlfile: + with open(outfile, append_or_write) as xmlfile: xmlfile.write(xmlstring.encode('utf-8')) + if __name__ == '__main__': parseargs() - AllSets = spoilers.get_allsets() #get AllSets from mtgjson + AllSets = spoilers.get_allsets() # get AllSets from mtgjson combinedjson = {} for setinfo in setinfos: if setinfo['setname'] in AllSets: - print "Found set from set_info " +setinfo['setname']+ " in MTGJSON, not adding it" + print "Found set from set_info " + setinfo['setname'] + " in MTGJSON, not adding it" continue if presets['oldRSS'] or 'noRSS' in setinfo and setinfo['noRSS']: - mtgs = { "cards":[] } + mtgs = {"cards": []} else: - mtgs = spoilers.scrape_mtgs('http://www.mtgsalvation.com/spoilers.rss') #scrape mtgs rss feed - [mtgs, split_cards] = spoilers.parse_mtgs(mtgs, [], [], [], presets['split_cards']) #parse spoilers into mtgjson format - mtgs = spoilers.correct_cards(mtgs, manual_sets[setinfo['setname']]['cards'], card_corrections, delete_cards) #fix using the fixfiles - mtgjson = spoilers.get_image_urls(mtgs, presets['isfullspoil'], setinfo['setname'], setinfo['setlongname'], setinfo['setsize'], setinfo) #get images + mtgs = mtgs_scraper.scrape_mtgs( + 'http://www.mtgsalvation.com/spoilers.rss') # scrape mtgs rss feed + [mtgs, split_cards] = mtgs_scraper.parse_mtgs( + mtgs, [], [], [], presets['split_cards']) # parse spoilers into mtgjson format + mtgs = spoilers.correct_cards( + mtgs, manual_sets[setinfo['setname']]['cards'], card_corrections, delete_cards) # fix using the fixfiles + mtgjson = spoilers.get_image_urls( + mtgs, presets['isfullspoil'], setinfo['setname'], setinfo['setlongname'], setinfo['setsize'], setinfo) # get images if presets['scryfallComparison']: - scryfall = spoilers.get_scryfall( + scryfall = scryfall_scraper.get_scryfall( 'https://api.scryfall.com/cards/search?q=++e:' + setinfo['setname'].lower()) - mtgjson = spoilers.smash_mtgs_scryfall(mtgs, scryfall) + mtgjson = scryfall_scraper.smash_mtgs_scryfall(mtgs, scryfall) if 'fullSpoil' in setinfo and setinfo['fullSpoil']: - wotc = spoilers.scrape_fullspoil('', setinfo) - spoilers.smash_fullspoil(mtgjson, wotc) - [mtgjson, errors] = spoilers.error_check(mtgjson, card_corrections) #check for errors where possible + wotc = wizards_scraper.scrape_fullspoil('', setinfo) + wizards_scraper.smash_fullspoil(mtgjson, wotc) + [mtgjson, errors] = spoilers.error_check( + mtgjson, card_corrections) # check for errors where possible errorlog += errors - spoilers.write_xml(mtgjson, setinfo['setname'], setinfo['setlongname'], setinfo['setreleasedate']) + spoilers.write_xml( + mtgjson, setinfo['setname'], setinfo['setlongname'], setinfo['setreleasedate']) #save_xml(spoilers.pretty_xml(setinfo['setname']), 'out/spoiler.xml') mtgjson = spoilers.add_headers(mtgjson, setinfo) AllSets = spoilers.make_allsets(AllSets, mtgjson, setinfo['setname']) - if 'masterpieces' in setinfo: #repeat all of the above for masterpieces - #masterpieces aren't in the rss feed, so for the new cards, we'll go to their individual pages on mtgs - #old cards will get their infos copied from mtgjson (including fields that may not apply like 'artist') - #the images will still come from mtgs - masterpieces = spoilers.make_masterpieces(setinfo['masterpieces'], AllSets, mtgjson) + if 'masterpieces' in setinfo: # repeat all of the above for masterpieces + # masterpieces aren't in the rss feed, so for the new cards, we'll go to their individual pages on mtgs + # old cards will get their infos copied from mtgjson (including fields that may not apply like 'artist') + # the images will still come from mtgs + masterpieces = spoilers.make_masterpieces( + setinfo['masterpieces'], AllSets, mtgjson) [masterpieces, errors] = spoilers.error_check(masterpieces) errorlog += errors - spoilers.write_xml(masterpieces, setinfo['masterpieces']['setname'], setinfo['masterpieces']['setlongname'], setinfo['masterpieces']['setreleasedate']) - AllSets = spoilers.make_allsets(AllSets, masterpieces, setinfo['masterpieces']['setname']) + spoilers.write_xml(masterpieces, setinfo['masterpieces']['setname'], + setinfo['masterpieces']['setlongname'], setinfo['masterpieces']['setreleasedate']) + AllSets = spoilers.make_allsets( + AllSets, masterpieces, setinfo['masterpieces']['setname']) save_masterpieces(masterpieces, setinfo) combinedjson[setinfo['masterpieces']['setname']] = masterpieces save_setjson(mtgjson, setinfo['setname']) @@ -128,7 +155,7 @@ if __name__ == '__main__': errorlog = spoilers.remove_corrected_errors(errorlog, card_corrections) save_errorlog(errorlog) save_allsets(AllSets) - #save_setjson(mtgjson) + # save_setjson(mtgjson) if presets['dumpXML']: print '' with open('out/spoiler.xml', 'r') as xmlfile: @@ -137,7 +164,7 @@ if __name__ == '__main__': if presets['dumpErrors']: if errorlog != {}: print '//----- DUMPING ERROR LOG -----' - print json.dumps(errorlog, ensure_ascii=False, encoding='utf8', indent=2, sort_keys=True, separators=(',',':')) + print json.dumps(errorlog, ensure_ascii=False, encoding='utf8', indent=2, sort_keys=True, separators=(',', ':')) print '//----- END ERROR LOG -----' else: print "No Detected Errors!" diff --git a/mtgs_scraper.py b/mtgs_scraper.py new file mode 100644 index 00000000..d3e56ea0 --- /dev/null +++ b/mtgs_scraper.py @@ -0,0 +1,269 @@ +# -*- coding: utf-8 -*- +import requests +import feedparser +import re +import sys +import time +from lxml import html + + +def scrape_mtgs(url): + return requests.get(url, headers={'Cache-Control': 'no-cache', 'Pragma': 'no-cache', 'Expires': 'Thu, 01 Jan 1970 00:00:00 GMT'}).text + + +def parse_mtgs(mtgs, manual_cards=[], card_corrections=[], delete_cards=[], split_cards={}, related_cards=[]): + mtgs = mtgs.replace('utf-16', 'utf-8') + patterns = ['Name: (?P.*?)<', + 'Cost: (?P[X]*\d{0,2}[XWUBRGC]*?)<', + 'Type: (?P.*?)<', + 'Pow/Tgh: (?P.*?)<', + 'Rules Text: (?P.*?)
.*?)<', + 'Set Number: #(?P.*?)/' + ] + d = feedparser.parse(mtgs) + + cards = [] + for entry in d.items()[5][1]: + card = dict(cost='', cmc='', img='', pow='', name='', rules='', type='', + color='', altname='', colorIdentity='', colorArray=[], colorIdentityArray=[], setnumber='', rarity='') + summary = entry['summary'] + for pattern in patterns: + match = re.search(pattern, summary, re.MULTILINE | re.DOTALL) + if match: + dg = match.groupdict() + card[dg.items()[0][0]] = dg.items()[0][1] + cards.append(card) + + # if we didn't find any cards, let's bail out to prevent overwriting good data + count = 0 + for card in cards: + count = count + 1 + if count < 1: + sys.exit("No cards found, exiting to prevent file overwrite") + + cards2 = [] + for card in cards: + if 'rules' in card: + htmltags = re.compile(r'<.*?>') + card['rules'] = htmltags.sub('', card['rules']) + if '//' in card['name'] or 'Aftermath' in card['rules']: + print 'Splitting up Aftermath card ' + card['name'] + card1 = card.copy() + card2 = dict(cost='', cmc='', img='', pow='', name='', rules='', type='', + color='', altname='', colorIdentity='', colorArray=[], colorIdentityArray=[], setnumber='', rarity='') + if '//' in card['name']: + card['name'] = card['name'].replace(' // ', '//') + card1['name'] = card['name'].split('//')[0] + card2["name"] = card['name'].split('//')[1] + else: + card1['name'] = card['name'] + card2["name"] = card['rules'].split( + '\n\n')[1].strip().split(' {')[0] + card1['rules'] = card['rules'].split('\n\n')[0].strip() + card2["rules"] = "Aftermath" + card['rules'].split('Aftermath')[1] + card2['cost'] = re.findall( + r'{.*}', card['rules'])[0].replace('{', '').replace('}', '').upper() + card2['type'] = re.findall( + r'}\n.*\n', card['rules'])[0].replace('}', '').replace('\n', '') + if 'setnumber' in card: + card1['setnumber'] = card['setnumber'] + 'a' + card2['setnumber'] = card['setnumber'] + 'b' + if 'rarity' in card: + card2['rarity'] = card['rarity'] + if not card1['name'] in split_cards: + split_cards[card1['name']] = card2['name'] + card1['layout'] = 'aftermath' + card2['layout'] = 'aftermath' + cards2.append(card1) + cards2.append(card2) + else: + cards2.append(card) + cards = cards2 + + for card in cards: + card['name'] = card['name'].replace(''', '\'') + card['rules'] = card['rules'].replace(''', '\'') \ + .replace('<i>', '') \ + .replace('</i>', '') \ + .replace('"', '"') \ + .replace('blkocking', 'blocking')\ + .replace('&bull;', u'•')\ + .replace('•', u'•')\ + .replace('comes into the', 'enters the')\ + .replace('threeor', 'three or')\ + .replace('[i]', '')\ + .replace('[/i]', '')\ + .replace('Lawlwss', 'Lawless')\ + .replace('Costner', "Counter") + card['type'] = card['type'].replace(' ', ' ')\ + .replace('Crature', 'Creature') + + if card['type'][-1] == ' ': + card['type'] = card['type'][:-1] + + if 'cost' in card and len(card['cost']) > 0: + workingCMC = 0 + stripCost = card['cost'].replace('{', '').replace('}', '') + for manaSymbol in stripCost: + if manaSymbol.isdigit(): + workingCMC += int(manaSymbol) + elif not manaSymbol == 'X': + workingCMC += 1 + card['cmc'] = workingCMC + + for c in 'WUBRG': # figure out card's color + if c not in card['colorIdentity']: + if c in card['cost']: + card['color'] += c + card['colorIdentity'] += c + if (c + '}') in card['rules'] or (str.lower(c) + '}') in card['rules']: + if not (c in card['colorIdentity']): + card['colorIdentity'] += c + + cleanedcards = [] + for card in cards: # let's remove any cards that are named in delete_cards array + if not card['name'] in delete_cards: + cleanedcards.append(card) + cards = cleanedcards + + cardarray = [] + for card in cards: + dupe = False + for dupecheck in cardarray: + if dupecheck['name'] == card['name']: + dupe = True + if dupe == True: + continue + for cid in card['colorIdentity']: + card['colorIdentityArray'].append(cid) + if 'W' in card['color']: + card['colorArray'].append('White') + if 'U' in card['color']: + card['colorArray'].append('Blue') + if 'B' in card['color']: + card['colorArray'].append('Black') + if 'R' in card['color']: + card['colorArray'].append('Red') + if 'G' in card['color']: + card['colorArray'].append('Green') + cardpower = '' + cardtoughness = '' + if len(card['pow'].split('/')) > 1: + cardpower = card['pow'].split('/')[0] + cardtoughness = card['pow'].split('/')[1] + cardnames = [] + cardnumber = card['setnumber'].lstrip('0') + if card['name'] in related_cards: + cardnames.append(card['name']) + cardnames.append(related_cards[card['name']]) + cardnumber += 'a' + card['layout'] = 'double-faced' + for namematch in related_cards: + if card['name'] == related_cards[namematch]: + card['layout'] = 'double-faced' + cardnames.append(namematch) + if not card['name'] in cardnames: + cardnames.append(card['name']) + cardnumber += 'b' + cardnames = [] + if card['name'] in split_cards: + cardnames.append(card['name']) + cardnames.append(split_cards[card['name']]) + cardnumber = cardnumber.replace('b', '').replace('a', '') + 'a' + if not 'layout' in card: + card['layout'] = 'split' + for namematch in split_cards: + if card['name'] == split_cards[namematch]: + if not 'layout' in card or ('layout' in card and card['layout'] == ''): + card['layout'] = 'split' + cardnames.append(namematch) + if not card['name'] in cardnames: + cardnames.append(card['name']) + cardnumber = cardnumber.replace( + 'b', '').replace('a', '') + 'b' + if 'number' in card: + if 'b' in card['number'] or 'a' in card['number']: + if not 'layout' in card: + print card['name'] + " has a a/b number but no 'layout'" + card['type'] = card['type'].replace('instant', 'Instant').replace( + 'sorcery', 'Sorcery').replace('creature', 'Creature') + if '-' in card['type']: + subtype = card['type'].split(' - ')[1].strip() + else: + subtype = False + if subtype: + subtypes = subtype.split(' ') + else: + subtypes = False + if card['cmc'] == '': + card['cmc'] = 0 + cardjson = {} + #cardjson["id"] = hashlib.sha1(setname + card['name'] + str(card['name']).lower()).hexdigest() + cardjson["cmc"] = card['cmc'] + cardjson["manaCost"] = card['cost'] + cardjson["name"] = card['name'] + cardjson["number"] = cardnumber + # not sure if mtgjson has a list of acceptable rarities, but my application does + # so we'll warn me but continue to write a non-standard rarity (timeshifted?) + # may force 'special' in the future + if card['rarity'] not in ['Mythic Rare', 'Rare', 'Uncommon', 'Common', 'Special', 'Basic Land']: + #errors.append({"name": card['name'], "key": "rarity", "value": card['rarity']}) + print card['name'] + ' has rarity = ' + card['rarity'] + if subtypes: + cardjson['subtypes'] = subtypes + cardjson["rarity"] = card['rarity'] + cardjson["text"] = card['rules'] + cardjson["type"] = card['type'] + + workingtypes = card['type'] + if ' - ' in workingtypes: + workingtypes = card['type'].split(' - ')[0] + cardjson['types'] = workingtypes.replace('Legendary ', '').replace('Snow ', '')\ + .replace('Elite ', '').replace('Basic ', '').replace('World ', '').replace('Ongoing ', '')\ + .strip().split(' ') + cardjson["url"] = card['img'] + + # optional fields + if len(card['colorIdentityArray']) > 0: + cardjson["colorIdentity"] = card['colorIdentityArray'] + if len(card['colorArray']) > 0: + cardjson["colors"] = card['colorArray'] + if len(cardnames) > 1: + cardjson["names"] = cardnames + if cardpower or cardpower == '0': + cardjson["power"] = cardpower + cardjson["toughness"] = cardtoughness + if card.has_key('loyalty'): + cardjson["loyalty"] = card['loyalty'] + if card.has_key('layout'): + cardjson["layout"] = card['layout'] + + cardarray.append(cardjson) + + return [{"cards": cardarray}, split_cards] + + +def scrape_mtgs_images(url='http://www.mtgsalvation.com/spoilers/183-hour-of-devastation', mtgscardurl='http://www.mtgsalvation.com/cards/hour-of-devastation/', exemptlist=[]): + page = requests.get(url) + tree = html.fromstring(page.content) + cards = {} + cardstree = tree.xpath('//*[contains(@class, "log-card")]') + for child in cardstree: + if child.text in exemptlist: + continue + childurl = mtgscardurl + child.attrib['data-card-id'] + '-' + child.text.replace( + ' ', '-').replace("'", "").replace(',', '').replace('-//', '') + cardpage = requests.get(childurl) + tree = html.fromstring(cardpage.content) + cardtree = tree.xpath('//img[contains(@class, "card-spoiler-image")]') + try: + cardurl = cardtree[0].attrib['src'] + except: + cardurl = '' + pass + cards[child.text] = { + "url": cardurl + } + time.sleep(.2) + return cards diff --git a/mythic_scraper.py b/mythic_scraper.py new file mode 100644 index 00000000..82cc4587 --- /dev/null +++ b/mythic_scraper.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +import requests +import time +from bs4 import BeautifulSoup as BS +from bs4 import Comment + + +# mtgjson is optional, will ignore cards found if passed +def get_mythic_cards(url='http://mythicspoiler.com/ixa/', mtgjson=False): + cards = {'cards': []} + r = requests.get(url) + soup = BS(r.text, "html.parser") + cardurls = soup.find_all('a', 'card') + urllist = [] + for cardurl in cardurls: + try: + urllist.append(url + str(cardurl).split("href=\"") + [1].split('">Name:
(?P.*?)<', - 'Cost: (?P[X]*\d{0,2}[XWUBRGC]*?)<', - 'Type: (?P.*?)<', - 'Pow/Tgh: (?P.*?)<', - 'Rules Text: (?P.*?)
.*?)<', - 'Set Number: #(?P.*?)/' - ] - d = feedparser.parse(mtgs) - - cards = [] - for entry in d.items()[5][1]: - card = dict(cost='',cmc='',img='',pow='',name='',rules='',type='', - color='', altname='', colorIdentity='', colorArray=[], colorIdentityArray=[], setnumber='', rarity='') - summary = entry['summary'] - for pattern in patterns: - match = re.search(pattern, summary, re.MULTILINE|re.DOTALL) - if match: - dg = match.groupdict() - card[dg.items()[0][0]] = dg.items()[0][1] - cards.append(card) - - #if we didn't find any cards, let's bail out to prevent overwriting good data - count = 0 - for card in cards: - count = count + 1 - if count < 1: - sys.exit("No cards found, exiting to prevent file overwrite") - - cards2 = [] - for card in cards: - if 'rules' in card: - htmltags = re.compile(r'<.*?>') - card['rules'] = htmltags.sub('', card['rules']) - if '//' in card['name'] or 'Aftermath' in card['rules']: - print 'Splitting up Aftermath card ' + card['name'] - card1 = card.copy() - card2 = dict(cost='',cmc='',img='',pow='',name='',rules='',type='', - color='', altname='', colorIdentity='', colorArray=[], colorIdentityArray=[], setnumber='', rarity='') - if '//' in card['name']: - card['name'] = card['name'].replace(' // ','//') - card1['name'] = card['name'].split('//')[0] - card2["name"] = card['name'].split('//')[1] - else: - card1['name'] = card['name'] - card2["name"] = card['rules'].split('\n\n')[1].strip().split(' {')[0] - card1['rules'] = card['rules'].split('\n\n')[0].strip() - card2["rules"] = "Aftermath" + card['rules'].split('Aftermath')[1] - card2['cost'] = re.findall(r'{.*}',card['rules'])[0].replace('{','').replace('}','').upper() - card2['type'] = re.findall(r'}\n.*\n', card['rules'])[0].replace('}','').replace('\n','') - if 'setnumber' in card: - card1['setnumber'] = card['setnumber'] + 'a' - card2['setnumber'] = card['setnumber'] + 'b' - if 'rarity' in card: - card2['rarity'] = card['rarity'] - if not card1['name'] in split_cards: - split_cards[card1['name']] = card2['name'] - card1['layout'] = 'aftermath' - card2['layout'] = 'aftermath' - cards2.append(card1) - cards2.append(card2) - else: - cards2.append(card) - cards = cards2 - - for card in cards: - card['name'] = card['name'].replace(''', '\'') - card['rules'] = card['rules'].replace(''', '\'') \ - .replace('<i>', '') \ - .replace('</i>', '') \ - .replace('"', '"') \ - .replace('blkocking', 'blocking')\ - .replace('&bull;',u'•')\ - .replace('•',u'•')\ - .replace('comes into the','enters the')\ - .replace('threeor', 'three or')\ - .replace('[i]','')\ - .replace('[/i]','')\ - .replace('Lawlwss','Lawless')\ - .replace('Costner',"Counter") - card['type'] = card['type'].replace(' ',' ')\ - .replace('Crature', 'Creature') - - if card['type'][-1] == ' ': - card['type'] = card['type'][:-1] - - if 'cost' in card and len(card['cost']) > 0: - workingCMC = 0 - stripCost = card['cost'].replace('{','').replace('}','') - for manaSymbol in stripCost: - if manaSymbol.isdigit(): - workingCMC += int(manaSymbol) - elif not manaSymbol == 'X': - workingCMC += 1 - card['cmc'] = workingCMC - - for c in 'WUBRG': #figure out card's color - if c not in card['colorIdentity']: - if c in card['cost']: - card['color'] += c - card['colorIdentity'] += c - if (c + '}') in card['rules'] or (str.lower(c) + '}') in card['rules']: - if not (c in card['colorIdentity']): - card['colorIdentity'] += c - - cleanedcards = [] - for card in cards: #let's remove any cards that are named in delete_cards array - if not card['name'] in delete_cards: - cleanedcards.append(card) - cards = cleanedcards - - cardarray = [] - for card in cards: - dupe = False - for dupecheck in cardarray: - if dupecheck['name'] == card['name']: - dupe = True - if dupe == True: - continue - for cid in card['colorIdentity']: - card['colorIdentityArray'].append(cid) - if 'W' in card['color']: - card['colorArray'].append('White') - if 'U' in card['color']: - card['colorArray'].append('Blue') - if 'B' in card['color']: - card['colorArray'].append('Black') - if 'R' in card['color']: - card['colorArray'].append('Red') - if 'G' in card['color']: - card['colorArray'].append('Green') - cardpower = '' - cardtoughness = '' - if len(card['pow'].split('/')) > 1: - cardpower = card['pow'].split('/')[0] - cardtoughness = card['pow'].split('/')[1] - cardnames = [] - cardnumber = card['setnumber'].lstrip('0') - if card['name'] in related_cards: - cardnames.append(card['name']) - cardnames.append(related_cards[card['name']]) - cardnumber += 'a' - card['layout'] = 'double-faced' - for namematch in related_cards: - if card['name'] == related_cards[namematch]: - card['layout'] = 'double-faced' - cardnames.append(namematch) - if not card['name'] in cardnames: - cardnames.append(card['name']) - cardnumber += 'b' - cardnames = [] - if card['name'] in split_cards: - cardnames.append(card['name']) - cardnames.append(split_cards[card['name']]) - cardnumber = cardnumber.replace('b','').replace('a','') + 'a' - if not 'layout' in card: - card['layout'] = 'split' - for namematch in split_cards: - if card['name'] == split_cards[namematch]: - if not 'layout' in card or ('layout' in card and card['layout'] == ''): - card['layout'] = 'split' - cardnames.append(namematch) - if not card['name'] in cardnames: - cardnames.append(card['name']) - cardnumber = cardnumber.replace('b','').replace('a','') + 'b' - if 'number' in card: - if 'b' in card['number'] or 'a' in card['number']: - if not 'layout' in card: - print card['name'] + " has a a/b number but no 'layout'" - card['type'] = card['type'].replace('instant','Instant').replace('sorcery','Sorcery').replace('creature','Creature') - if '-' in card['type']: - subtype = card['type'].split(' - ')[1].strip() - else: - subtype = False - if subtype: - subtypes = subtype.split(' ') - else: - subtypes = False - if card['cmc'] == '': - card['cmc'] = 0 - cardjson = {} - #cardjson["id"] = hashlib.sha1(setname + card['name'] + str(card['name']).lower()).hexdigest() - cardjson["cmc"] = card['cmc'] - cardjson["manaCost"] = card['cost'] - cardjson["name"] = card['name'] - cardjson["number"] = cardnumber - #not sure if mtgjson has a list of acceptable rarities, but my application does - #so we'll warn me but continue to write a non-standard rarity (timeshifted?) - #may force 'special' in the future - if card['rarity'] not in ['Mythic Rare','Rare','Uncommon','Common','Special','Basic Land']: - #errors.append({"name": card['name'], "key": "rarity", "value": card['rarity']}) - print card['name'] + ' has rarity = ' + card['rarity'] - if subtypes: - cardjson['subtypes'] = subtypes - cardjson["rarity"] = card['rarity'] - cardjson["text"] = card['rules'] - cardjson["type"] = card['type'] - - workingtypes = card['type'] - if ' - ' in workingtypes: - workingtypes = card['type'].split(' - ')[0] - cardjson['types'] = workingtypes.replace('Legendary ','').replace('Snow ','')\ - .replace('Elite ','').replace('Basic ','').replace('World ','').replace('Ongoing ','')\ - .strip().split(' ') - cardjson["url"] = card['img'] - - #optional fields - if len(card['colorIdentityArray']) > 0: - cardjson["colorIdentity"] = card['colorIdentityArray'] - if len(card['colorArray']) > 0: - cardjson["colors"] = card['colorArray'] - if len(cardnames) > 1: - cardjson["names"] = cardnames - if cardpower or cardpower == '0': - cardjson["power"] = cardpower - cardjson["toughness"] = cardtoughness - if card.has_key('loyalty'): - cardjson["loyalty"] = card['loyalty'] - if card.has_key('layout'): - cardjson["layout"] = card['layout'] - - cardarray.append(cardjson) - - return [{"cards": cardarray}, split_cards] def correct_cards(mtgjson, manual_cards=[], card_corrections=[], delete_cards=[]): mtgjson2 = [] for card in manual_cards: if 'cmc' not in card: workingCMC = 0 - stripCost = card['manaCost'].replace('{','').replace('}','') + stripCost = card['manaCost'].replace('{', '').replace('}', '') for manaSymbol in stripCost: if manaSymbol.isdigit(): workingCMC += int(manaSymbol) @@ -265,8 +30,8 @@ def correct_cards(mtgjson, manual_cards=[], card_corrections=[], delete_cards=[] .replace('Elite ', '').replace('Basic ', '').replace('World ', '').replace('Ongoing ', '') \ .strip().split(' ') if 'subtypes' not in card: -# if '—' in card['type']: -# workingSubtypes = card['type'].split('—')[1].strip() + # if '—' in card['type']: + # workingSubtypes = card['type'].split('—')[1].strip() if '-' in card['type']: workingSubtypes = card['type'].split('-')[1].strip() if workingSubtypes: @@ -325,26 +90,32 @@ def correct_cards(mtgjson, manual_cards=[], card_corrections=[], delete_cards=[] return mtgjson + def error_check(mtgjson, card_corrections={}): errors = [] for card in mtgjson['cards']: for key in card: if key == "": errors.append({"name": card['name'], "key": key, "value": ""}) - requiredKeys = ['name','type','types'] + requiredKeys = ['name', 'type', 'types'] for requiredKey in requiredKeys: if not requiredKey in card: - errors.append({"name": card['name'], "key": key, "missing": True}) + errors.append( + {"name": card['name'], "key": key, "missing": True}) if 'text' in card: - card['text'] = card['text'].replace('','').replace('','').replace('','').replace('', '').replace( + '', '').replace('', '').replace(' 0: if not 'manaCost' in card: - errors.append({"name": card['name'], "key": "manaCost", "value": "", "match": card['cmc']}) + errors.append( + {"name": card['name'], "key": "manaCost", "value": "", "match": card['cmc']}) else: if 'manaCost' in card: - errors.append({"name": card['name'], "key": "manaCost", "oldvalue": card['manaCost'], "fixed": True}) + errors.append( + {"name": card['name'], "key": "manaCost", "oldvalue": card['manaCost'], "fixed": True}) del card["manaCost"] if 'colors' in card: if not 'colorIdentity' in card: if 'text' in card: if not 'devoid' in card['text'].lower(): - errors.append({"name": card['name'], "key": "colorIdentity", "value": ""}) + errors.append( + {"name": card['name'], "key": "colorIdentity", "value": ""}) else: - errors.append({"name": card['name'], "key": "colorIdentity", "value": ""}) + errors.append( + {"name": card['name'], "key": "colorIdentity", "value": ""}) if 'colorIdentity' in card: if not 'colors' in card: - #this one will false positive on emerge cards + # this one will false positive on emerge cards if not 'Land' in card['type'] and not 'Artifact' in card['type'] and not 'Eldrazi' in card['type']: if 'text' in card: if not 'emerge' in card['text'].lower() and not 'devoid' in card['text'].lower(): - errors.append({"name": card['name'], "key": "colors", "value": ""}) + errors.append( + {"name": card['name'], "key": "colors", "value": ""}) else: - errors.append({"name": card['name'], "key": "colors", "value": ""}) - #if not 'Land' in card['type'] and not 'Artifact' in card['type'] and not 'Eldrazi' in card['type']: + errors.append( + {"name": card['name'], "key": "colors", "value": ""}) + # if not 'Land' in card['type'] and not 'Artifact' in card['type'] and not 'Eldrazi' in card['type']: # errors.append({"name": card['name'], "key": "colors", "value": ""}) if not 'url' in card: errors.append({"name": card['name'], "key": "url", "value": ""}) @@ -419,11 +203,13 @@ def error_check(mtgjson, card_corrections={}): if not 'types' in card: errors.append({"name": card['name'], "key": "types", "value": ""}) - for card in mtgjson['cards']: #we're going to loop through again and make sure split cards get paired + # we're going to loop through again and make sure split cards get paired + for card in mtgjson['cards']: if 'layout' in card: if card['layout'] == 'split' or card['layout'] == 'meld' or card['layout'] == 'aftermath': if not 'names' in card: - errors.append({"name": card['name'], "key": "names", "value": ""}) + errors.append( + {"name": card['name'], "key": "names", "value": ""}) else: for related_card_name in card['names']: if related_card_name != card['name']: @@ -432,7 +218,8 @@ def error_check(mtgjson, card_corrections={}): if card2['name'] == related_card_name: related_card = card2 if not related_card: - errors.append({"name": card['name'], "key": "names", "value": card['names']}) + errors.append( + {"name": card['name'], "key": "names", "value": card['names']}) else: if 'colors' in related_card: for color in related_card['colors']: @@ -443,12 +230,15 @@ def error_check(mtgjson, card_corrections={}): if 'colorIdentity' in related_card: for colorIdentity in related_card['colorIdentity']: if not 'colorIdentity' in card: - card['colorIdentity'] = [colorIdentity] + card['colorIdentity'] = [ + colorIdentity] elif not colorIdentity in card['colorIdentity']: - card['colorIdentity'].append(colorIdentity) + card['colorIdentity'].append( + colorIdentity) if 'number' in card: if not 'a' in card['number'] and not 'b' in card['number'] and not 'c' in card['number']: - errors.append({"name": card['name'], "key": "number", "value": card['number']}) + errors.append( + {"name": card['name'], "key": "number", "value": card['number']}) for card in mtgjson['cards']: for cardCorrection in card_corrections: @@ -466,6 +256,7 @@ def error_check(mtgjson, card_corrections={}): return [mtgjson, errors] + def remove_corrected_errors(errorlog=[], card_corrections=[], print_fixed=False): errorlog2 = {} for error in errorlog: @@ -486,383 +277,29 @@ def remove_corrected_errors(errorlog=[], card_corrections=[], print_fixed=False) errorlog2[error['name']][error['key']] = error['value'] return errorlog2 -def get_scryfall(setUrl): - #getUrl = 'https://api.scryfall.com/cards/search?q=++e:' - #setUrl = getUrl + setname.lower() - setDone = False - scryfall = [] - - while setDone == False: - setcards = requests.get(setUrl) - setcards = setcards.json() - if setcards.has_key('data'): - scryfall.append(setcards['data']) - else: - setDone = True - #print setUrl - #print setcards - print 'No Scryfall data' - scryfall = [''] - time.sleep(.1) - if setcards.has_key('has_more'): - if setcards['has_more'] == True: - setUrl = setcards['next_page'] - else: - setDone = True - else: - setDone = True - if not scryfall[0] == '': - scryfall = convert_scryfall(scryfall[0]) - return {'cards': scryfall} - else: - return {'cards': []} - -def convert_scryfall(scryfall): - cards2 = [] - for card in scryfall: - card2 = {} - card2['cmc'] = int(card['cmc']) - if card.has_key('mana_cost'): - card2['manaCost'] = card['mana_cost'].replace('{','').replace('}','') - else: - card2['manaCost'] = '' - card2['name'] = card['name'] - card2['number'] = card['collector_number'] - card2['rarity'] = card['rarity'].replace('mythic','mythic rare').title() - if card.has_key('oracle_text'): - card2['text'] = card['oracle_text'].replace(u"\u2014",'-').replace(u"\u2212","-") - else: - card2['text'] = '' - card2['url'] = card['image_uri'] - if not 'type_line' in card: - card['type_line'] = 'Unknown' - card2['type'] = card['type_line'].replace(u'—','-') - cardtypes = card['type_line'].split(u' — ')[0].replace('Legendary ','').replace('Snow ','')\ - .replace('Elite ','').replace('Basic ','').replace('World ','').replace('Ongoing ','') - cardtypes = cardtypes.split(' ') - if u' — ' in card['type_line']: - cardsubtypes = card['type_line'].split(u' — ')[1] - if ' ' in cardsubtypes: - card2['subtypes'] = cardsubtypes.split(' ') - else: - card2['subtypes'] = [cardsubtypes] - if 'Legendary' in card['type_line']: - if card2.has_key('supertypes'): - card2['supertypes'].append('Legendary') - else: - card2['supertypes'] = ['Legendary'] - if 'Snow' in card['type_line']: - if card2.has_key('supertypes'): - card2['supertypes'].append('Snow') - else: - card2['supertypes'] = ['Snow'] - if 'Elite' in card['type_line']: - if card2.has_key('supertypes'): - card2['supertypes'].append('Elite') - else: - card2['supertypes'] = ['Elite'] - if 'Basic' in card['type_line']: - if card2.has_key('supertypes'): - card2['supertypes'].append('Basic') - else: - card2['supertypes'] = ['Basic'] - if 'World' in card['type_line']: - if card2.has_key('supertypes'): - card2['supertypes'].append('World') - else: - card2['supertypes'] = ['World'] - if 'Ongoing' in card['type_line']: - if card2.has_key('supertypes'): - card2['supertypes'].append('Ongoing') - else: - card2['supertypes'] = ['Ongoing'] - card2['types'] = cardtypes - if card.has_key('color_identity'): - card2['colorIdentity'] = card['color_identity'] - if card.has_key('colors'): - if not card['colors'] == []: - card2['colors'] = [] - if 'W' in card['colors']: - card2['colors'].append("White") - if 'U' in card['colors']: - card2['colors'].append("Blue") - if 'B' in card['colors']: - card2['colors'].append("Black") - if 'R' in card['colors']: - card2['colors'].append("Red") - if 'G' in card['colors']: - card2['colors'].append("Green") - #card2['colors'] = card['colors'] - if card.has_key('all_parts'): - card2['names'] = [] - for partname in card['all_parts']: - card2['names'].append(partname['name']) - if card.has_key('power'): - card2['power'] = card['power'] - if card.has_key('toughness'): - card2['toughness'] = card['toughness'] - if card.has_key('layout'): - if card['layout'] != 'normal': - card2['layout'] = card['layout'] - if card.has_key('loyalty'): - card2['loyalty'] = card['loyalty'] - if card.has_key('artist'): - card2['artist'] = card['artist'] - #if card.has_key('source'): - # card2['source'] = card['source'] - #if card.has_key('rulings'): - # card2['rulings'] = card['rulings'] - if card.has_key('flavor_text'): - card2['flavor'] = card['flavor_text'] - if card.has_key('multiverse_id'): - card2['multiverseid'] = card['multiverse_id'] - - cards2.append(card2) - - return cards2 - -def smash_mtgs_scryfall(mtgs, scryfall): - for mtgscard in mtgs['cards']: - cardFound = False - for scryfallcard in scryfall['cards']: - if scryfallcard['name'] == mtgscard['name']: - for key in scryfallcard: - if key in mtgscard: - if not mtgscard[key] == scryfallcard[key]: - try: - print "%s's key %s\nMTGS : %s\nScryfall: %s" % (mtgscard['name'], key, mtgscard[key], scryfallcard[key]) - except: - print "Error printing Scryfall vs MTGS debug info for " + mtgscard['name'] - pass - cardFound = True - if not cardFound: - print "MTGS has card %s and Scryfall does not." % mtgscard['name'] - for scryfallcard in scryfall['cards']: - cardFound = False - for mtgscard in mtgs['cards']: - if scryfallcard['name'] == mtgscard['name']: - cardFound = True - if not cardFound: - print "Scryfall has card %s and MTGS does not." % scryfallcard['name'] - - return mtgs - -def scrape_fullspoil(url="http://magic.wizards.com/en/articles/archive/card-image-gallery/hour-devastation", setinfo={"setname":"HOU"}, showRarityColors=False, showFrameColors=False, manual_cards=[], delete_cards=[], split_cards=[]): - if 'setlongname' in setinfo: - url = 'http://magic.wizards.com/en/articles/archive/card-image-gallery/' + setinfo['setlongname'].lower().replace('of', '').replace( - ' ', ' ').replace(' ', '-') - page = requests.get(url) - tree = html.fromstring(page.content) - cards = [] - cardtree = tree.xpath('//*[@id="content-detail-page-of-an-article"]') - for child in cardtree: - cardElements = child.xpath('//*/p/img') - cardcount = 0 - for cardElement in cardElements: - card = { - "name": cardElement.attrib['alt'].replace(u"\u2019",'\'').split(' /// ')[0], - "img": cardElement.attrib['src'] - } - card["url"] = card["img"] - #card["cmc"] = 0 - #card["manaCost"] = "" - #card["type"] = "" - #card["types"] = [] - #card["text"] = "" - #card["colorIdentity"] = [""] - - #if card['name'] in split_cards: - # card["names"] = [card['name'], split_cards[card['name']]] - # card["layout"] = "split" - #notSplit = True - #for backsplit in split_cards: - # if card['name'] == split_cards[backsplit]: - # notSplit = False - #if not card['name'] in delete_cards: - cards.append(card) - cardcount += 1 - fullspoil = { "cards": cards } - print "Spoil Gallery has " + str(cardcount) + " cards." - download_images(fullspoil['cards'], setinfo['setname']) - fullspoil = get_rarities_by_symbol(fullspoil, setinfo['setname']) - fullspoil = get_mana_symbols(fullspoil, setinfo['setname']) - #fullspoil = get_colors_by_frame(fullspoil, setinfo['setname']) - return fullspoil def download_images(mtgjson, setcode): - if not os.path.isdir('images/' + setcode): - os.makedirs('images/' + setcode) - if 'cards' in mtgjson: - jsoncards = mtgjson['cards'] - else: - jsoncards = mtgjson - for card in jsoncards: - if card['url']: - if os.path.isfile('images/' + setcode + '/' + card['name'].replace(' // ','') + '.jpg'): - continue - #print 'Downloading ' + card['url'] + ' to images/' + setcode + '/' + card['name'].replace(' // ','') + '.jpg' - urllib.urlretrieve(card['url'], 'images/' + setcode + '/' + card['name'].replace(' // ','') + '.jpg') + if not os.path.isdir('images/' + setcode): + os.makedirs('images/' + setcode) + if 'cards' in mtgjson: + jsoncards = mtgjson['cards'] + else: + jsoncards = mtgjson + for card in jsoncards: + if card['url']: + if os.path.isfile('images/' + setcode + '/' + card['name'].replace(' // ', '') + '.jpg'): + continue + # print 'Downloading ' + card['url'] + ' to images/' + setcode + '/' + card['name'].replace(' // ','') + '.jpg' + urllib.urlretrieve(card['url'], 'images/' + setcode + + '/' + card['name'].replace(' // ', '') + '.jpg') -def get_rarities_by_symbol(fullspoil, setcode, split_cards=[]): - symbolPixels = (240, 219, 242, 221) - highVariance = 15 - colorAverages = { - "Common": [30, 27, 28], - "Uncommon": [121, 155, 169], - "Rare": [166, 143, 80], - "Mythic Rare": [201, 85, 14] - } - symbolCount = 0 - for card in fullspoil['cards']: - try: - cardImage = Image.open('images/' + setcode + '/' + card['name'].replace(' // ','') + '.jpg') - except: - continue - pass - if '//' in card['name']: - setSymbol = cardImage.crop((240, 138, 242, 140)) - else: - setSymbol = cardImage.crop(symbolPixels) - cardHistogram = setSymbol.histogram() - reds = cardHistogram[0:256] - greens = cardHistogram[256:256 * 2] - blues = cardHistogram[256 * 2: 256 * 3] - reds = sum(i * w for i, w in enumerate(reds)) / sum(reds) - greens = sum(i * w for i, w in enumerate(greens)) / sum(greens) - blues = sum(i * w for i, w in enumerate(blues)) / sum(blues) - variance = 768 - for color in colorAverages: - colorVariance = 0 - colorVariance = colorVariance + abs(colorAverages[color][0] - reds) - colorVariance = colorVariance + abs(colorAverages[color][1] - greens) - colorVariance = colorVariance + abs(colorAverages[color][2] - blues) - if colorVariance < variance: - variance = colorVariance - card['rarity'] = color - if variance > highVariance: - # if a card isn't close to any of the colors, it's probably a planeswalker? make it mythic. - print card['name'], 'has high variance of', variance, ', closest rarity is', card['rarity'] - card['rarity'] = "Mythic Rare" - #print card['name'], '$', reds, greens, blues - if symbolCount < 10: - setSymbol.save('images/' + card['name'].replace(' // ','') + '.symbol.jpg') - symbolCount += 1 - return fullspoil - -def get_colors_by_frame(fullspoil, setcode, split_cards={}): - framePixels = (20, 11, 76, 16) - highVariance = 10 - colorAverages = { - "White": [231,225,200], - "Blue": [103,193,230], - "Black": [58, 61, 54], - "Red": [221, 122, 101], - "Green": [118, 165, 131], - "Multicolor": [219, 200, 138], - "Artifact": [141, 165, 173], - "Colorless": [216, 197, 176], - } - symbolCount = 0 - for card in fullspoil['cards']: - try: - cardImage = Image.open('images/' + setcode + '/' + card['name'].replace(' // ','') + '.jpg') - except: - continue - pass - cardColor = cardImage.crop(framePixels) - - cardHistogram = cardColor.histogram() - reds = cardHistogram[0:256] - greens = cardHistogram[256:256 * 2] - blues = cardHistogram[256 * 2: 256 * 3] - reds = sum(i * w for i, w in enumerate(reds)) / sum(reds) - greens = sum(i * w for i, w in enumerate(greens)) / sum(greens) - blues = sum(i * w for i, w in enumerate(blues)) / sum(blues) - variance = 768 - for color in colorAverages: - colorVariance = 0 - colorVariance = colorVariance + abs(colorAverages[color][0] - reds) - colorVariance = colorVariance + abs(colorAverages[color][1] - greens) - colorVariance = colorVariance + abs(colorAverages[color][2] - blues) - if colorVariance < variance: - variance = colorVariance - card['colors'] = [color] - return fullspoil - -def get_mana_symbols(fullspoil={}, setcode="HOU", split_cards=[]): - manaBoxes = [(234, 23, 244, 33), (220, 23, 230, 33), (206, 23, 216, 33), (192, 23, 202, 33), (178, 23, 188, 33)] - highVariance = 0 - colorAverages = { - "W": [126, 123, 110], - "U": [115, 140, 151], - "B": [105, 99, 98], - "R": [120, 89, 77], - "G": [65, 78, 69], - "1": [162, 156, 154], - "2": [155, 148, 147], - "3": [160, 153, 152], - "4": [149, 143, 141], - "5": [155, 149, 147], - "6": [151, 145, 143], - "7": [169, 163, 161], - "X": [160, 154, 152] - } - for card in fullspoil['cards']: - try: - cardImage = Image.open('images/' + setcode + '/' + card['name'].replace(' // ','') + '.jpg') - except: - continue - pass - card['manaCost'] = "" - for manaBox in manaBoxes: - manaSymbol = cardImage.crop(manaBox) - cardHistogram = manaSymbol.histogram() - reds = cardHistogram[0:256] - greens = cardHistogram[256:256 * 2] - blues = cardHistogram[256 * 2: 256 * 3] - reds = sum(i * w for i, w in enumerate(reds)) / sum(reds) - greens = sum(i * w for i, w in enumerate(greens)) / sum(greens) - blues = sum(i * w for i, w in enumerate(blues)) / sum(blues) - variance = 768 - for color in colorAverages: - colorVariance = 0 - colorVariance = colorVariance + abs(colorAverages[color][0] - reds) - colorVariance = colorVariance + abs(colorAverages[color][1] - greens) - colorVariance = colorVariance + abs(colorAverages[color][2] - blues) - if colorVariance < variance: - variance = colorVariance - closestColor = color - if variance < 10: - #if card['name'] in ["Mirage Mirror", "Uncage the Menagerie", "Torment of Hailfire"]: - # print card['name'] + " " + str(reds) + " " + str(greens) + " " + str(blues) - if closestColor in ["2","5"]: - twoVSfive = (manaBox[0] + 1, manaBox[1] + 4, manaBox[2] - 5, manaBox[3] - 2) - manaSymbol = cardImage.crop(twoVSfive) - cardHistogram = manaSymbol.histogram() - reds = cardHistogram[0:256] - greens = cardHistogram[256:256 * 2] - blues = cardHistogram[256 * 2: 256 * 3] - reds = sum(i * w for i, w in enumerate(reds)) / sum(reds) - greens = sum(i * w for i, w in enumerate(greens)) / sum(greens) - blues = sum(i * w for i, w in enumerate(blues)) / sum(blues) - variance = 768 - colorVariance = 0 - colorVariance = colorVariance + abs(175 - reds) - colorVariance = colorVariance + abs(168 - greens) - colorVariance = colorVariance + abs(166 - blues) - if colorVariance < 10: - closestColor = "2" - elif colorVariance > 110 and colorVariance < 120: - closestColor = "5" - else: - continue - card['manaCost'] = closestColor + card['manaCost'] - return fullspoil def get_image_urls(mtgjson, isfullspoil, setname, setlongname, setSize=269, setinfo=False): - IMAGES = 'http://magic.wizards.com/en/content/' + setlongname.lower().replace(' ', '-') + '-cards' + IMAGES = 'http://magic.wizards.com/en/content/' + \ + setlongname.lower().replace(' ', '-') + '-cards' IMAGES2 = 'http://mythicspoiler.com/newspoilers.html' - IMAGES3 = 'http://magic.wizards.com/en/articles/archive/card-image-gallery/' + setlongname.lower().replace('of','').replace(' ',' ').replace(' ', '-') + IMAGES3 = 'http://magic.wizards.com/en/articles/archive/card-image-gallery/' + \ + setlongname.lower().replace('of', '').replace(' ', ' ').replace(' ', '-') text = requests.get(IMAGES).text text2 = requests.get(IMAGES2).text @@ -876,27 +313,33 @@ def get_image_urls(mtgjson, isfullspoil, setname, setlongname, setSize=269, seti cardname = ' // '.join(c['names']) else: cardname = c['name'] - match = re.search(wotcpattern.format(cardname.replace('\'','’')), text, re.DOTALL) + match = re.search(wotcpattern.format( + cardname.replace('\'', '’')), text, re.DOTALL) if match: c['url'] = match.groupdict()['img'] else: - match3 = re.search(wotcpattern2.format(cardname.replace('\'','’')), text3) + match3 = re.search(wotcpattern2.format( + cardname.replace('\'', '’')), text3) if match3: c['url'] = match3.groupdict()['img'] else: - match4 = re.search(wotcpattern.format(cardname.replace('\'','’')), text3, re.DOTALL) + match4 = re.search(wotcpattern.format( + cardname.replace('\'', '’')), text3, re.DOTALL) if match4: c['url'] = match4.groupdict()['img'] else: - match2 = re.search(mythicspoilerpattern.format(cardname.lower().replace(' // ','').replace(' ', '').replace(''', '').replace('-', '').replace('\'','').replace(',', '')), text2, re.DOTALL) + match2 = re.search(mythicspoilerpattern.format(cardname.lower().replace(' // ', '').replace( + ' ', '').replace(''', '').replace('-', '').replace('\'', '').replace(',', '')), text2, re.DOTALL) if match2 and not isfullspoil: - c['url'] = match2.group(0).replace(' src="', 'http://mythicspoiler.com/').replace('">', '') + c['url'] = match2.group(0).replace( + ' src="', 'http://mythicspoiler.com/').replace('">', '') pass if 'wizards.com' in c['url']: WOTC.append(c['name']) if setinfo: if 'mtgsurl' in setinfo and 'mtgscardpath' in setinfo: - mtgsImages = scrape_mtgs_images(setinfo['mtgsurl'], setinfo['mtgscardpath'], WOTC) + mtgsImages = mtgs_scraper.scrape_mtgs_images( + setinfo['mtgsurl'], setinfo['mtgscardpath'], WOTC) for card in mtgjson['cards']: if card['name'] in mtgsImages: if mtgsImages[card['name']]['url'] != '': @@ -907,53 +350,6 @@ def get_image_urls(mtgjson, isfullspoil, setname, setlongname, setSize=269, seti print(card['name'] + ' has no image.') return mtgjson -def smash_fullspoil(mtgjson, fullspoil): - different_keys = {} - for mtgjson_card in mtgjson['cards']: - for fullspoil_card in fullspoil['cards']: - if mtgjson_card['name'] == fullspoil_card['name']: - for key in fullspoil_card: - if key in mtgjson_card: - if mtgjson_card[key] != fullspoil_card[key] and key != 'colors': - if not fullspoil_card['name'] in different_keys: - different_keys[fullspoil_card['name']] = {key: fullspoil_card[key]} - else: - different_keys[fullspoil_card['name']][key] = fullspoil_card[key] - for fullspoil_card in fullspoil['cards']: - WOTC_only = [] - match = False - for mtgjson_card in mtgjson['cards']: - if mtgjson_card['name'] == fullspoil_card['name']: - match = True - if not match: - WOTC_only.append(fullspoil_card['name']) - if len(WOTC_only) > 0: - print "WOTC only cards: " - print WOTC_only - print different_keys - -def scrape_mtgs_images(url='http://www.mtgsalvation.com/spoilers/183-hour-of-devastation', mtgscardurl='http://www.mtgsalvation.com/cards/hour-of-devastation/', exemptlist=[]): - page = requests.get(url) - tree = html.fromstring(page.content) - cards = {} - cardstree = tree.xpath('//*[contains(@class, "log-card")]') - for child in cardstree: - if child.text in exemptlist: - continue - childurl = mtgscardurl + child.attrib['data-card-id'] + '-' + child.text.replace(' ','-').replace("'","").replace(',','').replace('-//','') - cardpage = requests.get(childurl) - tree = html.fromstring(cardpage.content) - cardtree = tree.xpath('//img[contains(@class, "card-spoiler-image")]') - try: - cardurl = cardtree[0].attrib['src'] - except: - cardurl = '' - pass - cards[child.text] = { - "url": cardurl - } - time.sleep(.2) - return cards def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]): if not os.path.isdir('out/'): @@ -979,7 +375,7 @@ def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]): "\n" "\n" "\n") - #print mtgjson + # print mtgjson for card in mtgjson["cards"]: for carda in split_cards: if card["name"] == split_cards[carda]: @@ -1013,7 +409,9 @@ def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]): for jsoncard in mtgjson["cards"]: if jsoncard['name'] == card['names'][1]: cardtype += " // " + jsoncard["type"] - manacost += " // " + (jsoncard["manaCost"]).replace('{', '').replace('}', '') + manacost += " // " + \ + (jsoncard["manaCost"]).replace( + '{', '').replace('}', '') cardcmc += " // " + str(jsoncard["cmc"]) text += "\n---\n" + jsoncard["text"] name += " // " + jsoncard['name'] @@ -1022,7 +420,6 @@ def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]): else: print card["name"] + " has multiple names and no 'layout' key" - tablerow = "1" if "Land" in cardtype: tablerow = "0" @@ -1037,13 +434,15 @@ def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]): if 'b' in card['number']: if 'layout' in card: if card['layout'] == 'split' or card['layout'] == 'aftermath': - #print "We're skipping " + card['name'] + " because it's the right side of a split card" + # print "We're skipping " + card['name'] + " because it's the right side of a split card" continue cardsxml.write("\n") cardsxml.write("" + name.encode('utf-8') + "\n") - cardsxml.write('' + setname + '\n') - cardsxml.write("" + manacost.encode('utf-8') + "\n") + cardsxml.write( + '' + setname + '\n') + cardsxml.write( + "" + manacost.encode('utf-8') + "\n") cardsxml.write("" + cardcmc + "\n") if card.has_key('colors'): colorTranslate = { @@ -1054,7 +453,8 @@ def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]): "Green": "G" } for color in card['colors']: - cardsxml.write('' + colorTranslate[color] + '\n') + cardsxml.write( + '' + colorTranslate[color] + '\n') if name + ' enters the battlefield tapped' in text: cardsxml.write("1\n") cardsxml.write("" + cardtype.encode('utf-8') + "\n") @@ -1065,8 +465,9 @@ def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]): cardsxml.write("" + tablerow + "\n") cardsxml.write("" + text.encode('utf-8') + "\n") if related: - # for relatedname in related: - cardsxml.write("" + related.encode('utf-8') + "\n") + # for relatedname in related: + cardsxml.write( + "" + related.encode('utf-8') + "\n") related = '' cardsxml.write("\n") @@ -1080,6 +481,7 @@ def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]): print 'Newest: ' + str(newest) print 'Runtime: ' + str(datetime.datetime.today().strftime('%H:%M')) + ' on ' + str(datetime.date.today()) + def write_combined_xml(mtgjson, setinfos): if not os.path.isdir('out/'): os.makedirs('out/') @@ -1092,21 +494,21 @@ def write_combined_xml(mtgjson, setinfos): setobj = mtgjson[setcode] if 'cards' in setobj and len(setobj['cards']) > 0: cardsxml.write("\n" - + setcode + - "\n" - "" - + setobj['name'] + - "\n" - "" - + setobj['type'].title() + - "\n" - "" - + setobj['releaseDate'] + - "\n" - "\n") + + setcode + + "
\n" + "" + + setobj['name'] + + "\n" + "" + + setobj['type'].title() + + "\n" + "" + + setobj['releaseDate'] + + "\n" + "\n") cardsxml.write( - "\n" - "\n") + "\n" + "\n") count = 0 dfccount = 0 newest = '' @@ -1144,23 +546,26 @@ def write_combined_xml(mtgjson, setinfos): if len(card["names"]) > 1: if card["names"][0] == card["name"]: related = card["names"][1] - text += '\n\n(Related: ' + card["names"][1] + ')' + text += '\n\n(Related: ' + \ + card["names"][1] + ')' dfccount += 1 elif card['names'][1] == card['name']: related = card["names"][0] - text += '\n\n(Related: ' + card["names"][0] + ')' + text += '\n\n(Related: ' + \ + card["names"][0] + ')' else: for cardb in setobj['cards']: if cardb['name'] == card["names"][1]: cardtype += " // " + cardb['type'] - manacost += " // " + (cardb["manaCost"]).replace('{', '').replace('}', '') + manacost += " // " + \ + (cardb["manaCost"]).replace( + '{', '').replace('}', '') cardcmc += " // " + str(cardb["cmc"]) text += "\n---\n" + cardb["text"] name += " // " + cardb['name'] else: print card["name"] + " has multiple names and no 'layout' key" - tablerow = "1" if "Land" in cardtype: tablerow = "0" @@ -1175,13 +580,15 @@ def write_combined_xml(mtgjson, setinfos): if 'b' in card['number']: if 'layout' in card: if card['layout'] == 'split' or card['layout'] == 'aftermath': - #print "We're skipping " + card['name'] + " because it's the right side of a split card" + # print "We're skipping " + card['name'] + " because it's the right side of a split card" continue cardsxml.write("\n") cardsxml.write("" + name.encode('utf-8') + "\n") - cardsxml.write('' + setcode + '\n') - cardsxml.write("" + manacost.encode('utf-8') + "\n") + cardsxml.write( + '' + setcode + '\n') + cardsxml.write( + "" + manacost.encode('utf-8') + "\n") cardsxml.write("" + cardcmc + "\n") if card.has_key('colors'): colorTranslate = { @@ -1192,19 +599,22 @@ def write_combined_xml(mtgjson, setinfos): "Green": "G" } for color in card['colors']: - cardsxml.write('' + colorTranslate[color] + '\n') + cardsxml.write( + '' + colorTranslate[color] + '\n') if name + ' enters the battlefield tapped' in text: cardsxml.write("1\n") cardsxml.write("" + cardtype.encode('utf-8') + "\n") if pt: cardsxml.write("" + pt + "\n") if card.has_key('loyalty'): - cardsxml.write("" + str(card['loyalty']) + "\n") + cardsxml.write( + "" + str(card['loyalty']) + "\n") cardsxml.write("" + tablerow + "\n") cardsxml.write("" + text.encode('utf-8') + "\n") if related: - # for relatedname in related: - cardsxml.write("" + related.encode('utf-8') + "\n") + # for relatedname in related: + cardsxml.write( + "" + related.encode('utf-8') + "\n") related = '' cardsxml.write("\n") @@ -1218,22 +628,27 @@ def write_combined_xml(mtgjson, setinfos): print 'Newest: ' + str(newest) print 'Runtime: ' + str(datetime.datetime.today().strftime('%H:%M')) + ' on ' + str(datetime.date.today()) + def pretty_xml(infile): - prettyxml = xml.dom.minidom.parse(infile) # or xml.dom.minidom.parseString(xml_string) + # or xml.dom.minidom.parseString(xml_string) + prettyxml = xml.dom.minidom.parse(infile) pretty_xml_as_string = prettyxml.toprettyxml(newl='') return pretty_xml_as_string + def make_allsets(AllSets, mtgjson, setname): AllSets[setname] = mtgjson return AllSets + def scrape_masterpieces(url='http://www.mtgsalvation.com/spoilers/181-amonkhet-invocations', mtgscardurl='http://www.mtgsalvation.com/cards/amonkhet-invocations/'): page = requests.get(url) tree = html.fromstring(page.content) cards = [] cardstree = tree.xpath('//*[contains(@class, "log-card")]') for child in cardstree: - childurl = mtgscardurl + child.attrib['data-card-id'] + '-' + child.text.replace(' ','-') + childurl = mtgscardurl + \ + child.attrib['data-card-id'] + '-' + child.text.replace(' ', '-') cardpage = requests.get(childurl) tree = html.fromstring(cardpage.content) cardtree = tree.xpath('//img[contains(@class, "card-spoiler-image")]') @@ -1249,8 +664,10 @@ def scrape_masterpieces(url='http://www.mtgsalvation.com/spoilers/181-amonkhet-i cards.append(card) return cards + def make_masterpieces(headers, AllSets, spoil): - masterpieces = scrape_masterpieces(headers['mtgsurl'], headers['mtgscardpath']) + masterpieces = scrape_masterpieces( + headers['mtgsurl'], headers['mtgscardpath']) masterpieces2 = [] for masterpiece in masterpieces: matched = False @@ -1291,6 +708,7 @@ def make_masterpieces(headers, AllSets, spoil): } return mpsjson + def set_has_cards(setinfo, manual_cards, mtgjson): if setinfo['setname'] in manual_cards or setinfo['setname'] in mtgjson: return True @@ -1299,6 +717,7 @@ def set_has_cards(setinfo, manual_cards, mtgjson): if set == setinfo['setname']: return True + def get_allsets(): class MyOpener(urllib.FancyURLopener): version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko / 20071127 Firefox / 2.0.0.11' @@ -1309,6 +728,7 @@ def get_allsets(): AllSets = json.load(data_file) return AllSets + def add_headers(mtgjson, setinfos): mtgjson2 = { "border": "black", @@ -1321,8 +741,8 @@ def add_headers(mtgjson, setinfos): if not 'noBooster' in setinfos: mtgjson2['booster'] = [ [ - "rare", - "mythic rare" + "rare", + "mythic rare" ], "uncommon", "uncommon", @@ -1343,81 +763,3 @@ def add_headers(mtgjson, setinfos): if 'blockname' in setinfos: mtgjson2['block'] = setinfos['blockname'] return mtgjson2 - -def get_mythic_cards(url='http://mythicspoiler.com/ixa/', mtgjson=False): #mtgjson is optional, will ignore cards found if passed - cards = {'cards':[]} - r = requests.get(url) - soup = BS(r.text, "html.parser") - cardurls = soup.find_all('a', 'card') - urllist = [] - for cardurl in cardurls: - try: - urllist.append(url + str(cardurl).split("href=\"")[1].split('"> highVariance: + # if a card isn't close to any of the colors, it's probably a planeswalker? make it mythic. + print card['name'], 'has high variance of', variance, ', closest rarity is', card['rarity'] + card['rarity'] = "Mythic Rare" + # print card['name'], '$', reds, greens, blues + if symbolCount < 10: + setSymbol.save( + 'images/' + card['name'].replace(' // ', '') + '.symbol.jpg') + symbolCount += 1 + return fullspoil + + +def get_colors_by_frame(fullspoil, setcode, split_cards={}): + framePixels = (20, 11, 76, 16) + highVariance = 10 + colorAverages = { + "White": [231, 225, 200], + "Blue": [103, 193, 230], + "Black": [58, 61, 54], + "Red": [221, 122, 101], + "Green": [118, 165, 131], + "Multicolor": [219, 200, 138], + "Artifact": [141, 165, 173], + "Colorless": [216, 197, 176], + } + symbolCount = 0 + for card in fullspoil['cards']: + try: + cardImage = Image.open( + 'images/' + setcode + '/' + card['name'].replace(' // ', '') + '.jpg') + except: + continue + pass + cardColor = cardImage.crop(framePixels) + + cardHistogram = cardColor.histogram() + reds = cardHistogram[0:256] + greens = cardHistogram[256:256 * 2] + blues = cardHistogram[256 * 2: 256 * 3] + reds = sum(i * w for i, w in enumerate(reds)) / sum(reds) + greens = sum(i * w for i, w in enumerate(greens)) / sum(greens) + blues = sum(i * w for i, w in enumerate(blues)) / sum(blues) + variance = 768 + for color in colorAverages: + colorVariance = 0 + colorVariance = colorVariance + \ + abs(colorAverages[color][0] - reds) + colorVariance = colorVariance + \ + abs(colorAverages[color][1] - greens) + colorVariance = colorVariance + \ + abs(colorAverages[color][2] - blues) + if colorVariance < variance: + variance = colorVariance + card['colors'] = [color] + return fullspoil + + +def get_mana_symbols(fullspoil={}, setcode="HOU", split_cards=[]): + manaBoxes = [(234, 23, 244, 33), (220, 23, 230, 33), + (206, 23, 216, 33), (192, 23, 202, 33), (178, 23, 188, 33)] + highVariance = 0 + colorAverages = { + "W": [126, 123, 110], + "U": [115, 140, 151], + "B": [105, 99, 98], + "R": [120, 89, 77], + "G": [65, 78, 69], + "1": [162, 156, 154], + "2": [155, 148, 147], + "3": [160, 153, 152], + "4": [149, 143, 141], + "5": [155, 149, 147], + "6": [151, 145, 143], + "7": [169, 163, 161], + "X": [160, 154, 152] + } + for card in fullspoil['cards']: + try: + cardImage = Image.open( + 'images/' + setcode + '/' + card['name'].replace(' // ', '') + '.jpg') + except: + continue + pass + card['manaCost'] = "" + for manaBox in manaBoxes: + manaSymbol = cardImage.crop(manaBox) + cardHistogram = manaSymbol.histogram() + reds = cardHistogram[0:256] + greens = cardHistogram[256:256 * 2] + blues = cardHistogram[256 * 2: 256 * 3] + reds = sum(i * w for i, w in enumerate(reds)) / sum(reds) + greens = sum(i * w for i, w in enumerate(greens)) / sum(greens) + blues = sum(i * w for i, w in enumerate(blues)) / sum(blues) + variance = 768 + for color in colorAverages: + colorVariance = 0 + colorVariance = colorVariance + \ + abs(colorAverages[color][0] - reds) + colorVariance = colorVariance + \ + abs(colorAverages[color][1] - greens) + colorVariance = colorVariance + \ + abs(colorAverages[color][2] - blues) + if colorVariance < variance: + variance = colorVariance + closestColor = color + if variance < 10: + # if card['name'] in ["Mirage Mirror", "Uncage the Menagerie", "Torment of Hailfire"]: + # print card['name'] + " " + str(reds) + " " + str(greens) + " " + str(blues) + if closestColor in ["2", "5"]: + twoVSfive = ( + manaBox[0] + 1, manaBox[1] + 4, manaBox[2] - 5, manaBox[3] - 2) + manaSymbol = cardImage.crop(twoVSfive) + cardHistogram = manaSymbol.histogram() + reds = cardHistogram[0:256] + greens = cardHistogram[256:256 * 2] + blues = cardHistogram[256 * 2: 256 * 3] + reds = sum( + i * w for i, w in enumerate(reds)) / sum(reds) + greens = sum( + i * w for i, w in enumerate(greens)) / sum(greens) + blues = sum( + i * w for i, w in enumerate(blues)) / sum(blues) + variance = 768 + colorVariance = 0 + colorVariance = colorVariance + abs(175 - reds) + colorVariance = colorVariance + abs(168 - greens) + colorVariance = colorVariance + abs(166 - blues) + if colorVariance < 10: + closestColor = "2" + elif colorVariance > 110 and colorVariance < 120: + closestColor = "5" + else: + continue + card['manaCost'] = closestColor + card['manaCost'] + return fullspoil + + +def smash_fullspoil(mtgjson, fullspoil): + different_keys = {} + for mtgjson_card in mtgjson['cards']: + for fullspoil_card in fullspoil['cards']: + if mtgjson_card['name'] == fullspoil_card['name']: + for key in fullspoil_card: + if key in mtgjson_card: + if mtgjson_card[key] != fullspoil_card[key] and key != 'colors': + if not fullspoil_card['name'] in different_keys: + different_keys[fullspoil_card['name']] = { + key: fullspoil_card[key]} + else: + different_keys[fullspoil_card['name'] + ][key] = fullspoil_card[key] + for fullspoil_card in fullspoil['cards']: + WOTC_only = [] + match = False + for mtgjson_card in mtgjson['cards']: + if mtgjson_card['name'] == fullspoil_card['name']: + match = True + if not match: + WOTC_only.append(fullspoil_card['name']) + if len(WOTC_only) > 0: + print "WOTC only cards: " + print WOTC_only + print different_keys