# -*- coding: utf-8 -*-
import requests
import re
import os
from lxml import html
import lzma
import datetime
import json
import mtgs_scraper
import xml.dom.minidom

# NOTE(review): this module was re-indented from a copy whose line breaks had
# been collapsed and whose "<...>" markup inside string literals had been
# stripped.  Block nesting was inferred from syntax, and every string literal
# is reproduced exactly as that copy shows it.  Spots where text is known to
# be missing carry their own NOTE(review) comment -- restore them from version
# control before relying on this file.

# Single-letter colour codes and the colour names used in card['colors'].
_COLOR_NAMES = {
    "W": "White",
    "U": "Blue",
    "B": "Black",
    "R": "Red",
    "G": "Green"
}

# Inverse of _COLOR_NAMES, used when writing colour letters to the XML output.
_COLOR_LETTERS = {
    "White": "W",
    "Blue": "U",
    "Black": "B",
    "Red": "R",
    "Green": "G"
}

# Words removed from the front half of a type line when deriving card['types'].
_SUPERTYPE_PREFIXES = ('Legendary ', 'Snow ', 'Elite ', 'Basic ', 'World ', 'Ongoing ')

_VALID_TYPES = ('Creature', 'Artifact', 'Conspiracy', 'Enchantment', 'Instant', 'Land',
                'Phenomenon', 'Plane', 'Planeswalker', 'Scheme', 'Sorcery', 'Tribal',
                'Vanguard')


def _strip_braces(cost):
    """Return *cost* with every '{' and '}' removed."""
    return cost.replace('{', '').replace('}', '')


def _add_color_identity(card, letter):
    """Add *letter* to card['colorIdentity'], creating the field if needed."""
    if 'colorIdentity' in card:
        if letter not in card['colorIdentity']:
            # `+=` is kept on purpose: it works whether the field is a list
            # or a plain string of letters.
            card['colorIdentity'] += letter
    else:
        card['colorIdentity'] = [letter]


def _complete_manual_card(card):
    """Fill in the derived fields of one manually supplied card, in place.

    Normalises manaCost/number to str and derives cmc, types, subtypes,
    colors and colorIdentity when they are not given explicitly.
    """
    if 'manaCost' in card:
        card['manaCost'] = str(card['manaCost'])
    if 'number' in card:
        card['number'] = str(card['number'])

    if 'cmc' not in card:
        working_cmc = 0
        if 'manaCost' in card:
            # Character-wise sum: digits count their value, X counts 0,
            # anything else counts 1.
            for mana_symbol in _strip_braces(card['manaCost']):
                if mana_symbol.isdigit():
                    working_cmc += int(mana_symbol)
                elif mana_symbol != 'X':
                    working_cmc += 1
        card['cmc'] = working_cmc

    if 'types' not in card:
        working_types = card['type']
        if ' - ' in working_types:
            working_types = card['type'].split(' - ')[0]
        for prefix in _SUPERTYPE_PREFIXES:
            working_types = working_types.replace(prefix, '')
        card['types'] = working_types.strip().split(' ')

    if 'subtypes' not in card:
        if '-' in card['type']:
            working_subtypes = card['type'].split('-')[1].strip()
            if working_subtypes:
                card['subtypes'] = working_subtypes.split(' ')

    if 'manaCost' in card:
        if 'text' in card and 'Devoid' not in card['text']:
            for letter in str(card['manaCost']):
                # Only real colour letters contribute.  Previously any other
                # character (braces, 'C', '/', ...) was added to the colour
                # identity and then raised KeyError on the colour lookup.
                if letter not in _COLOR_NAMES:
                    continue
                _add_color_identity(card, letter)
                if 'colors' in card:
                    if _COLOR_NAMES[letter] not in card['colors']:
                        card['colors'].append(_COLOR_NAMES[letter])
                else:
                    card['colors'] = [_COLOR_NAMES[letter]]

    if 'text' in card:
        # Mana symbols in the rules text count towards colour identity only.
        for color_letter in _COLOR_NAMES:
            if '{' + color_letter + '}' in card['text']:
                _add_color_identity(card, color_letter)


def correct_cards(mtgjson, manual_cards=None, card_corrections=None, delete_cards=None):
    """Merge manual cards into *mtgjson* and normalise the combined card list.

    Args:
        mtgjson: dict with a 'cards' list of card dicts.
        manual_cards: card dicts that override scraped cards of the same name
            or are appended if no such card exists.  They are completed in
            place (cmc, types, colours, ...).
        card_corrections: accepted for interface compatibility; not used here.
        delete_cards: names of scraped cards to drop.

    Returns:
        A new dict {"cards": [...]}; other keys of *mtgjson* are not carried
        over.  Card dicts are shared with the inputs and modified in place.
    """
    manual_cards = [] if manual_cards is None else manual_cards
    delete_cards = [] if delete_cards is None else delete_cards

    for card in manual_cards:
        _complete_manual_card(card)

    merged = []
    manual_added = []
    for card in mtgjson['cards']:
        is_manual = False
        for manual_card in manual_cards:
            if card['name'] == manual_card['name']:
                merged.append(manual_card)
                manual_added.append(manual_card['name'] + " (overwritten)")
                is_manual = True
        if not is_manual and card['name'] not in delete_cards:
            merged.append(card)

    scraped_names = {card['name'] for card in mtgjson['cards']}
    for manual_card in manual_cards:
        if manual_card['name'] not in scraped_names:
            merged.append(manual_card)
            manual_added.append(manual_card['name'])
    if manual_added != []:
        print ("Manual Cards Added: " + str(manual_added).strip('[]'))

    mtgjson = {"cards": merged}

    # Maps a card name to the name of another card sharing its collector
    # number, i.e. the other face of a double-faced card.  Numbers are
    # rewritten (a/b suffix) while iterating, so the order of the statements
    # below matters.
    transforms = {}
    for card in mtgjson['cards']:
        if 'text' in card:
            if '{' in card['text']:
                card['text'] = re.sub(r'{(.*?)}', replace_costs, card['text'])
            for card2 in mtgjson['cards']:
                if 'number' in card and 'number' in card2 and card2['number'] == card['number'] and \
                        not card['name'] == card2['name'] and card['number'] != '?' and card2['number'] != '?':
                    transforms[card['name']] = card2['name']
            if 'number' in card and '?' not in card['number']:
                if 'transforms from' in card['text'].lower():
                    # Back face: force a 'b' suffix on the number.
                    if 'b' not in card['number']:
                        if 'a' in card['number']:
                            card['number'] = card['number'].replace('a', 'b')
                        else:
                            card['number'] = str(card['number']) + 'b'
                    card['layout'] = 'double-faced'
                if 'transform ' in card['text'].lower() or 'transformed' in card['text'].lower():
                    # Front face: force an 'a' suffix on the number.
                    if 'a' not in card['number']:
                        if 'b' in card['number']:
                            card['number'] = card['number'].replace('b', 'a')
                        else:
                            card['number'] = str(card['number']) + 'a'
                    card['layout'] = 'double-faced'
        # Parenthesised: the previous `A and B or C` form evaluated
        # card['number'] even when the key was absent.
        if 'number' in card and ('a' in card['number'] or 'b' in card['number']):
            for card1 in transforms:
                if card['name'] == card1:
                    if 'a' in card['number']:
                        card['names'] = [card1, transforms[card1]]
                    else:
                        card['names'] = [transforms[card1], card1]
                if card['name'] == transforms[card1]:
                    if 'a' in card['number']:
                        card['names'] = [card['name'], card1]
                    else:
                        card['names'] = [card1, card['name']]

    return mtgjson


def replace_costs(match):
    """re.sub callback: wrap every character of group 1 in its own braces.

    '{2w}' becomes '{2}{W}'; an empty group yields ''.
    """
    return ''.join('{' + str(symbol).upper() + '}' for symbol in match.group(1))


def error_check(mtgjson, card_corrections=None):
    """Collect data-quality errors for every card and apply corrections.

    Args:
        mtgjson: dict with a 'cards' list; cards are modified in place.
        card_corrections: dict mapping card name -> {field: value}.  The
            special field 'img' is stored as card['url'] and 'name' renames
            the card after all other fields were applied.

    Returns:
        [mtgjson, errors] where errors is a list of dicts describing each
        problem found.
    """
    card_corrections = {} if card_corrections is None else card_corrections
    errors = []
    for card in mtgjson['cards']:
        for key in card:
            if key == "":
                errors.append({"name": card['name'], "key": key, "value": ""})
        for required_key in ('name', 'type', 'types'):
            if required_key not in card:
                # Report the key that is missing (this used to report the
                # stale loop variable of the loop above).
                errors.append(
                    {"name": card['name'], "key": required_key, "missing": True})
        if 'text' in card:
            # NOTE(review): the arguments of these replace() calls were
            # stripped from the recovered copy (presumably HTML tags), which
            # turns them into no-ops.  A fourth .replace( call and the code
            # following it are missing as well -- restore from history.
            card['text'] = card['text'].replace('', '').replace(
                '', '').replace('', '')
        # NOTE(review): only " 0:" survives of the condition guarding the next
        # block.  `card['cmc'] > 0` is an assumption based on the "match"
        # value and the else branch; the 'cmc' presence check is added so the
        # assumption cannot raise KeyError.  Confirm against history.
        if 'cmc' in card:
            if card['cmc'] > 0:
                if 'manaCost' not in card:
                    errors.append(
                        {"name": card['name'], "key": "manaCost", "value": "", "match": card['cmc']})
            else:
                if 'manaCost' in card:
                    errors.append(
                        {"name": card['name'], "key": "manaCost", "oldvalue": card['manaCost'], "fixed": True})
                    del card["manaCost"]
        if 'colors' in card:
            if 'colorIdentity' not in card:
                if 'text' in card:
                    if 'devoid' not in card['text'].lower():
                        errors.append(
                            {"name": card['name'], "key": "colorIdentity", "value": ""})
                else:
                    errors.append(
                        {"name": card['name'], "key": "colorIdentity", "value": ""})
        if 'colorIdentity' in card:
            if 'colors' not in card:
                # this one will false positive on emerge cards
                if 'Land' not in card['type'] and 'Artifact' not in card['type'] and 'Eldrazi' not in card['type']:
                    if 'text' in card:
                        if 'emerge' not in card['text'].lower() and 'devoid' not in card['text'].lower():
                            errors.append(
                                {"name": card['name'], "key": "colors", "value": ""})
                    else:
                        errors.append(
                            {"name": card['name'], "key": "colors", "value": ""})
        if 'url' not in card:
            errors.append({"name": card['name'], "key": "url", "value": ""})
        elif len(card['url']) < 10:
            errors.append({"name": card['name'], "key": "url", "value": ""})
        if 'number' not in card:
            errors.append({"name": card['name'], "key": "number", "value": ""})
        if 'types' not in card:
            errors.append({"name": card['name'], "key": "types", "value": ""})
        else:
            for card_type in card['types']:
                if card_type not in _VALID_TYPES:
                    errors.append({"name": card['name'], "key": "types", "value": card['types']})

    # we're going to loop through again and make sure split cards get paired
    for card in mtgjson['cards']:
        if 'layout' in card:
            if card['layout'] == 'split' or card['layout'] == 'meld' or card['layout'] == 'aftermath':
                if 'names' not in card:
                    errors.append(
                        {"name": card['name'], "key": "names", "value": ""})
                else:
                    for related_card_name in card['names']:
                        if related_card_name != card['name']:
                            related_card = False
                            for card2 in mtgjson['cards']:
                                if card2['name'] == related_card_name:
                                    related_card = card2
                            if not related_card:
                                errors.append(
                                    {"name": card['name'], "key": "names", "value": card['names']})
                            else:
                                # Each half inherits the colours of the other.
                                if 'colors' in related_card:
                                    for color in related_card['colors']:
                                        if 'colors' not in card:
                                            card['colors'] = [color]
                                        elif color not in card['colors']:
                                            card['colors'].append(color)
                                if 'colorIdentity' in related_card:
                                    for color_identity in related_card['colorIdentity']:
                                        if 'colorIdentity' not in card:
                                            card['colorIdentity'] = [color_identity]
                                        elif color_identity not in card['colorIdentity']:
                                            card['colorIdentity'].append(color_identity)
                if 'number' in card:
                    if 'a' not in card['number'] and 'b' not in card['number'] and 'c' not in card['number']:
                        errors.append(
                            {"name": card['name'], "key": "number", "value": card['number']})

    for card in mtgjson['cards']:
        for card_correction in card_corrections:
            if card['name'] == card_correction:
                correction = card_corrections[card_correction]
                for correction_type in correction:
                    if correction_type == 'number':
                        correction['number'] = str(correction['number'])
                    if not correction_type == 'name':
                        if correction_type == 'img':
                            card['url'] = correction[correction_type]
                        else:
                            card[correction_type] = correction[correction_type]
                # Rename last so the other fields are applied under the old name.
                if 'name' in correction:
                    card['name'] = correction['name']

    return [mtgjson, errors]


def remove_corrected_errors(errorlog=None, card_corrections=None, print_fixed=False):
    """Drop errors that a correction covers and group the rest by card.

    Args:
        errorlog: list of error dicts as produced by error_check().
        card_corrections: dict mapping card name -> {field: value}.
        print_fixed: when False, errors flagged "fixed": True are dropped.

    Returns:
        dict mapping card name -> {key: value}.  Errors without a 'value'
        get an empty string (the error dict itself is updated as well).
    """
    errorlog = [] if errorlog is None else errorlog
    card_corrections = {} if card_corrections is None else card_corrections
    remaining = {}
    for error in errorlog:
        if not print_fixed:
            if 'fixed' in error and error['fixed'] == True:
                continue
        corrected = any(
            error['name'] == correction and error['key'] == correction_type
            for correction in card_corrections
            for correction_type in card_corrections[correction])
        if corrected:
            continue
        if error['name'] not in remaining:
            remaining[error['name']] = {}
        if 'value' not in error:
            error['value'] = ""
        remaining[error['name']][error['key']] = error['value']
    return remaining


def get_image_urls(mtgjson, isfullspoil, setinfo=False):
    """Look up an image URL for every card and store it in card['url'].

    Sources are tried in order: the Wizards product page, the Wizards card
    image gallery, mythicspoiler.com (only when *isfullspoil* is false) and
    finally the MTGSalvation scraper, which may override earlier results.

    Args:
        mtgjson: dict with a 'cards' list; cards are modified in place.
        isfullspoil: when true, mythicspoiler.com matches are ignored.
        setinfo: dict with at least 'name' and 'code'.  'mythicCode' defaults
            to 'code' and is written back into the dict.  'mtgsurl' and
            'mtgscardpath' enable the MTGSalvation lookup.

    Returns:
        The same *mtgjson* object.

    Raises:
        ValueError: if *setinfo* is not supplied.
    """
    if not setinfo:
        # The False default always crashed on the membership test below; fail
        # with a message that names the problem instead.
        raise ValueError("get_image_urls() requires a setinfo dict")
    if 'mythicCode' not in setinfo:
        setinfo['mythicCode'] = setinfo['code']
    IMAGES = 'https://magic.wizards.com/en/products/' + \
        setinfo['name'].lower().replace(' ', '-') + '/cards'
    IMAGES2 = 'http://mythicspoiler.com/newspoilers.html'
    # NOTE(review): replace(' ', ' ') is a no-op as recovered; it presumably
    # collapsed a double space before whitespace was squeezed -- confirm.
    IMAGES3 = 'http://magic.wizards.com/en/articles/archive/card-image-gallery/' + \
        setinfo['name'].lower().replace('of', '').replace(' ', ' ').replace(' ', '-')

    text = requests.get(IMAGES).text
    text2 = requests.get(IMAGES2).text
    text3 = requests.get(IMAGES3).text
    # NOTE(review): the HTML part of this pattern was stripped from the
    # recovered copy, and the definitions of `wotcpattern2` and
    # `mythicspoilerpattern` used below are missing entirely.  The named
    # group 'img' read below is not in the pattern as shown.  Restore all
    # three from history.
    wotcpattern = r'{}.*?'
    WOTC = []
    for c in mtgjson['cards']:
        if 'names' in c:
            cardname = ' // '.join(c['names'])
        else:
            cardname = c['name']
        wotc_name = cardname.replace('\'', '’')
        match = re.search(wotcpattern.format(wotc_name), text, re.DOTALL)
        if match:
            c['url'] = match.groupdict()['img']
        else:
            match3 = re.search(wotcpattern2.format(wotc_name), text3)
            if match3:
                c['url'] = match3.groupdict()['img']
            else:
                match4 = re.search(wotcpattern.format(wotc_name), text3, re.DOTALL)
                if match4:
                    c['url'] = match4.groupdict()['img']
                else:
                    # NOTE(review): the first apostrophe replacement appeared
                    # as an unquotable ''' in the recovered copy, presumably a
                    # decoded HTML entity -- confirm.
                    mythic_name = cardname.lower().replace(' // ', '').replace(
                        ' ', '').replace("'", '').replace('-', '').replace('\'', '').replace(',', '')
                    match2 = re.search(mythicspoilerpattern.format(mythic_name), text2, re.DOTALL)
                    if match2 and not isfullspoil:
                        c['url'] = match2.group(0).replace(
                            ' src="', 'http://mythicspoiler.com/').replace('">', '')
        # .get(): a card without any image no longer raises KeyError here.
        if 'wizards.com' in c.get('url', ''):
            WOTC.append(c['name'])
    if 'mtgsurl' in setinfo and 'mtgscardpath' in setinfo:
        mtgsImages = mtgs_scraper.scrape_mtgs_images(
            setinfo['mtgsurl'], setinfo['mtgscardpath'], WOTC)
        for card in mtgjson['cards']:
            if card['name'] in mtgsImages:
                if mtgsImages[card['name']]['url'] != '':
                    card['url'] = mtgsImages[card['name']]['url']
    return mtgjson


def _power_toughness(card):
    """Return 'power/toughness' for *card*, or 0 when it has no power."""
    if card.get("power"):
        return str(card["power"]) + "/" + str(card["toughness"])
    return 0


def _table_row(cardtype):
    """Return the table-row code written to the XML for a type line."""
    if "Land" in cardtype:
        return "0"
    if "Sorcery" in cardtype or "Instant" in cardtype:
        return "3"
    if "Creature" in cardtype:
        return "2"
    return "1"


def write_xml(mtgjson, code, name, releaseDate):
    """Write the cards of one set to out/<code>.xml and print statistics.

    Does nothing when *mtgjson* has no cards.  The right half of split and
    aftermath cards is folded into the left half.

    NOTE(review): every literal written below is reproduced exactly as the
    recovered copy shows it; the XML element tags (and possibly expressions
    embedded in them) were stripped from that copy, so the output of this
    function is NOT valid XML until the literals are restored from history.

    Args:
        mtgjson: dict with a 'cards' list.
        code: set code, used in the file name and in each card entry.
        name: set name for the header.
        releaseDate: release date string for the header.
    """
    if 'cards' not in mtgjson or not mtgjson['cards'] or mtgjson['cards'] == []:
        return
    if not os.path.isdir('out/'):
        os.makedirs('out/')
    count = 0
    dfccount = 0
    newest = ''
    related = 0
    # 'w' already truncates; the context manager closes the file even when a
    # malformed card raises part-way through.
    with open('out/' + code + '.xml', 'w') as cardsxml:
        cardsxml.write("\n"
                       "\n"
                       "\n\n" + code + "\n"
                       "" + name + "\n"
                       "Expansion\n"
                       "" + releaseDate + "\n"
                       "\n"
                       "\n"
                       "\n")
        for card in mtgjson["cards"]:
            if 'names' in card:
                if 'layout' in card and card['layout'] != 'double-faced':
                    if card["name"] == card['names'][1]:
                        continue
            if count == 0:
                newest = card["name"]
            count += 1
            cardname = card["name"]
            if "manaCost" in card:
                manacost = _strip_braces(card["manaCost"])
            else:
                manacost = ""
            pt = _power_toughness(card)
            text = card["text"] if "text" in card else ""
            cardcmc = str(card['cmc'])
            cardtype = card["type"]
            if "names" in card:
                if "layout" in card:
                    if card['layout'] == 'split' or card['layout'] == 'aftermath':
                        if card['name'] == card['names'][0]:
                            for jsoncard in mtgjson["cards"]:
                                if jsoncard['name'] == card['names'][1]:
                                    cardtype += " // " + jsoncard["type"]
                                    newmanacost = ""
                                    if 'manaCost' in jsoncard:
                                        newmanacost = jsoncard['manaCost']
                                    manacost += " // " + _strip_braces(newmanacost)
                                    cardcmc += " // " + str(jsoncard["cmc"])
                                    text += "\n---\n" + jsoncard["text"]
                                    cardname += " // " + jsoncard['name']
                    elif card['layout'] == 'double-faced':
                        for dfcname in card['names']:
                            if dfcname != card['name']:
                                related = dfcname
                    else:
                        print (card["name"] + " has names, but layout != split, aftermath, or double-faced")
                else:
                    print (card["name"] + " has multiple names and no 'layout' key")

            tablerow = _table_row(cardtype)

            if 'number' in card:
                if 'b' in str(card['number']):
                    if 'layout' in card:
                        if card['layout'] == 'split' or card['layout'] == 'aftermath':
                            # right side of a split card: already merged above
                            continue

            cardsxml.write("\n")
            cardsxml.write("" + cardname + "\n")
            cardsxml.write(
                '' + code + '\n')
            cardsxml.write(
                "" + manacost + "\n")
            cardsxml.write("" + cardcmc + "\n")
            if 'colors' in card:
                for color in card['colors']:
                    cardsxml.write(
                        '' + _COLOR_LETTERS[color] + '\n')
            if cardname + ' enters the battlefield tapped' in text:
                cardsxml.write("1\n")
            cardsxml.write("" + cardtype + "\n")
            if pt:
                cardsxml.write("" + pt + "\n")
            if 'loyalty' in card:
                cardsxml.write("" + str(card['loyalty']) + "\n")
            cardsxml.write("" + tablerow + "\n")
            cardsxml.write("" + text + "\n")
            if related:
                cardsxml.write(
                    "" + related + "\n")
                related = ''
            cardsxml.write("\n")

        cardsxml.write("\n")

    if count > 0:
        print ('XML Stats for ' + code)
        print ('Total cards: ' + str(count))
        if dfccount > 0:
            print ('DFC: ' + str(dfccount))
        print ('Newest: ' + str(newest))
    else:
        print ('Set ' + code + ' has no spoiled cards.')


def write_combined_xml(mtgjson, setinfos):
    """Write the cards of all sets to out/spoiler.xml and print statistics.

    NOTE(review): as in write_xml(), the literals are reproduced exactly as
    recovered, with their XML tags stripped; restore them from history.

    Args:
        mtgjson: dict mapping set code -> set dict ('name', 'type',
            'releaseDate', 'cards').
        setinfos: not used by this function.
    """
    if not os.path.isdir('out/'):
        os.makedirs('out/')
    count = 0
    dfccount = 0
    newest = ''
    related = 0
    with open('out/spoiler.xml', 'w') as cardsxml:
        cardsxml.write("\n"
                       "\n")
        cardsxml.write("\n")
        cardsxml.write("\n")
        for setcode in mtgjson:
            setobj = mtgjson[setcode]
            if 'cards' in setobj and len(setobj['cards']) > 0:
                cardsxml.write("\n" + setcode + "\n"
                               "" + setobj['name'] + "\n"
                               "" + setobj['type'].title() + "\n"
                               "" + setobj['releaseDate'] + "\n"
                               "\n")
        cardsxml.write(
            "\n"
            "\n")
        for setcode in mtgjson:
            setobj = mtgjson[setcode]
            # .get(): sets without a 'cards' key are skipped here just as the
            # header loop above skips them.
            for card in setobj.get("cards", []):
                if 'layout' in card and (card['layout'] == 'split' or card['layout'] == 'aftermath'):
                    if 'b' in card["number"]:
                        continue
                if count == 0:
                    newest = card["name"]
                count += 1
                name = card["name"]
                if "manaCost" in card:
                    manacost = _strip_braces(card["manaCost"])
                else:
                    manacost = ""
                pt = _power_toughness(card)
                text = card["text"] if "text" in card else ""
                cardcmc = str(card['cmc'])
                cardtype = card["type"]
                if "names" in card:
                    if "layout" in card:
                        if card["layout"] != 'split' and card["layout"] != 'aftermath':
                            if len(card["names"]) > 1:
                                if card["names"][0] == card["name"]:
                                    related = card["names"][1]
                                    text += '\n\n(Related: ' + \
                                        card["names"][1] + ')'
                                    dfccount += 1
                                elif card['names'][1] == card['name']:
                                    related = card["names"][0]
                                    text += '\n\n(Related: ' + \
                                        card["names"][0] + ')'
                        else:
                            for cardb in setobj['cards']:
                                if cardb['name'] == card["names"][1]:
                                    cardtype += " // " + cardb['type']
                                    # .get(): a costless second half no longer
                                    # raises KeyError (write_xml guards this too).
                                    manacost += " // " + \
                                        _strip_braces(cardb.get("manaCost", ""))
                                    cardcmc += " // " + str(cardb["cmc"])
                                    text += "\n---\n" + cardb["text"]
                                    name += " // " + cardb['name']
                    else:
                        print (card["name"] + " has multiple names and no 'layout' key")

                tablerow = _table_row(cardtype)

                if 'number' in card:
                    if 'b' in card['number']:
                        if 'layout' in card:
                            if card['layout'] == 'split' or card['layout'] == 'aftermath':
                                # right side of a split card: already merged above
                                continue

                cardsxml.write("\n")
                cardsxml.write("" + name + "\n")
                cardsxml.write(
                    '' + setcode + '\n')
                if 'colors' in card:
                    for color in card['colors']:
                        cardsxml.write(
                            '' + _COLOR_LETTERS[color] + '\n')
                if related:
                    cardsxml.write(
                        "" + related + "\n")
                    related = ''
                cardsxml.write(
                    "" + manacost + "\n")
                cardsxml.write("" + cardcmc + "\n")
                cardsxml.write("" + cardtype + "\n")
                if pt:
                    cardsxml.write("" + pt + "\n")
                cardsxml.write("" + tablerow + "\n")
                cardsxml.write("" + text + "\n")
                if name + ' enters the battlefield tapped' in text:
                    cardsxml.write("1\n")
                if 'loyalty' in card:
                    cardsxml.write(
                        "" + str(card['loyalty']) + "\n")
                cardsxml.write("\n")

        cardsxml.write("\n")

    print ('XML COMBINED STATS')
    print ('Total cards: ' + str(count))
    if dfccount > 0:
        print ('DFC: ' + str(dfccount))
    print ('Newest: ' + str(newest))


def pretty_xml(infile):
    """Parse the XML file *infile* and return it re-serialised by minidom.

    toprettyxml() is called with newl='' so no line breaks are added.
    """
    # or xml.dom.minidom.parseString(xml_string)
    prettyxml = xml.dom.minidom.parse(infile)
    pretty_xml_as_string = prettyxml.toprettyxml(newl='')
    return pretty_xml_as_string


def make_allsets(AllSets, mtgjson, code):
    """Store *mtgjson* under *code* in *AllSets* (in place) and return it."""
    AllSets[code] = mtgjson
    return AllSets


def scrape_masterpieces(url='http://www.mtgsalvation.com/spoilers/181-amonkhet-invocations',
                        mtgscardurl='http://www.mtgsalvation.com/cards/amonkhet-invocations/'):
    """Scrape masterpiece names and image URLs from MTGSalvation.

    Args:
        url: spoiler list page; every element whose class contains
            "log-card" is treated as one card.
        mtgscardurl: prefix of the per-card page URL.

    Returns:
        List of {"name": ..., "url": ...}; url is '' when the card page has
        no spoiler image.
    """
    page = requests.get(url)
    listing = html.fromstring(page.content)
    cards = []
    for child in listing.xpath('//*[contains(@class, "log-card")]'):
        childurl = mtgscardurl + \
            child.attrib['data-card-id'] + '-' + child.text.replace(' ', '-')
        cardpage = requests.get(childurl)
        cardtree = html.fromstring(cardpage.content).xpath(
            '//img[contains(@class, "card-spoiler-image")]')
        try:
            cardurl = cardtree[0].attrib['src']
        except (IndexError, KeyError):
            # No image element, or an image element without a src attribute.
            cardurl = ''
        cards.append({
            "name": child.text,
            "url": cardurl
        })
    return cards


def make_masterpieces(headers, AllSets, spoil):
    """Build the set dict for a masterpiece series.

    Each scraped masterpiece that is not already in the masterpiece set
    inside *AllSets* is matched by name against all known cards, then
    against the current spoiler, and emitted with the masterpiece image and
    rarity 'Mythic Rare'.  Unmatched masterpieces are emitted as bare
    name/url dicts.

    Args:
        headers: dict with 'mtgsurl', 'mtgscardpath', 'code', 'name',
            'alternativeNames' and 'releaseDate'.
        AllSets: dict mapping set code -> set dict with a 'cards' list.
        spoil: dict with a 'cards' list.

    Returns:
        A set dict of type "masterpiece".
    """
    masterpieces = scrape_masterpieces(
        headers['mtgsurl'], headers['mtgscardpath'])
    masterpieces2 = []
    for masterpiece in masterpieces:
        matched = False
        if headers['code'] in AllSets:
            for oldMasterpiece in AllSets[headers['code']]['cards']:
                if masterpiece['name'] == oldMasterpiece['name']:
                    matched = True
        for setcode in AllSets:
            if not matched:
                for oldcard in AllSets[setcode]['cards']:
                    if oldcard['name'] == masterpiece['name'] and not matched:
                        # Copy: the original printing must keep its own url
                        # and rarity.
                        mixcard = dict(oldcard)
                        mixcard['url'] = masterpiece['url']
                        mixcard['rarity'] = 'Mythic Rare'
                        masterpieces2.append(mixcard)
                        matched = True
                        break
        for spoilcard in spoil['cards']:
            if not matched:
                if spoilcard['name'] == masterpiece['name']:
                    mixcard = dict(spoilcard)
                    mixcard['rarity'] = 'Mythic Rare'
                    mixcard['url'] = masterpiece['url']
                    masterpieces2.append(mixcard)
                    matched = True
                    break
        if not matched:
            print ("We couldn't find a card object to assign the data to for masterpiece " + masterpiece['name'])
            masterpieces2.append(masterpiece)
    mpsjson = {
        "name": headers['name'],
        "alternativeNames": headers['alternativeNames'],
        "code": headers['code'],
        "releaseDate": headers['releaseDate'],
        "border": "black",
        "type": "masterpiece",
        "cards": masterpieces2
    }
    return mpsjson


def set_has_cards(setinfo, manual_cards, mtgjson):
    """Return True when the set described by *setinfo* has any cards.

    A set has cards when its code is a key of *manual_cards* or *mtgjson*,
    or when a manual card names it in its 'set' field.
    """
    if setinfo['code'] in manual_cards or setinfo['code'] in mtgjson:
        return True
    for card in manual_cards['cards']:
        # NOTE(review): this used to test the builtin `set` type, which can
        # never match; comparing the card's 'set' field is the presumed
        # intent -- confirm against the manual card schema.
        if 'set' in card and card['set'] == setinfo['code']:
            return True
    return False


def download_file(url):
    """Download *url* into the working directory and return the file name.

    The file is named after the last path segment of the URL and written in
    1 KiB chunks.
    """
    local_filename = url.split('/')[-1]
    headers = {'user-agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko / 20071127 Firefox / 2.0.0.11'}
    with requests.get(url, stream=True, headers=headers) as r:
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename


def get_allsets():
    """Download and decode the xz-compressed AllSets.json from mtgjson.com."""
    file_location = download_file('https://mtgjson.com/json/AllSets.json.xz')
    with lzma.open(file_location) as archive:
        AllSets = json.loads(archive.read())
    return AllSets


def add_headers(mtgjson, setinfos):
    """Wrap the cards of *mtgjson* in a set dict built from *setinfos*.

    Args:
        mtgjson: dict with a 'cards' list.
        setinfos: dict with 'code', 'name', 'releaseDate' and 'type';
            optional 'block'; the presence of 'noBooster' suppresses the
            booster layout.

    Returns:
        A new set dict; the 'cards' list is shared with *mtgjson*.
    """
    mtgjson2 = {
        "border": "black",
        "code": setinfos['code'],
        "name": setinfos['name'],
        "releaseDate": setinfos['releaseDate'],
        "type": setinfos['type'],
        "cards": mtgjson['cards']
    }
    if 'noBooster' not in setinfos:
        # A stray trailing comma used to wrap this list in a 1-tuple.
        mtgjson2['booster'] = [
            [
                "rare",
                "mythic rare"
            ],
            "uncommon",
            "uncommon",
            "uncommon",
            "common",
            "common",
            "common",
            "common",
            "common",
            "common",
            "common",
            "common",
            "common",
            "common",
            "land",
            "marketing"
        ]
    if 'block' in setinfos:
        mtgjson2['block'] = setinfos['block']
    return mtgjson2