mirror of
https://github.com/Cockatrice/Magic-Spoiler.git
synced 2026-03-21 17:54:59 -05:00
Refactor set_info, download_images to scraper sub
Refactor set_info to align with mtgjson keys. Move download_images to wizards_scraper
This commit is contained in:
parent
599aaee733
commit
dc9b9b7a48
30
main.py
30
main.py
|
|
@ -58,7 +58,7 @@ def save_allsets(AllSets):
|
|||
|
||||
|
||||
def save_masterpieces(masterpieces, setinfo):
    """Dump the masterpieces set as pretty-printed JSON to ``out/<code>.json``.

    masterpieces -- JSON-serialisable set object to write.
    setinfo      -- set_info entry; the file name is taken from
                    ``setinfo['masterpieces']['code']`` (the mtgjson-aligned
                    key that replaced the former ``setname`` key).

    The ``out/`` directory is not created here and must already exist.
    """
    with open('out/' + setinfo['masterpieces']['code'] + '.json', 'w') as outfile:
        json.dump(masterpieces, outfile, sort_keys=True,
                  indent=2, separators=(',', ': '))
|
||||
|
||||
|
|
@ -90,8 +90,8 @@ if __name__ == '__main__':
|
|||
AllSets = spoilers.get_allsets() # get AllSets from mtgjson
|
||||
combinedjson = {}
|
||||
for setinfo in setinfos:
|
||||
if setinfo['setname'] in AllSets:
|
||||
print "Found set from set_info.yml " +setinfo['setname']+ " in MTGJSON, not adding it"
|
||||
if setinfo['code'] in AllSets:
|
||||
print "Found set from set_info.yml " +setinfo['code']+ " in MTGJSON, not adding it"
|
||||
continue
|
||||
if presets['oldRSS'] or 'noRSS' in setinfo and setinfo['noRSS']:
|
||||
mtgs = {"cards": []}
|
||||
|
|
@ -101,12 +101,12 @@ if __name__ == '__main__':
|
|||
[mtgs, split_cards] = mtgs_scraper.parse_mtgs(
|
||||
mtgs, [], [], [], presets['split_cards']) # parse spoilers into mtgjson format
|
||||
mtgs = spoilers.correct_cards(
|
||||
mtgs, manual_sets[setinfo['setname']], card_corrections, delete_cards['delete']) # fix using the fixfiles
|
||||
mtgs, manual_sets[setinfo['code']], card_corrections, delete_cards['delete']) # fix using the fixfiles
|
||||
mtgjson = spoilers.get_image_urls(
|
||||
mtgs, presets['isfullspoil'], setinfo['setname'], setinfo['setlongname'], setinfo['setsize'], setinfo) # get images
|
||||
mtgs, presets['isfullspoil'], setinfo['code'], setinfo['name'], setinfo['size'], setinfo) # get images
|
||||
if presets['scryfallComparison']:
|
||||
scryfall = scryfall_scraper.get_scryfall(
|
||||
'https://api.scryfall.com/cards/search?q=++e:' + setinfo['setname'].lower())
|
||||
'https://api.scryfall.com/cards/search?q=++e:' + setinfo['code'].lower())
|
||||
mtgjson = scryfall_scraper.smash_mtgs_scryfall(mtgs, scryfall)
|
||||
if 'fullSpoil' in setinfo and setinfo['fullSpoil']:
|
||||
wotc = wizards_scraper.scrape_fullspoil('', setinfo)
|
||||
|
|
@ -115,10 +115,10 @@ if __name__ == '__main__':
|
|||
mtgjson, card_corrections) # check for errors where possible
|
||||
errorlog += errors
|
||||
spoilers.write_xml(
|
||||
mtgjson, setinfo['setname'], setinfo['setlongname'], setinfo['setreleasedate'])
|
||||
#save_xml(spoilers.pretty_xml(setinfo['setname']), 'out/spoiler.xml')
|
||||
mtgjson, setinfo['code'], setinfo['name'], setinfo['releaseDate'])
|
||||
#save_xml(spoilers.pretty_xml(setinfo['code']), 'out/spoiler.xml')
|
||||
mtgjson = spoilers.add_headers(mtgjson, setinfo)
|
||||
AllSets = spoilers.make_allsets(AllSets, mtgjson, setinfo['setname'])
|
||||
AllSets = spoilers.make_allsets(AllSets, mtgjson, setinfo['code'])
|
||||
if 'masterpieces' in setinfo: # repeat all of the above for masterpieces
|
||||
# masterpieces aren't in the rss feed, so for the new cards, we'll go to their individual pages on mtgs
|
||||
# old cards will get their infos copied from mtgjson (including fields that may not apply like 'artist')
|
||||
|
|
@ -127,14 +127,14 @@ if __name__ == '__main__':
|
|||
setinfo['masterpieces'], AllSets, mtgjson)
|
||||
[masterpieces, errors] = spoilers.error_check(masterpieces)
|
||||
errorlog += errors
|
||||
spoilers.write_xml(masterpieces, setinfo['masterpieces']['setname'],
|
||||
setinfo['masterpieces']['setlongname'], setinfo['masterpieces']['setreleasedate'])
|
||||
spoilers.write_xml(masterpieces, setinfo['masterpieces']['code'],
|
||||
setinfo['masterpieces']['name'], setinfo['masterpieces']['releaseDate'])
|
||||
AllSets = spoilers.make_allsets(
|
||||
AllSets, masterpieces, setinfo['masterpieces']['setname'])
|
||||
AllSets, masterpieces, setinfo['masterpieces']['code'])
|
||||
save_masterpieces(masterpieces, setinfo)
|
||||
combinedjson[setinfo['masterpieces']['setname']] = masterpieces
|
||||
save_setjson(mtgjson, setinfo['setname'])
|
||||
combinedjson[setinfo['setname']] = mtgjson
|
||||
combinedjson[setinfo['masterpieces']['code']] = masterpieces
|
||||
save_setjson(mtgjson, setinfo['code'])
|
||||
combinedjson[setinfo['code']] = mtgjson
|
||||
save_setjson(combinedjson, 'spoiler')
|
||||
spoilers.write_combined_xml(combinedjson, setinfos)
|
||||
save_xml(spoilers.pretty_xml('out/spoiler.xml'), 'out/spoiler.xml')
|
||||
|
|
|
|||
|
|
@ -199,7 +199,7 @@ def parse_mtgs(mtgs, manual_cards=[], card_corrections=[], delete_cards=[], spli
|
|||
if card['cmc'] == '':
|
||||
card['cmc'] = 0
|
||||
cardjson = {}
|
||||
#cardjson["id"] = hashlib.sha1(setname + card['name'] + str(card['name']).lower()).hexdigest()
|
||||
#cardjson["id"] = hashlib.sha1(code + card['name'] + str(card['name']).lower()).hexdigest()
|
||||
cardjson["cmc"] = card['cmc']
|
||||
cardjson["manaCost"] = card['cost']
|
||||
cardjson["name"] = card['name']
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import time
|
|||
|
||||
def get_scryfall(setUrl):
|
||||
#getUrl = 'https://api.scryfall.com/cards/search?q=++e:'
|
||||
#setUrl = getUrl + setname.lower()
|
||||
#setUrl = getUrl + code.lower()
|
||||
setDone = False
|
||||
scryfall = []
|
||||
|
||||
|
|
|
|||
54
set_info.yml
54
set_info.yml
|
|
@ -2,21 +2,21 @@
|
|||
#
|
||||
# required keys
|
||||
#
|
||||
# setname: FSN
|
||||
# setlongname: "Full Set Name"
|
||||
# setsize: 274
|
||||
# setreleasedate: "2050-02-28"
|
||||
# settype: expansion
|
||||
# code: FSN
|
||||
# name: "Full Set Name"
|
||||
# size: 274
|
||||
# releaseDate: "2050-02-28"
|
||||
# type: expansion
|
||||
#
|
||||
# optional keys
|
||||
#
|
||||
# blockname: "Block Name"
|
||||
# block: "Block Name"
|
||||
# mtgsurl: "http://url_to_mtgsalvation.com/spoilers/page"
|
||||
# mtgscardpath: "http://url_to_mtgsalvation.com/cards/setpage/"
|
||||
# fullSpoil: false
|
||||
# masterpieces:
|
||||
#
|
||||
# Masterpieces contain setname, setlongname, setreleasedate as above
|
||||
# Masterpieces contain code, name, releaseDate as above
|
||||
# and requires mtgsurl and mtgscardpath
|
||||
# also can contain
|
||||
#
|
||||
|
|
@ -24,37 +24,37 @@
|
|||
#
|
||||
|
||||
---
|
||||
setname: "HOU"
|
||||
setlongname: "Hour of Devastation"
|
||||
blockname: "Amonkhet"
|
||||
setsize: 199
|
||||
setreleasedate: "2017-07-14"
|
||||
settype: "expansion"
|
||||
code: "HOU"
|
||||
name: "Hour of Devastation"
|
||||
block: "Amonkhet"
|
||||
size: 199
|
||||
releaseDate: "2017-07-14"
|
||||
type: "expansion"
|
||||
mtgsurl: "http://www.mtgsalvation.com/spoilers/183-hour-of-devastation"
|
||||
mtgscardpath: "http://www.mtgsalvation.com/cards/hour-of-devastation/"
|
||||
fullSpoil: false
|
||||
masterpieces:
|
||||
setname: "MPS_AKH"
|
||||
setlongname: "Masterpiece Series: Amonkhet Invocations"
|
||||
setreleasedate: "2017-04-28"
|
||||
code: "MPS_AKH"
|
||||
name: "Masterpiece Series: Amonkhet Invocations"
|
||||
releaseDate: "2017-04-28"
|
||||
alternativeNames: ["Amonkhet Invocations"]
|
||||
galleryURL: "http://magic.wizards.com/en/articles/archive/feature/masterpiece-series-hour-devastation-invocations-2017-06-19"
|
||||
additionalCardNames: []
|
||||
mtgsurl: "http://www.mtgsalvation.com/spoilers/181-amonkhet-invocations"
|
||||
mtgscardpath: "http://www.mtgsalvation.com/cards/amonkhet-invocations/"
|
||||
---
|
||||
setname: "XLN"
|
||||
setlongname: "Ixalan"
|
||||
blockname: "Ixalan"
|
||||
setsize: 279
|
||||
setreleasedate: "2017-09-29"
|
||||
settype: "expansion"
|
||||
code: "XLN"
|
||||
name: "Ixalan"
|
||||
block: "Ixalan"
|
||||
size: 279
|
||||
releaseDate: "2017-09-29"
|
||||
type: "expansion"
|
||||
noRSS: true
|
||||
---
|
||||
setname: "C17"
|
||||
setlongname: "Commander 2017"
|
||||
setsize: 309
|
||||
setreleasedate: "2017-09-29"
|
||||
settype: "commander"
|
||||
code: "C17"
|
||||
name: "Commander 2017"
|
||||
size: 309
|
||||
releaseDate: "2017-09-29"
|
||||
type: "commander"
|
||||
noRSS: true
|
||||
noBooster: true
|
||||
68
spoilers.py
68
spoilers.py
|
|
@ -283,35 +283,19 @@ def remove_corrected_errors(errorlog=[], card_corrections=[], print_fixed=False)
|
|||
return errorlog2
|
||||
|
||||
|
||||
def download_images(mtgjson, setcode):
    """Download each card's image into ``images/<setcode>/<card name>.jpg``.

    mtgjson -- either a dict holding a ``'cards'`` list or the bare list of
               card dicts; each card needs ``'url'`` and ``'name'`` entries.
    setcode -- set code used as the sub-directory name under ``images/``.

    Cards with an empty ``'url'`` and images already present on disk are
    skipped. The ' // ' separator of split-card names is dropped from the
    file name.
    """
    image_dir = 'images/' + setcode
    if not os.path.isdir(image_dir):
        os.makedirs(image_dir)
    jsoncards = mtgjson['cards'] if 'cards' in mtgjson else mtgjson
    for card in jsoncards:
        if not card['url']:
            continue
        image_path = image_dir + '/' + card['name'].replace(' // ', '') + '.jpg'
        if os.path.isfile(image_path):
            continue
        # requests.get() only fetches the resource; its second positional
        # argument is ``params`` (query string), not a destination path, so
        # the response body has to be written to disk explicitly.
        response = requests.get(card['url'])
        if response.status_code != 200:
            # Do not store an error page as if it were an image.
            print("Could not download " + card['url'])
            continue
        with open(image_path, 'wb') as image_file:
            image_file.write(response.content)
|
||||
|
||||
|
||||
def get_image_urls(mtgjson, isfullspoil, setname, setlongname, setSize=269, setinfo=False):
|
||||
def get_image_urls(mtgjson, isfullspoil, code, name, size=269, setinfo=False):
|
||||
IMAGES = 'http://magic.wizards.com/en/content/' + \
|
||||
setlongname.lower().replace(' ', '-') + '-cards'
|
||||
name.lower().replace(' ', '-') + '-cards'
|
||||
IMAGES2 = 'http://mythicspoiler.com/newspoilers.html'
|
||||
IMAGES3 = 'http://magic.wizards.com/en/articles/archive/card-image-gallery/' + \
|
||||
setlongname.lower().replace('of', '').replace(' ', ' ').replace(' ', '-')
|
||||
name.lower().replace('of', '').replace(' ', ' ').replace(' ', '-')
|
||||
|
||||
text = requests.get(IMAGES).text
|
||||
text2 = requests.get(IMAGES2).text
|
||||
text3 = requests.get(IMAGES3).text
|
||||
wotcpattern = r'<img alt="{}.*?" src="(?P<img>.*?\.png)"'
|
||||
wotcpattern2 = r'<img src="(?P<img>.*?\.png).*?alt="{}.*?"'
|
||||
mythicspoilerpattern = r' src="' + setname.lower() + '/cards/{}.*?.jpg">'
|
||||
mythicspoilerpattern = r' src="' + code.lower() + '/cards/{}.*?.jpg">'
|
||||
WOTC = []
|
||||
for c in mtgjson['cards']:
|
||||
if 'names' in c:
|
||||
|
|
@ -356,10 +340,10 @@ def get_image_urls(mtgjson, isfullspoil, setname, setlongname, setSize=269, seti
|
|||
return mtgjson
|
||||
|
||||
|
||||
def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]):
|
||||
def write_xml(mtgjson, code, name, releaseDate, split_cards=[]):
|
||||
if not os.path.isdir('out/'):
|
||||
os.makedirs('out/')
|
||||
cardsxml = open('out/' + setname + '.xml', 'w+')
|
||||
cardsxml = open('out/' + code + '.xml', 'w+')
|
||||
cardsxml.truncate()
|
||||
count = 0
|
||||
dfccount = 0
|
||||
|
|
@ -368,14 +352,14 @@ def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]):
|
|||
cardsxml.write("<?xml version='1.0' encoding='UTF-8'?>\n"
|
||||
"<cockatrice_carddatabase version='3'>\n"
|
||||
"<sets>\n<set>\n<name>"
|
||||
+ setname +
|
||||
+ code +
|
||||
"</name>\n"
|
||||
"<longname>"
|
||||
+ setlongname +
|
||||
+ name +
|
||||
"</longname>\n"
|
||||
"<settype>Expansion</settype>\n"
|
||||
"<releasedate>"
|
||||
+ setreleasedate +
|
||||
+ releaseDate +
|
||||
"</releasedate>\n"
|
||||
"</set>\n"
|
||||
"</sets>\n"
|
||||
|
|
@ -445,7 +429,7 @@ def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]):
|
|||
cardsxml.write("<card>\n")
|
||||
cardsxml.write("<name>" + name.encode('utf-8') + "</name>\n")
|
||||
cardsxml.write(
|
||||
'<set rarity="' + card['rarity'] + '" picURL="' + card["url"] + '">' + setname + '</set>\n')
|
||||
'<set rarity="' + card['rarity'] + '" picURL="' + card["url"] + '">' + code + '</set>\n')
|
||||
cardsxml.write(
|
||||
"<manacost>" + manacost.encode('utf-8') + "</manacost>\n")
|
||||
cardsxml.write("<cmc>" + cardcmc + "</cmc>\n")
|
||||
|
|
@ -479,7 +463,7 @@ def write_xml(mtgjson, setname, setlongname, setreleasedate, split_cards=[]):
|
|||
|
||||
cardsxml.write("</cards>\n</cockatrice_carddatabase>")
|
||||
|
||||
print 'XML Stats for ' + setlongname
|
||||
print 'XML Stats for ' + name
|
||||
print 'Total cards: ' + str(count)
|
||||
if dfccount > 0:
|
||||
print 'DFC: ' + str(dfccount)
|
||||
|
|
@ -641,8 +625,8 @@ def pretty_xml(infile):
|
|||
return pretty_xml_as_string
|
||||
|
||||
|
||||
def make_allsets(AllSets, mtgjson, code):
    """Store ``mtgjson`` in ``AllSets`` under the set ``code``.

    ``AllSets`` is modified in place and the same object is returned, so an
    existing entry with that code is replaced.
    """
    AllSets[code] = mtgjson
    return AllSets
|
||||
|
||||
|
||||
|
|
@ -676,8 +660,8 @@ def make_masterpieces(headers, AllSets, spoil):
|
|||
masterpieces2 = []
|
||||
for masterpiece in masterpieces:
|
||||
matched = False
|
||||
if headers['setname'] in AllSets:
|
||||
for oldMasterpiece in AllSets[headers['setname']]['cards']:
|
||||
if headers['code'] in AllSets:
|
||||
for oldMasterpiece in AllSets[headers['code']]['cards']:
|
||||
if masterpiece['name'] == oldMasterpiece['name']:
|
||||
matched = True
|
||||
for set in AllSets:
|
||||
|
|
@ -703,10 +687,10 @@ def make_masterpieces(headers, AllSets, spoil):
|
|||
print "We couldn't find a card object to assign the data to for masterpiece " + masterpiece['name']
|
||||
masterpieces2.append(masterpiece)
|
||||
mpsjson = {
|
||||
"name": headers['setlongname'],
|
||||
"name": headers['name'],
|
||||
"alternativeNames": headers['alternativeNames'],
|
||||
"code": headers['setname'],
|
||||
"releaseDate": headers['setreleasedate'],
|
||||
"code": headers['code'],
|
||||
"releaseDate": headers['releaseDate'],
|
||||
"border": "black",
|
||||
"type": "masterpiece",
|
||||
"cards": masterpieces2
|
||||
|
|
@ -715,11 +699,11 @@ def make_masterpieces(headers, AllSets, spoil):
|
|||
|
||||
|
||||
def set_has_cards(setinfo, manual_cards, mtgjson):
|
||||
if setinfo['setname'] in manual_cards or setinfo['setname'] in mtgjson:
|
||||
if setinfo['code'] in manual_cards or setinfo['code'] in mtgjson:
|
||||
return True
|
||||
for card in manual_cards['cards']:
|
||||
if set in card:
|
||||
if set == setinfo['setname']:
|
||||
if set == setinfo['code']:
|
||||
return True
|
||||
|
||||
|
||||
|
|
@ -733,10 +717,10 @@ def get_allsets():
|
|||
def add_headers(mtgjson, setinfos):
|
||||
mtgjson2 = {
|
||||
"border": "black",
|
||||
"code": setinfos['setname'],
|
||||
"name": setinfos['setlongname'],
|
||||
"releaseDate": setinfos['setreleasedate'],
|
||||
"type": setinfos['settype'],
|
||||
"code": setinfos['code'],
|
||||
"name": setinfos['name'],
|
||||
"releaseDate": setinfos['releaseDate'],
|
||||
"type": setinfos['type'],
|
||||
"cards": mtgjson['cards']
|
||||
}
|
||||
if not 'noBooster' in setinfos:
|
||||
|
|
@ -761,6 +745,6 @@ def add_headers(mtgjson, setinfos):
|
|||
"land",
|
||||
"marketing"
|
||||
],
|
||||
if 'blockname' in setinfos:
|
||||
mtgjson2['block'] = setinfos['blockname']
|
||||
if 'block' in setinfos:
|
||||
mtgjson2['block'] = setinfos['block']
|
||||
return mtgjson2
|
||||
|
|
|
|||
|
|
@ -2,11 +2,12 @@
|
|||
import requests
|
||||
from lxml import html
|
||||
from PIL import Image
|
||||
import os
|
||||
|
||||
|
||||
def scrape_fullspoil(url="http://magic.wizards.com/en/articles/archive/card-image-gallery/hour-devastation", setinfo={"setname": "HOU"}, showRarityColors=False, showFrameColors=False, manual_cards=[], delete_cards=[], split_cards=[]):
|
||||
if 'setlongname' in setinfo:
|
||||
url = 'http://magic.wizards.com/en/articles/archive/card-image-gallery/' + setinfo['setlongname'].lower().replace('of', '').replace(
|
||||
def scrape_fullspoil(url="http://magic.wizards.com/en/articles/archive/card-image-gallery/hour-devastation", setinfo={"code": "HOU"}, showRarityColors=False, showFrameColors=False, manual_cards=[], delete_cards=[], split_cards=[]):
|
||||
if 'name' in setinfo:
|
||||
url = 'http://magic.wizards.com/en/articles/archive/card-image-gallery/' + setinfo['name'].lower().replace('of', '').replace(
|
||||
' ', ' ').replace(' ', '-')
|
||||
page = requests.get(url)
|
||||
tree = html.fromstring(page.content)
|
||||
|
|
@ -40,10 +41,10 @@ def scrape_fullspoil(url="http://magic.wizards.com/en/articles/archive/card-imag
|
|||
cardcount += 1
|
||||
fullspoil = {"cards": cards}
|
||||
print "Spoil Gallery has " + str(cardcount) + " cards."
|
||||
download_images(fullspoil['cards'], setinfo['setname'])
|
||||
fullspoil = get_rarities_by_symbol(fullspoil, setinfo['setname'])
|
||||
fullspoil = get_mana_symbols(fullspoil, setinfo['setname'])
|
||||
#fullspoil = get_colors_by_frame(fullspoil, setinfo['setname'])
|
||||
download_images(fullspoil['cards'], setinfo['code'])
|
||||
fullspoil = get_rarities_by_symbol(fullspoil, setinfo['code'])
|
||||
fullspoil = get_mana_symbols(fullspoil, setinfo['code'])
|
||||
#fullspoil = get_colors_by_frame(fullspoil, setinfo['code'])
|
||||
return fullspoil
|
||||
|
||||
|
||||
|
|
@ -250,3 +251,19 @@ def smash_fullspoil(mtgjson, fullspoil):
|
|||
print "WOTC only cards: "
|
||||
print WOTC_only
|
||||
print different_keys
|
||||
|
||||
|
||||
def download_images(mtgjson, setcode):
    """Download each card's image into ``images/<setcode>/<card name>.jpg``.

    mtgjson -- either a dict holding a ``'cards'`` list or the bare list of
               card dicts; each card needs ``'url'`` and ``'name'`` entries.
    setcode -- set code used as the sub-directory name under ``images/``.

    Cards with an empty ``'url'`` and images already present on disk are
    skipped. The ' // ' separator of split-card names is dropped from the
    file name.
    """
    image_dir = 'images/' + setcode
    if not os.path.isdir(image_dir):
        os.makedirs(image_dir)
    jsoncards = mtgjson['cards'] if 'cards' in mtgjson else mtgjson
    for card in jsoncards:
        if not card['url']:
            continue
        image_path = image_dir + '/' + card['name'].replace(' // ', '') + '.jpg'
        if os.path.isfile(image_path):
            continue
        # requests.get() only fetches the resource; its second positional
        # argument is ``params`` (query string), not a destination path, so
        # the response body has to be written to disk explicitly.
        response = requests.get(card['url'])
        if response.status_code != 200:
            # Do not store an error page as if it were an image.
            print("Could not download " + card['url'])
            continue
        with open(image_path, 'wb') as image_file:
            image_file.write(response.content)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user