Types and errorlog improvements (#26)

* Ignore corrected files in Error.
Card Type/Types detection improvement.

* Merge fix

Bad Merge Conflict Resolution
This commit is contained in:
tritoch 2017-06-20 09:50:16 -05:00 committed by GitHub
parent 45de69d330
commit c68da7ccab
2 changed files with 47 additions and 66 deletions

20
main.py
View File

@ -49,14 +49,6 @@ def save_setjson(mtgs, filename):
json_file.write(unicode(data))
def save_errorlog(errorlog):
    """Write the error log to out/errors.json as pretty-printed JSON.

    errorlog may be either:
      * a dict (for example the name -> {key: value} mapping returned by
        remove_corrected_errors), which is written unchanged, or
      * a list of error dicts, in which case entries carrying a 'fixed' key
        are dropped and the remainder is written wrapped in an outer list,
        matching the historical file layout.
    """
    if isinstance(errorlog, dict):
        # Iterating a dict would yield card names, turning the 'fixed' test
        # into a substring match on the name and discarding the details.
        payload = errorlog
    else:
        unfixed = [error for error in errorlog if 'fixed' not in error]
        payload = [unfixed]
    with open('out/errors.json', 'w') as outfile:
        json.dump(payload, outfile, sort_keys=True, indent=2, separators=(',', ': '))
@ -81,7 +73,7 @@ if __name__ == '__main__':
scryfall = spoilers.get_scryfall('https://api.scryfall.com/cards/search?q=++e:' + setinfo['setname'].lower())
mtgs = spoilers.get_image_urls(mtgs, presets['isfullspoil'], setinfo['setname'], setinfo['setlongname'], setinfo['setsize']) #get images
mtgjson = spoilers.smash_mtgs_scryfall(mtgs, scryfall)
[mtgjson, errors] = spoilers.errorcheck(mtgjson, card_corrections) #check for errors where possible
[mtgjson, errors] = spoilers.error_check(mtgjson) #check for errors where possible
errorlog += errors
spoilers.write_xml(mtgjson, setinfo['setname'], setinfo['setlongname'], setinfo['setreleasedate'])
#save_xml(spoilers.pretty_xml(setinfo['setname']), 'out/spoiler.xml')
@ -92,7 +84,7 @@ if __name__ == '__main__':
#old cards will get their infos copied from mtgjson (including fields that may not apply like 'artist')
#the images will still come from mtgs
masterpieces = spoilers.make_masterpieces(setinfo['masterpieces'], AllSets, mtgjson)
[masterpieces, errors] = spoilers.errorcheck(masterpieces)
[masterpieces, errors] = spoilers.error_check(masterpieces)
errorlog += errors
spoilers.write_xml(masterpieces, setinfo['masterpieces']['setname'], setinfo['masterpieces']['setlongname'], setinfo['masterpieces']['setreleasedate'])
AllSets = spoilers.make_allsets(AllSets, masterpieces, setinfo['masterpieces']['setname'])
@ -103,11 +95,15 @@ if __name__ == '__main__':
save_setjson(combinedjson, 'spoiler')
spoilers.write_combined_xml(combinedjson, setinfos)
save_xml(spoilers.pretty_xml('out/spoiler.xml'), 'out/spoiler.xml')
errorlog = spoilers.remove_corrected_errors(errorlog, card_corrections)
save_errorlog(errorlog)
#save_allsets(AllSets)
#save_setjson(mtgjson)
if presets['dumpXML']:
print '----- DUMPING SPOILER.XML -----'
print '<!----- DUMPING SPOILER.XML ----->'
with open('out/spoiler.xml', 'r') as xmlfile:
print xmlfile.read()
print '----- END XML DUMP -----'
print '<!----- END XML DUMP ----->'
print '#----- DUMPING ERROR LOG -----'
print json.dumps(errorlog, ensure_ascii=False, encoding='utf8', indent=2, sort_keys=True, separators=(',',':'))
print '#----- END ERROR LOG -----'

View File

@ -15,11 +15,10 @@ import xml.dom.minidom
from bs4 import BeautifulSoup as BS
from bs4 import Comment
def scrape_mtgs(url):
    """Fetch `url` with an HTTP GET and return the response body as text.

    The request sends Cache-Control, Pragma and Expires headers that ask for
    an uncached copy of the page.
    """
    no_cache_headers = {
        'Cache-Control': 'no-cache',
        'Pragma': 'no-cache',
        'Expires': 'Thu, 01 Jan 1970 00:00:00 GMT',
    }
    response = requests.get(url, headers=no_cache_headers)
    return response.text
def parse_mtgs(mtgs, manual_cards=[], card_corrections=[], delete_cards=[], split_cards=[], related_cards=[]):
def parse_mtgs(mtgs, manual_cards=[], card_corrections=[], delete_cards=[], split_cards={}, related_cards=[]):
mtgs = mtgs.replace('utf-16','utf-8')
patterns = ['<b>Name:</b> <b>(?P<name>.*?)<',
'Cost: (?P<cost>\d{0,2}[WUBRGC]*?)<',
@ -121,16 +120,12 @@ def parse_mtgs(mtgs, manual_cards=[], card_corrections=[], delete_cards=[], spli
card['colorIdentity'] += c
cleanedcards = []
for card in cards: #let's remove any cards that are named in delete_cards array
if not card['name'] in delete_cards:
cleanedcards.append(card)
cards = cleanedcards
cardlist = []
cardarray = []
for card in cards:
dupe = False
for dupecheck in cardarray:
@ -138,8 +133,6 @@ def parse_mtgs(mtgs, manual_cards=[], card_corrections=[], delete_cards=[], spli
dupe = True
if dupe == True:
continue
#if 'draft' in card['rules']:
# continue
for cid in card['colorIdentity']:
card['colorIdentityArray'].append(cid)
if 'W' in card['color']:
@ -188,19 +181,11 @@ def parse_mtgs(mtgs, manual_cards=[], card_corrections=[], delete_cards=[], spli
if 'b' in card['number'] or 'a' in card['number']:
if not 'layout' in card:
print card['name'] + " has a a/b number but no 'layout'"
cardtypes = []
if not '-' in card['type']:
card['type'] = card['type'].replace('instant','Instant').replace('sorcery','Sorcery').replace('creature','Creature')
cardtypes.append(card['type'].replace('instant','Instant'))
else:
cardtypes = card['type'].replace('Legendary ','').split(' - ')[0].split(' ')[:-1]
card['type'] = card['type'].replace('instant','Instant').replace('sorcery','Sorcery').replace('creature','Creature')
if '-' in card['type']:
subtype = card['type'].split(' - ')[1].strip()
else:
subtype = False
#if u"—" in card['type']:
# subtype = card['type'].split(' — ')[1].strip()
if subtype:
subtypes = subtype.split(' ')
else:
@ -216,7 +201,7 @@ def parse_mtgs(mtgs, manual_cards=[], card_corrections=[], delete_cards=[], spli
#not sure if mtgjson has a list of acceptable rarities, but my application does
#so we'll warn me but continue to write a non-standard rarity (timeshifted?)
#may force 'special' in the future
if card['rarity'] not in ['Mythic Rare','Rare','Uncommon','Common','Special']:
if card['rarity'] not in ['Mythic Rare','Rare','Uncommon','Common','Special','Basic Land']:
#errors.append({"name": card['name'], "key": "rarity", "value": card['rarity']})
print card['name'] + ' has rarity = ' + card['rarity']
if subtypes:
@ -224,8 +209,15 @@ def parse_mtgs(mtgs, manual_cards=[], card_corrections=[], delete_cards=[], spli
cardjson["rarity"] = card['rarity']
cardjson["text"] = card['rules']
cardjson["type"] = card['type']
workingtypes = card['type']
if ' - ' in workingtypes:
workingtypes = card['type'].split(' - ')[0]
cardjson['types'] = workingtypes.replace('Legendary ','').replace('Snow ','')\
.replace('Elite ','').replace('Basic ','').replace('World ','').replace('Ongoing ','')\
.strip().split(' ')
cardjson["url"] = card['img']
cardjson["types"] = cardtypes
#optional fields
if len(card['colorIdentityArray']) > 0:
cardjson["colorIdentity"] = card['colorIdentityArray']
@ -258,13 +250,12 @@ def correct_cards(mtgjson, manual_cards=[], card_corrections=[], delete_cards=[]
workingCMC += 1
if 'types' not in card:
card['types'] = []
# if '—' in card['type']:
# workingTypes = card['type'].split('—')[0].strip()
# else:
workingTypes = card['type'].split('-')[0].strip()
workingTypes.replace('Legendary ','').replace('Snow ','')\
.replace('Elite ','').replace('Basic ','').replace('World ','').replace('Ongoing ','')
card['types'] += workingTypes.split(' ')
workingtypes = card['type']
if ' - ' in workingtypes:
workingtypes = card['type'].split(' - ')[0]
card['types'] = workingtypes.replace('Legendary ', '').replace('Snow ', '') \
.replace('Elite ', '').replace('Basic ', '').replace('World ', '').replace('Ongoing ', '') \
.strip().split(' ')
if 'subtypes' not in card:
# if '—' in card['type']:
# workingSubtypes = card['type'].split('—')[1].strip()
@ -301,7 +292,6 @@ def correct_cards(mtgjson, manual_cards=[], card_corrections=[], delete_cards=[]
card['colorIdentity'] += CID
else:
card['colorIdentity'] = [CID]
#print mtgjson
for card in mtgjson['cards']:
isManual = False
for manualCard in manual_cards:
@ -325,14 +315,14 @@ def correct_cards(mtgjson, manual_cards=[], card_corrections=[], delete_cards=[]
return mtgjson
def errorcheck(mtgjson, card_corrections={}):
def error_check(mtgjson):
errors = []
for card in mtgjson['cards']:
for key in card:
if key == "":
errors.append({"name": card['name'], "key": key, "value": ""})
requiredKeys = ['name','type']
requiredKeys = ['name','type','types']
for requiredKey in requiredKeys:
if not requiredKey in card:
errors.append({"name": card['name'], "key": key, "missing": True})
@ -424,53 +414,48 @@ def errorcheck(mtgjson, card_corrections={}):
errors.append({"name": card['name'], "key": "number", "value": ""})
if not 'types' in card:
errors.append({"name": card['name'], "key": "types", "value": ""})
for card in mtgjson['cards']:
for cardCorrection in card_corrections:
if card['name'] == cardCorrection:
for correctionType in card_corrections[cardCorrection]:
# if not correctionType in card and correctionType not in :
# sys.exit("Invalid correction for " + cardCorrection + " of type " + card)
if not correctionType == 'name':
if correctionType == 'img':
card['url'] = card_corrections[cardCorrection][correctionType]
else:
card[correctionType] = card_corrections[cardCorrection][correctionType]
if 'name' in card_corrections[cardCorrection]:
card['name'] = card_corrections[cardCorrection]['name']
#print errors
return [mtgjson, errors]
def remove_corrected_errors(errorlog=None, card_corrections=None, print_fixed=False):
    """Collapse a list of errors into a per-card dict, dropping handled ones.

    Args:
        errorlog: iterable of error dicts. Each needs 'name' and 'key';
            'value' and 'fixed' are optional.
        card_corrections: mapping of card name -> mapping (or other
            container) of the field names corrected for that card.
        print_fixed: when False, errors whose 'fixed' entry equals True are
            left out of the result.

    Returns:
        A dict of the form {card name: {key: value}}. An error without a
        'value' is recorded with the empty string. Cards whose errors were
        all dropped do not appear. The input error dicts are not modified.
    """
    corrections = card_corrections or {}
    remaining = {}
    for error in errorlog or []:
        # Equality (not identity) is kept so that truthy-but-not-True
        # markers are treated exactly as before.
        if not print_fixed and error.get('fixed') == True:
            continue
        name = error['name']
        key = error['key']
        # An error is considered handled when a correction exists for the
        # same card and the same field.
        if key in corrections.get(name, ()):
            continue
        remaining.setdefault(name, {})[key] = error.get('value', "")
    return remaining
def get_scryfall(setUrl):
#getUrl = 'https://api.scryfall.com/cards/search?q=++e:'
#setUrl = getUrl + setname.lower()
setDone = False
scryfall = []
#firstPass = True
while setDone == False:
setcards = requests.get(setUrl)
setcards = setcards.json()
if setcards.has_key('data'):
#if firstPass:
# cards[set]["cards"] = []
# firstPass = False
scryfall.append(setcards['data'])
#for setkey in mtgjson[set]:
# if 'card' not in setkey:
# if set != 'NMS':
# cards[set][setkey] = mtgjson[set][setkey]
else:
setDone = True
print setUrl
print setcards
print 'No Scryfall data'
scryfall = ['']
#noset.append(set)
time.sleep(.1)
if setcards.has_key('has_more'):
if setcards['has_more'] == True:
#print 'Going to extra page of ' + set
setUrl = setcards['next_page']
else:
setDone = True