Magic-Token/scripts/update_image_links.py
tooomm 9fdba7d3b1
Some checks failed
Picture Health / Check image links (push) Has been cancelled
Update version on tokens.xml changes / update_version (push) Has been cancelled
Scryfall API: Add headers and enhance logging (#358)
2026-04-01 17:08:45 +02:00

195 lines
6.1 KiB
Python

"""
This script parses a tokens.xml file, collects the picURLs of the cards within, and
replaces the links to Scryfall images with up-to-date URLs by querying Scryfall's API.
"""
from xml.sax import saxutils, make_parser, handler
from urllib.parse import urlsplit
from urllib.request import Request, urlopen
from urllib.error import HTTPError
import itertools
import json
import sys
import time
import os
import tempfile
import pathlib
import shutil
# Number of identifiers sent per /cards/collection request; longer lists are
# split into chunks of this size by cards_collection.
SCRYFALL_MAX_LIST_SIZE = 75

# HTTP headers attached to every Scryfall API request made by this script.
SCRYFALL_API_HEADERS = {
    'Content-Type': 'application/json',
    'User-Agent': 'Magic-Token/1.0',
    'Accept': 'application/json',
}
def cards_collection(identifiers):
    """
    Get information about a set of cards using the Scryfall API.

    This is a generator yielding the dictionaries representing the Card
    objects as returned by the /cards/collection Scryfall API.

    If the list of identifiers is larger than SCRYFALL_MAX_LIST_SIZE, it is
    automatically split into smaller chunks and one request is made per
    chunk, with consecutive requests started at least 0.1 second apart.

    Args:
        identifiers: a sequence (it is sliced and measured with len()) of
            JSON-serializable card identifiers, e.g. ``{'id': <uuid>}``.

    Yields:
        Every entry of the ``data`` list of each response, in order.

    Raises:
        RuntimeError: if a request fails with an HTTP error, or if a
            response is marked ``has_more`` or carries ``warnings``.
    """
    min_interval = 0.1
    last_request = None
    total = len(identifiers)
    offset = 0
    while offset < total:
        chunk = identifiers[offset:offset + SCRYFALL_MAX_LIST_SIZE]
        print("Requesting chunk {}-{}/{}...".format(offset, offset + len(chunk), total))
        offset += SCRYFALL_MAX_LIST_SIZE

        payload = json.dumps({'identifiers': chunk}).encode('utf-8')
        req = Request('https://api.scryfall.com/cards/collection', payload, headers=SCRYFALL_API_HEADERS)

        # Rate limiting. A monotonic clock is used so that wall-clock
        # adjustments cannot shorten (or indefinitely lengthen) the pause.
        if last_request is not None:
            elapsed = time.monotonic() - last_request
            if elapsed < min_interval:
                time.sleep(min_interval - elapsed)
        last_request = time.monotonic()

        try:
            with urlopen(req) as f:
                list_obj = json.load(f)
        except HTTPError as e:
            # The error body is only used for diagnostics, so never let a
            # decoding problem hide the actual HTTP failure.
            error_body = e.read().decode('utf-8', errors='replace')
            raise RuntimeError(f"Scryfall API request failed: {error_body}") from e

        # Explicit checks instead of asserts: these validate external data
        # and must not disappear when Python runs with -O.
        if list_obj.get('has_more', False):
            raise RuntimeError("Scryfall API returned an unexpectedly paginated response.")
        if 'warnings' in list_obj:
            raise RuntimeError("Scryfall API returned warnings: {}".format(list_obj['warnings']))

        yield from list_obj['data']
def parse_picurl(picurl):
    """
    Parse a Scryfall picURL into its components.

    The Scryfall picURL must be in one of those forms:
    - https://c1.scryfall.com/file/scryfall-cards/<version>/<face>/*/*/<uuid>.jpg
    - https://cards.scryfall.io/<version>/<face>/*/*/<uuid>.jpg

    If it is, a dictionary with keys 'uuid', 'version' and 'face' is returned.
    Otherwise (unknown host, or a path too short to contain a version, a face
    and a file name), an empty dictionary is returned.
    """
    urlinfo = urlsplit(picurl)

    # Position of the <version> segment in the '/'-split path for each known
    # host (index 0 is the empty string before the leading slash).
    version_index = {
        'c1.scryfall.com': 3,
        'cards.scryfall.io': 1,
    }.get(urlinfo.netloc)
    if version_index is None:
        return {}

    parts = urlinfo.path.split('/')
    # Need <version>, <face> and at least one further segment for the file
    # name; anything shorter is not a card image URL.
    if len(parts) < version_index + 3:
        return {}

    return {
        'version': parts[version_index],
        'face': parts[version_index + 1],
        # Strip the file extension: keep what precedes the first dot.
        'uuid': parts[-1].split('.')[0],
    }
class URLCollector(handler.ContentHandler):
    """
    SAX content handler gathering the Scryfall picURLs of <set> elements.

    After parsing, `urls` holds, in document order, one dictionary (as
    returned by parse_picurl) per <set> element whose picURL was recognized.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.urls = []

    def startElement(self, name, attrs):
        """Record the parsed picURL of a <set> element, if it has a recognized one."""
        if name != 'set' or 'picURL' not in attrs:
            return
        parsed = parse_picurl(attrs['picURL'])
        if not parsed:
            # Not a Scryfall image URL: nothing to refresh.
            return
        assert 'uuid' in parsed
        self.urls.append(parsed)
class URLRewriter(handler.LexicalHandler, saxutils.XMLGenerator):
def __init__(self, images, **kwargs):
super().__init__(**kwargs)
self._images = images
self.started = False
def startElement(self, name, attrs):
self.started = True
if name == 'set' and 'picURL' in attrs:
obj = parse_picurl(attrs['picURL'])
if 'uuid' in obj and obj['uuid'] in self._images:
new_url = self._images[obj['uuid']][obj['face']][obj['version']]
if parse_picurl(new_url) != obj:
raise RuntimeError(
"URL `{}` was rewritten to `{}` that no longer resolves to the same card (hint: update parse_picurl).".format(
attrs['picURL'], new_url))
attrs = dict(attrs, picURL=new_url)
super().startElement(name, attrs)
def endDocument(self):
super().endDocument()
self._write('\n')
def comment(self, content):
self.ignorableWhitespace('<!--{}-->{}'.format(content, '' if self.started else '\n'))
def collect_urls(fname):
    """
    Parse the XML file `fname` and return the picURLs found in it.

    The result is the `urls` list filled in by a URLCollector while the
    document is parsed.
    """
    collector = URLCollector()
    sax_parser = make_parser()
    sax_parser.setContentHandler(collector)
    sax_parser.parse(fname)
    return collector.urls
def rewrite_urls(fname, images, *, out=None):
    """
    Re-emit the XML file `fname` to `out` with refreshed picURLs.

    `images` and `out` are handed to a URLRewriter, which is registered both
    as the content handler and as the lexical handler (so that it also
    receives comments). The output is encoded as UTF-8.
    """
    rewriter = URLRewriter(images, out=out, encoding='UTF-8')
    sax_parser = make_parser()
    sax_parser.setContentHandler(rewriter)
    sax_parser.setProperty(handler.property_lexical_handler, rewriter)
    sax_parser.parse(fname)
def main(fname, *, out=None):
    """
    Refresh the Scryfall picURLs of the XML file `fname`.

    The picURLs are collected from the file, the matching cards are
    requested from Scryfall, and the document is written to `out` with every
    recognized picURL replaced by the URL Scryfall currently reports for the
    same card, face and image version.

    Args:
        fname: path of the XML file to read.
        out: output stream passed on to rewrite_urls.

    Raises:
        RuntimeError: if Scryfall returns the same card twice, or a card
            that has neither `image_uris` nor exactly two `card_faces`.
    """
    urls = collect_urls(fname)
    # Each card is requested once, however many times it is referenced;
    # sorting keeps the request chunks identical from one run to the next.
    unique_uuids = sorted({obj['uuid'] for obj in urls})
    identifiers = [{'id': uuid} for uuid in unique_uuids]

    images = {}
    for card in cards_collection(identifiers):
        card_id = card['id']
        # Explicit checks rather than asserts: this is external data and the
        # validation must survive running Python with -O.
        if card_id in images:
            raise RuntimeError(
                "Scryfall returned card `{}` more than once.".format(card_id))

        if 'image_uris' in card:
            # Single-faced card: the images live on the card itself.
            images[card_id] = {'front': card['image_uris']}
            continue

        faces = card.get('card_faces')
        if faces is None or len(faces) != 2:
            raise RuntimeError(
                "Card `{}` has neither `image_uris` nor exactly two `card_faces`.".format(card_id))
        images[card_id] = {
            'front': faces[0]['image_uris'],
            'back': faces[1]['image_uris'],
        }

    rewrite_urls(fname, images, out=out)
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Helper script to refresh scryfall image URLs')
    parser.add_argument('filename', nargs='?', default='tokens.xml')
    # Exactly one of --output / --inplace must be given.
    output_group = parser.add_mutually_exclusive_group(required=True)
    output_group.add_argument('--output', '-o')
    output_group.add_argument('--inplace', '-i', action='store_true')
    ns = parser.parse_args()

    if ns.inplace:
        outpath = pathlib.Path(ns.filename)
    else:
        outpath = pathlib.Path(ns.output)

    # Write to a temporary file next to the destination and only move it into
    # place once everything succeeded, so a failed run never leaves a
    # partially written output (and, with --inplace, never clobbers the input
    # while it is still being read).
    fd, temppath = tempfile.mkstemp(dir=outpath.parent)
    try:
        # Close (and thereby flush) the file explicitly before it is renamed
        # or removed instead of relying on garbage collection to do so.
        with os.fdopen(fd, mode='w+b') as outfile:
            main(ns.filename, out=outfile)
        os.replace(temppath, outpath)
    except BaseException:
        # Clean up on any failure, including KeyboardInterrupt, then let the
        # original exception propagate.
        os.unlink(temppath)
        raise