Module:Format ISBN/data/doc
< Module:Format ISBN | data
Jump to navigation
Jump to search
This is the documentation page for Module:Format ISBN/data
Python script to update this data module
#!/usr/bin/env python
# This work has been released into the public domain by its author, User:Cobaltcigs.
# This applies worldwide. In some countries this may not be legally possible; if so:
# User:Cobaltcigs grants anyone the right to use this work for any purpose, without any
# conditions, unless such conditions are required by law.
import xml.dom.minidom

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved Request/urlopen into urllib.request; alias it so the
    # rest of the script can keep using the urllib2 name.
    import urllib.request as urllib2
url = "https://www.isbn-international.org/export_rangemessage.xml"

# First paragraph of the generated Lua comment header.  The four placeholders
# are: message source, download URL, message serial number, message date.
HEADER_TEMPLATE = """\tGenerated from %s's RangeMessage.xml file at:
\t\t%s
\tusing a script.
\tVersion: %s (%s)"""

# Explanatory notes copied verbatim into the generated Lua comment header.
# Backslashes in the sample regular expression are doubled so that Python
# emits them literally instead of parsing "\d" and "\-" as (invalid) escape
# sequences; the printed text is unchanged.
NOTES = """\tNotes:
\t* Integer tuples at right represent the middle three (of five) digit-group quantities
\t for any ISBN in the specified (quasi-numeric but actually lexicographical) range.
\t* The "specified range" for the tuple specified on row[N] can be thought of as any
\t ISBN Q where (Q <= row[N].isbn) && (Q > (row[n-1].isbn or 0))
\t* Omitted for brevity are the first group ("978"/"979", always 3) and the last group
\t (check digit, always 1).
\t* In other words, an ISBN whose digit grouping is specified below as {x,y,z} will match
\t the regular expression "^\\d{3}\\-\\d{x}\\-\\d{y}\\-\\d{z}\\-\\d{1}$" once properly formatted.
"""


def fetch_range_message(source_url=url, save_as="RangeMessage.xml"):
    """Download the range message and keep a local copy of it.

    The payload is read fully, written unchanged to ``save_as`` and returned
    as the raw bytes received from the server.
    """
    response = urllib2.urlopen(urllib2.Request(source_url))
    try:
        xml_bytes = response.read()
    finally:
        # Python 2 responses are not context managers, so close explicitly.
        response.close()
    # Binary mode: read() returns raw bytes, and text mode would either
    # reject them (Python 3) or rewrite line endings (Python 2 on Windows).
    with open(save_as, "wb") as out:
        out.write(xml_bytes)
    return xml_bytes


def node_text(parent, tag):
    """Return the text content of the first ``tag`` element under ``parent``."""
    return parent.getElementsByTagName(tag)[0].firstChild.nodeValue


def iter_rows(document):
    """Yield ``(end, n2, n3, n4)`` for every usable rule in ``document``.

    ``end`` is the 13-character upper bound of the rule's range (prefix plus
    the range's upper value, padded with 9s); ``n2``, ``n3`` and ``n4`` are
    the lengths of the second, third and fourth digit groups.
    """
    for prefix_node in document.getElementsByTagName("Prefix"):
        parts = prefix_node.firstChild.nodeValue.split("-")
        # Only two-part prefixes such as "978-0" carry a group identifier;
        # anything else (e.g. a bare "978") is skipped.
        if len(parts) != 2:
            continue
        prefix = "".join(parts)
        n1 = len(parts[0])
        n2 = len(parts[1])
        for rule in prefix_node.parentNode.getElementsByTagName("Rule"):
            upper = node_text(rule, "Range").split("-")[1]
            end = (prefix + upper + "9" * 9)[:13]
            n3 = int(node_text(rule, "Length"))
            # disregard (yet unallocated?) blocks where the publisher
            # identifier's <Length> is 0
            if n3 == 0:
                continue
            # The check digit (always 1) is not needed for this arithmetic.
            n4 = 12 - (n1 + n2 + n3)
            yield end, n2, n3, n4


def render_chunks(xml_bytes, source_url=url):
    """Yield the pieces of the Lua data module, one per output ``print``.

    ``xml_bytes`` is the raw range-message XML; ``source_url`` is only quoted
    in the generated comment header.
    """
    document = xml.dom.minidom.parseString(xml_bytes)
    source = node_text(document, "MessageSource")
    version = node_text(document, "MessageSerialNumber")
    timestamp = node_text(document, "MessageDate")

    yield "--[["
    yield HEADER_TEMPLATE % (source, source_url, version, timestamp)
    yield NOTES
    yield "--]]"
    yield "return {"
    for row in iter_rows(document):
        yield '\t{"%s", {%s,%s,%s}},' % row
    yield "\t}"


def main():
    """Fetch the current range message and print the Lua module to stdout."""
    for chunk in render_chunks(fetch_range_message(url), url):
        print(chunk)


if __name__ == "__main__":
    main()