aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEduardo Pedroni <e.pedroni91@gmail.com>2015-06-01 17:17:30 +0200
committerEduardo Pedroni <e.pedroni91@gmail.com>2015-06-01 17:17:30 +0200
commitc072ed0ac345a7e421cb0612048d4cd534bf6e73 (patch)
tree96761c4f1ab9c4f8cf159b1cfe69477c4e745fe6
parent5fd864718f4cd9e1e67835063a1efc24dd85e0c4 (diff)
Forget scraping; there are already APIs for querying card data. I'm moving on to the collection management stage. The scrape work will be set aside on a different branch for future reference, if needed.
-rwxr-xr-xcardbase.py69
-rw-r--r--cardparser.py55
2 files changed, 87 insertions, 37 deletions
diff --git a/cardbase.py b/cardbase.py
index c4bc17e..bb831b0 100755
--- a/cardbase.py
+++ b/cardbase.py
@@ -1,34 +1,73 @@
#!/usr/bin/env python3
+import cardparser
import requests
import sys
from lxml import html
+import yaml
+import re
-database = ""
+def exit(msg=""):
+ if msg != "":
+ print(msg)
+
+ #database.close()
-def parseInput(raw):
- if raw == "help":
- print("Need help? too bad")
- elif raw == "
+ sys.exit()
def main(args):
try:
- database = open(args[1], "w")
- except:
+ dataFile = args[1]
+ #database = open(args[1], "w")
+ except Exception as e:
print("Please provide a valid database file as the first argument.")
- sys.exit(1)
+ exit(e)
print("Welcome to cardbase")
print("For a list of commands, type \"help\"")
- exit = False
- while(not exit):
- try:
- raw = input("> ")
- parseInput(raw)
- except:
- exit = True
+ globalSet = ""
+
+ while(True):
+ #try:
+ raw = input("(" + globalSet + ")> ").strip()
+ args = re.split("[\t ]+", raw)
+
+ if args[0] == "help":
+ print("Need help? try google.com")
+
+ elif args[0] == "exit":
+ exit()
+
+ elif args[0] == "set":
+ if args[1] and args[1] != "":
+ globalSet = re.sub("[^0-9A-Za-z]", "", args[1])
+ else:
+ globalSet = ""
+
+ elif args[0] == "save":
+ if args[1] and args[1] != "":
+ dataFile = args[1]
+
+ elif args[0]:
+ if globalSet != "":
+ cardNo = re.sub("[^0-9A-Za-z]", "", args[0])
+ print("Fetching card " + cardNo)
+ try:
+ pass
+ newCard = cardparser.fetchCard(globalSet, args[0])
+ print(newCard.title)
+ except cardparser.CardNotFoundException as e:
+ print("Card not found.")
+ else:
+ print("Select a set with the \"set\" command before adding cards.")
+
+ else:
+ print("Invalid input")
+
+ #except Exception as e:
+ # exit(e)
# The entry point
diff --git a/cardparser.py b/cardparser.py
index 759ad47..4a9de71 100644
--- a/cardparser.py
+++ b/cardparser.py
@@ -1,4 +1,5 @@
import re
+import requests
from lxml import html
class Card():
@@ -20,29 +21,28 @@ class Card():
self.toughness = ""
self.loyalty = ""
+class CardNotFoundException(Exception):
+ pass
+
# fetching functions
def makeUrl(cardSet, cardNo):
return "http://magiccards.info/" + cardSet + "/en/" + cardNo + ".html"
def remoteFetch(url):
- return requests.get(url).text
+ return html.fromstring(requests.get(url).text)
-def fetchCard(cardSet, cardNo):
- # build object
- card = Card()
- card.edition = cardSet
- card.scan = "http://magiccards.info/scans/en/" + cardSet + "/" + cardNo + ".jpg"
- card.number = cardNo
-
- #setRemoteData(card, makeUrl(cardSet, cardNo))
-
- return card
+def isValid(page):
+ notFound = page.xpath("/html/body/h1/text()")
+ response404 = page.xpath("/html/body/h1/text()")
+ if notFound:
+ raise CardNotFoundException()
-def setRemoteData(card, url, fetchCallback=remoteFetch):
+def setRemoteData(card, url):
# fetch card from upstream
- page = html.fromstring(fetchCallback(url))
+ page = html.fromstring(requests.get(url).text)
+ isValid(page)
- # set remote data
+ # parse and set data
card.title = getTitle(page)
card.cost = getCost(page)
card.convertedCost = getConvertedCost(page)
@@ -57,6 +57,18 @@ def setRemoteData(card, url, fetchCallback=remoteFetch):
card.toughness = getToughness(page)
card.loyalty = getLoyalty(page)
+
+def fetchCard(cardSet, cardNo):
+ # build object
+ card = Card()
+ card.edition = cardSet
+ card.scan = "http://magiccards.info/scans/en/" + cardSet + "/" + cardNo + ".jpg"
+ card.number = cardNo
+
+ setRemoteData(card, makeUrl(cardSet, cardNo))
+
+ return card
+
# parsing functions
def getTitle(page):
return page.xpath("/html/body/table[3]/tr/td[2]/span/a/text()")[0]
@@ -65,7 +77,9 @@ def extractSubTitle(page):
line = page.xpath("/html/body/table[3]/tr/td[2]/p[1]/text()")[0]
line = re.sub("\n", "", line)
line = re.sub(" +", " ", line)
- return line.strip()
+ line = line.strip()
+
+ return line
def getCost(page):
cost = extractSubTitle(page)
@@ -88,18 +102,13 @@ def getConvertedCost(page):
def getColour(page):
colours = extractSubTitle(page)
colours = re.search(" [0-9X]*([WGRBU\{\}/]*) ", colours)
+
if colours:
colours = colours.group(1)
-
- colours = re.sub("U+", "U", colours)
- colours = re.sub("W+", "W", colours)
- colours = re.sub("R+", "R", colours)
- colours = re.sub("B+", "B", colours)
- colours = re.sub("G+", "G", colours)
colours = re.sub("[\{\}/]*", "", colours)
+ colours = re.sub(r"(.)\1+", r"\1", colours)
return colours
-
else:
return ""
@@ -121,10 +130,12 @@ def getSubType(page):
def getArtist(page):
artist = page.xpath("/html/body/table[3]/tr/td[2]/p[4]/text()")[0]
artist = re.sub("Illus. ", "", artist)
+
return artist
def getText(page):
text = page.xpath("/html/body/table[3]/tr/td[2]/p[2]/b/text()")
+
return text
def getFlavour(page):