trash-area.com ≫ blog ≫ GAE ≫ GAE(python)+AWS+googleカレンダーで新刊チェック
なんとなくマンガの新刊チェッカーを実装してみました。
1. GAEからAmazon Web Services (AWS)をぐるぐる叩いて、結果を保存。
2. ics形式で出力させたのをgoogleカレンダーに食わせてスマホで同期。
とこんな感じです。
main.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""New-release checker for comics.

/cron queries the Amazon Product Advertising API for the stored rules and
saves matching items; the .ics URL renders the saved items as an iCalendar
feed (one all-day event per release date).
"""
import wsgiref.handlers
import urllib
import hmac
import hashlib
import base64
import time
import datetime
import logging
import re
from xml.etree.ElementTree import fromstring

from google.appengine.ext import webapp
from google.appengine.api import urlfetch
from google.appengine.ext import db
from google.appengine.ext.webapp import template

# A complete year-month-day publication date. Anchored so that trailing
# garbage is rejected instead of reaching strptime().
_PUB_DATE_RE = re.compile(r'\d{4}-\d{2}-\d{2}$')


def _logSkipped(pubDate, title):
    """Log that an item was skipped; tolerates a missing (None) title."""
    logging.info("publicationDate:%s .. continue (%s)",
                 pubDate, (title or u"").encode('utf-8'))


def _escapeIcsText(text):
    """Escape *text* for use as an iCalendar TEXT value (RFC 5545, 3.3.11)."""
    # The backslash must be escaped first so later escapes are not doubled.
    text = text.replace(u"\\", u"\\\\")
    text = text.replace(u";", u"\\;").replace(u",", u"\\,")
    return text.replace(u"\r\n", u"\\n").replace(u"\n", u"\\n")


# Search conditions to check
class Rule(db.Model):
    title = db.StringProperty()
    author = db.StringProperty()
    publisher = db.StringProperty()
    # None until the rule has been processed by CronHandler once.
    lastChecked = db.DateTimeProperty(default=None)


# New-release information
class NewReleases(db.Model):
    asin = db.StringProperty()
    title = db.StringProperty()
    releaseDate = db.DateProperty()


class InitHandler(webapp.RequestHandler):
    """Seeds the datastore with the sample rules."""

    def get(self):
        # NOTE(review): every request stores three more Rule entities, so
        # visiting /init repeatedly creates duplicates.
        Rule(title=u"ベルセルク", author=u"三浦 建太郎", publisher=u"白泉社").put()
        Rule(title=u"カイジ*和也", author=u"福本 伸行").put()
        Rule(title=u"咲", author=u"小林 立").put()


class CronHandler(webapp.RequestHandler):
    """Checks up to three rules per run and stores the releases found."""

    def get(self):
        # Never-checked rules first, otherwise the least recently checked.
        rules = Rule.all().filter('lastChecked =', None).fetch(3)
        if not rules:
            rules = Rule.all().order('lastChecked').fetch(3)

        q = []
        for rule in rules:
            a = []
            if rule.title:
                a.append("title:%s" % rule.title.encode('utf-8'))
            if rule.author:
                a.append("author:%s" % rule.author.encode('utf-8'))
            if rule.publisher:
                a.append("publisher:%s" % rule.publisher.encode('utf-8'))

            # Stamp the rule even when it has no conditions; otherwise it
            # stays in the "lastChecked = None" set forever and can starve
            # the other rules. The clock is shifted by +9 hours (presumably
            # UTC -> JST; confirm against the server time zone).
            rule.lastChecked = (datetime.datetime.now()
                                + datetime.timedelta(hours=9))
            rule.put()

            if a:
                q.append("(" + " and ".join(a) + ")")

        if not q:
            return

        power = "(Binding:コミック) and (" + " or ".join(q) + ")"
        logging.info("query .. power:" + power)
        query_pairs = {
            "AssociateTag": "#!あなたのAmazonアソシエイトのID!#",
            "Service": "AWSECommerceService",
            "Operation": "ItemSearch",
            "SearchIndex": "Books",
            "ResponseGroup": "Small,ItemAttributes",
            "Sort": "daterank",
            "MaximumPrice": "1000",
            "Availability": "Available",
            "Power": power,
        }
        # Both keys come from "Access Credentials" under "Security
        # Credentials" in the AWS console.
        aws = AWSUtil(
            "#!あなたのアクセスキーID!#",
            "#!あなたのシークレットアクセスキー!#")
        url = aws.getSignedUrl(query_pairs)
        result = urlfetch.fetch(url, '', urlfetch.GET, {})
        if result.status_code != 200:
            # An error response is not a result set; do not try to store it.
            logging.error("AWS request failed .. status:%s",
                          result.status_code)
            return
        xmlresult = fromstring(result.content)

        # Accept releases from 60 days ago up to two years ahead.
        today = datetime.date.today()
        oldest = today + datetime.timedelta(days=-60)
        newest = today + datetime.timedelta(days=365 * 2)

        # Walk the search results
        ns = AWSUtil.getNameSpace()
        for elem in xmlresult.findall(".//{%s}Item" % ns):
            asin = elem.findtext('./{%s}ASIN' % ns)
            pubDateText = elem.findtext(
                './{%s}ItemAttributes/{%s}PublicationDate' % (ns, ns))
            title = elem.findtext(
                './{%s}ItemAttributes/{%s}Title' % (ns, ns))

            # The ASIN is the datastore key; without it nothing can be saved.
            if not asin:
                _logSkipped(pubDateText, title)
                continue

            # Ignore anything that is not a full year-month-day date
            # (including a missing PublicationDate element).
            if pubDateText is None or _PUB_DATE_RE.match(pubDateText) is None:
                _logSkipped(pubDateText, title)
                continue
            try:
                pubDate = datetime.datetime.strptime(
                    pubDateText, '%Y-%m-%d').date()
            except ValueError:
                # Right shape but not a real calendar date (e.g. month 13).
                _logSkipped(pubDateText, title)
                continue

            # Ignore releases older than 60 days or more than 2 years ahead.
            if pubDate < oldest or pubDate > newest:
                _logSkipped(pubDate, title)
                continue

            # Save the item, keyed by ASIN so repeated runs do not duplicate.
            NewReleases.get_or_insert(
                "key_" + asin, asin=asin, title=title, releaseDate=pubDate)


class IcsHandler(webapp.RequestHandler):
    """Renders the stored releases as an iCalendar feed via output.ics."""

    def get(self):
        # DTSTAMP carries a "Z" suffix, which denotes UTC, so it must be
        # built from UTC time rather than local time.
        stamp = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
        oneDay = datetime.timedelta(1)

        items = []
        for newRelease in NewReleases.all().order('releaseDate').fetch(1000):
            dts = newRelease.releaseDate
            # All-day event: DTEND is exclusive, hence the following day.
            dte = dts + oneDay
            items.append({
                # A stable UID derived from the datastore key.
                'UID': hashlib.md5(
                    str(newRelease.key())).hexdigest() + '@trash-area.com',
                'TITLE': _escapeIcsText(
                    newRelease.title or u"").encode('utf-8'),
                'DTSTART': str(dts).replace('-', ''),
                'DTEND': str(dte).replace('-', ''),
                'TODAY': stamp,
            })

        outparams = {'result_set': items}
        self.response.headers["Content-Type"] = "text/calendar; charset=utf-8"
        self.response.out.write(template.render("output.ics", outparams))


class AWSUtil:
    """Builds signed request URLs for the Product Advertising API."""

    @staticmethod
    def getApiVersion():
        """Return the API version string sent with every request."""
        return "2011-08-01"

    @staticmethod
    def getNameSpace():
        """Return the XML namespace of responses for the API version."""
        ret = "http://webservices.amazon.com/AWSECommerceService/"
        ret += AWSUtil.getApiVersion()
        return ret

    @staticmethod
    def _quote(value):
        """Percent-encode *value* per RFC 3986.

        Only the unreserved characters (letters, digits, "-", "_", ".",
        "~") stay literal. urllib.quote's default would leave "/" unescaped
        and, on Python 2, escape "~", so the signed string would not match
        the one the server computes.
        """
        return urllib.quote(value, safe="-_.~")

    def __init__(self, access_key, secret):
        self.access_key = access_key
        self.secret = secret

    def getSignedUrl(self, query_dict):
        """Return the signed GET URL for the parameters in *query_dict*.

        *query_dict* maps parameter names to byte-string values. It is not
        modified; AWSAccessKeyId, Timestamp and Version are added to a copy.
        """
        method = "GET"
        host = "ecs.amazonaws.jp"
        uri = "/onca/xml"

        params = dict(query_dict)
        params["AWSAccessKeyId"] = self.access_key
        params["Timestamp"] = time.strftime(
            "%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        params["Version"] = AWSUtil.getApiVersion()

        # Canonical query string: pairs sorted by parameter name.
        pairs = []
        for k, v in sorted(params.items()):
            pairs.append(AWSUtil._quote(k) + "=" + AWSUtil._quote(v))
        query_string = "&".join(pairs)

        # HMAC-SHA256 over "method\nhost\nuri\nquery", then base64.
        string_to_sign = "\n".join([method, host, uri, query_string])
        hm = hmac.new(self.secret, string_to_sign, hashlib.sha256)
        signature = AWSUtil._quote(base64.b64encode(hm.digest()))

        return ("http://" + host + uri + "?" + query_string
                + "&Signature=" + signature)


def main():
    """Run the WSGI application under the CGI handler."""
    application = webapp.WSGIApplication([
        ('/init', InitHandler),
        ('/cron', CronHandler),
        ('/fe41867a25dae6e6b708dca499e4a7b9.ics', IcsHandler),
    ], debug=True)
    wsgiref.handlers.CGIHandler().run(application)


if __name__ == '__main__':
    main()
output.ics
BEGIN:VCALENDAR
VERSION:2.0
PRODID:-//trash-area.com//Manually//EN
METHOD:PUBLISH
X-WR-CALNAME:新刊情報
BEGIN:VTIMEZONE
TZID:Japan
BEGIN:STANDARD
DTSTART:19390101T000000
TZOFFSETFROM:+0900
TZOFFSETTO:+0900
TZNAME:JST
END:STANDARD
END:VTIMEZONE
{% for r in result_set %}BEGIN:VEVENT
DTSTAMP:{{ r.TODAY }}
DTSTART;VALUE=DATE:{{ r.DTSTART }}
DTEND;VALUE=DATE:{{ r.DTEND }}
UID:{{ r.UID }}
SUMMARY:{{ r.TITLE }}
DESCRIPTION:{{ r.TITLE }}
LOCATION:
END:VEVENT
{% endfor %}END:VCALENDAR
cron.yaml
# Request the /cron URL every two hours.
cron:
- description: cron
  url: /cron
  schedule: every 2 hours
個人用なのでところどころ適度に変えてみて下さい。
ユーザー対応しようと思ったんですけど初期の作りが悪くて挫折しました。
参考デス。
ItemSearch – Product Advertising API
iCalendar