trash-area.com ≫ blog ≫ GAE ≫ GAE(python)+AWS+googleカレンダーで新刊チェック

2011年12月26日 0

なんとなくマンガの新刊チェッカーを実装してみました。
1. GAEからAmazon Web Services (AWS)をぐるぐる叩いて、結果を保存。
2. ics形式で出力させたのをgoogleカレンダーに食わせてスマホで同期。
とこんな感じです。

main.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
 
import wsgiref.handlers
import urllib
import hmac
import hashlib
import base64
import time
import datetime
import logging
import re
 
from xml.etree.ElementTree import fromstring
 
from google.appengine.ext import webapp
from google.appengine.api import urlfetch
from google.appengine.ext import db
from google.appengine.ext.webapp import template
 
 
# Check condition
class Rule(db.Model):
    """Datastore entity holding one search condition for new releases.

    CronHandler turns the non-empty fields into one clause of the Amazon
    Power search and stamps lastChecked whenever the rule is used.
    """
    # Title keyword; skipped in the search clause when empty.
    title = db.StringProperty()
    # Author name; skipped in the search clause when empty.
    author = db.StringProperty()
    # Publisher name; skipped in the search clause when empty.
    publisher = db.StringProperty()
    # Time the rule was last used in a search; None until its first use.
    lastChecked = db.DateTimeProperty(default=None)
 
 
# New release information
class NewReleases(db.Model):
    """Datastore entity for one release found by CronHandler.

    Entities are stored under the key name "key_" + ASIN and are read back
    by IcsHandler to build the calendar feed.
    """
    # Amazon ASIN of the item.
    asin = db.StringProperty()
    # Item title as returned by the search.
    title = db.StringProperty()
    # Publication date of the item.
    releaseDate = db.DateProperty()
 
 
class InitHandler(webapp.RequestHandler):
    """Seeds the datastore with the initial set of search rules."""

    def get(self):
        """Stores one Rule entity per seed entry, in the listed order."""
        seeds = [
            dict(title=u"ベルセルク", author=u"三浦 建太郎", publisher=u"白泉社"),
            dict(title=u"カイジ*和也", author=u"福本 伸行"),
            dict(title=u"咲", author=u"小林 立"),
        ]
        for fields in seeds:
            Rule(**fields).put()
 
 
class CronHandler(webapp.RequestHandler):
    """Cron entry point that searches Amazon for new comic releases.

    Each run picks a few rules (never-checked ones first, otherwise the
    least recently checked), combines them into a single ItemSearch Power
    query and stores every hit whose publication date is plausible as a
    NewReleases entity.
    """

    # Number of rules folded into one ItemSearch request per run.
    RULES_PER_RUN = 3

    # Only complete year-month-day publication dates are accepted.
    DATE_PATTERN = re.compile(r'\d{4}-\d{2}-\d{2}$')

    @staticmethod
    def _buildClause(rule):
        """Returns the Power-search clause for one rule, or None if the
        rule has no usable field."""
        terms = []
        for field in ("title", "author", "publisher"):
            value = getattr(rule, field)
            if value:
                terms.append("%s:%s" % (field, value.encode('utf-8')))
        if not terms:
            return None
        return "(" + " and ".join(terms) + ")"

    @classmethod
    def _parsePubDate(cls, text):
        """Returns a datetime.date for a 'YYYY-MM-DD' string, else None.

        None is returned for a missing value, for partial dates such as
        'YYYY-MM' and for strings that look right but are not real dates.
        """
        if text is None or cls.DATE_PATTERN.match(text) is None:
            return None
        try:
            st = time.strptime(text, '%Y-%m-%d')
        except ValueError:
            return None
        return datetime.date(st.tm_year, st.tm_mon, st.tm_mday)

    def get(self):
        """Runs one search cycle; writes nothing to the response."""
        rules = Rule.all().filter('lastChecked =', None).fetch(
                self.RULES_PER_RUN)
        if not rules:
            rules = Rule.all().order('lastChecked').fetch(self.RULES_PER_RUN)

        clauses = []
        for rule in rules:
            clause = self._buildClause(rule)
            if clause is not None:
                clauses.append(clause)

            # Stamp every rule that was looked at, including rules without
            # any usable field: a rule left at lastChecked=None would be
            # returned by the first query on every run and eventually keep
            # all other rules from being checked.
            # The stored time is now() shifted by +9 hours (presumably
            # UTC -> JST; confirm against the runtime's clock).
            rule.lastChecked = (datetime.datetime.now()
                    + datetime.timedelta(hours=9))
            rule.put()

        if not clauses:
            return

        power = "(Binding:コミック) and (" + " or ".join(clauses) + ")"

        logging.info("query .. power:" + power)

        query_pairs = {
                "AssociateTag": "#!あなたのAmazonアソシエイトのID!#",
                "Service": "AWSECommerceService",
                "Operation": "ItemSearch",
                "SearchIndex": "Books",
                "ResponseGroup": "Small,ItemAttributes",
                "Sort": "daterank",
                "MaximumPrice": "1000",
                "Availability": "Available",
                "Power": power,
            }

        # Both keys are listed in the AWS console under
        # Security Credentials > Access Credentials.
        aws = AWSUtil(
                "#!あなたのアクセスキーID!#",
                "#!あなたのシークレットアクセスキー!#")

        url = aws.getSignedUrl(query_pairs)

        result = urlfetch.fetch(url, '', urlfetch.GET, {})
        if result.status_code != 200:
            # Do not try to parse an error page as a search result.
            logging.error("ItemSearch failed .. status:"
                    + str(result.status_code))
            return

        xmlresult = fromstring(result.content)

        # Walk the search results.
        ns = AWSUtil.getNameSpace()

        # Accept dates from 60 days ago up to two years ahead.
        today = datetime.date.today()
        oldest = today - datetime.timedelta(days=60)
        newest = today + datetime.timedelta(days=365 * 2)

        for elem in xmlresult.findall(".//{%s}Item" % (ns)):
            asin = elem.findtext('./{%s}ASIN' % (ns))
            pubDateText = elem.findtext(
                    './{%s}ItemAttributes/{%s}PublicationDate' % (ns, ns))
            title = elem.findtext(
                    './{%s}ItemAttributes/{%s}Title' % (ns, ns))

            # findtext() returns None for a missing element, so every value
            # is checked before it is used.
            pubDate = self._parsePubDate(pubDateText)
            usable = (asin is not None and pubDate is not None
                    and oldest <= pubDate <= newest)
            if not usable:
                logging.info("publicationDate:" + str(pubDateText)
                        + " .. continue ("
                        + (title or u"").encode('utf-8') + ")")
                continue

            # Store the hit; the key name is derived from the ASIN.
            # NOTE: get_or_insert leaves an already stored entity as it is,
            # so a later change of the release date is not picked up.
            NewReleases.get_or_insert("key_" + asin, asin=asin,
                    title=title,
                    releaseDate=pubDate)
 
 
class IcsHandler(webapp.RequestHandler):
    """Serves the stored new releases as an iCalendar feed.

    Each NewReleases entity becomes one all-day event on its release date,
    rendered through the output.ics template.
    """

    @staticmethod
    def _escapeText(text):
        """Escapes a unicode string for use as an iCalendar TEXT value.

        Backslash, semicolon and comma are backslash-escaped and line
        breaks become a literal "\\n", so a title cannot break the
        SUMMARY/DESCRIPTION lines of the feed.
        """
        text = text.replace(u"\\", u"\\\\")
        text = text.replace(u";", u"\\;").replace(u",", u"\\,")
        return text.replace(u"\r\n", u"\\n").replace(u"\n", u"\\n")

    def get(self):
        """Writes the calendar with up to 1000 releases, ordered by date."""
        # The DTSTAMP value ends in "Z" (UTC), so it is built from UTC time;
        # it is the same for every event and computed once.
        stamp = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
        oneDay = datetime.timedelta(days=1)

        items = []
        for newRelease in NewReleases.all().order('releaseDate').fetch(1000):
            start = newRelease.releaseDate
            if start is None:
                # Without a date there is nothing to put on the calendar.
                continue

            # The UID is a hash of the datastore key, so it stays the same
            # across requests for the same entity.
            uid = hashlib.md5(str(newRelease.key())).hexdigest()
            title = self._escapeText(newRelease.title or u"")

            items.append({
                'UID': uid + '@trash-area.com',
                'TITLE': title.encode('utf-8'),
                # All-day event: DTEND is the day after DTSTART.
                'DTSTART': str(start).replace('-', ''),
                'DTEND': str(start + oneDay).replace('-', ''),
                'TODAY': stamp,
            })

        outparams = {'result_set': items}

        self.response.headers["Content-Type"] = "text/calendar; charset=utf-8"
        self.response.out.write(template.render("output.ics", outparams))
 
 
class AWSUtil:
    """Builds signed request URLs for the Amazon Product Advertising API."""

    @staticmethod
    def getApiVersion():
        """Returns the API version sent with every request."""
        return "2011-08-01"

    @staticmethod
    def getNameSpace():
        """Returns the XML namespace of responses for the API version."""
        return ("http://webservices.amazon.com/AWSECommerceService/"
                + AWSUtil.getApiVersion())

    def __init__(self, access_key, secret):
        """Stores the access key ID and the secret key used for signing."""
        self.access_key = access_key
        self.secret = secret

    def getSignedUrl(self, query_dict):
        """Returns the signed GET URL for the given request parameters.

        query_dict maps parameter names to byte-string values. It is not
        modified; AWSAccessKeyId, Timestamp and Version are added to a copy.
        """
        method = "GET"
        host = "ecs.amazonaws.jp"
        uri = "/onca/xml"

        # Work on a copy so the caller's dict is left untouched.
        params = dict(query_dict)
        params["AWSAccessKeyId"] = self.access_key
        params["Timestamp"] = time.strftime(
                "%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        params["Version"] = AWSUtil.getApiVersion()

        # Canonical query: parameters sorted by name, values percent-encoded
        # per RFC 3986. Only unreserved characters stay as they are, so "/"
        # must be encoded and "~" must not (the urllib.quote defaults do the
        # opposite).
        query_string = "&".join(
                [k + "=" + urllib.quote(v, safe="-_.~")
                 for k, v in sorted(params.items())])

        # HMAC-SHA256 over the canonical request.
        hm = hmac.new(
                self.secret,
                "\n".join([method, host, uri, query_string]),
                hashlib.sha256)

        # Base64 output may contain "+", "/" and "="; encode all of them.
        signature = urllib.quote(base64.b64encode(hm.digest()), safe="")

        return ("http://" + host + uri + "?" + query_string
                + "&Signature=" + signature)
 
 
def main():
    """Builds the WSGI application and serves the current CGI request."""
    routes = [
        ('/init', InitHandler),
        ('/cron', CronHandler),
        ('/fe41867a25dae6e6b708dca499e4a7b9.ics', IcsHandler),
    ]
    app = webapp.WSGIApplication(routes, debug=True)
    cgi_handler = wsgiref.handlers.CGIHandler()
    cgi_handler.run(app)


if __name__ == '__main__':
    main()

output.ics

{% comment %}Rendered by IcsHandler; result_set is a list of dicts with the keys TODAY, DTSTART, DTEND, UID and TITLE. This comment is kept on the first line so that it adds no line to the output.{% endcomment %}BEGIN:VCALENDAR
VERSION:2.0
PRODID:-//trash-area.com//Manually//EN
METHOD:PUBLISH
X-WR-CALNAME:新刊情報
BEGIN:VTIMEZONE
TZID:Japan
BEGIN:STANDARD
DTSTART:19390101T000000
TZOFFSETFROM:+0900
TZOFFSETTO:+0900
TZNAME:JST
END:STANDARD
END:VTIMEZONE
{% for r in result_set %}BEGIN:VEVENT
DTSTAMP:{{ r.TODAY }}
DTSTART;VALUE=DATE:{{ r.DTSTART }}
DTEND;VALUE=DATE:{{ r.DTEND }}
UID:{{ r.UID }}
SUMMARY:{{ r.TITLE }}
DESCRIPTION:{{ r.TITLE }}
LOCATION:
END:VEVENT
{% endfor %}END:VCALENDAR

cron.yaml

# App Engine cron configuration: requests /cron (mapped to CronHandler in
# main.py) every two hours.
cron:
- description: cron
  url: /cron
  schedule: every 2 hours

個人用なのでところどころ適度に変えてみて下さい。
ユーザー対応しようと思ったんですけど初期の作りが悪くて挫折しました。

参考デス。
ItemSearch – Product Advertising API
iCalendar

ソーシャルブックマーク
はてな Livedoor del.icio.us
関連してそうな記事
同じカテゴリーの別の記事
タグ
, , ,
トラックバックURL
コメント

コメントをどうぞ

*反映されるまでに時間がかかることがあります。
*メールアドレスはアバターの使用に使います。