# Mirror of https://bitbucket.org/oreolek/imaginary-realities.git
# (synced 2024-04-24 05:11:37 +03:00)
"""
|
|
Author: Richard Tew <richard.m.tew@gmail.com>
|
|
|
|
This script can either be invoked manually, if required, or periodically by
|
|
having a scheduler like cron invoke it.
|
|
|
|
It is not necessary to factor in the usage limits of the services it polls
|
|
(Reddit, Disqus, ...) as the script will do that itself and only access them
|
|
if a minimum amount of time has passed.
|
|
|
|
"""
|
|
|
|
# What we can use from the standard library.
import codecs
import collections
import ConfigParser  # Python 2 module; renamed to configparser in Python 3.
import datetime  # NOTE(review): not referenced in this file - confirm before removing.
import email.utils
import json
import os
import sys
import time
import types

# The external burden of required dependencies.
import praw  # Reddit API client.
from disqusapi import DisqusAPI  # Disqus API client.

# The burden of our own module.
import gensite

# Components of the reddit API user agent string.
r_username = "rmtew"
r_platform = "python"
r_appname = "siteshitter"
r_appversion = "v0.1"

# Reddit asks API clients to identify themselves in this format.
r_useragent = "%s:%s:%s (by /u/%s)" % (r_platform, r_appname, r_appversion, r_username)

# The site whose inbound reddit links and disqus comments are polled.
SITE_URL = "http://journal.imaginary-realities.com/"

def make_time_string(value):
|
|
""" It's quicker to write this than to find something that does it and know it works. """
|
|
if type(value) in (int, float):
|
|
value = int(value)
|
|
hours = value / (60 * 60)
|
|
value = value % (60 * 60)
|
|
minutes = value / 60
|
|
value = value % 60
|
|
seconds = value
|
|
else:
|
|
hours, minutes, seconds = [ int(v) for v in value.split(":") ]
|
|
|
|
if hours:
|
|
if hours >= 24:
|
|
days = hours / 24
|
|
if days >= 7:
|
|
if days >= 30:
|
|
months = int(days / (365/12.0))
|
|
if months >= 12:
|
|
years = days / 365
|
|
time_string = "%d year%s" % (years, "" if years == 1 else "s")
|
|
else:
|
|
time_string = "%d month%s" % (months, "" if months == 1 else "s")
|
|
else:
|
|
weeks = days / 7
|
|
time_string = "%d week%s" % (weeks, "" if weeks == 1 else "s")
|
|
else:
|
|
time_string = "%d day%s" % (days, "" if days == 1 else "s")
|
|
else:
|
|
time_string = "%d hour%s" % (hours, "" if hours == 1 else "s")
|
|
elif minutes:
|
|
time_string = "%d minute%s" % (minutes, "" if minutes == 1 else "s")
|
|
else:
|
|
time_string = "%d second%s" % (seconds, "" if seconds == 1 else "s")
|
|
return time_string
|
|
|
|
def run_reddit(update=False):
    """Poll reddit for submissions linking to the site and snapshot them.

    update: when False this function does nothing (no persisted reddit data
        is read back yet).
    Returns None; the harvested rows are written to "reddit.json" as a
    (header, lines) pair.
    """
    # NOTE(rmtew): Until I work out what values to display, this will store all the
    # data that can be reconciled as storable.
    if update:
        r = praw.Reddit(user_agent=r_useragent)
        # Search for submissions whose URL points at our site, newest first.
        submissions = r.search("url:'%s'" % SITE_URL, sort="new")
        header = None  # Column names, derived from the first submission seen.
        lines = []  # One row of column values per submission.
        string_columns = set()  # Columns whose values are coerced to unicode.
        for match in submissions:
            if header is None:
                # Build the column list by reflecting over the first
                # submission object, keeping only plain-data attributes.
                header = []
                for k in dir(match):
                    # Skip private attributes and known non-data members.
                    if k[0] == "_" or k in ("reddit_session", "comments"): continue
                    v = getattr(match,k)
                    # Skip callables.
                    if type(v) in (types.MethodType, types.FunctionType): continue
                    if type(v) in types.StringTypes:
                        header.append(k)
                    elif type(v) in (bool, int, float, types.NoneType, dict, list):
                        header.append(k)
                    elif type(v) in (types.ClassType, types.TypeType):
                        # Class-valued attributes are stored via their string form.
                        string_columns.add(k)
                        header.append(k)

            # Collect this submission's value for every known column.
            line = []
            for column_name in header:
                v = getattr(match, column_name, None)
                if column_name in string_columns:
                    line.append(unicode(v))
                else:
                    line.append(v)
            lines.append(line)

        # Snapshot everything to disk for later reconciliation.
        # NOTE(review): header stays None if the search returned no results -
        # the file then holds (null, []); confirm downstream tolerates that.
        with codecs.open("reddit.json", "wb", "utf-8") as f:
            json.dump((header, lines), f)

def run_disqus(update=False):
    """Fetch the site's disqus comments/threads and merge them into data.json.

    update: when False, just load and return the previously persisted data.
    Returns the "persists" dictionary; when update is True it is also
    written back to data.json in the configured data-storage directory.
    """
    # Get the absolute path of the directory the script is located in.
    script_path = os.path.dirname(__file__)
    if not len(script_path):
        # __file__ was a bare filename; fall back to the interpreter's
        # notion of the script directory.
        script_path = sys.path[0]

    config = ConfigParser.ConfigParser()
    config.read([
        os.path.join(script_path, "ir-config.ini"),
    ])

    # If this is not an absolute path, make it one based on the script directory.
    data_path = config.get("paths", "data-storage")
    if not os.path.isabs(data_path):
        data_path = os.path.abspath(os.path.join(script_path, data_path))

    # TODO: Load in persisted data.
    persists = {}
    datafile_path = os.path.join(data_path, "data.json")
    if os.path.exists(datafile_path):
        # NOTE(review): file handle is never closed explicitly; consider "with".
        persists = json.load(open(datafile_path, "rb"))

    if update:
        SECRET_KEY = config.get("disqus-keys", "private")
        PUBLIC_KEY = config.get("disqus-keys", "public")
        disqus = DisqusAPI(SECRET_KEY, PUBLIC_KEY)

        # Previously seen threads, keyed by thread id.
        thread_data = persists.get("disqus-thread-data", {})
        # Thread ids seen amongst the comments fetched below.
        _comment_threads = set()

        # ....
        # comment_timestamp = time.time()
        comment_data = []
        for comment in disqus.posts.list(forum="imaginaryrealities"):
            # Ignore comments that should not be displayed.
            if comment["isSpam"] or comment["isDeleted"]:
                continue

            # Get the RFC3339 date string from disqus, assume it's UTC, and convert it to a timestamp.
            timeseq = list(time.strptime(comment["createdAt"], '%Y-%m-%dT%H:%M:%S'))
            timeseq.append(0) # offset of date's timezone from UTC.
            timeseq = tuple(timeseq)
            post_timestamp = email.utils.mktime_tz(timeseq) # UTC seconds since the epoch

            poster_username = comment["author"]["name"]
            post_commentid = comment["id"]
            post_threadid = comment["thread"]
            post_text = comment["raw_message"]
            # Row layout must match disqus_data_wrapper.comment_class.
            comment_data.append((post_commentid, poster_username, post_timestamp, post_threadid, post_text))

            # Track the threads which the processed comments belong to.
            _comment_threads.add(post_threadid)

        # Check which threads have new comments which we do not know about.
        _unknown_threads = _comment_threads - set(thread_data)
        if len(_unknown_threads):
            print "Processing new threads."
            # NOTE(review): currently unused because "since=" is commented out below.
            thread_timestamp = persists.get("disqus-thread-timestamp", '1333256400')
            # TODO: disqus does not like a since value, so cannot pass one yet until why, is determined.
            for result in disqus.threads.list(forum="imaginaryrealities", limit=20):#, since=thread_timestamp):
                thread_id = result["id"]
                # Row layout must match disqus_data_wrapper.thread_class.
                thread_entry = [ result["link"], result["feed"], result["clean_title"] ]
                thread_data[thread_id] = thread_entry
            persists["disqus-thread-timestamp"] = time.time()

            # Any comment threads still unresolved after refetching is an error.
            _unknown_threads = _comment_threads - set(thread_data)
            if len(_unknown_threads):
                print "ERROR: still have %d unknown threads" % len(_unknown_threads)

        persists["disqus-comment-data"] = comment_data
        persists["disqus-thread-data"] = thread_data
        # NOTE(review): handle not closed explicitly; consider "with".
        json.dump(persists, open(datafile_path, "wb"))

    return persists

def run_tests():
    """Sanity checks for make_time_string(); raises on the first mismatch."""
    cases = [
        ("00:00:00", "0 seconds"),
        ("00:00:01", "1 second"),
        ("00:00:59", "59 seconds"),
        ("00:01:00", "1 minute"),
        ("00:59:00", "59 minutes"),
        ("01:00:00", "1 hour"),
        ("23:00:00", "23 hours"),
        ("24:00:00", "1 day"),
        ("%02d:00:00" % (24*6+23), "6 days"),
        ("%02d:00:00" % (24*7), "1 week"),
        ("%02d:00:00" % (24*7*3), "3 weeks"),
        ("%02d:00:00" % (24*7*5), "1 month"),
        ("%02d:00:00" % (24*7*5*2), "2 months"),
        ("%02d:00:00" % (24*365), "1 year"),
    ]
    for s_in, s_out in cases:
        s_out_actual = make_time_string(s_in)
        if s_out_actual != s_out:
            raise Exception("Expected '%s'; got '%s'" % (s_out, s_out_actual))

class disqus_data_wrapper(object):
    """Read-only view over the persisted disqus data, for site generation."""

    # Row layout of "disqus-comment-data" entries produced by run_disqus().
    comment_class = collections.namedtuple("comment_class", [ "comment_id", "user_name", "timestamp", "thread_id", "text" ])
    # Row layout of "disqus-thread-data" entries produced by run_disqus().
    thread_class = collections.namedtuple("thread_class", [ "url", "feed", "title" ])

    def __init__(self, data):
        # data: the "persists" dictionary returned by run_disqus().
        self.data = data

    def get_recent_comments(self, limit=6):
        """Return up to `limit` of the newest comments as comment_class rows."""
        # For now, disqus provides the comments from newest to oldest, so
        # taking a leading slice gives the most recent ones.
        # (A stray debug print of the data keys was removed from here.)
        return [
            self.comment_class(*entry)
            for entry
            in self.data.get("disqus-comment-data", [])
        ][:limit]

    def get_thread(self, thread_id):
        """Return the thread_class row for thread_id (KeyError if unknown)."""
        return self.thread_class(*self.data.get("disqus-thread-data", {})[thread_id])

    def get_time_string(self, timestamp):
        """Return a human readable age (e.g. "3 weeks") for an epoch timestamp."""
        return make_time_string(time.time() - timestamp)

class reddit_data_wrapper(object):
    """Counterpart to disqus_data_wrapper for the reddit data.

    Currently just holds the raw data; no accessors are defined yet.
    """

    def __init__(self, data):
        # Keep a reference to whatever run_reddit() produced.
        self.data = data

def run():
|
|
args = set(sys.argv[1:])
|
|
|
|
if "tests" in args:
|
|
print "Running tests.."
|
|
run_tests()
|
|
print "..done"
|
|
sys.exit(1)
|
|
|
|
update_disqus = "disqus" in args or "all" in args
|
|
update_reddit = "reddit" in args # or "all" in args
|
|
update_website = "website" in args or "all" in args
|
|
|
|
disqus_data = run_disqus(update=update_disqus)
|
|
reddit_data = run_reddit(update=update_reddit)
|
|
|
|
if update_website:
|
|
gensite_targets = (gensite.TARGET_WEBSITE, )# | gensite.FLAG_ONLINE, )
|
|
gensite.run(gensite_targets,
|
|
disqus_data=disqus_data_wrapper(disqus_data),
|
|
reddit_data=reddit_data_wrapper(reddit_data))
|
|
# TODO(rmtew): Put the resulting website in place. Set permissions.
|
|
|
|
|
|
if __name__ == "__main__":
|
|
run() |