From 899211de38451bb361fa1744f4a63daf6e0ef210 Mon Sep 17 00:00:00 2001
From: "richard.m.tew@gmail.com" <richard.m.tew@gmail.com>
Date: Wed, 29 Jul 2015 21:22:06 +1200
Subject: [PATCH] Update scripts to fetch and embed disqus recent comments.
 Still requires work.

---
 gensite.py                      |  51 ++++++-
 readme.txt                      |  20 ++-
 templates/website/css/style.css |   8 +
 templates/website/homepage.html |  22 ++-
 update.py                       | 256 +++++++++++++++++++++++++++++++-
 5 files changed, 344 insertions(+), 13 deletions(-)

diff --git a/gensite.py b/gensite.py
index caddc5b..7a768ed 100644
--- a/gensite.py
+++ b/gensite.py
@@ -30,7 +30,6 @@
 import calendar
 import codecs
-import datetime
 import distutils
 import distutils.dir_util # Not needed on windows.
 import distutils.file_util # Not needed on windows.
@@ -222,6 +221,8 @@ def get_back_issues_data(tp):
 
 def generate_website_index_page():
+    global data_disqus
+
     t = jinja2_env.get_template("homepage.html")
     output_path = os.path.join(setting_target_dirname, "index.html")
@@ -257,7 +258,29 @@ def generate_website_index_page():
 
     # SECTION: Recent comments
     tp.sections.recent_comments.title = "Recent Comments"
-    tp.sections.recent_comments.content = """
-Disqus comments on articles will be populated here dynamically at some later point. Watch this space!
""" + tp.sections.recent_comments.entries = [] + tp.sections.recent_comments.is_enabled = True + if data_disqus is None: + tp.sections.recent_comments.is_enabled = False + tp.sections.recent_comments.content = """
Failed to generate content.
""" + else: + # comment.comment_id/user_name/timestamp/thread_id/text + for comment in data_disqus.get_recent_comments(): + # thread.url/feed/title + thread = data_disqus.get_thread(comment.thread_id) + + text = comment.text + while " " in text: + text = text.replace(" ", " ") + text = text[:80]+"..." + + entry = TemplateParameters() + entry.age_string = data_disqus.get_time_string(comment.timestamp) + entry.user_name = comment.user_name + entry.thread_title = thread.title + entry.thread_url = thread.url + entry.text = text + tp.sections.recent_comments.entries.append(entry) html = t.render(tp=tp) with codecs.open(output_path, "wb", "utf-8") as f: @@ -663,9 +686,26 @@ def get_article_block_content(volume_number, issue_number, page_dirname, block_n return next(t.blocks[block_name](None)).strip() return "" +default_generation_targets = TARGET_WEBSITE, TARGET_EBOOK -if __name__ == "__main__": - for setting_generation_target in (TARGET_WEBSITE, TARGET_EBOOK): +def run(targets=None, disqus_data=None, reddit_data=None): + # TODO(rmtew): Make these non-global at some point. It's not really a problem, but it's messy. + global setting_target_dirname + global setting_use_minimised_files + global setting_website_hidden_issue_pages + global setting_generation_target + global setting_base_template + global jinja2_env + global data_disqus + global data_reddit + + data_disqus = disqus_data + data_reddit = reddit_data + + if targets is None: + targets = default_generation_targets + + for setting_generation_target in targets: if setting_generation_target not in templates_by_target: print >> sys.stderr, "Unknown target:", setting_generation_target sys.exit(1) @@ -695,4 +735,7 @@ if __name__ == "__main__": generate_issues(issue_data) generate_ebooks(issue_data) +if __name__ == "__main__": + run() + # EOF diff --git a/readme.txt b/readme.txt index bb1fab7..3a17519 100644 --- a/readme.txt +++ b/readme.txt @@ -27,5 +27,21 @@ OPTIONAL SETUP USAGE 1. _env\Scripts\activate -2. python gensite.py -3. python update.py \ No newline at end of file + +Then to fetch some initial disqus data: + +1. Edit ir-config.ini and enter required values. +2. python update.py disqus + +To generate a website with dynamic data (recent comments on homepage..): + +1. python update.py website + +To generate a website with no dynamic data (recent comments on homepage..) + +1. python gensite.py + +Note that editing either of these scripts might be required, in order to set the right website target. + + TARGET_WEBSITE (generates the website without google analytics and disqus comment sections on articles). + TARGET_WEBSITE | FLAG_ONLINE (generates the website with google analytics and disqus comment sections on articles). diff --git a/templates/website/css/style.css b/templates/website/css/style.css index 603d695..41391bb 100644 --- a/templates/website/css/style.css +++ b/templates/website/css/style.css @@ -133,6 +133,14 @@ ul { padding: 3px 0 3px 0; } +table.table-comments tr.last-row td{ + padding-bottom: 5px; +} + +.row-comment-text { + font-style: italic; +} + .licensetitle { padding-top: 10px; border-bottom: 1px solid grey; diff --git a/templates/website/homepage.html b/templates/website/homepage.html index 043a19c..17b9f91 100644 --- a/templates/website/homepage.html +++ b/templates/website/homepage.html @@ -65,7 +65,8 @@ - Or peruse older issues from the previous period of publication that ended in 2001. +
+ Or check out the older issues from the previous period of publication which ended in 2001.
@@ -88,7 +89,26 @@
+{% if tp.sections.recent_comments.is_enabled %}
+<table class="table-comments">
+{% for entry in tp.sections.recent_comments.entries %}
+<tr{% if loop.last %} class="last-row"{% endif %}>
+<td>
+<a href="{{entry.thread_url}}">{{entry.thread_title}}</a><br/>
+{{entry.user_name}} ({{entry.age_string}} ago): <span class="row-comment-text">{{entry.text}}</span>
+</td>
+</tr>
+{% endfor %}
+</table>
+{% else %}
+{{tp.sections.recent_comments.content}}
+{% endif %}
diff --git a/update.py b/update.py
index a9ea8e4..c3cb6ec 100644
--- a/update.py
+++ b/update.py
@@ -1,4 +1,33 @@
+"""
+Author: Richard Tew <richard.m.tew@gmail.com>
+
+This script can be invoked manually, as required, or periodically by a
+scheduler like cron.
+
+There is no need to factor in the usage limits of the services it polls
+(Reddit, Disqus, ...); the script does that itself and only accesses them
+if a minimum amount of time has passed.
+
+"""
+
+# What we can use from the standard library.
+import codecs
+import collections
+import ConfigParser
+import datetime
+import email.utils
+import json
+import os
+import sys
+import time
+import types
+
+# The external burden of required dependencies.
 import praw
+from disqusapi import DisqusAPI
+
+# The burden of our own module.
+import gensite
 
 r_username = "rmtew"
 r_platform = "python"
@@ -7,12 +36,227 @@
 r_appversion = "v0.1"
 r_useragent = "%s:%s:%s (by /u/%s)" % (r_platform, r_appname, r_appversion, r_username)
 
+SITE_URL = "http://journal.imaginary-realities.com/"
+
+
+def make_time_string(value):
+    """ It's quicker to write this than to find something that does it and know it works. """
+    if type(value) in (int, float):
+        value = int(value)
+        hours = value / (60 * 60)
+        value = value % (60 * 60)
+        minutes = value / 60
+        value = value % 60
+        seconds = value
+    else:
+        hours, minutes, seconds = [ int(v) for v in value.split(":") ]
+
+    if hours:
+        if hours >= 24:
+            days = hours / 24
+            if days >= 7:
+                if days >= 30:
+                    months = int(days / (365/12.0))
+                    if months >= 12:
+                        years = days / 365
+                        time_string = "%d year%s" % (years, "" if years == 1 else "s")
+                    else:
+                        time_string = "%d month%s" % (months, "" if months == 1 else "s")
+                else:
+                    weeks = days / 7
+                    time_string = "%d week%s" % (weeks, "" if weeks == 1 else "s")
+            else:
+                time_string = "%d day%s" % (days, "" if days == 1 else "s")
+        else:
+            time_string = "%d hour%s" % (hours, "" if hours == 1 else "s")
+    elif minutes:
+        time_string = "%d minute%s" % (minutes, "" if minutes == 1 else "s")
+    else:
+        time_string = "%d second%s" % (seconds, "" if seconds == 1 else "s")
+    return time_string
+
+def run_reddit(update=False):
+    # NOTE(rmtew): Until I work out what values to display, this will store all
+    # the data that can be reconciled as storable.
+    if update:
+        r = praw.Reddit(user_agent=r_useragent)
+        submissions = r.search("url:'%s'" % SITE_URL, sort="new")
+        header = None
+        lines = []
+        string_columns = set()
+        for match in submissions:
+            if header is None:
+                header = []
+                for k in dir(match):
+                    if k[0] == "_" or k in ("reddit_session", "comments"): continue
+                    v = getattr(match, k)
+                    if type(v) in (types.MethodType, types.FunctionType): continue
+                    if type(v) in types.StringTypes:
+                        header.append(k)
+                    elif type(v) in (bool, int, float, types.NoneType, dict, list):
+                        header.append(k)
+                    elif type(v) in (types.ClassType, types.TypeType):
+                        string_columns.add(k)
+                        header.append(k)
+
+            line = []
+            for column_name in header:
+                v = getattr(match, column_name, None)
+                if column_name in string_columns:
+                    line.append(unicode(v))
+                else:
+                    line.append(v)
+            lines.append(line)
+
+        with codecs.open("reddit.json", "wb", "utf-8") as f:
+            json.dump((header, lines), f)
+
+def run_disqus(update=False):
+    # Get the absolute path of the directory the script is located in.
+    script_path = os.path.dirname(__file__)
+    if not len(script_path):
+        script_path = sys.path[0]
+
+    config = ConfigParser.ConfigParser()
+    config.read([
+        os.path.join(script_path, "ir-config.ini"),
+    ])
+
+    # If this is not an absolute path, make it one based on the script directory.
+    data_path = config.get("paths", "data-storage")
+    if not os.path.isabs(data_path):
+        data_path = os.path.abspath(os.path.join(script_path, data_path))
+
+    # Load in any persisted data.
+    persists = {}
+    datafile_path = os.path.join(data_path, "data.json")
+    if os.path.exists(datafile_path):
+        persists = json.load(open(datafile_path, "rb"))
+
+    if update:
+        SECRET_KEY = config.get("disqus-keys", "private")
+        PUBLIC_KEY = config.get("disqus-keys", "public")
+        disqus = DisqusAPI(SECRET_KEY, PUBLIC_KEY)
+
+        thread_data = persists.get("disqus-thread-data", {})
+        _comment_threads = set()
+
+        # comment_timestamp = time.time()
+        comment_data = []
+        for comment in disqus.posts.list(forum="imaginaryrealities"):
+            if comment["isSpam"] or comment["isDeleted"]:
+                continue
+
+            # Get the RFC3339 date string from disqus, assume it's UTC, and convert it to a timestamp.
+            timeseq = list(time.strptime(comment["createdAt"], '%Y-%m-%dT%H:%M:%S'))
+            timeseq.append(0) # offset of date's timezone from UTC.
+            timeseq = tuple(timeseq)
+            post_timestamp = email.utils.mktime_tz(timeseq) # UTC seconds since the epoch
+
+            poster_username = comment["author"]["name"]
+            post_commentid = comment["id"]
+            post_threadid = comment["thread"]
+            post_text = comment["raw_message"]
+            comment_data.append((post_commentid, poster_username, post_timestamp, post_threadid, post_text))
+
+            # Track the threads which the processed comments belong to.
+            _comment_threads.add(post_threadid)
+
+        # Check which threads have new comments which we do not know about.
+        _unknown_threads = _comment_threads - set(thread_data)
+        if len(_unknown_threads):
+            print "Processing new threads."
+            thread_timestamp = persists.get("disqus-thread-timestamp", '1333256400')
+            # TODO: disqus rejects a "since" value, so do not pass one until the reason is determined.
+            for result in disqus.threads.list(forum="imaginaryrealities", limit=20): # , since=thread_timestamp
+                thread_id = result["id"]
+                thread_entry = [ result["link"], result["feed"], result["clean_title"] ]
+                thread_data[thread_id] = thread_entry
+            persists["disqus-thread-timestamp"] = time.time()
+
+            _unknown_threads = _comment_threads - set(thread_data)
+            if len(_unknown_threads):
+                print "ERROR: still have %d unknown threads" % len(_unknown_threads)
+
+        persists["disqus-comment-data"] = comment_data
+        persists["disqus-thread-data"] = thread_data
+        json.dump(persists, open(datafile_path, "wb"))
+
+    return persists
+
+
+def run_tests():
+    def test_make_time_string(s_in, s_out):
+        s_out_actual = make_time_string(s_in)
+        if s_out_actual != s_out:
+            raise Exception("Expected '%s'; got '%s'" % (s_out, s_out_actual))
+    test_make_time_string("00:00:00", "0 seconds")
+    test_make_time_string("00:00:01", "1 second")
+    test_make_time_string("00:00:59", "59 seconds")
+    test_make_time_string("00:01:00", "1 minute")
+    test_make_time_string("00:59:00", "59 minutes")
+    test_make_time_string("01:00:00", "1 hour")
+    test_make_time_string("23:00:00", "23 hours")
+    test_make_time_string("24:00:00", "1 day")
+    test_make_time_string("%02d:00:00" % (24*6+23), "6 days")
+    test_make_time_string("%02d:00:00" % (24*7), "1 week")
+    test_make_time_string("%02d:00:00" % (24*7*3), "3 weeks")
+    test_make_time_string("%02d:00:00" % (24*7*5), "1 month")
+    test_make_time_string("%02d:00:00" % (24*7*5*2), "2 months")
+    test_make_time_string("%02d:00:00" % (24*365), "1 year")
+
+
+class disqus_data_wrapper(object):
+    comment_class = collections.namedtuple("comment_class", [ "comment_id", "user_name", "timestamp", "thread_id", "text" ])
+    thread_class = collections.namedtuple("thread_class", [ "url", "feed", "title" ])
+
+    def __init__(self, data):
+        self.data = data
+
+    def get_recent_comments(self, limit=6):
+        # For now, disqus provides the comments from newest to oldest.
+        return [
+            self.comment_class(*entry)
+            for entry
+            in self.data.get("disqus-comment-data", [])
+        ][:limit]
+
+    def get_thread(self, thread_id):
+        return self.thread_class(*self.data.get("disqus-thread-data", {})[thread_id])
+
+    def get_time_string(self, timestamp):
+        return make_time_string(time.time() - timestamp)
+
+class reddit_data_wrapper(object):
+    def __init__(self, data):
+        self.data = data
+
+
 def run():
-    r = praw.Reddit(user_agent=r_useragent)
-    submissions = r.search("url:'http://journal.imaginary-realities.com/'")
-    for match in submissions:
-        match.ups, match.downs, match.score, match.title, match.url
-        # Take the url, strip guff at the end, match it to a generated page.
+    args = set(sys.argv[1:])
+
+    if "tests" in args:
+        print "Running tests.."
+        run_tests()
+        print "..done"
+        sys.exit(0)
+
+    update_disqus = "disqus" in args or "all" in args
+    update_reddit = "reddit" in args # or "all" in args
+    update_website = "website" in args or "all" in args
+
+    disqus_data = run_disqus(update=update_disqus)
+    reddit_data = run_reddit(update=update_reddit)
+
+    if update_website:
+        gensite_targets = (gensite.TARGET_WEBSITE, ) # or: (gensite.TARGET_WEBSITE | gensite.FLAG_ONLINE, )
+        gensite.run(gensite_targets,
+            disqus_data=disqus_data_wrapper(disqus_data),
+            reddit_data=reddit_data_wrapper(reddit_data))
+        # TODO(rmtew): Put the resulting website in place. Set permissions.
+
 if __name__ == "__main__":
-    run()
+    run()
\ No newline at end of file
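
The patch reads ir-config.ini but does not ship a sample of it. Going only by
the keys update.py reads (config.get("paths", "data-storage") and
config.get("disqus-keys", "private"/"public")), a minimal sketch might look
like this, with placeholder values:

    [paths]
    data-storage = data

    [disqus-keys]
    public = YOUR_DISQUS_PUBLIC_KEY
    private = YOUR_DISQUS_SECRET_KEY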
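
Once "python update.py disqus" has written data.json, the wrapper classes can
be exercised by hand. A quick sketch, assuming data-storage points at a "data"
subdirectory next to the script:

    import json
    import update

    persists = json.load(open("data/data.json", "rb"))
    disqus = update.disqus_data_wrapper(persists)
    for comment in disqus.get_recent_comments(limit=3):
        thread = disqus.get_thread(comment.thread_id)
        print "%s on %s (%s ago)" % (comment.user_name, thread.title,
            disqus.get_time_string(comment.timestamp))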
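
make_time_string() accepts either a number of seconds or an "HH:MM:SS" string
and reduces it to the single largest unit, which is what the homepage "age"
strings rely on. For example:

    >>> import update
    >>> update.make_time_string(90)
    '1 minute'
    >>> update.make_time_string("48:30:00")
    '2 days'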