1
0
Fork 0
mirror of https://bitbucket.org/oreolek/imaginary-realities.git synced 2024-04-29 23:59:33 +03:00

Update scripts to fetch and embed disqus recent comments. Still requires work.

This commit is contained in:
richard.m.tew@gmail.com 2015-07-29 21:22:06 +12:00
parent 95906d5981
commit 899211de38
5 changed files with 344 additions and 13 deletions

View file

@ -30,7 +30,6 @@
import calendar
import codecs
import datetime
import distutils
import distutils.dir_util # Not needed on windows.
import distutils.file_util # Not needed on windows.
@ -222,6 +221,8 @@ def get_back_issues_data(tp):
def generate_website_index_page():
global data_disqus
t = jinja2_env.get_template("homepage.html")
output_path = os.path.join(setting_target_dirname, "index.html")
@ -257,7 +258,29 @@ def generate_website_index_page():
# SECTION: Recent comments
tp.sections.recent_comments.title = "Recent Comments"
tp.sections.recent_comments.content = """<div class="watch-this-space">Disqus comments on articles will be populated here dynamically at some later point. Watch this space!</div>"""
tp.sections.recent_comments.entries = []
tp.sections.recent_comments.is_enabled = True
if data_disqus is None:
tp.sections.recent_comments.is_enabled = False
tp.sections.recent_comments.content = """<div class="watch-this-space">Failed to generate content.</div>"""
else:
# comment.comment_id/user_name/timestamp/thread_id/text
for comment in data_disqus.get_recent_comments():
# thread.url/feed/title
thread = data_disqus.get_thread(comment.thread_id)
text = comment.text
while " " in text:
text = text.replace(" ", " ")
text = text[:80]+"..."
entry = TemplateParameters()
entry.age_string = data_disqus.get_time_string(comment.timestamp)
entry.user_name = comment.user_name
entry.thread_title = thread.title
entry.thread_url = thread.url
entry.text = text
tp.sections.recent_comments.entries.append(entry)
html = t.render(tp=tp)
with codecs.open(output_path, "wb", "utf-8") as f:
@ -663,9 +686,26 @@ def get_article_block_content(volume_number, issue_number, page_dirname, block_n
return next(t.blocks[block_name](None)).strip()
return ""
default_generation_targets = TARGET_WEBSITE, TARGET_EBOOK
if __name__ == "__main__":
for setting_generation_target in (TARGET_WEBSITE, TARGET_EBOOK):
def run(targets=None, disqus_data=None, reddit_data=None):
# TODO(rmtew): Make these non-global at some point. It's not really a problem, but it's messy.
global setting_target_dirname
global setting_use_minimised_files
global setting_website_hidden_issue_pages
global setting_generation_target
global setting_base_template
global jinja2_env
global data_disqus
global data_reddit
data_disqus = disqus_data
data_reddit = reddit_data
if targets is None:
targets = default_generation_targets
for setting_generation_target in targets:
if setting_generation_target not in templates_by_target:
print >> sys.stderr, "Unknown target:", setting_generation_target
sys.exit(1)
@ -695,4 +735,7 @@ if __name__ == "__main__":
generate_issues(issue_data)
generate_ebooks(issue_data)
if __name__ == "__main__":
run()
# EOF

View file

@ -27,5 +27,21 @@ OPTIONAL SETUP
USAGE
1. _env\Scripts\activate
2. python gensite.py
3. python update.py
Then to fetch some initial disqus data:
1. Edit ir-config.ini and enter required values.
2. python update.py disqus
To generate a website with dynamic data (recent comments on the homepage, etc.):
1. python update.py website
To generate a website with no dynamic data (recent comments on the homepage, etc.):
1. python gensite.py
Note that you might need to edit either of these scripts in order to set the right website target.
TARGET_WEBSITE (generates the website without google analytics and disqus comment sections on articles).
TARGET_WEBSITE | FLAG_ONLINE (generates the website with google analytics and disqus comment sections on articles).

View file

@ -133,6 +133,14 @@ ul {
padding: 3px 0 3px 0;
}
table.table-comments tr.last-row td{
padding-bottom: 5px;
}
.row-comment-text {
font-style: italic;
}
.licensetitle {
padding-top: 10px;
border-bottom: 1px solid grey;

View file

@ -65,7 +65,8 @@
</table>
</div>
Or peruse <a href="http://imaginary-realities.disinterest.org/">older issues</a> from the previous period of publication that ended in 2001.
<br/>
Or check out the <a href="http://imaginary-realities.disinterest.org/">older issues</a> from the previous period of publication which ended in 2001.
</div>
</div>
<div class="aligned-example text">
@ -88,7 +89,26 @@
<div class="aligned-example text">
<div class="mainheader"><a name="recent-comments">{{tp.sections.recent_comments.title}}</a></div>
<div class="mainbody">
{% if tp.sections.recent_comments.is_enabled %}
<table class="table-comments">
<tbody>
{% for entry in tp.sections.recent_comments.entries %}
<tr class="row-comment-link">
<td colspan=2>
<a href="{{entry.thread_url}}#comments">{{entry.thread_title}}</a>
</td>
</tr>
<tr class="last-row">
<td colspan=2>
{{entry.user_name}} ({{entry.age_string}} ago): <span class="row-comment-text">{{entry.text}}</span>
</td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
{{tp.sections.recent_comments.content}}
{% endif %}
</div>
</div>
</div>

254
update.py
View file

@ -1,4 +1,33 @@
"""
Author: Richard Tew <richard.m.tew@gmail.com>
This script can either be invoked manually, if required, or periodically by
having a scheduler like cron invoke it.
It is not necessary to factor in the usage limits of the services it polls
(Reddit, Disqus, ...) as the script will do that itself and only access them
if a minimum amount of time has passed.
"""
# What we can use from the standard library.
import codecs
import collections
import ConfigParser
import datetime
import email.utils
import json
import os
import sys
import time
import types
# The external burden of required dependencies.
import praw
from disqusapi import DisqusAPI
# The burden of our own module.
import gensite
r_username = "rmtew"
r_platform = "python"
@ -7,12 +36,227 @@ r_appversion = "v0.1"
r_useragent = "%s:%s:%s (by /u/%s)" % (r_platform, r_appname, r_appversion, r_username)
SITE_URL = "http://journal.imaginary-realities.com/"
def make_time_string(value):
    """Convert an elapsed time into a coarse human-readable age string.

    value: either a number of seconds (int/float) or an "HH:MM:SS" string.
    Returns a string using the single largest applicable unit, e.g.
    "1 second", "3 weeks", "2 months".  Months approximate 365/12 days;
    years use 365 days.
    """
    if type(value) in (int, float):
        value = int(value)
        # Floor division so the unit arithmetic stays integral on both
        # Python 2 and Python 3 (plain '/' yields floats on Python 3).
        hours = value // (60 * 60)
        value = value % (60 * 60)
        minutes = value // 60
        seconds = value % 60
    else:
        hours, minutes, seconds = [ int(v) for v in value.split(":") ]
    if hours:
        if hours >= 24:
            days = hours // 24
            if days >= 7:
                # Decide months first: a "days >= 30" test alone produced
                # "0 months" for exactly 30 days (int(30 / 30.42) == 0).
                months = int(days / (365 / 12.0))
                if months >= 1:
                    if months >= 12:
                        years = days // 365
                        time_string = "%d year%s" % (years, "" if years == 1 else "s")
                    else:
                        time_string = "%d month%s" % (months, "" if months == 1 else "s")
                else:
                    weeks = days // 7
                    time_string = "%d week%s" % (weeks, "" if weeks == 1 else "s")
            else:
                time_string = "%d day%s" % (days, "" if days == 1 else "s")
        else:
            time_string = "%d hour%s" % (hours, "" if hours == 1 else "s")
    elif minutes:
        time_string = "%d minute%s" % (minutes, "" if minutes == 1 else "s")
    else:
        time_string = "%d second%s" % (seconds, "" if seconds == 1 else "s")
    return time_string
def run_reddit(update=False):
    """Poll reddit for submissions linking to the site and snapshot them.

    When *update* is true, queries reddit via praw and dumps a
    (header, rows) table of each submission's plain-data attributes to
    "reddit.json" in the current directory.  When false, does nothing and
    returns None (note: unlike run_disqus, no persisted data is loaded).
    Python 2 only (relies on types.StringTypes / unicode / types.ClassType).
    """
    # NOTE(rmtew): Until I work out what values to display, this will store all the
    # data that can be reconciled as storable.
    if update:
        r = praw.Reddit(user_agent=r_useragent)
        submissions = r.search("url:'%s'" % SITE_URL, sort="new")
        header = None  # Ordered column names, derived from the first match.
        lines = []  # One row of attribute values per submission.
        string_columns = set()  # Columns coerced to unicode before storage.
        for match in submissions:
            if header is None:
                # Build the column list by reflecting over the first
                # submission object, keeping only JSON-storable attributes.
                header = []
                for k in dir(match):
                    if k[0] == "_" or k in ("reddit_session", "comments"): continue
                    v = getattr(match,k)
                    if type(v) in (types.MethodType, types.FunctionType): continue
                    if type(v) in types.StringTypes:
                        header.append(k)
                    elif type(v) in (bool, int, float, types.NoneType, dict, list):
                        header.append(k)
                    elif type(v) in (types.ClassType, types.TypeType):
                        # Class-valued attributes are stored stringified.
                        string_columns.add(k)
                        header.append(k)
            line = []
            for column_name in header:
                # Missing attributes on later submissions default to None.
                v = getattr(match, column_name, None)
                if column_name in string_columns:
                    line.append(unicode(v))
                else:
                    line.append(v)
            lines.append(line)
        with codecs.open("reddit.json", "wb", "utf-8") as f:
            json.dump((header, lines), f)
def run_disqus(update=False):
# Get the absolute path of the directory the script is located in.
script_path = os.path.dirname(__file__)
if not len(script_path):
script_path = sys.path[0]
config = ConfigParser.ConfigParser()
config.read([
os.path.join(script_path, "ir-config.ini"),
])
# If this is not an absolute path, make it one based on the script directory.
data_path = config.get("paths", "data-storage")
if not os.path.isabs(data_path):
data_path = os.path.abspath(os.path.join(script_path, data_path))
# TODO: Load in persisted data.
persists = {}
datafile_path = os.path.join(data_path, "data.json")
if os.path.exists(datafile_path):
persists = json.load(open(datafile_path, "rb"))
if update:
SECRET_KEY = config.get("disqus-keys", "private")
PUBLIC_KEY = config.get("disqus-keys", "public")
disqus = DisqusAPI(SECRET_KEY, PUBLIC_KEY)
thread_data = persists.get("disqus-thread-data", {})
_comment_threads = set()
# ....
# comment_timestamp = time.time()
comment_data = []
for comment in disqus.posts.list(forum="imaginaryrealities"):
if comment["isSpam"] or comment["isDeleted"]:
continue
# Get the RFC3339 date string from disqus, assume it's UTC, and convert it to a timestamp.
timeseq = list(time.strptime(comment["createdAt"], '%Y-%m-%dT%H:%M:%S'))
timeseq.append(0) # offset of date's timezone from UTC.
timeseq = tuple(timeseq)
post_timestamp = email.utils.mktime_tz(timeseq) # UTC seconds since the epoch
poster_username = comment["author"]["name"]
post_commentid = comment["id"]
post_threadid = comment["thread"]
post_text = comment["raw_message"]
comment_data.append((post_commentid, poster_username, post_timestamp, post_threadid, post_text))
# Track the threads which the processed comments belong to.
_comment_threads.add(post_threadid)
# Check which threads have new comments which we do not know about.
_unknown_threads = _comment_threads - set(thread_data)
if len(_unknown_threads):
print "Processing new threads."
thread_timestamp = persists.get("disqus-thread-timestamp", '1333256400')
# TODO: disqus does not like a since value, so cannot pass one yet until why, is determined.
for result in disqus.threads.list(forum="imaginaryrealities", limit=20):#, since=thread_timestamp):
thread_id = result["id"]
thread_entry = [ result["link"], result["feed"], result["clean_title"] ]
thread_data[thread_id] = thread_entry
persists["disqus-thread-timestamp"] = time.time()
_unknown_threads = _comment_threads - set(thread_data)
if len(_unknown_threads):
print "ERROR: still have %d unknown threads" % len(_unknown_threads)
persists["disqus-comment-data"] = comment_data
persists["disqus-thread-data"] = thread_data
json.dump(persists, open(datafile_path, "wb"))
return persists
def run_tests():
    """Self-test for make_time_string, covering each unit boundary.

    Raises an Exception describing the first mismatch encountered.
    """
    # (input, expected) pairs, ordered from seconds up to a year.
    cases = [
        ("00:00:00", "0 seconds"),
        ("00:00:01", "1 second"),
        ("00:00:59", "59 seconds"),
        ("00:01:00", "1 minute"),
        ("00:59:00", "59 minutes"),
        ("01:00:00", "1 hour"),
        ("23:00:00", "23 hours"),
        ("24:00:00", "1 day"),
        ("%02d:00:00" % (24*6+23), "6 days"),
        ("%02d:00:00" % (24*7), "1 week"),
        ("%02d:00:00" % (24*7*3), "3 weeks"),
        ("%02d:00:00" % (24*7*5), "1 month"),
        ("%02d:00:00" % (24*7*5*2), "2 months"),
        ("%02d:00:00" % (24*365), "1 year"),
    ]
    for given, expected in cases:
        actual = make_time_string(given)
        if actual != expected:
            raise Exception("Expected '%s'; got '%s'" % (expected, actual))
class disqus_data_wrapper(object):
    """Read-only accessor over the persisted disqus data dictionary.

    Wraps the raw lists/dicts produced by run_disqus in lightweight
    named tuples for consumption by the site generation templates.
    """
    # One row per persisted comment: (comment_id, user_name, timestamp, thread_id, text).
    comment_class = collections.namedtuple("comment_class", [ "comment_id", "user_name", "timestamp", "thread_id", "text" ])
    # One row per persisted thread: (url, feed, title).
    thread_class = collections.namedtuple("thread_class", [ "url", "feed", "title" ])
    def __init__(self, data):
        self.data = data
    def get_recent_comments(self, limit=6):
        """Return up to *limit* of the newest comments as comment_class tuples."""
        # For now, disqus provides the comments from newest to oldest.
        # (A leftover debug print of the data keys was removed here; it
        # polluted stdout on every website generation.)
        return [
            self.comment_class(*entry)
            for entry
            in self.data.get("disqus-comment-data", [])
        ][:limit]
    def get_thread(self, thread_id):
        """Return the thread_class for *thread_id*; raises KeyError if unknown."""
        return self.thread_class(*self.data.get("disqus-thread-data", {})[thread_id])
    def get_time_string(self, timestamp):
        """Return a human-readable age string for a UTC epoch *timestamp*."""
        return make_time_string(time.time() - timestamp)
class reddit_data_wrapper(object):
    """Thin holder for persisted reddit data.

    Mirrors disqus_data_wrapper; accessor methods may be added later.
    """
    def __init__(self, data):
        # Keep a reference to the raw persisted structure as-is.
        self.data = data
def run():
    """Experimental: search reddit for submissions linking to the journal.

    NOTE(review): invoked from the __main__ guard below, after the
    module-level dispatch code has already executed on import.  The loop
    body is a bare attribute expression with no visible effect —
    presumably placeholder code forcing praw's lazy attribute fetches;
    TODO confirm intent before building on it.
    """
    r = praw.Reddit(user_agent=r_useragent)
    submissions = r.search("url:'http://journal.imaginary-realities.com/'")
    for match in submissions:
        match.ups, match.downs, match.score, match.title, match.url
        # Take the url, strip guff at the end, match it to a generated page.
args = set(sys.argv[1:])
if "tests" in args:
print "Running tests.."
run_tests()
print "..done"
sys.exit(1)
update_disqus = "disqus" in args or "all" in args
update_reddit = "reddit" in args # or "all" in args
update_website = "website" in args or "all" in args
disqus_data = run_disqus(update=update_disqus)
reddit_data = run_reddit(update=update_reddit)
if update_website:
gensite_targets = (gensite.TARGET_WEBSITE, )# | gensite.FLAG_ONLINE, )
gensite.run(gensite_targets,
disqus_data=disqus_data_wrapper(disqus_data),
reddit_data=reddit_data_wrapper(reddit_data))
# TODO(rmtew): Put the resulting website in place. Set permissions.
if __name__ == "__main__":
run()