1
0
Fork 0
mirror of https://bitbucket.org/oreolek/imaginary-realities.git synced 2024-04-29 23:59:33 +03:00

Update scripts to fetch and embed disqus recent comments. Still requires work.

This commit is contained in:
richard.m.tew@gmail.com 2015-07-29 21:22:06 +12:00
parent 95906d5981
commit 899211de38
5 changed files with 344 additions and 13 deletions

View file

@ -30,7 +30,6 @@
import calendar
import codecs
import datetime
import distutils
import distutils.dir_util # Not needed on windows.
import distutils.file_util # Not needed on windows.
@ -222,6 +221,8 @@ def get_back_issues_data(tp):
def generate_website_index_page():
global data_disqus
t = jinja2_env.get_template("homepage.html")
output_path = os.path.join(setting_target_dirname, "index.html")
@ -257,7 +258,29 @@ def generate_website_index_page():
# SECTION: Recent comments
tp.sections.recent_comments.title = "Recent Comments"
tp.sections.recent_comments.content = """<div class="watch-this-space">Disqus comments on articles will be populated here dynamically at some later point. Watch this space!</div>"""
tp.sections.recent_comments.entries = []
tp.sections.recent_comments.is_enabled = True
if data_disqus is None:
tp.sections.recent_comments.is_enabled = False
tp.sections.recent_comments.content = """<div class="watch-this-space">Failed to generate content.</div>"""
else:
# comment.comment_id/user_name/timestamp/thread_id/text
for comment in data_disqus.get_recent_comments():
# thread.url/feed/title
thread = data_disqus.get_thread(comment.thread_id)
text = comment.text
while " " in text:
text = text.replace(" ", " ")
text = text[:80]+"..."
entry = TemplateParameters()
entry.age_string = data_disqus.get_time_string(comment.timestamp)
entry.user_name = comment.user_name
entry.thread_title = thread.title
entry.thread_url = thread.url
entry.text = text
tp.sections.recent_comments.entries.append(entry)
html = t.render(tp=tp)
with codecs.open(output_path, "wb", "utf-8") as f:
@ -663,9 +686,26 @@ def get_article_block_content(volume_number, issue_number, page_dirname, block_n
return next(t.blocks[block_name](None)).strip()
return ""
default_generation_targets = TARGET_WEBSITE, TARGET_EBOOK
if __name__ == "__main__":
for setting_generation_target in (TARGET_WEBSITE, TARGET_EBOOK):
def run(targets=None, disqus_data=None, reddit_data=None):
# TODO(rmtew): Make these non-global at some point. It's not really a problem, but it's messy.
global setting_target_dirname
global setting_use_minimised_files
global setting_website_hidden_issue_pages
global setting_generation_target
global setting_base_template
global jinja2_env
global data_disqus
global data_reddit
data_disqus = disqus_data
data_reddit = reddit_data
if targets is None:
targets = default_generation_targets
for setting_generation_target in targets:
if setting_generation_target not in templates_by_target:
print >> sys.stderr, "Unknown target:", setting_generation_target
sys.exit(1)
@ -695,4 +735,7 @@ if __name__ == "__main__":
generate_issues(issue_data)
generate_ebooks(issue_data)
if __name__ == "__main__":
run()
# EOF

View file

@ -27,5 +27,21 @@ OPTIONAL SETUP
USAGE
1. _env\Scripts\activate
2. python gensite.py
3. python update.py
Then to fetch some initial disqus data:
1. Edit ir-config.ini and enter required values.
2. python update.py disqus
To generate a website with dynamic data (recent comments on the homepage, etc.):
1. python update.py website
To generate a website with no dynamic data (recent comments on the homepage, etc.):
1. python gensite.py
Note that you might need to edit either of these scripts in order to set the right website target.
TARGET_WEBSITE (generates the website without google analytics and disqus comment sections on articles).
TARGET_WEBSITE | FLAG_ONLINE (generates the website with google analytics and disqus comment sections on articles).

View file

@ -133,6 +133,14 @@ ul {
padding: 3px 0 3px 0;
}
table.table-comments tr.last-row td{
padding-bottom: 5px;
}
.row-comment-text {
font-style: italic;
}
.licensetitle {
padding-top: 10px;
border-bottom: 1px solid grey;

View file

@ -65,7 +65,8 @@
</table>
</div>
Or peruse <a href="http://imaginary-realities.disinterest.org/">older issues</a> from the previous period of publication that ended in 2001.
<br/>
Or check out the <a href="http://imaginary-realities.disinterest.org/">older issues</a> from the previous period of publication which ended in 2001.
</div>
</div>
<div class="aligned-example text">
@ -88,7 +89,26 @@
<div class="aligned-example text">
<div class="mainheader"><a name="recent-comments">{{tp.sections.recent_comments.title}}</a></div>
<div class="mainbody">
{% if tp.sections.recent_comments.is_enabled %}
<table class="table-comments">
<tbody>
{% for entry in tp.sections.recent_comments.entries %}
<tr class="row-comment-link">
<td colspan=2>
<a href="{{entry.thread_url}}#comments">{{entry.thread_title}}</a>
</td>
</tr>
<tr class="last-row">
<td colspan=2>
{{entry.user_name}} ({{entry.age_string}} ago): <span class="row-comment-text">{{entry.text}}</span>
</td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
{{tp.sections.recent_comments.content}}
{% endif %}
</div>
</div>
</div>

254
update.py
View file

@ -1,4 +1,33 @@
"""
Author: Richard Tew <richard.m.tew@gmail.com>
This script can either be invoked manually, if required, or periodically by
having a scheduler like cron invoke it.
It is not necessary to factor in the usage limits of the services it polls
(Reddit, Disqus, ...) as the script will do that itself and only access them
if a minimum amount of time has passed.
"""
# What we can use from the standard library.
import codecs
import collections
import ConfigParser
import datetime
import email.utils
import json
import os
import sys
import time
import types
# The external burden of required dependencies.
import praw
from disqusapi import DisqusAPI
# The burden of our own module.
import gensite
r_username = "rmtew"
r_platform = "python"
@ -7,12 +36,227 @@ r_appversion = "v0.1"
r_useragent = "%s:%s:%s (by /u/%s)" % (r_platform, r_appname, r_appversion, r_username)
SITE_URL = "http://journal.imaginary-realities.com/"
def make_time_string(value):
    """Convert an elapsed time into a coarse human-readable age string.

    value: either a number of seconds (int/float) or an "HH:MM:SS" string.
    Returns a string using the single largest applicable unit, e.g.
    "1 second", "3 weeks", "2 months".  Months approximate 365/12 days;
    years use 365 days.
    """
    if type(value) in (int, float):
        value = int(value)
        # Floor division so the unit arithmetic stays integral on both
        # Python 2 and Python 3 (plain '/' yields floats on Python 3).
        hours = value // (60 * 60)
        value = value % (60 * 60)
        minutes = value // 60
        seconds = value % 60
    else:
        hours, minutes, seconds = [ int(v) for v in value.split(":") ]
    if hours:
        if hours >= 24:
            days = hours // 24
            if days >= 7:
                # Decide months first: a "days >= 30" test alone produced
                # "0 months" for exactly 30 days (int(30 / 30.42) == 0).
                months = int(days / (365 / 12.0))
                if months >= 1:
                    if months >= 12:
                        years = days // 365
                        time_string = "%d year%s" % (years, "" if years == 1 else "s")
                    else:
                        time_string = "%d month%s" % (months, "" if months == 1 else "s")
                else:
                    weeks = days // 7
                    time_string = "%d week%s" % (weeks, "" if weeks == 1 else "s")
            else:
                time_string = "%d day%s" % (days, "" if days == 1 else "s")
        else:
            time_string = "%d hour%s" % (hours, "" if hours == 1 else "s")
    elif minutes:
        time_string = "%d minute%s" % (minutes, "" if minutes == 1 else "s")
    else:
        time_string = "%d second%s" % (seconds, "" if seconds == 1 else "s")
    return time_string
def run_reddit(update=False):
    """Poll reddit for submissions linking to the site and snapshot them.

    When *update* is true, queries reddit via praw and dumps a
    (header, rows) table of each submission's plain-data attributes to
    "reddit.json" in the current directory.  When false, does nothing and
    returns None (note: unlike run_disqus, no persisted data is loaded).
    Python 2 only (relies on types.StringTypes / unicode / types.ClassType).
    """
    # NOTE(rmtew): Until I work out what values to display, this will store all the
    # data that can be reconciled as storable.
    if update:
        r = praw.Reddit(user_agent=r_useragent)
        submissions = r.search("url:'%s'" % SITE_URL, sort="new")
        header = None  # Ordered column names, derived from the first match.
        lines = []  # One row of attribute values per submission.
        string_columns = set()  # Columns coerced to unicode before storage.
        for match in submissions:
            if header is None:
                # Build the column list by reflecting over the first
                # submission object, keeping only JSON-storable attributes.
                header = []
                for k in dir(match):
                    if k[0] == "_" or k in ("reddit_session", "comments"): continue
                    v = getattr(match,k)
                    if type(v) in (types.MethodType, types.FunctionType): continue
                    if type(v) in types.StringTypes:
                        header.append(k)
                    elif type(v) in (bool, int, float, types.NoneType, dict, list):
                        header.append(k)
                    elif type(v) in (types.ClassType, types.TypeType):
                        # Class-valued attributes are stored stringified.
                        string_columns.add(k)
                        header.append(k)
            line = []
            for column_name in header:
                # Missing attributes on later submissions default to None.
                v = getattr(match, column_name, None)
                if column_name in string_columns:
                    line.append(unicode(v))
                else:
                    line.append(v)
            lines.append(line)
        with codecs.open("reddit.json", "wb", "utf-8") as f:
            json.dump((header, lines), f)
def run_disqus(update=False):
# Get the absolute path of the directory the script is located in.
script_path = os.path.dirname(__file__)
if not len(script_path):
script_path = sys.path[0]
config = ConfigParser.ConfigParser()
config.read([
os.path.join(script_path, "ir-config.ini"),
])
# If this is not an absolute path, make it one based on the script directory.
data_path = config.get("paths", "data-storage")
if not os.path.isabs(data_path):
data_path = os.path.abspath(os.path.join(script_path, data_path))
# TODO: Load in persisted data.
persists = {}
datafile_path = os.path.join(data_path, "data.json")
if os.path.exists(datafile_path):
persists = json.load(open(datafile_path, "rb"))
if update:
SECRET_KEY = config.get("disqus-keys", "private")
PUBLIC_KEY = config.get("disqus-keys", "public")
disqus = DisqusAPI(SECRET_KEY, PUBLIC_KEY)
thread_data = persists.get("disqus-thread-data", {})
_comment_threads = set()
# ....
# comment_timestamp = time.time()
comment_data = []
for comment in disqus.posts.list(forum="imaginaryrealities"):
if comment["isSpam"] or comment["isDeleted"]:
continue
# Get the RFC3339 date string from disqus, assume it's UTC, and convert it to a timestamp.
timeseq = list(time.strptime(comment["createdAt"], '%Y-%m-%dT%H:%M:%S'))
timeseq.append(0) # offset of date's timezone from UTC.
timeseq = tuple(timeseq)
post_timestamp = email.utils.mktime_tz(timeseq) # UTC seconds since the epoch
poster_username = comment["author"]["name"]
post_commentid = comment["id"]
post_threadid = comment["thread"]
post_text = comment["raw_message"]
comment_data.append((post_commentid, poster_username, post_timestamp, post_threadid, post_text))
# Track the threads which the processed comments belong to.
_comment_threads.add(post_threadid)
# Check which threads have new comments which we do not know about.
_unknown_threads = _comment_threads - set(thread_data)
if len(_unknown_threads):
print "Processing new threads."
thread_timestamp = persists.get("disqus-thread-timestamp", '1333256400')
# TODO: disqus does not like a since value, so cannot pass one yet until why, is determined.
for result in disqus.threads.list(forum="imaginaryrealities", limit=20):#, since=thread_timestamp):
thread_id = result["id"]
thread_entry = [ result["link"], result["feed"], result["clean_title"] ]
thread_data[thread_id] = thread_entry
persists["disqus-thread-timestamp"] = time.time()
_unknown_threads = _comment_threads - set(thread_data)
if len(_unknown_threads):
print "ERROR: still have %d unknown threads" % len(_unknown_threads)
persists["disqus-comment-data"] = comment_data
persists["disqus-thread-data"] = thread_data
json.dump(persists, open(datafile_path, "wb"))
return persists
def run_tests():
    """Self-test for make_time_string, covering each unit boundary.

    Raises an Exception describing the first mismatch encountered.
    """
    # (input, expected) pairs, ordered from seconds up to a year.
    cases = [
        ("00:00:00", "0 seconds"),
        ("00:00:01", "1 second"),
        ("00:00:59", "59 seconds"),
        ("00:01:00", "1 minute"),
        ("00:59:00", "59 minutes"),
        ("01:00:00", "1 hour"),
        ("23:00:00", "23 hours"),
        ("24:00:00", "1 day"),
        ("%02d:00:00" % (24*6+23), "6 days"),
        ("%02d:00:00" % (24*7), "1 week"),
        ("%02d:00:00" % (24*7*3), "3 weeks"),
        ("%02d:00:00" % (24*7*5), "1 month"),
        ("%02d:00:00" % (24*7*5*2), "2 months"),
        ("%02d:00:00" % (24*365), "1 year"),
    ]
    for given, expected in cases:
        actual = make_time_string(given)
        if actual != expected:
            raise Exception("Expected '%s'; got '%s'" % (expected, actual))
class disqus_data_wrapper(object):
    """Read-only accessor over the persisted disqus data dictionary.

    Wraps the raw lists/dicts produced by run_disqus in lightweight
    named tuples for consumption by the site generation templates.
    """
    # One row per persisted comment: (comment_id, user_name, timestamp, thread_id, text).
    comment_class = collections.namedtuple("comment_class", [ "comment_id", "user_name", "timestamp", "thread_id", "text" ])
    # One row per persisted thread: (url, feed, title).
    thread_class = collections.namedtuple("thread_class", [ "url", "feed", "title" ])
    def __init__(self, data):
        self.data = data
    def get_recent_comments(self, limit=6):
        """Return up to *limit* of the newest comments as comment_class tuples."""
        # For now, disqus provides the comments from newest to oldest.
        # (A leftover debug print of the data keys was removed here; it
        # polluted stdout on every website generation.)
        return [
            self.comment_class(*entry)
            for entry
            in self.data.get("disqus-comment-data", [])
        ][:limit]
    def get_thread(self, thread_id):
        """Return the thread_class for *thread_id*; raises KeyError if unknown."""
        return self.thread_class(*self.data.get("disqus-thread-data", {})[thread_id])
    def get_time_string(self, timestamp):
        """Return a human-readable age string for a UTC epoch *timestamp*."""
        return make_time_string(time.time() - timestamp)
class reddit_data_wrapper(object):
    """Thin holder for persisted reddit data.

    Mirrors disqus_data_wrapper; accessor methods may be added later.
    """
    def __init__(self, data):
        # Keep a reference to the raw persisted structure as-is.
        self.data = data
def run():
    """Experimental: search reddit for submissions linking to the journal.

    NOTE(review): invoked from the __main__ guard below, after the
    module-level dispatch code has already executed on import.  The loop
    body is a bare attribute expression with no visible effect —
    presumably placeholder code forcing praw's lazy attribute fetches;
    TODO confirm intent before building on it.
    """
    r = praw.Reddit(user_agent=r_useragent)
    submissions = r.search("url:'http://journal.imaginary-realities.com/'")
    for match in submissions:
        match.ups, match.downs, match.score, match.title, match.url
        # Take the url, strip guff at the end, match it to a generated page.
args = set(sys.argv[1:])
if "tests" in args:
print "Running tests.."
run_tests()
print "..done"
sys.exit(1)
update_disqus = "disqus" in args or "all" in args
update_reddit = "reddit" in args # or "all" in args
update_website = "website" in args or "all" in args
disqus_data = run_disqus(update=update_disqus)
reddit_data = run_reddit(update=update_reddit)
if update_website:
gensite_targets = (gensite.TARGET_WEBSITE, )# | gensite.FLAG_ONLINE, )
gensite.run(gensite_targets,
disqus_data=disqus_data_wrapper(disqus_data),
reddit_data=reddit_data_wrapper(reddit_data))
# TODO(rmtew): Put the resulting website in place. Set permissions.
if __name__ == "__main__":
run()