mirror of
https://bitbucket.org/oreolek/imaginary-realities.git
synced 2024-04-29 23:59:33 +03:00
Update scripts to fetch and embed disqus recent comments. Still requires work.
This commit is contained in:
parent
95906d5981
commit
899211de38
51
gensite.py
51
gensite.py
|
@ -30,7 +30,6 @@
|
|||
|
||||
import calendar
|
||||
import codecs
|
||||
import datetime
|
||||
import distutils
|
||||
import distutils.dir_util # Not needed on windows.
|
||||
import distutils.file_util # Not needed on windows.
|
||||
|
@ -222,6 +221,8 @@ def get_back_issues_data(tp):
|
|||
|
||||
|
||||
def generate_website_index_page():
|
||||
global data_disqus
|
||||
|
||||
t = jinja2_env.get_template("homepage.html")
|
||||
output_path = os.path.join(setting_target_dirname, "index.html")
|
||||
|
||||
|
@ -257,7 +258,29 @@ def generate_website_index_page():
|
|||
|
||||
# SECTION: Recent comments
|
||||
tp.sections.recent_comments.title = "Recent Comments"
|
||||
tp.sections.recent_comments.content = """<div class="watch-this-space">Disqus comments on articles will be populated here dynamically at some later point. Watch this space!</div>"""
|
||||
tp.sections.recent_comments.entries = []
|
||||
tp.sections.recent_comments.is_enabled = True
|
||||
if data_disqus is None:
|
||||
tp.sections.recent_comments.is_enabled = False
|
||||
tp.sections.recent_comments.content = """<div class="watch-this-space">Failed to generate content.</div>"""
|
||||
else:
|
||||
# comment.comment_id/user_name/timestamp/thread_id/text
|
||||
for comment in data_disqus.get_recent_comments():
|
||||
# thread.url/feed/title
|
||||
thread = data_disqus.get_thread(comment.thread_id)
|
||||
|
||||
text = comment.text
|
||||
while " " in text:
|
||||
text = text.replace(" ", " ")
|
||||
text = text[:80]+"..."
|
||||
|
||||
entry = TemplateParameters()
|
||||
entry.age_string = data_disqus.get_time_string(comment.timestamp)
|
||||
entry.user_name = comment.user_name
|
||||
entry.thread_title = thread.title
|
||||
entry.thread_url = thread.url
|
||||
entry.text = text
|
||||
tp.sections.recent_comments.entries.append(entry)
|
||||
|
||||
html = t.render(tp=tp)
|
||||
with codecs.open(output_path, "wb", "utf-8") as f:
|
||||
|
@ -663,9 +686,26 @@ def get_article_block_content(volume_number, issue_number, page_dirname, block_n
|
|||
return next(t.blocks[block_name](None)).strip()
|
||||
return ""
|
||||
|
||||
default_generation_targets = TARGET_WEBSITE, TARGET_EBOOK
|
||||
|
||||
if __name__ == "__main__":
|
||||
for setting_generation_target in (TARGET_WEBSITE, TARGET_EBOOK):
|
||||
def run(targets=None, disqus_data=None, reddit_data=None):
|
||||
# TODO(rmtew): Make these non-global at some point. It's not really a problem, but it's messy.
|
||||
global setting_target_dirname
|
||||
global setting_use_minimised_files
|
||||
global setting_website_hidden_issue_pages
|
||||
global setting_generation_target
|
||||
global setting_base_template
|
||||
global jinja2_env
|
||||
global data_disqus
|
||||
global data_reddit
|
||||
|
||||
data_disqus = disqus_data
|
||||
data_reddit = reddit_data
|
||||
|
||||
if targets is None:
|
||||
targets = default_generation_targets
|
||||
|
||||
for setting_generation_target in targets:
|
||||
if setting_generation_target not in templates_by_target:
|
||||
print >> sys.stderr, "Unknown target:", setting_generation_target
|
||||
sys.exit(1)
|
||||
|
@ -695,4 +735,7 @@ if __name__ == "__main__":
|
|||
generate_issues(issue_data)
|
||||
generate_ebooks(issue_data)
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
||||
|
||||
# EOF
|
||||
|
|
20
readme.txt
20
readme.txt
|
@ -27,5 +27,21 @@ OPTIONAL SETUP
|
|||
USAGE
|
||||
|
||||
1. _env\Scripts\activate
|
||||
2. python gensite.py
|
||||
3. python update.py
|
||||
|
||||
Then to fetch some initial disqus data:
|
||||
|
||||
1. Edit ir-config.ini and enter required values.
|
||||
2. python update.py disqus
|
||||
|
||||
To generate a website with dynamic data (recent comments on homepage..):
|
||||
|
||||
1. python update.py website
|
||||
|
||||
To generate a website with no dynamic data (no recent comments on homepage..):
|
||||
|
||||
1. python gensite.py
|
||||
|
||||
Note that editing either of these scripts might be required, in order to set the right website target.
|
||||
|
||||
TARGET_WEBSITE (generates the website without google analytics and disqus comment sections on articles).
|
||||
TARGET_WEBSITE | FLAG_ONLINE (generates the website with google analytics and disqus comment sections on articles).
|
||||
|
|
|
@ -133,6 +133,14 @@ ul {
|
|||
padding: 3px 0 3px 0;
|
||||
}
|
||||
|
||||
table.table-comments tr.last-row td{
|
||||
padding-bottom: 5px;
|
||||
}
|
||||
|
||||
.row-comment-text {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.licensetitle {
|
||||
padding-top: 10px;
|
||||
border-bottom: 1px solid grey;
|
||||
|
|
|
@ -65,7 +65,8 @@
|
|||
</table>
|
||||
</div>
|
||||
|
||||
Or peruse <a href="http://imaginary-realities.disinterest.org/">older issues</a> from the previous period of publication that ended in 2001.
|
||||
<br/>
|
||||
Or check out the <a href="http://imaginary-realities.disinterest.org/">older issues</a> from the previous period of publication which ended in 2001.
|
||||
</div>
|
||||
</div>
|
||||
<div class="aligned-example text">
|
||||
|
@ -88,7 +89,26 @@
|
|||
<div class="aligned-example text">
|
||||
<div class="mainheader"><a name="recent-comments">{{tp.sections.recent_comments.title}}</a></div>
|
||||
<div class="mainbody">
|
||||
{% if tp.sections.recent_comments.is_enabled %}
|
||||
<table class="table-comments">
|
||||
<tbody>
|
||||
{% for entry in tp.sections.recent_comments.entries %}
|
||||
<tr class="row-comment-link">
|
||||
<td colspan=2>
|
||||
<a href="{{entry.thread_url}}#comments">{{entry.thread_title}}</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="last-row">
|
||||
<td colspan=2>
|
||||
{{entry.user_name}} ({{entry.age_string}} ago): <span class="row-comment-text">{{entry.text}}</span>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% else %}
|
||||
{{tp.sections.recent_comments.content}}
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
254
update.py
254
update.py
|
@ -1,4 +1,33 @@
|
|||
"""
|
||||
Author: Richard Tew <richard.m.tew@gmail.com>
|
||||
|
||||
This script can either be invoked manually, if required, or periodically by
|
||||
having a scheduler like cron invoke it.
|
||||
|
||||
It is not necessary to factor in the usage limits of the services it polls
|
||||
(Reddit, Disqus, ...) as the script will do that itself and only access them
|
||||
if a minimum amount of time has passed.
|
||||
|
||||
"""
|
||||
|
||||
# What we can use from the standard library.
|
||||
import codecs
|
||||
import collections
|
||||
import ConfigParser
|
||||
import datetime
|
||||
import email.utils
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import types
|
||||
|
||||
# The external burden of required dependencies.
|
||||
import praw
|
||||
from disqusapi import DisqusAPI
|
||||
|
||||
# The burden of our own module.
|
||||
import gensite
|
||||
|
||||
r_username = "rmtew"
|
||||
r_platform = "python"
|
||||
|
@ -7,12 +36,227 @@ r_appversion = "v0.1"
|
|||
|
||||
r_useragent = "%s:%s:%s (by /u/%s)" % (r_platform, r_appname, r_appversion, r_username)
|
||||
|
||||
SITE_URL = "http://journal.imaginary-realities.com/"
|
||||
|
||||
|
||||
def make_time_string(value):
    """Render an elapsed duration as a coarse, human-readable string.

    ``value`` is either a number of seconds (int or float; any fractional
    part is truncated and negative values are treated as zero) or an
    "HH:MM:SS" string whose hour field may exceed 23.

    Only the largest applicable unit is reported ("59 seconds", "1 hour",
    "3 weeks", "2 months", "1 year"), with an "s" suffix unless the count
    is exactly one.
    """
    if isinstance(value, (int, float)):
        # A timestamp slightly in the future (clock skew) would otherwise
        # produce nonsense such as "-1 hours".
        total_seconds = max(0, int(value))
        # divmod() floors, so the parts stay integral whichever division
        # semantics the interpreter applies to "/".
        hours, remainder = divmod(total_seconds, 60 * 60)
        minutes, seconds = divmod(remainder, 60)
    else:
        hours, minutes, seconds = [int(v) for v in value.split(":")]

    if hours >= 24:
        days = hours // 24
        if days >= 365:
            count, unit = days // 365, "year"
        elif days >= 30:
            # A month is 365/12 days. Integer arithmetic avoids float
            # rounding, and the clamp stops exactly 30 days from being
            # reported as "0 months".
            count, unit = max(1, (days * 12) // 365), "month"
        elif days >= 7:
            count, unit = days // 7, "week"
        else:
            count, unit = days, "day"
    elif hours:
        count, unit = hours, "hour"
    elif minutes:
        count, unit = minutes, "minute"
    else:
        count, unit = seconds, "second"

    return "%d %s%s" % (count, unit, "" if count == 1 else "s")
|
||||
|
||||
def run_reddit(update=False):
    """Optionally snapshot Reddit submissions that link to the site.

    When ``update`` is true, Reddit is searched (newest first) for
    submissions whose URL matches SITE_URL, and a ``(header, rows)`` pair is
    written as JSON to "reddit.json" in the current working directory.
    ``header`` lists the attribute names taken from the first submission and
    each row holds one submission's values in that order.

    Nothing is returned in either case.
    """
    # NOTE(rmtew): Until I work out what values to display, this will store all the
    #              data that can be reconciled as storable.
    if not update:
        return

    reddit = praw.Reddit(user_agent=r_useragent)
    results = reddit.search("url:'%s'" % SITE_URL, sort="new")

    plain_types = (bool, int, float, types.NoneType, dict, list)
    class_types = (types.ClassType, types.TypeType)
    callable_types = (types.MethodType, types.FunctionType)

    header = None
    string_columns = set()
    lines = []
    for submission in results:
        if header is None:
            # The first submission decides which attributes become columns.
            header = []
            for attr_name in dir(submission):
                if attr_name[0] == "_" or attr_name in ("reddit_session", "comments"):
                    continue
                attr_type = type(getattr(submission, attr_name))
                if attr_type in callable_types:
                    continue
                if attr_type in class_types:
                    # Class-valued attributes are stored as their text form.
                    string_columns.add(attr_name)
                elif attr_type not in types.StringTypes and attr_type not in plain_types:
                    # Anything else is not considered storable.
                    continue
                header.append(attr_name)

        row = []
        for column_name in header:
            cell = getattr(submission, column_name, None)
            row.append(unicode(cell) if column_name in string_columns else cell)
        lines.append(row)

    with codecs.open("reddit.json", "wb", "utf-8") as f:
        json.dump((header, lines), f)
|
||||
|
||||
def run_disqus(update=False):
    """Load the persisted Disqus data, optionally refreshing it first.

    Settings are read from "ir-config.ini" next to this script: the
    "data-storage" entry of the [paths] section names the directory holding
    "data.json" (relative paths are resolved against the script directory),
    and the [disqus-keys] section supplies the "private" and "public" keys.

    When ``update`` is true the posts of the "imaginaryrealities" forum are
    fetched (spam and deleted posts are skipped), the stored comment list is
    replaced with them, details of any not-yet-known threads are fetched,
    and the result is written back to "data.json".

    Returns the dictionary of persisted data (empty if no data file exists
    and no update was requested).
    """
    # Get the absolute path of the directory the script is located in.
    script_path = os.path.dirname(__file__)
    if not len(script_path):
        script_path = sys.path[0]

    config = ConfigParser.ConfigParser()
    config.read([
        os.path.join(script_path, "ir-config.ini"),
    ])

    # If this is not an absolute path, make it one based on the script directory.
    data_path = config.get("paths", "data-storage")
    if not os.path.isabs(data_path):
        data_path = os.path.abspath(os.path.join(script_path, data_path))

    # Load in any previously persisted data.
    persists = {}
    datafile_path = os.path.join(data_path, "data.json")
    if os.path.exists(datafile_path):
        # Close the handle deterministically rather than relying on the
        # garbage collector.
        with open(datafile_path, "rb") as f:
            persists = json.load(f)

    if update:
        SECRET_KEY = config.get("disqus-keys", "private")
        PUBLIC_KEY = config.get("disqus-keys", "public")
        disqus = DisqusAPI(SECRET_KEY, PUBLIC_KEY)

        thread_data = persists.get("disqus-thread-data", {})
        _comment_threads = set()

        # ....
        # comment_timestamp = time.time()
        comment_data = []
        for comment in disqus.posts.list(forum="imaginaryrealities"):
            if comment["isSpam"] or comment["isDeleted"]:
                continue

            # Get the RFC3339 date string from disqus, assume it's UTC, and convert it to a timestamp.
            timeseq = list(time.strptime(comment["createdAt"], '%Y-%m-%dT%H:%M:%S'))
            timeseq.append(0) # offset of date's timezone from UTC.
            timeseq = tuple(timeseq)
            post_timestamp = email.utils.mktime_tz(timeseq) # UTC seconds since the epoch

            poster_username = comment["author"]["name"]
            post_commentid = comment["id"]
            post_threadid = comment["thread"]
            post_text = comment["raw_message"]
            comment_data.append((post_commentid, poster_username, post_timestamp, post_threadid, post_text))

            # Track the threads which the processed comments belong to.
            _comment_threads.add(post_threadid)

        # Check which threads have new comments which we do not know about.
        _unknown_threads = _comment_threads - set(thread_data)
        if _unknown_threads:
            print("Processing new threads.")
            thread_timestamp = persists.get("disqus-thread-timestamp", '1333256400')
            # TODO: disqus does not like a since value, so cannot pass one yet until why, is determined.
            for result in disqus.threads.list(forum="imaginaryrealities", limit=20):#, since=thread_timestamp):
                thread_id = result["id"]
                thread_entry = [ result["link"], result["feed"], result["clean_title"] ]
                thread_data[thread_id] = thread_entry
            persists["disqus-thread-timestamp"] = time.time()

            # Anything still unknown was not among the threads just listed.
            _unknown_threads = _comment_threads - set(thread_data)
            if _unknown_threads:
                print("ERROR: still have %d unknown threads" % len(_unknown_threads))

        persists["disqus-comment-data"] = comment_data
        persists["disqus-thread-data"] = thread_data
        # The context manager guarantees the data is flushed and the file
        # closed before the function returns.
        with open(datafile_path, "wb") as f:
            json.dump(persists, f)

    return persists
|
||||
|
||||
|
||||
def run_tests():
    """Check make_time_string() against a table of known conversions.

    Raises Exception naming the expected and actual strings for the first
    case that does not match; returns None when every case passes.
    """
    cases = [
        ("00:00:00", "0 seconds"),
        ("00:00:01", "1 second"),
        ("00:00:59", "59 seconds"),
        ("00:01:00", "1 minute"),
        ("00:59:00", "59 minutes"),
        ("01:00:00", "1 hour"),
        ("23:00:00", "23 hours"),
        ("24:00:00", "1 day"),
        ("%02d:00:00" % (24*6+23), "6 days"),
        ("%02d:00:00" % (24*7), "1 week"),
        ("%02d:00:00" % (24*7*3), "3 weeks"),
        ("%02d:00:00" % (24*7*5), "1 month"),
        ("%02d:00:00" % (24*7*5*2), "2 months"),
        ("%02d:00:00" % (24*365), "1 year"),
    ]
    for duration_text, expected in cases:
        actual = make_time_string(duration_text)
        if actual != expected:
            raise Exception("Expected '%s'; got '%s'" % (expected, actual))
|
||||
|
||||
|
||||
class disqus_data_wrapper(object):
    """Convenience accessors over the dictionary returned by run_disqus().

    Comments are read from the "disqus-comment-data" entry and threads from
    the "disqus-thread-data" entry of the wrapped dictionary.
    """

    # Field order matches the tuples stored under "disqus-comment-data".
    comment_class = collections.namedtuple("comment_class", [ "comment_id", "user_name", "timestamp", "thread_id", "text" ])
    # Field order matches the lists stored under "disqus-thread-data".
    thread_class = collections.namedtuple("thread_class", [ "url", "feed", "title" ])

    def __init__(self, data):
        """Wrap ``data``, the persisted dictionary to read from."""
        self.data = data

    def get_recent_comments(self, limit=6):
        """Return up to ``limit`` stored comments as comment_class tuples.

        The stored order is kept; an empty list is returned when no comment
        data is present.
        """
        # For now, disqus provides the comments from newest to oldest.
        # Slice before converting so only the returned entries are built.
        stored = self.data.get("disqus-comment-data", [])
        return [ self.comment_class(*entry) for entry in stored[:limit] ]

    def get_thread(self, thread_id):
        """Return the thread_class tuple for ``thread_id``.

        Raises KeyError if no such thread has been stored.
        """
        return self.thread_class(*self.data.get("disqus-thread-data", {})[thread_id])

    def get_time_string(self, timestamp):
        """Describe how long ago ``timestamp`` (seconds since the epoch) was."""
        return make_time_string(time.time() - timestamp)
|
||||
|
||||
class reddit_data_wrapper(object):
    """Holder for the Reddit data handed to the site generator.

    No accessors are defined yet; the wrapped value is only exposed as the
    ``data`` attribute.
    """

    def __init__(self, data):
        """Keep a reference to ``data`` without copying or inspecting it."""
        self.data = data
|
||||
|
||||
|
||||
def run():
    """Command-line entry point; behaviour is selected by the arguments.

    Recognised arguments (any combination, read from sys.argv):
      tests    run the self tests and exit.
      disqus   refresh the stored Disqus data.
      reddit   refresh the stored Reddit data.
      website  regenerate the website using the stored data.
      all      same as "disqus" plus "website".
    """
    # NOTE(review): the unconditional Reddit search that used to open this
    # function discarded its results and duplicated run_reddit(); it has
    # been dropped so that no network access happens unless requested.
    args = set(sys.argv[1:])

    if "tests" in args:
        print("Running tests..")
        run_tests()
        print("..done")
        # run_tests() raises on the first failure, so reaching this point
        # means success and must be reported with a zero exit status.
        sys.exit(0)

    update_disqus = "disqus" in args or "all" in args
    update_reddit = "reddit" in args # or "all" in args
    update_website = "website" in args or "all" in args

    disqus_data = run_disqus(update=update_disqus)
    reddit_data = run_reddit(update=update_reddit)

    if update_website:
        gensite_targets = (gensite.TARGET_WEBSITE, )# | gensite.FLAG_ONLINE, )
        gensite.run(gensite_targets,
            disqus_data=disqus_data_wrapper(disqus_data),
            reddit_data=reddit_data_wrapper(reddit_data))
        # TODO(rmtew): Put the resulting website in place.  Set permissions.
|
||||
# TODO(rmtew): Put the resulting website in place. Set permissions.
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
Loading…
Reference in a new issue