102 lines
2.3 KiB
Ruby
102 lines
2.3 KiB
Ruby
require 'nokogiri'
|
|
require 'open-uri'
|
|
require 'sqlite3'
|
|
require 'data_mapper'
|
|
require 'dm-migrations'
|
|
|
|
### CUSTOMIZE THESE ###
|
|
|
|
# Base address of the IFDB browse listing; the page number is appended to it.
ifdb_url = 'http://ifdb.tads.org/search?searchfor=&sortby=&browse&pg='

# Number of the first listing page to fetch.
ifdb_start_page = 1

# Bound the fetch loop compares the current page number against.
ifdb_get_pages = 1
|
|
|
|
### THE CODE FOLLOWS
|
|
|
|
# Point the default repository at ifdb.sqlite3 in the current working
# directory. Dir.pwd is used rather than shelling out to `pwd`, so no
# subprocess is spawned and no Unix shell is required.
DataMapper.setup(:default, "sqlite://#{Dir.pwd}/ifdb.sqlite3")
|
|
|
|
# One game entry from the IFDB listing, persisted through DataMapper.
class Work
  include DataMapper::Resource

  # Auto-incrementing integer key.
  property :id, Serial

  # Title of the game; mandatory.
  property :name, String, :required => true

  # Identifier of the game; mandatory and unique across all works.
  property :ifid, String, :required => true, :unique => true

  # Release date kept as text, because it can be a fuzzy date.
  property :released, String

  # Timestamp recorded for the row.
  property :parsed_at, DateTime

  # Authors are reached through the Authorwork join rows.
  has n, :authorworks
  has n, :authors, :through => :authorworks
end
|
|
# Join row tying exactly one Author to exactly one Work.
class Authorwork
  include DataMapper::Resource

  # Auto-incrementing integer key.
  property :id, Serial

  belongs_to :author
  belongs_to :work
end
|
|
# A person credited on one or more works, persisted through DataMapper.
class Author
  include DataMapper::Resource

  # Auto-incrementing integer key.
  property :id, Serial

  # Author name; mandatory and unique across all authors.
  property :name, String, :required => true, :unique => true

  # Works are reached through the Authorwork join rows.
  has n, :authorworks
  has n, :works, :through => :authorworks
end
|
|
# All models are declared above, so finish the DataMapper model setup first
# and only then run the migration step against the configured repository.
# NOTE(review): auto_migrate! is expected to recreate the tables on every
# run, discarding previously stored rows -- confirm that this is intended.
DataMapper.finalize
DataMapper.auto_migrate!
|
|
|
|
# Scrapes one IFDB listing page and stores every game found on it.
#
# Each paragraph under ".main" that contains a link is treated as one game:
# the link supplies the title and, through its "viewgame?id=" href, the id;
# the paragraph text supplies the ", by ..." author list and the "(year)".
# One Work row is created per game and linked to one Author row per name.
#
# Paragraphs without a link, or whose first link is not a viewgame link, are
# skipped. A game without a recognisable author list is still stored, with
# no authors attached.
#
# @param url [String] address of the listing page to fetch
def page_parsing(url)
  # URI#open (added by open-uri) always performs a fetch, whereas
  # Kernel#open treats a leading "|" as a command to run and no longer
  # accepts URLs on Ruby 3.0 and later. The block form closes the handle
  # once the document has been parsed.
  page = URI.parse(url).open { |io| Nokogiri::HTML(io) }

  page.css(".main p").each do |game|
    link = game.css("a").first
    next unless link

    id_match = link["href"].to_s.match(/viewgame\?id=(.*)/)
    next unless id_match

    text = game.text
    byline = text.match(/, by ([\w\s,]+)(?:\s\(\d+\))?$/)
    # A missing byline yields an empty list instead of a nil dereference.
    author_names =
      byline ? byline[1].split(",").map(&:strip).reject(&:empty?).uniq : []
    year_match = text.match(/\((\d+)\)/)

    new_work = Work.create(
      :name => link.text,
      :ifid => id_match[1],
      # Stored as an empty string when the listing shows no year.
      :released => year_match ? year_match[1] : "",
      :parsed_at => Time.now
    )

    # Every listed author is linked to the work, not only the last one.
    author_names.each do |author_name|
      # Author names are unique, so reuse the stored row when this author
      # already appeared on an earlier game instead of failing validation.
      author = Author.first_or_create(:name => author_name)
      author.works << new_work
      author.save
    end
  end
end
|
|
|
|
# Fetch and store every listing page numbered from ifdb_start_page up to and
# including ifdb_get_pages; nothing is fetched when the start page is greater
# than ifdb_get_pages.
# NOTE(review): ifdb_get_pages acts as the last page number here, not as a
# page count -- confirm that this is the intended meaning of the setting.
ifdb_start_page.upto(ifdb_get_pages) do |page_number|
  page_parsing(ifdb_url + page_number.to_s)
end
|