Itch.io parser
This commit is contained in:
parent
905edf9f6d
commit
b85bdd8bf6
|
@ -34,10 +34,10 @@ class Collect extends Command
|
|||
//'Apero',
|
||||
//'Questbook',
|
||||
//'Axma',
|
||||
'IFDB',
|
||||
//'IFDB',
|
||||
'Itch',
|
||||
/*
|
||||
'instead',
|
||||
'Itch',
|
||||
'Steam',
|
||||
'Urq',
|
||||
'Kvester',
|
||||
|
|
|
@ -19,14 +19,21 @@
|
|||
namespace App\Sources;
|
||||
|
||||
use \App\Models\Game;
|
||||
use \App\Models\Platform;
|
||||
use \App\Models\Language;
|
||||
use \App\Models\Author;
|
||||
use \App\Models\Tag;
|
||||
use \App\Source;
|
||||
use Log;
|
||||
use \Pandoc\Pandoc;
|
||||
|
||||
class Itch extends Source {
|
||||
public $title = "Itch.io";
|
||||
public $keyword = 'itch';
|
||||
public $queue = [];
|
||||
public $games = [];
|
||||
public $print_description = FALSE;
|
||||
|
||||
public function parse_tag($url) {
|
||||
$max_pages = 4; // load 30*4 = 120 latest games
|
||||
for ($i = 1; $i <= $max_pages; $i++) {
|
||||
|
@ -35,11 +42,27 @@ class Itch extends Source {
|
|||
$this->loadStr($text->content);
|
||||
$this->dom->filter('.game_cell')->each(function($cell) {
|
||||
$game = new Game;
|
||||
$game->source_id = (int) $cell->attr('data-game_id');
|
||||
$game->url = $cell->filter('a.game_link')->attr('href');
|
||||
$game->title = $cell->filter('a.title')->text();
|
||||
$game->author = $cell->filter('.game_author')->text();
|
||||
$game->image = $cell->filter('.game_thumb')->attr('data-background_image');
|
||||
$this->queue[] = $game;
|
||||
$game->title = html_entity_decode($game->title);
|
||||
$game = $this->findGame($game);
|
||||
$game->image_url = $cell->filter('.game_thumb')->attr('data-background_image');
|
||||
$author_name = $cell->filter('.game_author')->text();
|
||||
$author_url = $cell->filter('.game_author > a')->first()->attr('href');
|
||||
if ($game->save() && !empty($author_name)) {
|
||||
$author_model = Author::findByName($author_name);
|
||||
if (empty($author_model)) {
|
||||
$author_model = new Author();
|
||||
$author_model->name = $author_name;
|
||||
$author_model->url = $author_url;
|
||||
$author_model->save();
|
||||
}
|
||||
if (!$game->authors()->where('name', $author_name)->exists()) {
|
||||
$game->authors()->attach($author_model);
|
||||
}
|
||||
}
|
||||
$this->queue[] = $game->url;
|
||||
});
|
||||
if ($text->num_items < 30) {
|
||||
// less than default number of games, probably last page
|
||||
|
@ -48,27 +71,15 @@ class Itch extends Source {
|
|||
}
|
||||
}
|
||||
public function parse() {
|
||||
global $argv;
|
||||
if (isset($argv[2])) {
|
||||
$game_page = $this->get_text($argv[2]);
|
||||
$this->parse_tag("https://itch.io/games/newest/tag-text-based");
|
||||
$this->parse_tag("https://itch.io/games/newest/tag-twine");
|
||||
$this->parse_tag("https://itch.io/games/newest/tag-interactive-fiction");
|
||||
$this->queue = array_unique($this->queue);
|
||||
foreach ($this->queue as $url) {
|
||||
$game_page = $this->get_text($url);
|
||||
$this->loadStr($game_page);
|
||||
$this->saveGame($this->page($argv[2]));
|
||||
} else {
|
||||
$this->parse_tag("https://itch.io/games/newest/tag-text-based");
|
||||
$this->parse_tag("https://itch.io/games/newest/tag-twine");
|
||||
$this->parse_tag("https://itch.io/games/newest/tag-interactive-fiction");
|
||||
$this->queue = array_unique($this->queue);
|
||||
foreach ($this->queue as $game) {
|
||||
$game_page = $this->get_text($game->url);
|
||||
$this->loadStr($game_page);
|
||||
$game = $this->page($game->url);
|
||||
// if ($game->date < $this->period) {
|
||||
$this->games[] = $game->print();
|
||||
}
|
||||
$this->games = array_unique($this->games);
|
||||
foreach ($this->games as $game) {
|
||||
$this->saveGame($game);
|
||||
}
|
||||
$game = $this->page($url);
|
||||
$game->save();
|
||||
}
|
||||
}
|
||||
public function checkPage($url) {
|
||||
|
@ -77,8 +88,9 @@ class Itch extends Source {
|
|||
public function page($url) {
|
||||
$game = new Game;
|
||||
$game->url = $url;
|
||||
$game = $this->findGame($game);
|
||||
$title = trim($this->dom->filter("title")->first()->text());
|
||||
[$game->title, $game->author] = explode(' by ', $title);
|
||||
[$game->title, $author_name] = explode(' by ', $title);
|
||||
unset($title);
|
||||
$this->dom->filter('script[type="application/ld+json"]')->each(function($script) use(&$game) {
|
||||
$data = json_decode($script->html());
|
||||
|
@ -92,16 +104,19 @@ class Itch extends Source {
|
|||
if (isset($data->name)) {
|
||||
$game->title = $data->name;
|
||||
}
|
||||
/*
|
||||
if (isset($data->offers) && isset($data->offers->seller)) {
|
||||
$game->author = $data->offers->seller->name;
|
||||
}
|
||||
*/
|
||||
}
|
||||
});
|
||||
$game->title = html_entity_decode($game->title);
|
||||
$date = $this->dom->filter('td abbr');
|
||||
if ($date->count() > 0) {
|
||||
$date = $date->first()->attr('title');
|
||||
$date = str_replace('@', '', $date);
|
||||
$game->date = new \DateTime($date);
|
||||
$game->release_date = new \DateTime($date);
|
||||
}
|
||||
if ($this->print_description) {
|
||||
$desc = $this->dom->filter('.formatted_description');
|
||||
|
|
Reference in a new issue