From b85bdd8bf6903cf2adf6bcb6a6b3a1626eedd2c6 Mon Sep 17 00:00:00 2001 From: Alexander Yakovlev Date: Sat, 4 Apr 2020 20:32:20 +0700 Subject: [PATCH] Itch.io parser --- app/Commands/Collect.php | 4 +-- app/Sources/Itch.php | 65 ++++++++++++++++++++++++---------------- 2 files changed, 42 insertions(+), 27 deletions(-) diff --git a/app/Commands/Collect.php b/app/Commands/Collect.php index 43092ac..abc2a19 100644 --- a/app/Commands/Collect.php +++ b/app/Commands/Collect.php @@ -34,10 +34,10 @@ class Collect extends Command //'Apero', //'Questbook', //'Axma', - 'IFDB', + //'IFDB', + 'Itch', /* 'instead', - 'Itch', 'Steam', 'Urq', 'Kvester', diff --git a/app/Sources/Itch.php b/app/Sources/Itch.php index b5bf317..20b2301 100644 --- a/app/Sources/Itch.php +++ b/app/Sources/Itch.php @@ -19,14 +19,21 @@ namespace App\Sources; use \App\Models\Game; +use \App\Models\Platform; +use \App\Models\Language; +use \App\Models\Author; +use \App\Models\Tag; use \App\Source; +use Log; use \Pandoc\Pandoc; class Itch extends Source { public $title = "Itch.io"; + public $keyword = 'itch'; public $queue = []; public $games = []; public $print_description = FALSE; + public function parse_tag($url) { $max_pages = 4; // load 30*4 = 120 latest games for ($i = 1; $i <= $max_pages; $i++) { @@ -35,11 +42,27 @@ class Itch extends Source { $this->loadStr($text->content); $this->dom->filter('.game_cell')->each(function($cell) { $game = new Game; + $game->source_id = (int) $cell->attr('data-game_id'); $game->url = $cell->filter('a.game_link')->attr('href'); $game->title = $cell->filter('a.title')->text(); - $game->author = $cell->filter('.game_author')->text(); - $game->image = $cell->filter('.game_thumb')->attr('data-background_image'); - $this->queue[] = $game; + $game->title = html_entity_decode($game->title); + $game = $this->findGame($game); + $game->image_url = $cell->filter('.game_thumb')->attr('data-background_image'); + $author_name = $cell->filter('.game_author')->text(); + $author_url = $cell->filter('.game_author > a')->first()->attr('href'); + if ($game->save() && !empty($author_name)) { + $author_model = Author::findByName($author_name); + if (empty($author_model)) { + $author_model = new Author(); + $author_model->name = $author_name; + $author_model->url = $author_url; + $author_model->save(); + } + if (!$game->authors()->where('name', $author_name)->exists()) { + $game->authors()->attach($author_model); + } + } + $this->queue[] = $game->url; }); if ($text->num_items < 30) { // less than default number of games, probably last page @@ -48,27 +71,15 @@ class Itch extends Source { } } public function parse() { - global $argv; - if (isset($argv[2])) { - $game_page = $this->get_text($argv[2]); + $this->parse_tag("https://itch.io/games/newest/tag-text-based"); + $this->parse_tag("https://itch.io/games/newest/tag-twine"); + $this->parse_tag("https://itch.io/games/newest/tag-interactive-fiction"); + $this->queue = array_unique($this->queue); + foreach ($this->queue as $url) { + $game_page = $this->get_text($url); $this->loadStr($game_page); - $this->saveGame($this->page($argv[2])); - } else { - $this->parse_tag("https://itch.io/games/newest/tag-text-based"); - $this->parse_tag("https://itch.io/games/newest/tag-twine"); - $this->parse_tag("https://itch.io/games/newest/tag-interactive-fiction"); - $this->queue = array_unique($this->queue); - foreach ($this->queue as $game) { - $game_page = $this->get_text($game->url); - $this->loadStr($game_page); - $game = $this->page($game->url); - // if ($game->date < $this->period) { - $this->games[] = $game->print(); - } - $this->games = array_unique($this->games); - foreach ($this->games as $game) { - $this->saveGame($game); - } + $game = $this->page($url); + $game->save(); } } public function checkPage($url) { @@ -77,8 +88,9 @@ class Itch extends Source { public function page($url) { $game = new Game; $game->url = $url; + $game = $this->findGame($game); $title = trim($this->dom->filter("title")->first()->text()); - [$game->title, $game->author] = explode(' by ', $title); + [$game->title, $author_name] = explode(' by ', $title); unset($title); $this->dom->filter('script[type="application/ld+json"]')->each(function($script) use(&$game) { $data = json_decode($script->html()); @@ -92,16 +104,19 @@ class Itch extends Source { if (isset($data->name)) { $game->title = $data->name; } + /* if (isset($data->offers) && isset($data->offers->seller)) { $game->author = $data->offers->seller->name; } + */ } }); + $game->title = html_entity_decode($game->title); $date = $this->dom->filter('td abbr'); if ($date->count() > 0) { $date = $date->first()->attr('title'); $date = str_replace('@', '', $date); - $game->date = new \DateTime($date); + $game->release_date = new \DateTime($date); } if ($this->print_description) { $desc = $this->dom->filter('.formatted_description');