From 691d0c4ebb9ef10e4a4c70830b72da6628de772d Mon Sep 17 00:00:00 2001 From: Alexander Yakovlev Date: Thu, 18 Apr 2019 20:28:59 +0700 Subject: [PATCH] More games from Itch --- Game.php | 41 +++++++++++++++---- Source.php | 14 +++++++ Source/Itch.php | 103 +++++++++++++++++++++++++++++------------------- 3 files changed, 110 insertions(+), 48 deletions(-) diff --git a/Game.php b/Game.php index 0f89598..8c56f97 100644 --- a/Game.php +++ b/Game.php @@ -22,7 +22,18 @@ class Game { public $url; public $title; public $author; + /** + * Полное или единственное описание. + * + * @var string + */ public $description; + /** + * Короткое описание. + * + * @var string + */ + public $short_description; /** * Дата выхода игры. * @@ -95,13 +106,11 @@ class Game { $output .= " by *".trim($this->author)."*"; } } - if ($this->description) { - if (FORMAT === 'MARKDOWN') { - $output .= "\n\n > ".$converter->convert($this->description, 'html', 'markdown_github')."\n"; - } - if (FORMAT === 'HTML') { - $output .= "\n
".$this->description."
\n"; - } + if (FORMAT === 'MARKDOWN') { + $output .= "\n\n > ".$converter->convert($this->getDescription(), 'html', 'markdown_github')."\n"; + } + if (FORMAT === 'HTML') { + $output .= "\n
".$this->getDescription()."
\n"; } if (FORMAT === 'MARKDOWN') { $output .= "\n"; @@ -111,4 +120,22 @@ class Game { } return $output; } + + /** + * Serialization for array_unique function. + */ + public function __toString() { + if (!empty($this->url)) { + return $this->url; + } else { + return $this->title; + } + } + + public function getDescription() { + if ($this->short_description) { + return $this->short_description; + } + return $this->description; + } } diff --git a/Source.php b/Source.php index 6b47d01..f3ae2b2 100644 --- a/Source.php +++ b/Source.php @@ -100,6 +100,20 @@ abstract class Source { return (string) $response->getBody(); } + /** + * GET JSON data. + */ + public function get_json($url) { + $client = new GuzzleClient([ + 'timeout' => 30, + ]); + $response = $client->request('GET', $url, [ + 'cookies' => $this->cookies, + ]); + $text = (string) $response->getBody(); + return json_decode($text); + } + /** * Print the game line and catch all exceptions. * diff --git a/Source/Itch.php b/Source/Itch.php index c86451a..074b5f0 100644 --- a/Source/Itch.php +++ b/Source/Itch.php @@ -23,55 +23,52 @@ use \Pandoc\Pandoc; class Itch extends Source { public $title = "Itch.io"; + public $queue = []; public $games = []; protected function parse_tag($url) { - $service = new \Sabre\Xml\Service(); - $xml = $this->get_text($url); - $service->elementMap = [ - '{}item' => function(\Sabre\Xml\Reader $reader) { + $max_pages = 1; // load 30*4 = 120 latest games + for ($i = 1; $i <= $max_pages; $i++) { + $cururl = $url.'?format=json&page='.$i; + $text = $this->get_json($cururl); + $this->loadStr($text->content); + $this->dom->filter('.game_cell')->each(function($cell) { $game = new Game; - $keyValue = \Sabre\Xml\Deserializer\keyValue($reader, '{}item'); - if (isset($keyValue['{}pubDate'])) { - $game->date = strtotime($keyValue['{}pubDate']); - if ($game->date < $this->period) { - return $game; - } - } - if (isset($keyValue['{}plainTitle'])) { - $game->title = $keyValue['{}plainTitle']; - } - if (isset($keyValue['{}link'])) { - $game->url = $keyValue['{}link']; - } - if (isset($keyValue['{}description'])) { - $game->description = trim(strip_tags($keyValue['{}description'], '


')); - } - $game_page = $this->get_text($game->url); - $this->loadStr($game_page, []); - $lines = $this->dom->filter('.game_info_panel_widget tr')->each(function($line) use($game){ - $text = $line->filter('td'); - if (trim($text->text()) == 'Author') { - $game->author = strip_tags($text->nextAll()->first()->html()); - } - }); - $this->games[] = $game->print(); - return $game; - }, - ]; - try { - $dom = $service->parse($xml); - } catch (\Exception $e) {} // ignore malformed XML + $game->url = $cell->filter('a.game_link')->attr('href'); + $game->title = $cell->filter('a.title')->text(); + $game->author = $cell->filter('.game_author')->text(); + $game->image = $cell->filter('.game_thumb')->attr('data-background_image'); + $this->queue[] = $game; + }); + if ($text->num_items < 30) { + // less than default number of games, probably last page + break; + } + } } protected function parse() { global $argv; + if (!$this->period instanceof \DateTime) { + $this->period = new \DateTime('@'.$this->period); + } if (isset($argv[2])) { $game_page = $this->get_text($argv[2]); $this->loadStr($game_page, []); $this->output .= $this->page($argv[2])->print(); } else { - $this->parse_tag("https://itch.io/games/newest/tag-text-based.xml"); - $this->parse_tag("https://itch.io/games/newest/tag-twine.xml"); - $this->parse_tag("https://itch.io/games/newest/tag-interactive-fiction.xml"); + $this->parse_tag("https://itch.io/games/newest/tag-text-based"); + //$this->parse_tag("https://itch.io/games/newest/tag-twine"); + //$this->parse_tag("https://itch.io/games/newest/tag-interactive-fiction"); + $this->queue = array_unique($this->queue); + foreach ($this->queue as $game) { + echo 'Loading details for '.$game->title.'…'.PHP_EOL; + $game_page = $this->get_text($game->url); + $this->loadStr($game_page, []); + $game = $this->page($game->url); + if ($game->date < $this->period) { + continue; + } + $this->games[] = $game->print(); + } $this->games = array_unique($this->games); foreach ($this->games as $game) { $this->output .= $game; @@ -87,12 +84,36 @@ class Itch extends Source { $title = trim($this->dom->filter("title")->first()->text()); [$game->title, $game->author] = explode(' by ', $title); unset($title); + $this->dom->filter('script[type="application/ld+json"]')->each(function($script) use(&$game) { + $data = json_decode($script->html()); + if ($data === false) { + return; + } + if ($data->{'@type'} === 'Product') { + if (isset($data->description)) { + $game->short_description = $data->description; + } + if (isset($data->name)) { + $game->title = $data->name; + } + if (isset($data->offers) && isset($data->offers->seller)) { + $game->author = $data->offers->seller->name; + } + } + }); + $date = $this->dom->filter('td abbr'); + if ($date) { + $date = $date->first()->attr('title'); + $date = str_replace('@', '', $date); + $game->date = new \DateTime($date); + } $desc = $this->dom->filter('.formatted_description'); try { $game->description = trim($desc->first()->html()); - } catch (\Throwable $e) {} - $converter = new Pandoc(); - $game->description = $converter->convert($game->description, 'html', 'mediawiki'); + } catch (\Throwable $e) { + } + // $converter = new Pandoc(); + // $game->description = $converter->convert($game->description, 'html', 'mediawiki'); return $game; } }