From 99e82aae5d51b999202132277a1a84e952806918 Mon Sep 17 00:00:00 2001 From: Alexander Yakovlev Date: Sat, 23 Apr 2016 13:28:37 +0700 Subject: [PATCH] Porndig is in --- console/controllers/VideoController.php | 7 +- console/helpers/Parser.php | 2 + console/helpers/parsers/Porndig.php | 96 +++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 console/helpers/parsers/Porndig.php diff --git a/console/controllers/VideoController.php b/console/controllers/VideoController.php index c925484..560e1f8 100644 --- a/console/controllers/VideoController.php +++ b/console/controllers/VideoController.php @@ -6,10 +6,13 @@ use yii\helpers\Console; use common\models\Video; use app\helpers\parsers\Vporn; +use app\helpers\parsers\Porndig; class VideoController extends Controller { public function actionParse() { - $vporn = new Vporn([]); - $vporn->run(); + //$vporn = new Vporn([]); + //$vporn->run(); + $porndig = new Porndig([]); + $porndig->run(); } } diff --git a/console/helpers/Parser.php b/console/helpers/Parser.php index e58312a..d2d206e 100644 --- a/console/helpers/Parser.php +++ b/console/helpers/Parser.php @@ -2,6 +2,8 @@ namespace app\helpers; +use DomXPath; + /** * Abstract Parser class. 
* A parser connects to a video hosting site, gets the latest videos and their diff --git a/console/helpers/parsers/Porndig.php b/console/helpers/parsers/Porndig.php new file mode 100644 index 0000000..24261e1 --- /dev/null +++ b/console/helpers/parsers/Porndig.php @@ -0,0 +1,96 @@ +getElementsByTagName('item'); + foreach ($links as $link) + { + $url = $link->getElementsByTagName('link')->item(0)->nodeValue; + $this->parse_single_page($url); + } + } + + protected function parse_single_page($url) + { + libxml_use_internal_errors(true); + // check if this video is already in the database + if (Video::find()->where(['original_url' => $url])->one() !== NULL) + return; + $html = DOMDocument::loadHTML(file_get_contents($url)); + // check if the video is HD + if ($this->is_hd($html) === FALSE) + return; + echo 'Downloading video from '.$url; + $video = new Video; + // video is in iframe, no pictures + $video->original_url = $url; + $video->original_id = $this->get_id($url, $html); + $video->name = $this->get_name($url, $html); + $video->filename = $this->get_video_src($html); + $video->save(); + echo "Done.\n"; + } + + /** + * Is the video HD or not? 
(boolean)
+     *
+     * Counts the <a> download links found among the children of the second
+     * child node of the first 'post_download' element, remembering the href
+     * of the last link seen in $this->hd_link. The page is treated as HD when
+     * more than three links are found.
+     *
+     * NOTE(review): the original inline comment said "<= 2 links (Low, Mid,
+     * no High) means not HD", but the threshold in the code is "> 3". The
+     * threshold is kept as-is; confirm which one is intended.
+     *
+     * @param DOMDocument $html parsed video page (presumably the document
+     *                          built in parse_single_page)
+     * @return bool TRUE when the page is considered HD, FALSE otherwise
+     **/
+    protected function is_hd($html)
+    {
+        $videolinks = $this->get_class($html, 'post_download');
+        if (is_null($videolinks))
+            return FALSE;
+        // isset() is also FALSE when item(0) returned NULL (nothing matched).
+        $container = $videolinks->item(0);
+        if (!isset($container->childNodes))
+        {
+            // Was a bare "return;" (NULL), which slipped past the caller's
+            // strict "=== FALSE" check and let non-matching pages be saved.
+            return FALSE;
+        }
+        // The download links live in the second child of the container;
+        // bail out instead of dereferencing NULL when it is missing.
+        $list = $container->childNodes->item(1);
+        if (!isset($list->childNodes))
+            return FALSE;
+        $count = 0;
+        foreach ($list->childNodes as $child) {
+            // Fully qualified so the check works whether or not the file
+            // imports DOMElement. The property is tagName: PHP property
+            // names are case-sensitive, so "tagname" never matched.
+            if ($child instanceof \DOMElement && $child->tagName === 'a')
+            {
+                $this->hd_link = $child->getAttribute('href');
+                $count++;
+            }
+        }
+        return $count > 3;
+    }
+
+    /**
+     * Extracts the numeric video id from the page URL.
+     *
+     * @param string $url  video page URL; the id is the first all-digit
+     *                     path segment enclosed in slashes ("/123/")
+     * @param mixed  $html unused here
+     * @return string|null the digits, or NULL when the URL has no such segment
+     */
+    protected function get_id($url, $html)
+    {
+        // Only read $matches[1] after a successful match; otherwise the
+        // index is undefined.
+        if (preg_match('/\/(\d+)\//', $url, $matches) !== 1)
+            return NULL;
+        return $matches[1];
+    }
+
+    /**
+     * Returns the download link remembered by is_hd().
+     *
+     * @param mixed $html unused here
+     * @return string|null href of the last download link seen by is_hd(),
+     *                     or NULL when no link has been recorded
+     */
+    protected function get_video_src($html)
+    {
+        return isset($this->hd_link) ? $this->hd_link : NULL;
+    }
+
+    /**
+     * Derives the video name from the page <title>, dropping the trailing
+     * " - PornDig..." site suffix.
+     *
+     * @param string      $url  unused here
+     * @param DOMDocument $html parsed video page
+     * @return string|null the title without the suffix, or NULL when the page
+     *                     has no <title> or the title lacks the suffix
+     */
+    protected function get_name($url, $html)
+    {
+        $node = $html->getElementsByTagName('title')->item(0);
+        if ($node === NULL)
+            return NULL;
+        if (preg_match('/(.*)\s+-\s+PornDig.*/', $node->nodeValue, $matches) === 1)
+        {
+            return $matches[1];
+        }
+        return NULL;
+    }
+}