1
0
Fork 0

Porndig is in

This commit is contained in:
Alexander Yakovlev 2016-04-23 13:28:37 +07:00
parent 298922d2bb
commit 99e82aae5d
3 changed files with 103 additions and 2 deletions

View File

@ -6,10 +6,13 @@ use yii\helpers\Console;
use common\models\Video;
use app\helpers\parsers\Vporn;
use app\helpers\parsers\Porndig;
/**
 * Controller exposing the video parsing action.
 */
class VideoController extends Controller {
/**
 * Runs the site parsers one after another: each parser is constructed with
 * an empty array argument and then started through its run() method.
 */
public function actionParse() {
$vporn = new Vporn([]);
$vporn->run();
// NOTE(review): the two lines below are a commented-out copy of the Vporn
// calls above; presumably only one of the two pairs is meant to remain --
// confirm whether the Vporn parser should still run here.
//$vporn = new Vporn([]);
//$vporn->run();
$porndig = new Porndig([]);
$porndig->run();
}
}

View File

@ -2,6 +2,8 @@
namespace app\helpers;
use DomXPath;
/**
* Abstract Parser class.
* A parser connects to a video hosting site, gets the latest videos and their

View File

@ -0,0 +1,96 @@
<?php
namespace app\helpers\parsers;
use app\helpers\Parser;
use app\models\Video;
use app\models\Picture;
use app\models\Tag;
use app\models\Category;
use DomDocument;
/**
 * Parser for porndig.com.
 *
 * Reads the site's "top videos" RSS feed, visits every page linked from it and
 * stores a Video record for each page that is not yet in the database and that
 * is_hd() accepts.
 */
class Porndig extends Parser {
    /**
     * href of the last download link seen by is_hd() on the page currently
     * being parsed; null until is_hd() has found one.
     */
    protected $hd_link;

    /**
     * Fetches the RSS feed and parses every page it links to.
     *
     * @throws \RuntimeException when the feed cannot be downloaded or is not
     *                           well-formed XML
     */
    public function run() {
        $xml = file_get_contents('https://www.porndig.com/rss/top/videos.xml');
        if ($xml === false || $xml === '') {
            throw new \RuntimeException('Unable to download the Porndig RSS feed');
        }

        // loadXML() is an instance method; calling it statically is deprecated
        // on old PHP versions and an Error on PHP 8.
        $feed = new DOMDocument();
        if ($feed->loadXML($xml) === false) {
            throw new \RuntimeException('The Porndig RSS feed is not valid XML');
        }

        foreach ($feed->getElementsByTagName('item') as $item) {
            $link = $item->getElementsByTagName('link')->item(0);
            if ($link === null) {
                // feed item without a <link> element, nothing to visit
                continue;
            }
            $this->parse_single_page($link->nodeValue);
        }
    }

    /**
     * Parses one video page and saves it as a Video record.
     *
     * The page is skipped when a record with the same original_url already
     * exists, when the page cannot be fetched or parsed, or when is_hd() does
     * not accept it.
     *
     * @param string $url address of the video page
     */
    protected function parse_single_page($url)
    {
        // collect libxml parse errors internally instead of emitting warnings
        libxml_use_internal_errors(true);

        // check if this video is already in the database
        if (Video::find()->where(['original_url' => $url])->one() !== null) {
            return;
        }

        $markup = file_get_contents($url);
        if ($markup === false || $markup === '') {
            // best effort: an unreachable page must not abort the whole run
            return;
        }

        $html = new DOMDocument();
        if ($html->loadHTML($markup) === false) {
            return;
        }

        // check if the video is HD; anything but a strict true means "skip"
        if ($this->is_hd($html) !== true) {
            return;
        }

        echo 'Downloading video from '.$url;
        $video = new Video;
        // video is in iframe, no pictures
        $video->original_url = $url;
        $video->original_id = $this->get_id($url, $html);
        $video->name = $this->get_name($url, $html);
        $video->filename = $this->get_video_src($html);
        // NOTE(review): the result of save() is not checked, so "Done." is
        // printed even if the record was not stored -- confirm this is intended.
        $video->save();
        echo "Done.\n";
    }

    /**
     * Is the video HD or not?
     *
     * Counts the <a> elements inside the second child node of the first
     * 'post_download' element and remembers the href of the last one in
     * $this->hd_link.
     *
     * NOTE(review): the original remark here said "if <= 2 (Low, Mid, no High)
     * then it's not HD", but the code requires more than 3 links. The
     * threshold is kept as it was -- confirm which of the two is intended.
     *
     * @param \DOMDocument $html parsed video page
     * @return bool true when more than 3 download links were found
     */
    protected function is_hd($html)
    {
        // forget the link remembered from a previously parsed page
        $this->hd_link = null;

        // presumably get_class() returns the nodes carrying the given CSS
        // class, or null when there are none -- defined in the parent class
        $videolinks = $this->get_class($html, 'post_download');
        if (is_null($videolinks)) {
            return false;
        }

        $container = $videolinks->item(0);
        if ($container === null || !isset($container->childNodes)) {
            return false;
        }

        $wrapper = $container->childNodes->item(1);
        if ($wrapper === null || !isset($wrapper->childNodes)) {
            return false;
        }

        $count = 0;
        foreach ($wrapper->childNodes as $child) {
            // \DOMElement must be fully qualified: an unqualified name would
            // resolve inside this namespace and never match. The property is
            // tagName (property names are case-sensitive).
            if ($child instanceof \DOMElement && $child->tagName === 'a') {
                $this->hd_link = $child->getAttribute('href');
                $count++;
            }
        }

        return $count > 3;
    }

    /**
     * Extracts the numeric video id from the page URL.
     *
     * @param string       $url  address of the video page
     * @param \DOMDocument $html parsed video page (unused)
     * @return string|null the first all-digit path segment enclosed in
     *                     slashes, or null when the URL has none
     */
    protected function get_id($url, $html)
    {
        if (preg_match('/\/(\d+)\//', $url, $matches) === 1) {
            return $matches[1];
        }
        return null;
    }

    /**
     * Returns the video link remembered by the last is_hd() call.
     *
     * @param \DOMDocument $html parsed video page (unused)
     * @return string|null
     */
    protected function get_video_src($html)
    {
        return $this->hd_link;
    }

    /**
     * Extracts the video name from the page <title>, dropping the trailing
     * " - PornDig..." part.
     *
     * @param string       $url  address of the video page (unused)
     * @param \DOMDocument $html parsed video page
     * @return string|null the name, or null when the page has no <title> or
     *                     the title does not have the expected form
     */
    protected function get_name($url, $html)
    {
        $titleNode = $html->getElementsByTagName('title')->item(0);
        if ($titleNode === null) {
            return null;
        }

        if (preg_match('/(.*)\s+-\s+PornDig.*/', $titleNode->nodeValue, $matches) === 1) {
            return $matches[1];
        }
        return null;
    }
}