Porndig is in
This commit is contained in:
parent
298922d2bb
commit
99e82aae5d
|
@ -6,10 +6,13 @@ use yii\helpers\Console;
|
||||||
|
|
||||||
use common\models\Video;
|
use common\models\Video;
|
||||||
use app\helpers\parsers\Vporn;
|
use app\helpers\parsers\Vporn;
|
||||||
|
use app\helpers\parsers\Porndig;
|
||||||
|
|
||||||
class VideoController extends Controller
{
    /**
     * Console action: builds the Porndig parser with an empty configuration
     * array and runs it.
     */
    public function actionParse()
    {
        // The Vporn parser is switched off for now; the calls are kept for reference.
        //$vporn = new Vporn([]);
        //$vporn->run();

        $parser = new Porndig([]);
        $parser->run();
    }
}
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
namespace app\helpers;
|
namespace app\helpers;
|
||||||
|
|
||||||
|
use DomXPath;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract Parser class.
|
* Abstract Parser class.
|
||||||
* A parser connects to a video hosting site, gets the latest videos and their
|
* A parser connects to a video hosting site, gets the latest videos and their
|
||||||
|
|
96
console/helpers/parsers/Porndig.php
Normal file
96
console/helpers/parsers/Porndig.php
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
<?php
|
||||||
|
namespace app\helpers\parsers;
|
||||||
|
|
||||||
|
use app\helpers\Parser;
|
||||||
|
use app\models\Video;
|
||||||
|
use app\models\Picture;
|
||||||
|
use app\models\Tag;
|
||||||
|
use app\models\Category;
|
||||||
|
use DomDocument;
|
||||||
|
|
||||||
|
/**
 * Parser for porndig.com.
 *
 * Reads the site's "top videos" RSS feed, opens every linked video page and
 * stores a Video record for each page that is detected as HD and is not yet
 * present in the database.
 */
class Porndig extends Parser {
    /**
     * Download link captured by is_hd() for the page currently being parsed,
     * or null when no link has been found.
     */
    protected $hd_link;

    /**
     * Downloads the RSS feed and hands every <item>'s <link> value to
     * parse_single_page().
     */
    public function run()
    {
        $feed = file_get_contents('https://www.porndig.com/rss/top/videos.xml');
        if ($feed === false || $feed === '') {
            echo "Unable to download the RSS feed.\n";
            return;
        }

        // loadXML() is an instance method; calling it statically is an error on PHP 8.
        $xml = new DOMDocument();
        if ($xml->loadXML($feed) === false) {
            echo "Unable to parse the RSS feed.\n";
            return;
        }

        foreach ($xml->getElementsByTagName('item') as $item) {
            $link_node = $item->getElementsByTagName('link')->item(0);
            if ($link_node === null) {
                // Feed item without a <link> element: nothing to fetch.
                continue;
            }
            $this->parse_single_page($link_node->nodeValue);
        }
    }

    /**
     * Fetches one video page and saves a Video record for it.
     *
     * The page is skipped when a Video with the same original_url already
     * exists, when the page cannot be downloaded or parsed, or when is_hd()
     * does not report it as HD.
     *
     * @param string $url address of the video page
     */
    protected function parse_single_page($url)
    {
        // Real-world HTML is rarely valid; keep libxml from emitting a warning per error.
        libxml_use_internal_errors(true);

        // check if this video is already in the database
        if (Video::find()->where(['original_url' => $url])->one() !== null) {
            return;
        }

        $markup = file_get_contents($url);
        if ($markup === false || $markup === '') {
            return;
        }

        // loadHTML() is an instance method; calling it statically is an error on PHP 8.
        $html = new DOMDocument();
        if ($html->loadHTML($markup) === false) {
            return;
        }

        // check if the video is HD; anything other than a strict true means "skip"
        if ($this->is_hd($html) !== true) {
            return;
        }

        echo 'Downloading video from '.$url;

        // video is in iframe, no pictures
        $video = new Video;
        $video->original_url = $url;
        $video->original_id = $this->get_id($url, $html);
        $video->name = $this->get_name($url, $html);
        $video->filename = $this->get_video_src($html);

        // Only report success when the record was actually stored.
        if ($video->save()) {
            echo "Done.\n";
        } else {
            echo "Save failed.\n";
        }
    }

    /**
     * Is the video HD or not?
     *
     * Counts the <a> elements inside the second child node of the first
     * node returned by get_class($html, 'post_download'). As a side effect
     * the href of the last such link is stored in $this->hd_link.
     *
     * @param \DOMDocument $html parsed video page
     * @return bool true when more than three download links were found
     */
    protected function is_hd($html)
    {
        // Reset first so a link from a previously parsed page can never leak into this one.
        $this->hd_link = null;

        // NOTE(review): get_class() is defined in the parent Parser class, which is not
        // visible here; presumably it returns the nodes carrying the given CSS class,
        // or null when there are none -- confirm.
        $videolinks = $this->get_class($html, 'post_download');
        if (is_null($videolinks)) {
            return false;
        }

        $container = $videolinks->item(0);
        if (!($container instanceof \DOMNode) || !$container->hasChildNodes()) {
            return false;
        }

        // The links live in the second child node of the container.
        $wrapper = $container->childNodes->item(1);
        if ($wrapper === null || !$wrapper->hasChildNodes()) {
            return false;
        }

        $count = 0;
        foreach ($wrapper->childNodes as $child) {
            // \DOMElement must be fully qualified: an unqualified name would resolve
            // inside this file's namespace and never match. The property is tagName.
            if ($child instanceof \DOMElement && $child->tagName === 'a') {
                // Each link overwrites the previous one, so the last link wins.
                $this->hd_link = $child->getAttribute('href');
                $count++;
            }
        }

        // NOTE(review): the original comment said "<= 2 links (Low, Mid, no High) means
        // not HD", which suggests a threshold of > 2, but the code has always compared
        // against 3. The threshold is kept unchanged -- confirm against the live markup.
        return $count > 3;
    }

    /**
     * Extracts the numeric id from the video URL.
     *
     * @param string $url address of the video page
     * @param \DOMDocument $html parsed video page (unused)
     * @return string|null first run of digits enclosed in slashes, or null when
     *                     the URL contains none
     */
    protected function get_id($url, $html)
    {
        if (preg_match('/\/(\d+)\//', $url, $matches) === 1) {
            return $matches[1];
        }

        return null;
    }

    /**
     * Returns the download link captured by the preceding is_hd() call.
     *
     * @param \DOMDocument $html parsed video page (unused)
     * @return string|null
     */
    protected function get_video_src($html)
    {
        return $this->hd_link;
    }

    /**
     * Extracts the video name from the page <title>.
     *
     * @param string $url address of the video page (unused)
     * @param \DOMDocument $html parsed video page
     * @return string|null the title text preceding " - PornDig", or null when
     *                     the page has no <title> or the title has another shape
     */
    protected function get_name($url, $html)
    {
        $title_node = $html->getElementsByTagName('title')->item(0);
        if ($title_node === null) {
            return null;
        }

        if (preg_match('/(.*)\s+-\s+PornDig.*/', $title_node->nodeValue, $matches) === 1) {
            return $matches[1];
        }

        return null;
    }
}
|
Loading…
Reference in a new issue