Skip to content

Instantly share code, notes, and snippets.

@doulmi
Created June 21, 2017 07:50
Show Gist options
  • Save doulmi/87348271c400e7671a7aced2ae6d0e66 to your computer and use it in GitHub Desktop.
Save doulmi/87348271c400e7671a7aced2ae6d0e66 to your computer and use it in GitHub Desktop.
Bienvenu.php
<?php
namespace App\Console\Commands;
use Carbon\Carbon;
use Illuminate\Console\Command;
require_once app_path('simple_html_dom.php');
/**
 * Artisan command that scrapes video posts from the francebienvenue1
 * WordPress blog and turns them into .lrc text files.
 *
 * The work happens in two phases:
 *  1. Walk a fixed range of listing pages, open every linked post that embeds
 *     a YouTube player, and save title + transcript + explanations as
 *     public/frs/texts/<youtubeId>.html. The collected ids are also written
 *     to public/frs/ids.txt.
 *  2. For every saved .html file, look up the video title on YouTube and write
 *     the tag-stripped text to public/frs/results/<slugged-title>.lrc.
 */
class FrDown extends Command
{
    /** Base URL of the paginated post listing; the page number is appended. */
    const BASE_URL = 'https://francebienvenue1.wordpress.com/page/';

    /** First listing page to scrape (inclusive). */
    const FIRST_PAGE = 30;

    /** Last listing page to scrape (inclusive). */
    const LAST_PAGE = 38;

    /**
     * Matches a "read more" permalink (/yyyy/mm/dd/slug/#more-id) in a listing page.
     * The slug segment excludes quotes, slashes and whitespace so that two links
     * on the same line can never be merged into one match.
     */
    const POST_URL_PATTERN = '~https://francebienvenue1\.wordpress\.com/\d+/\d+/\d+/[^/"\'\s]+/#more-\d+~';

    /**
     * Captures the video id from a YouTube embed URL. Restricting the id to
     * [A-Za-z0-9_-] also keeps it safe to use as a file name.
     */
    const YOUTUBE_ID_PATTERN = '~embed/([A-Za-z0-9_-]+)~';

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'downloader:fr';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Command description';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command: download the transcripts, store the list
     * of video ids, then build one .lrc file per downloaded transcript.
     *
     * @return void
     */
    public function handle()
    {
        $textsDir = public_path('frs/texts');
        $resultsDir = public_path('frs/results');
        $this->ensureDirectory($textsDir);
        $this->ensureDirectory($resultsDir);

        $ids = $this->downloadTranscripts($textsDir);
        file_put_contents(public_path('frs/ids.txt'), implode("\n", $ids));

        $this->buildLrcFiles($textsDir, $resultsDir);
    }

    /**
     * Phase 1: scrape every listing page and save one .html file per video post.
     *
     * @param string $textsDir Directory that receives the <youtubeId>.html files.
     * @return array List of YouTube ids that were saved, in scrape order.
     */
    private function downloadTranscripts($textsDir)
    {
        $ids = [];

        for ($page = self::FIRST_PAGE; $page <= self::LAST_PAGE; $page++) {
            $listing = $this->readOrNull(self::BASE_URL . $page);
            if ($listing === null) {
                $this->warn('Could not fetch listing page ' . $page . ', skipping.');
                continue;
            }

            preg_match_all(self::POST_URL_PATTERN, $listing, $matches);
            // The same permalink can appear more than once on a page; visit each post once.
            $urls = array_values(array_unique($matches[0]));
            $total = count($urls);

            foreach ($urls as $index => $url) {
                $post = $this->scrapePost($url);
                if ($post !== null) {
                    $ids[] = $post['id'];
                    file_put_contents($textsDir . '/' . $post['id'] . '.html', $post['html']);
                }

                $this->line(sprintf('%d/%d : %d/%d', $page, self::LAST_PAGE, $index + 1, $total));
            }
        }

        return $ids;
    }

    /**
     * Load a single post and extract its YouTube id and text.
     *
     * Posts without an embedded YouTube player are skipped silently; posts
     * that have a player but not the expected markup are skipped with a warning.
     *
     * @param string $url Permalink of the post.
     * @return array|null ['id' => string, 'html' => string], or null when the post is skipped.
     */
    private function scrapePost($url)
    {
        try {
            $dom = file_get_html($url);
        } catch (\ErrorException $e) {
            // Presumably raised by the framework's error handler when the HTTP request emits a warning.
            $dom = false;
        }

        if (!$dom) {
            $this->warn('Could not load post ' . $url . ', skipping.');
            return null;
        }

        $post = null;
        $player = $dom->find('iframe[class=youtube-player]', 0);

        if ($player) {
            $titleNode = $dom->find('h1[class=title]', 0);
            $entry = $dom->find('div[class=entry]', 0);
            // The 4th and 5th children of the entry hold the transcript and the explanations.
            $transcript = $entry ? $entry->children(3) : null;
            $explanation = $entry ? $entry->children(4) : null;

            if ($titleNode && $transcript && $explanation
                && preg_match(self::YOUTUBE_ID_PATTERN, $player->src, $idMatch)
            ) {
                $post = [
                    'id' => $idMatch[1],
                    'html' => html_entity_decode(trim($titleNode->innertext))
                        . html_entity_decode($transcript->innertext)
                        . html_entity_decode($explanation->innertext),
                ];
            } else {
                $this->warn('Unexpected markup in ' . $url . ', skipping.');
            }
        }

        // simple_html_dom keeps circular references between nodes; release them
        // explicitly so memory does not grow with every post.
        $dom->clear();

        return $post;
    }

    /**
     * Phase 2: convert every saved transcript into an .lrc file named after
     * the slugged YouTube video title.
     *
     * @param string $textsDir   Directory containing the <youtubeId>.html files.
     * @param string $resultsDir Directory that receives the .lrc files.
     * @return void
     */
    private function buildLrcFiles($textsDir, $resultsDir)
    {
        $files = glob($textsDir . '/*.html');
        if ($files === false) {
            $files = [];
        }
        $total = count($files);

        foreach ($files as $index => $file) {
            $id = basename($file, '.html');
            $html = $this->readOrNull($file);
            $watchPage = $this->readOrNull('https://www.youtube.com/watch?v=' . rawurlencode($id));

            if ($html !== null
                && $watchPage !== null
                && preg_match('/<meta name="title" content="([^"]*)">/', $watchPage, $titleMatch)
            ) {
                $slug = str_slug(html_entity_decode($titleMatch[1]));
                if ($slug !== '') {
                    // Put every <br /> on its own line before the tags are removed.
                    $lyrics = strip_tags(str_replace("<br />", "<br />\n", $html));
                    file_put_contents($resultsDir . '/' . $slug . '.lrc', '[00:00.00]' . $lyrics);
                }
            } else {
                $this->warn('No title found for video ' . $id . ', skipping.');
            }

            $this->line(($index + 1) . '/' . $total);
        }
    }

    /**
     * Read a URL or local file, returning null instead of failing.
     *
     * @param string $location URL or file path accepted by file_get_contents().
     * @return string|null The contents, or null when reading failed.
     */
    private function readOrNull($location)
    {
        try {
            $body = file_get_contents($location);
        } catch (\ErrorException $e) {
            // Presumably raised by the framework's error handler when the read emits a warning.
            return null;
        }

        return $body === false ? null : $body;
    }

    /**
     * Create the directory (recursively) when it does not exist yet.
     *
     * @param string $dir Absolute directory path.
     * @return void
     * @throws \RuntimeException When the directory cannot be created.
     */
    private function ensureDirectory($dir)
    {
        if (!is_dir($dir) && !mkdir($dir, 0755, true) && !is_dir($dir)) {
            throw new \RuntimeException('Unable to create directory ' . $dir);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment