Upgrade LBCAlerte to version 3.3

Add upgrade script
This commit is contained in:
Jimmy Monin
2016-11-26 19:19:16 +01:00
parent a7c054b535
commit 58ffd500e6
89 changed files with 6436 additions and 758 deletions

View File

@ -9,15 +9,53 @@ class Ad
protected $_link_mobile;
protected $_title;
protected $_description;
protected $_price;
protected $_price = 0;
protected $_currency = "";
protected $_date;
protected $_category;
protected $_country;
protected $_city;
protected $_zip_code;
protected $_professional;
protected $_thumbnail_link;
protected $_urgent;
protected $_author;
protected $_photos = array();
protected $_properties = array();
/**
 * Bulk-assigns values to the matching underscore-prefixed properties.
 *
 * Every key of $options is looked up as "_<key>" on this object; keys
 * that do not correspond to an existing property are silently ignored.
 *
 * @param array $options property name (without the leading underscore) => value
 * @return \AdService\Ad this instance, to allow call chaining
 */
public function setFromArray(array $options)
{
    foreach ($options AS $key => $val) {
        $property = "_".$key;
        if (!property_exists($this, $property)) {
            continue;
        }
        $this->{$property} = $val;
    }

    return $this;
}
/**
 * Exports the ad as an associative array keyed by property name
 * (without the leading underscore).
 *
 * NOTE(review): the thumbnail link is not part of the export — confirm
 * this omission is intentional.
 *
 * @return array
 */
public function toArray()
{
    $export = array();

    // Identity and links
    $export["id"] = $this->_id;
    $export["link"] = $this->_link;
    $export["link_mobile"] = $this->_link_mobile;

    // Content
    $export["title"] = $this->_title;
    $export["description"] = $this->_description;
    $export["price"] = $this->_price;
    $export["currency"] = $this->_currency;
    $export["date"] = $this->_date;
    $export["category"] = $this->_category;

    // Location
    $export["country"] = $this->_country;
    $export["city"] = $this->_city;
    $export["zip_code"] = $this->_zip_code;

    // Flags, media and extra data
    $export["professional"] = $this->_professional;
    $export["photos"] = $this->_photos;
    $export["urgent"] = $this->_urgent;
    $export["author"] = $this->_author;
    $export["properties"] = $this->_properties;

    return $export;
}
/**
@ -229,6 +267,25 @@ class Ad
}
/**
 * Stores the zip code of the ad.
 * @param string $zip_code
 * @return \AdService\Ad this instance, to allow call chaining
 */
public function setZipCode($zip_code)
{
$this->_zip_code = $zip_code;
return $this;
}
/**
 * Returns the zip code previously stored on the ad.
 * @return string
 */
public function getZipCode()
{
return $this->_zip_code;
}
/**
* @param bool $professional
* @return \AdService\Ad
@ -284,4 +341,84 @@ class Ad
{
return $this->_urgent;
}
/**
 * Stores the author of the ad.
 * @param string $author
 * @return \AdService\Ad this instance, to allow call chaining
 */
public function setAuthor($author)
{
$this->_author = $author;
return $this;
}
/**
 * Returns the author previously stored on the ad.
 * @return string
 */
public function getAuthor()
{
return $this->_author;
}
/**
 * Replaces the whole list of photos attached to the ad.
 * NOTE(review): the Lbc parser passes entries shaped as
 * array("remote" => ..., "local" => ...) — confirm whether that shape is required.
 * @param array $photos
 * @return \AdService\Ad this instance, to allow call chaining
 */
public function setPhotos(array $photos)
{
$this->_photos = $photos;
return $this;
}
/**
 * Returns the list of photos attached to the ad (an empty array by default).
 * @return array
 */
public function getPhotos()
{
return $this->_photos;
}
/**
 * Adds an extra named property to the ad.
 * A property already stored under the same name is overwritten.
 * @param string $name
 * @param string $value
 * @return \AdService\Ad this instance, to allow call chaining
 */
public function addProperty($name, $value)
{
$this->_properties[$name] = $value;
return $this;
}
/**
 * Removes an extra property from the ad.
 * Removing a name that is not stored has no effect.
 * @param string $name
 * @return \AdService\Ad this instance, to allow call chaining
 */
public function removeProperty($name)
{
unset($this->_properties[$name]);
return $this;
}
/**
 * Looks up a single extra property by name.
 *
 * @param string $name
 * @return string|null the stored value, or null when nothing (or null) is stored under $name
 */
public function getProperty($name)
{
    return isset($this->_properties[$name])
        ? $this->_properties[$name]
        : null;
}
/**
 * Returns every extra property of the ad, indexed by property name
 * (an empty array by default).
 * @return array
 */
public function getProperties()
{
return $this->_properties;
}
}

View File

@ -3,6 +3,7 @@
namespace AdService\Parser;
use AdService\Filter;
use AdService\Ad;
abstract class AbstractParser extends \DOMDocument
{
@ -16,4 +17,14 @@ abstract class AbstractParser extends \DOMDocument
* @param Filter $filter
*/
abstract public function process($content, Filter $filter = null);
/**
 * Parses the page of a single ad.
 * This default implementation extracts nothing and always returns null.
 * NOTE(review): presumably meant to be overridden by parsers that can read an ad page — confirm.
 * @param string $content
 * @return null|Ad
 */
public function processAd($content)
{
return null;
}
}

View File

@ -21,7 +21,7 @@ class Lbc extends AbstractParser
return;
}
$this->scheme = $scheme;
$this->loadHTML($content);
$this->loadHTML('<?xml encoding="UTF-8">'.$content);
$timeToday = strtotime(date("Y-m-d")." 23:59:59");
$dateYesterday = $timeToday - 24*3600;
@ -89,7 +89,11 @@ class Lbc extends AbstractParser
// recherche de l'image
foreach ($result->getElementsByTagName("span") AS $node) {
if ($src = $node->getAttribute("data-imgsrc")) {
$ad->setThumbnailLink($this->formatLink($src));
$src = $this->formatLink($src);
if (false !== strpos($src, "/ad-thumb/")) {
$src = str_replace("/ad-thumb/", "/ad-large/", $src);
}
$ad->setThumbnailLink($src);
}
}
@ -167,6 +171,167 @@ class Lbc extends AbstractParser
return $ads;
}
/**
 * Parses the page of a single ad and builds the matching Ad object.
 *
 * @param string $content HTML source of the ad page
 * @param string $scheme  scheme stored on the parser before links are formatted
 * @return Ad|null null when the page has no "adview" section or when no
 *                 numeric ad ID can be read from the canonical link
 */
public function processAd($content, $scheme = "http")
{
    // The XML prolog forces the HTML to be read as UTF-8
    $this->loadHTML('<?xml encoding="UTF-8">'.$content);
    $this->scheme = $scheme;

    // Locate the main container: first <section> whose class contains "adview"
    $container = null;
    foreach ($this->getElementsByTagName("section") AS $section) {
        if (false !== strpos((string) $section->getAttribute("class"), "adview")) {
            $container = $section;
            break;
        }
    }

    // Without that container this does not look like a valid ad
    if (!$container) {
        return null;
    }

    $ad = new Ad();
    $ad->setProfessional(false)->setUrgent(false);

    // Link to the ad, read from <link rel="canonical">; the mobile link is derived from it
    foreach ($this->getElementsByTagName("link") AS $link) {
        if ("canonical" === $link->getAttribute("rel")) {
            $ad->setLink($this->formatLink($link->getAttribute("href")))
                ->setLinkMobile(str_replace(
                    array("http://www.", "https://www."),
                    array("http://mobile.", "https://mobile."),
                    $ad->getLink()
                ));
        }
    }

    // No ID, no ad. The cast guarantees preg_match() a string subject
    // even if no canonical link was found above.
    if (!preg_match('/([0-9]+)\.htm.*/', (string) $ad->getLink(), $m)) {
        return null;
    }
    $ad->setId($m[1]);

    // Category: third <li> of the breadcrumb navigation
    foreach ($this->getElementsByTagName("nav") AS $nav) {
        if (false !== strpos($nav->getAttribute("class"), "breadcrumbsNav")) {
            $li = $nav->getElementsByTagName("li")->item(2);
            if ($li) {
                $ad->setCategory(trim($li->nodeValue));
            }
        }
    }

    // Publication date: dd/mm/yyyy in the raw page, stored as yyyy-mm-dd
    if (preg_match("#publish_date\s*:\s*\"([0-9]{2})/([0-9]{2})/([0-9]{4})\"#", $content, $m)) {
        $ad->setDate($m[3]."-".$m[2]."-".$m[1]);
    }

    // Photos declared in inline scripts as images[N] = "..."
    foreach ($container->getElementsByTagName("script") AS $script) {
        if (preg_match_all("#images\[[0-9]+\]\s*=\s*\"([^\"]+)\"\s*;#imsU", $script->nodeValue, $images)) {
            $photos = array();
            foreach ($images[1] AS $image) {
                $image = $this->formatLink($image);
                $photos[] = array(
                    "remote" => $image,
                    "local" => sha1($image).".jpg",
                );
            }
            $ad->setPhotos($photos);
        }
    }

    // Urgent flag, read from the raw page content
    $ad->setUrgent(false !== strpos($content, "urgent : \"1\""));

    foreach ($container->getElementsByTagName("*") AS $element) {
        $itemprop = $element->getAttribute("itemprop");
        $tag = strtolower($element->tagName);

        // Title
        if ($tag === "h1") {
            $ad->setTitle(trim($element->nodeValue));
            continue;
        }

        // Photo: fallback used only when the scripts above yielded no photo
        if ($tag === "div"
            && !$ad->getPhotos()
            && ($value = $element->getAttribute("data-popin-content"))
        ) {
            $image = $this->formatLink($value);
            $ad->setPhotos(array(array(
                "remote" => $image,
                "local" => sha1($image).".jpg",
            )));
            continue;
        }

        // Address: "<city> <5-digit zip code>"
        if ($itemprop === "address") {
            if (preg_match("#(.*)\s+([0-9]{5})#", trim($element->nodeValue), $m)) {
                // The greedy capture can keep trailing whitespace when several
                // whitespace characters separate the city from the zip code.
                $ad->setCity(trim($m[1]))
                    ->setZipCode($m[2]);
            }
            continue;
        }

        // Price
        if ($itemprop === "price") {
            $ad->setPrice($element->getAttribute("content"));
            continue;
        }

        // Description: <br> children become line breaks, other children contribute their text
        if ($itemprop === "description") {
            $description = "";
            foreach ($element->childNodes AS $sub_element) {
                if (isset($sub_element->tagName) && "br" === $sub_element->tagName) {
                    $description .= "\n";
                    continue;
                }
                $description .= $sub_element->nodeValue;
            }
            $ad->setDescription($description);
            continue;
        }

        // Professional seller
        if ("ispro" === $element->getAttribute("class")) {
            $ad->setProfessional(true);
            continue;
        }

        // Author
        if ($tag === "a"
            && false !== strpos($element->getAttribute("data-info"), "email::pseudo_annonceur")) {
            $ad->setAuthor(trim($element->nodeValue));
            continue;
        }

        // Any other property: the value is read two siblings after the label node
        if ("property" === $element->getAttribute("class")) {
            $name = trim($element->nodeValue);
            if ("Prix" === $name || "Ville" === $name) {
                continue;
            }
            // Either sibling may be missing; fall back to an empty value
            // instead of dereferencing null.
            $valueNode = $element->nextSibling ? $element->nextSibling->nextSibling : null;
            $value = $valueNode ? trim((string) $valueNode->nodeValue) : "";
            $ad->addProperty($name, $value);
        }
    }

    return $ad;
}
protected function formatLink($link)
{
if (0 === strpos($link, "//")) {

View File

@ -35,6 +35,12 @@ abstract class AbstractSiteConfig
*/
protected $has_date = true;
/**
 * Whether backing up ads is possible.
 * @var bool
 */
protected $allow_backup = false;
/**
* @param string $name
* @return mixed

View File

@ -6,4 +6,5 @@ class Lbc extends AbstractSiteConfig
{
protected $site_name = "LeBonCoin";
protected $site_url = "https://www.leboncoin.fr";
protected $allow_backup = true;
}