%PDF- %PDF-
Direktori : /home/bitrix/www/bitrix/modules/seo/classes/general/ |
Current File : /home/bitrix/www/bitrix/modules/seo/classes/general/seo_page_checker.php |
<?php

use Bitrix\Main\Web\HttpClient;

IncludeModuleLangFile(__FILE__);

/**
 * Downloads a page of a site and builds simple SEO statistics for it.
 *
 * The fetched HTML is split into word indexes (whole text, bold, italic,
 * links, external links, meta description/keywords, title, H1). Each index
 * is the array returned by stemming(), used here as "word => count".
 * Notes found while parsing (missing ALT, several H1, ...) are collected
 * and can be read with GetErrors().
 *
 * Word indexes are only filled when the "search" module is available;
 * otherwise they stay empty.
 */
class CSeoPageChecker
{
    public $__site;
    public $__url;
    public $__lang;
    public $__server_name;

    public $__bCheckErrors = true;

    public $__getter;

    public $__result_headers;
    public $__result_data;
    public $__result_extended = array();
    public $__result_meta = array('KEYWORDS' => '', 'DESCRIPTION' => '');
    public $__result_errors = array();

    public $__index;
    public $__index_total_len;

    public $pcre_backtrack_limit = false;

    public $__qualifier_links_count = 100;

    public $bError = false;
    public $errorString = '';

    public $bSearch = false;

    /**
     * Creates the checker and, unless $get is false, fetches the page.
     *
     * Declared before the legacy same-named method so that it is the
     * constructor on every PHP version (PHP 8 no longer treats a method
     * named after the class as a constructor).
     *
     * @param string $site         Site identifier passed to CSite::GetByID().
     * @param string $url          Path appended to the site's server name.
     * @param bool   $get          Fetch and parse the page immediately.
     * @param bool   $check_errors Collect SEO notes while parsing.
     */
    public function __construct($site, $url, $get = true, $check_errors = true)
    {
        $this->_Init($site, $url, $get, $check_errors);
    }

    /**
     * Legacy PHP 4-style entry point, kept so that existing explicit calls
     * (for example from a subclass) keep working.
     *
     * @param string $site
     * @param string $url
     * @param bool   $get
     * @param bool   $check_errors
     * @return bool True when initialisation (and the optional fetch) succeeded.
     */
    public function CSeoPageChecker($site, $url, $get = true, $check_errors = true)
    {
        return $this->_Init($site, $url, $get, $check_errors);
    }

    /**
     * Resolves the site, builds the absolute URL and optionally fetches it.
     *
     * Failures are reported through $APPLICATION->ThrowException().
     *
     * @param string $site
     * @param string $url
     * @param bool   $get
     * @param bool   $check_errors
     * @return bool
     */
    protected function _Init($site, $url, $get, $check_errors)
    {
        global $APPLICATION;

        if (CModule::IncludeModule('search')) {
            $this->bSearch = true;
        } else {
            // Reported, but not fatal: don't return false or set bError here.
            $APPLICATION->ThrowException(GetMessage('SEO_ERROR_NO_SEARCH'));
        }

        $this->__bCheckErrors = $check_errors;
        $this->__site = $site;

        $dbRes = CSite::GetByID($this->__site);
        $arRes = $dbRes->Fetch();
        if (!$arRes) {
            return false;
        }

        $this->__lang = $arRes['LANGUAGE_ID'];
        $this->__server_name = $arRes['SERVER_NAME'];

        // Fall back to the global server name when the site has none.
        if (strlen((string)$this->__server_name) <= 0) {
            $this->__server_name = COption::GetOptionString('main', 'server_name', '');
        }

        if (strlen((string)$this->__server_name) <= 0) {
            $this->bError = true;
            $APPLICATION->ThrowException(
                str_replace('#SITE_ID#', $this->__site, GetMessage('SEO_ERROR_NO_SERVER_NAME'))
            );

            return false;
        }

        // A real variable is passed instead of an assignment expression.
        // NOTE(review): presumably the second parameter is by-reference - confirm.
        $punycodeErrors = null;
        $this->__url = (CMain::IsHTTPS() ? "https://" : "http://")
            . CBXPunycode::ToASCII($this->__server_name, $punycodeErrors)
            . $url;

        if (!$get || $this->GetHTTPData()) {
            return true;
        }

        if ($this->bError && strlen($this->errorString) > 0) {
            $APPLICATION->ThrowException($this->errorString);
        }

        return false;
    }

    /**
     * Fetches $this->__url, stores body and headers and parses the body.
     *
     * On failure sets bError and, when the client reports errors,
     * errorString.
     *
     * @return bool True when a non-empty body was received and parsed.
     */
    public function GetHTTPData()
    {
        $this->__getter = new HttpClient();
        $this->__getter->setStreamTimeout(25);
        $this->__getter->setRedirect(true);

        $body = $this->__getter->get($this->__url);
        if ($body) {
            $this->__result_data = $body;

            foreach ($this->__getter->getHeaders()->toArray() as $header) {
                // Repeated header values are joined into one string.
                $this->__result_headers[$header["name"]] = implode(", ", $header['values']);
            }

            $this->_PrepareData();

            unset($this->__getter);
            $this->bError = false;

            return true;
        }

        $errors = $this->__getter->getError();
        if ($errors) {
            $this->errorString = implode(', ', $errors);
        }

        unset($this->__getter);
        $this->bError = true;

        return false;
    }

    /**
     * Strips tags from $text and returns its stemmed word index.
     *
     * @param string $text
     * @return array Result of stemming(); empty array when the search
     *               module is not available.
     */
    public function __prepareText($text)
    {
        if (!$this->bSearch) {
            return array();
        }

        return stemming(CSearch::KillTags($text), $this->__lang);
    }

    /**
     * Parses the fetched body: builds all word indexes, extracts the
     * extended data (tags found) and records SEO notes.
     *
     * Finally fires the "onPageCheck" event of the "seo" module when error
     * checking is enabled.
     */
    public function _PrepareData()
    {
        // The index is rebuilt below, so a cached total is no longer valid.
        $this->__index_total_len = null;

        if ($this->pcre_backtrack_limit === false) {
            $this->pcre_backtrack_limit = intval(ini_get("pcre.backtrack_limit"));
        }

        $text_len = $this->_GetByteLength() + 1;

        // Try to raise the PCRE limit so the patterns below can scan the whole body.
        if ($this->pcre_backtrack_limit > 0 && $this->pcre_backtrack_limit < $text_len) {
            @ini_set("pcre.backtrack_limit", $text_len);
            $this->pcre_backtrack_limit = intval(ini_get("pcre.backtrack_limit"));
        }

        // Still too low: the limit could not be raised.
        if (
            $this->__bCheckErrors
            && $this->pcre_backtrack_limit > 0
            && $this->pcre_backtrack_limit < $text_len
        ) {
            $this->_AddNote('SEO_PCRE', array(
                '#PCRE_BACKTRACK_LIMIT#' => $this->pcre_backtrack_limit,
                '#TEXT_LEN#' => $text_len,
            ));
        }

        $this->__index = array(
            'TOTAL' => array(),
            'BOLD' => array(),
            'ITALIC' => array(),
            'LINK' => array(),
            'DESCRIPTION' => array(),
            'KEYWORDS' => array(),
        );

        // Replace every image that has an ALT or TITLE attribute with that text.
        $replaced = preg_replace(
            '/<img[^>]*(alt|title)=\"([^\"]*)\".*?>/is',
            '\\2',
            $this->__result_data
        );
        // preg_replace() returns null on a PCRE error; keep the original text then.
        if ($replaced !== null) {
            $this->__result_data = $replaced;
        }

        // Images still present have neither attribute. Count all of them
        // (preg_match() would only ever report 0 or 1).
        if ($this->__bCheckErrors) {
            $arImages = array();
            $img_cnt = preg_match_all('/<img.*?>/is', $this->__result_data, $arImages);
            if ($img_cnt) {
                $this->_AddNote('SEO_IMG_NO_ALT', array('#COUNT#' => $img_cnt));
            }
        }

        // Full words index.
        $this->__index['TOTAL'] = $this->__prepareText($this->__result_data);

        // Bold words index.
        $arRes = array();
        if (preg_match_all("/<(b|strong)>(.*?)<\\/\\1>/is", $this->__result_data, $arRes)) {
            $this->__result_extended['BOLD'] = $arRes[0];
            $this->__index['BOLD'] = $this->__prepareText(implode(" ", $arRes[2]));
        }

        // Italic words index.
        if (preg_match_all("/<(i|em)>(.*?)<\\/\\1>/is", $this->__result_data, $arRes)) {
            $this->__result_extended['ITALIC'] = $arRes[0];
            $this->__index['ITALIC'] = $this->__prepareText(implode(" ", $arRes[2]));
        }

        // <noindex> blocks.
        if (preg_match_all("/<(noindex)>(.*?)<\\/\\1>/is", $this->__result_data, $arRes)) {
            $this->__result_extended['NOINDEX'] = $arRes[0];
            $this->__index['NOINDEX'] = $this->__prepareText(implode(" ", $arRes[2]));
        }

        // Link words index, nofollow links and external links.
        if (preg_match_all("/<(a) ([^>]*)>(.*?)<\\/\\1>/is", $this->__result_data, $arRes)) {
            $this->__result_extended['LINK'] = $arRes[0];
            $this->__index['LINK'] = $this->__prepareText(implode(" ", $arRes[3]));

            $this->__result_extended['NOFOLLOW'] = array();
            $this->__result_extended['LINK_EXTERNAL'] = array();
            $this->__index['LINK_EXTERNAL'] = array();

            foreach ($arRes[2] as $key => $attrs) {
                if (false !== strpos($attrs, 'rel="nofollow"')) {
                    $this->__result_extended['NOFOLLOW'][] = $arRes[0][$key];
                }

                $hrefPos = strpos($attrs, 'href="');
                if ($hrefPos === false) {
                    continue;
                }

                // Take only the attribute value: skip the 6 characters of href="
                // and stop at the closing quote (or at the end when it is missing).
                $valueStart = $hrefPos + 6;
                $valueEnd = strpos($attrs, '"', $valueStart);
                $href = $valueEnd === false
                    ? (string)substr($attrs, $valueStart)
                    : (string)substr($attrs, $valueStart, $valueEnd - $valueStart);

                if ($this->IsOuterUrl($href)) {
                    // Add the counts up; array_merge() would overwrite the count of a
                    // word already seen and renumber purely numeric words.
                    $this->__index['LINK_EXTERNAL'] = $this->_SumCounts(
                        $this->__index['LINK_EXTERNAL'],
                        $this->__prepareText($arRes[3][$key])
                    );
                    $this->__result_extended['LINK_EXTERNAL'][] = $arRes[0][$key];
                }
            }

            if ($this->__bCheckErrors && count($arRes[0]) > $this->__qualifier_links_count) {
                $this->_AddNote('SEO_LINKS_COUNT', array(
                    '#COUNT#' => count($arRes[0]),
                    '#COUNT_EXTERNAL#' => count($this->__result_extended['LINK_EXTERNAL']),
                    '#QUALIFIER#' => $this->__qualifier_links_count,
                ));
            }
        }

        // Meta description words index.
        if (preg_match(
            '/<meta.*?name=\"description\".*?content=\"([^\"]+)\"[^>]*>/i',
            $this->__result_data,
            $arRes
        )) {
            $this->__result_meta['DESCRIPTION'] = $arRes[1];
            $this->__result_extended['META_DESCRIPTION'] = $arRes[0];
            $this->__index['DESCRIPTION'] = $this->__prepareText($this->__result_meta['DESCRIPTION']);
        } elseif ($this->__bCheckErrors) {
            // Recorded only when checks are enabled, like every other note.
            $this->_AddNote('SEO_META_NO_DESCRIPTION');
        }

        // Meta keywords words index.
        if (preg_match(
            '/<meta.*?name=\"keywords\".*?content=\"([^\"]+)\"[^>]*>/i',
            $this->__result_data,
            $arRes
        )) {
            $this->__result_meta['KEYWORDS'] = $arRes[1];
            $this->__result_extended['META_KEYWORDS'] = $arRes[0];
            $this->__index['KEYWORDS'] = $this->__prepareText($this->__result_meta['KEYWORDS']);
        } elseif ($this->__bCheckErrors) {
            $this->_AddNote('SEO_META_NO_KEYWORDS');
        }

        // Title words index.
        if (preg_match("/<(title)>(.*?)<\\/\\1>/is", $this->__result_data, $arRes)) {
            $this->__result_extended['TITLE'] = $arRes[0];
            $this->__index['TITLE'] = $this->__prepareText($arRes[2]);
        }

        // All headings, kept for the extended data only.
        if (preg_match_all("/<(h[\d]{1}).*?>.*?<\\/\\1>/is", $this->__result_data, $arRes)) {
            $this->__result_extended['H'] = $arRes[0];
        }

        // H1 words index; more than one H1 or none at all is a note.
        if (preg_match_all("/<(h1).*?>(.*?)<\\/\\1>/is", $this->__result_data, $arRes)) {
            if ($this->__bCheckErrors && count($arRes[0]) > 1) {
                $this->_AddNote('SEO_H1_UNIQUE', array(
                    '#COUNT#' => count($arRes[0]),
                    '#VALUES#' => htmlspecialcharsbx('"' . implode('", "', $arRes[2]) . '"'),
                ));
            }

            $this->__index['H1'] = $this->__prepareText(implode(" ", $arRes[2]));
        } elseif ($this->__bCheckErrors) {
            $this->_AddNote('SEO_H1_ABSENT');
        }

        if (!$this->__bCheckErrors) {
            return;
        }

        // Let event handlers run their own checks; a handler that returns a
        // falsy value and leaves an application exception contributes a note.
        foreach (GetModuleEvents('seo', 'onPageCheck', true) as $arEvent) {
            $handlerResult = ExecuteModuleEventEx($arEvent, array(
                'QUERY' => array(
                    'URL' => $this->__url,
                    'LANG' => $this->__lang,
                    'SERVER_NAME' => $this->__server_name,
                    'SITE' => $this->__site,
                ),
                'DATA' => array(
                    'HEADERS' => $this->__result_headers,
                    'BODY' => $this->__result_data,
                ),
                'META' => $this->__result_meta,
                'INDEX' => $this->__index,
            ));

            if (!$handlerResult && ($ex = $GLOBALS['APPLICATION']->GetException())) {
                $this->__result_errors[] = array(
                    'CODE' => $ex->GetId(),
                    'TYPE' => 'NOTE',
                    'TEXT' => $ex->GetString(),
                );
            }
        }
    }

    /**
     * Returns log(count + 1) / log(total words) for $word, where the total
     * is never taken as less than 20.
     *
     * @param string $word Stemmed word.
     * @return float
     */
    public function _GetContrast($word)
    {
        $totalLength = $this->_GetTotalLength();
        $logDocLength = log($totalLength < 20 ? 20 : $totalLength);

        return log($this->_GetCount('TOTAL', $word) + 1) / $logDocLength;
    }

    /**
     * Returns general statistics of the parsed page.
     *
     * @return array|false False when no page has been parsed yet.
     */
    public function GetStatistics()
    {
        if (!is_array($this->__index)) {
            return false;
        }

        $totalLength = $this->_GetTotalLength();

        return array(
            'URL' => $this->__url,
            'TOTAL_LENGTH' => $this->_GetByteLength(),
            'TOTAL_WORDS_COUNT' => $totalLength ? $totalLength : '-',
            'UNIQUE_WORDS_COUNT' => $totalLength ? count($this->__index['TOTAL']) : '-',
            'META_KEYWORDS' => $this->__result_meta['KEYWORDS'],
            'META_DESCRIPTION' => $this->__result_meta['DESCRIPTION'],
        );
    }

    /**
     * @return string Absolute URL of the checked page.
     */
    public function GetURL()
    {
        return $this->__url;
    }

    /**
     * Returns how often a keyword occurs in each index, plus its contrast.
     *
     * For an array of keywords the result is an array with the same keys.
     * When $bStemmed is true the array is treated as a stemming() result,
     * i.e. the words are its keys.
     *
     * @param string|array $keyword
     * @param bool         $bStemmed True when $keyword is already stemmed.
     * @return array|false False when no page has been parsed yet.
     */
    public function CheckKeyword($keyword, $bStemmed = false)
    {
        if (!is_array($this->__index)) {
            return false;
        }

        if (is_array($keyword)) {
            $arResult = array();
            foreach ($keyword as $key => $word) {
                $arResult[$key] = $this->CheckKeyword($bStemmed ? $key : $word, $bStemmed);
            }

            return $arResult;
        }

        if (!$bStemmed && $this->bSearch) {
            $keyword = stemming($keyword, $this->__lang);
        }

        // Stemming turned the phrase into a word list: check every word.
        if (is_array($keyword)) {
            return $this->CheckKeyword($keyword, true);
        }

        return array(
            'TOTAL' => $this->_GetCount('TOTAL', $keyword),
            'BOLD' => $this->_GetCount('BOLD', $keyword),
            'ITALIC' => $this->_GetCount('ITALIC', $keyword),
            'LINK' => $this->_GetCount('LINK', $keyword),
            'LINK_EXTERNAL' => $this->_GetCount('LINK_EXTERNAL', $keyword),
            'DESCRIPTION' => $this->_GetCount('DESCRIPTION', $keyword),
            'KEYWORDS' => $this->_GetCount('KEYWORDS', $keyword),
            'TITLE' => $this->_GetCount('TITLE', $keyword),
            'H1' => $this->_GetCount('H1', $keyword),
            'CONTRAST' => $this->_GetContrast($keyword),
        );
    }

    /**
     * @return array Response headers under 'HEADERS' merged with the tags
     *               collected while parsing.
     */
    public function GetExtendedData()
    {
        return array_merge(array('HEADERS' => $this->__result_headers), $this->__result_extended);
    }

    /**
     * Returns the collected notes with their message text resolved.
     *
     * Notes without an explicit text get the language message
     * "<CODE>_ERROR" with the detail placeholders substituted.
     *
     * @return array|false False when there are no notes.
     */
    public function GetErrors()
    {
        if (count($this->__result_errors) <= 0) {
            return false;
        }

        $arResult = array();
        foreach ($this->__result_errors as $arError) {
            if (isset($arError['TEXT'])) {
                $text = $arError['TEXT'];
            } else {
                $text = str_replace(
                    array_keys($arError['DETAIL']),
                    array_values($arError['DETAIL']),
                    GetMessage($arError['CODE'] . '_ERROR')
                );
            }

            $arResult[] = array(
                'CODE' => $arError['CODE'],
                'TYPE' => $arError['TYPE'],
                'TEXT' => $text,
            );
        }

        return $arResult;
    }

    /**
     * Tells whether $url points to a host that is none of the known ones.
     *
     * Known hosts are the current server name and the DOMAINS of all active
     * sites; the list is built once per request. Hosts are compared in
     * lower case and without a leading "www.". URLs without a scheme
     * separator ("://") are never external.
     *
     * @param string $url
     * @return bool
     */
    public function IsOuterUrl($url)
    {
        $url = (string)$url;

        if (strncmp($url, '#', 1) === 0) {
            return false;
        }
        if (strncmp($url, 'mailto:', 7) === 0) {
            return false;
        }
        if (strncmp($url, 'javascript:', 11) === 0) {
            return false;
        }

        $pos = strpos($url, '://');
        if ($pos === false) {
            return false;
        }

        static $arDomainNames = null;
        if (null === $arDomainNames) {
            $arRawNames = array();
            if (isset($_SERVER['SERVER_NAME'])) {
                $arRawNames[] = $_SERVER['SERVER_NAME'];
            }

            // Real variables are passed instead of assignment expressions.
            // NOTE(review): presumably both sort parameters are by-reference - confirm.
            $by = 'sort';
            $order = 'asc';
            $dbRes = CSite::GetList($by, $order, array('ACTIVE' => 'Y'));
            while ($arSite = $dbRes->Fetch()) {
                if ($arSite['DOMAINS']) {
                    // One domain per line, whatever the line ending is.
                    $arRawNames = array_merge(
                        $arRawNames,
                        preg_split('/\r\n|\r|\n/', $arSite['DOMAINS'])
                    );
                }
            }

            $arDomainNames = array();
            foreach ($arRawNames as $rawName) {
                $name = $this->_NormalizeDomain($rawName);
                if ($name !== '') {
                    $arDomainNames[$name] = $name;
                }
            }
            $arDomainNames = array_values($arDomainNames);
        }

        // The host is everything between "://" and the next slash.
        $host = (string)substr($url, $pos + 3);
        $slashPos = strpos($host, '/');
        if ($slashPos !== false) {
            $host = (string)substr($host, 0, $slashPos);
        }

        $domain = $this->_NormalizeDomain($host);
        if ($domain === '') {
            return false;
        }

        return !in_array($domain, $arDomainNames, true);
    }

    /**
     * Records a note of type NOTE.
     *
     * @param string $code   Note code; GetErrors() resolves "<code>_ERROR".
     * @param array  $detail Placeholder => value pairs for the message.
     */
    protected function _AddNote($code, $detail = array())
    {
        $this->__result_errors[] = array(
            'CODE' => $code,
            'TYPE' => 'NOTE',
            'DETAIL' => $detail,
        );
    }

    /**
     * Returns the count stored for $word in the given index, 0 when the
     * index or the word is missing.
     *
     * @param string     $section Index name, e.g. 'TOTAL' or 'H1'.
     * @param string|int $word
     * @return int
     */
    protected function _GetCount($section, $word)
    {
        return isset($this->__index[$section][$word])
            ? intval($this->__index[$section][$word])
            : 0;
    }

    /**
     * Adds the counts of $extra to $base and returns the result.
     *
     * @param array $base  word => count
     * @param array $extra word => count
     * @return array
     */
    protected function _SumCounts($base, $extra)
    {
        foreach ($extra as $word => $count) {
            $base[$word] = (isset($base[$word]) ? $base[$word] : 0) + $count;
        }

        return $base;
    }

    /**
     * Returns the sum of all counts in the full words index (cached).
     *
     * @return int|float
     */
    protected function _GetTotalLength()
    {
        if (null === $this->__index_total_len) {
            $this->__index_total_len = array_sum($this->__index['TOTAL']);
        }

        return $this->__index_total_len;
    }

    /**
     * Returns the length of the fetched body in bytes.
     *
     * mb_strlen() with the single-byte 'latin1' encoding is used when it is
     * available, so the result is a byte count regardless of how strlen()
     * is configured.
     *
     * @return int
     */
    protected function _GetByteLength()
    {
        return function_exists('mb_strlen')
            ? mb_strlen($this->__result_data, 'latin1')
            : strlen($this->__result_data);
    }

    /**
     * Lower-cases a host name, trims it and drops a leading "www.".
     *
     * @param string $domain
     * @return string
     */
    protected function _NormalizeDomain($domain)
    {
        $domain = strtolower(trim((string)$domain));
        if (strncmp($domain, 'www.', 4) === 0) {
            $domain = (string)substr($domain, 4);
        }

        return $domain;
    }
}
?>