4 class_exists(
'duzun\\hQuery\\HTML_Parser', 
false) or require_once __DIR__ . DIRECTORY_SEPARATOR . 'hQuery' . DIRECTORY_SEPARATOR . 'HTML_Parser.php';
    27     public static $cache_path;
    28     public static $cache_expires = 3600;
    31     public static $_mockup_class; 
    41     public static function fromHTML($html, $url=NULL) {
    42         $index_time = microtime(
true);
    43         if ( isset(self::$_mockup_class) ) {
    44             $doc = 
new self::$_mockup_class($html, 
false);
    47             $doc = 
new self($html, 
false);
    53         $index_time = microtime(
true) - $index_time;
    54         $doc->index_time = $index_time * 1000;
    67     public static function fromFile($filename, $use_include_path=
false, $context=NULL) {
    68         $read_time = microtime(
true);
    69         $html = file_get_contents($filename, $use_include_path, $context);
    70         $read_time = microtime(
true) - $read_time;
    71         if($html === 
false) 
return $html;
    72         $doc = self::fromHTML($html, $filename);
    73         $doc->source_type = 
'file';
    74         $doc->read_time = $read_time * 1000;
    88     public static function fromURL($url, $headers=NULL, $body=NULL, $options=NULL) {
    94             'expires'   => self::$cache_expires,
    96         $hd = array(
'Accept-Charset' => 
'UTF-8,*');
    98         if($options) $opt = $options + $opt;
    99         if($headers) $hd  = $headers + $hd;
   101         $expires = $opt[
'expires'];
   102         unset($opt[
'expires']);
   104         if(0 < $expires and $dir = self::$cache_path) {
   106             $t = realpath($dir) and $dir = $t or mkdir($dir, 0766, 
true);
   107             $dir .= DIRECTORY_SEPARATOR;
   108             $cch_id = hash(
'sha1', $url, 
true);
   109             $t = hash(
'md5', self::jsonize($opt), 
true);
   110             $cch_id = bin2hex(substr($cch_id, 0, -strlen($t)) . (substr($cch_id, -strlen($t)) ^ $t));
   111             $cch_fn = $dir . $cch_id;
   112             $ext = strtolower(strrchr($url, 
'.'));
   113             if(strlen($ext) < 7 && preg_match(
'/^\\.[a-z0-9]+$/', $ext)) {
   117             $read_time = microtime(
true);
   118             $ret = self::get_cache($cch_fn, $expires, 
false);
   119             $read_time = microtime(
true) - $read_time;
   121                 $source_type = 
'cache';
   123                 $hdrs = $ret[1][
'hdr'];
   124                 $code = $ret[1][
'code'];
   125                 $url  = $ret[1][
'url'];
   127                 self::$last_http_result = (object)array(
   141             $source_type = 
'url';
   142             $read_time = microtime(
true);
   144             $ret = self::http_wr($url, $hd, $body, $opt);
   145             $read_time = microtime(
true) - $read_time;
   148             $hdrs = $ret->headers;
   151             if($ret->url) $url = $ret->url;
   153             if(!empty($cch_fn)) {
   154                 $save = self::set_cache($cch_fn, $html, array(
'hdr' => $hdrs, 
'code' => $code, 
'url' => $url));
   161         $doc = self::fromHTML($html, $url);
   163             $doc->headers = $hdrs;
   164             $doc->source_type = $source_type;
   165             isset($read_time) and $doc->read_time = $read_time * 1000;
   166             if(!empty($cch_meta)) $doc->cch_meta = $cch_meta;
   182     public function find($sel, $_attr=NULL, $ctx=NULL) {
   184         $c = func_num_args();
   185         for($i=1;$i<$c;$i++) {
   186             $a = func_get_arg($i);
   188                 if($a instanceof 
hQuery\Node) $ctx = $a;
   189                 else throw new \Exception(
'Wrong context in ' . __METHOD__);
   191             elseif(is_array($a))  $attr = array_merge($attr, $a);
   192             elseif(is_string($a)) $attr = array_merge($attr, self::html_parseAttrStr($a));
   194         if(isset($ctx)) $ctx = $this->_get_ctx($ctx);
   196         $sel = self::html_selector2struc($sel);
   200         foreach($sel as $a) {
   207                     $cx = $this->_get_ctx($rb);
   213                     if(isset($c[
'i'])) $at[
'id'] = $c[
'i'];
   216                         $rc = $this->_find($c[
'n'], $c[
'c'], $at, $cx);
   220                         $ch = $this->_children($rc);
   221                         $rc = $this->_filter($ch, $c[
'n'], $c[
'c'], $at);
   226                         foreach($c[
'p'] as $p) {
   228                                 if($p < 0) $p += count($rc);
   229                                 if(count($rc) >= 1 || $p) {
   230                                     $rc = $p < 0 ? NULL : array_slice($rc, $p, 1, 
true);
   233                             elseif(is_array($p)) {
   236                                     case '<': $rc = array_slice($rc, 0, $ch, 
true);          
break;
   237                                     case '>': $rc = array_slice($rc, $ch, count($rc), 
true); 
break;
   238                                     case '-': $rc = $this->_prev($rc, $ch); 
break;
   239                                     case '+': $rc = $this->_next($rc, $ch); 
break;
   240                                     case '|': 
do $rc = $this->_parent($rc);   
while($ch-- > 0); 
break;
   241                                     case '*': 
do $rc = $this->_children($rc); 
while($ch-- > 0); 
break;
   251             if($rc) 
if(!$ra) $ra = $rc; 
else { 
foreach($rc as $rb => $rc) $ra[$rb] = $rc; }
   255             return new hQuery\Element($this, $ra);
   /**
    * Find elements matching a selector and collect the HTML of each match.
    *
    * The selector, attribute filter and context are forwarded unchanged to
    * find(); every item of a non-empty result is converted with html().
    *
    * @param  mixed $sel   selector, passed through to find()
    * @param  mixed $attr  optional attribute filter, passed through to find()
    * @param  mixed $ctx   optional search context, passed through to find()
    * @return array html() output, keyed by the keys of the find() result
    */
   public function find_html($sel, $attr=NULL, $ctx=NULL) {
       // Forward the caller's arguments as they are. Writing `$attr=NULL, $ctx=NULL`
       // inside the call is an assignment expression: it overwrites both
       // parameters with NULL, so the filter and context were always dropped.
       $found = $this->find($sel, $attr, $ctx);

       // NOTE(review): the initialisation and return of $ret are not visible in
       // this listing; an empty-array default is assumed - confirm against the
       // full file.
       $ret = array();
       if($found) {
           foreach($found as $k => $v) {
               $ret[$k] = $v->html();
           }
       }
       return $ret;
   }
   /**
    * Find elements matching a selector and collect the text of each match.
    *
    * The selector, attribute filter and context are forwarded unchanged to
    * find(); every item of a non-empty result is converted with text().
    *
    * @param  mixed $sel   selector, passed through to find()
    * @param  mixed $attr  optional attribute filter, passed through to find()
    * @param  mixed $ctx   optional search context, passed through to find()
    * @return array text() output, keyed by the keys of the find() result
    */
   public function find_text($sel, $attr=NULL, $ctx=NULL) {
       // Forward the caller's arguments as they are. Writing `$attr=NULL, $ctx=NULL`
       // inside the call is an assignment expression: it overwrites both
       // parameters with NULL, so the filter and context were always dropped.
       $found = $this->find($sel, $attr, $ctx);

       // NOTE(review): the initialisation and return of $ret are not visible in
       // this listing; an empty-array default is assumed - confirm against the
       // full file.
       $ret = array();
       if($found) {
           foreach($found as $k => $v) {
               $ret[$k] = $v->text();
           }
       }
       return $ret;
   }
   /**
    * Public entry point that delegates to _index_all().
    *
    * @return mixed whatever _index_all() returns
    */
   public function index() {
       $result = $this->_index_all();
       return $result;
   }
   308     public static function jsonize($data, &$type = NULL, $ops = 0) {
   309         if(defined(
'JSON_UNESCAPED_UNICODE')) {
   310             $ops |= JSON_UNESCAPED_UNICODE;
   312         $str = $ops ? json_encode($data, $ops) : json_encode($data);
   313         if( $str === 
false  ) {
   314             $str = serialize($data);
   334             $type = self::serjstype($str);
   336         static $_json_support;
   337         if ( !isset($_json_support) ) {
   340             if ( function_exists(
'json_last_error') ) {
   343                 if ( function_exists(
'json_last_error_msg') ) {
   350                 $data = @unserialize($str);
   351                 if ( $data === 
false ) {
   352                     if ( strpos($str, 
"\n") !== 
false ) {
   353                         if ( $retry = strpos($str, 
"\r") === 
false ) {
   354                             $str = str_replace(
"\n", 
"\r\n", $str);
   356                         elseif ( $retry = strpos($str, 
"\r\n") !== 
false ) {
   357                             $str = str_replace(
"\r\n", 
"\n", $str);
   359                         $retry and $data = unserialize($str);
   365                 $data = json_decode($str, 
true);
   367                 if ( is_null($data) ) {
   369                     if( $_json_support == 0 ? $str !== 
'null' : json_last_error() != JSON_ERROR_NONE ) {
   370                         $t = preg_replace(
'/,\s*([\]\}])/m', 
'$1', $str) and
   371                         $data = json_decode($t, 
true);
   373                     if( is_null($data) ) {
   375                         if ( $_json_support ) {
   376                             if ( json_last_error() != JSON_ERROR_NONE ) {
   378                                 if ( $_json_support > 1 ) {
   379                                     error_log(
'json_decode: ' . json_last_error_msg());
   381                                 elseif( $_json_support > 0 ) {
   382                                     error_log(
"json_decode error with code #".json_last_error());
   389                             if ( $str !== 
'null' ) {
   390                                 error_log(
"json_decode error");
   398                 $data = json_decode($str, 
true);
   399                 if( is_null($data) && ($_json_support == 0 ? $str !== 
'null' : json_last_error() != JSON_ERROR_NONE) ) {
   400                     $data = unserialize($str);
   415         $c = substr($str, 0, 1);
   416         if($str === 
'N;' || strpos(
'sibadO', $c) !== 
false && substr($str, 1, 1) === 
':') {
   420             $l = substr($str, -1);
   421             if($c == 
'{' && $l == 
'}' || $c == 
'[' && $l == 
']') {
   436         function_exists(
'zlib_decode') and $_gzdecode = 
'zlib_decode' or
   437         function_exists(
'gzdecode')    and $_gzdecode = 
'gzdecode'    or
   447         if ( !isset($_gzdecode) ) {
   448             $_gzdecode = self::gz_supported();
   451         return $_gzdecode ? $_gzdecode($str) : self::_gzdecode($str);
   458     protected static function _gzdecode($gzdata, $maxlen=NULL) {
   460         $len = strlen($gzdata);
   464         $head = substr($gzdata, 0, 10);
   465         $head = unpack(
"n1id/C1cm/C1flg/V1mtime/C1xfl/C1os", $head);
   466         list($ID, $CM, $FLG, $MTIME, $XFL, $OS) = array_values($head);
   472         $head = unpack(
"V1crc/V1isize", substr($gzdata, $len-8, 8));
   473         list($CRC32, $ISIZE) = array_values($head);
   475         #-- check gzip stream identifier   477             trigger_error(
"gzdecode: not in gzip format", E_USER_WARNING);
   480         #-- check for deflate algorithm   482             trigger_error(
"gzdecode: cannot decode anything but deflated streams", E_USER_WARNING);
   485         #-- start of data, skip bonus fields   487         if ($FLG & $FEXTRA) {
   491             $s = strpos($gzdata, 
"\000", $s) + 1;
   493         if ($FLG & $FCOMMENT) {
   494             $s = strpos($gzdata, 
"\000", $s) + 1;
   500         #-- get data, uncompress   501         $gzdata = substr($gzdata, $s, $len-$s);
   503             $gzdata = gzinflate($gzdata, $maxlen);
   507             $gzdata = gzinflate($gzdata);
   511         $chk = crc32($gzdata);
   512         if ($CRC32 != $chk) {
   513             trigger_error(
"gzdecode: checksum failed (real$chk != comp$CRC32)", E_USER_WARNING);
   515         elseif ($ISIZE != strlen($gzdata)) {
   516             trigger_error(
"gzdecode: stream size mismatch", E_USER_WARNING);
   532     protected static function get_cache($fn, $expire=
false, $meta_only=
false) {
   534         if( $fm = @filemtime($fn) and (!$expire || $fm + $expire > time()) ) {
   535             $cnt = self::flock_get_contents($fn);
   539             if($gz = !strncmp($cnt, 
"\x1F\x8B", 2)) {
   540                 $cnt = self::gzdecode($cnt);
   543                 $n = (int)substr($cnt, 1, 0x10);
   546                     $meta = substr($cnt, $l, $n);
   547                     if($meta !== 
'') $meta = self::unjsonize($meta);
   549                 if($meta_only) $cnt = 
'';
   552                     if($cnt[$l] == 
"\n") {
   553                         $cnt = substr($cnt, ++$l);
   554                         if($cnt !== 
'') $cnt = self::unjsonize($cnt);
   557                         $cnt = substr($cnt, $l);
   562                 if($meta_only) $cnt = 
'';
   565         return $cnt || $meta ? array($cnt, $meta) : 
false;
   578     protected static function set_cache($fn, $cnt, $meta=NULL, $gzip=
true) {
   579         if($cnt === 
false) 
return !file_exists($fn) || unlink($fn);
   582            $meta = self::jsonize($meta);
   585         $meta = 
'#'.$n . 
"\n" . $meta;
   586         if(!is_string($cnt) || $cnt[0] == 
"\n") { $cnt = 
"\n" . self::jsonize($cnt); ++$n; }
   587         if($n) $cnt = $meta . $cnt;
   589         @mkdir(dirname($fn), 0777, 
true);
   591             $gl = is_int($gzip) ? $gzip : 1024;
   593             strlen($cnt) > $gl && self::gz_supported() and
   594             $cnt = gzencode($cnt);
   596         return self::flock_put_contents($fn, $cnt);
   611     static function do_flock($fp, $lock, $timeout_ms=384) {
   612         $l = flock($fp, $lock);
   613         if( !$l && ($lock & LOCK_UN) != LOCK_UN ) {
   614             $st = microtime(
true);
   615             $m = min( 1e3, $timeout_ms*1e3);
   616             $n = min(64e3, $timeout_ms*1e3);
   617             if($m == $n) $m = ($n >> 1) + 1;
   618             $timeout_ms = (float)$timeout_ms / 1000;
   621                 usleep($t = rand($m, $n));
   622                 $l = flock($fp, $lock);
   623             } 
while ( !$l && (microtime(
true)-$st) < $timeout_ms );
   628     static function flock_put_contents($fn, $cnt, $block=
false) {
   631         if( $f = fopen($fn, 
'c+') ) {
   632             $app = $block & FILE_APPEND and $block ^= $app;
   633             if( $block ? self::do_flock($f, LOCK_EX) : flock($f, LOCK_EX | LOCK_NB) ) {
   634                 if(is_array($cnt) || is_object($cnt)) $cnt = self::jsonize($cnt);
   635                 if($app) fseek($f, 0, SEEK_END);
   636                 if(
false !== ($ret = fwrite($f, $cnt))) {
   638                     ftruncate($f, ftell($f));
   647     static function flock_get_contents($fn, $block=
false) {
   650         if( $f = fopen($fn, 
'r') ) {
   651             if( flock($f, LOCK_SH | ($block ? 0 : LOCK_NB)) ) {
   653                 do $ret .= $r = fread($f, $s); 
while($r !== 
false && !feof($f));
   654                 if($ret == NULL && $r === 
false) $ret = $r;
   664     public static function parse_cookie($str) {
   666         if ( is_array($str) ) {
   667             foreach($str as $k => $v) {
   668                 $ret[$k] = self::parse_cookie($v);
   673         $str = explode(
';', $str);
   674         $t = explode(
'=', array_shift($str), 2);
   676         $ret[
'value'] = $t[1];
   677         foreach ($str as $t) {
   678             $t = explode(
'=', trim($t), 2);
   679             if ( count($t) == 2 ) {
   680                 $ret[strtolower($t[0])] = $t[1];
   683                 $ret[strtolower($t[0])] = 
true;
   687         if ( !empty($ret[
'expires']) && is_string($ret[
'expires']) ) {
   688             $t = strtotime($ret[
'expires']);
   689             if ( $t !== 
false and $t !== -1 ) {
   690                 $ret[
'expires'] = $t;
   720     public static function http_wr($host, $head = NULL, $body = NULL, $options = NULL) {
   721         self::$last_http_result =
   722         $ret = new \stdClass;
   723         empty($options) and $options = array();
   726         if($p = strpos($host, 
'://') and $p < 7) {
   728             $p = parse_url($host);
   730                 throw new \Exception(
'Wrong host specified'); 
   734             if(isset($p[
'query'])) {
   735                 $path .= 
'?' . $p[
'query'];
   737             if(isset($p[
'port'])) {
   740             unset($p[
'path'], $p[
'query']);
   745             $p = explode(
'/', $host, 2); list($host, $path) = $p;
   746             $p = explode(
':', $host, 2); list($host, $port) = $p;
   749         if(strncmp($path, 
'/', 1)) {
   755             if(isset($options[
'port'])) {
   756                 $port = $options[
'port'];
   759                 switch($options[
'scheme']) {
   762                     case 'https': $port = 443; 
break;
   763                     case 'ftp'  : $port = 21; 
break;
   764                     case 'sftp' : $port = 22; 
break;
   766                     default     : $port = 80;
   774             'host'   => isset($options[
'host']) ? $options[
'host'] : $host,
   775             'accept' => 
'text/html,application/xhtml+xml,application/xml;q =0.9,*/*;q=0.8',
   777         if(!empty($options[
'scheme'])) {
   778             switch($p[
'scheme']) {
   783                     $conhost = 
'tls://' . $host;
   786                     $conhost = $options[
'scheme'] . 
'://' . $host;
   790         static $boundary = 
"\r\n\r\n";
   791         $blen = strlen($boundary);
   793            if(is_array($body) || is_object($body)) {
   794               $body = http_build_query($body);
   795               $_h[
'content-type'] = 
'application/x-www-form-urlencoded';
   797            $body = (string)$body;
   798            $_h[
'content-length'] = strlen($body);
   800            empty($options[
'method']) and $options[
'method'] = 
'POST';
   806         !empty($options[
'method']) and $meth = strtoupper($options[
'method']) or $meth = 
'GET';
   809             if(!is_array($head)) {
   810                 $head = explode(
"\r\n", $head);
   812             foreach($head as $i => $v) {
   814                     $v = explode(
':', $v, 2);
   815                     if(count($v) != 2) 
continue; 
   818                 $i = strtolower(strtr($i, 
' _', 
'--'));
   823         if(@$options[
'decode'] == 
'gzip') {
   825                 $_h[
'accept-encoding'] = 
'gzip';
   832         if(!isset($options[
'close']) || @$options[
'close']) {
   833             $_h[
'connection'] = 
'close';
   836             $_h[
'connection'] = 
'keep-alive';
   839         $prot = empty($options[
'protocol']) ? 
'HTTP/1.1' : $options[
'protocol'];
   841         $head = array(
"$meth $path $prot");
   842         foreach($_h as $i => $v) {
   843             $i = explode(
'-', $i);
   844             foreach($i as &$j) $j = ucfirst($j);
   845             $i = implode(
'-', $i);
   846             $head[] = $i . 
': ' . $v;
   848         $rqst = implode(
"\r\n", $head) . $boundary . $body;
   851         $timeout = isset($options[
'timeout']) ? $options[
'timeout'] : @ini_get(
"default_socket_timeout");
   853         $ret->options = $options;
   860         $fs = @fsockopen($conhost, $port, $errno, $errstr, $timeout);
   862             throw new \Exception(
'unable to create socket "'.$conhost.
':'.$port.
'" '.$errstr, $errno);
   864         if(!fwrite($fs, $rqst)) {
   865             throw new \Exception(
"unable to write");
   870             while($open = !feof($fs) && ($p = @fgets($fs, 1024))) {
   871                 if($p == 
"\r\n") 
break;
   876                 $h = explode(
"\r\n", rtrim($rsps));
   877                 list($rprot, $rcode, $rmsg) = explode(
' ', array_shift($h), 3);
   879                     $v = explode(
':', $v, 2);
   880                     $k = strtoupper(strtr($v[0], 
'- ', 
'__'));
   881                     $v = isset($v[1]) ? trim($v[1]) : NULL;
   884                     if ( isset($_rh[$k]) ) {
   886                             if ( is_array($_rh[$k]) ) {
   890                                 $_rh[$k] = array($_rh[$k], $v);
   899                 $_preserve_method = 
true;
   904                          $_preserve_method = 
false;
   908                       if( @$options[
'redirects'] > 0 && $loc = @$_rh[
'LOCATION'] ) {
   909                          if ( !empty($options[
'host']) ) {
   910                             $host = $options[
'host'];
   912                          is_array($loc) and $loc = end($loc);
   913                          $loc = self::abs_url($loc, compact(
'host', 
'port', 
'path') + array(
'scheme' => empty($options[
'scheme'])?
'':$options[
'scheme']));
   914                          unset($_h[
'host'], $options[
'host'], $options[
'port'], $options[
'scheme']);
   915                          if ( isset($options[
'redirect_method']) ) {
   916                              $redirect_method = $options[
'redirect_method'];
   917                              if ( is_string($redirect_method) ) {
   918                                  $options[
'method'] = $redirect_method = strtoupper($redirect_method);
   919                                  $_preserve_method = 
true;
   920                                  if ( $redirect_method != 
'POST' && $redirect_method != 
'PUT' && $redirect_method != 
'DELETE' ) {
   925                                  $_preserve_method = (bool)$redirect_method;
   928                          if ( !$_preserve_method ) {
   930                              unset($options[
'method']);
   932                          --$options[
'redirects'];
   934                          if ( !empty($_rh[
'SET_COOKIE']) && !empty($options[
'use_cookies']) ) {
   935                             $t = self::parse_cookie((array)$_rh[
'SET_COOKIE']);
   940                                     if ( empty($c[
'expires']) || $c[
'expires'] >= $now ) {
   941                                         $_h[
'cookie'] = (empty($_h[
'cookie']) ? 
'' : $_h[
'cookie'] . 
'; ') .
   942                                                         $c[
'key'] . 
'=' . $c[
'value'];
   947                          return self::http_wr($loc, $_h, $body, $options);
   953                 if(@!$open || $rcode < 200 || $rcode == 204 || $rcode == 304 || $meth == 
'HEAD') {
   956                 elseif(isset($_rh[
'TRANSFER_ENCODING']) && strtolower($_rh[
'TRANSFER_ENCODING']) === 
'chunked') {
   959                 elseif(isset($_rh[
'CONTENT_LENGTH'])) {
   960                     $bl = (int)$_rh[
'CONTENT_LENGTH'];
   971                       while($bl > 0 and $open &= !feof($fs) && ($p = @fread($fs, $bl))) {
   977                       while($open &= !feof($fs) && ($p = @fgets($fs, 1024))) {
   978                          $_re = explode(
';', rtrim($p));
   982                           while($bl > 0 and $open &= !feof($fs) && ($p = @fread($fs, $bl))) {
   988                       if($open &= !feof($fs) && ($p = @fgets($fs, 1024))) {
   991                             $v = explode(
':', $p, 2);
   992                             $k = strtoupper(strtr($v[0], 
'- ', 
'__'));
   993                             $v = isset($v[1]) ? trim($v[1]) : NULL;
   996                             if ( isset($_rh[$k]) ) {
   998                                     if ( is_array($_rh[$k]) ) {
  1002                                         $_rh[$k] = array($_rh[$k], $v);
  1015                           while($open &= !feof($fs) && ($p = @fread($fs, 1024))) { 
  1022                     isset($options[
'decode']) && $options[
'decode'] == 
'gzip' &&
  1023                     isset($_rh[
'CONTENT_ENCODING']) && $_rh[
'CONTENT_ENCODING'] == 
'gzip'  1025                     $r = self::gzdecode($rsps);
  1027                         unset($_rh[
'CONTENT_ENCODING']);
  1031                         throw new \Exception(
"Can't gzdecode(response), try ['decode' => false] option");
  1034                 $ret->code    = $rcode;
  1036                 $ret->headers = isset($_rh) ? $_rh : NULL;
  1038                 $ret->method  = $meth;
  1042                 $ret->request = $rqst;
 static fromFile($filename, $use_include_path=false, $context=NULL)
static get_cache($fn, $expire=false, $meta_only=false)
static http_wr($host, $head=NULL, $body=NULL, $options=NULL)
find_text($sel, $attr=NULL, $ctx=NULL)
static set_cache($fn, $cnt, $meta=NULL, $gzip=true)
static fromHTML($html, $url=NULL)
find($sel, $_attr=NULL, $ctx=NULL)
static unjsonize($str, &$type=NULL)
static fromURL($url, $headers=NULL, $body=NULL, $options=NULL)
static do_flock($fp, $lock, $timeout_ms=384)
static _gzdecode($gzdata, $maxlen=NULL)
static jsonize($data, &$type=NULL, $ops=0)
find_html($sel, $attr=NULL, $ctx=NULL)