|
hQuery.php
|
Public Member Functions | |
| find ($sel, $_attr=NULL, $ctx=NULL) | |
| find_html ($sel, $attr=NULL, $ctx=NULL) | |
| find_text ($sel, $attr=NULL, $ctx=NULL) | |
| index () | |
Public Member Functions inherited from duzun\hQuery\HTML_Parser | |
| __get ($name) | |
| __set ($name, $value) | |
| location ($href=NULL) | |
| baseURI ($href=NULL) | |
| get/set baseURI | |
| __construct ($html, $idx=true) | |
| __toString () | |
| url2abs ($url) | |
| strlen () | |
| substr ($start, $length=NULL) | |
| _info () | |
| This method is for debugging only. | |
| hasClass ($id, $cl) | |
Public Member Functions inherited from duzun\hQuery\Node | |
| attr ($attr=NULL, $to_str=false) | |
| is_empty () | |
| isEmpty () | |
| isDoc () | |
| doc () | |
| find ($sel, $attr=NULL) | |
| exclude ($sel, $attr=NULL) | |
| __toString () | |
| html ($id=NULL) | |
| outerHtml ($id=NULL) | |
| text ($id=NULL) | |
| nodeName ($caseFolding=NULL, $id=NULL) | |
| pos ($restore=true) | |
| _children ($ids=NULL, $n=NULL) | |
| _next ($ids=NULL, $n=0) | |
| _prev ($ids=NULL, $n=0) | |
| _all ($ids=NULL) | |
| _has ($el, $eq=false) | |
| $el < $this, with $eq == true -> $el <= $this | |
| _filter_contains ($el, $eq=false) | |
| __get ($name) | |
| __set ($name, $value) | |
| __isset ($name) | |
| __unset ($name) | |
| count () | |
| current () | |
| valid () | |
| key () | |
| next () | |
| prev () | |
| rewind () | |
Static Public Member Functions | |
| static | fromHTML ($html, $url=NULL) |
| static | fromFile ($filename, $use_include_path=false, $context=NULL) |
| static | fromURL ($url, $headers=NULL, $body=NULL, $options=NULL) |
| static | jsonize ($data, &$type=NULL, $ops=0) |
| static | unjsonize ($str, &$type=NULL) |
| static | gz_supported () |
| static | gzdecode ($str) |
| static | do_flock ($fp, $lock, $timeout_ms=384) |
| static | flock_put_contents ($fn, $cnt, $block=false) |
| static | flock_get_contents ($fn, $block=false) |
| static | parse_cookie ($str) |
| static | http_wr ($host, $head=NULL, $body=NULL, $options=NULL) |
Static Public Member Functions inherited from duzun\hQuery\HTML_Parser | |
| static | get_url_base ($url, $array=false) |
| static | get_url_path ($url) |
| static | is_url_path ($path) |
| static | is_abs_path ($path) |
| static | abs_url ($url, $base) |
| static | detect_charset ($str) |
Static Public Member Functions inherited from duzun\hQuery\Node | |
| static | html_normal_pseudoClass ($p) |
| static | html_selector2struc ($sel) |
| static | html_parseAttrStr ($str, $case_folding=true, $extended=false) |
| static | html_attr2str ($attr, $quote='"') |
| static | parseCSStr ($str, $case_folding=true) |
| static | CSSArr2Str ($css) |
| static | str_range ($comp, $pos=0, $len=NULL) |
| static | array_select ($arr, $keys, $force_null=false) |
| static | convert_encoding ($a, $to, $from=NULL) |
Public Attributes | |
| $headers | |
Public Attributes inherited from duzun\hQuery\Node | |
| const | VERSION = '2.0.2' |
| $tag_map | |
Static Public Attributes | |
| static | $cache_path |
| static | $cache_expires = 3600 |
| static | $_mockup_class |
Static Public Attributes inherited from duzun\hQuery\HTML_Parser | |
| static | $del_spaces = false |
| static | $case_folding = true |
| static | $autoclose_tags = false |
| static | $_emptyTags = array('base','meta','link','hr','br','basefont','param','img','area','input','isindex','col') |
| static | $_specialTags = array('--'=>'--', '[CDATA['=>']]') |
| static | $_unparsedTags = array('style', 'script') |
| static | $_index_attribs = array('href', 'src') |
| static | $_url_attribs = array('href'=>'href', 'src'=>'src') |
Static Public Attributes inherited from duzun\hQuery\Node | |
| static | $last_http_result |
| static | $selected_doc = NULL |
| static | $_ar_ = array() |
| static | $_mi_ = PHP_INT_MAX |
| static | $_nl_ = NULL |
| static | $_fl_ = false |
| static | $_tr_ = true |
Static Protected Member Functions | |
| static | serjstype ($str) |
| static | _gzdecode ($gzdata, $maxlen=NULL) |
| static | get_cache ($fn, $expire=false, $meta_only=false) |
| static | set_cache ($fn, $cnt, $meta=NULL, $gzip=true) |
Static Protected Member Functions inherited from duzun\hQuery\Node | |
| static | html_findTagClose ($str, $p) |
Additional Inherited Members | |
Protected Member Functions inherited from duzun\hQuery\HTML_Parser | |
| _index_comments_html ($o) | |
| Index comment tags position in source HTML. | |
| _index_all () | |
| _get_ctx ($ctx) | |
| _find ($name, $class=NULL, $attr=NULL, $ctx=NULL, $rec=true) | |
| _filter ($ids, $name=NULL, $class=NULL, $attr=NULL, $ctx=NULL) | |
| get_aids_byAttr ($attr, $as_keys=false, $actx=NULL) | |
| get_aids_byClass ($cl, $as_keys=false, $actx=NULL) | |
| get_aids_byClassAttr ($cl, $attr, $as_keys=false, $actx=NULL) | |
| get_ids_byAid ($aid, $sort=true, $has_keys=false) | |
| get_ids_byAttr ($attr, $sort=true) | |
| get_ids_byClass ($cl, $sort=true) | |
| get_ids_byClassAttr ($cl, $attr, $sort=true) | |
| get_attr_byAid ($aid, $to_str=false) | |
| get_attr_byId ($id, $attr=NULL, $to_str=false) | |
Protected Member Functions inherited from duzun\hQuery\Node | |
| __construct ($doc, $ids, $is_ctx=false) | |
| _ctx_ids ($ids=NULL) | |
| _sub_ids ($eq=false) | |
| _doc_ids ($el, $force_array=true) | |
| _my_ids ($id=NULL, $keys=false) | |
| _parent ($ids=NULL, $n=0) | |
Protected Attributes inherited from duzun\hQuery\HTML_Parser | |
| $html = '' | |
| $tags | |
| $attrs | |
| $attribs | |
| $idx_attr | |
| $tag_idx | |
| $attr_idx | |
| $class_idx | |
| $o = NULL | |
| $indexed = false | |
Protected Attributes inherited from duzun\hQuery\Node | |
| $_prop = array() | |
| $doc | |
| $ids | |
| $exc | |
Static Protected Attributes inherited from duzun\hQuery\HTML_Parser | |
| static | $_tagID_first_letter = 'a-zA-Z_' |
| static | $_tagID_letters = 'a-zA-Z_0-9:\-' |
| static | $_icharset = 'UTF-8' |
Main Class, represents an HTML document.
An extremely fast web scraper that parses megabytes of HTML in a blink of an eye. PHP5+, no dependencies.
API Documentation at https://duzun.github.io/hQuery.php
Copyright (C) 2014-2018 Dumitru Uzun
Definition at line 21 of file hQuery.php.
|
staticprotected |
Alternative gzdecode() (for PHP < 5.4.0) source: https://github.com/Polycademy/upgradephp/blob/master/upgrade.php
Definition at line 458 of file hQuery.php.
|
static |
Lock with retries
| resource | $fp | - Open file pointer |
| int | $lock | - Lock type |
| int | $timeout_ms | - OPTIONAL Timeout to wait for unlock in milliseconds |
Definition at line 611 of file hQuery.php.
| duzun\hQuery::find | ( | $sel, | |
$_attr = NULL, |
|||
$ctx = NULL |
|||
| ) |
Finds a collection of nodes inside current document/context (similar to jQuery.fn.find()).
| string | $sel | - A valid CSS selector (some pseudo-selectors supported). |
| array | string | $attr | - OPTIONAL attributes as string or key-value pairs. |
| hQuery\Node | $ctx | - OPTIONAL the context where to search. If omitted, $this is used. |
Definition at line 182 of file hQuery.php.
Referenced by duzun\hQuery\HTML_Parser\hasClass().
| duzun\hQuery::find_html | ( | $sel, | |
$attr = NULL, |
|||
$ctx = NULL |
|||
| ) |
Combination of ->find() + ->html()
| string | $sel | - A valid CSS selector. |
| array | string | $attr | - OPTIONAL attributes as string or key-value pairs. |
| hQuery\Node | $ctx | - OPTIONAL the context where to search. If omitted, $this is used. |
Definition at line 269 of file hQuery.php.
| duzun\hQuery::find_text | ( | $sel, | |
$attr = NULL, |
|||
$ctx = NULL |
|||
| ) |
Combination of ->find() + ->text()
| string | $sel | - A valid CSS selector. |
| array | string | $attr | - OPTIONAL attributes as string or key-value pairs. |
| hQuery\Node | $ctx | - OPTIONAL the context where to search. If omitted, $this is used. |
Definition at line 285 of file hQuery.php.
|
static |
Read the HTML document from a file.
| string | $filename | - a valid filename |
| bool | $use_include_path | - OPTIONAL passed to file_get_contents() |
| resource | $context | - OPTIONAL A valid context resource created with stream_context_create(). See file_get_contents() |
Definition at line 67 of file hQuery.php.
|
static |
Parse an HTML string.
| string | $html | - source of some HTML document |
| string | $url | - OPTIONAL location of the document. Used for relative URLs inside the document. |
Definition at line 41 of file hQuery.php.
|
static |
Fetch the HTML document from remote $url.
| string | $url | - the URL of the document |
| array | $headers | - OPTIONAL request headers |
| array | string | $body | - OPTIONAL body of the request (for POST or PUT) |
| array | $options | - OPTIONAL request options (see self::http_wr() for more details) |
Definition at line 88 of file hQuery.php.
|
staticprotected |
Read data from a cache file.
| string | $fn | - cache filename |
| int | $expire | - OPTIONAL contents returned only if it is newer than $expire seconds |
| bool | $meta_only | - OPTIONAL if TRUE, read only meta-info (faster) |
Definition at line 532 of file hQuery.php.
|
static |
Find a function to decode gzip data.
Definition at line 435 of file hQuery.php.
|
static |
gzdecode() (for PHP < 5.4.0)
Definition at line 445 of file hQuery.php.
|
static |
Executes a HTTP write-read session.
| string | $host | - IP/HOST address or URL |
| array | $head | - list of HTTP headers to be sent along with the request to $host |
| mixed | $body | - data to be sent as the contents of the request. If it is an array or object, an HTTP query is built. |
| array | $options | - list of options as key-value pairs: timeout - connection timeout in seconds; host - goes to headers, overrides $host (ex. $host == '127.0.0.1', $options['host'] == 'www.example.com'); port - useful when $host is not a full URL; scheme - http, ssl, tls, udp, ...; close - whether to close the connection or not; redirects - number of allowed redirects; redirect_method - if (string), this is the new method for the redirect request, else if true, preserve the method, else use 'GET' on redirect. By default, the method is preserved on 307 and 308, and GET is used on 301-303. |
Definition at line 720 of file hQuery.php.
| duzun\hQuery::index | ( | ) |
Index elements of the source HTML. (Called automatically)
Definition at line 295 of file hQuery.php.
|
static |
Serialize $data as JSON, falling back to PHP serialize().
| mixed | $data | - the data to be serialized |
| &string | $type | - returns the serialization method used ('json' or 'ser') |
Definition at line 308 of file hQuery.php.
|
staticprotected |
Tries to detect format of $str (json or ser).
| string | $str | - JSON encoded or PHP serialized data. |
Definition at line 414 of file hQuery.php.
|
staticprotected |
Save data to a cache file.
| string | $fn | - cache filename |
| mixed | $cnt | - contents to be cached |
| array | $meta | - OPTIONAL meta information related to contents. |
| bool | $gzip | - OPTIONAL if TRUE and gzip supported, store contents gzipped |
Definition at line 578 of file hQuery.php.
|
static |
Unserialize $str from either JSON or PHP serialized format.
| string | $str | - the data to be unserialized |
| &string | $type | - if not set, returns the serialization method detected ('json' or 'ser'); if set, forces unjsonize() to use this method for unserialization. |
Definition at line 332 of file hQuery.php.
1.8.14