hQuery.php
|
Public Member Functions | |
find ($sel, $_attr=NULL, $ctx=NULL) | |
find_html ($sel, $attr=NULL, $ctx=NULL) | |
find_text ($sel, $attr=NULL, $ctx=NULL) | |
index () | |
![]() | |
__get ($name) | |
__set ($name, $value) | |
location ($href=NULL) | |
baseURI ($href=NULL) | |
get/set baseURI | |
__construct ($html, $idx=true) | |
__toString () | |
url2abs ($url) | |
strlen () | |
substr ($start, $length=NULL) | |
_info () | |
This method is for debugging only. | |
hasClass ($id, $cl) | |
![]() | |
attr ($attr=NULL, $to_str=false) | |
is_empty () | |
isEmpty () | |
isDoc () | |
doc () | |
find ($sel, $attr=NULL) | |
exclude ($sel, $attr=NULL) | |
__toString () | |
html ($id=NULL) | |
outerHtml ($id=NULL) | |
text ($id=NULL) | |
nodeName ($caseFolding=NULL, $id=NULL) | |
pos ($restore=true) | |
_children ($ids=NULL, $n=NULL) | |
_next ($ids=NULL, $n=0) | |
_prev ($ids=NULL, $n=0) | |
_all ($ids=NULL) | |
_has ($el, $eq=false) | |
$el < $this, with $eq == true -> $el <= $this | |
_filter_contains ($el, $eq=false) | |
__get ($name) | |
__set ($name, $value) | |
__isset ($name) | |
__unset ($name) | |
count () | |
current () | |
valid () | |
key () | |
next () | |
prev () | |
rewind () | |
Static Public Member Functions | |
static | fromHTML ($html, $url=NULL) |
static | fromFile ($filename, $use_include_path=false, $context=NULL) |
static | fromURL ($url, $headers=NULL, $body=NULL, $options=NULL) |
static | jsonize ($data, &$type=NULL, $ops=0) |
static | unjsonize ($str, &$type=NULL) |
static | gz_supported () |
static | gzdecode ($str) |
static | do_flock ($fp, $lock, $timeout_ms=384) |
static | flock_put_contents ($fn, $cnt, $block=false) |
static | flock_get_contents ($fn, $block=false) |
static | parse_cookie ($str) |
static | http_wr ($host, $head=NULL, $body=NULL, $options=NULL) |
![]() | |
static | get_url_base ($url, $array=false) |
static | get_url_path ($url) |
static | is_url_path ($path) |
static | is_abs_path ($path) |
static | abs_url ($url, $base) |
static | detect_charset ($str) |
![]() | |
static | html_normal_pseudoClass ($p) |
static | html_selector2struc ($sel) |
static | html_parseAttrStr ($str, $case_folding=true, $extended=false) |
static | html_attr2str ($attr, $quote='"') |
static | parseCSStr ($str, $case_folding=true) |
static | CSSArr2Str ($css) |
static | str_range ($comp, $pos=0, $len=NULL) |
static | array_select ($arr, $keys, $force_null=false) |
static | convert_encoding ($a, $to, $from=NULL) |
Public Attributes | |
$headers | |
![]() | |
const | VERSION = '2.0.2' |
$tag_map | |
Static Public Attributes | |
static | $cache_path |
static | $cache_expires = 3600 |
static | $_mockup_class |
![]() | |
static | $del_spaces = false |
static | $case_folding = true |
static | $autoclose_tags = false |
static | $_emptyTags = array('base','meta','link','hr','br','basefont','param','img','area','input','isindex','col') |
static | $_specialTags = array('--'=>'--', '[CDATA['=>']]') |
static | $_unparsedTags = array('style', 'script') |
static | $_index_attribs = array('href', 'src') |
static | $_url_attribs = array('href'=>'href', 'src'=>'src') |
![]() | |
static | $last_http_result |
static | $selected_doc = NULL |
static | $_ar_ = array() |
static | $_mi_ = PHP_INT_MAX |
static | $_nl_ = NULL |
static | $_fl_ = false |
static | $_tr_ = true |
Static Protected Member Functions | |
static | serjstype ($str) |
static | _gzdecode ($gzdata, $maxlen=NULL) |
static | get_cache ($fn, $expire=false, $meta_only=false) |
static | set_cache ($fn, $cnt, $meta=NULL, $gzip=true) |
![]() | |
static | html_findTagClose ($str, $p) |
Additional Inherited Members | |
![]() | |
_index_comments_html ($o) | |
Index comment tags position in source HTML. | |
_index_all () | |
_get_ctx ($ctx) | |
_find ($name, $class=NULL, $attr=NULL, $ctx=NULL, $rec=true) | |
_filter ($ids, $name=NULL, $class=NULL, $attr=NULL, $ctx=NULL) | |
get_aids_byAttr ($attr, $as_keys=false, $actx=NULL) | |
get_aids_byClass ($cl, $as_keys=false, $actx=NULL) | |
get_aids_byClassAttr ($cl, $attr, $as_keys=false, $actx=NULL) | |
get_ids_byAid ($aid, $sort=true, $has_keys=false) | |
get_ids_byAttr ($attr, $sort=true) | |
get_ids_byClass ($cl, $sort=true) | |
get_ids_byClassAttr ($cl, $attr, $sort=true) | |
get_attr_byAid ($aid, $to_str=false) | |
get_attr_byId ($id, $attr=NULL, $to_str=false) | |
![]() | |
__construct ($doc, $ids, $is_ctx=false) | |
_ctx_ids ($ids=NULL) | |
_sub_ids ($eq=false) | |
_doc_ids ($el, $force_array=true) | |
_my_ids ($id=NULL, $keys=false) | |
_parent ($ids=NULL, $n=0) | |
![]() | |
$html = '' | |
$tags | |
$attrs | |
$attribs | |
$idx_attr | |
$tag_idx | |
$attr_idx | |
$class_idx | |
$o = NULL | |
$indexed = false | |
![]() | |
$_prop = array() | |
$doc | |
$ids | |
$exc | |
![]() | |
static | $_tagID_first_letter = 'a-zA-Z_' |
static | $_tagID_letters = 'a-zA-Z_0-9:\-' |
static | $_icharset = 'UTF-8' |
Main Class, represents an HTML document.
An extremely fast web scraper that parses megabytes of HTML in a blink of an eye. PHP5+, no dependencies.
API Documentation at https://duzun.github.io/hQuery.php
Copyright (C) 2014-2018 Dumitru Uzun
Definition at line 21 of file hQuery.php.
|
staticprotected |
Alternative gzdecode() (for PHP < 5.4.0) source: https://github.com/Polycademy/upgradephp/blob/master/upgrade.php
Definition at line 458 of file hQuery.php.
|
static |
Lock with retries
resource | $fp | - Open file pointer |
int | $lock | - Lock type |
int | $timeout_ms | - OPTIONAL Timeout to wait for unlock in milliseconds |
Definition at line 611 of file hQuery.php.
duzun\hQuery::find | ( | $sel, | |
$_attr = NULL , |
|||
$ctx = NULL |
|||
) |
Finds a collection of nodes inside current document/context (similar to jQuery.fn.find()).
string | $sel | - A valid CSS selector (some pseudo-selectors supported). |
array | string | $attr | - OPTIONAL attributes as string or key-value pairs. |
hQuery\Node | $ctx | - OPTIONAL the context where to search. If omitted, $this is used. |
Definition at line 182 of file hQuery.php.
Referenced by duzun\hQuery\HTML_Parser\hasClass().
duzun\hQuery::find_html | ( | $sel, | |
$attr = NULL , |
|||
$ctx = NULL |
|||
) |
Combination of ->find() + ->html()
string | $sel | - A valid CSS selector. |
array | string | $attr | - OPTIONAL attributes as string or key-value pairs. |
hQuery\Node | $ctx | - OPTIONAL the context where to search. If omitted, $this is used. |
Definition at line 269 of file hQuery.php.
duzun\hQuery::find_text | ( | $sel, | |
$attr = NULL , |
|||
$ctx = NULL |
|||
) |
Combination of ->find() + ->text()
string | $sel | - A valid CSS selector. |
array | string | $attr | - OPTIONAL attributes as string or key-value pairs. |
hQuery\Node | $ctx | - OPTIONAL the context where to search. If omitted, $this is used. |
Definition at line 285 of file hQuery.php.
|
static |
Read the HTML document from a file.
string | $filename | - a valid filename |
bool | $use_include_path | - OPTIONAL passed to file_get_contents() |
resource | $context | - OPTIONAL A valid context resource created with stream_context_create(). See file_get_contents() |
Definition at line 67 of file hQuery.php.
|
static |
Parse an HTML string.
string | $html | - source of some HTML document |
string | $url | - OPTIONAL location of the document. Used for relative URLs inside the document. |
Definition at line 41 of file hQuery.php.
|
static |
Fetch the HTML document from remote $url.
string | $url | - the URL of the document |
array | $headers | - OPTIONAL request headers |
array | string | $body | - OPTIONAL body of the request (for POST or PUT) |
array | $options | - OPTIONAL request options (see self::http_wr() for more details) |
Definition at line 88 of file hQuery.php.
|
staticprotected |
Read data from a cache file.
string | $fn | - cache filename |
int | $expire | - OPTIONAL contents returned only if it is newer than $expire seconds |
bool | $meta_only | - OPTIONAL if TRUE, read only meta-info (faster) |
Definition at line 532 of file hQuery.php.
|
static |
Find a function to decode gzip data.
Definition at line 435 of file hQuery.php.
|
static |
gzdecode() (for PHP < 5.4.0)
Definition at line 445 of file hQuery.php.
|
static |
Executes a HTTP write-read session.
string | $host | - IP/HOST address or URL |
array | $head | - list of HTTP headers to be sent along with the request to $host |
mixed | $body | - data to be sent as the contents of the request. If it is an array or object, an HTTP query is built. |
array | $options | - list of options as key-value pairs: timeout - connection timeout in seconds; host - goes to headers, overrides $host (ex. $host == '127.0.0.1', $options['host'] == 'www.example.com'); port - useful when $host is not a full URL; scheme - http, ssl, tls, udp, ...; close - whether to close the connection or not; redirects - number of allowed redirects; redirect_method - if (string), this is the new method for the redirect request, else if true, preserve method, else use 'GET' on redirect. By default preserve on 307 and 308, GET on 301-303 |
Definition at line 720 of file hQuery.php.
duzun\hQuery::index | ( | ) |
Index elements of the source HTML. (Called automatically)
Definition at line 295 of file hQuery.php.
|
static |
Serialize $data as JSON, fallback to serialize.
mixed | $data | - the data to be serialized |
&string | $type | - returns the serialization method used ('json' or 'ser') |
Definition at line 308 of file hQuery.php.
|
staticprotected |
Tries to detect format of $str (json or ser).
string | $str | - JSON encoded or PHP serialized data. |
Definition at line 414 of file hQuery.php.
|
staticprotected |
Save data to a cache file.
string | $fn | - cache filename |
mixed | $cnt | - contents to be cached |
array | $meta | - OPTIONAL meta information related to contents. |
bool | $gzip | - OPTIONAL if TRUE and gzip supported, store contents gzipped |
Definition at line 578 of file hQuery.php.
|
static |
Unserialize $str from either JSON or PHP serialize() format.
string | $str | - the data to be unserialized |
&string | $type | - if not set, returns the serialization method detected ('json' or 'ser'); if set, forces unjsonize() to use this method for unserialization. |
Definition at line 332 of file hQuery.php.