hQuery.php
Public Member Functions | Static Public Member Functions | Public Attributes | Static Public Attributes | Static Protected Member Functions | List of all members
duzun\hQuery Class Reference
Inheritance diagram for duzun\hQuery:
duzun\hQuery\HTML_Parser duzun\hQuery\Node

Public Member Functions

 find ($sel, $_attr=NULL, $ctx=NULL)
 
 find_html ($sel, $attr=NULL, $ctx=NULL)
 
 find_text ($sel, $attr=NULL, $ctx=NULL)
 
 index ()
 
- Public Member Functions inherited from duzun\hQuery\HTML_Parser
 __get ($name)
 
 __set ($name, $value)
 
 location ($href=NULL)
 
 baseURI ($href=NULL)
 get/set baseURI
 
 __construct ($html, $idx=true)
 
 __toString ()
 
 url2abs ($url)
 
 strlen ()
 
 substr ($start, $length=NULL)
 
 _info ()
 This method is for debugging only.
 
 hasClass ($id, $cl)
 
- Public Member Functions inherited from duzun\hQuery\Node
 attr ($attr=NULL, $to_str=false)
 
 is_empty ()
 
 isEmpty ()
 
 isDoc ()
 
 doc ()
 
 find ($sel, $attr=NULL)
 
 exclude ($sel, $attr=NULL)
 
 __toString ()
 
 html ($id=NULL)
 
 outerHtml ($id=NULL)
 
 text ($id=NULL)
 
 nodeName ($caseFolding=NULL, $id=NULL)
 
 pos ($restore=true)
 
 _children ($ids=NULL, $n=NULL)
 
 _next ($ids=NULL, $n=0)
 
 _prev ($ids=NULL, $n=0)
 
 _all ($ids=NULL)
 
 _has ($el, $eq=false)
 $el < $this, with $eq == true -> $el <= $this
 
 _filter_contains ($el, $eq=false)
 
 __get ($name)
 
 __set ($name, $value)
 
 __isset ($name)
 
 __unset ($name)
 
 count ()
 
 current ()
 
 valid ()
 
 key ()
 
 next ()
 
 prev ()
 
 rewind ()
 

Static Public Member Functions

static fromHTML ($html, $url=NULL)
 
static fromFile ($filename, $use_include_path=false, $context=NULL)
 
static fromURL ($url, $headers=NULL, $body=NULL, $options=NULL)
 
static jsonize ($data, &$type=NULL, $ops=0)
 
static unjsonize ($str, &$type=NULL)
 
static gz_supported ()
 
static gzdecode ($str)
 
static do_flock ($fp, $lock, $timeout_ms=384)
 
static flock_put_contents ($fn, $cnt, $block=false)
 
static flock_get_contents ($fn, $block=false)
 
static parse_cookie ($str)
 
static http_wr ($host, $head=NULL, $body=NULL, $options=NULL)
 
- Static Public Member Functions inherited from duzun\hQuery\HTML_Parser
static get_url_base ($url, $array=false)
 
static get_url_path ($url)
 
static is_url_path ($path)
 
static is_abs_path ($path)
 
static abs_url ($url, $base)
 
static detect_charset ($str)
 
- Static Public Member Functions inherited from duzun\hQuery\Node
static html_normal_pseudoClass ($p)
 
static html_selector2struc ($sel)
 
static html_parseAttrStr ($str, $case_folding=true, $extended=false)
 
static html_attr2str ($attr, $quote='"')
 
static parseCSStr ($str, $case_folding=true)
 
static CSSArr2Str ($css)
 
static str_range ($comp, $pos=0, $len=NULL)
 
static array_select ($arr, $keys, $force_null=false)
 
static convert_encoding ($a, $to, $from=NULL)
 

Public Attributes

 $headers
 
- Public Attributes inherited from duzun\hQuery\Node
const VERSION = '2.0.2'
 
 $tag_map
 

Static Public Attributes

static $cache_path
 
static $cache_expires = 3600
 
static $_mockup_class
 
- Static Public Attributes inherited from duzun\hQuery\HTML_Parser
static $del_spaces = false
 
static $case_folding = true
 
static $autoclose_tags = false
 
static $_emptyTags = array('base','meta','link','hr','br','basefont','param','img','area','input','isindex','col')
 
static $_specialTags = array('--'=>'--', '[CDATA['=>']]')
 
static $_unparsedTags = array('style', 'script')
 
static $_index_attribs = array('href', 'src')
 
static $_url_attribs = array('href'=>'href', 'src'=>'src')
 
- Static Public Attributes inherited from duzun\hQuery\Node
static $last_http_result
 
static $selected_doc = NULL
 
static $_ar_ = array()
 
static $_mi_ = PHP_INT_MAX
 
static $_nl_ = NULL
 
static $_fl_ = false
 
static $_tr_ = true
 

Static Protected Member Functions

static serjstype ($str)
 
static _gzdecode ($gzdata, $maxlen=NULL)
 
static get_cache ($fn, $expire=false, $meta_only=false)
 
static set_cache ($fn, $cnt, $meta=NULL, $gzip=true)
 
- Static Protected Member Functions inherited from duzun\hQuery\Node
static html_findTagClose ($str, $p)
 

Additional Inherited Members

- Protected Member Functions inherited from duzun\hQuery\HTML_Parser
 _index_comments_html ($o)
 Index comment tags position in source HTML.
 
 _index_all ()
 
 _get_ctx ($ctx)
 
 _find ($name, $class=NULL, $attr=NULL, $ctx=NULL, $rec=true)
 
 _filter ($ids, $name=NULL, $class=NULL, $attr=NULL, $ctx=NULL)
 
 get_aids_byAttr ($attr, $as_keys=false, $actx=NULL)
 
 get_aids_byClass ($cl, $as_keys=false, $actx=NULL)
 
 get_aids_byClassAttr ($cl, $attr, $as_keys=false, $actx=NULL)
 
 get_ids_byAid ($aid, $sort=true, $has_keys=false)
 
 get_ids_byAttr ($attr, $sort=true)
 
 get_ids_byClass ($cl, $sort=true)
 
 get_ids_byClassAttr ($cl, $attr, $sort=true)
 
 get_attr_byAid ($aid, $to_str=false)
 
 get_attr_byId ($id, $attr=NULL, $to_str=false)
 
- Protected Member Functions inherited from duzun\hQuery\Node
 __construct ($doc, $ids, $is_ctx=false)
 
 _ctx_ids ($ids=NULL)
 
 _sub_ids ($eq=false)
 
 _doc_ids ($el, $force_array=true)
 
 _my_ids ($id=NULL, $keys=false)
 
 _parent ($ids=NULL, $n=0)
 
- Protected Attributes inherited from duzun\hQuery\HTML_Parser
 $html = ''
 
 $tags
 
 $attrs
 
 $attribs
 
 $idx_attr
 
 $tag_idx
 
 $attr_idx
 
 $class_idx
 
 $o = NULL
 
 $indexed = false
 
- Protected Attributes inherited from duzun\hQuery\Node
 $_prop = array()
 
 $doc
 
 $ids
 
 $exc
 
- Static Protected Attributes inherited from duzun\hQuery\HTML_Parser
static $_tagID_first_letter = 'a-zA-Z_'
 
static $_tagID_letters = 'a-zA-Z_0-9:\-'
 
static $_icharset = 'UTF-8'
 

Detailed Description

Main Class, represents an HTML document.

An extremely fast web scraper that parses megabytes of HTML in a blink of an eye. PHP5+, no dependencies.

API Documentation at https://duzun.github.io/hQuery.php

Copyright (C) 2014-2018 Dumitru Uzun

Author
Dumitru Uzun (DUzun.ME) MIT
Version
2.0.2

Definition at line 21 of file hQuery.php.

Member Function Documentation

◆ _gzdecode()

static duzun\hQuery::_gzdecode (   $gzdata,
  $maxlen = NULL 
)
staticprotected

Alternative gzdecode() (for PHP < 5.4.0) source: https://github.com/Polycademy/upgradephp/blob/master/upgrade.php

Definition at line 458 of file hQuery.php.

◆ do_flock()

static duzun\hQuery::do_flock (   $fp,
  $lock,
  $timeout_ms = 384 
)
static

Lock with retries

Parameters
resource$fp- Open file pointer
int$lock- Lock type
int$timeout_ms- OPTIONAL Timeout to wait for unlock in milliseconds
Returns
true on success, false on fail
Author
Dumitru Uzun

Definition at line 611 of file hQuery.php.

◆ find()

duzun\hQuery::find (   $sel,
  $_attr = NULL,
  $ctx = NULL 
)

Finds a collection of nodes inside current document/context (similar to jQuery.fn.find()).

Parameters
string$sel- A valid CSS selector (some pseudo-selectors supported).
array | string$_attr- OPTIONAL attributes as string or key-value pairs.
hQuery\Node$ctx- OPTIONAL the context where to search. If omitted, $this is used.
Returns
hQuery collection of matched elements or NULL

Definition at line 182 of file hQuery.php.

Referenced by duzun\hQuery\HTML_Parser\hasClass().

◆ find_html()

duzun\hQuery::find_html (   $sel,
  $attr = NULL,
  $ctx = NULL 
)

Combination of ->find() + ->html()

Parameters
string$sel- A valid CSS selector.
array | string$attr- OPTIONAL attributes as string or key-value pairs.
hQuery\Node$ctx- OPTIONAL the context where to search. If omitted, $this is used.
Returns
array list of HTML contents of all matched elements

Definition at line 269 of file hQuery.php.

◆ find_text()

duzun\hQuery::find_text (   $sel,
  $attr = NULL,
  $ctx = NULL 
)

Combination of ->find() + ->text()

Parameters
string$sel- A valid CSS selector.
array | string$attr- OPTIONAL attributes as string or key-value pairs.
hQuery\Node$ctx- OPTIONAL the context where to search. If omitted, $this is used.
Returns
array list of Text contents of all matched elements

Definition at line 285 of file hQuery.php.

◆ fromFile()

static duzun\hQuery::fromFile (   $filename,
  $use_include_path = false,
  $context = NULL 
)
static

Read the HTML document from a file.

Parameters
string$filename- a valid filename
bool$use_include_path- OPTIONAL passed to file_get_contents()
resource$context- OPTIONAL A valid context resource created with stream_context_create(). See file_get_contents()
Returns
hQuery $doc

Definition at line 67 of file hQuery.php.

◆ fromHTML()

static duzun\hQuery::fromHTML (   $html,
  $url = NULL 
)
static

Parse an HTML string.

Parameters
string$html- source of some HTML document
string$url- OPTIONAL location of the document. Used for relative URLs inside the document.
Returns
hQuery $doc

Definition at line 41 of file hQuery.php.

◆ fromURL()

static duzun\hQuery::fromURL (   $url,
  $headers = NULL,
  $body = NULL,
  $options = NULL 
)
static

Fetch the HTML document from remote $url.

Parameters
string$url- the URL of the document
array$headers- OPTIONAL request headers
array | string$body- OPTIONAL body of the request (for POST or PUT)
array$options- OPTIONAL request options (see self::http_wr() for more details)
Returns
hQuery $doc

Definition at line 88 of file hQuery.php.

◆ get_cache()

static duzun\hQuery::get_cache (   $fn,
  $expire = false,
  $meta_only = false 
)
staticprotected

Read data from a cache file.

Parameters
string$fn- cache filename
int$expire- OPTIONAL contents returned only if it is newer than $expire seconds
bool$meta_only- OPTIONAL if TRUE, read only meta-info (faster)
Returns
array [mixed <contents>, array <meta_info>]

Definition at line 532 of file hQuery.php.

◆ gz_supported()

static duzun\hQuery::gz_supported ( )
static

Find a function to decode gzip data.

Returns
string A gzip decode function name, or false if not found

Definition at line 435 of file hQuery.php.

◆ gzdecode()

static duzun\hQuery::gzdecode (   $str)
static

gzdecode() (for PHP < 5.4.0)

Definition at line 445 of file hQuery.php.

◆ http_wr()

static duzun\hQuery::http_wr (   $host,
  $head = NULL,
  $body = NULL,
  $options = NULL 
)
static

Executes a HTTP write-read session.

Parameters
string$host- IP/HOST address or URL
array$head- list of HTTP headers to be sent along with the request to $host
mixed$body- data to be sent as the contents of the request. If is array or object, a http query is built.
array$options- list of options as key-value pairs: timeout - connection timeout in seconds; host - goes to headers, overrides $host (ex. $host == '127.0.0.1', $options['host'] == 'www.example.com'); port - useful when $host is not a full URL; scheme - http, ssl, tls, udp, ...; close - whether to close the connection or not; redirects - number of allowed redirects; redirect_method - if (string), this is the new method for the redirect request, else if true, preserve the method, else use 'GET' on redirect. By default the method is preserved on 307 and 308, and GET is used on 301-303.
Returns
array [contents, headers, http-status-code, http-status-message]
Author
Dumitru Uzun

Definition at line 720 of file hQuery.php.

◆ index()

duzun\hQuery::index ( )

Index elements of the source HTML. (Called automatically)

Definition at line 295 of file hQuery.php.

◆ jsonize()

static duzun\hQuery::jsonize (   $data,
  &$type = NULL,
  $ops = 0 
)
static

Serialize $data as JSON, fallback to serialize.

Parameters
mixed$data- the data to be serialized
&string$type - returns the serialization method used ('json' | 'ser')
Returns
string the serialized data

Definition at line 308 of file hQuery.php.

◆ serjstype()

static duzun\hQuery::serjstype (   $str)
staticprotected

Tries to detect format of $str (json or ser).

Parameters
string$str- JSON encoded or PHP serialized data.
Returns
string 'json' | 'ser', or FALSE on failure to detect format.

Definition at line 414 of file hQuery.php.

◆ set_cache()

static duzun\hQuery::set_cache (   $fn,
  $cnt,
  $meta = NULL,
  $gzip = true 
)
staticprotected

Save data to a cache file.

Parameters
string$fn- cache filename
mixed$cnt- contents to be cached
array$meta- OPTIONAL meta information related to contents.
bool$gzip- OPTIONAL if TRUE and gzip supported, store contents gzipped
Returns
int|bool On success, number of written bytes, FALSE on fail.

Definition at line 578 of file hQuery.php.

◆ unjsonize()

static duzun\hQuery::unjsonize (   $str,
  &$type = NULL 
)
static

Unserialize $str from either JSON or PHP serialize() format.

Parameters
string$str- the data to be unserialized
&string$type - if not set, returns the serialization method detected ('json' | 'ser'); if set, forces unjsonize() to use this method for unserialization.
Returns
mixed the unserialized data

Definition at line 332 of file hQuery.php.


The documentation for this class was generated from the following file: