XPath expressions
-
Web scraping
SELECT BY PATH (parent/children... structures)
Select element with absolute path
/html/body/aaa/bbb/ccc
Select all (//) elements 'ccc'
//ccc
Select all elements 'ccc' which are children of 'bbb'
//bbb/ccc
Select all (*) elements of some level
//bbb/ccc/*
Select all (*) elements
//*
Select elements 'ddd' which have 5 ancestors
/*/*/*/*/*/ddd
Select element at position [n] at some level
/html/body/aaa/bbb/ccc/*[2]
Select last element at some level
/html/body/aaa/bbb/ccc/*[last()]
Other child selection syntax: /AAA = /child::AAA; /AAA/BBB = /child::AAA/child::BBB. Mixed syntax is also possible: /child::AAA/BBB
SELECT PARENT OF AN ELEMENT
Select parent of all elements of type "ccc"
//ccc/..
or
//ccc/parent::*
SELECT DESCENDANTS OF AN ELEMENT
Select all descendants of an element
//ccc/descendant::*
Select some descendants of an element
//aaa/descendant::ddd
SELECT ANCESTORS
Select ancestors of elements of type "ddd"
//ddd/ancestor::*
SELECTING FOLLOWING-SIBLINGS
Select following siblings of an element
//ccc/following-sibling::*
SELECTING PRECEDING-SIBLINGS
Select preceding siblings of an element
//eee/preceding-sibling::*
SELECT FOLLOWING ELEMENTS
Select all elements that come after the elements of type 'aaa' in document order (excluding their descendants)
//aaa/following::*
SELECT PRECEDING ELEMENTS
Selects all nodes that appear before the current node in the document, except ancestors, attribute nodes and namespace nodes
//eee/preceding::*
SELECT DESCENDANT OR SELF
Select all descendants of an element and element itself
//bbb/descendant-or-self::*
UNION ('|') OPERATOR FOR XPATH EXPRESSIONS
Combining multiple XPath expressions with operator "|"
//ddd | //fff
SELECTION BY PROPERTY (ATTRIBUTE)
Select all elements of any type which have property "id"
//*[@id]
Select all elements of type "ddd" which have property "id"
//ddd[@id]
Select all values of properties "id" from all elements of type "ddd"
//ddd/@id
Select all elements of type "ddd" which have property "name"
//ddd[@name]
Select all elements of type "ddd" which have any property
//ddd[@*]
Select all elements of type "ddd" which have no property
//ddd[not(@*)]
Select all elements of type "ddd" with "id" property which contains a string
//ddd[contains(@id, "select")]
Select all elements of type "ddd" with "id" property which does not contain a string
//ddd[not(contains(@id, "here"))]
Select all elements of type "ddd" with "id" property which begins with a string
//ddd[starts-with(@id, "id")]
SELECT BY ELEMENT TAG NAME
Select all elements of type "ddd" (equivalent with //ddd)
//*[name()="ddd"]
Select all elements with names starting with string ...
//*[starts-with(name(),"d")]
Select all elements with names containing string ...
//*[contains(name(),"x")]
Select all elements whose name length is (=, >, <) a given number ...
//*[string-length(name()) = 5]
SELECT BY CHILDREN COUNTS
Select all elements which have two children "ccc"
//*[count(ccc)=2]
Select all elements which have three children of any type
//*[count(*)=3]
SELECT - USING TEXT OF ELEMENT
Select the text from all elements of type "ccc"
//ccc/text()
Select all elements of type "ccc" which have text "this"
//ccc[text()="this"]
Example1: select form
/html/body/form[1] or //form[1] or //form[@id='loginForm']
Example2: select input
//form/input[@name='username'] or //form[@id='loginForm']/input[1] or //input[@name='username']
Example3: select input
//input[@name='continue'][@type='button'] or //form[@id='loginForm']/input[4]