<?php

/**
 * sfDomCssSelector finds the elements of a DOM document that match a CSS selector.
 *
 * Supported syntax, as implemented by getElements():
 *  - tag names and "*"
 *  - "#id" and "tag#id"
 *  - ".class", "tag.class" and chained classes ("tag.a.b")
 *  - attribute tests, possibly chained: [attr], [attr=v], [attr~=v], [attr|=v],
 *    [attr^=v], [attr$=v], [attr*=v] (the value may be wrapped in double quotes)
 *  - the descendant (" "), child (">") and adjacent sibling ("+") combinators
 *  - comma separated groups of selectors
 */
class sfDomCssSelector
{
  /**
   * The document the selectors are evaluated against (handed to DomXPath and
   * used as the starting node of every selector).
   */
  protected $dom = null;

  /**
   * Constructor.
   *
   * @param DOMDocument $dom The document to search in
   */
  public function __construct($dom)
  {
    $this->dom = $dom;
  }

  /**
   * Returns the nodeValue of every element matched by a selector.
   *
   * @param string $selector A CSS selector (see the class description for the supported syntax)
   *
   * @return array The nodeValue of each matched element, in match order
   */
  public function getTexts($selector)
  {
    $texts = array();
    foreach ($this->getElements($selector) as $element)
    {
      $texts[] = $element->nodeValue;
    }

    return $texts;
  }

  /**
   * Returns the elements matched by a selector.
   *
   * Every comma separated group is evaluated on its own, starting from the
   * document; the results are concatenated and each node is returned only once.
   * A group that matches nothing simply contributes nothing.
   *
   * @param string $selector A CSS selector (see the class description for the supported syntax)
   *
   * @return array The matched DOM nodes
   */
  public function getElements($selector)
  {
    $all_nodes = array();

    // spl_object_hash() of the nodes already collected. The DOM extension hands
    // back the same PHP object for a node as long as that object is alive, and
    // $all_nodes keeps every collected node alive, so the hashes stay unique.
    // This replaces a temporary marker attribute, so the document is never modified.
    $seen = array();

    foreach ($this->tokenize_selectors($selector) as $selector)
    {
      $tokens = $this->tokenize($selector);
      if (!$tokens)
      {
        // an empty group must not let the document node itself reach the result
        continue;
      }

      $nodes = array($this->dom);
      foreach ($tokens as $token)
      {
        $combinator = $token['combinator'];
        $token = trim($token['name']);

        // "#id" or "tag#id"
        $pos = strpos($token, '#');
        if (false !== $pos && preg_match('/^[A-Za-z0-9]*$/', substr($token, 0, $pos)))
        {
          // the node name is lower-cased below, so the selector side must be too
          $tagName = strtolower(substr($token, 0, $pos));
          $id = substr($token, $pos + 1);

          // NOTE: the lookup is document-wide; the current node set and the
          // combinator are not taken into account.
          $xpath = new DomXPath($this->dom);
          $result = $xpath->query(sprintf('//*[@id = %s]', $this->xpathLiteral($id)));
          $element = $result ? $result->item(0) : null;
          if (!$element || ('' !== $tagName && strtolower($element->nodeName) != $tagName))
          {
            // this group cannot match; the other groups are still evaluated
            $nodes = array();

            break;
          }

          $nodes = array($element);

          continue;
        }

        // ".class", "tag.class" or "tag.class1.class2"
        $pos = strpos($token, '.');
        if (false !== $pos && preg_match('/^[A-Za-z0-9]*$/', substr($token, 0, $pos)))
        {
          $tagName = substr($token, 0, $pos);
          if (!$tagName)
          {
            $tagName = '*';
          }
          $classNames = explode('.', substr($token, $pos + 1));

          $founds = $this->getElementsByTagName($nodes, $tagName, $combinator);
          $nodes = array();
          foreach ($founds as $found)
          {
            // whole-word comparison: "item" must not match class="item-x"
            if ($this->hasWords($found->getAttribute('class'), $classNames))
            {
              $nodes[] = $found;
            }
          }

          continue;
        }

        // "[attr...]" or "tag[attr...]", with any number of bracketed tests
        if (preg_match('/^(\w*)(\[.+\])$/', $token, $matches))
        {
          $tagName = $matches[1] ? $matches[1] : '*';
          preg_match_all('/
            \[
              ([\w\-]+)             # attribute
              ([=~\|\^\$\*]?)       # modifier (optional)
              =?                    # equal (optional)
              (
                "([^"]*)"           # quoted value (optional)
                |
                ([^\]]*)            # non quoted value (optional)
              )
            \]
          /x', $matches[2], $tests, PREG_SET_ORDER);

          $founds = $this->getElementsByTagName($nodes, $tagName, $combinator);
          $nodes = array();
          foreach ($founds as $found)
          {
            // stays false when the bracket content could not be parsed at all
            $ok = false;
            foreach ($tests as $test)
            {
              // group 4 holds a quoted value, group 5 a non quoted one
              $attrValue = $test[4];
              if ('' === $attrValue && isset($test[5]))
              {
                $attrValue = $test[5];
              }

              $ok = $this->matchesAttribute($found, $test[1], $test[2], $attrValue);
              if (!$ok)
              {
                break;
              }
            }

            if ($ok)
            {
              $nodes[] = $found;
            }
          }

          continue;
        }

        // plain tag name
        $nodes = $this->getElementsByTagName($nodes, $token, $combinator);
      }

      foreach ($nodes as $node)
      {
        $hash = spl_object_hash($node);
        if (!isset($seen[$hash]))
        {
          $seen[$hash] = true;
          $all_nodes[] = $node;
        }
      }
    }

    return $all_nodes;
  }

  /**
   * Collects the elements named $tagName that are related to $nodes through $combinator.
   *
   * @param array  $nodes      The context nodes
   * @param string $tagName    A tag name, or "*" for any element
   * @param string $combinator " " (descendants), ">" (children) or "+" (next sibling element);
   *                           any other value yields no element
   *
   * @return array The elements found (the same element may appear more than once)
   */
  protected function getElementsByTagName($nodes, $tagName, $combinator = ' ')
  {
    $founds = array();
    foreach ($nodes as $node)
    {
      switch ($combinator)
      {
        case ' ':
          // descendants
          foreach ($node->getElementsByTagName($tagName) as $element)
          {
            $founds[] = $element;
          }
          break;
        case '>':
          // direct children
          foreach ($node->childNodes as $element)
          {
            if ($this->isElementNamed($element, $tagName))
            {
              $founds[] = $element;
            }
          }
          break;
        case '+':
          // next sibling element: step over text, comments, ...
          $element = $node->nextSibling;
          while ($element && XML_ELEMENT_NODE !== $element->nodeType)
          {
            $element = $element->nextSibling;
          }

          if ($element && $this->isElementNamed($element, $tagName))
          {
            $founds[] = $element;
          }
          break;
      }
    }

    return $founds;
  }

  /**
   * Tells whether a node is an element with the given tag name ("*" accepts any element).
   *
   * @param DOMNode $node    The node to test
   * @param string  $tagName A tag name or "*"
   *
   * @return bool
   */
  protected function isElementNamed($node, $tagName)
  {
    return XML_ELEMENT_NODE === $node->nodeType && ('*' === $tagName || $tagName === $node->nodeName);
  }

  /**
   * Tells whether every word of $words is one of the whitespace separated words of $list.
   *
   * @param string $list  A whitespace separated list (a class attribute for instance)
   * @param array  $words The words that must all be present
   *
   * @return bool
   */
  protected function hasWords($list, $words)
  {
    $present = preg_split('/\s+/', $list, -1, PREG_SPLIT_NO_EMPTY);
    foreach ($words as $word)
    {
      if (!in_array($word, $present, true))
      {
        return false;
      }
    }

    return true;
  }

  /**
   * Evaluates one attribute test against an element.
   *
   * An element without the attribute never matches. For "^", "$" and "*" an
   * empty value matches nothing.
   *
   * @param DOMElement $element  The element to test
   * @param string     $name     The attribute name
   * @param string     $operator "", "=", "~", "|", "^", "$" or "*"
   * @param string     $value    The value to compare with (ignored for the "" operator)
   *
   * @return bool
   */
  protected function matchesAttribute($element, $name, $operator, $value)
  {
    if (!$element->hasAttribute($name))
    {
      return false;
    }

    $actual = $element->getAttribute($name);
    switch ($operator)
    {
      case '=':
        return $actual === $value;
      case '~':
        // one of the whitespace separated words
        return $this->hasWords($actual, array($value));
      case '|':
        // the value itself, or the value followed by "-"
        return $actual === $value || 0 === strpos($actual, $value.'-');
      case '^':
        return '' !== $value && 0 === strpos($actual, $value);
      case '$':
        return '' !== $value && $value === substr($actual, -strlen($value));
      case '*':
        return '' !== $value && false !== strpos($actual, $value);
    }

    // no operator: the presence of the attribute is enough
    return true;
  }

  /**
   * Converts a string to an XPath 1.0 string literal.
   *
   * XPath 1.0 has no escape character, so a value holding both kinds of quotes
   * is assembled with concat().
   *
   * @param string $value The raw string
   *
   * @return string The XPath expression evaluating to $value
   */
  protected function xpathLiteral($value)
  {
    if (false === strpos($value, "'"))
    {
      return "'".$value."'";
    }

    if (false === strpos($value, '"'))
    {
      return '"'.$value.'"';
    }

    return "concat('".implode("', \"'\", '", explode("'", $value))."')";
  }

  /**
   * Splits a selector on the commas found outside double quotes.
   *
   * @param string $selector A group of selectors
   *
   * @return array The trimmed, non empty selectors
   */
  protected function tokenize_selectors($selector)
  {
    $tokens = array();
    $quoted = false;
    $token = '';
    for ($i = 0, $max = strlen($selector); $i < $max; $i++)
    {
      $char = $selector[$i];
      if (',' === $char && !$quoted)
      {
        $token = trim($token);
        if ('' !== $token)
        {
          $tokens[] = $token;
        }
        $token = '';

        continue;
      }

      if ('"' === $char)
      {
        $quoted = !$quoted;
      }
      $token .= $char;
    }

    $token = trim($token);
    if ('' !== $token)
    {
      $tokens[] = $token;
    }

    return $tokens;
  }

  /**
   * Splits one selector into its simple selectors.
   *
   * Combinator characters found inside double quotes are part of the name. In a
   * run of consecutive combinator characters, the last non space one wins.
   *
   * @param string $selector A selector without any top level comma
   *
   * @return array A list of array('combinator' => ..., 'name' => ...); the first
   *               entry always has the " " combinator
   */
  protected function tokenize($selector)
  {
    $tokens = array();
    $combinators = array(' ', '>', '+');
    $quoted = false;
    $token = array('combinator' => ' ', 'name' => '');
    for ($i = 0, $max = strlen($selector); $i < $max; $i++)
    {
      $char = $selector[$i];
      if (!$quoted && in_array($char, $combinators, true))
      {
        $combinator = $char;

        // swallow the rest of the run, without reading past the end of the string
        while ($i + 1 < $max && in_array($selector[$i + 1], $combinators, true))
        {
          if (' ' != $selector[++$i])
          {
            $combinator = $selector[$i];
          }
        }

        $tokens[] = $token;
        $token = array('combinator' => $combinator, 'name' => '');
      }
      else
      {
        if ('"' === $char)
        {
          $quoted = !$quoted;
        }
        $token['name'] .= $char;
      }
    }

    if ('' !== $token['name'])
    {
      $tokens[] = $token;
    }

    return $tokens;
  }
}
|---|
| 304 |
|
|---|