Does anyone have a function that will search an HTML page for all RSS links?
Steps: 1) Load the HTML page. 2) Look for elements with type="application/rss+xml". 3) Pull the href link from each one.
Thanks
[code=php]
<?php

/**
 * Collects the attributes of elements in a DOMDocument, optionally keeping
 * only the elements whose attributes match a set of name => value criteria.
 */
class DOMScraper
{
    /**
     * @var DOMDocument Document that byTag() searches.
     */
    private $dom;

    /**
     * Constructor
     *
     * @param DOMDocument $dom Already-loaded document to search.
     */
    public function __construct(DOMDocument $dom)
    {
        $this->dom = $dom;
    }

    /**
     * Get elements by tag name, optionally filtered by attribute values.
     *
     * Each returned entry is an array of the element's attributes
     * (attribute name => attribute value) plus a 'textContent' key holding
     * the element's text content.
     *
     * @param string     $tag          Tag name to look for, e.g. 'link'.
     * @param array|null $attrCriteria Attribute_name => Attribute_value pairs
     *                                 that an element must all carry to be
     *                                 returned. Empty/null means no filtering.
     * @return array|false List of attribute arrays, or false (after raising a
     *                     user-level notice) when $attrCriteria is non-empty
     *                     but not an array.
     */
    public function byTag($tag, $attrCriteria = null)
    {
        $hasCriteria = !empty($attrCriteria);

        // Validate once, before touching the document, so that bad criteria
        // are reported even when no element with this tag name exists.
        if ($hasCriteria && !is_array($attrCriteria)) {
            user_error('attrCriteria must be an array');
            return false;
        }

        $data = array();
        foreach ($this->dom->getElementsByTagName($tag) as $elem) {
            $attrs = array();
            foreach ($elem->attributes as $name => $attr) {
                $attrs[$name] = $attr->value;
            }
            $attrs['textContent'] = $elem->textContent;

            if ($hasCriteria && !$this->criteriaMatch($attrs, $attrCriteria)) {
                continue;
            }
            $data[] = $attrs;
        }

        return $data;
    }

    /**
     * Test if a node has all of the desired attributes.
     *
     * Values are compared as strings and the comparison is case-sensitive.
     *
     * @param array $attrs        Attributes found on the node.
     * @param array $attrCriteria Attribute_name => Attribute_value pairs required.
     * @return bool True when every criterion is present in $attrs with an equal value.
     */
    private function criteriaMatch($attrs, $attrCriteria)
    {
        foreach ($attrCriteria as $name => $expected) {
            if (!array_key_exists($name, $attrs)
                || (string) $attrs[$name] !== (string) $expected) {
                return false;
            }
        }
        return true;
    }
}

// USAGE: list the href of every RSS link advertised by test.html.

// Real-world HTML is rarely valid; collect parser complaints internally
// instead of printing a warning for each one.
libxml_use_internal_errors(true);

$dom = new DOMDocument();
if (!$dom->loadHTMLFile('test.html')) {
    echo "Could not load test.html";
} else {
    $test = new DOMScraper($dom);
    $result = array();

    // Feeds are normally advertised with <link rel="alternate" ...> elements
    // in the page head; plain <a> anchors are checked as well.
    foreach (array('link', 'a') as $tag) {
        foreach ($test->byTag($tag, array('type' => 'application/rss+xml')) as $attrs) {
            if (isset($attrs['href'])) {
                $result[] = $attrs['href'];
            }
        }
    }

    // The values come from the scraped page, so escape them before output.
    echo "<pre>".htmlspecialchars(print_r($result, 1))."</pre>";
}
libxml_clear_errors();
[/code]
Help @BrutusUnix spread the word by sharing this article on Twitter...