/    Sign up×
Community /Pin to ProfileBookmark

Does anyone have a function that will search an HTML page for all RSS links?

Steps:
1) Load html page
2) Look for type="application/rss+xml"
3) Pull the href link

Thanks

to post a comment
PHP

3 Comment(s)

Copy linkTweet thisAlerts:
@NogDogJul 09.2010 — You could do it pretty easily via the [url=http://php.net/dom]DOM extension[/url], though I'll note the "type" attribute for an <a> element is optional, and there's no guarantee that even if it is set it's correct.
Copy linkTweet thisAlerts:
@NogDogJul 09.2010 — PS: I just threw this together, seems to work with my simple test data:
[code=php]
<?php

class DOMScraper
{
    /**
     * Document that every query is run against.
     * @var DOMDocument
     */
    private $dom;

    /**
     * Constructor
     * @return void
     * @param DOMDocument $dom Document to search
     */
    public function __construct(DOMDocument $dom)
    {
        $this->dom = $dom;
    }

    /**
     * Collect the attributes of every element with the given tag name.
     *
     * Each result row maps attribute name => attribute value, plus a
     * 'textContent' key holding the element's text. When $attrCriteria is
     * non-empty, only rows containing every name/value pair in it are kept.
     * Because 'textContent' is added before filtering, it can be used as a
     * criterion as well.
     *
     * @return array|false List of rows, or false (after raising a user error)
     *                     when $attrCriteria is non-empty but not an array
     * @param string $tag Tag name passed to getElementsByTagName()
     * @param array|null $attrCriteria Attribute_name => Attribute_value
     */
    public function byTag($tag, $attrCriteria = null)
    {
        $filter = !empty($attrCriteria);
        // Validate before iterating: checking inside the loop would let bad
        // criteria pass unnoticed whenever no element has the requested tag.
        if($filter && !is_array($attrCriteria))
        {
            user_error('attrCriteria must be an array');
            return false;
        }

        $data = array();
        foreach($this->dom->getElementsByTagName($tag) as $elem)
        {
            $attrs = array();
            foreach($elem->attributes as $name => $attr)
            {
                $attrs[$name] = $attr->textContent;
            }
            // Shares the key space with real attributes, so it replaces an
            // attribute that happens to be literally named "textContent".
            $attrs['textContent'] = $elem->textContent;
            if($filter && !$this->criteriaMatch($attrs, $attrCriteria))
            {
                continue;
            }
            $data[] = $attrs;
        }
        return $data;
    }

    /**
     * Test if node has desired attributes
     * @return bool True when every criteria pair is present in $attrs
     * @param array $attrs Attribute_name => Attribute_value for one element
     * @param array $attrCriteria Required Attribute_name => Attribute_value pairs
     */
    private function criteriaMatch($attrs, $attrCriteria)
    {
        // array_intersect_assoc() keeps the criteria pairs that also occur in
        // $attrs; key order is irrelevant to it, so no sorting is needed.
        $matched = array_intersect_assoc($attrCriteria, $attrs);
        return count($matched) == count($attrCriteria);
    }
}

// USAGE:
$dom = new DOMDocument();
// Real-world pages are rarely valid markup; buffer libxml's parse warnings
// instead of letting them spill into the output.
$previousSetting = libxml_use_internal_errors(true);
$loaded = $dom->loadHTMLFile('test.html');
libxml_clear_errors();
libxml_use_internal_errors($previousSetting);
if(!$loaded)
{
    user_error('Could not load test.html', E_USER_WARNING);
}
$test = new DOMScraper($dom);
// Feed autodiscovery entries are <link> elements, not <a> anchors.
$result = $test->byTag('link', array('type' => 'application/rss+xml'));
// The dump contains text scraped from the page, so escape it before echoing.
echo "<pre>".htmlspecialchars(print_r($result, true), ENT_QUOTES, 'UTF-8')."</pre>";
[/code]
Copy linkTweet thisAlerts:
@BrutusUnixauthorJul 09.2010 — Thanks, I just had to change the tag name passed to byTag() from 'a' to 'link'.
×

Success!

Help @BrutusUnix spread the word by sharing this article on Twitter...

Tweet This
Sign in
Forgot password?
Sign in with TwitchSign in with GithubCreate Account
about: ({
version: 0.1.9 BETA 6.2,
whats_new: community page,
up_next: more Davinci•003 tasks,
coming_soon: events calendar,
social: @webDeveloperHQ
});

legal: ({
terms: of use,
privacy: policy
});
changelog: (
version: 0.1.9,
notes: added community page

version: 0.1.8,
notes: added Davinci•003

version: 0.1.7,
notes: upvote answers to bounties

version: 0.1.6,
notes: article editor refresh
)...
recent_tips: (
tipper: @meenaratha,
tipped: article
amount: 1000 SATS,

tipper: @meenaratha,
tipped: article
amount: 1000 SATS,

tipper: @AriseFacilitySolutions09,
tipped: article
amount: 1000 SATS,
)...