Monday, January 07, 2008

How to extract URLs (href attribute values) from HTML

/// <summary>
/// Extracts the value of every <c>href</c> attribute found in the given HTML text.
/// </summary>
/// <param name="text">HTML markup to scan; may be null or empty.</param>
/// <returns>
/// An <see cref="ArrayList"/> of strings, one entry per <c>href</c> attribute, in the
/// order they appear in <paramref name="text"/>. The list is empty when the input is
/// null or empty, or contains no <c>href</c> attributes.
/// </returns>
protected ArrayList GetUrl(string text)
{
    ArrayList listURL = new ArrayList();
    if (string.IsNullOrEmpty(text))
    {
        return listURL;
    }

    // Three alternatives, all capturing into the same named group "url":
    //   href="..."   double-quoted value
    //   href='...'   single-quoted value
    //   href=...     unquoted value, terminated by whitespace, '>' or a quote
    // IgnoreCase is used because HTML attribute names are case-insensitive.
    Regex hrefPattern = new Regex(
        "href\\s*=\\s*(?:\"(?<url>[^\"]*)\"|'(?<url>[^']*)'|(?<url>[^\\s>\"']+))",
        RegexOptions.IgnoreCase);

    foreach (Match match in hrefPattern.Matches(text))
    {
        // Take only the named group: enumerating match.Groups would also add
        // group 0, i.e. the whole "href=..." text, alongside the URL itself.
        listURL.Add(match.Groups["url"].Value);
    }
    return listURL;
}

No comments: