--- a/admin/agency2portfolio.php +++ b/admin/agency2portfolio.php @@ -1,1 +1,42 @@ +<?php +// Returns portfolio scraped live from directory.gov.au +// or null if can't find a portfolio +function agency2portfolio ($agency) { + static $cache = array(); + if (isset($cache[$agency])) { return $cache[$agency]; } + $c = curl_init('http://www.directory.gov.au/searchres.php'); + curl_setopt($c, CURLOPT_POST, true); + curl_setopt($c, CURLOPT_HEADER, false); + curl_setopt($c, CURLOPT_RETURNTRANSFER, true); + curl_setopt($c, CURLOPT_REFERER, 'http://www.directory.gov.au/adsearch.php'); + curl_setopt($c, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; U; Linux i686; en-GB; rv:1.9.1.3) Gecko/20090824 Firefox/3.5.3'); + curl_setopt($c, CURLOPT_POSTFIELDS, array( + 'advkeywordfield' => '', + 'advorgunitfield' => $agency, + 'advrolefield' => '', + 'advsection' => 'All', + 'advsurnamefield' => '', + 'search' => 'Submit Query' + )); + $results = curl_exec($c); + + if (preg_match('#<span\s+class="standardlinks"><a\s+href="([^"]+)">#smi', $results, $m)) { + $nextURL = $m[1]; + } else { + $cache[$agency] = false; return false; + } + + curl_setopt($c, CURLOPT_URL, 'http://www.directory.gov.au' . $nextURL); + curl_setopt($c, CURLOPT_HTTPGET, true); + curl_setopt($c, CURLOPT_REFERER, 'http://www.directory.gov.au/searchres.php'); + $results = curl_exec($c); + if (preg_match('#portfolios:\s+([^<]+)#ims', $results, $m)) { + $cache[$agency] = $m[1]; return $m[1]; + } else { + $cache[$agency] = false; return false; + } +} + +?> +