Include textual subcategories in openspending/csv export
[contractdashboard.git] / admin / agency2portfolio.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
<?php
 
/**
 * Returns the portfolio an agency belongs to, scraped live from directory.gov.au.
 *
 * Two requests are made with a browser-like User-Agent and Referer:
 *   1. a POST of the advanced-search form to searchres.php with the agency
 *      name in the "advorgunitfield" field;
 *   2. a GET of the first "standardlinks" result link, whose page is then
 *      searched for the text following "portfolios:".
 *
 * Lookups (including "not found" answers) are memoised in a static array for
 * the lifetime of the PHP process, so each agency is scraped at most once.
 * Transport failures are NOT memoised, so a later call can retry.
 *
 * @param string $agency Agency / organisational unit name to search for.
 * @return string|false  The text captured after "portfolios:" (returned as
 *                       matched, not trimmed), or false when no search result
 *                       or portfolio could be found or the request failed.
 */
function agency2portfolio ($agency) {
        static $cache = array();
        // isset() is true for a cached false, so negative results are reused too.
        if (isset($cache[$agency])) { return $cache[$agency]; }

        $c = curl_init('http://www.directory.gov.au/searchres.php');
        if ($c === false) {
                // Could not create a cURL handle; nothing was learned, so do not cache.
                return false;
        }
        curl_setopt($c, CURLOPT_POST, true);
        curl_setopt($c, CURLOPT_HEADER, false);
        curl_setopt($c, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($c, CURLOPT_REFERER, 'http://www.directory.gov.au/adsearch.php');
        curl_setopt($c, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; U; Linux i686; en-GB; rv:1.9.1.3) Gecko/20090824 Firefox/3.5.3');
        // Send the form as an application/x-www-form-urlencoded string rather
        // than a PHP array. An array makes cURL build a multipart/form-data
        // body, and on older PHP versions a value starting with "@" is treated
        // as a local file to upload - which an agency name must never trigger.
        // The explicit '&' separator guards against a custom arg_separator.output.
        curl_setopt($c, CURLOPT_POSTFIELDS, http_build_query(array(
                'advkeywordfield' => '',
                'advorgunitfield' => (string) $agency,
                'advrolefield' => '',
                'advsection' => 'All',
                'advsurnamefield' => '',
                'search' => 'Submit Query'
        ), '', '&'));
        $results = curl_exec($c);
        if (!is_string($results)) {
                // Transport error (curl_exec returned false): not the same as
                // "no portfolio", so leave the cache untouched.
                return false;
        }

        // The first search hit is the link inside <span class="standardlinks">.
        if (!preg_match('#<span\s+class="standardlinks"><a\s+href="([^"]+)">#smi', $results, $m)) {
                $cache[$agency] = false; return false;
        }
        $nextURL = $m[1];

        // Reuse the same handle for the detail page, switching it back to GET.
        curl_setopt($c, CURLOPT_URL, 'http://www.directory.gov.au' . $nextURL);
        curl_setopt($c, CURLOPT_HTTPGET, true);
        curl_setopt($c, CURLOPT_REFERER, 'http://www.directory.gov.au/searchres.php');
        $results = curl_exec($c);
        if (!is_string($results)) {
                return false;
        }

        // Capture everything after "portfolios:" up to the next tag.
        $portfolio = preg_match('#portfolios:\s+([^<]+)#ims', $results, $m) ? $m[1] : false;
        $cache[$agency] = $portfolio;
        return $portfolio;
}
 
?>