From: Alexander Sadleir Date: Wed, 21 Mar 2012 03:18:13 +0000 Subject: Fix CN frequency distribution graph X-Git-Url: http://maxious.lambdacomplex.org/git/?p=contractdashboard.git&a=commitdiff&h=4f352a46bf37661beb195fdeff3b4d9d94225682 --- Fix CN frequency distribution graph --- --- a/admin/neo4jimporter/pom.xml +++ b/admin/neo4jimporter/pom.xml @@ -7,7 +7,7 @@ org.neo4j neo4j - 1.6.1 + 1.5 postgresql --- a/admin/neo4jimporter/src/main/java/Importer.java +++ b/admin/neo4jimporter/src/main/java/Importer.java @@ -88,26 +88,26 @@ + " AND contractnotice.\"agencyName\" != 'Defence Materiel Organisation' GROUP BY contractnotice.\"agencyName\", " + " contractnotice.\"supplierABN\",contractnotice.\"supplierName\""); String previousAgency = ""; - GraphDatabaseService gds = inserter.getGraphDbService(); - HashMap supplierIDs = new HashMap(); - HashMap agencyIDs = new HashMap(); - +GraphDatabaseService gds = inserter.getGraphDbService(); +HashMap supplierIDs = new HashMap(); +HashMap agencyIDs = new HashMap(); + // Loop through the result set while (rs.next()) { long supplierID, agencyID; String supplierKey; - if (agencyIDs.get(rs.getString("agencyName")) == null) { - Node myNode = gds.createNode(); - myNode.setProperty("Label", rs.getString("agencyName")); - myNode.setProperty("type", "agency"); - agencyIDs.put(rs.getString("agencyName"), myNode.getId()); - if (myNode.getId() % 100 == 0) { - System.out.println("Agency " + myNode.getId()); - } - } - agencyID = agencyIDs.get(rs.getString("agencyName")); - - + if (agencyIDs.get(rs.getString("agencyName")) == null) { + Node myNode = gds.createNode(); + myNode.setProperty("Label", rs.getString("agencyName")); + myNode.setProperty("type", "agency"); + agencyIDs.put(rs.getString("agencyName"), myNode.getId()); + if (myNode.getId() %100 == 0) { + System.out.println("Agency "+myNode.getId()); +} + } + agencyID = agencyIDs.get(rs.getString("agencyName")); + + if (rs.getString("supplierABN") != "0" && rs.getString("supplierABN") != "") { supplierKey = rs.getString("supplierABN"); } else { @@ -115,19 +115,19 @@ } // inject some data if (supplierIDs.get(supplierKey) == null) { - Node myNode = gds.createNode(); - myNode.setProperty("Label", rs.getString("supplierName")); - myNode.setProperty("type", "supplier"); + Node myNode = gds.createNode(); + myNode.setProperty("Label", rs.getString("supplierName")); + myNode.setProperty("type", "supplier"); supplierIDs.put(supplierKey, myNode.getId()); - if (myNode.getId() % 1000 == 0) { - System.out.println("Supplier " + myNode.getId()); - } + if (myNode.getId() %1000 == 0) { + System.out.println("Supplier "+myNode.getId()); +} } supplierID = supplierIDs.get(supplierKey); long rel = inserter.createRelationship(agencyID, supplierID, - DynamicRelationshipType.withName("KNOWS"), null); + DynamicRelationshipType.withName("KNOWS"), null); inserter.setRelationshipProperty(rel, "Weight", rs.getDouble("sum")); } @@ -154,4 +154,6 @@ indexProvider.shutdown(); inserter.shutdown(); } + + } --- /dev/null +++ b/admin/partialdata/scraper.txt @@ -1,1 +1,74 @@ += 1024 && $i < 4; $i++) $size /= 1024; + return round($size, 2).$units[$i]; +} +$days = 4; +if (isset($_REQUEST['days'])) $days = $_REQUEST['days']; +$startDate = strtotime("05-Jun-2008"); +if (isset($_REQUEST['startDate'])) $startDate = $_REQUEST['startDate']; + +function getFile($startDate, $days, $minVal, $maxVal) { +global $split; + $endDate = strtotime(date("Y-m-d", $startDate)." +".$days." days"); +$file = date("dMY",$startDate).'to'.date("dMY",$endDate).'val'.$minVal.'to'.$maxVal.'.xls'; +echo "Fetching $file ($days days) ($minVal < value < $maxVal )... "; +$url = "https://www.tenders.gov.au/?event=public.advancedsearch.CNSONRedirect&type=cnEvent&atmType=archived%2Cclosed%2Cpublished%2Cproposed&agencyUUID=&agencyStatus=-1&portfolioUUID=&keyword=&KeywordTypeSearch=AllWord&CNID=&dateType=Publish+Date&dateStart=".date("d-M-Y",$startDate)."&dateEnd=".date("d-M-Y",$endDate)."&supplierName=&supplierABN=&valueFrom=".$minVal."&valueTo=".$maxVal."&ATMID=&AgencyRefId=&consultancy=&download=Download+results"; +echo ""; +$current = file_get_contents($url); +if (strpos($current,"There are no results that match your selection.")> 0 ) { + echo "Empty file!
"; +} +if (strpos($current,"Your search returned more than 1000 results.") === false) { + file_put_contents($file, $current); + echo "$file saved
"; + echo format_bytes(filesize($file))."
"; + echo 'Load next '.($days).' days
'; + echo 'Load next '.($days*2).' days
'; + echo 'Load next '.($days).' days with split
'; + flush(); +if (!isset($_REQUEST['split']) && !$split) { +echo "Success so fetching next $days...
"; +getFile($endDate, $days, "" , ""); +} + return true; +} else { + echo "Too many records!
"; + echo 'Load '.($days/2).' days instead?
'; + echo 'Split instead?
'; + flush(); +if (!isset($_REQUEST['split']) && !$split) { +echo "Failure so splitting ...
"; + doSplit($startDate, $days); +} + return false; +} +} +function doSplit($startDate, $days) { +global $split; +$split = true; +set_time_limit(20); +getFile($startDate, $days, 0, 12000); +getFile($startDate, $days, 12000, 16000); + getFile($startDate, $days, 16000, 20000); + getFile($startDate, $days, 20000, 30000); + getFile($startDate, $days, 30000, 40000); +// getFile($startDate, $days, 40000, 80000); + getFile($startDate, $days, 40000, 60000); + getFile($startDate, $days, 60000, 80000); +// getFile($startDate, $days, 80000, 300000); + getFile($startDate, $days, 80000, 150000); + getFile($startDate, $days, 150000, 300000); + getFile($startDate, $days, 300000, 999999999); +} +if (isset($_REQUEST['split'])) { + doSplit($startDate, $days); +} else { + getFile($startDate, $days, "" , ""); +} +?> + --- a/displayCalendar.php +++ b/displayCalendar.php @@ -20,8 +20,8 @@ $query->execute(); databaseError($conn->errorInfo()); - //MethodCountGraph($supplier); - //CnCGraph($supplier); + MethodCountGraph($supplier); + CnCGraph($supplier); echo " --- a/exportData.csv.php +++ b/exportData.csv.php @@ -10,9 +10,7 @@ foreach ($unspscresult->fetchAll() as $row) { $unspsc[$row['UNSPSC']] = $row['Title']; } -/*SELECT count(*), extract("week" from "publishDate") as week,extract("year" from "publishDate") as year -FROM contractnotice -where "childCN" is null group by week, year order by year, week*/ + $query = $conn->prepare(' SELECT "CNID",contractnotice."agencyName",agency_nametoabn.abn as "agencyABN", EXTRACT(EPOCH FROM "publishDate") as "publishDate", --- a/lib/graphs.inc.php +++ b/lib/graphs.inc.php @@ -27,7 +27,11 @@ var d1 = []; var d2 = []; prepare($query); $query->execute(); $errors = $conn->errorInfo(); @@ -40,7 +44,9 @@ echo "d1.push([ " . intval($delta['cnid']) . ", " . intval($delta['count']) . "]); \n"; }; - $query = 'select cnid, count(*) from (select ("CNID" - MOD("CNID",100)) as cnid from contractnotice where "CNID" < 999999 and "parentCN" is not null) as a group by cnid order by cnid'; + $query = 'select cnid, count(*) from (select ("CNID"::integer - MOD("CNID"::integer,100)) as cnid + from contractnotice where "CNID" not like \'%-A%\' and "parentCN" is not null) + as a group by cnid order by cnid'; $query = $conn->prepare($query); $query->execute(); $errors = $conn->errorInfo(); @@ -71,7 +77,7 @@ points: { show: true } }, bars: { show: true } - }, + }] ]; var options = { @@ -136,7 +142,7 @@ global $conn; includeFlot(); - $query = 'SELECT SUM("value") as val, MAX(contractnotice."agencyName") as agencyname FROM contractnotice join agency_nametoabn on contractnotice."agencyName"=agency_nametoabn."agencyName" WHERE "childCN" is null + $query = 'SELECT SUM("value") as val, MAX(contractnotice."agencyName") as agencyname FROM contractnotice join agency on contractnotice."agencyName"=agency."agencyName" WHERE "childCN" is null GROUP BY abn ORDER BY SUM("value") DESC'; $query = $conn->prepare($query); $query->execute(); @@ -195,15 +201,11 @@ ; function agencySuppliersGraph($agency) { - - global $conn; $agency = "AusAid"; $topX = 15; - $query = 'SELECT SUM(value) as val, "supplierName" FROM contractnotice WHERE (extract ("YEAR" from "contractStart") >= :startYear) AND "childCN" is null AND "agencyName" = :agency + $query = 'SELECT SUM(value) as val, supplierName FROM `contractnotice` WHERE (YEAR(contractStart) >= $startYear) AND "childCN" is null AND agencyName = \'$agency\' GROUP BY lower(supplierName) ORDER BY val DESC limit $topX'; $query = $conn->prepare($query); - $query->bindParam(":startYear",$startYear); - $query->bindParam(":agency",$agency); $query->execute(); databaseError($conn->errorInfo()); $suppliers = Array(); @@ -214,11 +216,9 @@ } - $query = 'SELECT sum(a.val) as value, count(1) as count from (SELECT SUM(value) as val, "supplierName" FROM contractnotice WHERE (extract ("YEAR" from "contractStart") >= :startYear) AND "childCN" is null and "agencyName" = :agency -GROUP BY lower("supplierName") ORDER BY val DESC LIMIT 18446744073709551610 OFFSET $topX) as a'; - $query = $conn->prepare($query); - $query->bindParam(":startYear",$startYear); - $query->bindParam(":agency",$agency); + $query = 'SELECT sum(a.val) as value, count(1) as count from (SELECT SUM(value) as val, supplierName FROM `contractnotice` WHERE (YEAR(contractStart) >= $startYear) AND "childCN" is null and agencyName = \'$agency\' +GROUP BY lower(supplierName) ORDER BY val DESC LIMIT 18446744073709551610 OFFSET $topX) as a'; + $query = $conn->prepare($query); $query->execute(); databaseError($conn->errorInfo()); foreach ($query->fetchAll() as $row) { @@ -230,10 +230,8 @@ } function CnCGraph() { - - global $conn; - $query = 'select procurementMethod, count(1) as count, SUM(value) as val, MONTH(contractStart) as month, YEAR(contractStart) as year from contractnotice -where $agencyQ $supplierQ $standardQ group by procurementMethod,year,month order by procurementMethod,year,month'; + $query = "select procurementMethod, count(1) as count, SUM(value) as val, MONTH(contractStart) as month, YEAR(contractStart) as year from `contractnotice` +where $agencyQ $supplierQ $standardQ group by procurementMethod,year,month order by procurementMethod,year,month"; $query = $conn->prepare($query); $query->execute(); databaseError($conn->errorInfo()); @@ -275,21 +273,19 @@ "Consultancies", "Confidentialities" ); - $query = 'SELECT \'consultancy\', count(1) FROM contractnotice WHERE $agencyQ $supplierQ consultancy=\'Yes\' AND "childCN" is null;'; + $query = 'SELECT \'consultancy\', count(1) FROM `contractnotice` WHERE $agencyQ $supplierQ consultancy=\'Yes\' AND "childCN" is null;'; $result = $conn->query($query); $row = $result->fetch(PDO::FETCH_ASSOC); $attributes[0] = $row[1]; - $query = 'SELECT \'confidentiality\', count(1) FROM contractnotice WHERE $agencyQ $supplierQ (confidentialityContract=\'Yes\' OR confidentialityOutputs=\'Yes\') AND "childCN" is null;'; + $query = 'SELECT \'confidentiality\', count(1) FROM `contractnotice` WHERE $agencyQ $supplierQ (confidentialityContract=\'Yes\' OR confidentialityOutputs=\'Yes\') AND "childCN" is null;'; $result = $conn->query($query); $row = $result->fetch(PDO::FETCH_ASSOC); $attributes[1] = $row[1]; } function ContractPublishedGraph() { - - global $conn; $query = 'SELECT YEAR(publishDate), MONTH(publishDate), -SUM(value) as val, count(1) as count FROM contractnotice +SUM(value) as val, count(1) as count FROM `contractnotice` WHERE (YEAR(publishDate) >= 2008) AND "childCN" is null GROUP BY MONTH(publishDate), YEAR(publishDate) @@ -312,10 +308,8 @@ } function ContractStartingGraph() { - - global $conn; $query = 'SELECT YEAR(contractStart), MONTH(contractStart), -SUM(value) as val, count(1) as count FROM contractnotice +SUM(value) as val, count(1) as count FROM `contractnotice` WHERE (YEAR(contractStart) >= 2008) AND "childCN" is null GROUP BY MONTH(contractStart), YEAR(contractStart) @@ -338,9 +332,7 @@ } function MethodCountGraph() { - - global $conn; - $query = 'select procurementMethod, count(1) as count, SUM(value) as value, MONTH(contractStart) as month, YEAR(contractStart) as year from contractnotice + $query = 'select procurementMethod, count(1) as count, SUM(value) as value, MONTH(contractStart) as month, YEAR(contractStart) as year from `contractnotice` where $agencyQ $supplierQ $standardQ group by procurementMethod,year,month order by procurementMethod,year,month'; $methods = Array("Direct", "Open", "Select"); $dates = Array(); @@ -375,9 +367,7 @@ } function MethodValueGraph() { - - global $conn; - $query = "select procurementMethod, SUM(value) as value, MONTH(contractStart) as month, YEAR(contractStart) as year from contractnotice + $query = "select procurementMethod, SUM(value) as value, MONTH(contractStart) as month, YEAR(contractStart) as year from `contractnotice` where $agencyQ $supplierQ $standardQ group by procurementMethod,year,month order by procurementMethod,year,month"; $methods = Array("Direct", "Open", "Select"); $dates = Array();