Port data exporter to postgres
--- /dev/null
+++ b/admin/neo4jimporter/.classpath
@@ -1,1 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" output="target/classes" path="src/main/java"/>
+ <classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources"/>
+ <classpathentry kind="src" output="target/test-classes" path="src/test/java"/>
+ <classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
+ <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"/>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
--- /dev/null
+++ b/admin/neo4jimporter/.project
@@ -1,1 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>neo4jimporter</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.m2e.core.maven2Builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ <nature>org.eclipse.m2e.core.maven2Nature</nature>
+ </natures>
+</projectDescription>
--- /dev/null
+++ b/admin/neo4jimporter/.settings/org.eclipse.jdt.core.prefs
@@ -1,1 +1,7 @@
+#Sun Aug 07 18:15:32 EST 2011
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.5
--- /dev/null
+++ b/admin/neo4jimporter/.settings/org.eclipse.m2e.core.prefs
@@ -1,1 +1,6 @@
+#Sun Aug 07 18:14:30 EST 2011
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
--- /dev/null
+++ b/admin/neo4jimporter/pom.xml
@@ -1,1 +1,18 @@
-
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>org.lambdacomplex.contractdashboard</groupId>
+ <artifactId>neo4jimporter</artifactId>
+ <version>0.0.1-SNAPSHOT</version>
+ <dependencies>
+ <dependency>
+ <groupId>org.neo4j</groupId>
+ <artifactId>neo4j-kernel</artifactId>
+ <version>1.4</version>
+ </dependency>
+ <dependency>
+ <groupId>postgresql</groupId>
+ <artifactId>postgresql</artifactId>
+ <version>9.0-801.jdbc4</version>
+</dependency>
+ </dependencies>
+</project>
--- /dev/null
+++ b/admin/neo4jimporter/src/main/java/Importer.java
@@ -1,1 +1,155 @@
+import java.io.ObjectInputStream.GetField;
+import java.math.BigInteger;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.SQLWarning;
+import java.sql.Statement;
+import java.util.HashMap;
+import java.util.Map;
+import org.neo4j.graphdb.DynamicRelationshipType;
+import org.neo4j.kernel.impl.batchinsert.BatchInserter;
+import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl;
+
+/**
+ * One-off loader that copies agency/supplier pairs from the PostgreSQL
+ * <code>contractnotice</code> table into a Neo4j store under
+ * <code>neo4j-db/</code> using the batch inserter.
+ * <p>
+ * Each distinct agency and supplier becomes a node with a "Label" property,
+ * and every result row adds a "KNOWS" relationship from the agency node to
+ * the supplier node.
+ */
+public class Importer {
+
+    /** Query returning one row per distinct agency/supplier pair. */
+    private static final String PAIR_QUERY = "SELECT distinct contractnotice.\"agencyName\", contractnotice.\"supplierABN\", contractnotice.\"supplierName\" FROM public.contractnotice limit 300;";
+
+    /**
+     * Connects to PostgreSQL, runs the import and shuts the Neo4j store down.
+     * The program stops early (before any Neo4j store is opened) when the
+     * JDBC driver is missing or the connection cannot be established.
+     *
+     * @param argv ignored
+     */
+    public static void main(String[] argv) {
+        System.out.println("-------- PostgreSQL "
+                + "JDBC Connection Testing ------------");
+
+        try {
+            Class.forName("org.postgresql.Driver");
+        } catch (ClassNotFoundException e) {
+            System.out.println("Where is your PostgreSQL JDBC Driver? "
+                    + "Include in your library path!");
+            e.printStackTrace();
+            // Nothing below can work without the driver.
+            return;
+        }
+        System.out.println("PostgreSQL JDBC Driver Registered!");
+
+        // NOTE(review): credentials are hard-coded; consider reading them
+        // from configuration before this is used outside a dev machine.
+        Connection conn;
+        try {
+            conn = DriverManager.getConnection(
+                    "jdbc:postgresql://127.0.0.1:5432/contractDashboard",
+                    "postgres", "snmc");
+        } catch (SQLException e) {
+            System.out.println("Connection Failed! Check output console");
+            e.printStackTrace();
+            return;
+        }
+        if (conn == null) {
+            System.out.println("Failed to make connection!");
+            return;
+        }
+        System.out.println("You made it, take control your database now!");
+
+        Map<String, String> props = new HashMap<String, String>();
+        props.put("neostore.nodestore.db.mapped_memory", "22000000"); // <expected number of nodes * 9 bytes>
+        props.put("neostore.relationshipstore.db.mapped_memory", "22000000"); // <expected number of relationships * 33 bytes>
+        // create the batch inserter
+        BatchInserter inserter = new BatchInserterImpl("neo4j-db/", props);
+
+        try {
+            printWarnings(conn);
+            importPairs(conn, inserter);
+        } catch (SQLException se) {
+            System.out.println("SQL Exception:");
+
+            // Loop through the chained SQL exceptions
+            while (se != null) {
+                System.out.println("State : " + se.getSQLState());
+                System.out.println("Message: " + se.getMessage());
+                System.out.println("Error : " + se.getErrorCode());
+
+                se = se.getNextException();
+            }
+        } catch (Exception e) {
+            System.out.println(e);
+        } finally {
+            // shutdown, makes sure all changes are written to disk
+            inserter.shutdown();
+            try {
+                conn.close();
+            } catch (SQLException e) {
+                System.out.println("Could not close connection: " + e.getMessage());
+            }
+        }
+    }
+
+    /** Prints every warning currently chained on the connection. */
+    private static void printWarnings(Connection conn) throws SQLException {
+        for (SQLWarning warn = conn.getWarnings(); warn != null; warn = warn
+                .getNextWarning()) {
+            System.out.println("SQL Warning:");
+            System.out.println("State : " + warn.getSQLState());
+            System.out.println("Message: " + warn.getMessage());
+            System.out.println("Error : " + warn.getErrorCode());
+        }
+    }
+
+    /**
+     * Reads the agency/supplier pairs and writes nodes and relationships.
+     * The statement and result set are closed even when a row fails.
+     */
+    private static void importPairs(Connection conn, BatchInserter inserter)
+            throws SQLException {
+        Statement stmt = conn.createStatement();
+        try {
+            ResultSet rs = stmt.executeQuery(PAIR_QUERY);
+            try {
+                String previousAgency = null;
+                while (rs.next()) {
+                    String agencyName = rs.getString("agencyName");
+                    String supplierAbn = rs.getString("supplierABN");
+                    String supplierName = rs.getString("supplierName");
+
+                    // Suppliers are keyed by ABN; an ABN of "0" (or none)
+                    // means the name has to serve as the key instead.
+                    String supplierKey = (supplierAbn != null && !"0".equals(supplierAbn))
+                            ? supplierAbn : supplierName;
+                    if (agencyName == null || supplierKey == null) {
+                        // Nothing to hash; skip the row rather than abort the import.
+                        continue;
+                    }
+
+                    long agency = doHash(agencyName);
+                    // Consecutive rows for the same agency need no second existence check.
+                    if (!agencyName.equals(previousAgency)) {
+                        ensureNode(inserter, agency, agencyName);
+                        previousAgency = agencyName;
+                    }
+
+                    long supplier = doHash(supplierKey);
+                    // Fall back to the key so a null label is never stored.
+                    ensureNode(inserter, supplier,
+                            supplierName != null ? supplierName : supplierKey);
+
+                    inserter.createRelationship(agency, supplier,
+                            DynamicRelationshipType.withName("KNOWS"), null);
+                }
+            } finally {
+                rs.close();
+            }
+        } finally {
+            stmt.close();
+        }
+    }
+
+    /** Creates the node with the given id and "Label" unless it already exists. */
+    private static void ensureNode(BatchInserter inserter, long id, String label) {
+        if (!inserter.nodeExists(id)) {
+            Map<String, Object> properties = new HashMap<String, Object>();
+            properties.put("Label", label);
+            inserter.createNode(id, properties);
+        }
+    }
+
+    /**
+     * Derives a node id from a string by taking the low-order 64 bits of its
+     * MD5 digest. The bytes are produced with the platform default charset.
+     * <p>
+     * NOTE(review): the result can be negative or very large; confirm that
+     * BatchInserter.createNode(long, Map) accepts such ids.
+     *
+     * @param input text to hash; must not be null
+     * @return the low-order 64 bits of the MD5 digest of <code>input</code>
+     * @throws IllegalStateException if the JVM provides no MD5 implementation
+     */
+    static long doHash(String input) {
+        try {
+            MessageDigest m = MessageDigest.getInstance("MD5");
+            m.update(input.getBytes());
+            return new BigInteger(1, m.digest()).longValue();
+        } catch (NoSuchAlgorithmException e) {
+            // Returning a constant here would map every key to the same node id.
+            throw new IllegalStateException("MD5 digest is not available", e);
+        }
+    }
+
+}
--- /dev/null
+++ b/admin/neo4jimporter/src/main/java/JDBCExample.java
@@ -1,1 +1,50 @@
-
+import java.sql.DriverManager;
+import java.sql.Connection;
+import java.sql.SQLException;
+
+/**
+ * Minimal connectivity check: loads the PostgreSQL JDBC driver, opens a
+ * connection to the contractDashboard database, reports the outcome on
+ * standard output and closes the connection again.
+ */
+public class JDBCExample {
+
+    /**
+     * Runs the connectivity check.
+     *
+     * @param argv ignored
+     */
+    public static void main(String[] argv) {
+
+        System.out.println("-------- PostgreSQL "
+                + "JDBC Connection Testing ------------");
+
+        try {
+            Class.forName("org.postgresql.Driver");
+        } catch (ClassNotFoundException e) {
+            System.out.println("Where is your PostgreSQL JDBC Driver? "
+                    + "Include in your library path!");
+            e.printStackTrace();
+            return;
+        }
+
+        System.out.println("PostgreSQL JDBC Driver Registered!");
+
+        Connection connection = null;
+        try {
+            connection = DriverManager.getConnection(
+                    "jdbc:postgresql://127.0.0.1:5432/contractDashboard", "postgres",
+                    "snmc");
+        } catch (SQLException e) {
+            System.out.println("Connection Failed! Check output console");
+            e.printStackTrace();
+            return;
+        }
+
+        if (connection == null) {
+            System.out.println("Failed to make connection!");
+            return;
+        }
+
+        System.out.println("You made it, take control your database now!");
+
+        // The check is done; release the server-side session.
+        try {
+            connection.close();
+        } catch (SQLException e) {
+            e.printStackTrace();
+        }
+    }
+
+}
Binary files /dev/null and b/admin/neo4jimporter/target/classes/Importer.class differ
Binary files /dev/null and b/admin/neo4jimporter/target/classes/JDBCExample.class differ
--- a/admin/partialdata/scrapesingle.php
+++ b/admin/partialdata/scrapesingle.php
@@ -1,4 +1,6 @@
<?php
+include_once ("../../lib/common.inc.php");
+
$cnid = 1234;
// http://www.lastcraft.com/browser_documentation.php
// http://code.google.com/p/phpquery/
@@ -800,12 +802,80 @@
</body>
</html> ');
-
+ // Lookup table used by the scraping loop below: keys are the row labels
+ // read from each table header cell, values are the contractnotice column
+ // names under which the row's value is collected in $cnFields.
+ // "Status" maps to an empty string, i.e. no destination column is named.
+ $datamapping0711 = array(
+ "Agency" => "agencyName",
+ "Parent CN ID" => "parentCN",
+ "CN ID" => "CNID",
+ "Publish Date" => "publishDate",
+ "Amendment Date" => "amendDate",
+ "Status" => "",
+ "StartDate" => "contractStart",
+ "EndDate" => "contractEnd",
+ "Contract Value (AUD)" => "value",
+ "Description" => "description",
+ "Agency Reference ID" => "agencyID",
+ "Category" => "category",
+ "Procurement Method" => "procurementMethod",
+ "ATM ID" => "atmID",
+ "SON ID" => "SONID",
+ "Confidentiality - Contract" => "confidentialityContract",
+ "Confidentiality - Contract Reason(s)" => "confidentialityContractReason",
+ "Confidentiality - Outputs" => "confidentialityOutputs",
+ "Confidentiality - Outputs Reason(s)" => "confidentialityOutputsReason",
+ "Consultancy" => "consultancy",
+ "Consultancy Reason(s)" => "consultancyReason",
+ "Amendment Reason" => "amendmentReason",
+ "Name" => "supplierName",
+ "Postal Address" => "supplierAddress",
+ "Town/City" => "supplierCity",
+ "Postcode" => "supplierPostcode",
+ "Country" => "supplierCountry",
+ "ABN Exempt" => "supplierABNExempt",
+ "ABN" => "supplierABN",
+ "Branch" => "contactBranch",
+ "Division" => "contactDivision",
+ "Office Postcode" => "contactPostcode"
+ );
+ // Collected column => value pairs for the contract notice being scraped.
+$cnFields = Array();
foreach(pq('tr') as $tr) {
$tra = dom_to_array($tr);
- $tra['th'] = trim(str_replace("/th>","",$tra['th']));
-echo $tra['th']. " = " .trim(print_r($tra['td'],true))."<br>\n";
-
+    // The header cell holds the label, the data cell holds its value.
+    $fieldName = trim(str_replace("/th>","",$tra['th']));
+    $fieldValue = trim(print_r($tra['td'],true));
+    if ($fieldName == "State/Territory" || $fieldName == "Contact Name" || $fieldName == "Contact Phone") {
+        // these rows are deliberately not stored
+    } else if ($fieldName == "Contract Period") {
+        // One "<start> to <end>" row feeds both period columns.
+        // NOTE(review): splitting on the bare string "to" would also split
+        // inside a word containing it (e.g. "October") - confirm the date format.
+        $contractPeriod = explode("to",$fieldValue);
+        $cnFields["contractStart"] = trim($contractPeriod[0]);
+        $cnFields["contractEnd"] = isset($contractPeriod[1]) ? trim($contractPeriod[1]) : "";
+    } else if (isset($datamapping0711[$fieldName]) && $datamapping0711[$fieldName] != "") {
+        // Only labels with a named destination column are stored; unknown
+        // labels and ones mapped to "" would otherwise yield an empty column name.
+        $fieldName = $datamapping0711[$fieldName];
+        if ($fieldName == "parentCN" || $fieldName == "CNID") {
+            $fieldValue = substr($fieldValue, 2); // take off the "CN" prefix
+            $fieldValue = str_replace("-A", "00", $fieldValue); // make amendments really big numbers
+        } elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") {
+            // Store the normalised timestamp itself; leave the raw text when it cannot be parsed.
+            $timestamp = strtotime($fieldValue);
+            if ($timestamp !== false) {
+                $fieldValue = date('Y-m-d H:i:s', $timestamp);
+            }
+        }
+        echo $fieldName. " = " .$fieldValue."<br>\n";
+        $cnFields[$fieldName] = $fieldValue;
+    }
}
-
+// Build the parameterised INSERT and UPDATE statements from the collected
+// fields. The column names are mixed-case, so they are double-quoted in both
+// statements; unquoted, PostgreSQL would fold them to lower case.
+$cnPlaceholders = array();
+$cnAssignments = array();
+foreach (array_keys($cnFields) as $cnColumn) {
+    $cnPlaceholders[] = "?";
+    $cnAssignments[] = '"' . $cnColumn . '"=?';
+}
+
+$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( '
+    . implode(", ", $cnPlaceholders) . ");";
+echo $contractNoticeInsertQ;
+//$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ);
+
+$contractNoticeUpdateQ = 'UPDATE contractnotice SET ' . implode(", ", $cnAssignments) . ' WHERE "CNID"=?;';
+// The WHERE placeholder comes last, so the CNID value is appended after the column values.
+$cnFields[] = $cnFields["CNID"];
+echo $contractNoticeUpdateQ;
+$contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ);
?>
--- a/exportData.csv.php
+++ b/exportData.csv.php
@@ -3,28 +3,34 @@
include_once("./lib/common.inc.php");
setlocale(LC_CTYPE, 'C');
// source: http://stackoverflow.com/questions/81934/easy-way-to-export-a-sql-table-without-access-to-the-server-or-phpmyadmin#81951
-$result = mysql_query('
-SELECT CNID,contractnotice.agencyName,agency.abn as agencyABN,DATE(publishDate),contractStart,contractEnd,value,description,procurementMethod,category,categoryUNSPSC
-,supplierABN,supplierName,supplierCity,supplierPostcode,supplierCountry,contactPostcode,
+$query = $conn->prepare('
+SELECT "CNID",contractnotice."agencyName",agency.abn as "agencyABN",DATE("publishDate"),"contractStart","contractEnd",value,description,"procurementMethod",category,"categoryUNSPSC"
+,"supplierABN","supplierName","supplierCity","supplierPostcode","supplierCountry","contactPostcode",
(
- case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID
+ case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID,
-concat("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN",CNID) as sourceURL
-FROM `contractnotice` join agency on contractnotice.agencyName=agency.agencyName where childCN = 0');
-if (!$result) die('Couldn\'t fetch records');
-$num_fields = mysql_num_fields($result);
+(\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as sourceURL
+FROM contractnotice join agency on contractnotice."agencyName"=agency."agencyName" where "childCN" = 0 limit 5');
+// Check the prepared statement before using it, then check that it ran;
+// execute() reports failure through its return value.
+if (!$query) {
+    databaseError($conn->errorInfo());
+    die('Couldn\'t fetch records');
+}
+if (!$query->execute()) {
+    databaseError($query->errorInfo());
+    die('Couldn\'t fetch records');
+}
+
+$num_fields = $query->columnCount();
$headers = array();
for ($i = 0; $i < $num_fields; $i++) {
- $headers[] = mysql_field_name($result , $i);
+ $meta = $query->getColumnMeta($i);
+ $headers[] = $meta['name'];
}
$fp = fopen('php://output', 'w');
-if ($fp && $result) {
+if ($fp && $query) {
header('Content-Type: text/csv');
header('Content-Disposition: attachment; filename="export.'.date("c").'.csv"');
header('Pragma: no-cache');
header('Expires: 0');
fputcsv($fp, $headers);
- while ($row = mysql_fetch_assoc($result)) {
+foreach ($query->fetchAll(PDO::FETCH_NUM) as $row) {
foreach ($row as &$colvalue) {
$colvalue = preg_replace( '/[^[:print:]]/', '',
utf8_encode($colvalue));