From: maxious Date: Mon, 08 Aug 2011 05:23:40 +0000 Subject: Port data exporter to postgres X-Git-Url: http://maxious.lambdacomplex.org/git/?p=contractdashboard.git&a=commitdiff&h=0bdd84df9780e4b933b01e1dd0d7a6ec1f469155 --- Port data exporter to postgres --- --- /dev/null +++ b/admin/neo4jimporter/.classpath @@ -1,1 +1,11 @@ + + + + + + + + + + --- /dev/null +++ b/admin/neo4jimporter/.project @@ -1,1 +1,24 @@ + + + neo4jimporter + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + --- /dev/null +++ b/admin/neo4jimporter/.settings/org.eclipse.jdt.core.prefs @@ -1,1 +1,7 @@ +#Sun Aug 07 18:15:32 EST 2011 +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 +org.eclipse.jdt.core.compiler.compliance=1.5 +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning +org.eclipse.jdt.core.compiler.source=1.5 --- /dev/null +++ b/admin/neo4jimporter/.settings/org.eclipse.m2e.core.prefs @@ -1,1 +1,6 @@ +#Sun Aug 07 18:14:30 EST 2011 +activeProfiles= +eclipse.preferences.version=1 +resolveWorkspaceProjects=true +version=1 --- /dev/null +++ b/admin/neo4jimporter/pom.xml @@ -1,1 +1,18 @@ - + + 4.0.0 + org.lambdacomplex.contractdashboard + neo4jimporter + 0.0.1-SNAPSHOT + + + org.neo4j + neo4j-kernel + 1.4 + + + postgresql + postgresql + 9.0-801.jdbc4 + + + --- /dev/null +++ b/admin/neo4jimporter/src/main/java/Importer.java @@ -1,1 +1,155 @@ +import java.io.ObjectInputStream.GetField; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.SQLWarning; +import java.sql.Statement; +import java.util.HashMap; +import java.util.Map; +import org.neo4j.graphdb.DynamicRelationshipType; +import 
org.neo4j.kernel.impl.batchinsert.BatchInserter; +import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl; + +public class Importer { + + public static void main(String[] argv) { + + Map props = new HashMap(); + props.put("neostore.nodestore.db.mapped_memory", "22000000"); // + props.put("neostore.relationshipstore.db.mapped_memory", "22000000"); // + // create the batch inserter + BatchInserter inserter = new + BatchInserterImpl("neo4j-db/",props + ); + + + + + + System.out.println("-------- PostgreSQL " + + "JDBC Connection Testing ------------"); + + try { + + Class.forName("org.postgresql.Driver"); + + } catch (ClassNotFoundException e) { + + System.out.println("Where is your PostgreSQL JDBC Driver? " + + "Include in your library path!"); + e.printStackTrace(); + + } + + System.out.println("PostgreSQL JDBC Driver Registered!"); + + Connection conn = null; + + try { + + conn = DriverManager.getConnection( + "jdbc:postgresql://127.0.0.1:5432/contractDashboard", + "postgres", "snmc"); + + } catch (SQLException e) { + + System.out.println("Connection Failed! 
Check output console"); + e.printStackTrace(); + + } + + if (conn != null) { + System.out.println("You made it, take control your database now!"); + } else { + System.out.println("Failed to make connection!"); + } + try { + // Print all warnings + for (SQLWarning warn = conn.getWarnings(); warn != null; warn = warn + .getNextWarning()) { + System.out.println("SQL Warning:"); + System.out.println("State : " + warn.getSQLState()); + System.out.println("Message: " + warn.getMessage()); + System.out.println("Error : " + warn.getErrorCode()); + } + + // Get a statement from the connection + Statement stmt = conn.createStatement(); + + // Execute the query + ResultSet rs = stmt.executeQuery("SELECT distinct contractnotice.\"agencyName\", contractnotice.\"supplierABN\", contractnotice.\"supplierName\" FROM public.contractnotice limit 300;"); +String previousAgency = ""; + + // Loop through the result set + while (rs.next()) { + long supplier,agency; + agency = doHash(rs.getString("agencyName")); + if (rs.getString("agencyName") != previousAgency) { + if (!inserter.nodeExists(agency)) { + Map properties = new HashMap(); + properties.put("Label", rs.getString("agencyName")); + inserter.createNode(agency, properties); + } + } + if (rs.getString("supplierABN") != "0") { + supplier = doHash(rs.getString("supplierABN")); + } else { + supplier = doHash(rs.getString("supplierName")); + } + // inject some data + if (!inserter.nodeExists(supplier)) { + Map properties = new HashMap(); + + properties.put("Label", rs.getString("supplierName")); + inserter.createNode(supplier, properties); + } + + inserter.createRelationship(agency, supplier, + DynamicRelationshipType.withName("KNOWS"), null); + + } + // Close the result set, statement and the connection + rs.close(); + stmt.close(); + conn.close(); + } catch (SQLException se) { + System.out.println("SQL Exception:"); + + // Loop through the SQL Exceptions + while (se != null) { + System.out.println("State : " + se.getSQLState()); + 
System.out.println("Message: " + se.getMessage()); + System.out.println("Error : " + se.getErrorCode()); + + se = se.getNextException(); + } + } catch (Exception e) { + System.out.println(e); + } + // shutdown, makes sure all changes are written to disk + inserter.shutdown(); + } + + static long doHash(String input) { + MessageDigest m; + try { + m = MessageDigest.getInstance("MD5"); + m.reset(); + m.update(input.getBytes()); + byte[] digest = m.digest(); + return new BigInteger(1, digest).longValue(); + } catch (NoSuchAlgorithmException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return 0; + + } + + +} --- /dev/null +++ b/admin/neo4jimporter/src/main/java/JDBCExample.java @@ -1,1 +1,50 @@ - +import java.sql.DriverManager; +import java.sql.Connection; +import java.sql.SQLException; + +public class JDBCExample { + + public static void main(String[] argv) { + + System.out.println("-------- PostgreSQL " + + "JDBC Connection Testing ------------"); + + try { + + Class.forName("org.postgresql.Driver"); + + } catch (ClassNotFoundException e) { + + System.out.println("Where is your PostgreSQL JDBC Driver? " + + "Include in your library path!"); + e.printStackTrace(); + return; + + } + + System.out.println("PostgreSQL JDBC Driver Registered!"); + + Connection connection = null; + + try { + + connection = DriverManager.getConnection( + "jdbc:postgresql://127.0.0.1:5432/contractDashboard", "postgres", + "snmc"); + + } catch (SQLException e) { + + System.out.println("Connection Failed! 
Check output console");
+            e.printStackTrace();
+            return;
+
+        }
+
+        if (connection != null) {
+            System.out.println("You made it, take control your database now!");
+        } else {
+            System.out.println("Failed to make connection!");
+        }
+    }
+
+}
--- a/admin/partialdata/scrapesingle.php
+++ b/admin/partialdata/scrapesingle.php
@@ -1,4 +1,6 @@
 ');
-
+    // Maps the heading read from each table row to a contractnotice column name.
+    // An empty target (see "Status") means there is no column for that heading;
+    // such headings are skipped in the loop below.
+    $datamapping0711 = array(
+        "Agency" => "agencyName",
+        "Parent CN ID" => "parentCN",
+        "CN ID" => "CNID",
+        "Publish Date" => "publishDate",
+        "Amendment Date" => "amendDate",
+        "Status" => "",
+        "StartDate" => "contractStart",
+        "EndDate" => "contractEnd",
+        "Contract Value (AUD)" => "value",
+        "Description" => "description",
+        "Agency Reference ID" => "agencyID",
+        "Category" => "category",
+        "Procurement Method" => "procurementMethod",
+        "ATM ID" => "atmID",
+        "SON ID" => "SONID",
+        "Confidentiality - Contract" => "confidentialityContract",
+        "Confidentiality - Contract Reason(s)" => "confidentialityContractReason",
+        "Confidentiality - Outputs" => "confidentialityOutputs",
+        "Confidentiality - Outputs Reason(s)" => "confidentialityOutputsReason",
+        "Consultancy" => "consultancy",
+        "Consultancy Reason(s)" => "consultancyReason",
+        "Amendment Reason" => "amendmentReason",
+        "Name" => "supplierName",
+        "Postal Address" => "supplierAddress",
+        "Town/City" => "supplierCity",
+        "Postcode" => "supplierPostcode",
+        "Country" => "supplierCountry",
+        "ABN Exempt" => "supplierABNExempt",
+        "ABN" => "supplierABN",
+        "Branch" => "contactBranch",
+        "Division" => "contactDivision",
+        "Office Postcode" => "contactPostcode"
+    );
+// column name => value for the contract notice being scraped
+$cnFields = Array();
 foreach(pq('tr') as $tr) {
 $tra = dom_to_array($tr);
-    $tra['th'] = trim(str_replace("/th>","",$tra['th']));
-echo $tra['th']. " = " .trim(print_r($tra['td'],true))."
\n";
-
+    $fieldName = trim(str_replace("/th>","",$tra['th']));
+    $fieldValue = trim(print_r($tra['td'],true));
+    if ($fieldName == "State/Territory" || $fieldName == "Contact Name" || $fieldName == "Contact Phone") {
+        // do nothing
+    } else if ($fieldName == "Contract Period") {
+        // Split on the word "to" with whitespace around it; a bare explode("to")
+        // also cuts inside words that contain "to" (e.g. "October").
+        $contractPeriod = preg_split('/\s+to\s+/', $fieldValue, 2);
+        if (count($contractPeriod) == 2) {
+            $cnFields["contractStart"] = trim($contractPeriod[0]);
+            $cnFields["contractEnd"] = trim($contractPeriod[1]);
+        }
+    } else if (!isset($datamapping0711[$fieldName]) || $datamapping0711[$fieldName] == "") {
+        // heading has no column: skip it so no empty column name ends up in the queries
+    } else {
+        $fieldName = $datamapping0711[$fieldName];
+        if ($fieldName == "parentCN" || $fieldName == "CNID") {
+            $fieldValue = substr($fieldValue, 2); // take off the "CN" prefix
+            $fieldValue = str_replace("-A", "00", $fieldValue); // make amendments really big numbers
+        } elseif ($fieldName == "amendDate" || $fieldName == "publishDate" || $fieldName == "contractStart" || $fieldName == "contractEnd") {
+            // Store the normalised date in the value that is kept; it used to be
+            // appended to an otherwise unused array and so was lost.
+            $timestamp = strtotime($fieldValue);
+            if ($timestamp !== false) {
+                $fieldValue = date('Y-m-d H:i:s', $timestamp);
+            }
+        }
+echo $fieldName. " = " .$fieldValue."
\n";
+$cnFields[$fieldName] = $fieldValue;
+    }
 }
-
+// INSERT with one "?" placeholder per collected column
+$contractNoticeInsertQ = 'INSERT INTO contractnotice ("' . implode('" , "', array_keys($cnFields)) . '") VALUES ( ';
+    for($key = 0; $key < sizeof($cnFields); $key++) {
+        $contractNoticeInsertQ.= ($key == 0 ? "" : ", ") . "?";
+    }
+    $contractNoticeInsertQ.= ");";
+    echo $contractNoticeInsertQ;
+    //$contractNoticeInsertQ = $conn->prepare($contractNoticeInsertQ);
+
+$contractNoticeUpdateQ = 'UPDATE contractnotice SET ';
+$count = 0;
+    foreach ($cnFields as $key => $f) {
+
+        $count++;
+        // Quote the column name as the INSERT above does: the columns are
+        // mixed-case and PostgreSQL lower-cases unquoted identifiers.
+        $contractNoticeUpdateQ.= '"'.$key.'"=? '.($count >= sizeof($cnFields) ? "" : ", ");
+    }
+    $contractNoticeUpdateQ.= ' WHERE "CNID"=?;';
+    // presumably the extra value binds to the WHERE placeholder; confirm at the execute() call
+    $cnFields[] = $cnFields["CNID"];
+    echo $contractNoticeUpdateQ;
+    $contractNoticeUpdateQ = $conn->prepare($contractNoticeUpdateQ);
 ?>
--- a/exportData.csv.php
+++ b/exportData.csv.php
@@ -3,28 +3,34 @@
 include_once("./lib/common.inc.php");
 setlocale(LC_CTYPE, 'C');
 // source: http://stackoverflow.com/questions/81934/easy-way-to-export-a-sql-table-without-access-to-the-server-or-phpmyadmin#81951
-$result = mysql_query('
-SELECT CNID,contractnotice.agencyName,agency.abn as agencyABN,DATE(publishDate),contractStart,contractEnd,value,description,procurementMethod,category,categoryUNSPSC
-,supplierABN,supplierName,supplierCity,supplierPostcode,supplierCountry,contactPostcode,
+$query = $conn->prepare('
+SELECT "CNID",contractnotice."agencyName",agency.abn as "agencyABN",DATE("publishDate"),"contractStart","contractEnd",value,description,"procurementMethod",category,"categoryUNSPSC"
+,"supplierABN","supplierName","supplierCity","supplierPostcode","supplierCountry","contactPostcode",
 (
- case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID
+ case when "supplierABN" != 0 THEN "supplierABN"::text ELSE "supplierName" END) as supplierID,
-concat("https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN",CNID) as sourceURL
-FROM `contractnotice` join agency on 
contractnotice.agencyName=agency.agencyName where childCN = 0');
-if (!$result) die('Couldn\'t fetch records');
-$num_fields = mysql_num_fields($result);
+(\'https://www.tenders.gov.au/?event=public.advancedsearch.keyword&keyword=CN\'::text || "CNID"::text) as sourceURL
+FROM contractnotice join agency on contractnotice."agencyName"=agency."agencyName" where "childCN" = 0 limit 5');
+// NOTE(review): the query above ends in "limit 5", so at most five rows are
+// exported; the MySQL query it replaces had no limit. Confirm this is intended.
+// Failure shows up as a false statement from prepare() or a false return
+// from execute(), so both are checked before any output is produced.
+if (!$query || !$query->execute()) {
+    databaseError($query ? $query->errorInfo() : $conn->errorInfo());
+    die('Couldn\'t fetch records');
+}
+
+$num_fields = $query->columnCount();
 $headers = array();
 for ($i = 0; $i < $num_fields; $i++) {
-    $headers[] = mysql_field_name($result , $i);
+    $meta = $query->getColumnMeta($i);
+    $headers[] = $meta['name'];
 }
 $fp = fopen('php://output', 'w');
-if ($fp && $result) {
+if ($fp && $query) {
 header('Content-Type: text/csv');
 header('Content-Disposition: attachment; filename="export.'.date("c").'.csv"');
 header('Pragma: no-cache');
 header('Expires: 0');
 fputcsv($fp, $headers);
-    while ($row = mysql_fetch_assoc($result)) {
+foreach ($query->fetchAll(PDO::FETCH_NUM) as $row) {
 foreach ($row as &$colvalue) {
 $colvalue = preg_replace( '/[^[:print:]]/', '', utf8_encode($colvalue));