From: Maxious
Date: Fri, 07 Oct 2011 07:27:48 +0000
Subject: Work on new auto service alert import
X-Git-Url: https://maxious.lambdacomplex.org/git/?p=busui.git&a=commitdiff&h=b51809622ece7cc397444cc3cc9ea28345974c8f
---
Work on new auto service alert import
---

--- /dev/null
+++ b/servicealerts/importer.py
@@ -1,1 +1,88 @@
+# dependencies: http://code.google.com/p/python-twitter/
+# info
+# http://stackoverflow.com/questions/4206882/named-entity-recognition-with-preset-list-of-names-for-python-php/4207128#4207128
+# http://alias-i.com/lingpipe/demos/tutorial/ne/read-me.html approximate dist
+# http://streamhacker.com/2008/12/29/how-to-train-a-nltk-chunker/ more training
+# http://www.postgresql.org/docs/9.1/static/pgtrgm.html
+
+# data sources
+# http://twitter.com/#!/ACTEmergencyInf instant site-wide
+# http://twitter.com/#!/ACTPol_Traffic
+# http://esa.act.gov.au/feeds/currentincidents.xml
+
+# source: https://gist.github.com/322906/90dea659c04570757cccf0ce1e6d26c9d06f9283
+import sys
+import nltk
+import twitter
+import psycopg2
+
+def insert_service_alert_sitewide(heading, message, url):
+    # TODO: insert a site-wide alert row
+    pass
+
+def insert_service_alert_for_street(streets, heading, message, url):
+    conn_string = "host='localhost' dbname='energymapper' user='postgres' password='snmc'"
+    # print the connection string we will use to connect
+    print "Connecting to database\n ->%s" % (conn_string)
+    try:
+        # get a connection; if a connection cannot be made an exception will be raised here
+        conn = psycopg2.connect(conn_string)
+
+        # conn.cursor will return a cursor object; you can use this cursor to perform queries
+        cursor = conn.cursor()
+
+        # execute our query
+        cursor.execute("select max(value), extract(dow from max(time)) as dow, \
+extract(year from max(time))::text || lpad(extract(month from max(time))::text,2,'0') \
+|| lpad(extract(week from max(time))::text,2,'0') as yearmonthweek, to_char(max(time),'J') \
+from environmentdata_values where \"dataSourceID\"='NSWAEMODemand' \
+group by extract(dow from time), extract(year from time), extract(week from time) \
+order by extract(year from time), extract(week from time), extract(dow from time)")
+
+        # retrieve the records from the database
+        records = cursor.fetchall()
+
+        ys = []
+        for record in records:
+            ys.append(record[0])
+        # >>> cur.execute("INSERT INTO test (num, data) VALUES (%s, %s)", (42, 'bar'))
+        # >>> cur.statusmessage
+        # 'INSERT 0 1'
+    except:
+        # get the most recent exception
+        exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+        # exit the script and print an error telling what happened
+        sys.exit("Database connection failed!\n ->%s" % (exceptionValue))
+
+def get_tweets(user):
+    tapi = twitter.Api()
+    return tapi.GetUserTimeline(user)
+
+def extract_entity_names(t):
+    entity_names = []
+
+    if hasattr(t, 'node') and t.node:
+        if t.node == 'NE':
+            entity_names.append(' '.join([child[0] for child in t]))
+        else:
+            for child in t:
+                entity_names.extend(extract_entity_names(child))
+
+    return entity_names
+
+def extract_names(sample):
+    sentences = nltk.sent_tokenize(sample)
+    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
+    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
+    chunked_sentences = nltk.batch_ne_chunk(tagged_sentences, binary=True)
+    # chunked/tagged may be enough to just find and match the nouns
+
+    entity_names = []
+    for tree in chunked_sentences:
+        # print results per sentence
+        # print extract_entity_names(tree)
+
+        entity_names.extend(extract_entity_names(tree))
+
+    # print and return the unique entity names
+    print set(entity_names)
+    return set(entity_names)
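+# A minimal sketch (assumption, not settled wiring) of how the pieces above
+# could fit together: pull each Twitter feed listed in the data sources,
+# extract candidate street names, and file either a street-level or a
+# site-wide alert. insert_service_alert_* are still stubs, so this is
+# illustrative only.
+def main():
+    for user in ['ACTEmergencyInf', 'ACTPol_Traffic']:
+        for tweet in get_tweets(user):
+            url = 'http://twitter.com/' + user
+            streets = extract_names(tweet.text)
+            if streets:
+                insert_service_alert_for_street(streets, user, tweet.text, url)
+            else:
+                insert_service_alert_sitewide(user, tweet.text, url)
+
+if __name__ == '__main__':
+    main()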
+ sys.exit("Database connection failed!\n ->%s" % (exceptionValue)) + +def get_tweets(user): + tapi = twitter.Api() + return tapi.GetUserTimeline(user) + +def extract_entity_names(t): + entity_names = [] + + if hasattr(t, 'node') and t.node: + if t.node == 'NE': + entity_names.append(' '.join([child[0] for child in t])) + else: + for child in t: + entity_names.extend(extract_entity_names(child)) + + return entity_names + +def extract_names(sample): + sentences = nltk.sent_tokenize(sample) + tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences] + tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences] + chunked_sentences = nltk.batch_ne_chunk(tagged_sentences, binary=True) + # chunked/tagged may be enough to just find and match the nouns + + entity_names = [] + for tree in chunked_sentences: + # Print results per sentence + # print extract_entity_names(tree) + + entity_names.extend(extract_entity_names(tree)) + + # Print all entity names + #print entity_names + + # Print unique entity names + print set(entity_names) + --- a/servicealerts/servicealerts_twitter/NameFinder.java +++ /dev/null @@ -1,56 +1,1 @@ -InputStream modelIn = new FileInputStream("en-ner-person.bin"); -try { - TokenNameFinder model = new TokenNameFinderModel(modelIn); -} -catch (IOException e) { - e.printStackTrace(); -} -finally { - if (modelIn != null) { - try { - modelIn.close(); - } - catch (IOException e) { - } - } -} - -NameFinderME nameFinder = new NameFinderME(model); - -for (String document[][] : documents) { - - for (String[] sentence : document) { - Span nameSpans[] = find(sentence); - // do something with the names - } - - nameFinder.clearAdaptiveData() -} - - - InputStream in = getClass() - .getClassLoader() - .getResourceAsStream( - "opennlp/tools/namefind/AnnotatedSentences.txt"); - - String encoding = "ISO-8859-1"; - - ObjectStream sampleStream = new NameSampleDataStream( - new PlainTextByLineStream(new InputStreamReader(in, - encoding))); - - TokenNameFinderModel nameFinderModel = NameFinderME.train("en", - "default", sampleStream, Collections - . emptyMap(), 70, 1); - - TokenNameFinder nameFinder = new NameFinderME(nameFinderModel); - - // now test if it can detect the sample sentences - - String sentence[] = { "Alisa", "appreciated", "the", "hint", - "and", "enjoyed", "a", "delicious", "traditional", - "meal." }; - - Span names[] = nameFinder.find(sentence); - --- a/servicealerts/servicealerts_twitter/pom.xml +++ /dev/null @@ -1,19 +1,1 @@ - - 4.0.0 - org.lambdacomplex.bus - servicealerts_twitter - 0.0.1-SNAPSHOT - - - org.apache.opennlp - opennlp - 1.5.1-incubating - - - org.apache.opennlp - opennlp-tools - 1.5.1-incubating - - -
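The pg_trgm link in the importer's header comments points at one way to match
the entity names NLTK extracts against a canonical street list. A minimal
sketch of that lookup, assuming a hypothetical streets(name text) table with
the pg_trgm extension installed; the table name and the 0.4 threshold are
assumptions, not part of this codebase:

import psycopg2

def match_street(conn, candidate, threshold=0.4):
    # Return the best trigram match for an extracted entity name, or None.
    # similarity() comes from the pg_trgm extension referenced above;
    # "streets" is a hypothetical table of canonical street names.
    cursor = conn.cursor()
    cursor.execute(
        "SELECT name, similarity(name, %s) AS sml FROM streets "
        "WHERE similarity(name, %s) > %s ORDER BY sml DESC LIMIT 1",
        (candidate, candidate, threshold))
    row = cursor.fetchone()
    return row[0] if row else None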