Work on new automatic service alert import
--- /dev/null
+++ b/servicealerts/importer.py
@@ -0,0 +1,88 @@
+# dependencies: http://code.google.com/p/python-twitter/
+# info
+# http://stackoverflow.com/questions/4206882/named-entity-recognition-with-preset-list-of-names-for-python-php/4207128#4207128
+# http://alias-i.com/lingpipe/demos/tutorial/ne/read-me.html approximate distance matching
+# http://streamhacker.com/2008/12/29/how-to-train-a-nltk-chunker/ more training
+# http://www.postgresql.org/docs/9.1/static/pgtrgm.html trigram matching in postgres
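+#
+# rough sketch of pg_trgm fuzzy street matching, assuming a hypothetical
+# "streets" table with a "name" column and "create extension pg_trgm" already
+# run; "%" is pg_trgm's similarity operator, doubled to escape psycopg2's
+# own parameter formatting:
+#   cursor.execute("select name, similarity(name, %s) as sml from streets \
+#       where name %% %s order by sml desc limit 1", (candidate, candidate))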
+
+# data sources
+# http://twitter.com/#!/ACTEmergencyInf instant site-wide alerts
+# http://twitter.com/#!/ACTPol_Traffic
+# http://esa.act.gov.au/feeds/currentincidents.xml
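+#
+# the ESA feed is plain RSS/XML, so stdlib parsing should suffice
+# (tag names assumed from a typical RSS item; verify against the live feed):
+#   import urllib2
+#   from xml.dom import minidom
+#   dom = minidom.parse(urllib2.urlopen("http://esa.act.gov.au/feeds/currentincidents.xml"))
+#   titles = [i.getElementsByTagName("title")[0].firstChild.data
+#             for i in dom.getElementsByTagName("item")]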
+
+# source: https://gist.github.com/322906/90dea659c04570757cccf0ce1e6d26c9d06f9283
+import sys
+import nltk
+import twitter
+import psycopg2
+
+def insert_service_alert_sitewide(heading, message, url):
+    # TODO: stub; will push a single alert that applies network-wide
+    pass
+
+def insert_service_alert_for_street(streets, heading, message, url):
+    conn_string = "host='localhost' dbname='energymapper' user='postgres' password='snmc'"
+    # print the connection string we will use to connect
+    print "Connecting to database\n ->%s" % (conn_string)
+    try:
+        # get a connection; if a connection cannot be made an exception is raised here
+        conn = psycopg2.connect(conn_string)
+
+        # conn.cursor returns a cursor object; use it to perform queries
+        cursor = conn.cursor()
+
+        # placeholder query carried over for connection testing; to be
+        # replaced with an INSERT against the service alerts schema
+        cursor.execute("select max(value), extract(dow from max(time)) as dow, \
+extract(year from max(time))::text || lpad(extract(month from max(time))::text,2,'0') \
+|| lpad(extract(week from max(time))::text,2,'0') as yearmonthweek, to_char(max(time),'J') \
+from environmentdata_values where \"dataSourceID\"='NSWAEMODemand' \
+group by extract(dow from time), extract(year from time), extract(week from time) \
+order by extract(year from time), extract(week from time), extract(dow from time)")
+
+        # retrieve the records from the database
+        records = cursor.fetchall()
+
+        ys = []
+        for record in records:
+            ys.append(record[0])
+        # parameterised inserts look like:
+        # >>> cur.execute("INSERT INTO test (num, data) VALUES (%s, %s)", (42, 'bar'))
+        # >>> cur.statusmessage
+        # 'INSERT 0 1'
+    except:
+        # get the most recent exception
+        exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
+        # exit the script and print an error telling what happened
+        sys.exit("Database connection failed!\n ->%s" % (exceptionValue))
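+
+# minimal sketch of the eventual per-street insert, assuming a hypothetical
+# "servicealerts" table with (street, heading, message, url) columns;
+# psycopg2 parameter passing quotes the values safely
+def insert_alert_row(cursor, street, heading, message, url):
+    cursor.execute("INSERT INTO servicealerts (street, heading, message, url) \
+VALUES (%s, %s, %s, %s)", (street, heading, message, url))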
+
+def get_tweets(user):
+ tapi = twitter.Api()
+ return tapi.GetUserTimeline(user)
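+
+# usage sketch: python-twitter Status objects carry the message in .text, e.g.
+#   for tweet in get_tweets('ACTPol_Traffic'):
+#       print tweet.text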
+
+def extract_entity_names(t):
+    # recursively walk an NLTK parse tree, collecting the tokens under each
+    # 'NE' chunk (NLTK 2.x Tree API; t.node became t.label() in NLTK 3)
+    entity_names = []
+
+    if hasattr(t, 'node') and t.node:
+        if t.node == 'NE':
+            entity_names.append(' '.join([child[0] for child in t]))
+        else:
+            for child in t:
+                entity_names.extend(extract_entity_names(child))
+
+    return entity_names
+
+def extract_names(sample):
+    sentences = nltk.sent_tokenize(sample)
+    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
+    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
+    # batch_ne_chunk is the NLTK 2.x name (ne_chunk_sents in NLTK 3)
+    chunked_sentences = nltk.batch_ne_chunk(tagged_sentences, binary=True)
+    # chunked/tagged may be enough to just find and match the nouns
+
+    entity_names = []
+    for tree in chunked_sentences:
+        # per-sentence results: print extract_entity_names(tree)
+        entity_names.extend(extract_entity_names(tree))
+
+    # return the unique entity names
+    return set(entity_names)
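+
+# sketch of the intended wiring, assuming the feeds listed at the top:
+# pull a timeline, extract candidate entity names, then match them against
+# street names (e.g. via pg_trgm) before inserting alerts
+if __name__ == "__main__":
+    for tweet in get_tweets('ACTPol_Traffic'):
+        print extract_names(tweet.text)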
+
--- a/servicealerts/servicealerts_twitter/NameFinder.java
+++ /dev/null
@@ -1,56 +0,0 @@
-InputStream modelIn = new FileInputStream("en-ner-person.bin");
-try {
- TokenNameFinder model = new TokenNameFinderModel(modelIn);
-}
-catch (IOException e) {
- e.printStackTrace();
-}
-finally {
- if (modelIn != null) {
- try {
- modelIn.close();
- }
- catch (IOException e) {
- }
- }
-}
-
-NameFinderME nameFinder = new NameFinderME(model);
-
-for (String document[][] : documents) {
-
- for (String[] sentence : document) {
- Span nameSpans[] = find(sentence);
- // do something with the names
- }
-
- nameFinder.clearAdaptiveData()
-}
-
-
- InputStream in = getClass()
- .getClassLoader()
- .getResourceAsStream(
- "opennlp/tools/namefind/AnnotatedSentences.txt");
-
- String encoding = "ISO-8859-1";
-
- ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
- new PlainTextByLineStream(new InputStreamReader(in,
- encoding)));
-
- TokenNameFinderModel nameFinderModel = NameFinderME.train("en",
- "default", sampleStream, Collections
- .<String, Object> emptyMap(), 70, 1);
-
- TokenNameFinder nameFinder = new NameFinderME(nameFinderModel);
-
- // now test if it can detect the sample sentences
-
- String sentence[] = { "Alisa", "appreciated", "the", "hint",
- "and", "enjoyed", "a", "delicious", "traditional",
- "meal." };
-
- Span names[] = nameFinder.find(sentence);
-
--- a/servicealerts/servicealerts_twitter/pom.xml
+++ /dev/null
@@ -1,19 +0,0 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <groupId>org.lambdacomplex.bus</groupId>
- <artifactId>servicealerts_twitter</artifactId>
- <version>0.0.1-SNAPSHOT</version>
- <dependencies>
-<dependency>
- <groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp</artifactId>
- <version>1.5.1-incubating</version>
-</dependency>
-<dependency>
- <groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-tools</artifactId>
- <version>1.5.1-incubating</version>
-</dependency>
- </dependencies>
-</project>