Skip to content

Commit

Permalink
Merge pull request #87 from JokerYan/master
Browse files Browse the repository at this point in the history
Auto parsing and tagging
  • Loading branch information
ziyun99 authored Oct 17, 2019
2 parents 737e656 + 7b62b8c commit 4477681
Show file tree
Hide file tree
Showing 8 changed files with 380 additions and 99 deletions.
6 changes: 5 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,8 @@ language: java
jdk: oraclejdk11

before_install:
- chmod +x gradlew
- chmod +x gradlew

script:
- ./gradlew check
- ./gradlew test
108 changes: 99 additions & 9 deletions src/main/java/seedu/duke/email/EmailContentParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,40 @@
import seedu.duke.Duke;
import seedu.duke.email.entity.Email;

import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* A parser to process the content of emails to support automatic management of email.
* A parser to process the content of emails to support automatic management of email
*/
public class EmailContentParser {
// Weightage constants; presumably applied per keyword hit in the subject, sender and body
// respectively -- the scoring code is outside this view, confirm there.
private static final int KEYWORD_SUBJECT_WEIGHTAGE = 5;
private static final int KEYWORD_SENDER_WEIGHTAGE = 3;
private static final int KEYWORD_BODY_WEIGHTAGE = 1;
// Keyword pairs scanned by allKeywordInEmail(); replaced wholesale by initKeywordList().
private static ArrayList<KeywordPair> keywordList;
// Large sentinel used as the starting value of a minimum search.
private static final int INFINITY = 0x3f3f3f;
// Maximum edit distance at which two words still count as a fuzzy match.
private static final int FUZZY_LIMIT = 3;

/**
* Finds all keywords in email.
* @param email Email to be scanned for keywords
*/
public static void allKeywordInEmail(Email email) {
for (KeywordPair keywordPair : keywordList) {
if (keywordInEmail(email, keywordPair) > 0) {
Duke.getUI().showDebug(keywordPair.getKeyword() + ": "
+ keywordInEmail(email, keywordPair) + " => " + email.getSubject());
email.addTag(keywordPair.getKeyword());
int relevance = keywordInEmail(email, keywordPair);
if (relevance > 0) {
Duke.getUI().showDebug(keywordPair.getKeyword() + ": " + keywordInEmail(email, keywordPair) + " => " + email.getSubject());
email.addTag(keywordPair, relevance);
}
}
}

/**
* Calculates the keyword occurrence score within an email based on its position and number of
* occurrence.
* Calculates the keyword relevance score within an email based on its position and number of occurrence.
*
* @param email the email where the keyword pair is to be looked for
* @param keywordPair the target keyword pair
Expand Down Expand Up @@ -86,21 +89,97 @@ public static int keywordInString(String input, KeywordPair keywordPair) {
public static void initKeywordList() {
ArrayList<KeywordPair> keywordList = new ArrayList<>();
keywordList.add(new KeywordPair("CS2113T", new ArrayList<String>(List.of(
"CS2113T", "CS2113", "TAN KIAN WEI, JASON", "Leow Wei Xiang"))));
"CS2113T", "CS2113", "TAN KIAN WEI, JASON", "Leow Wei Xiang", "Akshay Narayan", "Akshay"))));
keywordList.add(new KeywordPair("CS2101", new ArrayList<String>(List.of(
"CS2101", "Anita Toh Ann Lee"))));
keywordList.add(new KeywordPair("CG2271", new ArrayList<String>(List.of(
"CG2271", "Djordje Jevdjic"))));
keywordList.add(new KeywordPair("CS2102", new ArrayList<String>(List.of(
"CS2102", "Adi Yoga Sidi Prabawa"))));
keywordList.add(new KeywordPair("CS3230", new ArrayList<String>(List.of(
"CS3230", "Divesh Aggarwal"))));
keywordList.add(new KeywordPair("CEG Admin", new ArrayList<String>(List.of(
"Low Mun Bak"))));
keywordList.add(new KeywordPair("SEP", new ArrayList<String>(List.of(
"SEP", "Student Exchange Programme"))));
keywordList.add(new KeywordPair("Tutorial", new ArrayList<String>(List.of(
"Tutorial"))));
keywordList.add(new KeywordPair("Assignment", new ArrayList<String>(List.of(
"Assignment"))));
keywordList.add(new KeywordPair("Spam", new ArrayList<String>(List.of(
"UHC Wellness", "luminus-do-not-reply", "NUS Libraries"))));

EmailContentParser.keywordList = keywordList;
}

/**
 * Computes the case-insensitive edit (Levenshtein) distance between A and B, which is the number
 * of steps required to transform A to B if only addition, deletion or update of a single
 * character is allowed for each step.
 *
 * <p>Both strings are lower-cased with {@code Locale.ROOT} before they are compared, so the
 * result does not depend on the default locale of the machine running the program.
 *
 * @param A first string
 * @param B second string
 * @return edit distance between A and B
 */
public static int editDistance(String A, String B) {
    if (A.length() == 0 || B.length() == 0) {
        return A.length() + B.length();
    }
    String first = A.toLowerCase(java.util.Locale.ROOT);
    String second = B.toLowerCase(java.util.Locale.ROOT);
    // dist[i][j] is the distance between the first i chars of `first` and the first j chars of
    // `second`. Java zero-initialises int arrays, so only the borders need to be set.
    int[][] dist = new int[first.length() + 1][second.length() + 1];
    for (int i = 0; i <= first.length(); i++) {
        dist[i][0] = i;
    }
    for (int j = 0; j <= second.length(); j++) {
        dist[0][j] = j;
    }
    for (int i = 1; i <= first.length(); i++) {
        for (int j = 1; j <= second.length(); j++) {
            int substitutionCost = first.charAt(i - 1) == second.charAt(j - 1) ? 0 : 1;
            int substitution = dist[i - 1][j - 1] + substitutionCost;
            int deletion = dist[i - 1][j] + 1;
            int insertion = dist[i][j - 1] + 1;
            dist[i][j] = Math.min(substitution, Math.min(deletion, insertion));
        }
    }
    return dist[first.length()][second.length()];
}

/**
 * Searches a keyword in input string with some tolerance of inaccuracy.
 *
 * <p>Both strings are split on non-word characters and every non-empty input word is compared
 * with every non-empty target word. A pair whose edit distance is at most {@code FUZZY_LIMIT}
 * adds {@code FUZZY_LIMIT - distance + 1} to the score, so closer matches contribute more.
 *
 * @param input input string where the keyword is searched
 * @param target the target keyword to be searched
 * @return a relevance score related to both occurrence and relevance
 */
private static int fuzzySearchInString(String input, String target) {
    int score = 0;
    String[] inputWords = input.split("\\W");
    String[] targetWords = target.split("\\W");
    for (String inputWord : inputWords) {
        if (inputWord.isEmpty()) {
            continue;
        }
        for (String targetWord : targetWords) {
            if (targetWord.isEmpty()) {
                continue;
            }
            // NOTE(review): two words that are both at most FUZZY_LIMIT characters long always
            // fall within the limit, so short words score against each other regardless of
            // content.
            int distance = editDistance(inputWord, targetWord);
            if (distance <= FUZZY_LIMIT) {
                score += FUZZY_LIMIT - distance + 1;
            }
        }
    }
    // This is a diagnostic trace, not a failure, so it goes to the debug channel.
    Duke.getUI().showDebug(score + " : " + input + " <> " + target);
    return score;
}

/**
* A pair of keyword with its possible expressions
*/
public static class KeywordPair {
private String keyword;
Expand All @@ -117,6 +196,17 @@ public KeywordPair(String keyword, ArrayList<String> expressions) {
this.expressions = expressions;
}

/**
 * Constructs a keyword pair from a keyword alone. The expression list then holds exactly one
 * entry, the keyword itself.
 *
 * @param keyword the value of keyword looked for
 */
public KeywordPair(String keyword) {
    ArrayList<String> defaultExpressions = new ArrayList<>(List.of(keyword));
    this.expressions = defaultExpressions;
    this.keyword = keyword;
}

/**
 * Returns the keyword of this pair.
 *
 * @return the keyword
 */
public String getKeyword() {
    return keyword;
}
Expand Down
31 changes: 31 additions & 0 deletions src/main/java/seedu/duke/email/EmailFormatParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.ResolverStyle;
import java.util.ArrayList;
import java.util.Locale;

public class EmailFormatParser {
Expand Down Expand Up @@ -68,6 +69,24 @@ public static Email parseRawJson(String jsonString) throws EmailParsingException
}
}

/**
 * Parses the json string of one email index entry into an email. The entry is expected to
 * provide the keys "subject", "sender", "receivedDateTime" and "tags".
 *
 * @param jsonString json string of a single index entry
 * @return email built from the subject, sender, received date time and tags of the entry
 * @throws EmailParsingException if the string is not valid index json, or if its sender value
 *                               does not hold a name and an address separated by "=>"
 */
public static Email parseIndexJson(String jsonString) throws EmailParsingException {
    try {
        JSONObject indexJson = new JSONObject(jsonString);
        String subject = indexJson.getString("subject");
        String senderString = indexJson.getString("sender");
        // Sender(String) reads both halves of a "=>" split. Reject a malformed value here so a
        // corrupted index entry is reported as a parsing failure instead of escaping as an
        // unchecked runtime exception.
        if (senderString.split("=>").length < 2) {
            throw new EmailParsingException("Email index json failed to parse");
        }
        Sender sender = new Sender(senderString);
        LocalDateTime receivedDateTime =
                parseEmailDateTime(indexJson.getString("receivedDateTime"));
        JSONArray tagArray = indexJson.getJSONArray("tags");
        ArrayList<Email.Tag> tags = new ArrayList<>();
        for (int i = 0; i < tagArray.length(); i++) {
            tags.add(new Email.Tag(tagArray.getJSONObject(i)));
        }
        return new Email(subject, sender, receivedDateTime, tags);
    } catch (JSONException e) {
        throw new EmailParsingException("Email index json failed to parse");
    }
}

/**
* Parses the email date time string to a LocalDateTime.
*
Expand Down Expand Up @@ -116,6 +135,18 @@ public Sender(JSONObject senderInfo) throws JSONException {
this.address = senderInfo.getJSONObject("emailAddress").getString("address");
}

/**
 * Constructs a sender from the string output of a sender, i.e. the "name => address" form
 * produced by toString().
 *
 * @param senderString the string of sender toString() output used to parse a sender
 * @throws IllegalArgumentException if senderString does not hold a name and an address
 *                                  separated by "=>"
 */
public Sender(String senderString) {
    // Split once and validate, so malformed input fails with a message that names the input.
    String[] parts = senderString.split("=>");
    if (parts.length < 2) {
        throw new IllegalArgumentException(
                "Sender string is not in \"name => address\" format: " + senderString);
    }
    this.name = parts[0].strip();
    this.address = parts[1].strip();
}

/**
 * Formats the sender as "name => address", the form that Sender(String) parses back.
 *
 * @return the name and the address joined by " => "
 */
@Override
public String toString() {
    return name + " => " + address;
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/seedu/duke/email/EmailList.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public String toString() {
*/
public String[] show(int index) {
Email email = this.get(index);
String emailContent = email.getBody();
String emailContent = email.colorBodyOnTag();
String responseMsg = "Showing email in browser: " + email.getSubject();
String[] responseArray = {responseMsg, emailContent};
return responseArray;
Expand Down
65 changes: 14 additions & 51 deletions src/main/java/seedu/duke/email/EmailStorage.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package seedu.duke.email;

import org.json.JSONException;
import org.json.JSONObject;
import seedu.duke.Duke;
import seedu.duke.common.network.Http;
import seedu.duke.email.entity.Email;
Expand Down Expand Up @@ -92,29 +94,20 @@ public static ArrayList<String> getHtmlList() {
* current email list with local storage after that by calling syncEmailListWithHtml().
*/
public static void syncWithServer() {
EmailList serverEmailList = Http.fetchEmail(50);
EmailList serverEmailList = Http.fetchEmail(60);
for (Email serverEmail : serverEmailList) {
boolean exist = false;
for (Email localEmail : Duke.getEmailList()) {
// Check existence of serverEmail in localEmail by comparing the email subject and
// ReceivedDateTime.
// If not checked by ReceivedDateTime, emails with same subject is filtered out from being
// added to emailList.
boolean isEqualSubject = localEmail.getSubject().equals(serverEmail.getSubject());
boolean isEqualDateTime =
localEmail.getReceivedDateTime().equals(serverEmail.getReceivedDateTime());
if (isEqualSubject && isEqualDateTime) {
if (localEmail.getSubject().equals(serverEmail.getSubject())) {
exist = true;
break;
}
}
if (!exist) {
allKeywordInEmail(serverEmail);
Duke.getEmailList().add(serverEmail);
}
}
for (Email email : Duke.getEmailList()) {
allKeywordInEmail(email);
}
saveEmails(Duke.getEmailList());
}

Expand All @@ -133,21 +126,8 @@ public static void saveEmails(EmailList emailList) {
indexFile.createNewFile();
FileOutputStream indexOut = new FileOutputStream(indexFile, false);
String content = "";
String separator = " |";
for (Email email : emailList) {
content += email.getFilename() + separator;
ArrayList<String> tags = email.getTags();

// if this email does not have tags, add new line and continue with next email.
if (tags == null || tags.size() == 0) {
content += "\n";
continue;
}

for (String tag : tags) {
content += " #" + tag;
}
content += separator + "\n";
content += email.getIndexJson().toString() + "\n";
}
indexOut.write(content.getBytes());
indexOut.close();
Expand All @@ -163,6 +143,8 @@ public static void saveEmails(EmailList emailList) {
} catch (IOException e) {
e.printStackTrace();
Duke.getUI().showError("Write to output file IO exception!");
} catch (JSONException e) {
Duke.getUI().showError("Email index formatting exception!");
}
}

Expand Down Expand Up @@ -249,11 +231,8 @@ public static EmailList readEmailFromFile() {
Scanner scanner = new Scanner(indexIn);
while (scanner.hasNextLine()) {
String input = scanner.nextLine();
if (input.length() <= 2) {
throw new TaskStorage.StorageException("Invalid Save File!");
}
String[] splitString = input.split("\\|");
String filename = splitString[0].strip();
Email indexEmail = EmailFormatParser.parseIndexJson(input);
String filename = indexEmail.getFilename();

String fileDir = getFolderDir() + filename;
File emailFile = new File(fileDir);
Expand All @@ -263,24 +242,11 @@ public static EmailList readEmailFromFile() {
while (emailScanner.hasNextLine()) {
emailContent += emailScanner.nextLine();
}
Email email = EmailFormatParser.parseRawJson(emailContent);

// If this email entry has no tags information, add this email to emailList and continue
// with next email iteration.
if (splitString.length == 1) {
emailList.add(email);
continue;
Email fileEmail = EmailFormatParser.parseRawJson(emailContent);
for (Email.Tag tag : indexEmail.getTags()) {
fileEmail.addTag(tag);
}

String[] tags = splitString[1].strip().split("#");
for (String tag : tags) {
if (tag.strip().equals("")) {
continue;
}
System.out.println(tag);
email.addTag(tag.strip());
}
emailList.add(email);
emailList.add(fileEmail);
}
Duke.getUI().showMessage("Saved email file successfully loaded...");
indexIn.close();
Expand All @@ -289,9 +255,6 @@ public static EmailList readEmailFromFile() {
return emailList;
} catch (IOException e) {
Duke.getUI().showError("Read save file IO exception");
} catch (TaskStorage.StorageException e) {
Duke.getUI().showError(e.getMessage());
emailList = new EmailList();
} catch (EmailFormatParser.EmailParsingException e) {
Duke.getUI().showError("Email save file is in wrong format");
}
Expand Down
Loading

0 comments on commit 4477681

Please sign in to comment.