Cleanup
Added functionality to create new revisions based on the timestamp of the last revision
Peter Kraker committed Nov 19, 2015
1 parent 9b5ad83 · commit 7830de1
Showing 4 changed files with 84 additions and 148 deletions.
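
The substance of the change sits in server/services/searchPLOS.php (diffed below): instead of redirecting to a stored visualization whenever one exists, the script now checks the timestamp of the last revision and only short-circuits when it was created within the last day; otherwise the map is recalculated and saved as a new revision. A condensed sketch of that flow, assuming $persistence, $settings, $query and $unique_id are initialised earlier in the script (the diff does not show that setup):

    <?php
    // Condensed restatement of the new flow in searchPLOS.php (a sketch, not the literal file).
    $last_version = $persistence->getLastVersion($query, true);   // full revision row, incl. rev_timestamp

    if ($last_version != false) {
        // The actual file spells this out with $now / $last_version_timestamp / $diff.
        $age = (new DateTime($last_version["rev_timestamp"]))->diff(new DateTime());
        if ($age->d == 0) {
            // Day component of the age is 0: treat the stored revision as current and redirect.
            redirect("http://" . $settings["host"] . $settings["vis_path"] . "index.php?id=" . $unique_id);
            return;
        }
    }
    // Otherwise the map is recalculated and stored via createVisualization() or writeRevision().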
23 changes: 15 additions & 8 deletions server/classes/headstart/persistence/SQLitePersistence.php
@@ -53,20 +53,21 @@ public function createVisualization($vis_id, $vis_title, $data) {
 
     }
 
-    public function getLastVersion($vis_id) {
-        return $this->getRevision($vis_id, null);
+    public function getLastVersion($vis_id, $details=false) {
+        return $this->getRevision($vis_id, null, $details);
     }
 
-    public function getRevision($vis_id, $rev_id) {
+    public function getRevision($vis_id, $rev_id, $details=false) {
 
         $id = ($rev_id == null)?("revisions.rev_id"):("?");
         $array = ($rev_id == null)?(array(addslashes($vis_id))):(array(addslashes($vis_id), $rev_id));
+        $return_fields = ($details==true)?("revisions.*"):("revisions.rev_data");
 
-        $result = $this->prepareExecuteAndReturnFirstResult("SELECT revisions.rev_data FROM revisions, visualizations
+        $result = $this->prepareExecuteAndReturnResult("SELECT $return_fields FROM revisions, visualizations
                                         WHERE visualizations.vis_id = ?
                                         AND visualizations.vis_id = revisions.rev_vis
                                         AND visualizations.vis_latest =" . $id
-                                    , $array);
+                                    , $array, !$details);
 
         return $result;
     }
@@ -76,7 +77,7 @@ public function writeRevision($vis_id, $data, $rev_id=null) {
         $rev = $rev_id;
 
         if($rev == null) {
-            $ver = $this->prepareExecuteAndReturnFirstResult("SELECT vis_latest FROM visualizations WHERE vis_id=?", array($vis_id));
+            $ver = $this->prepareExecuteAndReturnResult("SELECT vis_latest FROM visualizations WHERE vis_id=?", array($vis_id), true);
             $rev = $ver + 1;
         }

@@ -102,11 +103,17 @@ private function prepareAndExecute($stmt, $array) {
         return array("status" => $result, "query" => $query);
     }
 
-    private function prepareExecuteAndReturnFirstResult($stmt, $array) {
+    private function prepareExecuteAndReturnResult($stmt, $array, $first=false) {
         $result = $this->prepareAndExecute($stmt, $array);
         $fetch_result = $result["query"]->fetch();
 
-        return $fetch_result[0];
+        if($fetch_result == false) {
+            return false;
+        } else if($first == true) {
+            return $fetch_result[0];
+        } else {
+            return $fetch_result;
+        }
 
     }

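Side note on the persistence change above: getLastVersion()/getRevision() gain a $details flag that switches the query from revisions.rev_data to revisions.*, and the shared helper now returns false when the fetch comes back empty. A usage sketch ($vis_id is a placeholder; the associative keys assume the revisions columns referenced in the queries above):

    <?php
    // Usage sketch only; $vis_id is a hypothetical visualization id, $persistence a SQLitePersistence instance.
    $data_only = $persistence->getLastVersion($vis_id);        // just the rev_data column, as before
    $full_row  = $persistence->getLastVersion($vis_id, true);  // whole revisions row, e.g.
                                                               // $full_row["rev_data"], $full_row["rev_timestamp"]

    if ($full_row === false) {
        // No revision stored yet: both variants now return false instead of indexing an empty fetch.
    }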
114 changes: 34 additions & 80 deletions server/preprocessing/other-scripts/rplos.R
@@ -13,8 +13,8 @@ library(proxy)
 library(SnowballC)
 library(rplos)
 library(jsonlite)
-#library(lsa)
-#library(stylo)
+
+debug = FALSE
 
 # Get data from PLOS API
 start.time <- Sys.time()
@@ -64,41 +64,30 @@ corpus <- tm_map(corpus, stemDocument)
 
 tdm <- TermDocumentMatrix(corpus)
 
-#tdm <- weightTfIdf(tdm, normalize = TRUE)
-
 tdm <- removeSparseTerms(tdm, 0.3)
 
 tdm_matrix = t(as.matrix(tdm))
-#diag(tdm_matrix) <- NA
 
 distance_matrix_2 <- as.matrix(proxy::dist(tdm_matrix, method = "cosine"))
 distance_matrix = as.dist(distance_matrix_2)
 
-#td.mat <- as.matrix(TermDocumentMatrix(corpus))
-#td.mat.lsa <- lw_bintf(td.mat) * gw_idf(td.mat)
-#lsaSpace <- lsa(td.mat.lsa)
-#tdm_matrix = t(as.textmatrix(lsaSpace))
-#distance_matrix_2 <- as.matrix(proxy::dist(tdm_matrix, method = "cosine"))
-#distance_matrix = as.dist(distance_matrix_2)
-
-#distance_matrix <- apply(distance_matrix, 2, mean, na.rm=TRUE)
-#write.csv(as.matrix(distance_matrix), "matrix.csv")
-
 
 # Perform clustering, use elbow to determine a good number of clusters
 css_cluster <- css.hclust(distance_matrix, hclust.FUN.MoreArgs=list(method="ward.D"))
 cut_off = elbow.batch(css_cluster)
-#cut_off = elbow.batch(css_cluster,inc.thres=c(0.01,0.05,0.1),
-#                      ev.thres=c(0.95,0.9,0.8,0.75,0.67,0.5,0.33,0.2,0.1),precision=3)
 
 num_clusters = cut_off$k
 meta_cluster = attr(css_cluster,"meta")
 cluster = meta_cluster$hclust.obj
 labels = labels(distance_matrix)
 groups <- cutree(cluster, k=num_clusters)
 
-# Plot result of clustering to PDF file
-# pdf("clustering.pdf", width=19, height=12)
-# plot(cluster, labels=metadata$title, cex=0.6)
-# rect.hclust(cluster, k=num_clusters, border="red")
-# dev.off()
+if(debug == TRUE) {
+  # Plot result of clustering to PDF file
+  pdf("clustering.pdf", width=19, height=12)
+  plot(cluster, labels=metadata$title, cex=0.6)
+  rect.hclust(cluster, k=num_clusters, border="red")
+  dev.off()
+}
 
 num_clusters

@@ -108,70 +97,35 @@ nm.nmin = nmds.min(nm)
 x = nm.nmin$X1
 y = nm.nmin$X2
 
-# Plot results from multidimensional scaling, highlight clusters with symbols
-# pdf("mds.pdf")
 groups <- cutree(cluster, k=num_clusters)
-#plot(nm.nmin, pch=groups)
-# dev.off()
+if(debug == TRUE) {
+  # Plot results from multidimensional scaling, highlight clusters with symbols
+  pdf("mds.pdf")
+  plot(nm.nmin, pch=groups)
+  dev.off()
+}
 
 # Prepare the output
 result = cbind(x,y,groups,labels)
 output = merge(metadata, result, by.x="id", by.y="labels", all=TRUE)
 names(output)[names(output)=="groups"] <- "area_uri"
 output["area"] = paste("Cluster ", output$area_uri, sep="")
 
-#BigramTokenizer <-
-#  function(x)
-#    unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE)
-
-#dtm <- DocumentTermMatrix(corpus_unstemmed, control = list(tokenize = BigramTokenizer))
-
-# m_naming <- list(content = "titleabstract", id = "id")
-#
-# myReader_naming <- readTabular(mapping = m_naming)
-#
-# (corpus_naming <- Corpus(DataframeSource(cooc), readerControl = list(reader = myReader_naming)))
-#
-# corpus_naming <- tm_map(corpus_naming, removePunctuation)
-#
-# corpus_naming <- tm_map(corpus_naming, stripWhitespace)
-#
-# corpus_naming <- tm_map(corpus_naming, content_transformer(tolower))
-#
-# corpus_naming <- tm_map(corpus_naming, removeWords, stopwords("english"))
-
-#corpus_naming <- tm_map(corpus_naming, stemDocument)
-
-#subjects = cooc$subject
-#subjects = strsplit(subjects, "; ")
-#output$subjects_cleaned = sub(".*[/]", "", subjects)
-
-#dtm = DocumentTermMatrix(corpus_unstemmed)
-#dtm = weightTfIdf(dtm)
-
-#for (i in 1:num_clusters) {
-#  inGroup <- which(output$area_uri==i)
-#  within <- table(inGroup$subjects_cleaned)
-#  most_freq_term = sort(colSums(as.matrix(within)), decreasing=TRUE)[1:4]
-#  output$area[output$area_uri==i] = paste(names(most_freq_term), collapse=", ")
-#}
-
-#output$area
-
 output_json = toJSON(output)
 print(output_json)
 
-# Write output to file
-# file_handle = file("output_file.csv", open="w")
-# write.csv(output, file=file_handle, row.names=FALSE)
-# close(file_handle)
-#
-# # Write some stats to a file
-# file_handle = file("stats.txt", open="w")
-# writeLines(c(paste("Number of Clusters:", num_clusters, sep=" ")
-#              , paste("Description:", attributes(cut_off)$description)
-#              , paste("Stress:", min(nm$stress), sep=" ")
-#              , paste("R2:", max(nm$r2), sep=" ")
-#              ), file_handle)
-#
-# close(file_handle)
+if(debug == TRUE) {
+  # Write output to file
+  file_handle = file("output_file.csv", open="w")
+  write.csv(output, file=file_handle, row.names=FALSE)
+  close(file_handle)
+
+  # # Write some stats to a file
+  file_handle = file("stats.txt", open="w")
+  writeLines(c(paste("Number of Clusters:", num_clusters, sep=" ")
+             , paste("Description:", attributes(cut_off)$description)
+             , paste("Stress:", min(nm$stress), sep=" ")
+             , paste("R2:", max(nm$r2), sep=" ")
+             ), file_handle)
+
+  close(file_handle)
+}
13 changes: 11 additions & 2 deletions server/services/helper.php
@@ -1,11 +1,20 @@
 <?php
 
 function getUserID() {
-    if(isset($_SESSION['userInfo'])) {
+    if (isset($_SESSION['userInfo'])) {
         return $_SESSION['userInfo']['userID'];
     } else {
        return false;
     }
 }
 
+
+function redirect($url) {
+    if (headers_sent()) {
+        die('<script type="text/javascript">window.location=\'' . $url . '\';</script>');
+    } else {
+        header('Location: ' . $url);
+        die();
+    }
+}
+
 ?>
82 changes: 24 additions & 58 deletions server/services/searchPLOS.php
@@ -5,57 +5,15 @@
 <?php
 require dirname(__FILE__) . '/../classes/headstart/preprocessing/calculation/RCalculation.php';
 require dirname(__FILE__) . '/../classes/headstart/persistence/SQLitePersistence.php';
+require dirname(__FILE__) . '/../classes/headstart/preprocessing/naming/KeywordNaming.php';
 require_once dirname(__FILE__) . '/../classes/headstart/library/CommUtils.php';
 require_once dirname(__FILE__) . '/../classes/headstart/library/toolkit.php';
 
+require 'helper.php';
+
 use headstart\library;
 
 
-function attachMostUsedKeywords(&$array, $num_keywords) {
-
-    $working_array = array();
-
-    foreach($array as $entry) {
-        $uri = $entry["area_uri"];
-        $keywords = split("; ", $entry["subject"]);
-        foreach($keywords as &$keyword) {
-            $keyword = substr($keyword, strrpos($keyword, "/") + 1);
-        }
-
-        if(isset($working_array[$uri])) {
-            $working_array[$uri] = array_merge($working_array[$uri], $keywords);
-        } else {
-            $working_array[$uri] = $keywords;
-        }
-    }
-
-    $result_array = array();
-    foreach($working_array as $key => $current_array) {
-        $counted_sorted_array = array_count_values($current_array);
-        arsort($counted_sorted_array);
-        $important_terms = array_keys(array_slice($counted_sorted_array, 0, $num_keywords));
-        $final_string = implode(", ", $important_terms);
-        $result_array[$key] = $final_string;
-    }
-
-    foreach($array as $key => $entry) {
-        $array[$key]["area"] = $result_array[$entry["area_uri"]];
-    }
-
-}
-
-function redirect($url){
-    if (headers_sent()){
-        die('<script type="text/javascript">window.location=\''.$url.'\';</script>');
-    }else{
-        header('Location: ' . $url);
-        die();
-    }
-}
-
-$INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/";
+$INI_DIR = dirname(__FILE__) . "/../preprocessing/conf/";
 
 $ini_array = library\Toolkit::loadIni($INI_DIR);

@@ -69,9 +27,17 @@ function redirect($url){
 
 $settings = $ini_array["general"];
 
-if ($persistence->getLastVersion($query) != null) {
-    redirect("http://" . $settings["host"] . $settings["vis_path"] . "index.php?id=" . $unique_id);
-    return;
+$last_version = $persistence->getLastVersion($query, true);
+
+if ($last_version != false) {
+    $now = new DateTime();
+    $last_version_timestamp = new DateTime($last_version["rev_timestamp"]);
+    $diff = $last_version_timestamp->diff($now);
+
+    if ($diff->d == 0) {
+        redirect("http://" . $settings["host"] . $settings["vis_path"] . "index.php?id=" . $unique_id);
+        return;
+    }
 }
 
 $WORKING_DIR = $ini_array["general"]["preprocessing_dir"] . $ini_array["output"]["output_dir"];
@@ -85,26 +51,26 @@ function redirect($url){
 
 $output_json = mb_convert_encoding($output_json, "UTF-8");
 
-if(!library\Toolkit::isJSON($output_json)) {
-    echo "Sorry! Something went wrong - most likely we haven't found any documents matching your search term. Please <a href=\"http://" . $settings["host"] . $settings["vis_path"] ."\">go back and try again.</a>";
+if (!library\Toolkit::isJSON($output_json)) {
+    echo "Sorry! Something went wrong - most likely we haven't found any documents matching your search term. Please <a href=\"http://" . $settings["host"] . $settings["vis_path"] . "\">go back and try again.</a>";
     // echo $output_json;
     return;
 }
 
 $result = json_decode($output_json, true);
 
-attachMostUsedKeywords($result, 3);
-
+$naming = new \headstart\preprocessing\naming\KeywordNaming($ini_array);
+$naming->performNaming($result, 3);
 
 $input_json = json_encode($result);
 
-if ($persistence->getLastVersion($unique_id) == null) {
-    $persistence->createVisualization($unique_id, "PLOS Search: " .$query, $input_json);
+if ($persistence->getLastVersion($query) == false) {
+    $persistence->createVisualization($unique_id, "PLOS Search: " . $query, $input_json);
 } else {
     $persistence->writeRevision($unique_id, $input_json);
 }
 
-redirect("http://" . $settings["host"] . $settings["vis_path"] . "index.php?id=" . $unique_id);
-
-
+redirect("http://" . $settings["host"] . $settings["vis_path"] . "index.php?id=" . $unique_id);
 ?>
 </body>
 </html>
