Skip to content

Commit

Permalink
Added asynchronous writes to the MySQL writer and fixed some data issues
Browse files Browse the repository at this point in the history
  • Loading branch information
bbchristians committed May 26, 2020
1 parent 7ca734b commit ade9c4c
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 27 deletions.
20 changes: 12 additions & 8 deletions sql/easy_out.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
BEGIN;
-- Constants
SET @project_name = "aaberg/sql2o";
Set @instance_id = "-2011115859";
SET @project_name = "apache/camel";
Set @instance_id = "-1";

-- Query
SELECT
Expand All @@ -12,21 +12,25 @@ BEGIN;
FirstFile.f_comment as v1_comment,
SecondCommit.commit_hash as v2_commit,
SecondCommit.commit_date as v2_commit_date,
SecondCommit.author_date as v2_author_date,
SecondFile.f_path as v2_path,
SecondFile.containing_class as v2_class, SecondFile.containing_method as v2_method,
SecondFile.f_comment as v2_comment
FROM satd.SATD
INNER JOIN satd.SATDInFile as FirstFile
ON SATD.first_file = FirstFile.f_id
ON SATD.first_file = FirstFile.f_id
INNER JOIN satd.SATDInFile as SecondFile
ON SATD.second_file = SecondFile.f_id
ON SATD.second_file = SecondFile.f_id
INNER JOIN satd.Commits as FirstCommit
on SATD.first_commit=FirstCommit.commit_hash
ON SATD.first_commit = FirstCommit.commit_hash
AND SATD.p_id = FirstCommit.p_id
INNER JOIN satd.Commits as SecondCommit
on SATD.second_commit=SecondCommit.commit_hash
ON SATD.second_commit = SecondCommit.commit_hash
AND SATD.p_id = SecondCommit.p_id
INNER JOIN satd.Projects
on FirstCommit.p_id=Projects.p_id
WHERE Projects.p_name=@project_name
ON SATD.p_id=Projects.p_id
-- WHERE Projects.p_name=@project_name
-- AND SecondCommit.commit_hash="849ae58cfb2d68bf8f6c7a5ee6598fc7363a4b67"
-- WHERE SATD.satd_instance_id=@instance_id
ORDER BY satd_id DESC;

14 changes: 7 additions & 7 deletions sql/satd.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
DROP TABLE IF EXISTS satd.Commits, satd.SATD, satd.SATDInFile, satd.Projects;

CREATE TABLE IF NOT EXISTS satd.Projects (
p_id INT AUTO_INCREMENT,
p_id INT AUTO_INCREMENT NOT NULL,
p_name VARCHAR(255) NOT NULL UNIQUE,
p_url VARCHAR(255) NOT NULL UNIQUE,
PRIMARY KEY (p_id)
Expand All @@ -11,11 +11,11 @@ CREATE TABLE IF NOT EXISTS satd.SATDInFile (
f_id INT AUTO_INCREMENT,
f_comment VARCHAR(4096),
f_comment_type VARCHAR(32),
f_path VARCHAR(256),
f_path VARCHAR(512),
start_line INT,
end_line INT,
containing_class VARCHAR(256),
containing_method VARCHAR(256),
containing_class VARCHAR(512),
containing_method VARCHAR(512),
PRIMARY KEY (f_id)
);

Expand All @@ -28,7 +28,7 @@ CREATE TABLE IF NOT EXISTS satd.Commits(
committer_name varchar(256),
committer_email varchar(256),
commit_date DATETIME,
PRIMARY KEY (commit_hash),
PRIMARY KEY (p_id, commit_hash),
FOREIGN KEY (p_id) REFERENCES Projects(p_id)
);

Expand All @@ -43,8 +43,8 @@ CREATE TABLE IF NOT EXISTS satd.SATD (
resolution VARCHAR(64),
PRIMARY KEY (satd_id),
FOREIGN KEY (p_id) REFERENCES satd.Projects(p_id),
FOREIGN KEY (first_commit) REFERENCES satd.Commits(commit_hash),
FOREIGN KEY (second_commit) REFERENCES satd.Commits(commit_hash),
FOREIGN KEY (p_id, first_commit) REFERENCES satd.Commits(p_id, commit_hash),
FOREIGN KEY (p_id, second_commit) REFERENCES satd.Commits(p_id, commit_hash),
FOREIGN KEY (first_file) REFERENCES satd.SATDInFile(f_id),
FOREIGN KEY (second_file) REFERENCES satd.SATDInFile(f_id)
);
65 changes: 53 additions & 12 deletions src/main/java/edu/rit/se/satd/writer/MySQLOutputWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,27 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.sql.*;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class MySQLOutputWriter implements OutputWriter {

private static final int COMMENTS_MAX_CHARS = 4096;

private String dbURI;
private String user;
private String pass;
private final Map<String, Integer> cachedProjectKeys = new HashMap<>();

private final String dbURI;
private final String user;
private final String pass;

private final ScheduledThreadPoolExecutor finalWriteExecutor;


public MySQLOutputWriter(String propertiesPath) throws IOException {
final Properties properties = new Properties();
Expand All @@ -31,6 +41,8 @@ public MySQLOutputWriter(String propertiesPath) throws IOException {
properties.getProperty("USE_SSL"));
this.user = properties.getProperty("USERNAME");
this.pass = properties.getProperty("PASSWORD");
final int maxConnections = Integer.parseInt(properties.getProperty("MAX_CONNECTIONS", "151"));
this.finalWriteExecutor = new ScheduledThreadPoolExecutor( Math.max(1, maxConnections - 1));

try {
// Load driver
Expand All @@ -44,15 +56,42 @@ public MySQLOutputWriter(String propertiesPath) throws IOException {
public void writeDiff(SATDDifference diff) throws IOException {
Connection conn = null;
try {
// Write the first part synchronously, because we don't want it duplicated
// and duplication is possible.
conn = DriverManager.getConnection(this.dbURI, this.user, this.pass);
final int projectId = this.getProjectId(conn, diff.getProjectName(), diff.getProjectURI());
int projectId;
// Cache project key to shorten each write by one query
if( this.cachedProjectKeys.containsKey(diff.getProjectName()) ) {
projectId = this.cachedProjectKeys.get(diff.getProjectName());
} else {
projectId = this.getProjectId(conn, diff.getProjectName(), diff.getProjectURI());
this.cachedProjectKeys.put(diff.getProjectName(), projectId);
}
final String oldCommitId = this.getCommitId(conn, new CommitMetaData(diff.getOldCommit()), projectId);
final String newCommitId = this.getCommitId(conn, new CommitMetaData(diff.getNewCommit()), projectId);
for( SATDInstance satdInstance : diff.getSatdInstances() ) {
final int oldFileId = this.getSATDInFileId(conn, satdInstance, true);
final int newFileId = this.getSATDInFileId(conn, satdInstance, false);
this.getSATDInstanceId(conn, satdInstance, newCommitId, oldCommitId, newFileId, oldFileId);
}

// Now finish the remaining writes async and allow time for the previous writer to complete.
final Connection asyncConn = conn;
conn = null;
final Thread writeLastAsync = new Thread(() -> {
try {
for (SATDInstance satdInstance : diff.getSatdInstances()) {
final int oldFileId = this.getSATDInFileId(asyncConn, satdInstance, true);
final int newFileId = this.getSATDInFileId(asyncConn, satdInstance, false);
this.getSATDInstanceId(asyncConn, satdInstance, newCommitId, oldCommitId, newFileId, oldFileId, projectId);
}
} catch (SQLException e) {
throw new UncheckedIOException(new IOException(e));
} finally {
try {
asyncConn.close();
} catch (SQLException e) {
System.err.println("Error closing SQL connection in thread");
}
}
});
finalWriteExecutor.schedule(writeLastAsync, 100, TimeUnit.MILLISECONDS);

} catch (SQLException e) {
// Issues with SQL will be wrapped in an IOException to maintain interface consistency
throw new IOException(e);
Expand Down Expand Up @@ -159,7 +198,8 @@ private int getSATDInFileId(Connection conn, SATDInstance satdInstance, boolean
}

private int getSATDInstanceId(Connection conn, SATDInstance satdInstance,
String newCommitHash, String oldCommitHash, int newFileId, int oldFileId) throws SQLException{
String newCommitHash, String oldCommitHash,
int newFileId, int oldFileId, int projectId) throws SQLException{
final PreparedStatement queryStmt = conn.prepareStatement(
"SELECT SATD.satd_id FROM SATD WHERE SATD.first_commit=? AND " +
"SATD.second_commit=? AND SATD.first_file=? AND SATD.second_file=?"
Expand All @@ -176,15 +216,16 @@ private int getSATDInstanceId(Connection conn, SATDInstance satdInstance,
// Otherwise, add it and then return the newly generated key
final PreparedStatement updateStmt = conn.prepareStatement(
"INSERT INTO SATD(first_commit, second_commit, first_file, second_file, " +
"resolution, satd_instance_id) " +
"VALUES (?,?,?,?,?,?)",
"resolution, satd_instance_id, p_id) " +
"VALUES (?,?,?,?,?,?,?)",
Statement.RETURN_GENERATED_KEYS);
updateStmt.setString(1, oldCommitHash); // first_commit
updateStmt.setString(2, newCommitHash); // second_commit
updateStmt.setInt(3, oldFileId); // first_file
updateStmt.setInt(4, newFileId); // second_file
updateStmt.setString(5, satdInstance.getResolution().name()); // resolution
updateStmt.setInt(6, satdInstance.getId()); // satd_instance_id
updateStmt.setInt(7, projectId); // p_id
updateStmt.executeUpdate();
final ResultSet updateRes = updateStmt.getGeneratedKeys();
if (updateRes.next()) {
Expand Down

0 comments on commit ade9c4c

Please sign in to comment.