Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.15
1.16
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public class FrameDetail extends FrameEntity implements FrameInterface {
public int retryCount;
public int exitStatus;
public long maxRss;
public long maxPss;
public int dispatchOrder;
public String lastResource;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,10 +326,13 @@ public interface FrameDao {
* @param f
* @param maxRss
* @param rss
* @param maxPss
* @param pss
* @param lluTime
* @throws FrameReservationException if the frame is locked by another thread.
*/
void updateFrameMemoryUsageAndLluTime(FrameInterface f, long maxRss, long rss, long lluTime);
void updateFrameMemoryUsageAndLluTime(FrameInterface f, long maxRss, long rss, long maxPss,
long pss, long lluTime);

/**
* Attempt to put a exclusive row lock on the given frame. The frame must be in the specified
Expand Down
9 changes: 9 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/dao/JobDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,15 @@ public interface JobDao {
*/
void updateMaxRSS(JobInterface job, long maxRss);

/**
* Update jobs max PSS. Only updates if the passed in value is greater than the current value of
* int_max_pss
*
* @param job
* @param maxPss
*/
void updateMaxPSS(JobInterface job, long maxPss);

/**
* Inserts a key/value pair into the jobs env table
*
Expand Down
10 changes: 10 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/dao/LayerDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,16 @@ public interface LayerDao {
*/
void updateLayerMaxRSS(LayerInterface layer, long val, boolean force);

/**
* Updates the layer's maximum PSS value. If force is true, the max RSS is updated no matter
* what the current value is. If force is false, the value is only updated the val is greater
* than than the existing value.
*
* @param layer
* @param val
*/
void updateLayerMaxPSS(LayerInterface layer, long val, boolean force);

/**
* Increases the value of the minimum memory when the supplied value is larger than the current
* value
Expand Down
5 changes: 3 additions & 2 deletions cuebot/src/main/java/com/imageworks/spcue/dao/ProcDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,9 @@ public interface ProcDao {
* @param usedKb
* @param maxKb
*/
void updateProcMemoryUsage(FrameInterface f, long rss, long maxRss, long vsize, long maxVsize,
long usedGpuMemory, long maxUsedGpuMemory, long usedSwapMemory, byte[] children);
void updateProcMemoryUsage(FrameInterface f, long rss, long maxRss, long pss, long maxPss,
long vsize, long maxVsize, long usedGpuMemory, long maxUsedGpuMemory,
long usedSwapMemory, byte[] children);

/**
* get aq virual proc from its unique id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ public FrameDetail mapRow(ResultSet rs, int rowNum) throws SQLException {
frame.layerId = rs.getString("pk_layer");
frame.showId = rs.getString("pk_show");
frame.maxRss = rs.getLong("int_mem_max_used");
frame.maxPss = rs.getLong("int_pss_max_used");
frame.name = rs.getString("str_name");
frame.number = rs.getInt("int_number");
frame.dispatchOrder = rs.getInt("int_dispatch_order");
Expand Down Expand Up @@ -709,14 +710,15 @@ public ResourceUsage getResourceUsage(FrameInterface f) {
RESOURCE_USAGE_MAPPER, f.getFrameId());
}

private static final String UPDATE_FRAME_MEMORY_USAGE_AND_LLU_TIME = "UPDATE " + "frame "
+ "SET " + "ts_updated = current_timestamp," + "int_mem_max_used = ?,"
+ "int_mem_used = ?," + "ts_llu = ? " + "WHERE " + "pk_frame = ? ";
private static final String UPDATE_FRAME_MEMORY_USAGE_AND_LLU_TIME =
"UPDATE " + "frame " + "SET " + "ts_updated = current_timestamp,"
+ "int_mem_max_used = ?," + "int_mem_used = ?," + "int_pss_max_used = ?,"
+ "int_pss_used = ?," + "ts_llu = ? " + "WHERE " + "pk_frame = ? ";

@Override
public void updateFrameMemoryUsageAndLluTime(FrameInterface f, long maxRss, long rss,
long lluTime) {
getJdbcTemplate().update(UPDATE_FRAME_MEMORY_USAGE_AND_LLU_TIME, maxRss, rss,
long maxPss, long pss, long lluTime) {
getJdbcTemplate().update(UPDATE_FRAME_MEMORY_USAGE_AND_LLU_TIME, maxRss, rss, maxPss, pss,
new Timestamp(lluTime * 1000l), f.getFrameId());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,12 @@ public void updateMaxRSS(JobInterface job, long value) {
job.getJobId(), value);
}

public void updateMaxPSS(JobInterface job, long value) {
getJdbcTemplate().update(
"UPDATE job_mem SET int_max_pss=? WHERE pk_job=? AND int_max_pss < ?", value,
job.getJobId(), value);
}

private static final String UPDATE_JOB_FINISHED = "UPDATE " + "job " + "SET "
+ "str_state = ?, " + "str_visible_name = NULL, " + "ts_stopped = current_timestamp "
+ "WHERE " + "str_state = 'PENDING' " + "AND " + "pk_job = ?";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,22 @@ public void updateLayerMaxRSS(LayerInterface layer, long val, boolean force) {
getJdbcTemplate().update(sb.toString(), options);
}

private static final String UPDATE_LAYER_MAX_PSS =
"UPDATE " + "layer_mem " + "SET " + "int_max_pss = ? " + "WHERE " + "pk_layer = ?";

@Override
public void updateLayerMaxPSS(LayerInterface layer, long val, boolean force) {
StringBuilder sb = new StringBuilder(UPDATE_LAYER_MAX_PSS);
Object[] options;
if (!force) {
options = new Object[] {val, layer.getLayerId(), val};
sb.append(" AND int_max_pss < ?");
} else {
options = new Object[] {val, layer.getLayerId()};
}
getJdbcTemplate().update(sb.toString(), options);
}

@Override
public void updateLayerTags(LayerInterface layer, Set<String> tags) {
if (tags.size() == 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ public CachedJobWhiteboardMapper(NestedJobWhiteboardMapper result) {
+ "(job_resource.int_gpus + job_resource.int_local_gpus) AS int_gpus, "
+ "job_resource.int_min_cores, " + "job_resource.int_min_gpus, "
+ "job_resource.int_max_cores, " + "job_resource.int_max_gpus, "
+ "job_mem.int_max_rss " + "FROM " + "show, " + "dept, " + "folder_level, "
+ "folder_resource, " + "folder " + "LEFT JOIN " + "job " + "ON "
+ "job_mem.int_max_rss, " + "job_mem.int_max_pss " + "FROM " + "show, " + "dept, "
+ "folder_level, " + "folder_resource, " + "folder " + "LEFT JOIN " + "job " + "ON "
+ " (folder.pk_folder = job.pk_folder AND job.str_state='PENDING') " + "LEFT JOIN "
+ "facility " + "ON " + "(job.pk_facility = facility.pk_facility) " + "LEFT JOIN "
+ "job_stat " + "ON " + "(job.pk_job = job_stat.pk_job) " + "LEFT JOIN "
Expand Down Expand Up @@ -290,6 +290,7 @@ private static final NestedJob mapResultSetToJob(ResultSet rs) throws SQLExcepti
+ "host_stat.int_load, " + "proc.pk_proc, " + "proc.int_cores_reserved AS proc_cores, "
+ "proc.int_gpus_reserved AS proc_gpus, " + "proc.int_mem_reserved AS proc_memory, "
+ "proc.int_mem_used AS used_memory, " + "proc.int_mem_max_used AS max_memory, "
+ "proc.int_pss_used AS used_pss, " + "proc.int_pss_max_used AS max_pss, "
+ "proc.int_gpu_mem_reserved AS proc_gpu_memory, " + "proc.ts_ping, "
+ "proc.ts_booked, " + "proc.ts_dispatched, " + "proc.b_unbooked, "
+ "redirect.str_name AS str_redirect, " + "job.str_name AS job_name, "
Expand Down Expand Up @@ -390,6 +391,8 @@ public NestedHost mapRow(ResultSet rs, int row) throws SQLException {
.setReservedMemory(rs.getLong("proc_memory"))
.setReservedGpuMemory(rs.getLong("proc_gpu_memory"))
.setUsedMemory(rs.getLong("used_memory"))
.setUsedPss(rs.getLong("used_pss"))
.setMaxPss(rs.getLong("max_pss"))
.setFrameName(rs.getString("frame_name"))
.setJobName(rs.getString("job_name"))
.setShowName(rs.getString("show_name"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,16 +171,16 @@ public boolean clearVirtualProcAssignment(FrameInterface frame) {
frame.getFrameId()) == 1;
}

private static final String UPDATE_PROC_MEMORY_USAGE =
"UPDATE " + "proc " + "SET " + "int_mem_used = ?, " + "int_mem_max_used = ?,"
+ "int_virt_used = ?, " + "int_virt_max_used = ?, " + "int_gpu_mem_used = ?, "
+ "int_gpu_mem_max_used = ?, " + "int_swap_used = ?, " + "bytea_children = ?, "
+ "ts_ping = current_timestamp " + "WHERE " + "pk_frame = ?";
private static final String UPDATE_PROC_MEMORY_USAGE = "UPDATE " + "proc " + "SET "
+ "int_mem_used = ?, " + "int_mem_max_used = ?," + "int_pss_used = ?, "
+ "int_pss_max_used = ?, " + "int_virt_used = ?, " + "int_virt_max_used = ?, "
+ "int_gpu_mem_used = ?, " + "int_gpu_mem_max_used = ?, " + "int_swap_used = ?, "
+ "bytea_children = ?, " + "ts_ping = current_timestamp " + "WHERE " + "pk_frame = ?";

@Override
public void updateProcMemoryUsage(FrameInterface f, long rss, long maxRss, long vss,
long maxVss, long usedGpuMemory, long maxUsedGpuMemory, long usedSwapMemory,
byte[] children) {
public void updateProcMemoryUsage(FrameInterface f, long rss, long maxRss, long pss,
long maxPss, long vss, long maxVss, long usedGpuMemory, long maxUsedGpuMemory,
long usedSwapMemory, byte[] children) {
/*
* This method is going to repeat for a proc every 1 minute, so if the proc is being touched
* by another thread, then return quietly without updating memory usage.
Expand All @@ -202,13 +202,15 @@ public PreparedStatement createPreparedStatement(Connection conn)
conn.prepareStatement(UPDATE_PROC_MEMORY_USAGE);
updateProc.setLong(1, rss);
updateProc.setLong(2, maxRss);
updateProc.setLong(3, vss);
updateProc.setLong(4, maxVss);
updateProc.setLong(5, usedGpuMemory);
updateProc.setLong(6, maxUsedGpuMemory);
updateProc.setLong(7, usedSwapMemory);
updateProc.setBytes(8, children);
updateProc.setString(9, f.getFrameId());
updateProc.setLong(3, pss);
updateProc.setLong(4, maxPss);
updateProc.setLong(5, vss);
updateProc.setLong(6, maxVss);
updateProc.setLong(7, usedGpuMemory);
updateProc.setLong(8, maxUsedGpuMemory);
updateProc.setLong(9, usedSwapMemory);
updateProc.setBytes(10, children);
updateProc.setString(11, f.getFrameId());
return updateProc;
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1124,7 +1124,7 @@ public static JobStats mapJobStats(ResultSet rs) throws SQLException {
JobStats.Builder statsBuilder = JobStats.newBuilder()
.setReservedCores(Convert.coreUnitsToCores(rs.getInt("int_cores")))
.setReservedGpus(rs.getInt("int_gpus")).setMaxRss(rs.getLong("int_max_rss"))
.setTotalFrames(rs.getInt("int_frame_count"))
.setMaxPss(rs.getLong("int_max_pss")).setTotalFrames(rs.getInt("int_frame_count"))
.setTotalLayers(rs.getInt("int_layer_count"))
.setWaitingFrames(rs.getInt("int_waiting_count"))
.setRunningFrames(rs.getInt("int_running_count"))
Expand Down Expand Up @@ -1188,6 +1188,7 @@ public Layer mapRow(ResultSet rs, int rowNum) throws SQLException {
LayerStats.Builder statsBuilder = LayerStats.newBuilder()
.setReservedCores(Convert.coreUnitsToCores(rs.getInt("int_cores")))
.setReservedGpus(rs.getInt("int_gpus")).setMaxRss(rs.getLong("int_max_rss"))
.setMaxPss(rs.getLong("int_max_pss"))
.setTotalFrames(rs.getInt("int_total_count"))
.setWaitingFrames(rs.getInt("int_waiting_count"))
.setRunningFrames(rs.getInt("int_running_count"))
Expand Down Expand Up @@ -1309,8 +1310,10 @@ public Frame mapRow(ResultSet rs, int rowNum) throws SQLException {
.setState(FrameState.valueOf(SqlUtil.getString(rs, "str_state")))
.setLayerName(SqlUtil.getString(rs, "layer_name"))
.setUsedMemory(rs.getLong("int_mem_used"))
.setUsedPss(rs.getLong("int_pss_used"))
.setReservedMemory(rs.getLong("int_mem_reserved"))
.setReservedGpuMemory(rs.getLong("int_gpu_mem_reserved"))
.setMaxPss(rs.getLong("int_pss_max_used"))
.setCheckpointState(
CheckpointState.valueOf(SqlUtil.getString(rs, "str_checkpoint_state")))
.setCheckpointCount(rs.getInt("int_checkpoint_count"));
Expand Down Expand Up @@ -1454,7 +1457,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException {
+ "frame.int_dispatch_order," + "frame.ts_started," + "frame.ts_stopped,"
+ "frame.ts_llu," + "frame.int_retries," + "frame.str_state," + "frame.str_host,"
+ "frame.int_cores," + "frame.int_gpus," + "frame.int_mem_max_used,"
+ "frame.int_mem_used, " + "frame.int_mem_reserved, " + "frame.int_gpu_mem_reserved, "
+ "frame.int_mem_used, " + "frame.int_pss_max_used," + "frame.int_pss_used, "
+ "frame.int_mem_reserved, " + "frame.int_gpu_mem_reserved, "
+ "frame.str_checkpoint_state," + "frame.int_checkpoint_count,"
+ "frame.int_total_past_core_time," + "frame.int_total_past_gpu_time,"
+ "layer.str_name AS layer_name," + "job.str_name AS job_name,"
Expand Down Expand Up @@ -1608,7 +1612,7 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException {
+ "job_usage.int_core_time_fail, " + "job_usage.int_gpu_time_success, "
+ "job_usage.int_gpu_time_fail, " + "job_usage.int_frame_success_count, "
+ "job_usage.int_frame_fail_count, " + "job_usage.int_clock_time_high,"
+ "job_usage.int_clock_time_success," + "job_mem.int_max_rss,"
+ "job_usage.int_clock_time_success," + "job_mem.int_max_rss," + "job_mem.int_max_pss,"
+ "(job_resource.int_cores + job_resource.int_local_cores) AS int_cores,"
+ "(job_resource.int_gpus + job_resource.int_local_gpus) AS int_gpus, "
+ "job.str_loki_url " + "FROM " + "job," + "folder," + "show," + "facility,"
Expand All @@ -1627,11 +1631,11 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException {
+ "layer_usage.int_frame_success_count, " + "layer_usage.int_frame_fail_count, "
+ "layer_usage.int_clock_time_low, " + "layer_usage.int_clock_time_high,"
+ "layer_usage.int_clock_time_success," + "layer_usage.int_clock_time_fail,"
+ "layer_mem.int_max_rss," + "layer_resource.int_cores," + "layer_resource.int_gpus "
+ "FROM " + "layer, " + "job," + "layer_stat, " + "layer_resource, " + "layer_usage, "
+ "layer_mem " + "WHERE " + "layer.pk_job = job.pk_job " + "AND "
+ "layer.pk_layer = layer_stat.pk_layer " + "AND "
+ "layer.pk_layer = layer_resource.pk_layer " + "AND "
+ "layer_mem.int_max_rss," + "layer_mem.int_max_pss," + "layer_resource.int_cores,"
+ "layer_resource.int_gpus " + "FROM " + "layer, " + "job," + "layer_stat, "
+ "layer_resource, " + "layer_usage, " + "layer_mem " + "WHERE "
+ "layer.pk_job = job.pk_job " + "AND " + "layer.pk_layer = layer_stat.pk_layer "
+ "AND " + "layer.pk_layer = layer_resource.pk_layer " + "AND "
+ "layer.pk_layer = layer_usage.pk_layer " + "AND "
+ "layer.pk_layer = layer_mem.pk_layer";

Expand All @@ -1645,7 +1649,7 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException {
+ "layer_usage.int_frame_fail_count, " + "layer_usage.int_clock_time_low, "
+ "layer_usage.int_clock_time_high, " + "layer_usage.int_clock_time_success, "
+ "layer_usage.int_clock_time_fail, " + "layer_mem.int_max_rss, "
+ "layer_resource.int_cores, " + "layer_resource.int_gpus, "
+ "layer_mem.int_max_pss, " + "layer_resource.int_cores, " + "layer_resource.int_gpus, "
+ "limit_names.str_limit_names " + "FROM " + "layer " + "JOIN "
+ "job ON layer.pk_job = job.pk_job " + "JOIN "
+ "layer_stat ON layer.pk_layer = layer_stat.pk_layer " + "JOIN "
Expand Down Expand Up @@ -1746,8 +1750,8 @@ public Show mapRow(ResultSet rs, int rowNum) throws SQLException {
+ "frame.int_number," + "frame.int_dispatch_order," + "frame.ts_started,"
+ "frame.ts_stopped," + "frame.ts_llu," + "frame.int_retries,"
+ "frame.str_state," + "frame.str_host," + "frame.int_cores,"
+ "frame.int_mem_max_used," + "frame.int_mem_used, "
+ "frame.int_mem_reserved, " + "frame.int_gpus,"
+ "frame.int_mem_max_used," + "frame.int_mem_used, " + "frame.int_pss_max_used,"
+ "frame.int_pss_used, " + "frame.int_mem_reserved, " + "frame.int_gpus,"
+ "frame.int_gpu_mem_max_used, " + "frame.int_gpu_mem_used, "
+ "frame.int_gpu_mem_reserved, " + "frame.str_checkpoint_state,"
+ "frame.int_checkpoint_count," + "frame.int_total_past_core_time,"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -389,10 +389,12 @@ public interface DispatchSupport {
* @param frame
* @param rss
* @param maxRss
* @param pss
* @param maxPss
* @param lluTime
*/
void updateFrameMemoryUsageAndLluTime(FrameInterface frame, long rss, long maxRss,
long lluTime);
void updateFrameMemoryUsageAndLluTime(FrameInterface frame, long rss, long maxRss, long pss,
long maxPss, long lluTime);

/**
* Update memory usage data for a given frame's proc record. The frame is used to update the
Expand All @@ -401,14 +403,16 @@ void updateFrameMemoryUsageAndLluTime(FrameInterface frame, long rss, long maxRs
* @param frame
* @param rss
* @param maxRss
* @param pss
* @param maxPss
* @param vsize
* @param maxVsize
* @param usedGpuMemory
* @param maxUsedGpuMemory
*/
void updateProcMemoryUsage(FrameInterface frame, long rss, long maxRss, long vsize,
long maxVsize, long usedGpuMemory, long maxUsedGpuMemory, long usedSwapMemory,
byte[] children);
void updateProcMemoryUsage(FrameInterface frame, long rss, long maxRss, long pss, long maxPss,
long vsize, long maxVsize, long usedGpuMemory, long maxUsedGpuMemory,
long usedSwapMemory, byte[] children);

/**
* Return true if adding the given core units would put the show over its burst value.
Expand Down
Loading
Loading