cctools
work_queue.h
Go to the documentation of this file.
1 /*
2 Copyright (C) 2022 The University of Notre Dame
3 This software is distributed under the GNU General Public License.
4 See the file COPYING for details.
5 */
6 
7 #ifndef WORK_QUEUE_H
8 #define WORK_QUEUE_H
9 
20 #include <sys/types.h>
21 #include "timestamp.h"
22 #include "category.h"
23 #include "rmsummary.h"
24 
25 #define WORK_QUEUE_DEFAULT_PORT 9123
26 #define WORK_QUEUE_RANDOM_PORT 0
28 #define WORK_QUEUE_WAITFORTASK -1
30 #define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 120
31 #define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT 30
35 typedef enum {
39 
42 typedef enum {
52 
55 typedef enum {
56  WORK_QUEUE_SCHEDULE_UNSET = 0,
63 
66 typedef enum {
82 
85 typedef enum {
94 
97 typedef enum {
105 
106 
107 // Here we repeat the category_mode_t declaration but with work_queue names.
108 // This is needed to generate uniform names in the API and bindings:
109 
110 typedef enum {
129 
130 
136  char *tag;
137  char *command_line;
139  char *output;
140  struct list *input_files;
141  struct list *output_files;
142  struct list *env_list;
143  int taskid;
146  char *host;
147  char *hostname;
149  char *coprocess;
151  char *category;
154  double priority;
157  int try_count;
161  /* All times in microseconds */
162  /* A time_when_* refers to an instant in time, otherwise it refers to a length of time. */
181  int64_t bytes_received;
182  int64_t bytes_sent;
191  struct list *features;
193  /* deprecated fields */
194  //int total_submissions; /**< @deprecated Use try_count. */
195 
222 };
223 
227  /* Stats for the current state of workers: */
234  /* Cumulative stats for workers: */
243  /* Stats for the current state of tasks: */
249  /* Cumulative stats for tasks: */
257  /* All times in microseconds */
258  /* A time_when_* refers to an instant in time, otherwise it refers to a length of time. */
259 
260  /* Master time statistics: */
271  /* Workers time statistics: */
276  /* BW statistics */
277  int64_t bytes_sent;
278  int64_t bytes_received;
279  double bandwidth;
281  /* resources statistics */
290  int64_t total_cores;
291  int64_t total_memory;
292  int64_t total_disk;
294  int64_t committed_cores;
296  int64_t committed_disk;
298  int64_t max_cores;
299  int64_t max_memory;
300  int64_t max_disk;
302  int64_t min_cores;
303  int64_t min_memory;
304  int64_t min_disk;
306  double manager_load;
340  double capacity;
342  double efficiency;
345  int64_t total_gpus;
346  int64_t committed_gpus;
347  int64_t max_gpus;
348  int64_t min_gpus;
350  int port;
351  int priority;
357 };
358 
359 
360 /* Forward declare the queue's structure. This structure is opaque and defined in work_queue.c */
361 struct work_queue;
362 
363 
367 
376 struct work_queue_task *work_queue_task_create(const char *full_command);
377 
383 struct work_queue_task *work_queue_task_clone(const struct work_queue_task *task);
384 
389 void work_queue_task_specify_command( struct work_queue_task *t, const char *cmd );
390 
396 void work_queue_task_specify_coprocess( struct work_queue_task *t, const char *coprocess_name );
397 
414 int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags);
415 
430 int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, work_queue_file_type_t type, work_queue_file_flags_t flags);
431 
442 int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, work_queue_file_flags_t flags);
443 
457 int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags, int recursive);
458 
471 int work_queue_task_specify_url(struct work_queue_task *t, const char *url, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags);
472 
485 int work_queue_task_specify_file_command(struct work_queue_task *t, const char *cmd, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags);
486 
493 
499 void work_queue_task_specify_memory( struct work_queue_task *t, int64_t memory );
500 
506 void work_queue_task_specify_disk( struct work_queue_task *t, int64_t disk );
507 
513 void work_queue_task_specify_cores( struct work_queue_task *t, int cores );
514 
520 void work_queue_task_specify_gpus( struct work_queue_task *t, int gpus );
521 
529 void work_queue_task_specify_end_time( struct work_queue_task *t, int64_t useconds );
530 
537 void work_queue_task_specify_start_time_min( struct work_queue_task *t, int64_t useconds );
538 
546 void work_queue_task_specify_running_time( struct work_queue_task *t, int64_t useconds );
547 
555 void work_queue_task_specify_running_time_max( struct work_queue_task *t, int64_t seconds );
556 
563 void work_queue_task_specify_running_time_min( struct work_queue_task *t, int64_t seconds );
564 
571 void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag);
572 
578 void work_queue_task_specify_category(struct work_queue_task *t, const char *category);
579 
584 void work_queue_task_specify_feature(struct work_queue_task *t, const char *name);
585 
593 
600 void work_queue_task_specify_environment_variable( struct work_queue_task *t, const char *name, const char *value );
601 
608 
614 void work_queue_task_specify_monitor_output(struct work_queue_task *t, const char *monitor_output);
615 
621 
622 
674 
675 
677 
681 
698 struct work_queue *work_queue_create(int port);
699 
700 
709 struct work_queue *work_queue_ssl_create(int port, const char *key, const char *cert);
710 
724 int work_queue_enable_monitoring(struct work_queue *q, char *monitor_output_directory, int watchdog);
725 
735 int work_queue_enable_monitoring_full(struct work_queue *q, char *monitor_output_directory, int watchdog);
736 
745 int work_queue_submit(struct work_queue *q, struct work_queue_task *t);
746 
747 
758 int work_queue_specify_min_taskid(struct work_queue *q, int minid);
759 
764 void work_queue_block_host(struct work_queue *q, const char *hostname);
765 
773 void work_queue_block_host_with_timeout(struct work_queue *q, const char *hostname, time_t seconds);
774 
775 
780 void work_queue_unblock_host(struct work_queue *q, const char *hostname);
781 
785 void work_queue_unblock_all(struct work_queue *q);
786 
800 void work_queue_invalidate_cached_file(struct work_queue *q, const char *local_name, work_queue_file_t type);
801 
802 
817 struct work_queue_task *work_queue_wait(struct work_queue *q, int timeout);
818 
819 
827 struct work_queue_task *work_queue_wait_for_tag(struct work_queue *q, const char *tag, int timeout);
828 
840 int work_queue_hungry(struct work_queue *q);
841 
849 int work_queue_empty(struct work_queue *q);
850 
857 int work_queue_port(struct work_queue *q);
858 
863 void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s);
864 
869 void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s);
870 
876 void work_queue_get_stats_category(struct work_queue *q, const char *c, struct work_queue_stats *s);
877 
882 char *work_queue_status(struct work_queue *q, const char *request);
883 
884 
889 struct rmsummary **work_queue_workers_summary(struct work_queue *q);
890 
896 work_queue_task_state_t work_queue_task_state(struct work_queue *q, int taskid);
897 
902 void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth);
903 
908 double work_queue_get_effective_bandwidth(struct work_queue *q);
909 
916 char * work_queue_get_worker_summary( struct work_queue *q );
917 
927 int work_queue_activate_fast_abort(struct work_queue *q, double multiplier);
928 
929 
939 int work_queue_activate_fast_abort_category(struct work_queue *q, const char *category, double multiplier);
940 
941 
950 int work_queue_specify_draining_by_hostname(struct work_queue *q, const char *hostname, int drain_flag);
951 
958 int work_queue_specify_category_mode(struct work_queue *q, const char *category, work_queue_category_mode_t mode);
959 
965 void work_queue_specify_category_max_concurrent(struct work_queue *q, const char *category, int max_concurrent);
966 
974 int work_queue_enable_category_resource(struct work_queue *q, const char *category, const char *resource, int autolabel);
975 
981 void work_queue_specify_algorithm(struct work_queue *q, work_queue_schedule_t algorithm);
982 
987 const char *work_queue_name(struct work_queue *q);
988 
993 void work_queue_specify_name(struct work_queue *q, const char *name);
994 
999 void work_queue_specify_debug_path(struct work_queue *q, const char *path);
1000 
1005 void work_queue_specify_tlq_port(struct work_queue *q, int port);
1006 
1011 void work_queue_specify_priority(struct work_queue *q, int priority);
1012 
1021 void work_queue_specify_num_tasks_left(struct work_queue *q, int ntasks);
1022 
1028 void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port);
1029 
1034 void work_queue_specify_catalog_servers(struct work_queue *q, const char *hosts);
1035 
1041 struct work_queue_task *work_queue_cancel_by_taskid(struct work_queue *q, int id);
1042 
1048 struct work_queue_task *work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag);
1049 
1054 struct list * work_queue_cancel_all_tasks(struct work_queue *q);
1055 
1060 int work_queue_shut_down_workers(struct work_queue *q, int n);
1061 
1066 void work_queue_delete(struct work_queue *q);
1067 
1073 int work_queue_specify_log(struct work_queue *q, const char *logfile);
1074 
1080 int work_queue_specify_transactions_log(struct work_queue *q, const char *logfile);
1081 
1087 void work_queue_specify_password( struct work_queue *q, const char *password );
1088 
1095 int work_queue_specify_password_file( struct work_queue *q, const char *file );
1096 
1101 void work_queue_specify_keepalive_interval(struct work_queue *q, int interval);
1102 
1107 void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout);
1108 
1114 void work_queue_manager_preferred_connection(struct work_queue *q, const char *preferred_connection);
1115 
1138 int work_queue_tune(struct work_queue *q, const char *name, double value);
1139 
1145 void work_queue_specify_max_resources(struct work_queue *q, const struct rmsummary *rm);
1146 
1152 void work_queue_specify_min_resources(struct work_queue *q, const struct rmsummary *rm);
1153 
1159 void work_queue_specify_category_max_resources(struct work_queue *q, const char *category, const struct rmsummary *rm);
1160 
1166 void work_queue_specify_category_min_resources(struct work_queue *q, const char *category, const struct rmsummary *rm);
1167 
1173 void work_queue_specify_category_first_allocation_guess(struct work_queue *q, const char *category, const struct rmsummary *rm);
1174 
1180 void work_queue_initialize_categories(struct work_queue *q, struct rmsummary *max, const char *summaries_file);
1181 
1182 
1187 const char *work_queue_result_str(work_queue_result_t result);
1188 
1190 
1194 
1195 #define WORK_QUEUE_TASK_ORDER_FIFO 0
1196 #define WORK_QUEUE_TASK_ORDER_LIFO 1
1204 void work_queue_specify_task_order(struct work_queue *q, int order);
1205 
1206 
1207 #define WORK_QUEUE_MANAGER_MODE_STANDALONE 0
1208 #define WORK_QUEUE_MANAGER_MODE_CATALOG 1
1217 void work_queue_specify_manager_mode(struct work_queue *q, int mode);
1218 
1219 
1225 void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on);
1226 
1235 int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname);
1236 
1244 int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname);
1245 
1253 int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname);
1254 
1262 int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname);
1263 
1271 int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname);
1272 
1278 char *work_queue_generate_disk_alloc_full_filename(char *pwd, int taskid);
1279 
1280 
1283 void work_queue_task_specify_enviroment_variable( struct work_queue_task *t, const char *name, const char *value );
1284 
1286 
1287 // Renames for backwards compatibility
1288 #define work_queue_master_preferred_connection work_queue_manager_preferred_connection
1289 #define work_queue_specify_master_mode work_queue_specify_manager_mode
1290 #define work_queue_blacklist_add work_queue_block_host
1291 #define work_queue_blacklist_add_with_timeout work_queue_block_host_with_timeout
1292 #define work_queue_blacklist_remove work_queue_unblock_host
1293 #define work_queue_blacklist_clear work_queue_unblock_all
1294 
1295 #endif
work_queue_stats::total_tasks_complete
int total_tasks_complete
Definition: work_queue.h:324
work_queue_task_specify_category
void work_queue_task_specify_category(struct work_queue_task *t, const char *category)
Label the task with the given category.
WORK_QUEUE_ALLOCATION_MODE_GREEDY_BUCKETING
@ WORK_QUEUE_ALLOCATION_MODE_GREEDY_BUCKETING
Use the greedy bucketing algorithm to label resources.
Definition: work_queue.h:125
work_queue_task::output_files
struct list * output_files
The output files (other than the standard output stream) created by the program to be retrieved from ...
Definition: work_queue.h:141
work_queue_task_specify_coprocess
void work_queue_task_specify_coprocess(struct work_queue_task *t, const char *coprocess_name)
Indicate the name of the coprocess in the worker that executes this task.
WORK_QUEUE_TASK_CANCELED
@ WORK_QUEUE_TASK_CANCELED
Task was canceled before completion.
Definition: work_queue.h:92
category.h
work_queue_specify_catalog_server
void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port)
Specify the catalog server the manager should report to.
work_queue_stats::total_workers_removed
int total_workers_removed
Definition: work_queue.h:316
WORK_QUEUE_RESULT_FORSAKEN
@ WORK_QUEUE_RESULT_FORSAKEN
The task failed, but it was not a task error.
Definition: work_queue.h:75
work_queue_task::time_when_done
timestamp_t time_when_done
The time at which the task is marked as retrieved, after transferring output files and other final proce...
Definition: work_queue.h:164
work_queue_specify_catalog_servers
void work_queue_specify_catalog_servers(struct work_queue *q, const char *hosts)
Specify the catalog server(s) the manager should report to.
work_queue_specify_keepalive_timeout
void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout)
Change the keepalive timeout for identifying dead workers for a given queue.
work_queue_specify_snapshot_file
int work_queue_specify_snapshot_file(struct work_queue_task *t, const char *monitor_snapshot_file)
When monitoring, indicates a json-encoded file that instructs the monitor to take a snapshot of the t...
work_queue_tune
int work_queue_tune(struct work_queue *q, const char *name, double value)
Tune advanced parameters for work queue.
work_queue_task::total_bytes_sent
int64_t total_bytes_sent
Definition: work_queue.h:218
work_queue_specify_password_file
int work_queue_specify_password_file(struct work_queue *q, const char *file)
Add a mandatory password file that each worker must present.
work_queue_stats::total_tasks_failed
int total_tasks_failed
Definition: work_queue.h:325
work_queue_stats::workers_full
int workers_full
Definition: work_queue.h:353
work_queue_stats::tasks_with_results
int tasks_with_results
Number of tasks with retrieved results and waiting to be returned to user.
Definition: work_queue.h:247
work_queue_task::max_retries
int max_retries
Number of times the task is tried to be executed on some workers until success.
Definition: work_queue.h:155
work_queue_task::time_when_retrieval
timestamp_t time_when_retrieval
The time when output files start to be transferred back to the manager.
Definition: work_queue.h:174
work_queue_get_stats_hierarchy
void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s)
Get statistics of the manager queue together with foremen information.
work_queue_stats::workers_init
int workers_init
Number of workers connected, but that have not sent their available resources report yet.
Definition: work_queue.h:229
work_queue_specify_min_taskid
int work_queue_specify_min_taskid(struct work_queue *q, int minid)
Set the minimum taskid of future submitted tasks.
work_queue_category_mode_t
work_queue_category_mode_t
Definition: work_queue.h:110
work_queue_stats::committed_cores
int64_t committed_cores
Committed number of cores aggregated across the connected workers.
Definition: work_queue.h:294
WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION
@ WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION
The task used more resources than requested.
Definition: work_queue.h:72
WORK_QUEUE_TASK_RUNNING
@ WORK_QUEUE_TASK_RUNNING
Task has been dispatched to some worker.
Definition: work_queue.h:88
work_queue_task::bytes_transferred
int64_t bytes_transferred
Number of bytes transferred since task has last started transferring input data.
Definition: work_queue.h:183
work_queue_stats::min_memory
int64_t min_memory
The smallest memory size in MB observed among the connected workers.
Definition: work_queue.h:303
work_queue_activate_fast_abort
int work_queue_activate_fast_abort(struct work_queue *q, double multiplier)
Turn on or off fast abort functionality for a given queue for tasks without an explicit category.
work_queue_task_specify_start_time_min
void work_queue_task_specify_start_time_min(struct work_queue_task *t, int64_t useconds)
Specify the minimum start time allowed for the task (in microseconds since the Epoch).
WORK_QUEUE_REMOTECMD
@ WORK_QUEUE_REMOTECMD
File-spec is a remote command.
Definition: work_queue.h:100
WORK_QUEUE_RESULT_DISK_ALLOC_FULL
@ WORK_QUEUE_RESULT_DISK_ALLOC_FULL
The task filled its loop device allocation but needed more space.
Definition: work_queue.h:78
work_queue_stats
Statistics describing a work queue.
Definition: work_queue.h:226
work_queue_task::time_task_submit
timestamp_t time_task_submit
Definition: work_queue.h:196
work_queue_create
struct work_queue * work_queue_create(int port)
Create a new work queue.
category
Definition: category.h:69
work_queue_stats::total_disk
int64_t total_disk
Total disk space in MB aggregated across the connected workers.
Definition: work_queue.h:292
work_queue_task::hostname
char * hostname
The name of the host on which it ran.
Definition: work_queue.h:147
work_queue_task::tag
char * tag
An optional user-defined logical name for the task.
Definition: work_queue.h:136
WORK_QUEUE_RESULT_OUTPUT_MISSING
@ WORK_QUEUE_RESULT_OUTPUT_MISSING
The task ran but failed to generate a specified output file.
Definition: work_queue.h:69
work_queue_specify_min_resources
void work_queue_specify_min_resources(struct work_queue *q, const struct rmsummary *rm)
Sets the minimum resources a task without an explicit category ("default" category) may use.
work_queue_task::time_committed
timestamp_t time_committed
Definition: work_queue.h:198
work_queue_stats::total_workers_lost
int total_workers_lost
Definition: work_queue.h:317
work_queue_specify_draining_by_hostname
int work_queue_specify_draining_by_hostname(struct work_queue *q, const char *hostname, int drain_flag)
Set the draining mode per worker hostname.
work_queue_stats::time_workers_execute_exhaustion
timestamp_t time_workers_execute_exhaustion
Total time workers spent executing tasks that exhausted resources.
Definition: work_queue.h:274
work_queue_task::fast_abort_count
int fast_abort_count
Number of times this task has been terminated for running too long.
Definition: work_queue.h:159
work_queue_stats::total_bytes_received
int64_t total_bytes_received
Definition: work_queue.h:338
work_queue_specify_category_max_resources
void work_queue_specify_category_max_resources(struct work_queue *q, const char *category, const struct rmsummary *rm)
Sets the maximum resources a task in the category may use.
work_queue_task_specify_max_retries
void work_queue_task_specify_max_retries(struct work_queue_task *t, int64_t max_retries)
Specify the number of times this task is retried on worker errors.
work_queue_stats::total_tasks_cancelled
int total_tasks_cancelled
Definition: work_queue.h:326
work_queue_task::resources_measured
struct rmsummary * resources_measured
When monitoring is enabled, it points to the measured resources used by the task in its latest attemp...
Definition: work_queue.h:186
work_queue_stats::min_disk
int64_t min_disk
The smallest disk space in MB observed among the connected workers.
Definition: work_queue.h:304
work_queue_task_specify_output_file
int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname)
Add an output file to a task.
work_queue_task_specify_tag
void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag)
Attach a user defined string tag to the task.
work_queue_task::features
struct list * features
User-defined features this task requires.
Definition: work_queue.h:191
work_queue_stats::workers_lost
int workers_lost
Total number of worker connections that were unexpectedly lost.
Definition: work_queue.h:241
work_queue_task_specify_file
int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags)
Add a file to a task.
WORK_QUEUE_SCHEDULE_FILES
@ WORK_QUEUE_SCHEDULE_FILES
Select worker that has the most data required by the task.
Definition: work_queue.h:58
work_queue_stats::total_workers_connected
int total_workers_connected
Definition: work_queue.h:314
work_queue_task_specify_cores
void work_queue_task_specify_cores(struct work_queue_task *t, int cores)
Specify the number of cores required by a task.
work_queue_stats::bytes_received
int64_t bytes_received
Total number of file bytes (not including protocol control msg bytes) received from the workers by th...
Definition: work_queue.h:278
work_queue_specify_category_first_allocation_guess
void work_queue_specify_category_first_allocation_guess(struct work_queue *q, const char *category, const struct rmsummary *rm)
Set the initial guess for resource autolabeling for the given category.
WORK_QUEUE_RESULT_SIGNAL
@ WORK_QUEUE_RESULT_SIGNAL
The task was terminated with a signal.
Definition: work_queue.h:71
work_queue_specify_name
void work_queue_specify_name(struct work_queue *q, const char *name)
Change the project name for a given queue.
CATEGORY_ALLOCATION_MODE_MAX_THROUGHPUT
@ CATEGORY_ALLOCATION_MODE_MAX_THROUGHPUT
As above, but maximizing throughput.
Definition: category.h:59
work_queue_task_specify_priority
void work_queue_task_specify_priority(struct work_queue_task *t, double priority)
Specify the priority of this task relative to others in the queue.
WORK_QUEUE_RESULT_STDOUT_MISSING
@ WORK_QUEUE_RESULT_STDOUT_MISSING
The task ran but its stdout has been truncated.
Definition: work_queue.h:70
work_queue_task_specify_running_time
void work_queue_task_specify_running_time(struct work_queue_task *t, int64_t useconds)
Specify the maximum time (in microseconds) the task is allowed to run in a worker.
work_queue_task_specify_end_time
void work_queue_task_specify_end_time(struct work_queue_task *t, int64_t useconds)
Specify the maximum end time allowed for the task (in microseconds since the Epoch).
work_queue_generate_disk_alloc_full_filename
char * work_queue_generate_disk_alloc_full_filename(char *pwd, int taskid)
Generate a worker-level unique filename to indicate a disk allocation being full.
work_queue_specify_tlq_port
void work_queue_specify_tlq_port(struct work_queue *q, int port)
Change the home host and port for a given queue (used by TLQ).
work_queue_wait
struct work_queue_task * work_queue_wait(struct work_queue *q, int timeout)
Wait for a task to complete.
work_queue_block_host_with_timeout
void work_queue_block_host_with_timeout(struct work_queue *q, const char *hostname, time_t seconds)
Block workers in hostname from a queue, but remove block after timeout seconds.
work_queue_port
int work_queue_port(struct work_queue *q)
Get the listening port of the queue.
work_queue_task::bytes_sent
int64_t bytes_sent
Number of bytes sent since task has last started sending input data.
Definition: work_queue.h:182
WORK_QUEUE_ALLOCATION_MODE_MAX
@ WORK_QUEUE_ALLOCATION_MODE_MAX
When monitoring is enabled, tasks are tried with maximum specified values of cores,...
Definition: work_queue.h:114
work_queue_stats::capacity
double capacity
Definition: work_queue.h:340
work_queue_task::resources_requested
struct rmsummary * resources_requested
Number of cores, disk, memory, time, etc.
Definition: work_queue.h:187
work_queue_stats::max_disk
int64_t max_disk
The largest disk space in MB observed among the connected workers.
Definition: work_queue.h:300
WORK_QUEUE_TASK_RETRIEVED
@ WORK_QUEUE_TASK_RETRIEVED
Task results are available at the manager.
Definition: work_queue.h:90
work_queue_stats::total_worker_slots
int total_worker_slots
Definition: work_queue.h:354
work_queue_stats::capacity_weighted
int capacity_weighted
The estimated number of tasks that this manager can support placing greater weight on the most recent...
Definition: work_queue.h:288
work_queue_stats::workers_removed
int workers_removed
Total number of worker connections that were released by the manager, idled-out, fast-aborted,...
Definition: work_queue.h:236
CATEGORY_ALLOCATION_MODE_GREEDY_BUCKETING
@ CATEGORY_ALLOCATION_MODE_GREEDY_BUCKETING
Use the greedy bucketing algorithm to label resources.
Definition: category.h:61
WORK_QUEUE_RESULT_MAX_RETRIES
@ WORK_QUEUE_RESULT_MAX_RETRIES
The task could not be completed successfully in the given number of retries.
Definition: work_queue.h:76
work_queue_status
char * work_queue_status(struct work_queue *q, const char *request)
Get queue information as json.
work_queue_task_specify_buffer
int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, work_queue_file_flags_t flags)
Add an input buffer to a task.
work_queue_task::time_app_delay
timestamp_t time_app_delay
Definition: work_queue.h:221
work_queue_task::exhausted_attempts
int exhausted_attempts
Number of times the task failed given exhausted resources.
Definition: work_queue.h:158
WORK_QUEUE_SCHEDULE_RAND
@ WORK_QUEUE_SCHEDULE_RAND
Select a random worker.
Definition: work_queue.h:60
WORK_QUEUE_FAILURE_ONLY
@ WORK_QUEUE_FAILURE_ONLY
Only return this output file if the task failed.
Definition: work_queue.h:47
work_queue_stats::workers_fast_aborted
int workers_fast_aborted
Total number of worker connections terminated for being too slow.
Definition: work_queue.h:239
WORK_QUEUE_URL
@ WORK_QUEUE_URL
File-spec refers to a URL.
Definition: work_queue.h:103
work_queue_stats::port
int port
Definition: work_queue.h:350
work_queue_stats::time_when_started
timestamp_t time_when_started
Absolute time at which the manager started.
Definition: work_queue.h:261
work_queue_task::time_task_finish
timestamp_t time_task_finish
Definition: work_queue.h:197
WORK_QUEUE_DIRECTORY
@ WORK_QUEUE_DIRECTORY
File-spec is a directory.
Definition: work_queue.h:102
work_queue_cancel_by_tasktag
struct work_queue_task * work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag)
Cancel a submitted task using its tag and remove it from queue.
work_queue_task::resource_request
category_allocation_t resource_request
See category_allocation_t.
Definition: work_queue.h:152
work_queue_stats::workers_idled_out
int workers_idled_out
Total number of workers that disconnected for being idle.
Definition: work_queue.h:238
work_queue_stats::capacity_disk
int capacity_disk
The estimated number of workers' MB of disk that this manager can effectively support.
Definition: work_queue.h:285
work_queue_task_specify_feature
void work_queue_task_specify_feature(struct work_queue_task *t, const char *name)
Label the task with a user-defined feature.
work_queue_task::monitor_output_directory
char * monitor_output_directory
Custom output directory for the monitoring output files.
Definition: work_queue.h:188
work_queue_stats::capacity_cores
int capacity_cores
The estimated number of workers' cores that this manager can effectively support.
Definition: work_queue.h:283
work_queue_stats::committed_disk
int64_t committed_disk
Committed disk space in MB aggregated across the connected workers.
Definition: work_queue.h:296
work_queue_block_host
void work_queue_block_host(struct work_queue *q, const char *hostname)
Block workers in hostname from working for queue q.
work_queue_stats::workers_blocked
int workers_blocked
Total number of workers blocked by the manager.
Definition: work_queue.h:240
work_queue_stats::workers_idle
int workers_idle
Number of workers that are not running a task.
Definition: work_queue.h:230
work_queue_stats::total_exhausted_attempts
int total_exhausted_attempts
Definition: work_queue.h:327
work_queue_task::time_workers_execute_failure
timestamp_t time_workers_execute_failure
Accumulated time for runs that terminated in worker failure/disconnection.
Definition: work_queue.h:179
work_queue_task_create
struct work_queue_task * work_queue_task_create(const char *full_command)
Create a new task object.
work_queue_task_specify_input_file_do_not_cache
int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname)
Add an input file to a task, without caching.
work_queue_task::coprocess
char * coprocess
The name of the coprocess in the worker that executes this task.
Definition: work_queue.h:149
work_queue_get_effective_bandwidth
double work_queue_get_effective_bandwidth(struct work_queue *q)
Get current queue bandwidth.
work_queue_task::result
work_queue_result_t result
The result of the task (see work_queue_result_t).
Definition: work_queue.h:145
work_queue_specify_max_resources
void work_queue_specify_max_resources(struct work_queue *q, const struct rmsummary *rm)
Sets the maximum resources a task without an explicit category ("default" category) may use.
work_queue_task::output
char * output
The standard output of the task.
Definition: work_queue.h:139
WORK_QUEUE_TASK_READY
@ WORK_QUEUE_TASK_READY
Task is ready to be run, waiting in queue.
Definition: work_queue.h:87
work_queue_task::time_execute_cmd_start
timestamp_t time_execute_cmd_start
Definition: work_queue.h:207
WORK_QUEUE_RESULT_TASK_MAX_RUN_TIME
@ WORK_QUEUE_RESULT_TASK_MAX_RUN_TIME
The task ran for more than the specified time (relative since running in a worker).
Definition: work_queue.h:77
work_queue_stats::tasks_complete
int tasks_complete
Definition: work_queue.h:321
work_queue_stats::tasks_on_workers
int tasks_on_workers
Number of tasks currently dispatched to some worker.
Definition: work_queue.h:245
WORK_QUEUE_SCHEDULE_TIME
@ WORK_QUEUE_SCHEDULE_TIME
Select worker that has the fastest execution time on previous tasks.
Definition: work_queue.h:59
work_queue_task::total_bytes_transferred
int64_t total_bytes_transferred
Definition: work_queue.h:219
work_queue_stats::total_receive_time
timestamp_t total_receive_time
Definition: work_queue.h:330
work_queue_unblock_host
void work_queue_unblock_host(struct work_queue *q, const char *hostname)
Unblock host from a queue.
work_queue_stats::max_memory
int64_t max_memory
The largest memory size in MB observed among the connected workers.
Definition: work_queue.h:299
WORK_QUEUE_RESULT_RMONITOR_ERROR
@ WORK_QUEUE_RESULT_RMONITOR_ERROR
The task failed because the monitor did not produce a summary report.
Definition: work_queue.h:79
work_queue_task::time_receive_output_finish
timestamp_t time_receive_output_finish
Definition: work_queue.h:205
work_queue_stats::capacity_gpus
int capacity_gpus
The estimated number of workers' GPUs that this manager can effectively support.
Definition: work_queue.h:286
work_queue_stats::time_application
timestamp_t time_application
Total time spent outside work_queue_wait.
Definition: work_queue.h:269
work_queue_stats::total_workers_joined
int total_workers_joined
Definition: work_queue.h:315
work_queue_stats::total_exhausted_execute_time
timestamp_t total_exhausted_execute_time
Definition: work_queue.h:335
work_queue_task_state
work_queue_task_state_t work_queue_task_state(struct work_queue *q, int taskid)
Get the current state of the task.
WORK_QUEUE_ALLOCATION_MODE_MIN_WASTE
@ WORK_QUEUE_ALLOCATION_MODE_MIN_WASTE
As above, but tasks are first tried with an automatically computed allocation to minimize resource wa...
Definition: work_queue.h:121
work_queue_task_specify_file_piece
int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, work_queue_file_type_t type, work_queue_file_flags_t flags)
Add a file piece to a task.
work_queue_stats::workers_joined
int workers_joined
Total number of worker connections that were established to the manager.
Definition: work_queue.h:235
work_queue_cancel_by_taskid
struct work_queue_task * work_queue_cancel_by_taskid(struct work_queue *q, int id)
Cancel a submitted task using its task id and remove it from queue.
work_queue_stats::workers_ready
int workers_ready
Definition: work_queue.h:352
WORK_QUEUE_FILE
@ WORK_QUEUE_FILE
File-spec is a regular file.
Definition: work_queue.h:98
CATEGORY_ALLOCATION_MODE_FIXED
@ CATEGORY_ALLOCATION_MODE_FIXED
When monitoring is disabled, all tasks run as WORK_QUEUE_ALLOCATION_MODE_FIXED.
Definition: category.h:36
work_queue_stats::tasks_done
int tasks_done
Total number of tasks completed and returned to user.
Definition: work_queue.h:252
work_queue_set_bandwidth_limit
void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth)
Limit the queue bandwidth when transferring files to and from workers.
work_queue_stats::time_internal
timestamp_t time_internal
Total time the queue spends in internal processing.
Definition: work_queue.h:267
work_queue_task::time_send_input_finish
timestamp_t time_send_input_finish
Definition: work_queue.h:201
work_queue_task_specify_url
int work_queue_task_specify_url(struct work_queue_task *t, const char *url, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags)
Add a url as an input for a task.
work_queue_task::time_workers_execute_all
timestamp_t time_workers_execute_all
Accumulated time for executing the command on any worker, regardless of whether the task completed (i...
Definition: work_queue.h:177
work_queue_task::time_receive_result_finish
timestamp_t time_receive_result_finish
Definition: work_queue.h:203
work_queue_task_specify_output_file_do_not_cache
int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname)
Add an output file to a task without caching.
work_queue_task::total_time_until_worker_failure
timestamp_t total_time_until_worker_failure
Definition: work_queue.h:215
work_queue_stats::total_tasks_dispatched
int total_tasks_dispatched
Definition: work_queue.h:323
work_queue_stats::total_good_transfer_time
timestamp_t total_good_transfer_time
Definition: work_queue.h:331
work_queue_enable_monitoring
int work_queue_enable_monitoring(struct work_queue *q, char *monitor_output_directory, int watchdog)
Enables resource monitoring on the given work queue.
work_queue_specify_num_tasks_left
void work_queue_specify_num_tasks_left(struct work_queue *q, int ntasks)
Specify the number of tasks not yet submitted to the queue.
WORK_QUEUE_SCHEDULE_WORST
@ WORK_QUEUE_SCHEDULE_WORST
Select the worst fit worker (the worker with more unused resources).
Definition: work_queue.h:61
work_queue_stats::total_workers_fast_aborted
int total_workers_fast_aborted
Definition: work_queue.h:319
work_queue_ssl_create
struct work_queue * work_queue_ssl_create(int port, const char *key, const char *cert)
Create a new work queue using SSL.
work_queue_empty
int work_queue_empty(struct work_queue *q)
Determine whether the queue is empty.
work_queue_specify_category_mode
int work_queue_specify_category_mode(struct work_queue *q, const char *category, work_queue_category_mode_t mode)
Turn on or off first-allocation labeling for a given category.
work_queue_stats::efficiency
double efficiency
Definition: work_queue.h:342
rmsummary
Definition: rmsummary.h:26
work_queue_specify_transactions_log
int work_queue_specify_transactions_log(struct work_queue *q, const char *logfile)
Add a log file that records the states of the connected workers and tasks.
work_queue_task::disk_allocation_exhausted
int disk_allocation_exhausted
Non-zero if a task filled its loop device allocation, zero otherwise.
Definition: work_queue.h:166
work_queue_stats::workers_able
int workers_able
Number of workers on which the largest task can run.
Definition: work_queue.h:232
work_queue_submit
int work_queue_submit(struct work_queue *q, struct work_queue_task *t)
Submit a task to a queue.
work_queue_stats::avg_capacity
int avg_capacity
Definition: work_queue.h:355
work_queue_specify_debug_path
void work_queue_specify_debug_path(struct work_queue *q, const char *path)
Change the debug log path for a given queue (used by TLQ).
WORK_QUEUE_WATCH
@ WORK_QUEUE_WATCH
Watch the output file and send back changes as the task runs.
Definition: work_queue.h:46
work_queue_schedule_t
work_queue_schedule_t
Task scheduling modes used by work_queue_specify_algorithm and work_queue_task_specify_algorithm.
Definition: work_queue.h:55
work_queue_stats::time_polling
timestamp_t time_polling
Total time blocking waiting for worker communications (i.e., manager idle waiting for a worker messag...
Definition: work_queue.h:268
work_queue_workers_summary
struct rmsummary ** work_queue_workers_summary(struct work_queue *q)
Summary data for all workers in buffer.
work_queue_task_specify_file_command
int work_queue_task_specify_file_command(struct work_queue_task *t, const char *cmd, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags)
Gets/puts file at remote_name using cmd at worker.
WORK_QUEUE_BUFFER
@ WORK_QUEUE_BUFFER
Data comes from buffer memory.
Definition: work_queue.h:99
work_queue_stats::min_cores
int64_t min_cores
The lowest number of cores observed among the connected workers.
Definition: work_queue.h:302
work_queue_task::resources_allocated
struct rmsummary * resources_allocated
Resources allocated to the task in its latest attempt.
Definition: work_queue.h:185
work_queue_task_specify_running_time_min
void work_queue_task_specify_running_time_min(struct work_queue_task *t, int64_t seconds)
Specify the minimum time (in seconds) the task is expected to run in a worker.
work_queue_stats::start_time
timestamp_t start_time
Definition: work_queue.h:328
work_queue_stats::max_gpus
int64_t max_gpus
Definition: work_queue.h:347
WORK_QUEUE_SCHEDULE_FCFS
@ WORK_QUEUE_SCHEDULE_FCFS
Select worker on a first-come-first-serve basis.
Definition: work_queue.h:57
work_queue_stats::tasks_dispatched
int tasks_dispatched
Total number of tasks dispatched to workers.
Definition: work_queue.h:251
work_queue_get_stats_category
void work_queue_get_stats_category(struct work_queue *q, const char *c, struct work_queue_stats *s)
Get the task statistics for the given category.
work_queue_stats::capacity_instantaneous
int capacity_instantaneous
The estimated number of tasks that this manager can support considering only the most recently comple...
Definition: work_queue.h:287
work_queue_task_specify_command
void work_queue_task_specify_command(struct work_queue_task *t, const char *cmd)
Indicate the command to be executed.
work_queue_task::return_status
int return_status
The exit code of the command line.
Definition: work_queue.h:144
timestamp_t
UINT64_T timestamp_t
A type to hold the current time, in microseconds since January 1st, 1970.
Definition: timestamp.h:20
work_queue_task_specify_memory
void work_queue_task_specify_memory(struct work_queue_task *t, int64_t memory)
Specify the amount of memory required by a task.
work_queue_task_specify_running_time_max
void work_queue_task_specify_running_time_max(struct work_queue_task *t, int64_t seconds)
Specify the maximum time (in seconds) the task is allowed to run in a worker.
work_queue_stats::time_receive
timestamp_t time_receive
Total time spent in receiving results from workers (output files).
Definition: work_queue.h:263
work_queue_task::category
char * category
User-provided label for the task.
Definition: work_queue.h:151
work_queue_unblock_all
void work_queue_unblock_all(struct work_queue *q)
Unblock all hosts.
work_queue_stats::tasks_running
int tasks_running
Number of tasks currently executing at some worker.
Definition: work_queue.h:246
WORK_QUEUE_FILE_PIECE
@ WORK_QUEUE_FILE_PIECE
File-spec refers to only a part of a file.
Definition: work_queue.h:101
work_queue_specify_priority
void work_queue_specify_priority(struct work_queue *q, int priority)
Change the priority for a given queue.
work_queue_stats::total_cores
int64_t total_cores
Total number of cores aggregated across the connected workers.
Definition: work_queue.h:290
WORK_QUEUE_TASK_UNKNOWN
@ WORK_QUEUE_TASK_UNKNOWN
There is no such task.
Definition: work_queue.h:86
WORK_QUEUE_RESULT_OUTPUT_TRANSFER_ERROR
@ WORK_QUEUE_RESULT_OUTPUT_TRANSFER_ERROR
The task failed because an output could not be transferred to the manager (not enough disk space,...
Definition: work_queue.h:80
WORK_QUEUE_OUTPUT
@ WORK_QUEUE_OUTPUT
Specify an output object.
Definition: work_queue.h:37
work_queue_stats::tasks_exhausted_attempts
int tasks_exhausted_attempts
Total number of task executions that failed given resource exhaustion.
Definition: work_queue.h:255
work_queue_stats::manager_load
double manager_load
In the range of [0,1].
Definition: work_queue.h:306
WORK_QUEUE_RESULT_SUCCESS
@ WORK_QUEUE_RESULT_SUCCESS
The task ran successfully.
Definition: work_queue.h:67
work_queue_task_specify_disk
void work_queue_task_specify_disk(struct work_queue_task *t, int64_t disk)
Specify the amount of disk space required by a task.
work_queue_task::time_workers_execute_last
timestamp_t time_workers_execute_last
Duration of the last complete execution for this task.
Definition: work_queue.h:176
WORK_QUEUE_TASK_DONE
@ WORK_QUEUE_TASK_DONE
Task is done, and returned through work_queue_wait.
Definition: work_queue.h:91
work_queue_task_specify_algorithm
void work_queue_task_specify_algorithm(struct work_queue_task *t, work_queue_schedule_t algorithm)
Select the scheduling algorithm for a single task.
work_queue_stats::committed_memory
int64_t committed_memory
Committed memory in MB aggregated across the connected workers.
Definition: work_queue.h:295
work_queue_stats::idle_percentage
double idle_percentage
Definition: work_queue.h:343
work_queue_stats::capacity_memory
int capacity_memory
The estimated number of workers' MB of RAM that this manager can effectively support.
Definition: work_queue.h:284
work_queue_stats::max_cores
int64_t max_cores
The highest number of cores observed among the connected workers.
Definition: work_queue.h:298
work_queue_hungry
int work_queue_hungry(struct work_queue *q)
Determine whether the queue is 'hungry' for more tasks.
WORK_QUEUE_PREEXIST
@ WORK_QUEUE_PREEXIST
If the filename already exists on the host, use it in place.
Definition: work_queue.h:50
work_queue_task::time_receive_output_start
timestamp_t time_receive_output_start
Definition: work_queue.h:204
work_queue_task_specify_directory
int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags, int recursive)
Add a directory to a task.
work_queue_activate_fast_abort_category
int work_queue_activate_fast_abort_category(struct work_queue *q, const char *category, double multiplier)
Turn on or off fast abort functionality for a given category.
CATEGORY_ALLOCATION_MODE_EXHAUSTIVE_BUCKETING
@ CATEGORY_ALLOCATION_MODE_EXHAUSTIVE_BUCKETING
Use the exhaustive bucketing algorithm to label resources.
Definition: category.h:64
work_queue_task::total_transfer_time
timestamp_t total_transfer_time
Definition: work_queue.h:210
work_queue_task::try_count
int try_count
The number of times the task has been dispatched to a worker.
Definition: work_queue.h:157
work_queue_stats::committed_gpus
int64_t committed_gpus
Definition: work_queue.h:346
WORK_QUEUE_SYMLINK
@ WORK_QUEUE_SYMLINK
Create a symlink to the file rather than copying it, if possible.
Definition: work_queue.h:45
category_allocation_t
category_allocation_t
Definition: category.h:22
work_queue_task_clone
struct work_queue_task * work_queue_task_clone(const struct work_queue_task *task)
Create a copy of a task. Create a functionally identical copy of a work_queue_task that can be re-subm...
work_queue_task::time_when_commit_end
timestamp_t time_when_commit_end
The time when the task is completely transferred to a worker.
Definition: work_queue.h:172
work_queue_stats::workers_connected
int workers_connected
Number of workers currently connected to the manager.
Definition: work_queue.h:228
work_queue_specify_password
void work_queue_specify_password(struct work_queue *q, const char *password)
Add a mandatory password that each worker must present.
work_queue_stats::time_workers_execute
timestamp_t time_workers_execute
Total time workers spent executing done tasks.
Definition: work_queue.h:272
work_queue_wait_for_tag
struct work_queue_task * work_queue_wait_for_tag(struct work_queue *q, const char *tag, int timeout)
Wait for a task with a given tag to complete.
work_queue_stats::tasks_waiting
int tasks_waiting
Number of tasks waiting to be dispatched.
Definition: work_queue.h:244
work_queue_manager_preferred_connection
void work_queue_manager_preferred_connection(struct work_queue *q, const char *preferred_connection)
Set the preference for using hostname over IP address to connect.
work_queue_task::env_list
struct list * env_list
Environment variables applied to the task.
Definition: work_queue.h:142
work_queue_initialize_categories
void work_queue_initialize_categories(struct work_queue *q, struct rmsummary *max, const char *summaries_file)
Initialize first value of categories.
work_queue_stats::time_send
timestamp_t time_send
Total time spent in sending tasks to workers (tasks descriptions, and input files....
Definition: work_queue.h:262
work_queue_stats::total_good_execute_time
timestamp_t total_good_execute_time
Definition: work_queue.h:334
work_queue_task::time_receive_result_start
timestamp_t time_receive_result_start
Definition: work_queue.h:202
work_queue_stats::time_send_good
timestamp_t time_send_good
Total time spent in sending data to workers for tasks with result WQ_RESULT_SUCCESS.
Definition: work_queue.h:264
WORK_QUEUE_SUCCESS_ONLY
@ WORK_QUEUE_SUCCESS_ONLY
Only return this output file if the task succeeded.
Definition: work_queue.h:48
work_queue_stats::workers_busy
int workers_busy
Number of workers that are running at least one task.
Definition: work_queue.h:231
work_queue_specify_estimate_capacity_on
void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on)
Change whether to estimate manager capacity for a given queue.
WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT
@ WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT
As above, but maximizing throughput.
Definition: work_queue.h:123
work_queue_stats::capacity_tasks
int capacity_tasks
The estimated number of tasks that this manager can effectively support.
Definition: work_queue.h:282
WORK_QUEUE_TASK_WAITING_RETRIEVAL
@ WORK_QUEUE_TASK_WAITING_RETRIEVAL
Task results are available at the worker.
Definition: work_queue.h:89
work_queue_stats::total_gpus
int64_t total_gpus
Definition: work_queue.h:345
work_queue_stats::total_send_time
timestamp_t total_send_time
Definition: work_queue.h:329
work_queue_task_specify_environment_variable
void work_queue_task_specify_environment_variable(struct work_queue_task *t, const char *name, const char *value)
Specify an environment variable to be added to the task.
work_queue_stats::time_status_msgs
timestamp_t time_status_msgs
Total time spent sending and receiving status messages to and from workers, including workers' standa...
Definition: work_queue.h:266
WORK_QUEUE_INPUT
@ WORK_QUEUE_INPUT
Specify an input object.
Definition: work_queue.h:36
work_queue_task_specify_input_buf
int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname)
Add an input buffer to a task.
work_queue_task::time_send_input_start
timestamp_t time_send_input_start
Definition: work_queue.h:200
work_queue_task::min_running_time
int64_t min_running_time
Minimum time (in seconds) the task needs to run.
Definition: work_queue.h:168
work_queue_task_specify_gpus
void work_queue_task_specify_gpus(struct work_queue_task *t, int gpus)
Specify the number of gpus required by a task.
work_queue_stats::tasks_submitted
int tasks_submitted
Total number of tasks submitted to the queue.
Definition: work_queue.h:250
work_queue_enable_monitoring_full
int work_queue_enable_monitoring_full(struct work_queue *q, char *monitor_output_directory, int watchdog)
Enables resource monitoring on the given work queue.
work_queue_task::time_when_commit_start
timestamp_t time_when_commit_start
The time when the task starts to be transferred to a worker.
Definition: work_queue.h:171
work_queue_task_specify_monitor_output
void work_queue_task_specify_monitor_output(struct work_queue_task *t, const char *monitor_output)
Specify a custom name for the monitoring summary.
work_queue_get_stats
void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s)
Get queue statistics (only from manager).
work_queue_task::total_cmd_exhausted_execute_time
timestamp_t total_cmd_exhausted_execute_time
Definition: work_queue.h:214
work_queue_name
const char * work_queue_name(struct work_queue *q)
Get the project name of the queue.
work_queue_stats::bandwidth
double bandwidth
Average network bandwidth in MB/S observed by the manager when transferring to workers.
Definition: work_queue.h:279
work_queue_file_flags_t
work_queue_file_flags_t
File handling flags used by work_queue_task_specify_file and similar.
Definition: work_queue.h:42
work_queue_specify_keepalive_interval
void work_queue_specify_keepalive_interval(struct work_queue *q, int interval)
Change the keepalive interval for a given queue.
work_queue_specify_category_max_concurrent
void work_queue_specify_category_max_concurrent(struct work_queue *q, const char *category, int max_concurrent)
Set a maximum number of tasks of this category that can execute concurrently.
work_queue_task_specify_enviroment_variable
void work_queue_task_specify_enviroment_variable(struct work_queue_task *t, const char *name, const char *value)
Same as work_queue_task_specify_environment_variable, but with a typo in environment.
work_queue_stats::time_receive_good
timestamp_t time_receive_good
Total time spent in receiving data from workers for tasks with result WQ_RESULT_SUCCESS.
Definition: work_queue.h:265
work_queue_task_specify_input_file
int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname)
Add an input file to a task.
WORK_QUEUE_NOCACHE
@ WORK_QUEUE_NOCACHE
Do not cache file at execution site.
Definition: work_queue.h:43
work_queue_task::input_files
struct list * input_files
The files to transfer to the worker and place in the executing directory.
Definition: work_queue.h:140
work_queue_enable_category_resource
int work_queue_enable_category_resource(struct work_queue *q, const char *category, const char *resource, int autolabel)
Turn on or off first-allocation labeling for a given category and resource.
work_queue_stats::bytes_sent
int64_t bytes_sent
Total number of file bytes (not including protocol control msg bytes) sent out to the workers by the ...
Definition: work_queue.h:277
work_queue_task::cmd_execution_time
timestamp_t cmd_execution_time
Definition: work_queue.h:212
CATEGORY_ALLOCATION_MODE_MAX
@ CATEGORY_ALLOCATION_MODE_MAX
When monitoring is enabled, tasks are tried with maximum specified values of cores,...
Definition: category.h:43
work_queue_task::time_workers_execute_exhaustion
timestamp_t time_workers_execute_exhaustion
Accumulated time spent in attempts that exhausted resources.
Definition: work_queue.h:178
work_queue_task::command_line
char * command_line
The program(s) to execute, as a shell command line.
Definition: work_queue.h:137
WORK_QUEUE_CACHE
@ WORK_QUEUE_CACHE
Cache file at execution site for later use.
Definition: work_queue.h:44
work_queue_get_worker_summary
char * work_queue_get_worker_summary(struct work_queue *q)
Summarize workers.
work_queue_stats::workers_blacklisted
int workers_blacklisted
Definition: work_queue.h:356
work_queue_task::total_cmd_execution_time
timestamp_t total_cmd_execution_time
Definition: work_queue.h:213
work_queue_task
A task description.
Definition: work_queue.h:135
work_queue_task::total_bytes_received
int64_t total_bytes_received
Definition: work_queue.h:217
work_queue_stats::tasks_cancelled
int tasks_cancelled
Total number of tasks cancelled.
Definition: work_queue.h:254
work_queue_invalidate_cached_file
void work_queue_invalidate_cached_file(struct work_queue *q, const char *local_name, work_queue_file_t type)
Invalidate cached file.
work_queue_stats::total_memory
int64_t total_memory
Total memory in MB aggregated across the connected workers.
Definition: work_queue.h:291
WORK_QUEUE_ALLOCATION_MODE_EXHAUSTIVE_BUCKETING
@ WORK_QUEUE_ALLOCATION_MODE_EXHAUSTIVE_BUCKETING
Use the exhaustive bucketing algorithm to label resources.
Definition: work_queue.h:127
work_queue_task_delete
void work_queue_task_delete(struct work_queue_task *t)
Delete a task.
WORK_QUEUE_ALLOCATION_MODE_FIXED
@ WORK_QUEUE_ALLOCATION_MODE_FIXED
When monitoring is disabled, all tasks run as WORK_QUEUE_ALLOCATION_MODE_FIXED.
Definition: work_queue.h:111
work_queue_task::bytes_received
int64_t bytes_received
Number of bytes received since task has last started receiving input data.
Definition: work_queue.h:181
work_queue_task::time_when_submitted
timestamp_t time_when_submitted
The time at which this task was added to the queue.
Definition: work_queue.h:163
work_queue_result_t
work_queue_result_t
Task result types set when a task is complete.
Definition: work_queue.h:66
work_queue_stats::workers_released
int workers_released
Total number of worker connections that were asked by the manager to disconnect.
Definition: work_queue.h:237
work_queue_stats::tasks_failed
int tasks_failed
Total number of tasks completed and returned to user with result other than WQ_RESULT_SUCCESS.
Definition: work_queue.h:253
work_queue_specify_algorithm
void work_queue_specify_algorithm(struct work_queue *q, work_queue_schedule_t algorithm)
Change the worker selection algorithm.
timestamp.h
work_queue_task::host
char * host
The address and port of the host on which it ran.
Definition: work_queue.h:146
work_queue_stats::total_bytes_sent
int64_t total_bytes_sent
Definition: work_queue.h:337
WORK_QUEUE_RESULT_INPUT_MISSING
@ WORK_QUEUE_RESULT_INPUT_MISSING
The task cannot be run due to a missing input file.
Definition: work_queue.h:68
work_queue_task::taskid
int taskid
A unique task id number.
Definition: work_queue.h:143
work_queue_result_str
const char * work_queue_result_str(work_queue_result_t result)
Explain result codes from tasks.
work_queue_stats::priority
int priority
Definition: work_queue.h:351
work_queue_cancel_all_tasks
struct list * work_queue_cancel_all_tasks(struct work_queue *q)
Cancel all submitted tasks and remove them from the queue.
work_queue_stats::time_workers_execute_good
timestamp_t time_workers_execute_good
Total time workers spent executing done tasks with result WQ_RESULT_SUCCESS.
Definition: work_queue.h:273
work_queue_file_t
work_queue_file_t
Types of files that can be attached to a task.
Definition: work_queue.h:97
work_queue_task_state_t
work_queue_task_state_t
State of a task throughout its lifetime.
Definition: work_queue.h:85
work_queue_task::priority
double priority
The priority of this task relative to others in the queue: higher numbers run earlier.
Definition: work_queue.h:154
work_queue_stats::min_gpus
int64_t min_gpus
Definition: work_queue.h:348
work_queue_task::worker_selection_algorithm
work_queue_schedule_t worker_selection_algorithm
How to choose worker to run the task.
Definition: work_queue.h:138
work_queue_stats::total_workers_idled_out
int total_workers_idled_out
Definition: work_queue.h:318
work_queue_stats::total_execute_time
timestamp_t total_execute_time
Definition: work_queue.h:333
WORK_QUEUE_RESULT_UNKNOWN
@ WORK_QUEUE_RESULT_UNKNOWN
The result could not be classified.
Definition: work_queue.h:74
work_queue_task::monitor_snapshot_file
char * monitor_snapshot_file
Filename the monitor checks to produce snapshots.
Definition: work_queue.h:190
work_queue_specify_category_min_resources
void work_queue_specify_category_min_resources(struct work_queue *q, const char *category, const struct rmsummary *rm)
Sets the minimum resources a task in the category may use.
work_queue_shut_down_workers
int work_queue_shut_down_workers(struct work_queue *q, int n)
Shut down workers connected to the work_queue system.
work_queue_delete
void work_queue_delete(struct work_queue *q)
Delete a work queue.
WORK_QUEUE_RESULT_TASK_TIMEOUT
@ WORK_QUEUE_RESULT_TASK_TIMEOUT
The task ran after the specified (absolute since epoch) end time.
Definition: work_queue.h:73
work_queue_file_type_t
work_queue_file_type_t
The input/output type of files used by work_queue_task_specify_file and similar.
Definition: work_queue.h:35
CATEGORY_ALLOCATION_MODE_MIN_WASTE
@ CATEGORY_ALLOCATION_MODE_MIN_WASTE
As above, but tasks are first tried with an automatically computed allocation to minimize resource wa...
Definition: category.h:55
work_queue_specify_log
int work_queue_specify_log(struct work_queue *q, const char *logfile)
Add a log file that records cumulative statistics of the connected workers and submitted tasks.
work_queue_task::time_execute_cmd_finish
timestamp_t time_execute_cmd_finish
Definition: work_queue.h:208