If something happens that prevents existing jobs from terminating, it
is generally bad practice to keep on forking more processes
indefinitely. To alleviate the problem, implement a somewhat trivial
process limitation feature. This defines a maximum number of
processes pending execution. If the number of pending processes has
reached the global limit, a new process terminates itself immediately.
However, this only prevents "limited forks" from creating too many
processes waiting for execution. Normal forks still have no
limit. This should still help the system stay alive longer in
situations where something (temporarily) prevents executing processes
from finishing in time.
Signed-off-by: Timo Kokkonen <timo.t.kokkonen@iki.fi>
static unsigned int max_jobs;
static unsigned int job_count;
static unsigned int jobs_pending;
static unsigned int max_jobs;
static unsigned int job_count;
static unsigned int jobs_pending;
+static unsigned int max_jobs_pending;
int get_child_count(void)
{
int get_child_count(void)
{
+/*
+ * Refuse a job request instead of queueing it.
+ *
+ * Called when the number of pending jobs has already reached
+ * max_jobs_pending. Logs the current counters and writes a single
+ * negative byte to job_get_permission_fd[1].
+ *
+ * Returns 0 when the byte was written, -1 when the write failed.
+ */
+static int deny_job(void)
+{
+	/*
+	 * The counters are unsigned, so they are printed with %u below.
+	 *
+	 * The marker is declared explicitly signed because the signedness
+	 * of plain char is implementation-defined; the byte written is
+	 * the same either way.
+	 * NOTE(review): the code that reads this byte tests it for
+	 * "< 0", so confirm it also reads into a signed type.
+	 */
+	signed char byte = -1;
+	int ret;
+
+	pr_info("Denying new job. %u jobs currently and %u pending, "
+		"limit of pending jobs is %u\n",
+		job_count, jobs_pending, max_jobs_pending);
+
+	ret = write(job_get_permission_fd[1], &byte, 1);
+	if (ret != 1) {
+		pr_err("Failed to write 1 byte: %m\n");
+		return -1;
+	}
+
+	return 0;
+}
+
static int handle_job_request(struct event_handler *h)
{
int ret, pid;
static int handle_job_request(struct event_handler *h)
{
int ret, pid;
if (pid > 0) {
if (job_count >= max_jobs) {
if (pid > 0) {
if (job_count >= max_jobs) {
+ if (jobs_pending < max_jobs_pending)
+ jobs_pending++;
+ else
+ deny_job();
} else {
ret = grant_new_job();
return 0;
} else {
ret = grant_new_job();
return 0;
no_count_cpus:
pr_info("Set maximum number of parallel jobs to %d\n", max_jobs);
no_count_cpus:
pr_info("Set maximum number of parallel jobs to %d\n", max_jobs);
+ max_jobs_pending = max_jobs * 50 + 25;
+ pr_info("Set maximum number of pending jobs to %d\n", max_jobs_pending);
+
if (ret < 0)
pr_err("Job control request failure: %m\n");
if (ret < 0)
pr_err("Job control request failure: %m\n");
+ if (byte < 0) {
+ pr_info("Did not get permission to execute. Terminating\n");
+
+ /*
+ * Avoid running exit handler, that would tell the
+ * parent we died normally and decrement the job
+ * counters.
+ */
+ raise(SIGKILL);
+ }
+
pr_info("Continuing\n");
return child;
}
pr_info("Continuing\n");
return child;
}