Scaling mode: prune execution data has no effect

export function getWorkflowHooksWorkerExecuter(
	mode: WorkflowExecuteMode,
	executionId: string,
	workflowData: IWorkflowBase,
	optionalParameters?: IWorkflowHooksOptionalParameters,
): WorkflowHooks {
	optionalParameters = optionalParameters || {};
	const hookFunctions = hookFunctionsSaveWorker();
	const preExecuteFunctions = hookFunctionsPreExecute(optionalParameters.parentProcessMode);
	for (const key of Object.keys(preExecuteFunctions)) {
		if (hookFunctions[key] === undefined) {
			hookFunctions[key] = [];
		}
		hookFunctions[key]!.push.apply(hookFunctions[key], preExecuteFunctions[key]);
	}
	return new WorkflowHooks(hookFunctions, mode, executionId, workflowData, optionalParameters);
}


/**
 * Returns WorkflowHooks instance for running the main workflow.
 *
 * Always registers the save and push hooks; when running inside the main
 * process it additionally registers the pre-execute hooks (which include
 * e.g. the execution-data pruning logic).
 *
 * @param data Data for the workflow execution being processed
 * @param executionId Id of the current execution
 * @param isMainProcess Whether this runs in the main process (adds pre-execute hooks)
 */
export function getWorkflowHooksMain(
	data: IWorkflowExecutionDataProcess,
	executionId: string,
	isMainProcess = false,
): WorkflowHooks {
	const hookFunctions = hookFunctionsSave();

	// Appends every hook list from `extraFunctions` onto the matching list in
	// `hookFunctions`, creating the target list when the key is missing.
	const addHookFunctions = (extraFunctions: typeof hookFunctions): void => {
		for (const key of Object.keys(extraFunctions)) {
			if (hookFunctions[key] === undefined) {
				hookFunctions[key] = [];
			}
			hookFunctions[key]!.push.apply(hookFunctions[key], extraFunctions[key]);
		}
	};

	addHookFunctions(hookFunctionsPush());

	if (isMainProcess) {
		addHookFunctions(hookFunctionsPreExecute());
	}

	return new WorkflowHooks(hookFunctions, data.executionMode, executionId, data.workflowData, {
		sessionId: data.sessionId,
		retryOf: data.retryOf as string,
	});
}

Hi @king, I’m sorry to hear you are having trouble.

I am not sure I fully understand your question though. How exactly are you trying to prune execution data and what isn’t working?

1 Like

Thank you. I can delete data in stand-alone mode, but I can’t delete data in scaling mode

By scaling mode I assume you mean queue mode as described here? How are you deleting data and what error are you getting (if any)?

Yes, there are no errors, but I simply can’t delete data

1637661784(1)

Hi @MutedJam , @king .
Data Pruning also not working for me in queue mode but working in without queue mode.

Hey @Tarun_Charan, I am sorry to hear you’re having trouble with this.

Can you confirm how exactly you have set up n8n (which database do you have in use, which environment variables are you setting, which version of n8n do you have in use)?

Could you perhaps share a docker compose configuration using which your problem can be reproduced?

@MutedJam
Used postgres and have n8nworker, n8nwebhook, n8nmain. When I execute a workflow with a webhook node pruning does not start. When a workflow is started by a webhook but has a subworkflow then pruning works.

here is the docker compose-

version: ‘3.8’
services:

postgresdb:
image: postgres
restart: unless-stopped
environment:
- POSTGRES_DB=$POSTGRES_DB
- POSTGRES_USER=$POSTGRES_USER
- POSTGRES_PASSWORD=$POSTGRES_PASSWORD
ports:
- $POSTGRES_PORT:$POSTGRES_PORT
networks:
- local-net

ngnix:
depends_on:
- n8n
- n8nworker
- n8nwebhook
image: nginx
container_name: ngnix
restart: unless-stopped
volumes:
- ./ngnix.conf:/etc/nginx/nginx.conf:ro
networks:
- local-net
ports:
- 8080:81

n8n:
depends_on:
- postgresdb
- redis
image: n8nio/n8n:0.166.0
container_name: n8n
restart: unless-stopped
environment:
- EXECUTIONS_MODE=queue
- NODE_ENV=production
- N8N_PORT=$N8N_PORT_M
- N8N_DISABLE_PRODUCTION_MAIN_PROCESS=true
- DB_TYPE=postgresdb
- DB_POSTGRESDB_DATABASE=$POSTGRES_DB
- DB_POSTGRESDB_HOST=postgresdb
- DB_POSTGRESDB_PORT=$POSTGRES_PORT
- DB_POSTGRESDB_USER=$POSTGRES_USER
- DB_POSTGRESDB_PASSWORD=$POSTGRES_PASSWORD
- QUEUE_BULL_REDIS_HOST=redis
- QUEUE_BULL_REDIS_PORT=$REDIS_PORT
- N8N_LOG_LEVEL=debug
- EXECUTIONS_DATA_PRUNE=true
- EXECUTIONS_DATA_MAX_AGE=1
ports:
- $N8N_PORT_M:$N8N_PORT_M
networks:
- local-net
volumes:
- ~/.n8n:/home/node/.n8n

n8nworker:
depends_on:
- postgresdb
- redis
image: n8nio/n8n:0.166.0
container_name: n8nworker
restart: unless-stopped
environment:
- EXECUTIONS_MODE=queue
- EXECUTIONS_PROCESS=main
- NODE_ENV=production
- N8N_PORT=$N8N_PORT_W
- DB_TYPE=postgresdb
- DB_POSTGRESDB_DATABASE=$POSTGRES_DB
- DB_POSTGRESDB_HOST=postgresdb
- DB_POSTGRESDB_PORT=$POSTGRES_PORT
- DB_POSTGRESDB_USER=$POSTGRES_USER
- DB_POSTGRESDB_PASSWORD=$POSTGRES_PASSWORD
- QUEUE_BULL_REDIS_HOST=redis
- QUEUE_BULL_REDIS_PORT=$REDIS_PORT
- N8N_LOG_LEVEL=debug
- EXECUTIONS_DATA_PRUNE=true
- EXECUTIONS_DATA_MAX_AGE=1
ports:
- $N8N_PORT_W:$N8N_PORT_W
networks:
- local-net
volumes:
- ~/.n8n:/home/node/.n8n
command: n8n worker

n8nwebhook:
depends_on:
- postgresdb
- redis
image: n8nio/n8n:0.166.0
container_name: n8nwebhook
restart: unless-stopped
environment:
- EXECUTIONS_MODE=queue
- EXECUTIONS_PROCESS=main
- NODE_ENV=production
- N8N_PORT=$N8N_PORT_WH
- DB_TYPE=postgresdb
- DB_POSTGRESDB_DATABASE=$POSTGRES_DB
- DB_POSTGRESDB_HOST=postgresdb
- DB_POSTGRESDB_PORT=$POSTGRES_PORT
- DB_POSTGRESDB_USER=$POSTGRES_USER
- DB_POSTGRESDB_PASSWORD=$POSTGRES_PASSWORD
- QUEUE_BULL_REDIS_HOST=redis
- QUEUE_BULL_REDIS_PORT=$REDIS_PORT
- N8N_LOG_LEVEL=debug
- EXECUTIONS_DATA_PRUNE=true
- EXECUTIONS_DATA_MAX_AGE=1
ports:
- $N8N_PORT_WH:$N8N_PORT_WH
networks:
- local-net
volumes:
- ~/.n8n:/home/node/.n8n
command: n8n webhook

redis:
image: redis
networks:
- local-net
ports:
- 6379:6379
restart: always
command: --loglevel debug

volumes:
db:

networks:
local-net:
driver: bridge

nginx config -
events { }
http {

upstream n8nmain {
    server n8n:5678;
}

upstream n8nwebhookserver {
    server n8nwebhook:5680;
}


server {
    listen 81;
    server_name n8n_server; 

    
    location /webhook {
        add_header X-debug-message "WEBHOOK PROCESSOR" always;
        proxy_pass http://n8nwebhookserver;
    }

    location / {
        add_header X-debug-message "GENERAL PROCESSOR" always;
        proxy_pass http://n8nmain;
    }
    
}

}

Thanks @Tarun_Charan, I’ve set some time aside to set up a test environment on my side, send a few webhook requests, and keep an eye on the data pruning.

Thanks for taking time aside for it. Waiting for your observations.

So I tried this on my end but eventually got stuck (the setup is working fine, I just didn’t quite understand the pruning logic tbh). I’ve asked internally for a bit of assistance on this one.

If this is causing any problems for you, you should be able to simply drop unwanted rows manually from the execution_entity database table in the meantime.

So just a quick update on this one: @ivov helped a lot here, so I could now confirm worker instances executing a workflow would not trigger the data pruning logic.

Manually executing a workflow would, however, happen on your main instance and trigger the pruning logic. The hook would fire for both parent and sub-workflows. Here is my current example executions table:

This screenshot was taken at ~14:00 UTC, so quite a few of these executions were older than the 1 hour set in EXECUTIONS_DATA_MAX_AGE. workflowId 1 is a workflow without a sub-workflow, workflowId 5 is a parent workflow and workflowId 6 is a sub-workflow called by workflow 5.

Now when running any workflow manually this workflow would be executed by the main instance and I can see the below log entry:

2022-05-27T14:03:15.311Z | verbose  | Pruning execution data from database {"file":"WorkflowExecuteAdditionalData.js","function":"pruneExecutionData"}

Afterwards, all finished executions older than 1 hour are cleared from my database as expected:

Is there a chance your main instance hasn’t executed a workflow for a while? Could you try and execute any workflow manually and confirm whether this clears the old executions for you?

Thanks for such a detailed explanation.
Are there any plans to rectify the issue, so that pruning happens in queue mode.
But running manually or using a sub workflow works with pruning.

1 Like

Hi @Tarun_Charan, yes, this will eventually be fixed! Unfortunately there’s no ETA yet but we’ll confirm once this has been done.

Ok sure. Thanks for all the help.

Hey hi, Nico here :smiley:

I did a quick test, cause data were not pruned before.

We are running N8N in Queue Mode, with ECS, each component in a Task Definition, and Postgres on a RDS.

I set these ENV VAR:
EXECUTIONS_DATA_PRUNE=true
EXECUTIONS_DATA_MAX_AGE=1

I delete Tasks that were running, and also forced a Workflow Manually, and we reduced size from 1.5 GB to 153 MB.

Still, I find it weird that with only 30 rows we have such a number — any idea? Maybe it is storing logs?

Regards,

Nico

Hey @Nicolas_Andres_Calvo,

Can you open a new thread? It looks like your issue is not directly related to the one in this thread and is more around the data itself.

1 Like