6
August
2025
Batch file operations - rename and merge PDF
18:24

Batch file operations - rename and merge PDF

6 August 2025 18:24

I decided to share two Python programs. The first renames files to sequential numbers in order of their creation date. The second merges PDF files according to those numbers.

The task at work was to scan the last sheets of the user manual, where the serial number is indicated. There were 6 such files.

Sets of documents were placed manually into folders numbered 01, 02, 03, etc. until the last.
One sheet - one PDF file. Then some of them were combined into two sheets.

Folder 01 contains 6 files

  • random name. PDF
  • random name. PDF
  • random name. PDF
  • random name. PDF
  • random name. PDF
  • random name. PDF

Task: end up with the following set of files in folder 01:
01_res1.pdf
01_res2.pdf
01_res3.pdf
01_res4.pdf

Divided the task into two subtasks:

  • traversal of folders and renaming files by creation date - into the folder name + serial number: 01-1.pdf, 01-2.pdf, 01-3.pdf, 01-4.pdf, 01-5.pdf and 01-6.pdf.
  • combining files according to the algorithm: 3 and 4 into one PDF file, 5 and 6 into another PDF file.

First script rename_files.py may be useful for batch renaming files not only in PDF format, but in any format.
Second script combine_files.py implements the logic for merging PDF files in each directory.


Both programs take a single input parameter: the path of the parent folder (the one that contains the subfolders 01, 02, 03, etc.).

Launch example:

python rename_files.py /home/user/parent_folder
python combine_files.py /home/user/parent_folder

where parent_folder contains folders 01, 02, 03, etc. with PDF files.

Run rename_files.py with caution, and not on your working directory, because all files are renamed in batch mode without prompting.

Contents of the rename_files.py file:

# This script takes the path to a directory containing a set of subdirectories
# numbered 01, 02, 03, ... etc. and renames the files in each subdirectory to
# the name of the subdirectory plus an integer number, in order of file
# creation.
#
# Example: before the script runs, subdirectory 01 contains the files
# random912371.pdf, random167722.pdf, random561724.pdf;
# after the script runs, the files in subdirectory 01 are renamed to
# 01-1.pdf, 01-2.pdf, 01-3.pdf.

import os
from pathlib import Path
import sys
import pathlib

def list_dirs(folder_path):
    """Rename the files in every immediate subdirectory of ``folder_path``.

    Args:
        folder_path: Parent directory that holds the numbered subdirectories
            (01, 02, 03, ...). Plain files directly inside it are ignored.

    Returns:
        None. Progress is reported on standard output.
    """
    # Resolve to an absolute path once: the per-directory worker may change
    # the current working directory, which would invalidate a relative
    # ``folder_path`` from the second subdirectory onwards.
    root = os.path.abspath(folder_path)
    # Sorted so that directories are processed in a predictable order.
    for entry in sorted(os.listdir(root)):
        dir_path = os.path.join(root, entry)
        if os.path.isdir(dir_path):
            print(f"Processing directory '{dir_path}'")
            rename_files_with_creation_date(dir_path)
    print("Done!")

def rename_files_with_creation_date(folder_path):
    """Rename every file in ``folder_path`` to ``<folder name>-<N><ext>``.

    ``N`` starts at 1 and follows the files' modification time (oldest
    first), which is used here as a portable stand-in for the creation time.
    The original extension is kept, so files of any type are handled.
    Subdirectories are left untouched.

    Args:
        folder_path: Directory whose files should be renamed. Its last path
            component becomes the prefix of the new names.

    Returns:
        None. Each rename (or skipped rename) is reported on standard output.
    """
    # Use "{0}-{num:02d}" instead to get zero-padded numbers (01-01.pdf, ...).
    tpl = "{0}-{num}"

    # abspath normalises the path first, so a trailing separator or "." still
    # yields the real directory name, on any platform.
    folder_name = os.path.basename(os.path.abspath(folder_path))

    # Work with full paths throughout instead of os.chdir(), so the caller's
    # working directory is never changed.
    candidates = (
        os.path.join(folder_path, name) for name in os.listdir(folder_path)
    )
    files = [path for path in candidates if os.path.isfile(path)]
    # The path is a tie-breaker so equal timestamps give a stable order.
    files.sort(key=lambda path: (os.path.getmtime(path), path))

    for cnt, filename in enumerate(files, start=1):
        ext = pathlib.Path(filename).suffix
        new_name = tpl.format(folder_name, num=cnt) + ext
        filename_new = os.path.join(folder_path, new_name)
        if filename == filename_new:
            # Already carries the right name (e.g. on a second run).
            continue
        print(f"Renaming '{filename}' to '{filename_new}'")
        if os.path.exists(filename_new):
            # Never overwrite an existing entry; the number is still consumed.
            print(f"File '{filename_new}' exists, skipping.")
        else:
            os.rename(filename, filename_new)

# Script entry point: take the parent folder from the command line, or ask
# for it interactively when no (or more than one) argument was given.
if __name__ == "__main__":
    if len(sys.argv) == 2:
        folder_path = sys.argv[1]
    else:
        folder_path = input("Enter the folder path: ")
    list_dirs(folder_path)

In addition to combining, the combine_files.py file implements page rotation from portrait to landscape view (90 degrees). This transformation can be removed by editing the source text of the script.

# This script combines files

import os
import sys
import pathlib
import logging
import threading
import time
from pathlib import Path
from subprocess import PIPE, run

def list_dirs(folder_path):
    """Combine the PDF files in every immediate subdirectory of ``folder_path``.

    Args:
        folder_path: Parent directory that holds the numbered subdirectories
            (01, 02, 03, ...). Plain files directly inside it are ignored.

    Returns:
        None. Progress is reported on standard output.
    """
    # Resolve to an absolute path once: the per-directory worker may change
    # the current working directory, which would invalidate a relative
    # ``folder_path`` from the second subdirectory onwards.
    root = os.path.abspath(folder_path)
    # Sorted so that directories are processed in a predictable order.
    for entry in sorted(os.listdir(root)):
        dir_path = os.path.join(root, entry)
        if os.path.isdir(dir_path):
            print(f"Processing directory '{dir_path}'")
            combine_files(dir_path)
    print("Done!")

def _qpdf_then_remove(command, sources):
    """Run one qpdf command and delete ``sources`` only if it succeeded.

    Args:
        command: Full argument list, starting with ``"qpdf"``. Passing a list
            (not a space-split string) keeps paths with spaces intact.
        sources: Input files to delete once the output has been written.

    Returns:
        True if qpdf succeeded and the sources were removed, False otherwise.
    """
    print(" ".join(command))
    result = run(command)
    # qpdf exits with 0 on success and 3 when it succeeded with warnings;
    # anything else means the output cannot be trusted.
    if result.returncode not in (0, 3):
        print(f"qpdf failed with exit code {result.returncode}, "
              "keeping the source files.")
        return False
    for path in sources:
        print("rm " + path)
        os.remove(path)
    return True


def combine_files(folder_path):
    """Rotate and merge the numbered PDF files of one document set.

    For a directory named ``NN`` the following is produced with ``qpdf``
    (every page rotated by +90 degrees):

    * ``NN-1.pdf``               -> ``NN_res1.pdf``
    * ``NN-2.pdf``               -> ``NN_res2.pdf``
    * ``NN-3.pdf`` + ``NN-4.pdf`` -> ``NN_res3.pdf``
    * ``NN-5.pdf`` + ``NN-6.pdf`` -> ``NN_res4.pdf``

    Source files are deleted only after qpdf reported success. A pair with
    one file missing is left untouched, so no scan is lost.

    Args:
        folder_path: Directory that contains the ``NN-<number>.pdf`` files.

    Returns:
        None. The executed commands are echoed on standard output.

    Raises:
        FileNotFoundError: If the ``qpdf`` executable cannot be found.
    """
    # Absolute paths are used everywhere instead of os.chdir(), so the
    # caller's working directory is never changed.
    folder_path = os.path.abspath(folder_path)
    folder_name = os.path.basename(folder_path)

    def source(number):
        return os.path.join(folder_path, f"{folder_name}-{number}.pdf")

    def target(number):
        return os.path.join(folder_path, f"{folder_name}_res{number}.pdf")

    # Sheets 1 and 2 are only rotated, one output file each.
    for number in (1, 2):
        src = source(number)
        if os.path.isfile(src):
            _qpdf_then_remove(
                ["qpdf", src, target(number), "--rotate=+90"], [src]
            )

    # Sheets 3+4 and 5+6 are merged into one two-page file per pair.
    for first, result_number in ((3, 3), (5, 4)):
        pair = [source(first), source(first + 1)]
        present = [path for path in pair if os.path.isfile(path)]
        if not present:
            continue
        if len(present) < len(pair):
            print(f"Incomplete pair, keeping '{present[0]}' untouched.")
            continue
        _qpdf_then_remove(
            ["qpdf", "--empty", "--pages", *pair, "--",
             "--rotate=+90", target(result_number)],
            pair,
        )

# Script entry point: take the parent folder from the command line, or ask
# for it interactively when no (or more than one) argument was given.
if __name__ == "__main__":
    if len(sys.argv) == 2:
        folder_path = sys.argv[1]
    else:
        folder_path = input("Enter the folder path: ")
    list_dirs(folder_path)

Archive with 2 programs:
rename_files_by_created_date_and_combine.zip

I took the idea of renaming files from here: GitHub - rename files by created date. There, however, the file name has the form Day_Month_Year_Hour_Minute_Second, whereas mine has the form FOLDER_NAME-sequence_number.



Related publications