From 7797e0d628485a6ea2c01b3e3be50f254423da49 Mon Sep 17 00:00:00 2001 From: ChristophWenk Date: Sun, 10 Jul 2022 14:22:19 +0200 Subject: [PATCH 1/3] Switch of dry_run --- Readme.md | 2 +- pdf_sorter/settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Readme.md b/Readme.md index 79b1f20..3ebc50c 100644 --- a/Readme.md +++ b/Readme.md @@ -78,7 +78,7 @@ the configuration file and may include property keys generated from the regex pa "target_directory": "F:\\Dokumente\\Rechnungen\\Helsana\\Leistungsabrechnungen", "file_name_format": "{company_name}_{date}_{document_type}_{document_id}.pdf", "document_id": "ABCD", - "date": "2022-01-01" + "date": "2022-12-31" } ``` diff --git a/pdf_sorter/settings.py b/pdf_sorter/settings.py index f0233af..5e6b701 100644 --- a/pdf_sorter/settings.py +++ b/pdf_sorter/settings.py @@ -2,7 +2,7 @@ import logging # Do (False) or do not (True) rename files and move them -dry_run = True +dry_run = False # Folder that contains the PDFs to process pdf_files_dir = 'F:/Downloads/02_pdf_sorter' From 8b80c88e99bf6193f3b8c705dc846acee6d344ca Mon Sep 17 00:00:00 2001 From: ChristophWenk Date: Sun, 10 Jul 2022 14:57:15 +0200 Subject: [PATCH 2/3] Got rid of multiline logs --- Readme.md | 2 ++ pdf_sorter/main.py | 19 ++++++++----------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/Readme.md b/Readme.md index 3ebc50c..f28bd3f 100644 --- a/Readme.md +++ b/Readme.md @@ -28,6 +28,8 @@ Other Python or package versions might work but have not been tested. ## Document Type Configuration New document types can be added by creating new configuration files. The process is described below. +Place the files in the directory defined in `settings.config_files_dir`. +The default is `'../resources/config_files'`. ### Configuration File Name The configuration file name has to adhere to the scheme below. The [Company] and the [Document Type] values have both to be found in the PDF text content. This will only be used to select the correct configuration file for the PDF in diff --git a/pdf_sorter/main.py b/pdf_sorter/main.py index 72574de..66f6cc1 100644 --- a/pdf_sorter/main.py +++ b/pdf_sorter/main.py @@ -60,10 +60,9 @@ def get_attr_from_regex(config, regex, file_name, not_processed_list, pdf_text): def process_files(path, config_file_path): - logger.info("\n" - "##################################\n" - "# Starting new PDF Sort Run #\n" - "##################################") + logger.info("##################################") + logger.info("# Starting new PDF Sort Run #") + logger.info("##################################") if settings.dry_run is True: logger.warning("Dry Run active: Running in preview mode. No files will be renamed or moved.") @@ -118,17 +117,15 @@ def process_files(path, config_file_path): continue logger.info('==============================================================================================') - logger.info("\n" - "##################################\n" - "# PDF Sort Run completed #\n" - "##################################") + logger.info("##################################") + logger.info("# PDF Sort Run completed #") + logger.info("##################################") if not_processed_list: + logger.warning("The following PDF files could not be processed or have just been partially processed:") output_list = "" for file_name in not_processed_list: - output_list += "\n" + file_name - logger.warning("The following PDF files could not be processed or have just been partially processed:" + - output_list) + logger.warning(file_name) # Main Function From c685aa4b52a16b24bc9d47cf4030448fdb444d04 Mon Sep 17 00:00:00 2001 From: ChristophWenk Date: Sun, 10 Jul 2022 15:36:19 +0200 Subject: [PATCH 3/3] Conda environment provided --- Readme.md | 6 +++++- environment.yml | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 environment.yml diff --git a/Readme.md b/Readme.md index f28bd3f..332cd18 100644 --- a/Readme.md +++ b/Readme.md @@ -24,7 +24,11 @@ General functionality should look like this: - pypdf2 2.4.2 - dateparser 1.1.1 -Other Python or package versions might work but have not been tested. +Other Python or package versions might work but have not been tested. + +A conda environment configuration is provided in +the `environment.yml` file. You can set it up with `conda env create -f environment.yml`. Activate it with +`conda activate PDFSorter`. ## Document Type Configuration New document types can be added by creating new configuration files. The process is described below. diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..f4e1e32 --- /dev/null +++ b/environment.yml @@ -0,0 +1,29 @@ +name: PDFSorter +channels: + - defaults +dependencies: + - bzip2=1.0.8=he774522_0 + - ca-certificates=2022.4.26=haa95532_0 + - certifi=2022.6.15=py310haa95532_0 + - dateparser=1.1.1=pyhd3eb1b0_0 + - libffi=3.4.2=hd77b12b_4 + - openssl=1.1.1p=h2bbff1b_0 + - pip=21.2.4=py310haa95532_0 + - python=3.10.4=hbb2ffb3_0 + - python-dateutil=2.8.2=pyhd3eb1b0_0 + - pytz=2022.1=py310haa95532_0 + - regex=2021.8.3=py310h2bbff1b_0 + - setuptools=61.2.0=py310haa95532_0 + - six=1.16.0=pyhd3eb1b0_1 + - sqlite=3.38.5=h2bbff1b_0 + - tk=8.6.12=h2bbff1b_0 + - tzdata=2022a=hda174b7_0 + - tzlocal=2.1=py310haa95532_0 + - vc=14.2=h21ff451_1 + - vs2015_runtime=14.27.29016=h5e58377_2 + - wheel=0.37.1=pyhd3eb1b0_0 + - wincertstore=0.2=py310haa95532_2 + - xz=5.2.5=h8cc25b3_1 + - zlib=1.2.12=h8cc25b3_2 + - pip: + - pypdf2==2.4.2