Skip to content

Path / files related

Search within folder :

Via glob

import glob
import os
# Collect every path matching "*.json" directly inside DIR_JSON. The pattern has no
# "**" component, so sub-folders are not searched.
# NOTE: DIR_JSON is not defined in this snippet; assign the folder path before running it.
character_json_paths = glob.glob(os.path.join(DIR_JSON, "*.json"))

Via simple os.listdir

import os
from pathlib import Path
from typing import List, Optional

def search_files_of_type(
    path: str, suffix: str = ".json",
    exclude_files_name: Optional[List[str]] = None,
    exclude_ignore_suffix: bool = True
) -> list:
    """Return the names of the entries directly inside ``path`` ending with ``suffix``.

    Only the direct level of ``path`` is inspected (``os.listdir`` does not
    descend into sub-folders). The returned values are bare entry names, not
    full paths, in the arbitrary order produced by ``os.listdir``.

    Args:
        path: Directory to list.
        suffix: Name ending an entry must have to be returned.
        exclude_files_name: Names to leave out of the result. ``None`` or an
            empty list excludes nothing.
        exclude_ignore_suffix: When True, exclusions are compared with the
            last extension stripped on both sides, so excluding ``"a.txt"``
            also drops ``"a.json"``. When False, names must match exactly.

    Returns:
        List of matching entry names.
    """
    excluded_names = exclude_files_name or []
    # os.listdir ONLY lists the direct level, NOT nested paths.
    candidates = [name for name in os.listdir(path) if name.endswith(suffix)]
    if not excluded_names:
        return candidates

    if exclude_ignore_suffix:
        # Compare on the name without its last extension; a set keeps each lookup O(1).
        excluded_stems = {Path(name).with_suffix("") for name in excluded_names}
        return [
            name for name in candidates
            if Path(name).with_suffix("") not in excluded_stems
        ]

    excluded_exact = set(excluded_names)
    return [name for name in candidates if name not in excluded_exact]

Path manipulation

File suffix (i.e. extension)

v = "/ada/wdwdw.txt"
# with_suffix("") drops only the last extension and keeps the directory part;
# str() converts the resulting Path back to a plain string.
filename_no_suffix = str(Path(v).with_suffix(""))
# /ada/wdwdw

join/resolve a path :

import os
os.path.join("/somewhere/folder", "train.npz")

# or
from pathlib import Path
# e.g. Path("/somewhere/folder") / "train.npz"

Calculate the total size of all files in a directory (not subfolders):

import os

model_path = "/somewhere"
# os.scandir yields only the direct entries of model_path, and is_file() filters out
# sub-directories, so files in nested folders are not counted. The total is in bytes.
sum(os.path.getsize(f.path) for f in os.scandir(model_path) if f.is_file())

Temp folder/file

Via tempfile :

import tempfile

# The temporary file exists only inside the with-block.
with tempfile.TemporaryFile() as fp:
    fp.write(b'Hello world!')

## file is now closed and removed

with tempfile.TemporaryDirectory() as path:
    images_from_path = convert_from_path(
        # do something....

General - Text File

Read file:

Get all lines into a list

pdf_list = []
with open("./pdf_list.log") as f:
    # readlines() returns one string per line, each keeping its trailing "\n".
    pdf_list = f.readlines()

Sub category :

PDF skills

npz file

json file

CSV file

