Skip to content

Functions

Python functions I commonly use when working as a data engineer.

WIP

Work in progress

List

List Comprehension

Flatten

python
def flatten(lst:list) -> list:
    """Return a new, flat list with the items of an arbitrarily nested list.

    Only ``list`` instances are expanded (recursively); every other value,
    including tuples and strings, is kept as a single item. The order of
    the items is preserved and the input is not modified.
    """
    flat = []

    for item in lst:
        # Nested lists are flattened first and then spliced in; anything
        # else is wrapped so both cases can be appended the same way.
        flat += flatten(item) if isinstance(item, list) else [item]

    return flat

Dict Comprehension

Datetime

Logging

ZIP

Unzip Files

python
import zipfile

# Extract every member of ./files.zip into ./extracted.
# For an encrypted archive, pass the password through `pwd` as a bytes
# object (e.g. pwd=b'secret'); None means "no password".
with zipfile.ZipFile('./files.zip', mode='r') as archive:
    archive.extractall(path='./extracted', pwd=None)

Zip Files

python
import shutil

# base_name: path of the archive to create, without the extension
# format:    archive type ('zip' produces ./files_2.zip)
# root_dir:  folder whose contents are archived
shutil.make_archive(base_name='./files_2', format='zip', root_dir='./to_zip')

TIP

If you need to set a password on the zip file, see pyminizip.

Yaml

Read a YAML file, replacing variables with mapped values

yaml
path: 's3://bucket-$ENV/data'
python
# poetry add PyYAML
import string, yaml

def load_yaml(file_path:str, context=None) -> dict:
    """Load a YAML file, expanding ``$VAR`` / ``${VAR}`` placeholders.

    Every string scalar in the document (mapping keys included) is treated
    as a ``string.Template`` and substituted with the values in ``context``.

    Args:
        file_path: Path of the YAML file to read.
        context: Mapping of placeholder name -> replacement value. ``None``
            is treated as an empty mapping.

    Returns:
        The parsed document (a dict for a top-level mapping; whatever the
        YAML document's top-level node is otherwise).

    Raises:
        KeyError: A placeholder in the file has no entry in ``context``.
        ValueError: The file contains an invalid placeholder, such as a
            lone ``$`` (write ``$$`` for a literal dollar sign).
    """
    # Template.substitute cannot index None, so fall back to "no variables".
    mapping = {} if context is None else context

    def string_constructor(loader, node):
        template = string.Template(loader.construct_scalar(node))
        return template.substitute(mapping)

    # Register on a private subclass: add_constructor/add_implicit_resolver
    # modify the class they are called on, and doing that to yaml.SafeLoader
    # itself would change every other safe_load() call in the process.
    class _TemplateLoader(yaml.SafeLoader):
        pass

    _TemplateLoader.add_constructor('tag:yaml.org,2002:str', string_constructor)

    token_re = string.Template.pattern
    _TemplateLoader.add_implicit_resolver('tag:yaml.org,2002:str', token_re, None)

    with open(file_path, 'r', encoding='utf-8') as file:
        # _TemplateLoader derives from SafeLoader, so this stays a safe load.
        return yaml.load(file, Loader=_TemplateLoader)

To use it, declare a dict in which each key is a variable to be replaced and each value is the desired replacement:

# Placeholder name (without the leading "$") -> replacement value.
context = {'ENV': 'VALUE'}

# Load the file with its placeholders resolved from `context`.
yaml_with_values = load_yaml(file_path='./file.yaml', context=context)

Json

Read

python
import json

# Parse the whole JSON document; the file is only needed while loading.
with open('file_1.json', 'r') as file:
    data = json.load(file)

# Show the Python type the document was decoded to, then its content.
print(f"tipo do dado: {type(data)}")
print('-----')
print(data)

Save

python
import json

# Record to serialize.
data = {
    'name': 'Lewis Hamilton',
    'nationality': 'British',
    'team': 'Mercedes'
}

# Serialize to a JSON string first, then write it out in one go.
serialized = json.dumps(data)
with open('file_2.json', 'w') as file:
    file.write(serialized)

Csv

Read

Pure Python, no external libraries.

python
import csv

# The csv module does its own newline handling, so the file has to be opened
# with newline=''. Without it, line breaks embedded in quoted fields are not
# interpreted correctly.
with open('file_1.csv', 'r', newline='') as file:
    reader = csv.reader(file)
    # Each row is returned as a list of strings.
    for row in reader:
        print(row)

Save

python
import csv

# First row is the header, the remaining rows are the records.
data = [
    ['name', 'area', 'country_code2', 'country_code3'],
    ['Albania', 28748, 'AL', 'ALB'],
    ['Algeria', 2381741, 'DZ', 'DZA'],
]

# newline='' lets the csv writer control the line endings itself.
with open('file_2.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    for row in data:
        writer.writerow(row)

Txt

Read the whole file as a single string

python
# Read the entire file into one string; the file is closed before printing.
with open('file.txt', 'r') as file:
    contents = file.read()

print(contents)

Read multiple lines

python
# Read everything, then split it into a list of lines without their
# line-ending characters.
with open('file.txt', 'r') as file:
    text = file.read()
    contents = text.splitlines()

for line in contents:
    print(line)

Write

python
# Mode 'w' creates the file, or truncates it if it already exists.
text = 'Hello\nWorld!'
with open('file.txt', 'w') as file:
    file.write(text)

Feel free to use any content here.