Skip to content Skip to sidebar Skip to footer

How To Convert Nested Dict Data To Data Frame Using Pandas

Delete the question, change the title, and re-upload it ====== Python generated the code as below using a dict data. I want to print out the data as shown in Excel, but I don't kno

Solution 1:

Try this,

import pandas as pd

raw = {
    "20211231": {
        0: {
            0: {
                "item_code": "B 001",
                "qty": "1",
                "supply_price": "0",
                "tax_amount": "0",
                "unit_price": "0",
            },
            1: {
                "item_code": "B 002",
                "qty": "2",
                "supply_price": "0",
                "tax_amount": "0",
                "unit_price": "0",
            },
            "customer_code": "00104",
            "remark1": "비고1",
            "remark2": "비고2",
            "remark3": "",
        },
        1: {
            0: {
                "item_code": " UT1213K",
                "qty": "10",
                "supply_price": "140",
                "tax_amount": "14",
                "unit_price": "14.00",
            },
            1: {
                "item_code": "B 001",
                "qty": "20",
                "supply_price": "0",
                "tax_amount": "0",
                "unit_price": "0",
            },
            2: {
                "item_code": "B 002",
                "qty": "30",
                "supply_price": "450",
                "tax_amount": "45",
                "unit_price": "15.00",
            },
            3: {
                "item_code": "B 003",
                "qty": "40",
                "supply_price": "640",
                "tax_amount": "64",
                "unit_price": "16.00",
            },
            "customer_code": "00103",
            "remark1": "비고103-1",
            "remark2": "비고103-2",
            "remark3": "비고103-3",
        },
    }
}
formatted_dict = []
for first_level_key, first_level_value in raw.items():
    for second_level_key, second_level_value in first_level_value.items():
        third_level_tmp_dict = {}
        for third_level_key, third_level_value in second_level_value.items():
            ifisinstance(third_level_key, str):
                third_level_tmp_dict[third_level_key] = third_level_value
        for third_level_key, third_level_value in second_level_value.items():
            ifisinstance(third_level_key, int):
                new_record = {}
                new_record.update(
                    {
                        "date": first_level_key,
                        "customer": second_level_key,
                        "item": third_level_key,
                    }
                )
                new_record.update(third_level_value)
                new_record.update(third_level_tmp_dict)
                formatted_dict.append(new_record)

result = pd.DataFrame(formatted_dict).set_index(
    ["date", "customer", "customer_code", "remark1", "remark2", "remark3"]
)[["item", "item_code", "qty", "supply_price", "tax_amount", "unit_price"]]
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
print(result)

result enter image description here

Solution 2:

solution 2

df = (pd.DataFrame(raw).stack()
        .apply(pd.Series).stack()
        .apply(pd.Series).stack()).reset_index()

cond = df['level_3'] ==0
df1 = df[cond].drop(['level_3'], axis=1)
df2 = df[~cond].copy()
df1 = df1.set_index(df1.columns[:-1].to_list())[0].unstack().reset_index()
df2 = df2.set_index(df2.columns[:-1].to_list())[0].unstack().reset_index()
df = pd.merge(df1, df2)
# df.columns = ['customer', 'date', 'customer_code', 'remark1', 'remark2', 'remark3', 'item',
#               'item_code', 'qty', 'supply_price', 'tax_amount', 'unit_price']
df

enter image description here


solution 1

from pandas import json_normalize
import json
# convert to json to ensure the key values is string to use json_normalize
data = json.loads(json.dumps(raw))
obj = json_normalize(data,sep='#').iloc[0]
obj = obj.reset_index()
objn = obj['index'].str.split('#', expand=True)
objn['value'] = obj[0]

objn.columns = ['date', 'customer', 'item', 'col', 'value']
cond = objn['col'].isnull()
obj1 = objn[cond].copy().dropna(how='all', axis=1)
obj2 = objn[~cond].copy()

df1 = obj1.set_index(['date', 'customer', 'item'])['value'].unstack().reset_index()
df2 = obj2.set_index(['date', 'customer', 'item', 'col'])['value'].unstack().reset_index()

dfn = pd.merge(df2, df1, on=['date', 'customer'])
dfn['customer'] = dfn['customer'].astype(int)
dfn['item'] = dfn['item'].astype(int)
cols = ['date', 'customer', 'customer_code', 'remark1', 'remark2', 'remark3',
        'item', 'item_code', 'qty', 'supply_price', 'tax_amount', 'unit_price']

df_ouput = dfn[cols].set_index(['date', 'customer', 'customer_code', 'remark1', 'remark2', 'remark3']).sort_index()

output:

enter image description here

print(obj.head())
#                       index      0# 0     20211231#0#0#item_code  B 001# 1           20211231#0#0#qty      1# 2  20211231#0#0#supply_price      0# 3    20211231#0#0#tax_amount      0# 4    20211231#0#0#unit_price      0

print(objn.head(15))

# date customer           item           col     value# 0   20211231        0              0     item_code     B 001# 1   20211231        0              0           qty         1# 2   20211231        0              0  supply_price         0# 3   20211231        0              0    tax_amount         0# 4   20211231        0              0    unit_price         0# 5   20211231        0              1     item_code     B 002# 6   20211231        0              1           qty         2# 7   20211231        0              1  supply_price         0# 8   20211231        0              1    tax_amount         0# 9   20211231        0              1    unit_price         0# 10  20211231        0  customer_code          None     00104# 11  20211231        0        remark1          None       비고1# 12  20211231        0        remark2          None       비고2# 13  20211231        0        remark3          None# 14  20211231        1              0     item_code   UT1213K

print(df1.head())

# item      date customer customer_code  remark1  remark2  remark3# 0     20211231        0         00104      비고1      비고2# 1     20211231        1         00103  비고103-1  비고103-2  비고103-3


print(df2.head())

# col      date customer item item_code qty supply_price tax_amount unit_price# 0    20211231        0    0     B 001   1            0          0          0# 1    20211231        0    1     B 002   2            0          0          0# 2    20211231        1    0   UT1213K  10          140         14      14.00# 3    20211231        1    1     B 001  20            0          0          0# 4    20211231        1    2     B 002  30          450         45      15.00

Post a Comment for "How To Convert Nested Dict Data To Data Frame Using Pandas"