How To Convert Nested Dict Data To Data Frame Using Pandas
(The original question was deleted, retitled, and re-uploaded.) The data below was generated in Python as a nested dict. I want to print the data laid out the way it appears in Excel, but I don't know how.
Solution 1:
Try this,
import pandas as pd

# Nested input, keyed as: date -> customer index -> entry.
# Each entry mixes two kinds of keys:
#   * int keys  -> one dict of item fields per line item
#   * str keys  -> customer-level fields shared by all of that customer's items
raw = {
    "20211231": {
        0: {
            0: {
                "item_code": "B 001",
                "qty": "1",
                "supply_price": "0",
                "tax_amount": "0",
                "unit_price": "0",
            },
            1: {
                "item_code": "B 002",
                "qty": "2",
                "supply_price": "0",
                "tax_amount": "0",
                "unit_price": "0",
            },
            "customer_code": "00104",
            "remark1": "비고1",
            "remark2": "비고2",
            "remark3": "",
        },
        1: {
            0: {
                "item_code": " UT1213K",
                "qty": "10",
                "supply_price": "140",
                "tax_amount": "14",
                "unit_price": "14.00",
            },
            1: {
                "item_code": "B 001",
                "qty": "20",
                "supply_price": "0",
                "tax_amount": "0",
                "unit_price": "0",
            },
            2: {
                "item_code": "B 002",
                "qty": "30",
                "supply_price": "450",
                "tax_amount": "45",
                "unit_price": "15.00",
            },
            3: {
                "item_code": "B 003",
                "qty": "40",
                "supply_price": "640",
                "tax_amount": "64",
                "unit_price": "16.00",
            },
            "customer_code": "00103",
            "remark1": "비고103-1",
            "remark2": "비고103-2",
            "remark3": "비고103-3",
        },
    }
}

# Flatten the nesting into one flat record per (date, customer, item).
formatted_dict = []
for date_key, customers in raw.items():
    for customer_key, entry in customers.items():
        # Collect the string-keyed, customer-level fields first so they can be
        # copied onto every item row of this customer.
        shared_fields = {
            key: value for key, value in entry.items() if isinstance(key, str)
        }
        for item_key, item_fields in entry.items():
            # Only int keys hold per-item dicts; the str keys were handled above.
            if not isinstance(item_key, int):
                continue
            # Merge order matters: later mappings win on a key collision
            # (position keys, then item fields, then customer-level fields).
            formatted_dict.append(
                {
                    "date": date_key,
                    "customer": customer_key,
                    "item": item_key,
                    **item_fields,
                    **shared_fields,
                }
            )

# Customer-level columns become the (multi-)index; item columns stay as data.
result = pd.DataFrame(formatted_dict).set_index(
    ["date", "customer", "customer_code", "remark1", "remark2", "remark3"]
)[["item", "item_code", "qty", "supply_price", "tax_amount", "unit_price"]]

# Widen the console display so the frame is not truncated or wrapped.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
print(result)
Solution 2:
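Approach 2: reshape with repeated stack() and unstack() (reuses `pd` and the `raw` dict defined in Solution 1)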
# Reshape the nested `raw` dict into a long table, then pivot it back to wide.
# Relies on `pd` and `raw` being defined earlier.
# Each stack()/apply(pd.Series) pair expands one more nesting level of the
# dict values; reset_index() then exposes the keys as the columns
# level_0 .. level_3 plus a value column named 0.
# NOTE(review): the result of stack()/apply(pd.Series) on mixed dict/scalar
# values can differ between pandas versions — confirm on the version in use.
df = (pd.DataFrame(raw).stack()
.apply(pd.Series).stack()
.apply(pd.Series).stack()).reset_index()
# pd.Series(<scalar>) yields a single entry labelled 0, so level_3 == 0
# presumably marks the customer-level fields (customer_code, remark*) rather
# than the per-item dicts — verify against the actual frame.
cond = df['level_3'] ==0
# Rows matching cond: level_3 carries no information there, so drop it.
df1 = df[cond].drop(['level_3'], axis=1)
# Remaining rows; copy() gives an independent frame rather than a slice of df.
df2 = df[~cond].copy()
# Pivot long -> wide: index on every column except the value column (0),
# then unstack the innermost index level into columns.
df1 = df1.set_index(df1.columns[:-1].to_list())[0].unstack().reset_index()
df2 = df2.set_index(df2.columns[:-1].to_list())[0].unstack().reset_index()
# No `on=` is given, so merge joins on the column names the two frames share.
df = pd.merge(df1, df2)
# Optional: give the merged frame readable column names.
# df.columns = ['customer', 'date', 'customer_code', 'remark1', 'remark2', 'remark3', 'item',
# 'item_code', 'qty', 'supply_price', 'tax_amount', 'unit_price']
# Bare expression: displays the frame in a notebook/REPL; does nothing in a script.
df
Approach 1: flatten with json_normalize, then unstack (reuses `pd` and the `raw` dict defined in Solution 1)
# Flatten `raw` with json_normalize, split the flattened key paths back into
# columns, and pivot into one row per (date, customer, item).
# Relies on `pd` and `raw` being defined earlier.
from pandas import json_normalize
import json
# Round-trip through JSON so every dict key becomes a string (JSON object keys
# are always strings) before handing the data to json_normalize.
data = json.loads(json.dumps(raw))
# json_normalize yields a single row whose column names are the key paths
# joined by '#', e.g. "20211231#0#0#item_code"; iloc[0] takes that row as a Series.
obj = json_normalize(data,sep='#').iloc[0]
# After reset_index the key path is in column 'index' and the value in column 0.
obj = obj.reset_index()
# Split each key path into its parts, one column per part.
objn = obj['index'].str.split('#', expand=True)
objn['value'] = obj[0]
objn.columns = ['date', 'customer', 'item', 'col', 'value']
# Customer-level paths have only three parts, so their fourth part ('col') is
# missing; for those rows the 'item' column actually holds the field name.
cond = objn['col'].isnull()
# Customer-level rows; dropna(how='all', axis=1) removes the all-null 'col' column.
obj1 = objn[cond].copy().dropna(how='all', axis=1)
# Item-level rows.
obj2 = objn[~cond].copy()
# Pivot long -> wide: the innermost index level becomes the columns.
df1 = obj1.set_index(['date', 'customer', 'item'])['value'].unstack().reset_index()
df2 = obj2.set_index(['date', 'customer', 'item', 'col'])['value'].unstack().reset_index()
# Attach the customer-level fields to every item row of the same customer.
dfn = pd.merge(df2, df1, on=['date', 'customer'])
# The JSON round-trip turned the integer keys into strings; convert them back.
dfn['customer'] = dfn['customer'].astype(int)
dfn['item'] = dfn['item'].astype(int)
cols = ['date', 'customer', 'customer_code', 'remark1', 'remark2', 'remark3',
'item', 'item_code', 'qty', 'supply_price', 'tax_amount', 'unit_price']
# Final frame: customer-level columns as the index, item columns as data.
df_ouput = dfn[cols].set_index(['date', 'customer', 'customer_code', 'remark1', 'remark2', 'remark3']).sort_index()
Output of the intermediate frames:
# Sample output of each intermediate frame (column spacing is approximate).
print(obj.head())
#                         index      0
# 0      20211231#0#0#item_code  B 001
# 1            20211231#0#0#qty      1
# 2   20211231#0#0#supply_price      0
# 3     20211231#0#0#tax_amount      0
# 4     20211231#0#0#unit_price      0
print(objn.head(15))
#         date customer           item           col    value
# 0   20211231        0              0     item_code    B 001
# 1   20211231        0              0           qty        1
# 2   20211231        0              0  supply_price        0
# 3   20211231        0              0    tax_amount        0
# 4   20211231        0              0    unit_price        0
# 5   20211231        0              1     item_code    B 002
# 6   20211231        0              1           qty        2
# 7   20211231        0              1  supply_price        0
# 8   20211231        0              1    tax_amount        0
# 9   20211231        0              1    unit_price        0
# 10  20211231        0  customer_code          None    00104
# 11  20211231        0        remark1          None      비고1
# 12  20211231        0        remark2          None      비고2
# 13  20211231        0        remark3          None
# 14  20211231        1              0     item_code  UT1213K
print(df1.head())
# item      date customer customer_code   remark1   remark2   remark3
# 0     20211231        0         00104       비고1       비고2
# 1     20211231        1         00103  비고103-1  비고103-2  비고103-3
print(df2.head())
# col      date customer item item_code qty supply_price tax_amount unit_price
# 0    20211231        0    0     B 001   1            0          0          0
# 1    20211231        0    1     B 002   2            0          0          0
# 2    20211231        1    0   UT1213K  10          140         14      14.00
# 3    20211231        1    1     B 001  20            0          0          0
# 4    20211231        1    2     B 002  30          450         45      15.00
Post a Comment for "How To Convert Nested Dict Data To Data Frame Using Pandas"