admin管理员组文章数量:1435554
This post was edited to grab the actual JSON file (large) instead of the sample snippet that I extracted,(which works in this post). I was wondering why I get a key error when i use record_path on this data set.
under the results key there are 2 nested keys named 'active_ingredients' and 'packaging' when i normalize i get
result = pd.json_normalize(data['results'], record_path=["packaging"],meta=['product_ndc'])
the expected columns
package_ndc description marketing_start_date sample marketing_end_date product_ndcs
but when i add active_ingredients to the record_path list i get a key error. The same goes for meta as well. When i add the other columns like 'brand_name' and 'generic_name' to the meta list, I get a key error. to see the keys
this doesnt work
result = pd.json_normalize(data['results'], record_path=["packaging","active_ingredients"],meta=['product_ndc','brand_name','generic_name'])
Thanks for any help
Here is the actual code I use to grab the data which produces the key error.
import pandas as pd
import json
import requests, zipfile, io, os
cwd = os.getcwd()
zip_url = '.json.zip'
r = requests.get(zip_url)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall(cwd)
with open('drug-ndc-0001-of-0001.json', 'r') as file:
data = json.load(file)
packaging_data = pd.json_normalize(
data['results'],
record_path=["packaging"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
active_ingredients_data = pd.json_normalize(
data['results'],
record_path=["active_ingredients"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
i paired it with your answer and getting the same issues I had before I posted the question.
This post was edited to grab the actual JSON file (large) instead of the sample snippet that I extracted,(which works in this post). I was wondering why I get a key error when i use record_path on this data set.
under the results key there are 2 nested keys named 'active_ingredients' and 'packaging' when i normalize i get
result = pd.json_normalize(data['results'], record_path=["packaging"],meta=['product_ndc'])
the expected columns
package_ndc description marketing_start_date sample marketing_end_date product_ndcs
but when i add active_ingredients to the record_path list i get a key error. The same goes for meta as well. When i add the other columns like 'brand_name' and 'generic_name' to the meta list, I get a key error. to see the keys
this doesnt work
result = pd.json_normalize(data['results'], record_path=["packaging","active_ingredients"],meta=['product_ndc','brand_name','generic_name'])
Thanks for any help
Here is the actual code I use to grab the data which produces the key error.
import pandas as pd
import json
import requests, zipfile, io, os
cwd = os.getcwd()
zip_url = 'https://download.open.fda.gov/drug/ndc/drug-ndc-0001-of-0001.json.zip'
r = requests.get(zip_url)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall(cwd)
with open('drug-ndc-0001-of-0001.json', 'r') as file:
data = json.load(file)
packaging_data = pd.json_normalize(
data['results'],
record_path=["packaging"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
active_ingredients_data = pd.json_normalize(
data['results'],
record_path=["active_ingredients"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
i paired it with your answer and getting the same issues I had before I posted the question.
Share Improve this question edited Nov 17, 2024 at 12:04 Brad asked Nov 16, 2024 at 16:31 BradBrad 1198 bronze badges 5 |2 Answers
Reset to default 0When you specify multiple record_path entries (like "packaging"
and "active_ingredients"
), pandas expects that the second record_path
("active_ingredients"
) exists within every element of the first record_path ("packaging"
), but, in your data, active_ingredients
is not a nested property of packaging
Do this to solve this
import pandas as pd
data = {
"meta": {
"disclaimer": "Do not rely on openFDA to make decisions regarding medical care. While we make every effort to ensure that data is accurate, you should assume all results are unvalidated. We may limit or otherwise restrict your access to the API in line with our Terms of Service.",
"terms": "https://open.fda.gov/terms/",
"license": "https://open.fda.gov/license/",
"last_updated": "2024-11-15",
"results": {
"skip": 0,
"limit": 2,
"total": 118943
}
},
"results": [
{
"product_ndc": "73647-062",
"generic_name": "MENTHOL, CAMPHOR",
"labeler_name": "Just Brands LLC",
"brand_name": "JUST CBD - CBD AND THC ULTRA RELIEF",
"active_ingredients": [
{
"name": "CAMPHOR (SYNTHETIC)",
"strength": "2 g/100g"
},
{
"name": "MENTHOL",
"strength": "6 g/100g"
}
],
"finished": True,
"packaging": [
{
"package_ndc": "73647-062-04",
"description": "113 g in 1 BOTTLE, PUMP (73647-062-04)",
"marketing_start_date": "20230314",
"sample": False
}
],
"listing_expiration_date": "20251231",
"openfda": {
"manufacturer_name": ["Just Brands LLC"],
"spl_set_id": ["f664eb79-8897-3a49-e053-2995a90a37b4"],
"is_original_packager": [True],
"unii": ["5TJD82A1ET", "L7T10EIP3A"]
},
"marketing_category": "OTC MONOGRAPH DRUG",
"dosage_form": "GEL",
"spl_id": "16c906dd-6989-9a79-e063-6394a90afa71",
"product_type": "HUMAN OTC DRUG",
"route": ["TOPICAL"],
"marketing_start_date": "20230314",
"product_id": "73647-062_16c906dd-6989-9a79-e063-6394a90afa71",
"application_number": "M017",
"brand_name_base": "JUST CBD - CBD AND THC ULTRA RELIEF"
},
{
"product_ndc": "0591-4039",
"marketing_end_date": "20250930",
"generic_name": "CLOBETASOL PROPIONATE",
"labeler_name": "Actavis Pharma, Inc.",
"brand_name": "CLOBETASOL PROPIONATE",
"active_ingredients": [
{
"name": "CLOBETASOL PROPIONATE",
"strength": ".05 g/mL"
}
],
"finished": True,
"packaging": [
{
"package_ndc": "0591-4039-46",
"description": "1 BOTTLE in 1 CARTON (0591-4039-46) / 59 mL in 1 BOTTLE",
"marketing_start_date": "20150828",
"marketing_end_date": "20250930",
"sample": False
},
{
"package_ndc": "0591-4039-74",
"description": "1 BOTTLE in 1 CARTON (0591-4039-74) / 125 mL in 1 BOTTLE",
"marketing_start_date": "20150828",
"marketing_end_date": "20250930",
"sample": False
}
],
"openfda": {
"manufacturer_name": ["Actavis Pharma, Inc."],
"rxcui": ["861512"],
"spl_set_id": ["907e425a-720a-4180-b97c-9e25008a3658"],
"is_original_packager": [True],
"unii": ["779619577M"]
},
"marketing_category": "NDA AUTHORIZED GENERIC",
"dosage_form": "SPRAY",
"spl_id": "33a56b8b-a9a6-4287-bbf4-d68ad0c59e07",
"product_type": "HUMAN PRESCRIPTION DRUG",
"route": ["TOPICAL"],
"marketing_start_date": "20150828",
"product_id": "0591-4039_33a56b8b-a9a6-4287-bbf4-d68ad0c59e07",
"application_number": "NDA021835",
"brand_name_base": "CLOBETASOL PROPIONATE",
"pharm_class": [
"Corticosteroid Hormone Receptor Agonists [MoA]",
"Corticosteroid [EPC]"
]
}
]
}
packaging_data = pd.json_normalize(
data['results'],
record_path=["packaging"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
active_ingredients_data = pd.json_normalize(
data['results'],
record_path=["active_ingredients"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
combined_data = pd.merge(
packaging_data,
active_ingredients_data,
on=['product_ndc', 'brand_name', 'generic_name'],
how='outer'
)
print(packaging_data)
print(active_ingredients_data)
print(combined_data)
which gives
package_ndc description \
0 73647-062-04 113 g in 1 BOTTLE, PUMP (73647-062-04)
1 0591-4039-46 1 BOTTLE in 1 CARTON (0591-4039-46) / 59 mL i...
2 0591-4039-74 1 BOTTLE in 1 CARTON (0591-4039-74) / 125 mL ...
marketing_start_date sample marketing_end_date product_ndc \
0 20230314 False NaN 73647-062
1 20150828 False 20250930 0591-4039
2 20150828 False 20250930 0591-4039
brand_name generic_name
0 JUST CBD - CBD AND THC ULTRA RELIEF MENTHOL, CAMPHOR
1 CLOBETASOL PROPIONATE CLOBETASOL PROPIONATE
2 CLOBETASOL PROPIONATE CLOBETASOL PROPIONATE
name strength product_ndc \
0 CAMPHOR (SYNTHETIC) 2 g/100g 73647-062
1 MENTHOL 6 g/100g 73647-062
2 CLOBETASOL PROPIONATE .05 g/mL 0591-4039
brand_name generic_name
0 JUST CBD - CBD AND THC ULTRA RELIEF MENTHOL, CAMPHOR
1 JUST CBD - CBD AND THC ULTRA RELIEF MENTHOL, CAMPHOR
2 CLOBETASOL PROPIONATE CLOBETASOL PROPIONATE
package_ndc description \
0 0591-4039-46 1 BOTTLE in 1 CARTON (0591-4039-46) / 59 mL i...
...
0 CLOBETASOL PROPIONATE .05 g/mL
1 CLOBETASOL PROPIONATE .05 g/mL
2 CAMPHOR (SYNTHETIC) 2 g/100g
3 MENTHOL 6 g/100g
EDIT
I changed the naming to mirror your changes in your edit: The first script uses the variable names packaging_df
, active_ingredients_df
, and combined_df
for the DataFrames related to packaging
, active_ingredients
, and their merged result, respectively, whereas the second script uses packaging_data
, active_ingredients_data
, and combined_data
for the same purposes. The difference lies solely in the naming conventions, with no impact on functionality or logic. The output is the same, so if you still experience issues, it must come from something else, probably in something you do before.
import pandas as pd
data = {
"meta": {
"disclaimer": "Do not rely on openFDA to make decisions regarding medical care. While we make every effort to ensure that data is accurate, you should assume all results are unvalidated. We may limit or otherwise restrict your access to the API in line with our Terms of Service.",
"terms": "https://open.fda.gov/terms/",
"license": "https://open.fda.gov/license/",
"last_updated": "2024-11-15",
"results": {
"skip": 0,
"limit": 2,
"total": 118943
}
},
"results": [
{
"product_ndc": "73647-062",
"generic_name": "MENTHOL, CAMPHOR",
"labeler_name": "Just Brands LLC",
"brand_name": "JUST CBD - CBD AND THC ULTRA RELIEF",
"active_ingredients": [
{
"name": "CAMPHOR (SYNTHETIC)",
"strength": "2 g/100g"
},
{
"name": "MENTHOL",
"strength": "6 g/100g"
}
],
"finished": True,
"packaging": [
{
"package_ndc": "73647-062-04",
"description": "113 g in 1 BOTTLE, PUMP (73647-062-04)",
"marketing_start_date": "20230314",
"sample": False
}
],
"listing_expiration_date": "20251231",
"openfda": {
"manufacturer_name": ["Just Brands LLC"],
"spl_set_id": ["f664eb79-8897-3a49-e053-2995a90a37b4"],
"is_original_packager": [True],
"unii": ["5TJD82A1ET", "L7T10EIP3A"]
},
"marketing_category": "OTC MONOGRAPH DRUG",
"dosage_form": "GEL",
"spl_id": "16c906dd-6989-9a79-e063-6394a90afa71",
"product_type": "HUMAN OTC DRUG",
"route": ["TOPICAL"],
"marketing_start_date": "20230314",
"product_id": "73647-062_16c906dd-6989-9a79-e063-6394a90afa71",
"application_number": "M017",
"brand_name_base": "JUST CBD - CBD AND THC ULTRA RELIEF"
},
{
"product_ndc": "0591-4039",
"marketing_end_date": "20250930",
"generic_name": "CLOBETASOL PROPIONATE",
"labeler_name": "Actavis Pharma, Inc.",
"brand_name": "CLOBETASOL PROPIONATE",
"active_ingredients": [
{
"name": "CLOBETASOL PROPIONATE",
"strength": ".05 g/mL"
}
],
"finished": True,
"packaging": [
{
"package_ndc": "0591-4039-46",
"description": "1 BOTTLE in 1 CARTON (0591-4039-46) / 59 mL in 1 BOTTLE",
"marketing_start_date": "20150828",
"marketing_end_date": "20250930",
"sample": False
},
{
"package_ndc": "0591-4039-74",
"description": "1 BOTTLE in 1 CARTON (0591-4039-74) / 125 mL in 1 BOTTLE",
"marketing_start_date": "20150828",
"marketing_end_date": "20250930",
"sample": False
}
],
"openfda": {
"manufacturer_name": ["Actavis Pharma, Inc."],
"rxcui": ["861512"],
"spl_set_id": ["907e425a-720a-4180-b97c-9e25008a3658"],
"is_original_packager": [True],
"unii": ["779619577M"]
},
"marketing_category": "NDA AUTHORIZED GENERIC",
"dosage_form": "SPRAY",
"spl_id": "33a56b8b-a9a6-4287-bbf4-d68ad0c59e07",
"product_type": "HUMAN PRESCRIPTION DRUG",
"route": ["TOPICAL"],
"marketing_start_date": "20150828",
"product_id": "0591-4039_33a56b8b-a9a6-4287-bbf4-d68ad0c59e07",
"application_number": "NDA021835",
"brand_name_base": "CLOBETASOL PROPIONATE",
"pharm_class": [
"Corticosteroid Hormone Receptor Agonists [MoA]",
"Corticosteroid [EPC]"
]
}
]
}
packaging_data = pd.json_normalize(
data['results'],
record_path=["packaging"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
active_ingredients_data = pd.json_normalize(
data['results'],
record_path=["active_ingredients"],
meta=['product_ndc', 'brand_name', 'generic_name']
)
combined_data = pd.merge(
packaging_data,
active_ingredients_data,
on=['product_ndc', 'brand_name', 'generic_name'],
how='outer'
)
print(packaging_data)
print(active_ingredients_data)
print(combined_data)
Reading through the documentation for pandas.json_normalize the record_path takes a str variable.
Thus you would need to create two dataframes for each record path and then merge them together based on a common field.
本文标签: pythonjson normalization recordpath key not foundStack Overflow
版权声明:本文标题:python - json normalization record_path key not found - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1745652163a2668493.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
META
for discussing the workings of [stackoverflow / StackExchange.) – greybeard Commented Nov 16, 2024 at 22:26