#!/usr/bin/env python
# coding: utf-8

# ---
# ## Let's now play with JSON to access **the NARA Catalog Morgenthau Diary** Vol.696: Jan. 22 - Jan. 26, 1944:
# ### - we use an API call where we specify a NARA fileUnit ID directly: 28277406
#
# | NARA Catalog Hierarchy | JSON Structure for the Query of FDR FileUnit 28277406: "The Morgenthau Diaries" |
# | ----- | ----- |
# |  |  |

# In[22]:

import json

import requests

# Query the catalog search endpoint for the single file unit 28277406.
# Replace "API-KEY" with a real key before running.
response = requests.get(
    "https://catalog.archives.gov/api/v2/records/search?q=*&abbreviated=true&limit=200&naId=28277406",
    headers={"Content-Type": "application/json", "x-api-key": "API-KEY"},
    timeout=30,
)
# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()
data = response.json()

# Keep a local copy of the raw response for inspection.
with open('data.FU.json', 'w', encoding='utf-8') as f:
    json.dump(data, f)

# Going down the JSON tree to the RESULT section:
hits = data["body"]["hits"]["hits"]
if not hits:
    raise LookupError("The catalog search returned no records for naId 28277406")
result = hits[0]
fields = result["fields"]
record = result["_source"]["record"]
# Notebook display of the first hit; this bare expression is a no-op when
# the file is run as a plain script.
result


# ## How to extract the TITLE value from the JSON tree

# In[20]:

title_value = record["title"]

# Printing the Title Value
print("- TITLE VALUE:", title_value)


# ## How to extract the URL value from the first digital object of the JSON tree

# In[27]:

# Going down to the first digital object listed in the "fields" section
# (named `digital_object` so the builtin `object` is not shadowed).
digital_object = fields["firstDigitalObject"][0]

# Grabbing the objectUrl value
object_url = digital_object["objectUrl"]

# We choose to print the URL key which will give us the location of the file
print("- URL VALUE:", object_url)

# Grabbing the object ID value.
# NOTE(review): the key is spelled "objectId" per the NARA Catalog API v2
# schema (the previous "objecId" raised KeyError) - confirm against a live
# response.
object_id = digital_object["objectId"]

# Printing the object ID value
print("- NAME VALUE:", object_id)


# ## ==> Follow the URL link and "GET" the actual .PDF file for the entire Morgenthau Diary for Volume 696 and download it locally

# In[29]:

# We then get that URL
r = requests.get(object_url, allow_redirects=True, timeout=60)
# Do not save an HTTP error page under a .pdf name.
r.raise_for_status()

# and finally open and write the .PDF file; the context manager makes sure
# the file is flushed and closed.
with open('696.pdf', 'wb') as pdf_file:
    pdf_file.write(r.content)


# In[ ]: