# Source code for quest.api.metadata
import pandas as pd
from ..static import UriType
from ..plugins import load_providers
from ..util import classify_uris, construct_service_uri, parse_service_uri
from ..database import get_db, db_session, select_collections, select_datasets
def get_metadata(uris, as_dataframe=False):
    """Get metadata for uris.

    The uris are grouped by type, metadata is collected separately for
    service, publisher, collection and dataset uris, and the pieces are
    concatenated into a single table.

    Args:
        uris (string, comma separated string, or list of strings, Required):
            list of uris to retrieve metadata for
        as_dataframe (bool, Optional, Default=False):
            return the metadata as a pandas DataFrame instead of a dict

    Returns:
        metadata (dict or pd.DataFrame, Default=dict):
            metadata at each uri keyed on uris
    """
    # group uris by type
    grouped_uris = classify_uris(uris)

    # handle case when no uris are passed in
    if not any(grouped_uris):
        metadata = pd.DataFrame()
        if not as_dataframe:
            metadata = metadata.to_dict(orient='index')
        return metadata

    uri_types = grouped_uris.groups
    metadata = []

    # get metadata for service type uris
    if UriType.SERVICE in uri_types:
        # Work on an explicit copy: new columns are added below and the frame
        # returned by get_group() should not be modified in place.
        svc_df = grouped_uris.get_group(UriType.SERVICE).copy()
        svc_df[['provider', 'service', 'catalog_id']] = svc_df['uri'].apply(parse_service_uri).apply(pd.Series)
        providers = load_providers()

        for (provider, service), grp in svc_df.groupby(['provider', 'service']):
            provider_plugin = providers[provider]

            # 'catalog_id != catalog_id' is the self-inequality idiom; it
            # presumably selects the rows whose catalog_id is missing, i.e.
            # uris that refer to the service itself.
            if not grp.query('catalog_id != catalog_id').empty:
                service_metadata = provider_plugin.get_services()[service]
                index = construct_service_uri(provider, service)
                metadata.append(pd.DataFrame(service_metadata, index=[index]))

            # Rows that do carry a catalog_id are looked up in the service's
            # catalog, which is indexed by the full uri.
            selected_catalog_entries = grp.query('catalog_id == catalog_id').uri.tolist()
            if selected_catalog_entries:
                catalog_entries = provider_plugin.search_catalog(service)
                catalog_entries = catalog_entries.loc[selected_catalog_entries]
                metadata.append(catalog_entries)

    # get metadata for publisher type uris
    if UriType.PUBLISHER in uri_types:
        svc_df = grouped_uris.get_group(UriType.PUBLISHER).copy()
        svc_df[['provider', 'publish', 'catalog_id']] = svc_df['uri'].apply(parse_service_uri).apply(pd.Series)
        providers = load_providers()

        for (provider, publisher), grp in svc_df.groupby(['provider', 'publish']):
            provider_plugin = providers[provider]
            publisher_metadata = provider_plugin.get_publishers()[publisher]
            index = construct_service_uri(provider, publisher)
            metadata.append(pd.DataFrame(publisher_metadata, index=[index]))

    if UriType.COLLECTION in uri_types:
        # get metadata for collections
        tmp_df = grouped_uris.get_group(UriType.COLLECTION)
        collections = select_collections(lambda c: c.name in tmp_df['uri'].tolist())
        collections = pd.DataFrame(collections)
        # index on the name while also keeping it as a regular column
        collections.set_index('name', inplace=True, drop=False)
        metadata.append(collections)

    if UriType.DATASET in uri_types:
        # get metadata for datasets
        tmp_df = grouped_uris.get_group(UriType.DATASET)
        datasets = select_datasets(lambda c: c.name in tmp_df['uri'].tolist())
        datasets = pd.DataFrame(datasets)
        datasets.set_index('name', inplace=True, drop=False)
        metadata.append(datasets)

    metadata = pd.concat(metadata)

    if not as_dataframe:
        metadata = metadata.to_dict(orient='index')

    return metadata
def update_metadata(uris, display_name=None, description=None,
                    metadata=None, quest_metadata=None):
    """Update metadata for resource(s)

    Args:
        uris (string, comma separated string, or list of strings, Required):
            list of uris to update metadata for.
        display_name (string or list, Optional,Default=None):
            display name for each uri
        description (string or list, Optional,Default=None):
            description for each uri
        metadata (dict or list of dicts, Optional, Default=None):
            user defined metadata
        quest_metadata (dict or list of dicts, Optional, Default=None):
            metadata used by QUEST

    Returns:
        metadata (dict):
            metadata of each uri keyed on uris

    Raises:
        ValueError: if a service uri that does not contain 'quest' is passed
            in, or if more than one uri is passed in and display_name or
            description is given as something other than a list.
    """
    db = get_db()
    # Entity lookup for each supported uri type. For service uris only the
    # last '/'-separated segment is used as the QuestCatalog key.
    get_db_entity_funcs = {
        UriType.COLLECTION: lambda x: db.Collection[x],
        UriType.DATASET: lambda x: db.Dataset[x],
        UriType.SERVICE: lambda x: db.QuestCatalog[x.split('/')[-1]],
    }

    # group uris by type
    grouped_uris = classify_uris(uris, as_dataframe=True, exclude=[UriType.PUBLISHER], require_same_type=True)
    # only the first group is used
    resource = list(grouped_uris.groups.keys())[0]
    uris = grouped_uris.get_group(resource)
    get_db_entity = get_db_entity_funcs[resource]

    if resource == UriType.SERVICE:
        # then make sure there are only quest catalog entries
        if not uris.uri.apply(lambda x: 'quest' in x).all():
            raise ValueError('Metadata on service catalog entries cannot be changed.')

    uris = uris.uri.tolist()
    n = len(uris)

    if n > 1:
        if display_name is None:
            display_name = [None] * n
        elif not isinstance(display_name, list):
            raise ValueError('display_name must be a list if more that one uri is passed in')

        if description is None:
            description = [None] * n
        elif not isinstance(description, list):
            raise ValueError('description must be a list if more that one uri is passed in')

        # a single metadata/quest_metadata value is applied to every uri
        if not isinstance(metadata, list):
            metadata = [metadata] * n

        if not isinstance(quest_metadata, list):
            quest_metadata = [quest_metadata] * n
    else:
        display_name = [display_name]
        description = [description]
        metadata = [metadata]
        quest_metadata = [quest_metadata]

    for uri, name, desc, meta, quest_meta in zip(uris, display_name, description, metadata, quest_metadata):
        # Build a fresh mapping for every uri. A single quest_metadata dict
        # is shared between all uris (see the broadcast above), so updating
        # it in place would leak one uri's display_name/description/metadata
        # into the following uris and would also modify the caller's dict.
        fields = {} if quest_meta is None else dict(quest_meta)

        if name:
            fields['display_name'] = name
        if desc:
            fields['description'] = desc
        if meta:
            fields['metadata'] = meta

        with db_session:
            entity = get_db_entity(uri)
            entity.set(**fields)

    return get_metadata(uris)