Source code for tamr_client.project

from typing import List, Optional, Tuple, Union

from tamr_client import response
from tamr_client._types import (
    Attribute,
    Instance,
    JsonDict,
    Project,
    Session,
    UnknownProject,
    URL,
)
from tamr_client.attribute import _get_all_from_parent
from tamr_client.categorization import project as categorization_project
from tamr_client.exception import TamrClientException
from tamr_client.golden_records import project as golden_records_project
from tamr_client.mastering import project as mastering_project
from tamr_client.schema_mapping import project as schema_mapping_project


[docs]class NotFound(TamrClientException): """Raised when referencing (e.g. updating or deleting) a project that does not exist on the server.""" pass
[docs]class Ambiguous(TamrClientException): """Raised when referencing a project by name that matches multiple possible targets.""" pass
class AlreadyExists(TamrClientException): """Raised when a project with these specifications already exists.""" pass
[docs]def by_resource_id(session: Session, instance: Instance, id: str) -> Project: """Get project by resource ID. Fetches project from Tamr server. Args: instance: Tamr instance containing this dataset id: Project ID Raises: project.NotFound: If no project could be found at the specified URL. Corresponds to a 404 HTTP error. requests.HTTPError: If any other HTTP error is encountered. """ url = URL(instance=instance, path=f"projects/{id}") return _by_url(session, url)
[docs]def by_name(session: Session, instance: Instance, name: str) -> Project: """Get project by name Fetches project from Tamr server. Args: instance: Tamr instance containing this project name: Project name Raises: project.NotFound: If no project could be found with that name. project.Ambiguous: If multiple targets match project name. requests.HTTPError: If any other HTTP error is encountered. """ r = session.get( url=str(URL(instance=instance, path="projects")), params={"filter": f"name=={name}"}, ) # Check that exactly one project is returned matches = r.json() if len(matches) == 0: raise NotFound(str(r.url)) if len(matches) > 1: raise Ambiguous(str(r.url)) # Make Project from response url = URL(instance=instance, path=matches[0]["relativeId"]) return _from_json(url=url, data=matches[0])
def _by_url(session: Session, url: URL) -> Project: """Get project by URL. Fetches project from Tamr server. Args: url: Project URL Raises: project.NotFound: If no project could be found at the specified URL. Corresponds to a 404 HTTP error. requests.HTTPError: If any other HTTP error is encountered. """ r = session.get(str(url)) if r.status_code == 404: raise NotFound(str(url)) data = response.successful(r).json() return _from_json(url, data) def _from_json(url: URL, data: JsonDict) -> Project: """Make project from JSON data (deserialize) Args: url: Project URL data: Project JSON data from Tamr server """ proj_type = data["type"] if proj_type == "DEDUP": return mastering_project._from_json(url, data) elif proj_type == "CATEGORIZATION": return categorization_project._from_json(url, data) elif proj_type == "SCHEMA_MAPPING_RECOMMENDATIONS": return schema_mapping_project._from_json(url, data) elif proj_type == "GOLDEN_RECORDS": return golden_records_project._from_json(url, data) else: return UnknownProject( url, name=data["name"], description=data.get("description") ) def _create( session: Session, instance: Instance, name: str, project_type: str, description: Optional[str] = None, external_id: Optional[str] = None, unified_dataset_name: Optional[str] = None, ) -> Project: """Create a project in Tamr. Args: instance: Tamr instance name: Project name project_type: Project type description: Project description external_id: External ID of the project unified_dataset_name: Name of the unified dataset Returns: Project created in Tamr Raises: project.AlreadyExists: If a project with these specifications already exists. requests.HTTPError: If any other HTTP error is encountered. """ if not unified_dataset_name: unified_dataset_name = name + "_unified_dataset" data = { "name": name, "type": project_type, "unifiedDatasetName": unified_dataset_name, "description": description, "externalId": external_id, } project_url = URL(instance=instance, path="projects") r = session.post(url=str(project_url), json=data) if r.status_code == 409: raise AlreadyExists(r.json()["message"]) data = response.successful(r).json() project_path = data["relativeId"] project_url = URL(instance=instance, path=str(project_path)) return _by_url(session=session, url=project_url)
[docs]def get_all( session: Session, instance: Instance, *, filter: Optional[Union[str, List[str]]] = None, ) -> Tuple[Project, ...]: """Get all projects from an instance Args: instance: Tamr instance from which to get projects filter: Filter expression, e.g. "externalId==wobbly" Multiple expressions can be passed as a list Returns: The projects retrieved from the instance Raises: requests.HTTPError: If an HTTP error is encountered. """ url = URL(instance=instance, path="projects") if filter is not None: r = session.get(str(url), params={"filter": filter}) else: r = session.get(str(url)) projects_json = response.successful(r).json() projects = [] for project_json in projects_json: project_url = URL(instance=instance, path=project_json["relativeId"]) project = _from_json(project_url, project_json) projects.append(project) return tuple(projects)
[docs]def attributes(session: Session, project: Project) -> Tuple[Attribute, ...]: """Get all attributes from a project Args: project: Project containing the desired attributes Returns: The attributes for the specified project Raises: requests.HTTPError: If an HTTP error is encountered. """ return _get_all_from_parent(session, project)