{ "cells": [ { "cell_type": "markdown", "id": "dimensional-heater", "metadata": {}, "source": [ "# SPOC API\n", "The SPOC API updates the species occurrence dataframe used in the Streamlit SpOc Verifier application. The [FastAPI](https://fastapi.tiangolo.com/) app can be run from the repository root directory with:\n", "\n", "`uvicorn src.api.main:app --reload --root-path src/api/`" ] }, { "cell_type": "code", "execution_count": 4, "id": "retired-locking", "metadata": {}, "outputs": [], "source": [ "# Re-import edited modules automatically before each cell runs.\n", "%reload_ext autoreload\n", "%autoreload 2\n", "%matplotlib inline\n", "\n", "# All imports live in this one cell so a fresh kernel can run the notebook top to bottom.\n", "import json\n", "\n", "import pandas as pd\n", "import requests" ] }, { "cell_type": "code", "execution_count": 5, "id": "auburn-capacity", "metadata": {}, "outputs": [], "source": [ "# Load the species occurrence records; the path is resolved against the kernel's working directory.\n", "all_records = pd.read_json(\"../data/species-records.json\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "healthy-pursuit", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 12148 entries, 0 to 12147\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Paper ID 12148 non-null object\n", " 1 Instance ID 12148 non-null object\n", " 2 Species 12148 non-null object\n", " 3 GBIF 12148 non-null object\n", " 4 Time 12148 non-null object\n", " 5 Place 12148 non-null object\n", " 6 div_enum 12148 non-null int64 \n", "dtypes: int64(1), object(6)\n", "memory usage: 759.2+ KB\n" ] } ], "source": [ "# Column names, dtypes and non-null counts of the loaded frame.\n", "all_records.info()" ] }, { "cell_type": "code", "execution_count": 6, "id": "approved-pacific", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Paper IDInstance IDSpeciesGBIFTimePlace
0fhl_2011_Brezicha_25959.tei.xmlurn:lsid:marinespecies.org:taxname:240762Strongylocentrotus franciscanushttps://www.gbif.org/species/search?q=Strongyl...Porter,Pacific Ocean,Friday Harbor,Salish Sea,...
1fhl_2011_Brezicha_25959.tei.xmlurn:lsid:marinespecies.org:taxname:431072radiushttps://www.gbif.org/species/search?q=radius&q...Porter,Pacific Ocean,Friday Harbor,Salish Sea,...
2fhl_2011_Brezicha_25959.tei.xmlurn:lsid:marinespecies.org:taxname:711954Parastichopus californicushttps://www.gbif.org/species/search?q=Parastic...Porter,Pacific Ocean,Friday Harbor,Salish Sea,...
3fhl_2011_Bockmon_26635.tei.xmlurn:lsid:marinespecies.org:taxname:112078Ammoniahttps://www.gbif.org/species/search?q=Ammonia&...San Juan Island,Friday Harbor,north end,North ...
4fhl_2011_Bockmon_26635.tei.xmlurn:lsid:marinespecies.org:taxname:112078ammoniahttps://www.gbif.org/species/search?q=ammonia&...San Juan Island,Friday Harbor,north end,North ...
\n", "
" ], "text/plain": [ " Paper ID Instance ID \\\n", "0 fhl_2011_Brezicha_25959.tei.xml urn:lsid:marinespecies.org:taxname:240762 \n", "1 fhl_2011_Brezicha_25959.tei.xml urn:lsid:marinespecies.org:taxname:431072 \n", "2 fhl_2011_Brezicha_25959.tei.xml urn:lsid:marinespecies.org:taxname:711954 \n", "3 fhl_2011_Bockmon_26635.tei.xml urn:lsid:marinespecies.org:taxname:112078 \n", "4 fhl_2011_Bockmon_26635.tei.xml urn:lsid:marinespecies.org:taxname:112078 \n", "\n", " Species \\\n", "0 Strongylocentrotus franciscanus \n", "1 radius \n", "2 Parastichopus californicus \n", "3 Ammonia \n", "4 ammonia \n", "\n", " GBIF Time \\\n", "0 https://www.gbif.org/species/search?q=Strongyl... \n", "1 https://www.gbif.org/species/search?q=radius&q... \n", "2 https://www.gbif.org/species/search?q=Parastic... \n", "3 https://www.gbif.org/species/search?q=Ammonia&... \n", "4 https://www.gbif.org/species/search?q=ammonia&... \n", "\n", " Place \n", "0 Porter,Pacific Ocean,Friday Harbor,Salish Sea,... \n", "1 Porter,Pacific Ocean,Friday Harbor,Salish Sea,... \n", "2 Porter,Pacific Ocean,Friday Harbor,Salish Sea,... \n", "3 San Juan Island,Friday Harbor,north end,North ... \n", "4 San Juan Island,Friday Harbor,north end,North ... " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Preview the first five records.\n", "all_records.head(n=5)" ] }, { "cell_type": "code", "execution_count": 10, "id": "satisfactory-spending", "metadata": {}, "outputs": [], "source": [ "# Select rows whose Paper ID equals the placeholder 'asdfs'.\n", "# NOTE(review): presumably no record carries this ID, so the result is empty - confirm.\n", "paper_id_mask = all_records['Paper ID'].isin(['asdfs'])\n", "borked = all_records[paper_id_mask]" ] }, { "cell_type": "code", "execution_count": 25, "id": "forward-internship", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Paper IDInstance IDSpeciesGBIFTimePlace
10690ucla_1975_ZimmermanRichard.tei.xmlurn:lsid:marinespecies.org:taxname:106994Spirontocarishttps://www.gbif.org/species/search?q=Spironto...1975-05-27pacifica,The wash,Santa Catalina Island,Cove
3094fhl_2014_McGowan_27210.tei.xmlurn:lsid:marinespecies.org:taxname:370041Phyllospadixhttps://www.gbif.org/species/search?q=Phyllosp...San Juan Island,shoreline,Wahl,Cattle Point,Bu...
8634hms_vd999tv5289.tei.xmlurn:lsid:marinespecies.org:taxname:279812Atherinops affinishttps://www.gbif.org/species/search?q=Atherino...Pajaro,PAJARO RIVER,view,Monterey Bay,Elkhorn,...
3976hms_bn277dh2861.tei.xmlurn:lsid:marinespecies.org:taxname:138086Chaetopleurahttps://www.gbif.org/species/search?q=Chaetopl...Bayne,green,Pacific Grove,tide,post,Division,h...
6799hms_jw427gt5299.tei.xmlurn:lsid:marinespecies.org:taxname:137928Elysiahttps://www.gbif.org/species/search?q=Elysia&q...1988-06-06Santa Barbara,Moss Landing Harbor,Pacific Grove
\n", "
" ], "text/plain": [ " Paper ID \\\n", "10690 ucla_1975_ZimmermanRichard.tei.xml \n", "3094 fhl_2014_McGowan_27210.tei.xml \n", "8634 hms_vd999tv5289.tei.xml \n", "3976 hms_bn277dh2861.tei.xml \n", "6799 hms_jw427gt5299.tei.xml \n", "\n", " Instance ID Species \\\n", "10690 urn:lsid:marinespecies.org:taxname:106994 Spirontocaris \n", "3094 urn:lsid:marinespecies.org:taxname:370041 Phyllospadix \n", "8634 urn:lsid:marinespecies.org:taxname:279812 Atherinops affinis \n", "3976 urn:lsid:marinespecies.org:taxname:138086 Chaetopleura \n", "6799 urn:lsid:marinespecies.org:taxname:137928 Elysia \n", "\n", " GBIF Time \\\n", "10690 https://www.gbif.org/species/search?q=Spironto... 1975-05-27 \n", "3094 https://www.gbif.org/species/search?q=Phyllosp... \n", "8634 https://www.gbif.org/species/search?q=Atherino... \n", "3976 https://www.gbif.org/species/search?q=Chaetopl... \n", "6799 https://www.gbif.org/species/search?q=Elysia&q... 1988-06-06 \n", "\n", " Place \n", "10690 pacifica,The wash,Santa Catalina Island,Cove \n", "3094 San Juan Island,shoreline,Wahl,Cattle Point,Bu... \n", "8634 Pajaro,PAJARO RIVER,view,Monterey Bay,Elkhorn,... \n", "3976 Bayne,green,Pacific Grove,tide,post,Division,h... \n", "6799 Santa Barbara,Moss Landing Harbor,Pacific Grove " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Draw five records at random; no seed is passed, so the rows differ between runs.\n", "all_records.sample(n=5)" ] }, { "cell_type": "code", "execution_count": 18, "id": "integral-fifteen", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Paper IDInstance IDSpeciesGBIFTimePlace
10360carl_1983_OlanderAndrea.tei.xmlurn:lsid:marinespecies.org:taxname:269761Lythrypnushttps://www.gbif.org/species/search?q=Lythrypn...Santa Barbara Island,Santa Barbara,white,Turne...
\n", "
" ], "text/plain": [ " Paper ID \\\n", "10360 carl_1983_OlanderAndrea.tei.xml \n", "\n", " Instance ID Species \\\n", "10360 urn:lsid:marinespecies.org:taxname:269761 Lythrypnus \n", "\n", " GBIF Time \\\n", "10360 https://www.gbif.org/species/search?q=Lythrypn... \n", "\n", " Place \n", "10360 Santa Barbara Island,Santa Barbara,white,Turne... " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_records[all_records['Paper ID'].isin(['carl_1983_OlanderAndrea.tei.xml']) & all_records['Instance ID'].isin(['urn:lsid:marinespecies.org:taxname:269761'])]" ] }, { "cell_type": "code", "execution_count": null, "id": "wireless-terrain", "metadata": {}, "outputs": [], "source": [ "# Send the record identifiers as a JSON body. Passing them via `data=` form-encodes the\n", "# payload, which the API rejected with a `value_error.jsondecode` validation error.\n", "result = requests.put(\n", "    \"http://localhost:8000/records/\",\n", "    json={\n", "        'paper_id': 'fhl_2014_Sullivan_27302.tei.xml',\n", "        'instance_id': 'urn:lsid:marinespecies.org:taxname:6',\n", "    },\n", "    timeout=10,  # fail fast instead of hanging when the local API server is not running\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "proof-romantic", "metadata": {}, "outputs": [], "source": [ "# Raw response body returned by the API for the PUT above.\n", "result.text" ] }, { "cell_type": "code", "execution_count": 1, "id": "acknowledged-lightning", "metadata": {}, "outputs": [], "source": [ "import json" ] }, { "cell_type": "code", "execution_count": 15, "id": "previous-leader", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'Paper ID': 'hms_mj526dg0556.tei.xml',\n", " 'Instance ID': 'urn:lsid:marinespecies.org:taxname:249634',\n", " 'Species': 'glandula',\n", " 'GBIF': 'https://www.gbif.org/species/search?q=glandula&qField=SCIENTIFIC',\n", " 'Time': '',\n", " 'Place': '',\n", " 
'div_enum': 5},\n", " {'Paper ID': 'fhl_2014_McMahon_34528.tei.xml',\n", " 'Instance ID': 'urn:lsid:marinespecies.org:taxname:395240',\n", " 'Species': 'concholepas',\n", " 'GBIF': 'https://www.gbif.org/species/search?q=concholepas&qField=SCIENTIFIC',\n", " 'Time': '',\n", " 'Place': 'Talmage,Anlauf,Blanchard,Taylor,Marshall,Ross,Pacific,bivalve,Davis',\n", " 'div_enum': 1},\n", " {'Paper ID': 'fhl_2011_Pierce_26647.tei.xml',\n", " 'Instance ID': 'urn:lsid:marinespecies.org:taxname:325914',\n", " 'Species': 'Mus',\n", " 'GBIF': 'https://www.gbif.org/species/search?q=Mus&qField=SCIENTIFIC',\n", " 'Time': '',\n", " 'Place': 'view,cascade,University',\n", " 'div_enum': 2},\n", " {'Paper ID': 'ucla_1978_SkollerDebbie_EnvComplexity.tei.xml',\n", " 'Instance ID': 'urn:lsid:marinespecies.org:taxname:402317',\n", " 'Species': 'Hypsypops rubicunda',\n", " 'GBIF': 'https://www.gbif.org/species/search?q=Hypsypops rubicunda&qField=SCIENTIFIC',\n", " 'Time': '1978-05-05',\n", " 'Place': 'tide',\n", " 'div_enum': 4},\n", " {'Paper ID': 'hms_ms463wm9540.tei.xml',\n", " 'Instance ID': 'urn:lsid:marinespecies.org:taxname:106122',\n", " 'Species': 'Balanus',\n", " 'GBIF': 'https://www.gbif.org/species/search?q=Balanus&qField=SCIENTIFIC',\n", " 'Time': '',\n", " 'Place': 'Hopkins Marine Life Refuge,Pacific Grove',\n", " 'div_enum': 2}]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Serialise a five-row random sample to a JSON string of records, then parse it back\n", "# so the cell displays a list of plain dicts.\n", "sample_json = all_records.sample(5).to_json(orient='records')\n", "json.loads(sample_json)" ] }, { "cell_type": "code", "execution_count": null, "id": "valuable-findings", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.1" } }, "nbformat": 4, "nbformat_minor": 5 }