Skip to content

Usage guide

!pip install -U odp-sdk --quiet
from odp.client import OdpClient # The SDK
from odp.dto.catalog import DataCollectionDto, DatasetDto, DatasetSpec, ObservableDto, DataCollectionSpec
from odp.dto import MetadataDto
from odp.dto.common.contact_info import ContactInfo
from odp.client.exc import OdpFileAlreadyExistsError, OdpResourceExistsError
import pyarrow as pa
import pandas as pd

from datetime import datetime, timedelta, date
from decimal import Decimal
import requests
client = OdpClient()

Create Dataset in Catalog for personal testing

dataset_name= client.personalize_name("tabular-v2-example-dataset")

# Declare a dataset manifest to add to the catalog
dataset_manifest = DatasetDto(
    metadata=MetadataDto(
        name=dataset_name,
        display_name=dataset_name,
        description=f'A test dataset for tabular v2, {dataset_name}',
        labels={"hubocean.io/test": True, 'tabular_v2': 1.0},
    ),
    spec=DatasetSpec(
        storage_controller="registry.hubocean.io/storageController/storage-tabular",
        storage_class="registry.hubocean.io/storageClass/tabular",
        maintainer=ContactInfo(
            contact="User McUsername <user.mcusername@emailprovider.com>",
            organisation="Organisation Name",
        ),
    ),
)
try:
    dataset_dto = client.catalog.create(dataset_manifest)
except OdpResourceExistsError:
    dataset_dto = client.catalog.get("catalog.hubocean.io/dataset/" + dataset_manifest.metadata.name)
    print("Dataset already exists. Fetched existing dataset:", dataset_dto.metadata.name)

Minimal tabular dataset creation

tab = client.table_v2(dataset_dto)
# 1. Drop the table
tab.drop()
tab = client.table_v2(dataset_dto)
# 2. Create new table with our desired schema
schema = pa.schema([
    ('id', pa.int64()),
    ('category', pa.string()),
    pa.field('geometry', pa.string(), metadata={  # geometry type with metadata
        b'isGeometry': b'1'
    })
])
tab.create(schema)

# 3. Verify the new schema
print(tab.schema())
id: int64
category: string
geometry: string
  -- field metadata --
  isGeometry: '1'
# Make some fake input data 
inputdata = [{'id': 1, 'category': 'A', 'geometry': 'POINT(1 1)'}, {'id': 2, 'category': 'B', 'geometry': 'POINT(2 1)'}]
inputdata2 = [{'id': 3, 'category': 'C', 'geometry': 'POINT(2 1)'}, {'id': 4, 'category': 'D', 'geometry': 'POINT(2 2)'}]

Insert data from dictionary

# tx indicates a transaction
with tab as tx:
  tx.insert(inputdata)
# Quick output one row to check: see further down guide for full .select() functionality and best practice
next(tab.select().rows())
{'id': 1, 'category': 'A', 'geometry': 'POINT(1 1)'}

Insert data from dataframe

# Creating the DataFrame
df = pd.DataFrame(inputdata2)
df
id category geometry
0 3 C POINT(2 1)
1 4 D POINT(2 2)
with tab as tx:
  tx.insert(df.to_dict(orient='records'))
# Quick output one row to check: see further down guide for full .select() functionality and best practice
next(tab.select().rows())
{'id': 3, 'category': 'C', 'geometry': 'POINT(2 1)'}
# Quick output defined number of rows to check: see further down guide for full .select() functionality and best practice
next(tab.select().pages(size=4))
[{'id': 3, 'category': 'C', 'geometry': 'POINT(2 1)'},
 {'id': 4, 'category': 'D', 'geometry': 'POINT(2 2)'},
 {'id': 3, 'category': 'C', 'geometry': 'POINT(2 1)'},
 {'id': 4, 'category': 'D', 'geometry': 'POINT(2 2)'}]

Clean up and remove data

# .drop() deletes all data and the schema
tab.drop()

# if you wish to remove the catalog entry here use this:
#client.catalog.delete(dataset_dto)
try:
    outputrow = next(tab.select().rows())
except FileNotFoundError as e:
    print(f"Error: {e} No data in the table")
    outputrow = None
Error:  No data in the table

Complex tabular dataset creation

tab = client.table_v2(dataset_dto)
schema = pa.schema([
    # Standard fields demonstrating basic type support
    # String fields can be indexed and are suitable for exact matching and sorting
    pa.field('id', pa.string(), nullable=False),  # Required field example

    # Numeric types supporting mathematical operations and range queries
    pa.field('int_value', pa.int64()),    # For exact counting/integers
    pa.field('float_value', pa.float64()), # For measurements/decimals

    # Decimal type - note only 10,2 precision is supported in the tabular controller
    pa.field('precise_value', pa.decimal128(10, 2)),  # For exact decimal calculations

    # Boolean field for flags/states
    pa.field('flag', pa.bool_()),

    # Temporal fields with indexing capabilities
    pa.field('date_only', pa.date32()),  # Date without time
    pa.field('timestamp_ms', pa.timestamp('ms'), metadata={
        b'index': b'1'  # Enables temporal range queries and sorting
    }),

    # Geometry handling - demonstrates spatial capabilities
    # The geometry field enables spatial queries (intersects, contains, within)
    pa.field('geometry', pa.string(), metadata={
        b'isGeometry': b'1',  # Marks as geometry type
        b'index': b'1',      # Enables spatial indexing
        b'big': b'1'         # Stores as separate file if large
    }),

    # Geographic coordinate fields - enables H3 and other geo functions
    pa.field('latitude', pa.float64(), metadata={
        b'geo': b'latitude'  # Marks for geo operations
    }),
    pa.field('longitude', pa.float64(), metadata={
        b'geo': b'longitude' # Marks for geo operations
    }),

    # Large data field examples - automatically managed by the big column handler
    pa.field('large_text', pa.string(), metadata={
        b'big': b'1'  # Stored separately if exceeds STR_LIMIT (128 bytes)
    }),
    pa.field('large_binary', pa.binary(), metadata={
        b'big': b'1'  # Stored separately if exceeds STR_LIMIT
    })
])
try:
    tab.create(schema)
    print(tab.schema())
except Exception as e:
    print(f"Error creating table: {e}")
    print("\nSchema may already exist:")
    print(tab.schema())
id: string not null
int_value: int64
float_value: double
precise_value: decimal128(10, 2)
flag: bool
date_only: date32[day]
timestamp_ms: timestamp[ms]
  -- field metadata --
  index: '1'
geometry: string
  -- field metadata --
  isGeometry: '1'
  index: '1'
  big: '1'
latitude: double
  -- field metadata --
  geo: 'latitude'
longitude: double
  -- field metadata --
  geo: 'longitude'
large_text: string
  -- field metadata --
  big: '1'
large_binary: binary
  -- field metadata --
  big: '1'
base_time = datetime(2024, 1, 1, 12, 0, 0)
samplerows = [{'id': 'LOCATION-001', 'int_value': 100, 'float_value': 23.456, 'precise_value': Decimal('123.45'), 'flag': True, 'date_only': base_time.date(), 'timestamp_ms': base_time, 'geometry': 'POINT(-122.4194 37.7749)', 'latitude': 37.7749, 'longitude': -122.4194, 'large_text': 'COASTAL_DATA' * 1000, 'large_binary': b'BINARY_DATA_1' * 1000}, {'id': 'LOCATION-002', 'int_value': 200, 'float_value': 45.678, 'precise_value': Decimal('567.89'), 'flag': False, 'date_only': (base_time + timedelta(days=1)).date(), 'timestamp_ms': base_time + timedelta(hours=12), 'geometry': 'POINT(-98.4936 29.4241)', 'latitude': 29.4241, 'longitude': -98.4936, 'large_text': 'INLAND_DATA' * 1000, 'large_binary': b'BINARY_DATA_2' * 1000}]
with tab as tx:
    tx.insert(samplerows)
    print("Sample row inserted successfully")
Sample row inserted successfully
# Quick output as dataframe: see further down guide for full .select() functionality and best practice
next(tab.select().dataframes())
id int_value float_value precise_value flag date_only timestamp_ms geometry latitude longitude large_text large_binary
0 LOCATION-001 100 23.456 123.45 True 2024-01-01 2024-01-01 12:00:00 POINT(-122.4194 37.7749) 37.7749 -122.4194 COASTAL_DATACOASTAL_DATACOASTAL_DATACOASTAL_DA... BINARY_DATA_1BINARY_DATA_1BINARY_DATA_1BINARY_...
1 LOCATION-002 200 45.678 567.89 False 2024-01-02 2024-01-02 00:00:00 POINT(-98.4936 29.4241) 29.4241 -98.4936 INLAND_DATAINLAND_DATAINLAND_DATAINLAND_DATAIN... BINARY_DATA_2BINARY_DATA_2BINARY_DATA_2BINARY_...
# Quick output as dataframe: see further down guide for full .select() functionality and best practice
# Select now does some filters and and only returns some columns: see further down guide for more details
next(tab.select('flag == True AND precise_value < 200.0 AND latitude > 35', cols=['id','geometry']).dataframes())
id geometry
0 LOCATION-001 POINT(-122.4194 37.7749)

Clean up and remove data

# .drop() deletes all data and the schema
tab.drop()

# if you wish to remove the catalog entry here use this:
#client.catalog.delete(dataset_dto)
try:
    outputrow = next(tab.select().rows())
except FileNotFoundError as e:
    print(f"Error: {e} No data in the table")
    outputrow = None
Error:  No data in the table

Large tabular dataset creation

# Download an 11MB CSV file of real data (environmental data from the Mareano program in Norway)
response = requests.get('https://sadataproducts.blob.core.windows.net/huboceandatapublic/mareano_vulnerable.csv')

# Save the content to a file
with open('mareano_vulnerable.csv', 'wb') as file:
    file.write(response.content)
df = pd.read_csv('mareano_vulnerable.csv')
df['dato'] = pd.to_datetime(df['dato'])
df.head(2)
gml_id lokalId navnerom versjonId målemetode datauttaksdato opphav referanseStasjon toktNummer prøveNummer antall dato lengdegrad breddegrad indikatorVME indicatorVME geometry_epsg25833 geometry
0 id68c569dc-1605-4af0-a75b-4b6858cfe245 5d4269c0-bba8-436a-b774-55d7991c178e https://data.geonorge.no/sosi/biomangfold/sarb... 20201201 GNSSKodemålingEnkleMålinger 2024-10-27T08:01:37 Havforskningsinstituttet 1 2006612 53 0 2006-06-13 21.655499 70.456017 Blomkålkorallskog Nephtheidae medow POINT (748055.943029 7830322.159233) POINT (21.6555 70.456)
1 id4f09dad2-c9f9-4c2d-8ac7-088597b613ce 1cc3b1e5-4b20-428d-8df3-524bb57cb79b https://data.geonorge.no/sosi/biomangfold/sarb... 20201201 GNSSKodemålingEnkleMålinger 2024-10-27T08:01:37 Havforskningsinstituttet 2 2006612 2 0 2006-05-27 22.391058 71.351889 Blomkålkorallskog Nephtheidae medow POINT (763192.003704 7932756.291837) POINT (22.3911 71.3519)
# Define your schema
schema = pa.schema([
    ('gml_id', pa.string()),
    ('lokalId', pa.string()),
    ('navnerom', pa.string()),
    ('versjonId', pa.int64()),  # 'long' maps to int64
    ('målemetode', pa.string()),
    ('datauttaksdato', pa.string()),
    ('opphav', pa.string()),
    ('referanseStasjon', pa.int64()),
    ('toktNummer', pa.int64()),
    ('prøveNummer', pa.int64()),
    ('antall', pa.int64()),
    ('dato', pa.date32()),
    ('lengdegrad', pa.float64()),  # 'double' maps to float64
    ('breddegrad', pa.float64()),
    ('indikatorVME', pa.string()),
    ('indicatorVME', pa.string()),
    pa.field('geometry_epsg25833', pa.string()),
    pa.field('geometry', pa.string(), metadata={  # geometry type with metadata
        b'isGeometry': b'1',
        b'big': b'1',
        b'index': b'1'
    })
])
tab.drop()
try:
    tab.create(schema)
    print(tab.schema())
except Exception as e:
    print(f"Error creating table: {e}")
    print("\nSchema may already exist:")
    print(tab.schema())
gml_id: string
lokalId: string
navnerom: string
versjonId: int64
målemetode: string
datauttaksdato: string
opphav: string
referanseStasjon: int64
toktNummer: int64
prøveNummer: int64
antall: int64
dato: date32[day]
lengdegrad: double
breddegrad: double
indikatorVME: string
indicatorVME: string
geometry_epsg25833: string
geometry: string
  -- field metadata --
  isGeometry: '1'
  big: '1'
  index: '1'
with tab as tx:
    tx.insert(df.to_dict(orient='records'))
    print("Sample data from dataframe inserted successfully")
Sample data from dataframe inserted successfully
tab = client.table_v2(dataset_dto)

Selecting data

Selecting certain columns

# One row: all columns
rows = tab.select().rows()
next(rows)
{'gml_id': 'id68c569dc-1605-4af0-a75b-4b6858cfe245',
 'lokalId': '5d4269c0-bba8-436a-b774-55d7991c178e',
 'navnerom': 'https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata',
 'versjonId': 20201201,
 'målemetode': 'GNSSKodemålingEnkleMålinger',
 'datauttaksdato': '2024-10-27T08:01:37',
 'opphav': 'Havforskningsinstituttet',
 'referanseStasjon': 1,
 'toktNummer': 2006612,
 'prøveNummer': 53,
 'antall': 0,
 'dato': datetime.date(2006, 6, 13),
 'lengdegrad': 21.655499,
 'breddegrad': 70.456017,
 'indikatorVME': 'Blomkålkorallskog',
 'indicatorVME': 'Nephtheidae medow',
 'geometry_epsg25833': 'POINT (748055.943029 7830322.159233)',
 'geometry': 'POINT (21.6555 70.456)'}
# One row: select columns
rows = tab.select(cols=['dato','geometry']).rows()
next(rows)
{'dato': datetime.date(2006, 6, 13), 'geometry': 'POINT (21.6555 70.456)'}

Selecting methods

  • NB for any of these methods the .rows(), .pages() and .dataframes() creates a generator object.
  • using next() will load the first item of this object (whether a row, page or dataframe)
  • successive next() will load the next item, unless you refresh the .select() command
  • you can iterate over these or concatenate to get the full outputs
rows = tab.select(cols=['dato','geometry']).rows()
next(rows)
{'dato': datetime.date(2006, 6, 13), 'geometry': 'POINT (21.6555 70.456)'}
next(rows)
{'dato': datetime.date(2006, 5, 27), 'geometry': 'POINT (22.3911 71.3519)'}
for i, row in enumerate(rows):
    if i >= 3:
        break
    print(row)
{'dato': datetime.date(2006, 5, 27), 'geometry': 'POINT (22.4169 71.3309)'}
{'dato': datetime.date(2006, 5, 27), 'geometry': 'POINT (22.4892 71.3288)'}
{'dato': datetime.date(2006, 5, 27), 'geometry': 'POINT (22.5345 71.2944)'}
pages = (tab.select(cols=['dato','geometry']).pages(size=5)) ## User defined sizes
next(pages)
[{'dato': datetime.date(2006, 6, 13), 'geometry': 'POINT (21.6555 70.456)'},
 {'dato': datetime.date(2006, 5, 27), 'geometry': 'POINT (22.3911 71.3519)'},
 {'dato': datetime.date(2006, 5, 27), 'geometry': 'POINT (22.4169 71.3309)'},
 {'dato': datetime.date(2006, 5, 27), 'geometry': 'POINT (22.4892 71.3288)'},
 {'dato': datetime.date(2006, 5, 27), 'geometry': 'POINT (22.5345 71.2944)'}]
dataframes = tab.select().dataframes() #2000 rows at a time
next(dataframes).tail(2)
gml_id lokalId navnerom versjonId målemetode datauttaksdato opphav referanseStasjon toktNummer prøveNummer antall dato lengdegrad breddegrad indikatorVME indicatorVME geometry_epsg25833 geometry
1998 idab6c883d-81d6-4000-b0d1-ddf4016210e2 a664bdcd-31b8-4ebb-a4d4-7fc633d9cccf https://data.geonorge.no/sosi/biomangfold/sarb... 20201201 GNSSKodemålingEnkleMålinger 2024-10-27T08:01:40 Havforskningsinstituttet 1977 2019106 2034 0 2019-04-11 16.344705 74.805678 Blomkålkorallskog Nephtheidae medow POINT (539337.08994 8302373.717892) POINT (16.3447 74.8057)
1999 id400875c3-fe87-4a84-ad5b-2cb7fe390855 13d0dccf-a533-483e-a24a-57f191a76b74 https://data.geonorge.no/sosi/biomangfold/sarb... 20201201 GNSSKodemålingEnkleMålinger 2024-10-27T08:01:40 Havforskningsinstituttet 1978 2019106 2035 0 2019-04-11 16.202381 74.800507 Blomkålkorallskog Nephtheidae medow POINT (535186.634094 8301704.358199) POINT (16.2024 74.8005)
batches = tab.select().batches() #Platform defined size for performance
next(batches)
pyarrow.RecordBatch
gml_id: string
lokalId: string
navnerom: string
versjonId: int64
målemetode: string
datauttaksdato: string
opphav: string
referanseStasjon: int64
toktNummer: int64
prøveNummer: int64
antall: int64
dato: date32[day]
lengdegrad: double
breddegrad: double
indikatorVME: string
indicatorVME: string
geometry_epsg25833: string
geometry: string
----
gml_id: ["id68c569dc-1605-4af0-a75b-4b6858cfe245","id4f09dad2-c9f9-4c2d-8ac7-088597b613ce","ided86afca-a75e-4860-8554-3a06faaaed86","id52e24142-8e59-43b4-8d2e-ea18e10fc3c4","idd7216d5d-d605-4d01-aafc-3f4bdec477b9","id14d54066-5b42-43e5-b0d0-28cd8c953e9a","idd31c948c-8f70-4573-a965-2b6d1eb483ba","id34c749ef-6bb8-44f5-9e1d-21e679ca8690","idf0774d13-325f-405a-a3dd-3c7c8506d5e6","idb3b3c7e7-7f25-4952-bae8-156edc9f016c",...,"id5167ff60-058c-49ba-99fc-00b8202a0564","idd7577b02-0496-4cae-be40-2379cfd8940d","idf7cacaba-b981-46fe-bebd-7ce0e1733b83","id99d10998-e71d-4d6b-b376-b53437035c34","idb62085e3-3439-464c-a542-d9c50ae9fbac","id9016ec1e-2c66-46e6-a2f9-be4f59016c4c","id34d81efc-a4f0-4b57-9667-b3fcffe3bc3b","idbeb5463e-357e-4762-a236-bada387e07ef","idab6c883d-81d6-4000-b0d1-ddf4016210e2","id400875c3-fe87-4a84-ad5b-2cb7fe390855"]
lokalId: ["5d4269c0-bba8-436a-b774-55d7991c178e","1cc3b1e5-4b20-428d-8df3-524bb57cb79b","370dad98-0f0d-40cd-899f-247d7e6ca514","aa824a4d-d3f9-408c-a6b9-558938c2c9b8","232af407-c87c-42be-9bb5-1cb710345b40","92f84e2e-13f4-4985-b6ba-260530b6782e","5533b0ee-ef1f-4799-b339-f405e4c97bdc","a3e08608-d95a-4772-bdd8-d5c610a13ea9","2887fa6b-830a-4d62-bae6-7b18a771f5fd","a155a3f0-7a0b-43f8-8d9f-e21a1522fc0d",...,"c3014828-a680-487f-a579-e0c1f8bbe249","3d420c87-37f4-4744-93ec-64b333364266","8cb60bdc-33ee-40fd-a6e0-50b592305352","71eba7f2-812e-4671-aec2-6134519da253","bfcaba41-dc3b-4d3c-9d92-12f650a99eac","c691a82b-36fb-4e31-b5bb-ea31946bcb65","a8614ddf-0109-4302-8708-17ce88b61a72","b3ad115f-7d95-4000-aece-b78c14324506","a664bdcd-31b8-4ebb-a4d4-7fc633d9cccf","13d0dccf-a533-483e-a24a-57f191a76b74"]
navnerom: ["https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata",...,"https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata","https://data.geonorge.no/sosi/biomangfold/sarbare_marine_bunndyr_observasjonsdata"]
versjonId: [20201201,20201201,20201201,20201201,20201201,20201201,20201201,20201201,20201201,20201201,...,20201201,20201201,20201201,20201201,20201201,20201201,20201201,20201201,20201201,20201201]
målemetode: ["GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger",...,"GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger","GNSSKodemålingEnkleMålinger"]
datauttaksdato: ["2024-10-27T08:01:37","2024-10-27T08:01:37","2024-10-27T08:01:37","2024-10-27T08:01:37","2024-10-27T08:01:37","2024-10-27T08:01:37","2024-10-27T08:01:37","2024-10-27T08:01:37","2024-10-27T08:01:37","2024-10-27T08:01:37",...,"2024-10-27T08:01:40","2024-10-27T08:01:40","2024-10-27T08:01:40","2024-10-27T08:01:40","2024-10-27T08:01:40","2024-10-27T08:01:40","2024-10-27T08:01:40","2024-10-27T08:01:40","2024-10-27T08:01:40","2024-10-27T08:01:40"]
opphav: ["Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet",...,"Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet","Havforskningsinstituttet"]
referanseStasjon: [1,2,3,4,5,6,7,8,9,10,...,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978]
toktNummer: [2006612,2006612,2006612,2006612,2006612,2006612,2006612,2006612,2006612,2006612,...,2019106,2019106,2019106,2019106,2019106,2019106,2019106,2019106,2019106,2019106]
prøveNummer: [53,2,3,4,5,6,7,8,10,11,...,2026,2027,2028,2029,2030,2031,2032,2033,2034,2035]
...

Full dataset output - via pages

pages = list(tab.select().pages())
all_rows = [row for page in pages for row in page]
pd.DataFrame(all_rows).tail(2)
gml_id lokalId navnerom versjonId målemetode datauttaksdato opphav referanseStasjon toktNummer prøveNummer antall dato lengdegrad breddegrad indikatorVME indicatorVME geometry_epsg25833 geometry
28734 ida8c42293-ddac-4e8b-becd-56b02daed734 15e05579-1ea8-4048-b783-88874ddd2cb1 https://data.geonorge.no/sosi/biomangfold/sarb... 20201201 GNSSKodemålingEnkleMålinger 2024-10-27T08:02:11 Havforskningsinstituttet 3553 2024007005 3637 0 2024-04-28 19.856621 74.734207 Umbellulabestander Umbellula fields POINT (642588.674615 8299784.77461) POINT (19.8566 74.7342)
28735 id5b22000a-bfef-4b00-8779-dfb679d1b124 3f7a1c2c-67e0-4dd9-b108-4bba6c169283 https://data.geonorge.no/sosi/biomangfold/sarb... 20201201 GNSSKodemålingEnkleMålinger 2024-10-27T08:02:11 Havforskningsinstituttet 3554 2024007005 3638 0 2024-04-28 19.832780 74.715392 Umbellulabestander Umbellula fields POINT (642061.899443 8297636.647602) POINT (19.8328 74.7154)

Full dataset output - via dataframes

dfs = list(tab.select().dataframes())
pd.concat(dfs, ignore_index=True).tail(2)
gml_id lokalId navnerom versjonId målemetode datauttaksdato opphav referanseStasjon toktNummer prøveNummer antall dato lengdegrad breddegrad indikatorVME indicatorVME geometry_epsg25833 geometry
28734 ida8c42293-ddac-4e8b-becd-56b02daed734 15e05579-1ea8-4048-b783-88874ddd2cb1 https://data.geonorge.no/sosi/biomangfold/sarb... 20201201 GNSSKodemålingEnkleMålinger 2024-10-27T08:02:11 Havforskningsinstituttet 3553 2024007005 3637 0 2024-04-28 19.856621 74.734207 Umbellulabestander Umbellula fields POINT (642588.674615 8299784.77461) POINT (19.8566 74.7342)
28735 id5b22000a-bfef-4b00-8779-dfb679d1b124 3f7a1c2c-67e0-4dd9-b108-4bba6c169283 https://data.geonorge.no/sosi/biomangfold/sarb... 20201201 GNSSKodemålingEnkleMålinger 2024-10-27T08:02:11 Havforskningsinstituttet 3554 2024007005 3638 0 2024-04-28 19.832780 74.715392 Umbellulabestander Umbellula fields POINT (642061.899443 8297636.647602) POINT (19.8328 74.7154)

Select - querying on column values

next(tab.select("(referanseStasjon < 100 OR referanseStasjon > 3547) AND antall > 0", cols=['referanseStasjon','geometry','dato','antall']).dataframes())
referanseStasjon geometry dato antall
0 41 POINT (22.6732 70.4544) 2006-06-11 2
1 73 POINT (22.4803 70.2649) 2007-04-06 3
2 84 POINT (20.3212 70.0829) 2007-04-05 2
3 85 POINT (20.3498 70.1325) 2007-04-05 1
4 98 POINT (18.0847 70.1418) 2007-04-11 2
5 3548 POINT (20.533 74.6876) 2024-04-28 10
6 3549 POINT (20.2775 74.6425) 2024-04-28 6
7 3551 POINT (19.9896 74.7329) 2024-04-28 1
df = next(tab.select("dato >= ?", vars=[date.fromisoformat('2024-04-28')], cols=['gml_id','geometry','dato','antall']).dataframes())
df
gml_id geometry dato antall
0 id466e4033-8b85-48ba-8ee7-108ecb96dee6 POINT (20.6803 74.5988) 2024-04-28 10
1 id6244d794-9f6f-4926-85c9-a03166690535 POINT (20.5641 74.6297) 2024-04-28 3
2 id0711fa5e-90af-4f11-b8ea-759b40f46a47 POINT (20.533 74.6876) 2024-04-28 10
3 id799150c7-97cf-4f29-bcc8-e746aeb7fc2b POINT (20.2775 74.6425) 2024-04-28 6
4 id7cc125b8-ddab-448e-9cdb-5ceaf7d2bca7 POINT (20.0846 74.6837) 2024-04-28 0
5 id9e2ff775-9f40-403d-a78c-23f007e2e511 POINT (19.9896 74.7329) 2024-04-28 1
6 ide06be530-247e-4a4a-9436-4f09b8f62b4d POINT (19.8502 74.7406) 2024-04-28 0
7 id3ad91152-1912-4607-b838-32e3ccf8f210 POINT (19.8566 74.7342) 2024-04-28 0
8 idbb14d359-2ac0-42e1-8782-8b19686407f9 POINT (19.8328 74.7154) 2024-04-28 0

Select - geospatial data query - within

next(tab.select('geometry within "POLYGON((20 70, 22 70, 22 74, 20 74, 20 70))"', cols=['referanseStasjon','geometry','dato','antall']).dataframes())
referanseStasjon geometry dato antall
0 1 POINT (21.6555 70.456) 2006-06-13 0
1 9 POINT (21.9351 71.2608) 2006-05-28 0
2 10 POINT (21.4463 71.209) 2006-06-01 0
3 11 POINT (21.7138 71.2206) 2006-06-01 0
4 12 POINT (21.9102 71.1594) 2006-06-01 0
... ... ... ... ...
165 629 POINT (20.0519 70.7718) 2010-09-22 0
166 635 POINT (20.1405 70.6695) 2010-09-22 0
167 636 POINT (20.1092 70.6261) 2010-09-23 0
168 637 POINT (20.2597 70.5555) 2010-09-23 1
169 638 POINT (20.1546 70.5658) 2010-09-23 0

170 rows × 4 columns

Select - geospatial data query - contains

next(tab.select('geometry contains "POINT (20.533 74.6876)"', cols=['referanseStasjon','geometry','dato','antall']).dataframes())
referanseStasjon geometry dato antall
0 3548 POINT (20.533 74.6876) 2024-04-28 10

Select - geospatial data query - intersect

next(tab.select('geometry intersect "POLYGON((20 70, 22 70, 22 74, 20 74, 20 70))"', cols=['referanseStasjon','geometry','dato','antall']).dataframes())
referanseStasjon geometry dato antall
0 1 POINT (21.6555 70.456) 2006-06-13 0
1 9 POINT (21.9351 71.2608) 2006-05-28 0
2 10 POINT (21.4463 71.209) 2006-06-01 0
3 11 POINT (21.7138 71.2206) 2006-06-01 0
4 12 POINT (21.9102 71.1594) 2006-06-01 0
... ... ... ... ...
165 629 POINT (20.0519 70.7718) 2010-09-22 0
166 635 POINT (20.1405 70.6695) 2010-09-22 0
167 636 POINT (20.1092 70.6261) 2010-09-23 0
168 637 POINT (20.2597 70.5555) 2010-09-23 1
169 638 POINT (20.1546 70.5658) 2010-09-23 0

170 rows × 4 columns

tab.drop()

Clean up and remove data

# .drop() deletes all data and the schema
tab.drop()
try:
    outputrow = next(tab.select().rows())
except FileNotFoundError as e:
    print(f"Error: {e} No data in the table")
    outputrow = None
Error:  No data in the table
# to delete the catalog entry created at the top
client.catalog.delete(dataset_dto)