# --- # jupyter: # jupytext: # text_representation: # extension: .py # format_name: percent # format_version: '1.3' # jupytext_version: 1.19.1 # kernelspec: # display_name: pic312 # language: python # name: python3 # --- # %% [markdown] # # SQLITE Tutorial # # # This tutorial shows you how query the existing `opacity.db` and also shows you how to customize your own `opacity.db` # # A lot of this code is embedded in PICASO. # %% import sqlite3 import io import numpy as np import os import picaso.justdoit as jdi opa = jdi.opannection() # %% [markdown] # ## Establishing a Connection to a Database # %% #this is where your opacity file should be located if you've set your environments correctly db_filename = opa.db_filename #these functions are so that you can store your float arrays as bytes to minimize storage def adapt_array(arr): out = io.BytesIO() np.save(out, arr) out.seek(0) return sqlite3.Binary(out.read()) def convert_array(text): out = io.BytesIO(text) out.seek(0) return np.load(out) #tell sqlite what to do with an array sqlite3.register_adapter(np.ndarray, adapt_array) sqlite3.register_converter("array", convert_array) # %% #this will be how we execute commands to grab chunks of data #this is how you establish a connection to the db conn = sqlite3.connect(db_filename, detect_types=sqlite3.PARSE_DECLTYPES) cur = conn.cursor() #usually you want to close your database right after you open it using #conn.close() #for now, we will keep it open for the tutorial # %% [markdown] # ## Using `SELECT * FROM` to Query Items # # ### Get from header # %% #let's start by just grabbing all the info from the header header = cur.execute('SELECT * FROM header') cols = [description[0] for description in header.description] data = cur.fetchall() # %% [markdown] # ### Get Continuum Opacity # %% #what molecules exist? cur.execute('SELECT molecule FROM continuum') print(np.unique(cur.fetchall())) # %% #what temperatures exist? cur.execute('SELECT temperature FROM continuum') cia_temperatures = np.unique(cur.fetchall()) cia_temperatures[0:10] # %% #wavenumber grid from header cur.execute('SELECT wavenumber_grid FROM header') wave_grid = cur.fetchone()[0] # %% #grab H2H2 at 300 K cur.execute('SELECT opacity FROM continuum WHERE molecule=? AND temperature=?',('H2H2',300)) data = cur.fetchall() data # %% #grab all opacity at 300 K cur.execute('SELECT molecule,opacity FROM continuum WHERE temperature=300') data = cur.fetchall() data # %% [markdown] # ### Get Molecular Opacity # # Molecular opacities are on a specific P-T grid so we book keep them by assigning indices to each pair e.g (1: 1e-6 bar, 75 K, 2:1e-6, 80K.. and so on) # %% #get the PT grid with the corresponding grid cur.execute('SELECT ptid, pressure, temperature FROM molecular') data= cur.fetchall() pt_pairs = sorted(list(set(data)),key=lambda x: (x[0]) ) pt_pairs[0:10]#example of the first 10 PT pairs # %% #what molecules exist? cur.execute('SELECT molecule FROM molecular') print(np.unique(cur.fetchall())) # %% # grab the opacity at a specific temp and pressure grab_p = 0.1 # bar grab_t = 100 # kelvin import math #here's a little code to get out the correct pair (so we dont have to worry about getting the exact number right) ind_pt = [min(pt_pairs, key=lambda c: math.hypot(c[1]- coordinate[0], c[2]-coordinate[1]))[0] for coordinate in zip([grab_p],[grab_t])] cur.execute("""SELECT molecule,ptid,opacity FROM molecular WHERE molecule = ? AND ptid = ?""",('H2O',ind_pt[0])) data= cur.fetchall() data #gives you the molecule, ptid, and the opacity # %% grab_moles = ['H2O','CO2'] grab_p = [0.1,1,100] # bar grab_t = [100,200,700] # kelvin #here's a little code to get out the correct pair (so we dont have to worry about getting the exact number right) ind_pt = [min(pt_pairs, key=lambda c: math.hypot(c[1]- coordinate[0], c[2]-coordinate[1]))[0] for coordinate in zip(grab_p,grab_t)] cur.execute("""SELECT molecule,ptid,opacity FROM molecular WHERE molecule in {} AND ptid in {}""".format(str(tuple(grab_moles)), str(tuple(ind_pt)))) data= cur.fetchall() data #gives you the molecule, ptid, and the opacity # %% #Dont forget to close your connection!!!! conn.close() # %% [markdown] # ## Creating a New Database from Scratch # # **Note on molecule names**: Because ``picaso`` uses dict formatting to handle opacities, users can easily swap in different molecules. # # For example, if I wanted to include CO2-H2 CIA absorption, I can add ``CO2H2`` to the molecules list below. However, it is only quasi-automated in this regaurd. Please contact natasha.e.batalha@gmail.com if you are adding new CIA to the code. # # **Exceptions**: The exceptions to this are non-CIA continuum opacities. Right now, the other sources of continuum enabled are ``H2-``, ``H-bf`` and ``H-ff`` which have odd-ball formatting since they aren't simple two molecules. _Please let me know if you want to see another continuum source added_. # # **Careful** with case sensitive molecules like **TiO**, **Na**. Make sure you get these right. # %% db_filename = '/data/picaso_dbs/new_fake_opacity.db' conn = sqlite3.connect(db_filename, detect_types=sqlite3.PARSE_DECLTYPES) #same story with bytes and arrays def adapt_array(arr): out = io.BytesIO() np.save(out, arr) out.seek(0) return sqlite3.Binary(out.read()) def convert_array(text): out = io.BytesIO(text) out.seek(0) return np.load(out) #tell sqlite what to do with an array sqlite3.register_adapter(np.ndarray, adapt_array) sqlite3.register_converter("array", convert_array) cur = conn.cursor() # %% [markdown] # ### Build Tables: header, continuum, and molecular tables # # It is **VERY** important that these tables are structured the same way. If you think something should be edited, ping natasha.e.batalha@gmail.com # %% #header command="""DROP TABLE IF EXISTS header; CREATE TABLE header ( id INTEGER PRIMARY KEY, pressure_unit VARCHAR, temperature_unit VARCHAR, wavenumber_grid array, continuum_unit VARCHAR, molecular_unit VARCHAR );""" cur.executescript(command) conn.commit() #molecular data table, note the existence of PTID which will be very important command = """DROP TABLE IF EXISTS molecular; CREATE TABLE molecular ( id INTEGER PRIMARY KEY, ptid INTEGER, molecule VARCHAR , pressure FLOAT, temperature FLOAT, opacity array);""" cur.executescript(command) conn.commit() #continuum data table command = """DROP TABLE IF EXISTS continuum; CREATE TABLE continuum ( id INTEGER PRIMARY KEY, molecule VARCHAR , temperature FLOAT, opacity array);""" cur.executescript(command) conn.commit() #this commits the changes to the database # %% [markdown] # ### Insert header info (unit and wave grid info!) # # The units **MUST** be the same. The wave grid can be whatever as long as it's consistent between continuum and molecular tables. # %% wave_grid = np.linspace(1e4/2, 1e4/0.5, 1000) #fake inverse cm wavenumber grid cur.execute('INSERT INTO header (pressure_unit, temperature_unit, wavenumber_grid, continuum_unit,molecular_unit) values (?,?,?,?,?)', ('bar','kelvin', np.array(wave_grid), 'cm-1 amagat-2', 'cm2/molecule')) conn.commit() # %% [markdown] # ### Insert continuum opacity to database # %% cia_temperature_grid = [100,300,500,700] #insert continuum for mol in ['H2H2', 'H2He', 'H2H', 'H2CH4', 'H2N2','H2-', 'H-bf', 'H-ff']: for T in cia_temperature_grid: OPACITY = wave_grid *0 + 1e-33 #INSERT YOUR OPACITY HERE cur.execute('INSERT INTO continuum (molecule, temperature, opacity) values (?,?,?)', (mol,float(T), OPACITY)) conn.commit() # %% [markdown] # ### Insert molecular opacity to database # # Again, make sure that your molecules are **case-sensitive**: e.g. Sodium should be `Na` not `NA` # %% #create a fake PT grid pts=[] for temp in [100,200,400]: for pres in [0.1, 1, 100]: pts += [[temp,pres]] pts # %% #insert molecular for mol in ['H2O','CO2','CH4']: i = 1 #NOTE THIS INDEX HERE IS CRUCIAL! It will be how we quickly locate opacities for T,P in pts: OPACITY = wave_grid *0 + 1e-33 #INSERT YOUR OPACITY HERE cur.execute('INSERT INTO molecular (ptid, molecule, temperature, pressure,opacity) values (?,?,?,?,?)', (i,mol,float(T),float(P), OPACITY)) conn.commit() i+=1 # %% #ALL DONE!!! conn.close()