SQLITE Tutorial

This tutorial shows you how query the existing opacity.db and also shows you how to customize your own opacity.db

A lot of this code is embedded in PICASO.

[1]:
import sqlite3
import io
import numpy as np
import os

Establishing a Connection to a Database

[2]:
#this is where your opacity file should be located if you've set your environments correctly
db_filename = os.path.join(os.getenv('picaso_refdata'), 'opacities','opacities.db')

#these functions are so that you can store your float arrays as bytes to minimize storage
def adapt_array(arr):
    out = io.BytesIO()
    np.save(out, arr)
    out.seek(0)
    return sqlite3.Binary(out.read())

def convert_array(text):
    out = io.BytesIO(text)
    out.seek(0)
    return np.load(out)

#tell sqlite what to do with an array
sqlite3.register_adapter(np.ndarray, adapt_array)
sqlite3.register_converter("array", convert_array)
[3]:
#this will be how we execute commands to grab chunks of data
#this is how you establish a connection to the db
conn = sqlite3.connect(db_filename, detect_types=sqlite3.PARSE_DECLTYPES)
cur = conn.cursor()

#usually you want to close your database right after you open it using
#conn.close()

#for now, we will keep it open for the tutorial

Using SELECT * FROM to Query Items

Get from header

[4]:
#let's start by just grabbing all the info from the header
header = cur.execute('SELECT * FROM header')
cols = [description[0] for description in header.description]
data = cur.fetchall()

Get Continuum Opacity

[5]:
#what molecules exist?
cur.execute('SELECT molecule FROM continuum')
print(np.unique(cur.fetchall()))
['H-bf' 'H-ff' 'H2-' 'H2CH4' 'H2H' 'H2H2' 'H2He' 'H2N2']
[6]:
#what temperatures exist?
cur.execute('SELECT temperature FROM continuum')
cia_temperatures = np.unique(cur.fetchall())
cia_temperatures[0:10]
[6]:
array([ 75., 100., 125., 150., 175., 200., 225., 250., 275., 300.])
[7]:
#wavenumber grid from header
cur.execute('SELECT wavenumber_grid FROM header')
wave_grid = cur.fetchone()[0]
[8]:
#grab H2H2 at 300 K
cur.execute('SELECT opacity FROM continuum WHERE molecule=? AND temperature=?',('H2H2',300))
data = cur.fetchall()
data
[8]:
[(array([3.93623018e-08, 3.93322996e-08, 3.93023172e-08, ...,
         5.33778660e-51, 5.25551817e-51, 5.17450960e-51]),)]
[9]:
#grab all opacity at 300 K
cur.execute('SELECT molecule,opacity FROM continuum WHERE temperature=300')
data = cur.fetchall()
data
[9]:
[('H2H2',
  array([3.93623018e-08, 3.93322996e-08, 3.93023172e-08, ...,
         5.33778660e-51, 5.25551817e-51, 5.17450960e-51])),
 ('H2He',
  array([1.26478388e-08, 1.26383084e-08, 1.26287841e-08, ...,
         1.00000000e-33, 1.00000000e-33, 1.00000000e-33])),
 ('H2H',
  array([2.68622393e-06, 2.68506293e-06, 2.68390231e-06, ...,
         1.00000000e-33, 1.00000000e-33, 1.00000000e-33])),
 ('H2CH4',
  array([1.89543711e-08, 1.89330541e-08, 1.89117590e-08, ...,
         1.00000000e-33, 1.00000000e-33, 1.00000000e-33])),
 ('H2N2',
  array([7.47205007e-09, 7.46270631e-09, 7.45337329e-09, ...,
         1.00000000e-33, 1.00000000e-33, 1.00000000e-33])),
 ('H2-', array([1.e-33, 1.e-33, 1.e-33, ..., 1.e-33, 1.e-33, 1.e-33])),
 ('H-bf', array([1.e-33, 1.e-33, 1.e-33, ..., 1.e-33, 1.e-33, 1.e-33])),
 ('H-ff', array([1.e-63, 1.e-63, 1.e-63, ..., 1.e-63, 1.e-63, 1.e-63]))]

Get Molecular Opacity

Molecular opacities are on a specific P-T grid so we book keep them by assigning indices to each pair e.g (1: 1e-6 bar, 75 K, 2:1e-6, 80K.. and so on)

[10]:
#get the PT grid with the corresponding grid
cur.execute('SELECT ptid, pressure, temperature FROM molecular')
data= cur.fetchall()
pt_pairs = sorted(list(set(data)),key=lambda x: (x[0]) )
pt_pairs[0:10]#example of the first 10 PT pairs
[10]:
[(1, 1e-06, 75.0),
 (2, 3e-06, 75.0),
 (3, 1e-05, 75.0),
 (4, 3e-05, 75.0),
 (5, 0.0001, 75.0),
 (6, 0.0003, 75.0),
 (7, 0.001, 75.0),
 (8, 0.003, 75.0),
 (9, 0.01, 75.0),
 (10, 0.03, 75.0)]
[11]:
#what molecules exist?
cur.execute('SELECT molecule FROM molecular')
print(np.unique(cur.fetchall()))
['AlH' 'C2H2' 'C2H4' 'C2H6' 'CH4' 'CO' 'CO2' 'CaH' 'CrH' 'Cs' 'Fe' 'FeH'
 'H2' 'H2O' 'H2S' 'H3+' 'HCN' 'K' 'Li' 'LiCl' 'LiF' 'LiH' 'MgH' 'N2' 'N2O'
 'NH3' 'Na' 'O2' 'O3' 'OCS' 'PH3' 'Rb' 'SiO' 'TiH' 'TiO' 'VO']
[12]:
# grab the opacity at a specific temp and pressure
grab_p = 0.1 # bar
grab_t = 100 # kelvin
import math

#here's a little code to get out the correct pair (so we dont have to worry about getting the exact number right)
ind_pt = [min(pt_pairs, key=lambda c: math.hypot(c[1]- coordinate[0], c[2]-coordinate[1]))[0]
          for coordinate in  zip([grab_p],[grab_t])]

cur.execute("""SELECT molecule,ptid,opacity
            FROM molecular
            WHERE molecule = ?
            AND ptid = ?""",('H2O',ind_pt[0]))
data= cur.fetchall()
data #gives you the molecule, ptid, and the opacity
[12]:
[('H2O',
  31,
  array([1.61180990e-27, 1.15539216e-27, 8.68610646e-28, ...,
         6.69523391e-32, 3.59004947e-30, 1.43350419e-30]))]
[13]:
grab_moles = ['H2O','CO2']
grab_p = [0.1,1,100] # bar
grab_t = [100,200,700] # kelvin

#here's a little code to get out the correct pair (so we dont have to worry about getting the exact number right)
ind_pt = [min(pt_pairs, key=lambda c: math.hypot(c[1]- coordinate[0], c[2]-coordinate[1]))[0]
          for coordinate in  zip(grab_p,grab_t)]

cur.execute("""SELECT molecule,ptid,opacity
            FROM molecular
            WHERE molecule in {}
            AND ptid in {}""".format(str(tuple(grab_moles)), str(tuple(ind_pt))))
data= cur.fetchall()
data #gives you the molecule, ptid, and the opacity
[13]:
[('CO2',
  31,
  array([1.43571297e-026, 3.25490659e-026, 3.43073124e-025, ...,
         1.00000000e-100, 1.00000000e-100, 1.00000000e-100])),
 ('CO2',
  233,
  array([1.27941777e-025, 2.17916842e-025, 6.48936375e-025, ...,
         1.00000000e-100, 1.00000000e-100, 1.00000000e-100])),
 ('CO2',
  797,
  array([1.53478614e-023, 1.54026464e-023, 1.54565221e-023, ...,
         1.00000000e-100, 1.00000000e-100, 1.00000000e-100])),
 ('H2O',
  31,
  array([1.61180990e-27, 1.15539216e-27, 8.68610646e-28, ...,
         6.69523391e-32, 3.59004947e-30, 1.43350419e-30])),
 ('H2O',
  233,
  array([4.22567213e-23, 4.64323856e-23, 5.21066789e-23, ...,
         9.05000775e-31, 2.01006992e-30, 4.16153422e-30])),
 ('H2O',
  797,
  array([4.28000195e-21, 4.33488333e-21, 4.39892620e-21, ...,
         6.75938219e-30, 6.30976171e-30, 7.28258852e-30]))]
[14]:
#Dont forget to close your connection!!!!
conn.close()

Creating a New Database from Scratch

Note on molecule names: Because picaso uses dict formatting to handle opacities, users can easily swap in different molecules.

For example, if I wanted to include CO2-H2 CIA absorption, I can add CO2H2 to the molecules list below. However, it is only quasi-automated in this regaurd. Please contact natasha.e.batalha@gmail.com if you are adding new CIA to the code.

Exceptions: The exceptions to this are non-CIA continuum opacities. Right now, the other sources of continuum enabled are H2-, H-bf and H-ff which have odd-ball formatting since they aren’t simple two molecules. Please let me know if you want to see another continuum source added.

Careful with case sensitive molecules like TiO, Na. Make sure you get these right.

[15]:
db_filename = '/data/picaso_dbs/new_fake_opacity.db'
conn = sqlite3.connect(db_filename, detect_types=sqlite3.PARSE_DECLTYPES)
#same story with bytes and arrays
def adapt_array(arr):
    out = io.BytesIO()
    np.save(out, arr)
    out.seek(0)
    return sqlite3.Binary(out.read())

def convert_array(text):
    out = io.BytesIO(text)
    out.seek(0)
    return np.load(out)

#tell sqlite what to do with an array
sqlite3.register_adapter(np.ndarray, adapt_array)
sqlite3.register_converter("array", convert_array)

cur = conn.cursor()

Build Tables: header, continuum, and molecular tables

It is VERY important that these tables are structured the same way. If you think something should be edited, ping natasha.e.batalha@gmail.com

[16]:
#header
command="""DROP TABLE IF EXISTS header;
CREATE TABLE header (
    id INTEGER PRIMARY KEY,
    pressure_unit VARCHAR,
    temperature_unit VARCHAR,
    wavenumber_grid array,
    continuum_unit VARCHAR,
    molecular_unit VARCHAR
    );"""

cur.executescript(command)
conn.commit()
#molecular data table, note the existence of PTID which will be very important
command = """DROP TABLE IF EXISTS molecular;
CREATE TABLE molecular (
    id INTEGER PRIMARY KEY,
    ptid INTEGER,
    molecule VARCHAR ,
    pressure FLOAT,
    temperature FLOAT,
    opacity array);"""

cur.executescript(command)
conn.commit()
#continuum data table
command = """DROP TABLE IF EXISTS continuum;
CREATE TABLE continuum (
    id INTEGER PRIMARY KEY,
    molecule VARCHAR ,
    temperature FLOAT,
    opacity array);"""

cur.executescript(command)
conn.commit() #this commits the changes to the database

Insert header info (unit and wave grid info!)

The units MUST be the same. The wave grid can be whatever as long as it’s consistent between continuum and molecular tables.

[17]:
wave_grid = np.linspace(1e4/2, 1e4/0.5, 1000) #fake inverse cm wavenumber grid

cur.execute('INSERT INTO header (pressure_unit, temperature_unit, wavenumber_grid, continuum_unit,molecular_unit) values (?,?,?,?,?)',
            ('bar','kelvin', np.array(wave_grid), 'cm-1 amagat-2', 'cm2/molecule'))
conn.commit()

Insert continuum opacity to database

[18]:
cia_temperature_grid = [100,300,500,700]
#insert continuum
for mol in ['H2H2', 'H2He', 'H2H', 'H2CH4', 'H2N2','H2-', 'H-bf', 'H-ff']:
    for T in cia_temperature_grid:
        OPACITY = wave_grid *0 + 1e-33 #INSERT YOUR OPACITY HERE
        cur.execute('INSERT INTO continuum (molecule, temperature, opacity) values (?,?,?)', (mol,float(T), OPACITY))
        conn.commit()

Insert molecular opacity to database

Again, make sure that your molecules are case-sensitive: e.g. Sodium should be Na not NA

[19]:
#create a fake PT grid
pts=[]
for temp in [100,200,400]:
    for pres in [0.1, 1, 100]:
        pts += [[temp,pres]]
pts
[19]:
[[100, 0.1],
 [100, 1],
 [100, 100],
 [200, 0.1],
 [200, 1],
 [200, 100],
 [400, 0.1],
 [400, 1],
 [400, 100]]
[20]:
#insert molecular
for mol in ['H2O','CO2','CH4']:
    i = 1 #NOTE THIS INDEX HERE IS CRUCIAL! It will be how we quickly locate opacities
    for T,P in pts:
        OPACITY = wave_grid *0 + 1e-33 #INSERT YOUR OPACITY HERE
        cur.execute('INSERT INTO molecular (ptid, molecule, temperature, pressure,opacity) values (?,?,?,?,?)', (i,mol,float(T),float(P), OPACITY))
        conn.commit()
        i+=1
[21]:
#ALL DONE!!!
conn.close()