-
Notifications
You must be signed in to change notification settings - Fork 632
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: Speed up choose_chunkdivision with geom_boxt_tree #1030
base: master
Are you sure you want to change the base?
WIP: Speed up choose_chunkdivision with geom_boxt_tree #1030
Conversation
src/meepgeom.cpp
Outdated
if (overlap > 0.0) { | ||
for (int j = 0; j < geom.num_items; ++j) { | ||
if (t->objects[i].o == geom.items + j) { | ||
overlaps[j] += overlap; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this loop becomes a limiting factor, we could do the lookup in another way, e.g. a hash table or an array of indices sorted by pointer address.
src/meepgeom.cpp
Outdated
|
||
for (int i = 0; i < geom.num_items; ++i) { | ||
geometric_object *go = &geom.items[i]; | ||
double overlap = overlaps[i]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be good to check if this roughly matches the overlaps computed for each object with the old code.
int temp_periodicity = ensure_periodicity; | ||
dimensions = 3; | ||
ensure_periodicity = 0; | ||
geom_tree = create_geom_box_tree0(*geom_, cell_box); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a simpler alternative to creating this tree, you could also just cache `geom_get_bounding_box` for each object, and check whether the bounding box overlaps before calling `box_overlap_with_object`. This only helps the constant factor, though, unlike a tree, which in principle can give better scaling.
geom_box_intersection(&intersection, &t->b, &box); | ||
|
||
for (int i = 0; i < t->nobjects; ++i) { | ||
if (geom_boxes_intersect(&intersection, &t->objects[i].box)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One potential optimization: if this intersection is the whole of `t->objects[i].box`, i.e. the object lies completely within the intersection, then the overlap is just the volume of the object. We don't currently have an API to get the object volume, however.
(I suspect that the base case in |
Depends on NanoComp/libctl#43 |
The following test can be used for benchmarking. This PR does not seem to be providing much of a performance improvement (and in fact does slightly worse) compared to master: master
this PR
from time import time
import meep as mp
from meep.materials import Al, ITO, fused_quartz
import math
import numpy as np
from random import random
# Source band, given as wavelengths and converted to frequencies (f = 1/lambda).
lambda_min = 0.4  # shortest source wavelength
lambda_max = 0.8  # longest source wavelength
fmin = 1 / lambda_max  # lowest source frequency
fmax = 1 / lambda_min  # highest source frequency
fcen = 0.5 * (fmin + fmax)  # center of the source band
df = fmax - fmin  # width of the source band

resolution = 55
nfreq = 800  # number of frequency points handed to every flux monitor
nperiods = 10  # lattice periods along the OLED (24 is noted as an alternative)

# Boundary-layer and material-layer thicknesses.
tABS = 2.2  # absorber thickness (X and Y boundaries)
tPML = 2.1  # PML thickness (high-Z boundary)
tGLS = 0.6  # fused-quartz (glass) layer
tITO = 0.5  # ITO layer
tORG = 0.4  # organic layer
tAl = 0.2  # aluminum layer

# Sphere-array parameters.
ps = 0.3  # lattice pitch
rs = 0.1  # sphere radius
dfrac = 0.2  # maximum random sphere displacement, as a fraction of ps

L = nperiods * ps  # length of OLED

# Length of the computational cell along Z.
sz = tPML + tGLS + tITO + tORG + rs + tAl

# Length of the computational cell along X and Y: the OLED length plus an
# absorber of thickness tABS on each side.
sxy = L + 2 * tABS
cell_size = mp.Vector3(sxy, sxy, sz)

# Absorbers on the X and Y boundaries; a PML on the high-Z side only.
boundary_layers = [
    mp.Absorber(tABS, direction=mp.X),
    mp.Absorber(tABS, direction=mp.Y),
    mp.PML(tPML, direction=mp.Z, side=mp.High),
]

ORG = mp.Medium(index=1.75)

# Planar layer stack, infinite in X and Y, listed from the high-Z end of the
# cell downward: fused quartz, ITO, organic, Al.
# NOTE(review): these z offsets use tABS, whereas sz and the flux monitors are
# built from tPML — confirm this is intended.
geometry = [
    mp.Block(
        material=fused_quartz,
        size=mp.Vector3(mp.inf, mp.inf, tABS + tGLS),
        center=mp.Vector3(z=0.5 * sz - 0.5 * (tABS + tGLS)),
    ),
    mp.Block(
        material=ITO,
        size=mp.Vector3(mp.inf, mp.inf, tITO),
        center=mp.Vector3(z=0.5 * sz - tABS - tGLS - 0.5 * tITO),
    ),
    mp.Block(
        material=ORG,
        size=mp.Vector3(mp.inf, mp.inf, tORG + rs),
        center=mp.Vector3(z=0.5 * sz - tABS - tGLS - tITO - 0.5 * (tORG + rs)),
    ),
    mp.Block(
        material=Al,
        size=mp.Vector3(mp.inf, mp.inf, tAl),
        center=mp.Vector3(z=0.5 * sz - tABS - tGLS - tITO - tORG - rs - 0.5 * tAl),
    ),
]

# Sphere lattice: ni sites per axis with pitch di. si is the coordinate of the
# first site, chosen so the row of sites is centered within the span sxy.
ni = math.floor(sxy / ps)
di = ps
si = -0.5 * sxy + 0.5 * ps + 0.5 * (sxy - ps * ni)
def sph(cx, cy):
    """Return an Al sphere of radius ``rs`` displaced from lattice site (cx, cy).

    A single random offset in ``[0, dfrac*ps)`` is added to both the x and
    the y coordinate of the site. The offset is drawn on the master process
    only and then broadcast from rank 0, so every process receives the same
    value.

    Args:
        cx: x coordinate of the lattice site.
        cy: y coordinate of the lattice site.

    Returns:
        An ``mp.Sphere`` whose center is the displaced site at
        ``z = 0.5*sz - tABS - tGLS - tITO - tORG - rs``.
    """
    # Non-master ranks pass a placeholder; bcast replaces it with rank 0's draw.
    dpos = dfrac * ps * random() if mp.am_master() else None
    dpos = mp.comm.bcast(dpos, root=0)
    return mp.Sphere(
        material=Al,
        radius=rs,
        center=mp.Vector3(
            cx + dpos,
            cy + dpos,
            0.5 * sz - tABS - tGLS - tITO - tORG - rs,
        ),
    )
# Place one displaced Al sphere on every site of the ni-by-ni lattice.
# The site coordinates are built from an integer index instead of np.arange
# with a float step, whose length can be off by one because of rounding and
# would then add a row of spheres beyond the intended lattice.
lattice_coords = si + di * np.arange(ni)
for cx in lattice_coords:
    for cy in lattice_coords:
        geometry.append(sph(cx, cy))
num_src = 20  # number of point sources

# Ez Gaussian sources on the z axis (x = y = 0). Source n sits a further
# 0.2*tORG*n/num_src below the first one, so the set spans just under
# 0.2*tORG in z.
sources = [
    mp.Source(
        mp.GaussianSource(fcen, fwidth=df),
        component=mp.Ez,
        center=mp.Vector3(
            z=0.5 * sz - tABS - tGLS - tITO - 0.4 * tORG - 0.2 * tORG * n / num_src
        ),
    )
    for n in range(num_src)
]
# Build the simulation with subpixel averaging and even chunk splitting both
# switched off.
sim = mp.Simulation(
    resolution=resolution,
    cell_size=cell_size,
    boundary_layers=boundary_layers,
    geometry=geometry,
    sources=sources,
    eps_averaging=False,
    split_chunks_evenly=False,
)
# Surround the source with a six-sided box of flux planes. The weights are +1
# on the top/+x/+y faces and -1 on the bottom/-x/-y faces.
srcbox_width = 0.05
srcbox_height = tITO + 0.8 * tORG  # z extent of the four side faces
srcbox_zc = 0.5 * sz - tPML - tGLS - 0.5 * (tITO + 0.8 * tORG)  # z center of the side faces

srcbox_top = sim.add_flux(
    fcen, df, nfreq,
    mp.FluxRegion(
        center=mp.Vector3(0, 0, 0.5 * sz - tPML - tGLS),
        size=mp.Vector3(srcbox_width, srcbox_width, 0),
        direction=mp.Z,
        weight=+1,
    ),
)
srcbox_bot = sim.add_flux(
    fcen, df, nfreq,
    mp.FluxRegion(
        center=mp.Vector3(0, 0, 0.5 * sz - tPML - tGLS - tITO - 0.8 * tORG),
        size=mp.Vector3(srcbox_width, srcbox_width, 0),
        direction=mp.Z,
        weight=-1,
    ),
)
srcbox_xp = sim.add_flux(
    fcen, df, nfreq,
    mp.FluxRegion(
        center=mp.Vector3(0.5 * srcbox_width, 0, srcbox_zc),
        size=mp.Vector3(0, srcbox_width, srcbox_height),
        direction=mp.X,
        weight=+1,
    ),
)
srcbox_xm = sim.add_flux(
    fcen, df, nfreq,
    mp.FluxRegion(
        center=mp.Vector3(-0.5 * srcbox_width, 0, srcbox_zc),
        size=mp.Vector3(0, srcbox_width, srcbox_height),
        direction=mp.X,
        weight=-1,
    ),
)
srcbox_yp = sim.add_flux(
    fcen, df, nfreq,
    mp.FluxRegion(
        center=mp.Vector3(0, 0.5 * srcbox_width, srcbox_zc),
        size=mp.Vector3(srcbox_width, 0, srcbox_height),
        direction=mp.Y,
        weight=+1,
    ),
)
srcbox_ym = sim.add_flux(
    fcen, df, nfreq,
    mp.FluxRegion(
        center=mp.Vector3(0, -0.5 * srcbox_width, srcbox_zc),
        size=mp.Vector3(srcbox_width, 0, srcbox_height),
        direction=mp.Y,
        weight=-1,
    ),
)

# padding for flux box to fully capture waveguide mode
fluxbox_dpad = 0.05
wvgbox_height = fluxbox_dpad + tITO + tORG + fluxbox_dpad  # z extent of the side faces
wvgbox_zc = 0.5 * sz - tPML - tGLS - 0.5 * (tITO + tORG)  # z center of the side faces

glass_flux = sim.add_flux(
    fcen, df, nfreq,
    mp.FluxRegion(
        center=mp.Vector3(0, 0, 0.5 * sz - tPML - (tGLS - fluxbox_dpad)),
        size=mp.Vector3(L, L, 0),
        direction=mp.Z,
        weight=+1,
    ),
)
wvgbox_xp = sim.add_flux(
    fcen, df, nfreq,
    mp.FluxRegion(
        center=mp.Vector3(0.5 * L, 0, wvgbox_zc),
        size=mp.Vector3(0, L, wvgbox_height),
        direction=mp.X,
        weight=+1,
    ),
)
wvgbox_xm = sim.add_flux(
    fcen, df, nfreq,
    mp.FluxRegion(
        center=mp.Vector3(-0.5 * L, 0, wvgbox_zc),
        size=mp.Vector3(0, L, wvgbox_height),
        direction=mp.X,
        weight=-1,
    ),
)
wvgbox_yp = sim.add_flux(
    fcen, df, nfreq,
    mp.FluxRegion(
        center=mp.Vector3(0, 0.5 * L, wvgbox_zc),
        size=mp.Vector3(L, 0, wvgbox_height),
        direction=mp.Y,
        weight=+1,
    ),
)
wvgbox_ym = sim.add_flux(
    fcen, df, nfreq,
    mp.FluxRegion(
        center=mp.Vector3(0, -0.5 * L, wvgbox_zc),
        size=mp.Vector3(L, 0, wvgbox_height),
        direction=mp.Y,
        weight=-1,
    ),
)

# Raise the verbosity level to 2, then initialize the simulation.
mp.verbosity(2)
sim.init_sim()
The latest commit provides just a slight improvement but the time for
Also, the latest commit seems to be ignoring the |
This is ready, although, as @oskooi says, it doesn't seem to make things any faster. |
Unrelated to this PR, @stevengj's
The time for |
No description provided.