plan9port

[fork] Plan 9 from user space
git clone git://src.adamsgaard.dk/plan9port # fast
git clone https://src.adamsgaard.dk/plan9port.git # slow
Log | Files | Refs | README | LICENSE Back to index

draw.c (56850B)


      1 #include <u.h>
      2 #include <libc.h>
      3 #include <draw.h>
      4 #include <memdraw.h>
      5 
      6 int drawdebug;
      7 static int	tablesbuilt;
      8 
      9 /* perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256 */
     10 #define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)
     11 
     12 /*
     13  * For 16-bit values, x / 255 == (t = x+1, (t+(t>>8)) >> 8).
     14  * We add another 127 to round to the nearest value rather
     15  * than truncate.
     16  *
     17  * CALCxy does x bytewise calculations on y input images (x=1,4; y=1,2).
     18  * CALC2x does two parallel 16-bit calculations on y input images (y=1,2).
     19  */
     20 #define CALC11(a, v, tmp) \
     21 	(tmp=(a)*(v)+128, (tmp+(tmp>>8))>>8)
     22 
     23 #define CALC12(a1, v1, a2, v2, tmp) \
     24 	(tmp=(a1)*(v1)+(a2)*(v2)+128, (tmp+(tmp>>8))>>8)
     25 
     26 #define MASK 0xFF00FF
     27 
     28 #define CALC21(a, vvuu, tmp) \
     29 	(tmp=(a)*(vvuu)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)
     30 
     31 #define CALC41(a, rgba, tmp1, tmp2) \
     32 	(CALC21(a, rgba & MASK, tmp1) | \
     33 	 (CALC21(a, (rgba>>8)&MASK, tmp2)<<8))
     34 
     35 #define CALC22(a1, vvuu1, a2, vvuu2, tmp) \
     36 	(tmp=(a1)*(vvuu1)+(a2)*(vvuu2)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)
     37 
     38 #define CALC42(a1, rgba1, a2, rgba2, tmp1, tmp2) \
     39 	(CALC22(a1, rgba1 & MASK, a2, rgba2 & MASK, tmp1) | \
     40 	 (CALC22(a1, (rgba1>>8) & MASK, a2, (rgba2>>8) & MASK, tmp2)<<8))
     41 
     42 static void mktables(void);
     43 typedef int Subdraw(Memdrawparam*);
     44 static Subdraw chardraw, alphadraw, memoptdraw;
     45 
     46 static Memimage*	memones;
     47 static Memimage*	memzeros;
     48 Memimage *memwhite;
     49 Memimage *memblack;
     50 Memimage *memtransparent;
     51 Memimage *memopaque;
     52 
     53 int	__ifmt(Fmt*);
     54 
     55 void
     56 memimageinit(void)
     57 {
     58 	static int didinit = 0;
     59 
     60 	if(didinit)
     61 		return;
     62 
     63 	didinit = 1;
     64 
     65 	mktables();
     66 	_memmkcmap();
     67 
     68 	fmtinstall('R', Rfmt);
     69 	fmtinstall('P', Pfmt);
     70 	fmtinstall('b', __ifmt);
     71 
     72 	memones = allocmemimage(Rect(0,0,1,1), GREY1);
     73 	memones->flags |= Frepl;
     74 	memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
     75 	*byteaddr(memones, ZP) = ~0;
     76 
     77 	memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
     78 	memzeros->flags |= Frepl;
     79 	memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
     80 	*byteaddr(memzeros, ZP) = 0;
     81 
     82 	if(memones == nil || memzeros == nil)
     83 		assert(0 /*cannot initialize memimage library */);	/* RSC BUG */
     84 
     85 	memwhite = memones;
     86 	memblack = memzeros;
     87 	memopaque = memones;
     88 	memtransparent = memzeros;
     89 }
     90 
     91 u32int _imgtorgba(Memimage*, u32int);
     92 u32int _rgbatoimg(Memimage*, u32int);
     93 u32int _pixelbits(Memimage*, Point);
     94 
     95 #define DBG if(drawdebug)
     96 static Memdrawparam par;
     97 
     98 Memdrawparam*
     99 _memimagedrawsetup(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
    100 {
    101 	if(mask == nil)
    102 		mask = memopaque;
    103 
    104 DBG	print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
    105 
    106 	if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
    107 /*		if(drawdebug) */
    108 /*			iprint("empty clipped rectangle\n"); */
    109 		return nil;
    110 	}
    111 
    112 	if(op < Clear || op > SoverD){
    113 /*		if(drawdebug) */
    114 /*			iprint("op out of range: %d\n", op); */
    115 		return nil;
    116 	}
    117 
    118 	par.op = op;
    119 	par.dst = dst;
    120 	par.r = r;
    121 	par.src = src;
    122 	/* par.sr set by drawclip */
    123 	par.mask = mask;
    124 	/* par.mr set by drawclip */
    125 
    126 	par.state = 0;
    127 	if(src->flags&Frepl){
    128 		par.state |= Replsrc;
    129 		if(Dx(src->r)==1 && Dy(src->r)==1){
    130 			par.sval = pixelbits(src, src->r.min);
    131 			par.state |= Simplesrc;
    132 			par.srgba = _imgtorgba(src, par.sval);
    133 			par.sdval = _rgbatoimg(dst, par.srgba);
    134 			if((par.srgba&0xFF) == 0 && (op&DoutS)){
    135 /*				if (drawdebug) iprint("fill with transparent source\n"); */
    136 				return nil;	/* no-op successfully handled */
    137 			}
    138 			if((par.srgba&0xFF) == 0xFF)
    139 				par.state |= Fullsrc;
    140 		}
    141 	}
    142 
    143 	if(mask->flags & Frepl){
    144 		par.state |= Replmask;
    145 		if(Dx(mask->r)==1 && Dy(mask->r)==1){
    146 			par.mval = pixelbits(mask, mask->r.min);
    147 			if(par.mval == 0 && (op&DoutS)){
    148 /*				if(drawdebug) iprint("fill with zero mask\n"); */
    149 				return nil;	/* no-op successfully handled */
    150 			}
    151 			par.state |= Simplemask;
    152 			if(par.mval == ~0)
    153 				par.state |= Fullmask;
    154 			par.mrgba = _imgtorgba(mask, par.mval);
    155 		}
    156 	}
    157 
    158 /*	if(drawdebug) */
    159 /*		iprint("dr %R sr %R mr %R...", r, par.sr, par.mr); */
    160 DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
    161 
    162 	return &par;
    163 }
    164 
    165 void
    166 _memimagedraw(Memdrawparam *par)
    167 {
    168 	/*
    169 	 * Now that we've clipped the parameters down to be consistent, we
    170 	 * simply try sub-drawing routines in order until we find one that was able
    171 	 * to handle us.  If the sub-drawing routine returns zero, it means it was
    172 	 * unable to satisfy the request, so we do not return.
    173 	 */
    174 
    175 	/*
    176 	 * Hardware support.  Each video driver provides this function,
    177 	 * which checks to see if there is anything it can help with.
    178 	 * There could be an if around this checking to see if dst is in video memory.
    179 	 */
    180 DBG print("test hwdraw\n");
    181 	if(hwdraw(par)){
    182 /*if(drawdebug) iprint("hw handled\n"); */
    183 DBG print("hwdraw handled\n");
    184 		return;
    185 	}
    186 	/*
    187 	 * Optimizations using memmove and memset.
    188 	 */
    189 DBG print("test memoptdraw\n");
    190 	if(memoptdraw(par)){
    191 /*if(drawdebug) iprint("memopt handled\n"); */
    192 DBG print("memopt handled\n");
    193 		return;
    194 	}
    195 
    196 	/*
    197 	 * Character drawing.
    198 	 * Solid source color being painted through a boolean mask onto a high res image.
    199 	 */
    200 DBG print("test chardraw\n");
    201 	if(chardraw(par)){
    202 /*if(drawdebug) iprint("chardraw handled\n"); */
    203 DBG print("chardraw handled\n");
    204 		return;
    205 	}
    206 
    207 	/*
    208 	 * General calculation-laden case that does alpha for each pixel.
    209 	 */
    210 DBG print("do alphadraw\n");
    211 	alphadraw(par);
    212 /*if(drawdebug) iprint("alphadraw handled\n"); */
    213 DBG print("alphadraw handled\n");
    214 }
    215 #undef DBG
    216 
    217 /*
    218  * Clip the destination rectangle further based on the properties of the
    219  * source and mask rectangles.  Once the destination rectangle is properly
    220  * clipped, adjust the source and mask rectangles to be the same size.
    221  * Then if source or mask is replicated, move its clipped rectangle
    222  * so that its minimum point falls within the repl rectangle.
    223  *
    224  * Return zero if the final rectangle is null.
         *
         * r, p0 and p1 are both inputs and outputs: on a non-zero return
         * *r is the clipped destination rectangle and *p0, *p1 are the
         * matching points in src and mask.  *sr and *mr are outputs only and
         * receive the source and mask rectangles, which have the same
         * dimensions as *r (asserted below).  On a zero return the
         * pointed-to values may already have been partly updated.
    225  */
    226 int
    227 drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
    228 {
    229 	Point rmin, delta;
    230 	int splitcoords;
    231 	Rectangle omr;
    232 
    233 	if(r->min.x>=r->max.x || r->min.y>=r->max.y)
    234 		return 0;
        	/* remember, before anything moves, whether mask and source use different origins */
    235 	splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
    236 	/* clip to destination */
    237 	rmin = r->min;
    238 	if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
    239 		return 0;
    240 	/* move mask point */
    241 	p1->x += r->min.x-rmin.x;
    242 	p1->y += r->min.y-rmin.y;
    243 	/* move source point */
    244 	p0->x += r->min.x-rmin.x;
    245 	p0->y += r->min.y-rmin.y;
    246 	/* map destination rectangle into source */
    247 	sr->min = *p0;
    248 	sr->max.x = p0->x+Dx(*r);
    249 	sr->max.y = p0->y+Dy(*r);
    250 	/* sr is r in source coordinates; clip to source */
        	/* a replicated source is clipped only by its clipr, not by its r */
    251 	if(!(src->flags&Frepl) && !rectclip(sr, src->r))
    252 		return 0;
    253 	if(!rectclip(sr, src->clipr))
    254 		return 0;
    255 	/* compute and clip rectangle in mask */
    256 	if(splitcoords){
    257 		/* move mask point with source */
    258 		p1->x += sr->min.x-p0->x;
    259 		p1->y += sr->min.y-p0->y;
    260 		mr->min = *p1;
    261 		mr->max.x = p1->x+Dx(*sr);
    262 		mr->max.y = p1->y+Dy(*sr);
    263 		omr = *mr;
    264 		/* mr is now rectangle in mask; clip it */
    265 		if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
    266 			return 0;
    267 		if(!rectclip(mr, mask->clipr))
    268 			return 0;
    269 		/* reflect any clips back to source */
    270 		sr->min.x += mr->min.x-omr.min.x;
    271 		sr->min.y += mr->min.y-omr.min.y;
    272 		sr->max.x += mr->max.x-omr.max.x;
    273 		sr->max.y += mr->max.y-omr.max.y;
    274 		*p1 = mr->min;
    275 	}else{
        		/* mask and source share coordinates, so sr itself can be clipped against the mask */
    276 		if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
    277 			return 0;
    278 		if(!rectclip(sr, mask->clipr))
    279 			return 0;
    280 		*p1 = sr->min;
    281 	}
    282 
    283 	/* move source clipping back to destination */
        	/* p0 still holds the source point from before sr was clipped */
    284 	delta.x = r->min.x - p0->x;
    285 	delta.y = r->min.y - p0->y;
    286 	r->min.x = sr->min.x + delta.x;
    287 	r->min.y = sr->min.y + delta.y;
    288 	r->max.x = sr->max.x + delta.x;
    289 	r->max.y = sr->max.y + delta.y;
    290 
    291 	/* move source rectangle so sr->min is in src->r */
    292 	if(src->flags&Frepl) {
    293 		delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
    294 		delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
    295 		sr->min.x += delta.x;
    296 		sr->min.y += delta.y;
    297 		sr->max.x += delta.x;
    298 		sr->max.y += delta.y;
    299 	}
    300 	*p0 = sr->min;
    301 
    302 	/* move mask point so it is in mask->r */
        	/* mr is rebuilt here from the final p1 and the size of sr, in both branches above */
    303 	*p1 = drawrepl(mask->r, *p1);
    304 	mr->min = *p1;
    305 	mr->max.x = p1->x+Dx(*sr);
    306 	mr->max.y = p1->y+Dy(*sr);
    307 
    308 	assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
    309 	assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
    310 	assert(ptinrect(*p0, src->r));
    311 	assert(ptinrect(*p1, mask->r));
    312 	assert(ptinrect(r->min, dst->r));
    313 
    314 	return 1;
    315 }
    316 
    317 /*
    318  * Conversion tables.
    319  */
    320 static uchar replbit[1+8][256];		/* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
    321 static uchar conv18[256][8];		/* conv18[x][y] is the yth pixel in the depth-1 pixel x */
    322 static uchar conv28[256][4];		/* ... */
    323 static uchar conv48[256][2];
    324 
    325 /*
    326  * bitmap of how to replicate n bits to fill 8, for 1 ≤ n ≤ 8.
    327  * the X's are where to put the bottom (ones) bit of the n-bit pattern.
    328  * only the top 8 bits of the result are actually used.
    329  * (the lower 8 bits are needed to get bits in the right place
    330  * when n is not a divisor of 8.)
    331  *
    332  * Should check to see if its easier to just refer to replmul than
    333  * use the precomputed values in replbit.  On PCs it may well
    334  * be; on machines with slow multiply instructions it probably isn't.
    335  */
    336 #define a ((((((((((((((((0
    337 #define X *2+1)
    338 #define _ *2)
    339 static int replmul[1+8] = {
    340 	0,
    341 	a X X X X X X X X X X X X X X X X,
    342 	a _ X _ X _ X _ X _ X _ X _ X _ X,
    343 	a _ _ X _ _ X _ _ X _ _ X _ _ X _,
    344 	a _ _ _ X _ _ _ X _ _ _ X _ _ _ X,
    345 	a _ _ _ _ X _ _ _ _ X _ _ _ _ X _,
    346 	a _ _ _ _ _ X _ _ _ _ _ X _ _ _ _,
    347 	a _ _ _ _ _ _ X _ _ _ _ _ _ X _ _,
    348 	a _ _ _ _ _ _ _ X _ _ _ _ _ _ _ X,
    349 };
    350 #undef a
    351 #undef X
    352 #undef _
    353 
    354 static void
    355 mktables(void)
    356 {
    357 	int i, j, mask, sh, small;
    358 
    359 	if(tablesbuilt)
    360 		return;
    361 
    362 	fmtinstall('R', Rfmt);
    363 	fmtinstall('P', Pfmt);
    364 	tablesbuilt = 1;
    365 
    366 	/* bit replication up to 8 bits */
    367 	for(i=0; i<256; i++){
    368 		for(j=0; j<=8; j++){	/* j <= 8 [sic] */
    369 			small = i & ((1<<j)-1);
    370 			replbit[j][i] = (small*replmul[j])>>8;
    371 		}
    372 	}
    373 
    374 	/* bit unpacking up to 8 bits, only powers of 2 */
    375 	for(i=0; i<256; i++){
    376 		for(j=0, sh=7, mask=1; j<8; j++, sh--)
    377 			conv18[i][j] = replbit[1][(i>>sh)&mask];
    378 
    379 		for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
    380 			conv28[i][j] = replbit[2][(i>>sh)&mask];
    381 
    382 		for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
    383 			conv48[i][j] = replbit[4][(i>>sh)&mask];
    384 	}
    385 }
    386 
    387 static uchar ones = 0xff;
    388 
    389 /*
    390  * General alpha drawing case.  Can handle anything.
    391  */
    392 typedef struct	Buffer	Buffer;
        /*
         * One scan line of pixels split into channels.  Read functions return
         * a Buffer, calc functions combine three of them, and the write
         * function stores one back (see the Readfn, Calcfn and Writefn
         * typedefs).  It is passed and returned by value, so callees are
         * free to advance the pointers as they walk the line.
         */
    393 struct Buffer {
    394 	/* used by most routines */
    395 	uchar	*red;
    396 	uchar	*grn;
    397 	uchar	*blu;
    398 	uchar	*alpha;	/* may be &ones instead of real data; see chanmatch */
    399 	uchar	*grey;
    400 	u32int	*rgba;	/* the pixel as a whole word, for the word-at-a-time paths */
    401 	int	delta;	/* number of bytes to add to pointer to get next pixel to the right */
    402 
    403 	/* used by boolcalc* for mask data */
    404 	uchar	*m;		/* ptr to mask data r.min byte; like p->bytermin */
    405 	int		mskip;	/* no. of left bits to skip in *m */
    406 	uchar	*bm;		/* ptr to mask data img->r.min byte; like p->bytey0s */
    407 	int		bmskip;	/* no. of left bits to skip in *bm */
    408 	uchar	*em;		/* ptr to mask data img->r.max.x byte; like p->bytey0e */
    409 	int		emskip;	/* no. of right bits to skip in *em */
    410 };
    411 
    412 typedef struct	Param	Param;
    413 typedef Buffer	Readfn(Param*, uchar*, int);
    414 typedef void	Writefn(Param*, uchar*, Buffer);
    415 typedef Buffer	Calcfn(Buffer, Buffer, Buffer, int, int, int);
    416 
    417 enum {
    418 	MAXBCACHE = 16
    419 };
    420 
    421 /* giant rathole to customize functions with */
        /*
         * One Param describes how to read (or write) one of the three images
         * taking part in an alphadraw.  getparam zeroes it and fills in the
         * image, rectangle, row addressing and buffer bookkeeping; alphadraw
         * then sets the remaining fields it needs.
         */
    422 struct Param {
    423 	Readfn	*replcall;	/* reader that replread stands in front of when replcache is set */
    424 	Readfn	*greymaskcall;	/* reader that greymaskread stands in front of */
    425 	Readfn	*convreadcall;
    426 	Writefn	*convwritecall;
    427 
    428 	Memimage *img;
    429 	Rectangle	r;
    430 	int	dx;	/* of r */
    431 	int	needbuf;	/* alphadraw sets this when src and dst share the same data */
    432 	int	convgrey;
    433 	int	alphaonly;	/* set for the mask when it has an alpha channel */
    434 
    435 	uchar	*bytey0s;		/* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
    436 	uchar	*bytermin;	/* byteaddr(Pt(r.min.x, img->r.min.y)) */
    437 	uchar	*bytey0e;		/* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
    438 	int		bwidth;	/* sizeof(u32int)*img->width; multiplied by a row number to step bytermin */
    439 
    440 	int	replcache;	/* if set, cache buffers */
    441 	Buffer	bcache[MAXBCACHE];
    442 	u32int	bfilled;
    443 	uchar	*bufbase;	/* drawbuf+bufoff, valid only after allocdrawbuf */
    444 	int	bufoff;	/* this image's offset into drawbuf */
    445 	int	bufdelta;	/* 4*dx: buffer space for one line */
    446 
    447 	int	dir;	/* 1 or -1: direction alphadraw steps through rows */
    448 
    449 	int	convbufoff;
    450 	uchar	*convbuf;	/* drawbuf+convbufoff */
    451 	Param	*convdpar;
    452 	int	convdx;
    453 };
    454 
    455 static uchar *drawbuf;
    456 static int	ndrawbuf;
    457 static int	mdrawbuf;
    458 static Param spar, mpar, dpar;	/* easier on the stacks */
    459 static Readfn	greymaskread, replread, readptr;
    460 static Writefn	nullwrite;
    461 static Calcfn	alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
    462 static Calcfn	boolcalc14, boolcalc236789, boolcalc1011;
    463 
    464 static Readfn*	readfn(Memimage*);
    465 static Readfn*	readalphafn(Memimage*);
    466 static Writefn*	writefn(Memimage*);
    467 
    468 static Calcfn*	boolcopyfn(Memimage*, Memimage*);
    469 static Readfn*	convfn(Memimage*, Param*, Memimage*, Param*);
    470 
    471 static Calcfn *alphacalc[Ncomp] =
        /*
         * General calc function for each drawing operator; alphadraw
         * indexes this with op.  The entry comments give the operator
         * for each slot.
         */
    472 {
    473 	alphacalc0,		/* Clear */
    474 	alphacalc14,		/* DoutS */
    475 	alphacalc2810,		/* SoutD */
    476 	alphacalc3679,		/* DxorS */
    477 	alphacalc14,		/* DinS */
    478 	alphacalc5,		/* D */
    479 	alphacalc3679,		/* DatopS */
    480 	alphacalc3679,		/* DoverS */
    481 	alphacalc2810,		/* SinD */
    482 	alphacalc3679,		/* SatopD */
    483 	alphacalc2810,		/* S */
    484 	alphacalc11,		/* SoverD */
    485 };
    486 
    487 static Calcfn *boolcalc[Ncomp] =
        /*
         * Calc function for each drawing operator, used by alphadraw
         * in place of alphacalc when the mask is GREY1 and the source
         * has no alpha channel.  Clear and D reuse the alphacalc
         * routines.
         */
    488 {
    489 	alphacalc0,		/* Clear */
    490 	boolcalc14,		/* DoutS */
    491 	boolcalc236789,		/* SoutD */
    492 	boolcalc236789,		/* DxorS */
    493 	boolcalc14,		/* DinS */
    494 	alphacalc5,		/* D */
    495 	boolcalc236789,		/* DatopS */
    496 	boolcalc236789,		/* DoverS */
    497 	boolcalc236789,		/* SinD */
    498 	boolcalc236789,		/* SatopD */
    499 	boolcalc1011,		/* S */
    500 	boolcalc1011,		/* SoverD */
    501 };
    502 
    503 static int
    504 allocdrawbuf(void)
    505 {
    506 	uchar *p;
    507 
    508 	if(ndrawbuf > mdrawbuf){
    509 		p = realloc(drawbuf, ndrawbuf);
    510 		if(p == nil){
    511 			werrstr("memimagedraw out of memory");
    512 			return -1;
    513 		}
    514 		drawbuf = p;
    515 		mdrawbuf = ndrawbuf;
    516 	}
    517 	return 0;
    518 }
    519 
    520 static void
    521 getparam(Param *p, Memimage *img, Rectangle r, int convgrey, int needbuf)
    522 {
    523 	int nbuf;
    524 
    525 	memset(p, 0, sizeof *p);
    526 
    527 	p->img = img;
    528 	p->r = r;
    529 	p->dx = Dx(r);
    530 	p->needbuf = needbuf;
    531 	p->convgrey = convgrey;
    532 
    533 	assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
    534 
    535 	p->bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
    536 	p->bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
    537 	p->bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
    538 	p->bwidth = sizeof(u32int)*img->width;
    539 
    540 	assert(p->bytey0s <= p->bytermin && p->bytermin <= p->bytey0e);
    541 
    542 	if(p->r.min.x == p->img->r.min.x)
    543 		assert(p->bytermin == p->bytey0s);
    544 
    545 	nbuf = 1;
    546 	if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
    547 		p->replcache = 1;
    548 		nbuf = Dy(img->r);
    549 	}
    550 	p->bufdelta = 4*p->dx;
    551 	p->bufoff = ndrawbuf;
    552 	ndrawbuf += p->bufdelta*nbuf;
    553 }
    554 
    555 static void
    556 clipy(Memimage *img, int *y)
    557 {
    558 	int dy;
    559 
    560 	dy = Dy(img->r);
    561 	if(*y == dy)
    562 		*y = 0;
    563 	else if(*y == -1)
    564 		*y = dy-1;
    565 	assert(0 <= *y && *y < dy);
    566 }
    567 
    568 static void
    569 dumpbuf(char *s, Buffer b, int n)
    570 {
    571 	int i;
    572 	uchar *p;
    573 
    574 	print("%s", s);
    575 	for(i=0; i<n; i++){
    576 		print(" ");
    577 		if(p=b.grey){
    578 			print(" k%.2uX", *p);
    579 			b.grey += b.delta;
    580 		}else{
    581 			if(p=b.red){
    582 				print(" r%.2uX", *p);
    583 				b.red += b.delta;
    584 			}
    585 			if(p=b.grn){
    586 				print(" g%.2uX", *p);
    587 				b.grn += b.delta;
    588 			}
    589 			if(p=b.blu){
    590 				print(" b%.2uX", *p);
    591 				b.blu += b.delta;
    592 			}
    593 		}
    594 		if((p=b.alpha) != &ones){
    595 			print(" α%.2uX", *p);
    596 			b.alpha += b.delta;
    597 		}
    598 	}
    599 	print("\n");
    600 }
    601 
    602 /*
    603  * For each scan line, we expand the pixels from source, mask, and destination
    604  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
    605  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
    606  * the readers need not copy the data: they can simply return pointers to the data.
    607  * If the destination image is grey and the source is not, it is converted using the NTSC
    608  * formula.
    609  *
    610  * Once we have all the channels, we call either rgbcalc or greycalc, depending on
    611  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
    612  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
    613  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
    614  * the calculator, and that buffer is passed to a function to write it to the destination.
    615  * If the buffer is already pointing at the destination, the writing function is a no-op.
         *
         * alphadraw returns 1 once every line has been drawn, or 0 if the
         * scratch buffer could not be allocated, in which case nothing is drawn.
         * It works through the file-static spar, mpar and dpar and the shared
         * drawbuf.
    616  */
    617 #define DBG if(drawdebug)
    618 static int
    619 alphadraw(Memdrawparam *par)
    620 {
    621 	int isgrey, starty, endy, op;
    622 	int needbuf, dsty, srcy, masky;
    623 	int y, dir, dx, dy;
    624 	Buffer bsrc, bdst, bmask;
    625 	Readfn *rdsrc, *rdmask, *rddst;
    626 	Calcfn *calc;
    627 	Writefn *wrdst;
    628 	Memimage *src, *mask, *dst;
    629 	Rectangle r, sr, mr;
    630 
    631 	if(drawdebug)
    632 		print("alphadraw %R\n", par->r);
    633 	r = par->r;
    634 	dx = Dx(r);
    635 	dy = Dy(r);
    636 
        	/* start the buffer size count afresh; each getparam below adds what it needs */
    637 	ndrawbuf = 0;
    638 
    639 	src = par->src;
    640 	mask = par->mask;
    641 	dst = par->dst;
    642 	sr = par->sr;
    643 	mr = par->mr;
    644 	op = par->op;
    645 
    646 	isgrey = dst->flags&Fgrey;
    647 
    648 	/*
    649 	 * Buffering when src and dst are the same bitmap is sufficient but not
    650 	 * necessary.  There are stronger conditions we could use.  We could
    651 	 * check to see if the rectangles intersect, and if simply moving in the
    652 	 * correct y direction can avoid the need to buffer.
    653 	 */
    654 	needbuf = (src->data == dst->data);
    655 
    656 	getparam(&spar, src, sr, isgrey, needbuf);
    657 	getparam(&dpar, dst, r, isgrey, needbuf);
    658 	getparam(&mpar, mask, mr, 0, needbuf);
    659 
        	/* when src and dst share data and dst starts at the higher address, go bottom-up */
    660 	dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
    661 	spar.dir = mpar.dir = dpar.dir = dir;
    662 
    663 	/*
    664 	 * If the mask is purely boolean, we can convert from src to dst format
    665 	 * when we read src, and then just copy it to dst where the mask tells us to.
    666 	 * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
    667 	 *
    668 	 * The computation is accomplished by assigning the function pointers as follows:
    669 	 *	rdsrc - read and convert source into dst format in a buffer
    670 	 * 	rdmask - convert mask to bytes, set pointer to it
    671 	 * 	rddst - fill with pointer to real dst data, but do no reads
    672 	 *	calc - copy src onto dst when mask says to.
    673 	 *	wrdst - do nothing
    674 	 * This is slightly sleazy, since things aren't doing exactly what their names say,
    675 	 * but it avoids a fair amount of code duplication to make this a case here
    676 	 * rather than have a separate booldraw.
    677 	 */
    678 /*if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth); */
    679 	if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
    680 /*if(drawdebug) iprint("boolcopy..."); */
    681 		rdsrc = convfn(dst, &dpar, src, &spar);
    682 		rddst = readptr;
    683 		rdmask = readfn(mask);
    684 		calc = boolcopyfn(dst, mask);
    685 		wrdst = nullwrite;
    686 	}else{
    687 		/* usual alphadraw parameter fetching */
    688 		rdsrc = readfn(src);
    689 		rddst = readfn(dst);
    690 		wrdst = writefn(dst);
    691 		calc = alphacalc[op];
    692 
    693 		/*
    694 		 * If there is no alpha channel, we'll ask for a grey channel
    695 		 * and pretend it is the alpha.
    696 		 */
    697 		if(mask->flags&Falpha){
    698 			rdmask = readalphafn(mask);
    699 			mpar.alphaonly = 1;
    700 		}else{
    701 			mpar.greymaskcall = readfn(mask);
    702 			mpar.convgrey = 1;
    703 			rdmask = greymaskread;
    704 
    705 			/*
    706 			 * Should really be above, but then boolcopyfns would have
    707 			 * to deal with bit alignment, and I haven't written that.
    708 			 *
    709 			 * This is a common case for things like ellipse drawing.
    710 			 * When there's no alpha involved and the mask is boolean,
    711 			 * we can avoid all the division and multiplication.
    712 			 */
    713 			if(mask->chan == GREY1 && !(src->flags&Falpha))
    714 				calc = boolcalc[op];
    715 			else if(op == SoverD && !(src->flags&Falpha))
    716 				calc = alphacalcS;
    717 		}
    718 	}
    719 
    720 	/*
    721 	 * If the image has a small enough repl rectangle,
    722 	 * we can just read each line once and cache them.
    723 	 */
    724 	if(spar.replcache){
    725 		spar.replcall = rdsrc;
    726 		rdsrc = replread;
    727 	}
    728 	if(mpar.replcache){
    729 		mpar.replcall = rdmask;
    730 		rdmask = replread;
    731 	}
    732 
    733 	if(allocdrawbuf() < 0)
    734 		return 0;
    735 
    736 	/*
    737 	 * Before we were saving only offsets from drawbuf in the parameter
    738 	 * structures; now that drawbuf has been grown to accommodate us,
    739 	 * we can fill in the pointers.
    740 	 */
    741 	spar.bufbase = drawbuf+spar.bufoff;
    742 	mpar.bufbase = drawbuf+mpar.bufoff;
    743 	dpar.bufbase = drawbuf+dpar.bufoff;
    744 	spar.convbuf = drawbuf+spar.convbufoff;
    745 
        	/* endy is one step beyond the last row in the direction of travel */
    746 	if(dir == 1){
    747 		starty = 0;
    748 		endy = dy;
    749 	}else{
    750 		starty = dy-1;
    751 		endy = -1;
    752 	}
    753 
    754 	/*
    755 	 * srcy, masky, and dsty are offsets from the top of their
    756 	 * respective Rectangles.  they need to be contained within
    757 	 * the rectangles, so clipy can keep them there without division.
    758  	 */
    759 	srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
    760 	masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
    761 	dsty = starty + r.min.y - dst->r.min.y;
    762 
    763 	assert(0 <= srcy && srcy < Dy(src->r));
    764 	assert(0 <= masky && masky < Dy(mask->r));
    765 	assert(0 <= dsty && dsty < Dy(dst->r));
    766 
    767 	if(drawdebug)
    768 		print("alphadraw: rdsrc=%p rdmask=%p rddst=%p calc=%p wrdst=%p\n",
    769 			rdsrc, rdmask, rddst, calc, wrdst);
    770 	for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
        		/* wrap each row offset back into its image after the step */
    771 		clipy(src, &srcy);
    772 		clipy(dst, &dsty);
    773 		clipy(mask, &masky);
    774 
    775 		bsrc = rdsrc(&spar, spar.bufbase, srcy);
    776 DBG print("[");
    777 		bmask = rdmask(&mpar, mpar.bufbase, masky);
    778 DBG print("]\n");
    779 		bdst = rddst(&dpar, dpar.bufbase, dsty);
    780 DBG		dumpbuf("src", bsrc, dx);
    781 DBG		dumpbuf("mask", bmask, dx);
    782 DBG		dumpbuf("dst", bdst, dx);
    783 		bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
    784 DBG		dumpbuf("bdst", bdst, dx);
    785 		wrdst(&dpar, dpar.bytermin+dsty*dpar.bwidth, bdst);
    786 	}
    787 
    788 	return 1;
    789 }
    790 #undef DBG
    791 
    792 static Buffer
    793 alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
    794 {
    795 	USED(grey);
    796 	USED(op);
    797 	memset(bdst.rgba, 0, dx*bdst.delta);
    798 	return bdst;
    799 }
    800 
    801 /*
    802  * Do the channels in the buffers match enough
    803  * that we can do word-at-a-time operations
    804  * on the pixels?
    805  */
    806 static int
    807 chanmatch(Buffer *bdst, Buffer *bsrc)
    808 {
    809 	uchar *drgb, *srgb;
    810 
    811 	/*
    812 	 * first, r, g, b must be in the same place
    813 	 * in the rgba word.
    814 	 */
    815 	drgb = (uchar*)bdst->rgba;
    816 	srgb = (uchar*)bsrc->rgba;
    817 	if(bdst->red - drgb != bsrc->red - srgb
    818 	|| bdst->blu - drgb != bsrc->blu - srgb
    819 	|| bdst->grn - drgb != bsrc->grn - srgb)
    820 		return 0;
    821 
    822 	/*
    823 	 * that implies alpha is in the same place,
    824 	 * if it is there at all (it might be == &ones).
    825 	 * if the destination is &ones, we can scribble
    826 	 * over the rgba slot just fine.
    827 	 */
    828 	if(bdst->alpha == &ones)
    829 		return 1;
    830 
    831 	/*
    832 	 * if the destination is not ones but the src is,
    833 	 * then the simultaneous calculation will use
    834 	 * bogus bytes from the src's rgba.  no good.
    835 	 */
    836 	if(bsrc->alpha == &ones)
    837 		return 0;
    838 
    839 	/*
    840 	 * otherwise, alphas are in the same place.
    841 	 */
    842 	return 1;
    843 }
    844 
    845 static Buffer
    846 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
    847 {
    848 	Buffer obdst;
    849 	int fd, sadelta;
    850 	int i, sa, ma, q;
    851 	u32int t, t1;
    852 
    853 	obdst = bdst;
    854 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
    855 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
    856 
    857 	for(i=0; i<dx; i++){
    858 		sa = *bsrc.alpha;
    859 		ma = *bmask.alpha;
    860 		fd = CALC11(sa, ma, t);
    861 		if(op == DoutS)
    862 			fd = 255-fd;
    863 
    864 		if(grey){
    865 			*bdst.grey = CALC11(fd, *bdst.grey, t);
    866 			bsrc.grey += bsrc.delta;
    867 			bdst.grey += bdst.delta;
    868 		}else{
    869 			if(q){
    870 				*bdst.rgba = CALC41(fd, *bdst.rgba, t, t1);
    871 				bsrc.rgba++;
    872 				bdst.rgba++;
    873 				bsrc.alpha += sadelta;
    874 				bmask.alpha += bmask.delta;
    875 				continue;
    876 			}
    877 			*bdst.red = CALC11(fd, *bdst.red, t);
    878 			*bdst.grn = CALC11(fd, *bdst.grn, t);
    879 			*bdst.blu = CALC11(fd, *bdst.blu, t);
    880 			bsrc.red += bsrc.delta;
    881 			bsrc.blu += bsrc.delta;
    882 			bsrc.grn += bsrc.delta;
    883 			bdst.red += bdst.delta;
    884 			bdst.blu += bdst.delta;
    885 			bdst.grn += bdst.delta;
    886 		}
    887 		if(bdst.alpha != &ones){
    888 			*bdst.alpha = CALC11(fd, *bdst.alpha, t);
    889 			bdst.alpha += bdst.delta;
    890 		}
    891 		bmask.alpha += bmask.delta;
    892 		bsrc.alpha += sadelta;
    893 	}
    894 	return obdst;
    895 }
    896 
    897 static Buffer
    898 alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
    899 {
    900 	Buffer obdst;
    901 	int fs, sadelta;
    902 	int i, ma, da, q;
    903 	u32int t, t1;
    904 
    905 	obdst = bdst;
    906 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
    907 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
    908 
    909 	for(i=0; i<dx; i++){
    910 		ma = *bmask.alpha;
    911 		da = *bdst.alpha;
    912 		if(op == SoutD)
    913 			da = 255-da;
    914 		fs = ma;
    915 		if(op != S)
    916 			fs = CALC11(fs, da, t);
    917 
    918 		if(grey){
    919 			*bdst.grey = CALC11(fs, *bsrc.grey, t);
    920 			bsrc.grey += bsrc.delta;
    921 			bdst.grey += bdst.delta;
    922 		}else{
    923 			if(q){
    924 				*bdst.rgba = CALC41(fs, *bsrc.rgba, t, t1);
    925 				bsrc.rgba++;
    926 				bdst.rgba++;
    927 				bmask.alpha += bmask.delta;
    928 				bdst.alpha += bdst.delta;
    929 				continue;
    930 			}
    931 			*bdst.red = CALC11(fs, *bsrc.red, t);
    932 			*bdst.grn = CALC11(fs, *bsrc.grn, t);
    933 			*bdst.blu = CALC11(fs, *bsrc.blu, t);
    934 			bsrc.red += bsrc.delta;
    935 			bsrc.blu += bsrc.delta;
    936 			bsrc.grn += bsrc.delta;
    937 			bdst.red += bdst.delta;
    938 			bdst.blu += bdst.delta;
    939 			bdst.grn += bdst.delta;
    940 		}
    941 		if(bdst.alpha != &ones){
    942 			*bdst.alpha = CALC11(fs, *bsrc.alpha, t);
    943 			bdst.alpha += bdst.delta;
    944 		}
    945 		bmask.alpha += bmask.delta;
    946 		bsrc.alpha += sadelta;
    947 	}
    948 	return obdst;
    949 }
    950 
    951 static Buffer
    952 alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
    953 {
    954 	Buffer obdst;
    955 	int fs, fd, sadelta;
    956 	int i, sa, ma, da, q;
    957 	u32int t, t1;
    958 
    959 	obdst = bdst;
    960 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
    961 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
    962 
    963 	for(i=0; i<dx; i++){
    964 		sa = *bsrc.alpha;
    965 		ma = *bmask.alpha;
    966 		da = *bdst.alpha;
    967 		if(op == SatopD)
    968 			fs = CALC11(ma, da, t);
    969 		else
    970 			fs = CALC11(ma, 255-da, t);
    971 		if(op == DoverS)
    972 			fd = 255;
    973 		else{
    974 			fd = CALC11(sa, ma, t);
    975 			if(op != DatopS)
    976 				fd = 255-fd;
    977 		}
    978 
    979 		if(grey){
    980 			*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
    981 			bsrc.grey += bsrc.delta;
    982 			bdst.grey += bdst.delta;
    983 		}else{
    984 			if(q){
    985 				*bdst.rgba = CALC42(fs, *bsrc.rgba, fd, *bdst.rgba, t, t1);
    986 				bsrc.rgba++;
    987 				bdst.rgba++;
    988 				bsrc.alpha += sadelta;
    989 				bmask.alpha += bmask.delta;
    990 				bdst.alpha += bdst.delta;
    991 				continue;
    992 			}
    993 			*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
    994 			*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
    995 			*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
    996 			bsrc.red += bsrc.delta;
    997 			bsrc.blu += bsrc.delta;
    998 			bsrc.grn += bsrc.delta;
    999 			bdst.red += bdst.delta;
   1000 			bdst.blu += bdst.delta;
   1001 			bdst.grn += bdst.delta;
   1002 		}
   1003 		if(bdst.alpha != &ones){
   1004 			*bdst.alpha = CALC12(fs, sa, fd, da, t);
   1005 			bdst.alpha += bdst.delta;
   1006 		}
   1007 		bmask.alpha += bmask.delta;
   1008 		bsrc.alpha += sadelta;
   1009 	}
   1010 	return obdst;
   1011 }
   1012 
   1013 static Buffer
   1014 alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
   1015 {
   1016 	USED(dx);
   1017 	USED(grey);
   1018 	USED(op);
   1019 	return bdst;
   1020 }
   1021 
   1022 static Buffer
   1023 alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
   1024 {
   1025 	Buffer obdst;
   1026 	int fd, sadelta;
   1027 	int i, sa, ma, q;
   1028 	u32int t, t1;
   1029 
   1030 	USED(op);
   1031 	obdst = bdst;
   1032 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
   1033 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
   1034 
   1035 	for(i=0; i<dx; i++){
   1036 		sa = *bsrc.alpha;
   1037 		ma = *bmask.alpha;
   1038 		fd = 255-CALC11(sa, ma, t);
   1039 
   1040 		if(grey){
   1041 			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
   1042 			bsrc.grey += bsrc.delta;
   1043 			bdst.grey += bdst.delta;
   1044 		}else{
   1045 			if(q){
   1046 				*bdst.rgba = CALC42(ma, *bsrc.rgba, fd, *bdst.rgba, t, t1);
   1047 				bsrc.rgba++;
   1048 				bdst.rgba++;
   1049 				bsrc.alpha += sadelta;
   1050 				bmask.alpha += bmask.delta;
   1051 				continue;
   1052 			}
   1053 			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
   1054 			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
   1055 			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
   1056 			bsrc.red += bsrc.delta;
   1057 			bsrc.blu += bsrc.delta;
   1058 			bsrc.grn += bsrc.delta;
   1059 			bdst.red += bdst.delta;
   1060 			bdst.blu += bdst.delta;
   1061 			bdst.grn += bdst.delta;
   1062 		}
   1063 		if(bdst.alpha != &ones){
   1064 			*bdst.alpha = CALC12(ma, sa, fd, *bdst.alpha, t);
   1065 			bdst.alpha += bdst.delta;
   1066 		}
   1067 		bmask.alpha += bmask.delta;
   1068 		bsrc.alpha += sadelta;
   1069 	}
   1070 	return obdst;
   1071 }
   1072 
   1073 /*
Not used yet.
Fast path for when source and mask alpha are both 1 (fully opaque).
   1076 static Buffer
   1077 alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
   1078 {
   1079 	Buffer obdst;
   1080 	int i;
   1081 
   1082 	USED(op);
   1083 	obdst = bdst;
   1084 	if(bsrc.delta == bdst.delta){
   1085 		memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
   1086 		return obdst;
   1087 	}
   1088 	for(i=0; i<dx; i++){
   1089 		if(grey){
   1090 			*bdst.grey = *bsrc.grey;
   1091 			bsrc.grey += bsrc.delta;
   1092 			bdst.grey += bdst.delta;
   1093 		}else{
   1094 			*bdst.red = *bsrc.red;
   1095 			*bdst.grn = *bsrc.grn;
   1096 			*bdst.blu = *bsrc.blu;
   1097 			bsrc.red += bsrc.delta;
   1098 			bsrc.blu += bsrc.delta;
   1099 			bsrc.grn += bsrc.delta;
   1100 			bdst.red += bdst.delta;
   1101 			bdst.blu += bdst.delta;
   1102 			bdst.grn += bdst.delta;
   1103 		}
   1104 		if(bdst.alpha != &ones){
   1105 			*bdst.alpha = 255;
   1106 			bdst.alpha += bdst.delta;
   1107 		}
   1108 	}
   1109 	return obdst;
   1110 }
   1111 */
   1112 
   1113 /* source alpha 1 */
   1114 static Buffer
   1115 alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
   1116 {
   1117 	Buffer obdst;
   1118 	int fd;
   1119 	int i, ma;
   1120 	u32int t;
   1121 
   1122 	USED(op);
   1123 	obdst = bdst;
   1124 
   1125 	for(i=0; i<dx; i++){
   1126 		ma = *bmask.alpha;
   1127 		fd = 255-ma;
   1128 
   1129 		if(grey){
   1130 			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
   1131 			bsrc.grey += bsrc.delta;
   1132 			bdst.grey += bdst.delta;
   1133 		}else{
   1134 			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
   1135 			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
   1136 			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
   1137 			bsrc.red += bsrc.delta;
   1138 			bsrc.blu += bsrc.delta;
   1139 			bsrc.grn += bsrc.delta;
   1140 			bdst.red += bdst.delta;
   1141 			bdst.blu += bdst.delta;
   1142 			bdst.grn += bdst.delta;
   1143 		}
   1144 		if(bdst.alpha != &ones){
   1145 			*bdst.alpha = ma+CALC11(fd, *bdst.alpha, t);
   1146 			bdst.alpha += bdst.delta;
   1147 		}
   1148 		bmask.alpha += bmask.delta;
   1149 	}
   1150 	return obdst;
   1151 }
   1152 
   1153 static Buffer
   1154 boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
   1155 {
   1156 	Buffer obdst;
   1157 	int i, ma, zero;
   1158 
   1159 	obdst = bdst;
   1160 
   1161 	for(i=0; i<dx; i++){
   1162 		ma = *bmask.alpha;
   1163 		zero = ma ? op == DoutS : op == DinS;
   1164 
   1165 		if(grey){
   1166 			if(zero)
   1167 				*bdst.grey = 0;
   1168 			bdst.grey += bdst.delta;
   1169 		}else{
   1170 			if(zero)
   1171 				*bdst.red = *bdst.grn = *bdst.blu = 0;
   1172 			bdst.red += bdst.delta;
   1173 			bdst.blu += bdst.delta;
   1174 			bdst.grn += bdst.delta;
   1175 		}
   1176 		bmask.alpha += bmask.delta;
   1177 		if(bdst.alpha != &ones){
   1178 			if(zero)
   1179 				*bdst.alpha = 0;
   1180 			bdst.alpha += bdst.delta;
   1181 		}
   1182 	}
   1183 	return obdst;
   1184 }
   1185 
   1186 static Buffer
   1187 boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
   1188 {
   1189 	Buffer obdst;
   1190 	int fs, fd;
   1191 	int i, ma, da, zero;
   1192 	u32int t;
   1193 
   1194 	obdst = bdst;
   1195 	zero = !(op&1);
   1196 
   1197 	for(i=0; i<dx; i++){
   1198 		ma = *bmask.alpha;
   1199 		da = *bdst.alpha;
   1200 		fs = da;
   1201 		if(op&2)
   1202 			fs = 255-da;
   1203 		fd = 0;
   1204 		if(op&4)
   1205 			fd = 255;
   1206 
   1207 		if(grey){
   1208 			if(ma)
   1209 				*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
   1210 			else if(zero)
   1211 				*bdst.grey = 0;
   1212 			bsrc.grey += bsrc.delta;
   1213 			bdst.grey += bdst.delta;
   1214 		}else{
   1215 			if(ma){
   1216 				*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
   1217 				*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
   1218 				*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
   1219 			}
   1220 			else if(zero)
   1221 				*bdst.red = *bdst.grn = *bdst.blu = 0;
   1222 			bsrc.red += bsrc.delta;
   1223 			bsrc.blu += bsrc.delta;
   1224 			bsrc.grn += bsrc.delta;
   1225 			bdst.red += bdst.delta;
   1226 			bdst.blu += bdst.delta;
   1227 			bdst.grn += bdst.delta;
   1228 		}
   1229 		bmask.alpha += bmask.delta;
   1230 		if(bdst.alpha != &ones){
   1231 			if(ma)
   1232 				*bdst.alpha = fs+CALC11(fd, da, t);
   1233 			else if(zero)
   1234 				*bdst.alpha = 0;
   1235 			bdst.alpha += bdst.delta;
   1236 		}
   1237 	}
   1238 	return obdst;
   1239 }
   1240 
   1241 static Buffer
   1242 boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
   1243 {
   1244 	Buffer obdst;
   1245 	int i, ma, zero;
   1246 
   1247 	obdst = bdst;
   1248 	zero = !(op&1);
   1249 
   1250 	for(i=0; i<dx; i++){
   1251 		ma = *bmask.alpha;
   1252 
   1253 		if(grey){
   1254 			if(ma)
   1255 				*bdst.grey = *bsrc.grey;
   1256 			else if(zero)
   1257 				*bdst.grey = 0;
   1258 			bsrc.grey += bsrc.delta;
   1259 			bdst.grey += bdst.delta;
   1260 		}else{
   1261 			if(ma){
   1262 				*bdst.red = *bsrc.red;
   1263 				*bdst.grn = *bsrc.grn;
   1264 				*bdst.blu = *bsrc.blu;
   1265 			}
   1266 			else if(zero)
   1267 				*bdst.red = *bdst.grn = *bdst.blu = 0;
   1268 			bsrc.red += bsrc.delta;
   1269 			bsrc.blu += bsrc.delta;
   1270 			bsrc.grn += bsrc.delta;
   1271 			bdst.red += bdst.delta;
   1272 			bdst.blu += bdst.delta;
   1273 			bdst.grn += bdst.delta;
   1274 		}
   1275 		bmask.alpha += bmask.delta;
   1276 		if(bdst.alpha != &ones){
   1277 			if(ma)
   1278 				*bdst.alpha = 255;
   1279 			else if(zero)
   1280 				*bdst.alpha = 0;
   1281 			bdst.alpha += bdst.delta;
   1282 		}
   1283 	}
   1284 	return obdst;
   1285 }
   1286 /*
   1287  * Replicated cached scan line read.  Call the function listed in the Param,
   1288  * but cache the result so that for replicated images we only do the work once.
   1289  */
   1290 static Buffer
   1291 replread(Param *p, uchar *s, int y)
   1292 {
   1293 	Buffer *b;
   1294 
   1295 	USED(s);
   1296 	b = &p->bcache[y];
   1297 	if((p->bfilled & (1<<y)) == 0){
   1298 		p->bfilled |= 1<<y;
   1299 		*b = p->replcall(p, p->bufbase+y*p->bufdelta, y);
   1300 	}
   1301 	return *b;
   1302 }
   1303 
   1304 /*
   1305  * Alpha reading function that simply relabels the grey pointer.
   1306  */
   1307 static Buffer
   1308 greymaskread(Param *p, uchar *buf, int y)
   1309 {
   1310 	Buffer b;
   1311 
   1312 	b = p->greymaskcall(p, buf, y);
   1313 	b.alpha = b.grey;
   1314 	return b;
   1315 }
   1316 
#define DBG if(0)
/*
 * Read p->dx pixels of scan line y from an image with fewer than
 * 8 bits per pixel, writing one byte per pixel into buf.  Each packed
 * pixel value is translated through replbit[depth] (presumably the
 * value replicated out to a full byte -- the table is defined
 * elsewhere).  The returned Buffer has all colour pointers aimed at
 * buf, alpha set to &ones and delta 1.
 *
 * The line is assembled in up to three pieces so that replicated
 * images wrap around: from p->r.min.x to the image's right edge,
 * from the image's left edge up to p->r.min.x, and finally by
 * repeating what has already been written to buf.
 */
static Buffer
readnbit(Param *p, uchar *buf, int y)
{
	Buffer b;
	Memimage *img;
	uchar *repl, *r, *w, *ow, bits;
	int i, n, sh, depth, x, dx, npack, nbits;

	memset(&b, 0, sizeof b);
	b.rgba = (u32int*)buf;
	b.grey = w = buf;
	b.red = b.blu = b.grn = w;
	b.alpha = &ones;
	b.delta = 1;

	dx = p->dx;
	img = p->img;
	depth = img->depth;
	repl = &replbit[depth][0];
	npack = 8/depth;	/* pixels per byte */
	sh = 8-depth;		/* shift that brings the top pixel of a byte down to the low bits */

	/* copy from p->r.min.x until end of repl rectangle */
	x = p->r.min.x;
	n = dx;
	if(n > p->img->r.max.x - x)
		n = p->img->r.max.x - x;

	r = p->bytermin + y*p->bwidth;
DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
	bits = *r++;
	nbits = 8;
	/* x is not at a byte boundary: discard the pixels to its left in the first byte */
	if(i=x&(npack-1)){
DBG print("throwaway %d...", i);
		bits <<= depth*i;
		nbits -= depth*i;
	}
	for(i=0; i<n; i++){
		/* current byte used up: fetch the next one */
		if(nbits == 0){
DBG print("(%.2ux)...", *r);
			bits = *r++;
			nbits = 8;
		}
		/* the next pixel is always in the high bits of bits */
		*w++ = repl[bits>>sh];
DBG print("bit %x...", repl[bits>>sh]);
		bits <<= depth;
		nbits -= depth;
	}
	dx -= n;
	if(dx == 0)
		return b;

	assert(x+i == p->img->r.max.x);

	/* copy from beginning of repl rectangle until where we were before. */
	x = p->img->r.min.x;
	n = dx;
	if(n > p->r.min.x - x)
		n = p->r.min.x - x;

	r = p->bytey0s + y*p->bwidth;
DBG print("x=%d r=%p...", x, r);
	bits = *r++;
	nbits = 8;
	/* same sub-byte alignment adjustment as above, now for the image's left edge */
	if(i=x&(npack-1)){
		bits <<= depth*i;
		nbits -= depth*i;
	}
DBG print("nbits=%d...", nbits);
	for(i=0; i<n; i++){
		if(nbits == 0){
			bits = *r++;
			nbits = 8;
		}
		*w++ = repl[bits>>sh];
DBG print("bit %x...", repl[bits>>sh]);
		bits <<= depth;
		nbits -= depth;
DBG print("bits %x nbits %d...", bits, nbits);
	}
	dx -= n;
	if(dx == 0)
		return b;

	assert(dx > 0);
	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
	ow = buf;
	/* byte-at-a-time forward copy: w may catch up with bytes this loop wrote, which is the intent */
	while(dx--)
		*w++ = *ow++;

	return b;
}
#undef DBG
   1411 
   1412 #define DBG if(0)
   1413 static void
   1414 writenbit(Param *p, uchar *w, Buffer src)
   1415 {
   1416 	uchar *r;
   1417 	u32int bits;
   1418 	int i, sh, depth, npack, nbits, x, ex;
   1419 
   1420 	assert(src.grey != nil && src.delta == 1);
   1421 
   1422 	x = p->r.min.x;
   1423 	ex = x+p->dx;
   1424 	depth = p->img->depth;
   1425 	npack = 8/depth;
   1426 
   1427 	i=x&(npack-1);
   1428 	bits = i ? (*w >> (8-depth*i)) : 0;
   1429 	nbits = depth*i;
   1430 	sh = 8-depth;
   1431 	r = src.grey;
   1432 
   1433 	for(; x<ex; x++){
   1434 		bits <<= depth;
   1435 DBG print(" %x", *r);
   1436 		bits |= (*r++ >> sh);
   1437 		nbits += depth;
   1438 		if(nbits == 8){
   1439 			*w++ = bits;
   1440 			nbits = 0;
   1441 		}
   1442 	}
   1443 
   1444 	if(nbits){
   1445 		sh = 8-nbits;
   1446 		bits <<= sh;
   1447 		bits |= *w & ((1<<sh)-1);
   1448 		*w = bits;
   1449 	}
   1450 DBG print("\n");
   1451 	return;
   1452 }
   1453 #undef DBG
   1454 
   1455 static Buffer
   1456 readcmap(Param *p, uchar *buf, int y)
   1457 {
   1458 	Buffer b;
   1459 	int a, convgrey, copyalpha, dx, i, m;
   1460 	uchar *q, *cmap, *begin, *end, *r, *w;
   1461 
   1462 	memset(&b, 0, sizeof b);
   1463 	begin = p->bytey0s + y*p->bwidth;
   1464 	r = p->bytermin + y*p->bwidth;
   1465 	end = p->bytey0e + y*p->bwidth;
   1466 	cmap = p->img->cmap->cmap2rgb;
   1467 	convgrey = p->convgrey;
   1468 	copyalpha = (p->img->flags&Falpha) ? 1 : 0;
   1469 
   1470 	w = buf;
   1471 	dx = p->dx;
   1472 	if(copyalpha){
   1473 		b.alpha = buf++;
   1474 		a = p->img->shift[CAlpha]/8;
   1475 		m = p->img->shift[CMap]/8;
   1476 		for(i=0; i<dx; i++){
   1477 			*w++ = r[a];
   1478 			q = cmap+r[m]*3;
   1479 			r += 2;
   1480 			if(r == end)
   1481 				r = begin;
   1482 			if(convgrey){
   1483 				*w++ = RGB2K(q[0], q[1], q[2]);
   1484 			}else{
   1485 				*w++ = q[2];	/* blue */
   1486 				*w++ = q[1];	/* green */
   1487 				*w++ = q[0];	/* red */
   1488 			}
   1489 		}
   1490 	}else{
   1491 		b.alpha = &ones;
   1492 		for(i=0; i<dx; i++){
   1493 			q = cmap+*r++*3;
   1494 			if(r == end)
   1495 				r = begin;
   1496 			if(convgrey){
   1497 				*w++ = RGB2K(q[0], q[1], q[2]);
   1498 			}else{
   1499 				*w++ = q[2];	/* blue */
   1500 				*w++ = q[1];	/* green */
   1501 				*w++ = q[0];	/* red */
   1502 			}
   1503 		}
   1504 	}
   1505 
   1506 	b.rgba = (u32int*)(buf-copyalpha);
   1507 
   1508 	if(convgrey){
   1509 		b.grey = buf;
   1510 		b.red = b.blu = b.grn = buf;
   1511 		b.delta = 1+copyalpha;
   1512 	}else{
   1513 		b.blu = buf;
   1514 		b.grn = buf+1;
   1515 		b.red = buf+2;
   1516 		b.grey = nil;
   1517 		b.delta = 3+copyalpha;
   1518 	}
   1519 	return b;
   1520 }
   1521 
   1522 static void
   1523 writecmap(Param *p, uchar *w, Buffer src)
   1524 {
   1525 	uchar *cmap, *red, *grn, *blu;
   1526 	int i, dx, delta;
   1527 
   1528 	cmap = p->img->cmap->rgb2cmap;
   1529 
   1530 	delta = src.delta;
   1531 	red= src.red;
   1532 	grn = src.grn;
   1533 	blu = src.blu;
   1534 
   1535 	dx = p->dx;
   1536 	for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
   1537 		*w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
   1538 }
   1539 
   1540 #define DBG if(drawdebug)
   1541 static Buffer
   1542 readbyte(Param *p, uchar *buf, int y)
   1543 {
   1544 	Buffer b;
   1545 	Memimage *img;
   1546 	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
   1547 	uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
   1548 	uchar ured, ugrn, ublu;
   1549 	u32int u;
   1550 
   1551 	img = p->img;
   1552 	begin = p->bytey0s + y*p->bwidth;
   1553 	r = p->bytermin + y*p->bwidth;
   1554 	end = p->bytey0e + y*p->bwidth;
   1555 
   1556 	w = buf;
   1557 	dx = p->dx;
   1558 	nb = img->depth/8;
   1559 
   1560 	convgrey = p->convgrey;	/* convert rgb to grey */
   1561 	isgrey = img->flags&Fgrey;
   1562 	alphaonly = p->alphaonly;
   1563 	copyalpha = (img->flags&Falpha) ? 1 : 0;
   1564 
   1565 	/* if we can, avoid processing everything */
   1566 	if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
   1567 		memset(&b, 0, sizeof b);
   1568 		if(p->needbuf){
   1569 			memmove(buf, r, dx*nb);
   1570 			r = buf;
   1571 		}
   1572 		b.rgba = (u32int*)r;
   1573 		if(copyalpha)
   1574 			b.alpha = r+img->shift[CAlpha]/8;
   1575 		else
   1576 			b.alpha = &ones;
   1577 		if(isgrey){
   1578 			b.grey = r+img->shift[CGrey]/8;
   1579 			b.red = b.grn = b.blu = b.grey;
   1580 		}else{
   1581 			b.red = r+img->shift[CRed]/8;
   1582 			b.grn = r+img->shift[CGreen]/8;
   1583 			b.blu = r+img->shift[CBlue]/8;
   1584 		}
   1585 		b.delta = nb;
   1586 		return b;
   1587 	}
   1588 
   1589 	rrepl = replbit[img->nbits[CRed]];
   1590 	grepl = replbit[img->nbits[CGreen]];
   1591 	brepl = replbit[img->nbits[CBlue]];
   1592 	arepl = replbit[img->nbits[CAlpha]];
   1593 	krepl = replbit[img->nbits[CGrey]];
   1594 
   1595 	for(i=0; i<dx; i++){
   1596 		u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
   1597 		if(copyalpha)
   1598 			*w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
   1599 
   1600 		if(isgrey)
   1601 			*w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
   1602 		else if(!alphaonly){
   1603 			ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
   1604 			ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
   1605 			ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
   1606 			if(convgrey){
   1607 				*w++ = RGB2K(ured, ugrn, ublu);
   1608 			}else{
   1609 				*w++ = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
   1610 				*w++ = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
   1611 				*w++ = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
   1612 			}
   1613 		}
   1614 		r += nb;
   1615 		if(r == end)
   1616 			r = begin;
   1617 	}
   1618 
   1619 	b.alpha = copyalpha ? buf : &ones;
   1620 	b.rgba = (u32int*)buf;
   1621 	if(alphaonly){
   1622 		b.red = b.grn = b.blu = b.grey = nil;
   1623 		if(!copyalpha)
   1624 			b.rgba = nil;
   1625 		b.delta = 1;
   1626 	}else if(isgrey || convgrey){
   1627 		b.grey = buf+copyalpha;
   1628 		b.red = b.grn = b.blu = buf+copyalpha;
   1629 		b.delta = copyalpha+1;
   1630 	}else{
   1631 		b.blu = buf+copyalpha;
   1632 		b.grn = buf+copyalpha+1;
   1633 		b.grey = nil;
   1634 		b.red = buf+copyalpha+2;
   1635 		b.delta = copyalpha+3;
   1636 	}
   1637 	return b;
   1638 }
   1639 #undef DBG
   1640 
   1641 #define DBG if(drawdebug)
   1642 static void
   1643 writebyte(Param *p, uchar *w, Buffer src)
   1644 {
   1645 	Memimage *img;
   1646 	int i, isalpha, isgrey, nb, delta, dx, adelta;
   1647 	uchar ff, *red, *grn, *blu, *grey, *alpha;
   1648 	u32int u, mask;
   1649 
   1650 	img = p->img;
   1651 
   1652 	red = src.red;
   1653 	grn = src.grn;
   1654 	blu = src.blu;
   1655 	alpha = src.alpha;
   1656 	delta = src.delta;
   1657 	grey = src.grey;
   1658 	dx = p->dx;
   1659 
   1660 	nb = img->depth/8;
   1661 	mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
   1662 
   1663 	isalpha = img->flags&Falpha;
   1664 	isgrey = img->flags&Fgrey;
   1665 	adelta = src.delta;
   1666 
   1667 	if(isalpha && (alpha == nil || alpha == &ones)){
   1668 		ff = 0xFF;
   1669 		alpha = &ff;
   1670 		adelta = 0;
   1671 	}
   1672 
   1673 	for(i=0; i<dx; i++){
   1674 		u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
   1675 DBG print("u %.8lux...", u);
   1676 		u &= mask;
   1677 DBG print("&mask %.8lux...", u);
   1678 		if(isgrey){
   1679 			u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
   1680 DBG print("|grey %.8lux...", u);
   1681 			grey += delta;
   1682 		}else{
   1683 			u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
   1684 			u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
   1685 			u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
   1686 			red += delta;
   1687 			grn += delta;
   1688 			blu += delta;
   1689 DBG print("|rgb %.8lux...", u);
   1690 		}
   1691 
   1692 		if(isalpha){
   1693 			u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
   1694 			alpha += adelta;
   1695 DBG print("|alpha %.8lux...", u);
   1696 		}
   1697 
   1698 		w[0] = u;
   1699 		w[1] = u>>8;
   1700 		w[2] = u>>16;
   1701 		w[3] = u>>24;
   1702 DBG print("write back %.8lux...", u);
   1703 		w += nb;
   1704 	}
   1705 }
   1706 #undef DBG
   1707 
   1708 static Readfn*
   1709 readfn(Memimage *img)
   1710 {
   1711 	if(img->depth < 8)
   1712 		return readnbit;
   1713 	if(img->nbits[CMap] == 8)
   1714 		return readcmap;
   1715 	return readbyte;
   1716 }
   1717 
   1718 static Readfn*
   1719 readalphafn(Memimage *m)
   1720 {
   1721 	USED(m);
   1722 	return readbyte;
   1723 }
   1724 
   1725 static Writefn*
   1726 writefn(Memimage *img)
   1727 {
   1728 	if(img->depth < 8)
   1729 		return writenbit;
   1730 	if(img->chan == CMAP8)
   1731 		return writecmap;
   1732 	return writebyte;
   1733 }
   1734 
   1735 static void
   1736 nullwrite(Param *p, uchar *s, Buffer b)
   1737 {
   1738 	USED(p);
   1739 	USED(s);
   1740 }
   1741 
   1742 static Buffer
   1743 readptr(Param *p, uchar *s, int y)
   1744 {
   1745 	Buffer b;
   1746 	uchar *q;
   1747 
   1748 	USED(s);
   1749 	memset(&b, 0, sizeof b);
   1750 	q = p->bytermin + y*p->bwidth;
   1751 	b.red = q;	/* ptr to data */
   1752 	b.grn = b.blu = b.grey = b.alpha = nil;
   1753 	b.rgba = (u32int*)q;
   1754 	b.delta = p->img->depth/8;
   1755 	return b;
   1756 }
   1757 
   1758 static Buffer
   1759 boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
   1760 {
   1761 	USED(i);
   1762 	USED(o);
   1763 	memmove(bdst.red, bsrc.red, dx*bdst.delta);
   1764 	return bdst;
   1765 }
   1766 
   1767 static Buffer
   1768 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
   1769 {
   1770 	uchar *m, *r, *w, *ew;
   1771 
   1772 	USED(i);
   1773 	USED(o);
   1774 	m = bmask.grey;
   1775 	w = bdst.red;
   1776 	r = bsrc.red;
   1777 	ew = w+dx;
   1778 	for(; w < ew; w++,r++)
   1779 		if(*m++)
   1780 			*w = *r;
   1781 	return bdst;	/* not used */
   1782 }
   1783 
   1784 static Buffer
   1785 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
   1786 {
   1787 	uchar *m;
   1788 	ushort *r, *w, *ew;
   1789 
   1790 	USED(i);
   1791 	USED(o);
   1792 	m = bmask.grey;
   1793 	w = (ushort*)bdst.red;
   1794 	r = (ushort*)bsrc.red;
   1795 	ew = w+dx;
   1796 	for(; w < ew; w++,r++)
   1797 		if(*m++)
   1798 			*w = *r;
   1799 	return bdst;	/* not used */
   1800 }
   1801 
   1802 static Buffer
   1803 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
   1804 {
   1805 	uchar *m;
   1806 	uchar *r, *w, *ew;
   1807 
   1808 	USED(i);
   1809 	USED(o);
   1810 	m = bmask.grey;
   1811 	w = bdst.red;
   1812 	r = bsrc.red;
   1813 	ew = w+dx*3;
   1814 	while(w < ew){
   1815 		if(*m++){
   1816 			*w++ = *r++;
   1817 			*w++ = *r++;
   1818 			*w++ = *r++;
   1819 		}else{
   1820 			w += 3;
   1821 			r += 3;
   1822 		}
   1823 	}
   1824 	return bdst;	/* not used */
   1825 }
   1826 
   1827 static Buffer
   1828 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
   1829 {
   1830 	uchar *m;
   1831 	u32int *r, *w, *ew;
   1832 
   1833 	USED(i);
   1834 	USED(o);
   1835 	m = bmask.grey;
   1836 	w = (u32int*)bdst.red;
   1837 	r = (u32int*)bsrc.red;
   1838 	ew = w+dx;
   1839 	for(; w < ew; w++,r++)
   1840 		if(*m++)
   1841 			*w = *r;
   1842 	return bdst;	/* not used */
   1843 }
   1844 
   1845 static Buffer
   1846 genconv(Param *p, uchar *buf, int y)
   1847 {
   1848 	Buffer b;
   1849 	int nb;
   1850 	uchar *r, *w, *ew;
   1851 
   1852 	/* read from source into RGB format in convbuf */
   1853 	b = p->convreadcall(p, p->convbuf, y);
   1854 
   1855 	/* write RGB format into dst format in buf */
   1856 	p->convwritecall(p->convdpar, buf, b);
   1857 
   1858 	if(p->convdx){
   1859 		nb = p->convdpar->img->depth/8;
   1860 		r = buf;
   1861 		w = buf+nb*p->dx;
   1862 		ew = buf+nb*p->convdx;
   1863 		while(w<ew)
   1864 			*w++ = *r++;
   1865 	}
   1866 
   1867 	b.red = buf;
   1868 	b.blu = b.grn = b.grey = b.alpha = nil;
   1869 	b.rgba = (u32int*)buf;
   1870 	b.delta = 0;
   1871 
   1872 	return b;
   1873 }
   1874 
   1875 static Readfn*
   1876 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar)
   1877 {
   1878 	if(dst->chan == src->chan && !(src->flags&Frepl)){
   1879 /*if(drawdebug) iprint("readptr..."); */
   1880 		return readptr;
   1881 	}
   1882 
   1883 	if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
   1884 		/* cheat because we know the replicated value is exactly the color map entry. */
   1885 /*if(drawdebug) iprint("Readnbit..."); */
   1886 		return readnbit;
   1887 	}
   1888 
   1889 	spar->convreadcall = readfn(src);
   1890 	spar->convwritecall = writefn(dst);
   1891 	spar->convdpar = dpar;
   1892 
   1893 	/* allocate a conversion buffer */
   1894 	spar->convbufoff = ndrawbuf;
   1895 	ndrawbuf += spar->dx*4;
   1896 
   1897 	if(spar->dx > Dx(spar->img->r)){
   1898 		spar->convdx = spar->dx;
   1899 		spar->dx = Dx(spar->img->r);
   1900 	}
   1901 
   1902 /*if(drawdebug) iprint("genconv..."); */
   1903 	return genconv;
   1904 }
   1905 
   1906 /*
   1907  * Do NOT call this directly.  pixelbits is a wrapper
   1908  * around this that fetches the bits from the X server
   1909  * when necessary.
   1910  */
   1911 u32int
   1912 _pixelbits(Memimage *i, Point pt)
   1913 {
   1914 	uchar *p;
   1915 	u32int val;
   1916 	int off, bpp, npack;
   1917 
   1918 	val = 0;
   1919 	p = byteaddr(i, pt);
   1920 	switch(bpp=i->depth){
   1921 	case 1:
   1922 	case 2:
   1923 	case 4:
   1924 		npack = 8/bpp;
   1925 		off = pt.x%npack;
   1926 		val = p[0] >> bpp*(npack-1-off);
   1927 		val &= (1<<bpp)-1;
   1928 		break;
   1929 	case 8:
   1930 		val = p[0];
   1931 		break;
   1932 	case 16:
   1933 		val = p[0]|(p[1]<<8);
   1934 		break;
   1935 	case 24:
   1936 		val = p[0]|(p[1]<<8)|(p[2]<<16);
   1937 		break;
   1938 	case 32:
   1939 		val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
   1940 		break;
   1941 	}
   1942 	while(bpp<32){
   1943 		val |= val<<bpp;
   1944 		bpp *= 2;
   1945 	}
   1946 	return val;
   1947 }
   1948 
   1949 static Calcfn*
   1950 boolcopyfn(Memimage *img, Memimage *mask)
   1951 {
   1952 	if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
   1953 		return boolmemmove;
   1954 
   1955 	switch(img->depth){
   1956 	case 8:
   1957 		return boolcopy8;
   1958 	case 16:
   1959 		return boolcopy16;
   1960 	case 24:
   1961 		return boolcopy24;
   1962 	case 32:
   1963 		return boolcopy32;
   1964 	default:
   1965 		assert(0 /* boolcopyfn */);
   1966 	}
   1967 	return 0;
   1968 }
   1969 
   1970 /*
   1971  * Optimized draw for filling and scrolling; uses memset and memmove.
   1972  */
   1973 static void
   1974 memsets(void *vp, ushort val, int n)
   1975 {
   1976 	ushort *p, *ep;
   1977 
   1978 	p = vp;
   1979 	ep = p+n;
   1980 	while(p<ep)
   1981 		*p++ = val;
   1982 }
   1983 
   1984 static void
   1985 memsetl(void *vp, u32int val, int n)
   1986 {
   1987 	u32int *p, *ep;
   1988 
   1989 	p = vp;
   1990 	ep = p+n;
   1991 	while(p<ep)
   1992 		*p++ = val;
   1993 }
   1994 
   1995 static void
   1996 memset24(void *vp, u32int val, int n)
   1997 {
   1998 	uchar *p, *ep;
   1999 	uchar a,b,c;
   2000 
   2001 	p = vp;
   2002 	ep = p+3*n;
   2003 	a = val;
   2004 	b = val>>8;
   2005 	c = val>>16;
   2006 	while(p<ep){
   2007 		*p++ = a;
   2008 		*p++ = b;
   2009 		*p++ = c;
   2010 	}
   2011 }
   2012 
   2013 u32int
   2014 _imgtorgba(Memimage *img, u32int val)
   2015 {
   2016 	uchar r, g, b, a;
   2017 	int nb, ov, v;
   2018 	u32int chan;
   2019 	uchar *p;
   2020 
   2021 	a = 0xFF;
   2022 	r = g = b = 0xAA;	/* garbage */
   2023 	for(chan=img->chan; chan; chan>>=8){
   2024 		nb = NBITS(chan);
   2025 		ov = v = val&((1<<nb)-1);
   2026 		val >>= nb;
   2027 
   2028 		while(nb < 8){
   2029 			v |= v<<nb;
   2030 			nb *= 2;
   2031 		}
   2032 		v >>= (nb-8);
   2033 
   2034 		switch(TYPE(chan)){
   2035 		case CRed:
   2036 			r = v;
   2037 			break;
   2038 		case CGreen:
   2039 			g = v;
   2040 			break;
   2041 		case CBlue:
   2042 			b = v;
   2043 			break;
   2044 		case CAlpha:
   2045 			a = v;
   2046 			break;
   2047 		case CGrey:
   2048 			r = g = b = v;
   2049 			break;
   2050 		case CMap:
   2051 			p = img->cmap->cmap2rgb+3*ov;
   2052 			r = *p++;
   2053 			g = *p++;
   2054 			b = *p;
   2055 			break;
   2056 		}
   2057 	}
   2058 	return (r<<24)|(g<<16)|(b<<8)|a;
   2059 }
   2060 
   2061 u32int
   2062 _rgbatoimg(Memimage *img, u32int rgba)
   2063 {
   2064 	u32int chan;
   2065 	int d, nb;
   2066 	u32int v;
   2067 	uchar *p, r, g, b, a, m;
   2068 
   2069 	v = 0;
   2070 	r = rgba>>24;
   2071 	g = rgba>>16;
   2072 	b = rgba>>8;
   2073 	a = rgba;
   2074 	d = 0;
   2075 	for(chan=img->chan; chan; chan>>=8){
   2076 		nb = NBITS(chan);
   2077 		switch(TYPE(chan)){
   2078 		case CRed:
   2079 			v |= (r>>(8-nb))<<d;
   2080 			break;
   2081 		case CGreen:
   2082 			v |= (g>>(8-nb))<<d;
   2083 			break;
   2084 		case CBlue:
   2085 			v |= (b>>(8-nb))<<d;
   2086 			break;
   2087 		case CAlpha:
   2088 			v |= (a>>(8-nb))<<d;
   2089 			break;
   2090 		case CMap:
   2091 			p = img->cmap->rgb2cmap;
   2092 			m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
   2093 			v |= (m>>(8-nb))<<d;
   2094 			break;
   2095 		case CGrey:
   2096 			m = RGB2K(r,g,b);
   2097 			v |= (m>>(8-nb))<<d;
   2098 			break;
   2099 		}
   2100 		d += nb;
   2101 	}
   2102 /*	print("rgba2img %.8lux = %.*lux\n", rgba, 2*d/8, v); */
   2103 	return v;
   2104 }
   2105 
   2106 #define DBG if(0)
   2107 static int
   2108 memoptdraw(Memdrawparam *par)
   2109 {
   2110 	int m, y, dy, dx, op;
   2111 	u32int v;
   2112 	u16int u16;
   2113 	Memimage *src;
   2114 	Memimage *dst;
   2115 
   2116 	dx = Dx(par->r);
   2117 	dy = Dy(par->r);
   2118 	src = par->src;
   2119 	dst = par->dst;
   2120 	op = par->op;
   2121 
   2122 DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
   2123 	/*
   2124 	 * If we have an opaque mask and source is one opaque pixel we can convert to the
   2125 	 * destination format and just replicate with memset.
   2126 	 */
   2127 	m = Simplesrc|Simplemask|Fullmask;
   2128 	if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
   2129 		uchar *dp, p[4];
   2130 		int d, dwid, ppb, np, nb;
   2131 		uchar lm, rm;
   2132 
   2133 DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
   2134 		dwid = dst->width*sizeof(u32int);
   2135 		dp = byteaddr(dst, par->r.min);
   2136 		v = par->sdval;
   2137 DBG print("sdval %lud, depth %d\n", v, dst->depth);
   2138 		switch(dst->depth){
   2139 		case 1:
   2140 		case 2:
   2141 		case 4:
   2142 			for(d=dst->depth; d<8; d*=2)
   2143 				v |= (v<<d);
   2144 			ppb = 8/dst->depth;	/* pixels per byte */
   2145 			m = ppb-1;
   2146 			/* left edge */
   2147 			np = par->r.min.x&m;		/* no. pixels unused on left side of word */
   2148 			dx -= (ppb-np);
   2149 			nb = 8 - np * dst->depth;		/* no. bits used on right side of word */
   2150 			lm = (1<<nb)-1;
   2151 DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);
   2152 
   2153 			/* right edge */
   2154 			np = par->r.max.x&m;	/* no. pixels used on left side of word */
   2155 			dx -= np;
   2156 			nb = 8 - np * dst->depth;		/* no. bits unused on right side of word */
   2157 			rm = ~((1<<nb)-1);
   2158 DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);
   2159 
   2160 DBG print("dx %d Dx %d\n", dx, Dx(par->r));
   2161 			/* lm, rm are masks that are 1 where we should touch the bits */
   2162 			if(dx < 0){	/* just one byte */
   2163 				lm &= rm;
   2164 				for(y=0; y<dy; y++, dp+=dwid)
   2165 					*dp ^= (v ^ *dp) & lm;
   2166 			}else if(dx == 0){	/* no full bytes */
   2167 				if(lm)
   2168 					dwid--;
   2169 
   2170 				for(y=0; y<dy; y++, dp+=dwid){
   2171 					if(lm){
   2172 DBG print("dp %p v %lux lm %ux (v ^ *dp) & lm %lux\n", dp, v, lm, (v^*dp)&lm);
   2173 						*dp ^= (v ^ *dp) & lm;
   2174 						dp++;
   2175 					}
   2176 					*dp ^= (v ^ *dp) & rm;
   2177 				}
   2178 			}else{		/* full bytes in middle */
   2179 				dx /= ppb;
   2180 				if(lm)
   2181 					dwid--;
   2182 				dwid -= dx;
   2183 
   2184 				for(y=0; y<dy; y++, dp+=dwid){
   2185 					if(lm){
   2186 						*dp ^= (v ^ *dp) & lm;
   2187 						dp++;
   2188 					}
   2189 					memset(dp, v, dx);
   2190 					dp += dx;
   2191 					*dp ^= (v ^ *dp) & rm;
   2192 				}
   2193 			}
   2194 			return 1;
   2195 		case 8:
   2196 			for(y=0; y<dy; y++, dp+=dwid)
   2197 				memset(dp, v, dx);
   2198 			return 1;
   2199 		case 16:
   2200 			p[0] = v;		/* make little endian */
   2201 			p[1] = v>>8;
   2202 			memmove(&u16, p, 2);
   2203 			v = u16;
   2204 DBG print("dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n",
   2205 	dp, dx, dy, dwid);
   2206 			for(y=0; y<dy; y++, dp+=dwid)
   2207 				memsets(dp, v, dx);
   2208 			return 1;
   2209 		case 24:
   2210 			for(y=0; y<dy; y++, dp+=dwid)
   2211 				memset24(dp, v, dx);
   2212 			return 1;
   2213 		case 32:
   2214 			p[0] = v;		/* make little endian */
   2215 			p[1] = v>>8;
   2216 			p[2] = v>>16;
   2217 			p[3] = v>>24;
   2218 			memmove(&v, p, 4);
   2219 			for(y=0; y<dy; y++, dp+=dwid)
   2220 				memsetl(dp, v, dx);
   2221 			return 1;
   2222 		default:
   2223 			assert(0 /* bad dest depth in memoptdraw */);
   2224 		}
   2225 	}
   2226 
   2227 	/*
   2228 	 * If no source alpha, an opaque mask, we can just copy the
   2229 	 * source onto the destination.  If the channels are the same and
   2230 	 * the source is not replicated, memmove suffices.
   2231 	 */
   2232 	m = Simplemask|Fullmask;
   2233 	if((par->state&(m|Replsrc))==m && src->depth >= 8
   2234 	&& src->chan == dst->chan && !(src->flags&Falpha) && (op == S || op == SoverD)){
   2235 		uchar *sp, *dp;
   2236 		long swid, dwid, nb;
   2237 		int dir;
   2238 
   2239 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
   2240 			dir = -1;
   2241 		else
   2242 			dir = 1;
   2243 
   2244 		swid = src->width*sizeof(u32int);
   2245 		dwid = dst->width*sizeof(u32int);
   2246 		sp = byteaddr(src, par->sr.min);
   2247 		dp = byteaddr(dst, par->r.min);
   2248 		if(dir == -1){
   2249 			sp += (dy-1)*swid;
   2250 			dp += (dy-1)*dwid;
   2251 			swid = -swid;
   2252 			dwid = -dwid;
   2253 		}
   2254 		nb = (dx*src->depth)/8;
   2255 		for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
   2256 			memmove(dp, sp, nb);
   2257 		return 1;
   2258 	}
   2259 
   2260 	/*
   2261 	 * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
   2262 	 * they're all bit aligned, we can just use bit operators.  This happens
   2263 	 * when we're manipulating boolean masks, e.g. in the arc code.
   2264 	 */
   2265 	if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0
   2266 	&& dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1
   2267 	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
   2268 		uchar *sp, *dp, *mp;
   2269 		uchar lm, rm;
   2270 		long swid, dwid, mwid;
   2271 		int i, x, dir;
   2272 
   2273 		sp = byteaddr(src, par->sr.min);
   2274 		dp = byteaddr(dst, par->r.min);
   2275 		mp = byteaddr(par->mask, par->mr.min);
   2276 		swid = src->width*sizeof(u32int);
   2277 		dwid = dst->width*sizeof(u32int);
   2278 		mwid = par->mask->width*sizeof(u32int);
   2279 
   2280 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
   2281 			dir = -1;
   2282 		}else
   2283 			dir = 1;
   2284 
   2285 		lm = 0xFF>>(par->r.min.x&7);
   2286 		rm = 0xFF<<(8-(par->r.max.x&7));
   2287 		dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
   2288 
   2289 		if(dx < 0){	/* one byte wide */
   2290 			lm &= rm;
   2291 			if(dir == -1){
   2292 				dp += dwid*(dy-1);
   2293 				sp += swid*(dy-1);
   2294 				mp += mwid*(dy-1);
   2295 				dwid = -dwid;
   2296 				swid = -swid;
   2297 				mwid = -mwid;
   2298 			}
   2299 			for(y=0; y<dy; y++){
   2300 				*dp ^= (*dp ^ *sp) & *mp & lm;
   2301 				dp += dwid;
   2302 				sp += swid;
   2303 				mp += mwid;
   2304 			}
   2305 			return 1;
   2306 		}
   2307 
   2308 		dx /= 8;
   2309 		if(dir == 1){
   2310 			i = (lm!=0)+dx+(rm!=0);
   2311 			mwid -= i;
   2312 			swid -= i;
   2313 			dwid -= i;
   2314 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
   2315 				if(lm){
   2316 					*dp ^= (*dp ^ *sp++) & *mp++ & lm;
   2317 					dp++;
   2318 				}
   2319 				for(x=0; x<dx; x++){
   2320 					*dp ^= (*dp ^ *sp++) & *mp++;
   2321 					dp++;
   2322 				}
   2323 				if(rm){
   2324 					*dp ^= (*dp ^ *sp++) & *mp++ & rm;
   2325 					dp++;
   2326 				}
   2327 			}
   2328 			return 1;
   2329 		}else{
   2330 		/* dir == -1 */
   2331 			i = (lm!=0)+dx+(rm!=0);
   2332 			dp += dwid*(dy-1)+i-1;
   2333 			sp += swid*(dy-1)+i-1;
   2334 			mp += mwid*(dy-1)+i-1;
   2335 			dwid = -dwid+i;
   2336 			swid = -swid+i;
   2337 			mwid = -mwid+i;
   2338 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
   2339 				if(rm){
   2340 					*dp ^= (*dp ^ *sp--) & *mp-- & rm;
   2341 					dp--;
   2342 				}
   2343 				for(x=0; x<dx; x++){
   2344 					*dp ^= (*dp ^ *sp--) & *mp--;
   2345 					dp--;
   2346 				}
   2347 				if(lm){
   2348 					*dp ^= (*dp ^ *sp--) & *mp-- & lm;
   2349 					dp--;
   2350 				}
   2351 			}
   2352 		}
   2353 		return 1;
   2354 	}
   2355 	return 0;
   2356 }
   2357 #undef DBG
   2358 
   2359 /*
   2360  * Boolean character drawing.
   2361  * Solid opaque color through a 1-bit greyscale mask.
   2362  */
   2363 #define DBG if(0)
   2364 static int
   2365 chardraw(Memdrawparam *par)
   2366 {
   2367 	u32int bits;
   2368 	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
   2369 	u32int v, maskwid, dstwid;
   2370 	uchar *wp, *rp, *q, *wc;
   2371 	ushort *ws;
   2372 	u32int *wl;
   2373 	uchar sp[4];
   2374 	Rectangle r, mr;
   2375 	Memimage *mask, *src, *dst;
   2376 	union {
   2377 		// black box to hide pointer conversions from gcc.
   2378 		// we'll see how long this works.
   2379 		uchar *u8;
   2380 		u16int *u16;
   2381 		u32int *u32;
   2382 	} gcc_black_box;
   2383 
   2384 if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
   2385 		par->mask->flags, par->mask->depth, par->src->flags,
   2386 		Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
   2387 
   2388 	mask = par->mask;
   2389 	src = par->src;
   2390 	dst = par->dst;
   2391 	r = par->r;
   2392 	mr = par->mr;
   2393 	op = par->op;
   2394 
   2395 	if((par->state&(Replsrc|Simplesrc|Fullsrc|Replmask)) != (Replsrc|Simplesrc|Fullsrc)
   2396 	|| mask->depth != 1 || dst->depth<8 || dst->data==src->data
   2397 	|| op != SoverD)
   2398 		return 0;
   2399 
   2400 /*if(drawdebug) iprint("chardraw..."); */
   2401 
   2402 	depth = mask->depth;
   2403 	maskwid = mask->width*sizeof(u32int);
   2404 	rp = byteaddr(mask, mr.min);
   2405 	npack = 8/depth;
   2406 	bsh = (mr.min.x % npack) * depth;
   2407 
   2408 	wp = byteaddr(dst, r.min);
   2409 	dstwid = dst->width*sizeof(u32int);
   2410 DBG print("bsh %d\n", bsh);
   2411 	dy = Dy(r);
   2412 	dx = Dx(r);
   2413 
   2414 	ddepth = dst->depth;
   2415 
   2416 	/*
   2417 	 * for loop counts from bsh to bsh+dx
   2418 	 *
   2419 	 * we want the bottom bits to be the amount
   2420 	 * to shift the pixels down, so for n≡0 (mod 8) we want
   2421 	 * bottom bits 7.  for n≡1, 6, etc.
   2422 	 * the bits come from -n-1.
   2423 	 */
   2424 
   2425 	bx = -bsh-1;
   2426 	ex = -bsh-1-dx;
   2427 	SET(bits);
   2428 	v = par->sdval;
   2429 
   2430 	/* make little endian */
   2431 	sp[0] = v;
   2432 	sp[1] = v>>8;
   2433 	sp[2] = v>>16;
   2434 	sp[3] = v>>24;
   2435 
   2436 /*print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]); */
   2437 	for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
   2438 		q = rp;
   2439 		if(bsh)
   2440 			bits = *q++;
   2441 		switch(ddepth){
   2442 		case 8:
   2443 /*if(drawdebug) iprint("8loop..."); */
   2444 			wc = wp;
   2445 			for(x=bx; x>ex; x--, wc++){
   2446 				i = x&7;
   2447 				if(i == 8-1)
   2448 					bits = *q++;
   2449 DBG print("bits %lux sh %d...", bits, i);
   2450 				if((bits>>i)&1)
   2451 					*wc = v;
   2452 			}
   2453 			break;
   2454 		case 16:
   2455 			gcc_black_box.u8 = wp;
   2456 			ws = gcc_black_box.u16;
   2457 			gcc_black_box.u8 = sp;
   2458 			v = *gcc_black_box.u16;
   2459 			for(x=bx; x>ex; x--, ws++){
   2460 				i = x&7;
   2461 				if(i == 8-1)
   2462 					bits = *q++;
   2463 DBG print("bits %lux sh %d...", bits, i);
   2464 				if((bits>>i)&1)
   2465 					*ws = v;
   2466 			}
   2467 			break;
   2468 		case 24:
   2469 			wc = wp;
   2470 			for(x=bx; x>ex; x--, wc+=3){
   2471 				i = x&7;
   2472 				if(i == 8-1)
   2473 					bits = *q++;
   2474 DBG print("bits %lux sh %d...", bits, i);
   2475 				if((bits>>i)&1){
   2476 					wc[0] = sp[0];
   2477 					wc[1] = sp[1];
   2478 					wc[2] = sp[2];
   2479 				}
   2480 			}
   2481 			break;
   2482 		case 32:
   2483 			gcc_black_box.u8 = wp;
   2484 			wl = gcc_black_box.u32;
   2485 			gcc_black_box.u8 = sp;
   2486 			v = *gcc_black_box.u32;
   2487 			for(x=bx; x>ex; x--, wl++){
   2488 				i = x&7;
   2489 				if(i == 8-1)
   2490 					bits = *q++;
   2491 DBG iprint("bits %lux sh %d...", bits, i);
   2492 				if((bits>>i)&1)
   2493 					*wl = v;
   2494 			}
   2495 			break;
   2496 		}
   2497 	}
   2498 
   2499 DBG print("\n");
   2500 	return 1;
   2501 }
   2502 #undef DBG
   2503 
   2504 
   2505 /*
   2506  * Fill entire byte with replicated (if necessary) copy of source pixel,
   2507  * assuming destination ldepth is >= source ldepth.
   2508  *
   2509  * This code is just plain wrong for >8bpp.
   2510  *
   2511 u32int
   2512 membyteval(Memimage *src)
   2513 {
   2514 	int i, val, bpp;
   2515 	uchar uc;
   2516 
   2517 	unloadmemimage(src, src->r, &uc, 1);
   2518 	bpp = src->depth;
   2519 	uc <<= (src->r.min.x&(7/src->depth))*src->depth;
   2520 	uc &= ~(0xFF>>bpp);
   2521 	* pixel value is now in high part of byte. repeat throughout byte
   2522 	val = uc;
   2523 	for(i=bpp; i<8; i<<=1)
   2524 		val |= val>>i;
   2525 	return val;
   2526 }
   2527  *
   2528  */
   2529 
   2530 void
   2531 _memfillcolor(Memimage *i, u32int val)
   2532 {
   2533 	u32int bits;
   2534 	int d, y;
   2535 	uchar p[4];
   2536 
   2537 	if(val == DNofill)
   2538 		return;
   2539 
   2540 	bits = _rgbatoimg(i, val);
   2541 	switch(i->depth){
   2542 	case 24:	/* 24-bit images suck */
   2543 		for(y=i->r.min.y; y<i->r.max.y; y++)
   2544 			memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
   2545 		break;
   2546 	default:	/* 1, 2, 4, 8, 16, 32 */
   2547 		for(d=i->depth; d<32; d*=2)
   2548 			bits = (bits << d) | bits;
   2549 		p[0] = bits;		/* make little endian */
   2550 		p[1] = bits>>8;
   2551 		p[2] = bits>>16;
   2552 		p[3] = bits>>24;
   2553 		memmove(&bits, p, 4);
   2554 		memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
   2555 		break;
   2556 	}
   2557 }