aboutsummaryrefslogtreecommitdiff
blob: 92a38177648f8793d52a5b3dfbcca0ddcc8cc34c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software.  All rights reserved.
 *
 *	@(#)db_int.h.src	10.62 (Sleepycat) 5/23/98
 */

#ifndef _DB_INTERNAL_H_
#define	_DB_INTERNAL_H_

#include <stddef.h>			/* offsetof(). */

#include <db.h>				/* Standard DB include file. */
#include "queue.h"

/*******************************************************
 * General purpose constants and macros.
 *******************************************************/
#define	UINT16_T_MAX	    0xffff	/* Maximum 16 bit unsigned. */
#define	UINT32_T_MAX	0xffffffff	/* Maximum 32 bit unsigned. */

#define	DB_MIN_PGSIZE	0x000200	/* Minimum page size (512). */
#define	DB_MAX_PGSIZE	0x010000	/* Maximum page size (65536). */

#define	DB_MINCACHE	10		/* Minimum cached pages. */

#define	MEGABYTE	1048576		/* 2^20. */

/*
 * If we are unable to determine the underlying filesystem block size, use
 * 8K on the grounds that most OS's use less than 8K as their VM page size.
 */
#define	DB_DEF_IOSIZE	(8 * 1024)

/*
 * Aligning items to particular sizes or in pages or memory.  ALIGNP is a
 * separate macro, as we've had to cast the pointer to different integral
 * types on different architectures.
 *
 * We cast pointers into unsigned longs when manipulating them because C89
 * guarantees that u_long is the largest available integral type and further,
 * to never generate overflows.  However, neither C89 nor C9X requires that
 * any integer type be large enough to hold a pointer, although C9X created
 * the intptr_t type, which is guaranteed to hold a pointer but may or may
 * not exist.  At some point in the future, we should test for intptr_t and
 * use it where available.
 *
 * ALIGN(value, bound)
 *	Round the integer value up to the next multiple of bound.  The mask
 *	arithmetic is only correct when bound is a power of two.
 * ALIGNP(value, bound)
 *	As ALIGN, but value is a pointer: it is converted to ALIGNTYPE first
 *	and the result is an ALIGNTYPE, not a pointer.  The argument is
 *	parenthesized before the cast so that an expression such as "p + 1"
 *	is converted as a whole rather than as "(ALIGNTYPE)p + 1".
 *
 * Both macros expand bound twice, so it must not have side effects.
 */
#undef	ALIGNTYPE
#define	ALIGNTYPE		u_long
#undef	ALIGNP
#define	ALIGNP(value, bound)	ALIGN((ALIGNTYPE)(value), bound)
#undef	ALIGN
#define	ALIGN(value, bound)	(((value) + (bound) - 1) & ~((bound) - 1))

/*
 * There are several on-page structures that are declared to have a number of
 * fields followed by a variable length array of items.  The structure size
 * without including the variable length array or the address of the first of
 * those elements can be found using SSZ.
 *
 * This macro can also be used to find the offset of a structure element in a
 * structure.  This is used in various places to copy structure elements from
 * unaligned memory references, e.g., pointers into a packed page.
 *
 * SSZ(name, field)
 *	Byte offset of member field within type name, as an int.
 * SSZA(name, field)
 *	Byte offset of element 0 of the array member field within type name,
 *	as an int.
 *
 * Both are built on the standard offsetof() rather than on taking the
 * address of a member of a null pointer and casting that pointer to int,
 * which is not defined behavior and truncates where pointers are wider than
 * int.  The separate array version is kept for existing callers.
 */
#undef	SSZ
#define SSZ(name, field)	((int)offsetof(name, field))

#undef	SSZA
#define SSZA(name, field)	((int)offsetof(name, field[0]))

/*
 * Macros to return per-process address, offsets based on shared regions.
 *
 * R_ADDR(base, offset)
 *	The address offset bytes past (base)->addr, as a void *.
 * R_OFFSET(base, p)
 *	The distance in bytes from (base)->addr to p.
 *
 * Every argument and each whole expansion is parenthesized so the macros
 * compose correctly with compound arguments and surrounding operators.
 */
#define	R_ADDR(base, offset)						\
	((void *)((u_int8_t *)((base)->addr) + (offset)))
#define	R_OFFSET(base, p)						\
	((u_int8_t *)(p) - (u_int8_t *)((base)->addr))

/*
 * Free and free-string macros that overwrite memory.
 *
 * FREE(p, len)	Release p with __db_free(); len is the number of bytes to
 *		overwrite first.
 * FREES(p)	Release the NUL-terminated string p with __db_free().
 *
 * When DIAGNOSTIC is defined the memory is filled with 0xff bytes before it
 * is released (presumably to make stale references easier to spot); FREES
 * overwrites strlen(p) bytes, i.e. everything but the terminating NUL.
 * Without DIAGNOSTIC nothing is overwritten and len is ignored.
 *
 * NOTE(review): the expansions are bare brace blocks, so "FREE(p, n);"
 * leaves an empty statement after the block and the macros cannot be used
 * as the unbraced body of an if that has an else.
 */
#ifdef DIAGNOSTIC
#undef	FREE
#define	FREE(p, len) {							\
	memset(p, 0xff, len);						\
	__db_free(p);							\
}
#undef	FREES
#define	FREES(p) {							\
	FREE(p, strlen(p));						\
}
#else
#undef	FREE
#define	FREE(p, len) {							\
	__db_free(p);							\
}
#undef	FREES
#define	FREES(p) {							\
	__db_free(p);							\
}
#endif

/*
 * Structure used to print flag values: pairs one flag mask with the name
 * to display for it.
 */
typedef struct __fn {
	u_int32_t mask;			/* Flag value. */
	const char *name;		/* Flag name. */
} FN;

/*
 * Set, clear and test flags.
 *
 * F_SET, F_CLR and F_ISSET operate on the "flags" member of the object that
 * p points to.  LF_SET, LF_CLR and LF_ISSET operate on a variable named
 * "flags" that must be in scope where the macro is used.
 *
 * The *_ISSET forms yield the masked bits themselves, not a normalized 0/1.
 * Every expansion is fully parenthesized so it can be used as an operand,
 * e.g. in either arm of a conditional expression.
 */
#define	F_SET(p, f)	((p)->flags |= (f))
#define	F_CLR(p, f)	((p)->flags &= ~(f))
#define	F_ISSET(p, f)	((p)->flags & (f))
#define	LF_SET(f)	(flags |= (f))
#define	LF_CLR(f)	(flags &= ~(f))
#define	LF_ISSET(f)	(flags & (f))

/*
 * Display separator string.  Any earlier definition of DB_LINE is discarded
 * first, so this definition is the one in effect after this point.
 */
#undef	DB_LINE
#define	DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="

/*
 * Global variables.
 *
 * All globals live in one structure, __db_global_values, which is only
 * declared here; the definition is elsewhere.  DB_GLOBAL(v) expands to
 * member v of that structure.  The comment on each member gives the DB_*
 * name it is associated with (presumably the configuration option that
 * sets it -- confirm against the code that assigns these members).
 */
typedef struct __db_globals {
	int db_mutexlocks;		/* DB_MUTEXLOCKS */
	int db_region_anon;		/* DB_REGION_ANON, DB_REGION_NAME */
	int db_region_init;		/* DB_REGION_INIT */
	int db_tsl_spins;		/* DB_TSL_SPINS */
	int db_pageyield;		/* DB_PAGEYIELD */
} DB_GLOBALS;
extern	DB_GLOBALS	__db_global_values;
#define	DB_GLOBAL(v)	__db_global_values.v

/*
 * Unused, or not-used-yet variable.  "Shut that bloody compiler up!"
 *
 * Assigns v to n so the compiler sees n being given a value.  The expansion
 * is parenthesized so that it stays a single assignment expression whatever
 * operators surround it.
 */
#define	COMPQUIET(n, v)	((n) = (v))

/*
 * Win16 needs specific syntax on callback functions.  Nobody else cares.
 *
 * DB_CALLBACK expands to nothing unless it was already defined before this
 * point.
 */
#ifndef	DB_CALLBACK
#define	DB_CALLBACK	/* Nothing. */
#endif

/*******************************************************
 * Files.
 *******************************************************/
 /*
  * We use 1024 as the maximum path length.  It's too hard to figure out what
  * the real path length is, as it was traditionally stored in <sys/param.h>,
  * and that file isn't always available.
  */
#undef	MAXPATHLEN
#define	MAXPATHLEN	1024

#define	PATH_DOT	"."	/* Current working directory. */
#define	PATH_SEPARATOR	"/"	/* Path separator character. */

/*******************************************************
 * Mutex support.
 *******************************************************/
/*
 * Type of the test-and-set resource stored in a db_mutex_t when
 * HAVE_SPINLOCKS is defined.
 */
typedef unsigned char tsl_t;



/*
 * !!!
 * Various systems require different alignments for mutexes (the worst we've
 * seen so far is 16-bytes on some HP architectures).  The mutex (tsl_t) must
 * be first in the db_mutex_t structure, which must itself be first in the
 * region.  This ensures the alignment is as returned by mmap(2), which should
 * be sufficient.  All other mutex users must ensure proper alignment locally.
 */
#define	MUTEX_ALIGNMENT	1

/*
 * The offset of a mutex in memory.
 *
 * MUTEX_LOCK_OFFSET(a, b) is the distance in bytes from address a to
 * address b, as a u_int32_t.  Both arguments are parenthesized before being
 * converted to byte pointers, so pointer expressions such as "p + 1" are
 * evaluated in their own type first.
 *
 * !!!
 * Not an off_t, so backing file offsets MUST be less than 4Gb.  See the
 * off field of the db_mutex_t as well.
 */
#define	MUTEX_LOCK_OFFSET(a, b)						\
	((u_int32_t)((u_int8_t *)(b) - (u_int8_t *)(a)))

/*
 * Mutex structure.
 *
 * The leading members depend on the build: with HAVE_SPINLOCKS the mutex is
 * a test-and-set resource (plus the holder's pid when DIAGNOSTIC is also
 * defined); without it the mutex records a backing file offset and the
 * holder's pid.  The spin count and the two grant counters are always
 * present.
 */
typedef struct _db_mutex_t {
#ifdef HAVE_SPINLOCKS
	tsl_t	  tsl_resource;		/* Resource test and set. */
#ifdef DIAGNOSTIC
	u_int32_t pid;			/* Lock holder: 0 or process pid. */
#endif
#else
	u_int32_t off;			/* Backing file offset. */
	u_int32_t pid;			/* Lock holder: 0 or process pid. */
#endif
	u_int32_t spins;		/* Spins before block. */
	u_int32_t mutex_set_wait;	/* Granted after wait. */
	u_int32_t mutex_set_nowait;	/* Granted without waiting. */
} db_mutex_t;

#include "mutex_ext.h"

/*******************************************************
 * Access methods.
 *******************************************************/
/*
 * Lock/unlock a DB thread.
 *
 * Each macro tests the DB_AM_THREAD flag on dbp and, only if it is set,
 * calls __db_mutex_lock() or __db_mutex_unlock() on dbp->mutexp, discarding
 * the return value.  The second argument is always -1 (presumably "no file
 * descriptor" -- confirm against the prototypes in mutex_ext.h).
 *
 * NOTE(review): the expansion is an unbraced if statement that already ends
 * in a semicolon.  "DB_THREAD_LOCK(dbp);" therefore produces an extra empty
 * statement, and using the macro as the unbraced body of an if/else will
 * mis-associate or fail to parse the else.
 */
#define	DB_THREAD_LOCK(dbp)						\
	if (F_ISSET(dbp, DB_AM_THREAD))					\
	    (void)__db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1);
#define	DB_THREAD_UNLOCK(dbp)						\
	if (F_ISSET(dbp, DB_AM_THREAD))					\
	    (void)__db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1);

/*
 * Btree/recno local statistics structure.
 *
 * Every member is a 32-bit unsigned counter.  The typedef is introduced
 * ahead of the structure body, so DB_BTREE_LSTAT can be named before the
 * members are known.
 */
struct __db_bt_lstat;	typedef struct __db_bt_lstat DB_BTREE_LSTAT;
struct __db_bt_lstat {
	u_int32_t bt_freed;		/* Pages freed for reuse. */
	u_int32_t bt_pfxsaved;		/* Bytes saved by prefix compression. */
	u_int32_t bt_split;		/* Total number of splits. */
	u_int32_t bt_rootsplit;		/* Root page splits. */
	u_int32_t bt_fastsplit;		/* Fast splits. */
	u_int32_t bt_added;		/* Items added. */
	u_int32_t bt_deleted;		/* Items deleted. */
	u_int32_t bt_get;		/* Items retrieved. */
	u_int32_t bt_cache_hit;		/* Hits in fast-insert code. */
	u_int32_t bt_cache_miss;	/* Misses in fast-insert code. */
};

/*******************************************************
 * Environment.
 *******************************************************/
/*
 * Type passed to __db_appname(): the kind of file a name is wanted for.
 * DB_APP_NONE is explicitly 0; the remaining values follow consecutively.
 */
typedef enum {
	DB_APP_NONE=0,			/* No type (region). */
	DB_APP_DATA,			/* Data file. */
	DB_APP_LOG,			/* Log file. */
	DB_APP_TMP			/* Temporary file. */
} APPNAME;

/*******************************************************
 * Shared memory regions.
 *******************************************************/
/*
 * The shared memory regions share an initial structure so that the general
 * region code can handle races between the region being deleted and other
 * processes waiting on the region mutex.
 *
 * !!!
 * Note, the mutex must be the first entry in the region; see comment above.
 *
 * DB_REGIONMAGIC, INVALID_SEGID and REGION_ANONYMOUS are defined next to the
 * valid, segid and flags members respectively; presumably they are the
 * values those members are set to or tested against -- confirm at the call
 * sites.
 */
typedef struct _rlayout {
	db_mutex_t lock;		/* Region mutex. */
#define	DB_REGIONMAGIC	0x120897
	u_int32_t  valid;		/* Valid magic number. */
	u_int32_t  refcnt;		/* Region reference count. */
	size_t	   size;		/* Region length. */
	int	   majver;		/* Major version number. */
	int	   minver;		/* Minor version number. */
	int	   patch;		/* Patch version number. */
#define	INVALID_SEGID	-1
	int	   segid;		/* shmget(2) ID, or Win16 segment ID. */

#define	REGION_ANONYMOUS	0x01	/* Region is/should be in anon mem. */
	u_int32_t  flags;
} RLAYOUT;

/*
 * DB creates all regions on 4K boundaries out of sheer paranoia, so that
 * we don't make the underlying VM unhappy.
 *
 * DB_ROUNDOFF(i) rounds i up, in place, to the next multiple of
 * DB_VMPAGESIZE: it first adds DB_VMPAGESIZE - 1 and then subtracts the
 * remainder.  i must be a modifiable lvalue and is expanded three times, so
 * it must not have side effects.
 *
 * NOTE(review): the expansion is a bare brace block, so "DB_ROUNDOFF(i);"
 * leaves an empty statement after it and the macro cannot be the unbraced
 * body of an if that has an else.
 */
#define	DB_VMPAGESIZE	(4 * 1024)
#define	DB_ROUNDOFF(i) {						\
	(i) += DB_VMPAGESIZE - 1;					\
	(i) -= (i) % DB_VMPAGESIZE;					\
}

/*
 * The interface to region attach is nasty, there is a lot of complex stuff
 * going on, which has to be retained between create/attach and detach.  The
 * REGINFO structure keeps track of it.
 *
 * The members fall into three groups, marked by the comments inside the
 * structure: arguments describing the region wanted, results describing the
 * region obtained, and flags.  Flag bit 0x0001 is deliberately not defined
 * here because that bit is shared with the flags member of RLAYOUT, where
 * REGION_ANONYMOUS occupies it.
 */
struct __db_reginfo;	typedef struct __db_reginfo REGINFO;
struct __db_reginfo {
					/* Arguments. */
	DB_ENV	   *dbenv;		/* Region naming info. */
	APPNAME	    appname;		/* Region naming info. */
	char	   *path;		/* Region naming info. */
	const char *file;		/* Region naming info. */
	int	    mode;		/* Region mode, if a file. */
	size_t	    size;		/* Region size. */
	u_int32_t   dbflags;		/* Region file open flags, if a file. */

					/* Results. */
	char	   *name;		/* Region name. */
	void	   *addr;		/* Region address. */
	int	    fd;			/* Fcntl(2) locking file descriptor.
					   NB: this is only valid if a regular
					   file is backing the shared region,
					   and mmap(2) is being used to map it
					   into our address space. */
	int	    segid;		/* shmget(2) ID, or Win16 segment ID. */

					/* Shared flags. */
/*				0x0001	COMMON MASK with RLAYOUT structure. */
#define	REGION_CANGROW		0x0002	/* Can grow. */
#define	REGION_CREATED		0x0004	/* Created. */
#define	REGION_HOLDINGSYS	0x0008	/* Holding system resources. */
#define	REGION_LASTDETACH	0x0010	/* Delete on last detach. */
#define	REGION_MALLOC		0x0020	/* Created in malloc'd memory. */
#define	REGION_PRIVATE		0x0040	/* Private to thread/process. */
#define	REGION_REMOVED		0x0080	/* Already deleted. */
#define	REGION_SIZEDEF		0x0100	/* Use default region size if exists. */
	u_int32_t   flags;
};

/*******************************************************
 * Mpool.
 *******************************************************/
/*
 * File types for DB access methods.  Negative numbers are reserved to DB.
 */
#define	DB_FTYPE_BTREE		-1	/* Btree. */
#define	DB_FTYPE_HASH		-2	/* Hash. */

/*
 * Structure used as the DB pgin/pgout pgcookie: carries the page size and
 * whether swapping is required.
 */
typedef struct __dbpginfo {
	size_t	db_pagesize;		/* Underlying page size. */
	int	needswap;		/* If swapping required. */
} DB_PGINFO;

/*******************************************************
 * Log.
 *******************************************************/
/*
 * Reset an LSN to 'zero': both its file and its offset become 0.  LSN is
 * expanded twice and must be a modifiable structure, not a pointer.
 */
#define	ZERO_LSN(LSN) {							\
	(LSN).file = (LSN).offset = 0;					\
}

/*
 * Evaluates to 1 if LSN is a 'zero' lsn, otherwise to 0.  Only the file
 * member is examined.
 */
#define	IS_ZERO_LSN(LSN)	(!(LSN).file)

/*
 * Test if we need to log a change: evaluates to 1 when dbp has
 * DB_AM_LOGGING set and DB_AM_RECOVER clear, and to 0 otherwise.  The
 * DB_AM_RECOVER test is only reached when DB_AM_LOGGING is set.
 */
#define	DB_LOGGING(dbp)							\
	(F_ISSET(dbp, DB_AM_LOGGING) ? !F_ISSET(dbp, DB_AM_RECOVER) : 0)

#ifdef DIAGNOSTIC
/*
 * Debugging macro to log operations.
 *	If DEBUG_WOP is defined, log operations that modify the database.
 *	If DEBUG_ROP is defined, log operations that read the database.
 *
 * D dbp
 * T txn
 * O operation (string)
 * K key
 * A data
 * F flags
 *
 * LOG_OP does nothing unless DB_LOGGING(D) is true.  When it is, the
 * operation string is wrapped in a zeroed DBT whose size includes the
 * terminating NUL (strlen(O) + 1) and handed to __db_debug_log() together
 * with D's log file id.  The return value of __db_debug_log() is discarded,
 * and the DB_LSN passed by address is a local of the macro's block that is
 * not used afterwards.
 *
 * DEBUG_LREAD and DEBUG_LWRITE expand to LOG_OP only when DIAGNOSTIC and,
 * respectively, DEBUG_ROP or DEBUG_WOP are defined; in every other
 * configuration they expand to nothing.
 */
#define	LOG_OP(D, T, O, K, A, F) {					\
	DB_LSN _lsn;							\
	DBT _op;							\
	if (DB_LOGGING((D))) {						\
		memset(&_op, 0, sizeof(_op));				\
		_op.data = O;						\
		_op.size = strlen(O) + 1;				\
		(void)__db_debug_log((D)->dbenv->lg_info,		\
		    T, &_lsn, 0, &_op, (D)->log_fileid, K, A, F);	\
	}								\
}
#ifdef DEBUG_ROP
#define	DEBUG_LREAD(D, T, O, K, A, F)	LOG_OP(D, T, O, K, A, F)
#else
#define	DEBUG_LREAD(D, T, O, K, A, F)
#endif
#ifdef DEBUG_WOP
#define	DEBUG_LWRITE(D, T, O, K, A, F)	LOG_OP(D, T, O, K, A, F)
#else
#define	DEBUG_LWRITE(D, T, O, K, A, F)
#endif
#else
#define	DEBUG_LREAD(D, T, O, K, A, F)
#define	DEBUG_LWRITE(D, T, O, K, A, F)
#endif /* DIAGNOSTIC */

/*******************************************************
 * Transactions and recovery.
 *******************************************************/
/*
 * Out-of-band value for a lock.  Locks are returned to callers as offsets
 * into the lock region.  Since an RLAYOUT structure begins every region,
 * offset 0 always falls on that header and is guaranteed not to be a valid
 * lock.
 */
#define	LOCK_INVALID	0

/*
 * The structure allocated for every transaction.
 *
 * links is a TAILQ_ENTRY (from queue.h), so the structure can be placed on
 * a tail queue of transactions.
 */
struct __db_txn {
	DB_TXNMGR	*mgrp;		/* Pointer to transaction manager. */
	DB_TXN		*parent;	/* Pointer to transaction's parent. */
	DB_LSN		last_lsn;	/* Lsn of last log write. */
	u_int32_t	txnid;		/* Unique transaction id. */
	size_t		off;		/* Detail structure within region. */
	TAILQ_ENTRY(__db_txn) links;
};

#include "os_func.h"
#include "os_ext.h"

#endif /* !_DB_INTERNAL_H_ */