-
Notifications
You must be signed in to change notification settings - Fork 2
/
os0file.cc
5950 lines (4738 loc) · 147 KB
/
os0file.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/***********************************************************************
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
gratefully acknowledged and are described briefly in the InnoDB
documentation. The contributions by Percona Inc. are incorporated with
their permission, and subject to the conditions contained in the file
COPYING.Percona.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
***********************************************************************/
/**************************************************//**
@file os/os0file.cc
The interface to the operating system file i/o primitives
Created 10/21/1995 Heikki Tuuri
*******************************************************/
#include "os0file.h"
#ifdef UNIV_NONINL
#include "os0file.ic"
#endif
#include "ut0mem.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "fil0fil.h"
#include "buf0buf.h"
#include "srv0mon.h"
#ifndef UNIV_HOTBACKUP
# include "os0sync.h"
# include "os0thread.h"
#else /* !UNIV_HOTBACKUP */
# ifdef __WIN__
/* Add includes for the _stat() call to compile on Windows */
# include <sys/types.h>
# include <sys/stat.h>
# include <errno.h>
# endif /* __WIN__ */
#endif /* !UNIV_HOTBACKUP */
#if defined(LINUX_NATIVE_AIO)
#include <libaio.h>
#endif
/** Insert buffer segment id */
static const ulint IO_IBUF_SEGMENT = 0;
/** Log segment id */
static const ulint IO_LOG_SEGMENT = 1;
/* This specifies the file permissions InnoDB uses when it creates files in
Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
my_umask */
#ifndef __WIN__
/** Umask for creating files */
UNIV_INTERN ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
#else
/** Umask for creating files */
UNIV_INTERN ulint os_innodb_umask = 0;
#endif /* __WIN__ */
#ifndef UNIV_HOTBACKUP
/* We use these mutexes to protect lseek + file i/o operation, if the
OS does not provide an atomic pread or pwrite, or similar */
#define OS_FILE_N_SEEK_MUTEXES 16
UNIV_INTERN os_ib_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
/* In simulated aio, merge at most this many consecutive i/os */
#define OS_AIO_MERGE_N_CONSECUTIVE 64
/**********************************************************************
InnoDB AIO Implementation:
=========================
We support native AIO on Windows and Linux. On the rest of the platforms
we simulate AIO with special io-threads that service the IO requests.
Simulated AIO:
==============
On platforms where we 'simulate' AIO, the following is a rough explanation
of the high-level design.
There are four io-threads (for ibuf, log, read, write).
All synchronous IO requests are serviced by the calling thread using
os_file_write/os_file_read. The Asynchronous requests are queued up
in an array (there are four such arrays) by the calling thread.
Later these requests are picked up by the io-thread and are serviced
synchronously.
Windows native AIO:
==================
If srv_use_native_aio is not set, then Windows follows the same
code path as simulated AIO. If the flag is set, then the native AIO
interface is used. On Windows, one of the limitations is that if a file is
opened for AIO, no synchronous IO can be done on it. Therefore we have an
extra fifth array to queue up synchronous IO requests.
There are innodb_file_io_threads helper threads. These threads work
on the four arrays mentioned above in Simulated AIO. No thread is
required for the sync array.
If a synchronous IO request is made, it is first queued in the sync
array. Then the calling thread itself waits on the request, thus
making the call synchronous.
If an AIO request is made the calling thread not only queues it in the
array but also submits the requests. The helper thread then collects
the completed IO request and calls completion routine on it.
Linux native AIO:
=================
If we have libaio installed on the system and innodb_use_native_aio
is set to TRUE we follow the code path of native AIO, otherwise we
do simulated AIO.
There are innodb_file_io_threads helper threads. These threads work
on the four arrays mentioned above in Simulated AIO.
If a synchronous IO request is made, it is handled by calling
os_file_write/os_file_read.
If an AIO request is made the calling thread not only queues it in the
array but also submits the requests. The helper thread then collects
the completed IO request and calls completion routine on it.
**********************************************************************/
/** Flag: enable debug printout for asynchronous i/o */
UNIV_INTERN ibool os_aio_print_debug = FALSE;
#ifdef UNIV_PFS_IO
/* Keys to register InnoDB I/O with performance schema */
UNIV_INTERN mysql_pfs_key_t innodb_file_data_key;
UNIV_INTERN mysql_pfs_key_t innodb_file_log_key;
UNIV_INTERN mysql_pfs_key_t innodb_file_temp_key;
#endif /* UNIV_PFS_IO */
/** The asynchronous i/o array slot structure. One slot describes a single
i/o request stored in an os_aio_array_t (see os_aio_array_t::slots). */
struct os_aio_slot_t{
ibool is_read; /*!< TRUE if a read operation */
ulint pos; /*!< index of the slot in the aio
array */
ibool reserved; /*!< TRUE if this slot is reserved */
time_t reservation_time;/*!< time when reserved */
ulint len; /*!< length of the block to read or
write */
byte* buf; /*!< buffer used in i/o */
ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
os_offset_t offset; /*!< file offset in bytes */
os_file_t file; /*!< file where to read or write */
const char* name; /*!< file name or path */
ibool io_already_done;/*!< used only in simulated aio:
TRUE if the physical i/o already
made and only the slot message
needs to be passed to the caller
of os_aio_simulated_handle */
fil_node_t* message1; /*!< first message given by the
requester of an aio operation; it can
be used to identify which pending aio
operation was completed */
void* message2; /*!< second message given by the
requester of an aio operation; it can
be used to identify which pending aio
operation was completed */
#ifdef WIN_ASYNC_IO
HANDLE handle; /*!< handle object we need in the
OVERLAPPED struct */
OVERLAPPED control; /*!< Windows control block for the
aio request */
#elif defined(LINUX_NATIVE_AIO)
struct iocb control; /*!< Linux control block for aio */
int n_bytes; /*!< bytes written/read. */
int ret; /*!< AIO return code */
#endif /* WIN_ASYNC_IO */
};
/** The asynchronous i/o array structure: a mutex-protected collection of
os_aio_slot_t slots, divided into n_segments segments. */
struct os_aio_array_t{
os_ib_mutex_t mutex; /*!< the mutex protecting the aio array */
os_event_t not_full;
/*!< The event which is set to the
signaled state when there is space in
the aio outside the ibuf segment */
os_event_t is_empty;
/*!< The event which is set to the
signaled state when there are no
pending i/os in this array */
ulint n_slots;/*!< Total number of slots in the aio
array. This must be divisible by
n_threads.
NOTE(review): there is no n_threads
member here; presumably n_segments is
meant -- confirm against the code that
initializes the array. */
ulint n_segments;
/*!< Number of segments in the aio
array of pending aio requests. A
thread can wait separately for any one
of the segments. */
ulint cur_seg;/*!< We reserve IO requests in round
robin fashion to different segments.
This points to the segment that is to
be used to service next IO request. */
ulint n_reserved;
/*!< Number of reserved slots in the
aio array outside the ibuf segment */
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
#ifdef __WIN__
HANDLE* handles;
/*!< Pointer to an array of OS native
event handles where we copied the
handles from slots, in the same
order. This can be used in
WaitForMultipleObjects; used only in
Windows */
#endif /* __WIN__ */
#if defined(LINUX_NATIVE_AIO)
io_context_t* aio_ctx;
/*!< completion queue for IO. There is
one such queue per segment. Each thread
will work on one ctx exclusively. */
struct io_event* aio_events;
/*!< The array to collect completed IOs.
There is one such event for each
possible pending IO. The size of the
array is equal to n_slots. */
#endif /* LINUX_NATIVE_AIO */
};
#if defined(LINUX_NATIVE_AIO)
/** timeout for each io_getevents() call = 500ms. */
#define OS_AIO_REAP_TIMEOUT (500000000UL)
/** time to sleep, in microseconds if io_setup() returns EAGAIN. */
#define OS_AIO_IO_SETUP_RETRY_SLEEP (500000UL)
/** number of attempts before giving up on io_setup(). */
#define OS_AIO_IO_SETUP_RETRY_ATTEMPTS 5
#endif
/** Array of events used in simulated aio */
static os_event_t* os_aio_segment_wait_events = NULL;
/** The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
are NULL when the module has not yet been initialized. @{ */
static os_aio_array_t* os_aio_read_array = NULL; /*!< Reads */
static os_aio_array_t* os_aio_write_array = NULL; /*!< Writes */
static os_aio_array_t* os_aio_ibuf_array = NULL; /*!< Insert buffer */
static os_aio_array_t* os_aio_log_array = NULL; /*!< Redo log */
static os_aio_array_t* os_aio_sync_array = NULL; /*!< Synchronous I/O */
/* @} */
/** Number of asynchronous I/O segments. Set by os_aio_init(). */
static ulint os_aio_n_segments = ULINT_UNDEFINED;
/** If the following is TRUE, read i/o handler threads try to
wait until a batch of new read requests have been posted */
static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
#endif /* !UNIV_HOTBACKUP */
UNIV_INTERN ulint os_n_file_reads = 0;
UNIV_INTERN ulint os_bytes_read_since_printout = 0;
UNIV_INTERN ulint os_n_file_writes = 0;
UNIV_INTERN ulint os_n_fsyncs = 0;
UNIV_INTERN ulint os_n_file_reads_old = 0;
UNIV_INTERN ulint os_n_file_writes_old = 0;
UNIV_INTERN ulint os_n_fsyncs_old = 0;
UNIV_INTERN time_t os_last_printout;
UNIV_INTERN ibool os_has_said_disk_full = FALSE;
#if !defined(UNIV_HOTBACKUP) \
&& (!defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8)
/** The mutex protecting the following counts of pending I/O operations */
static os_ib_mutex_t os_file_count_mutex;
#endif /* !UNIV_HOTBACKUP && (!HAVE_ATOMIC_BUILTINS || UNIV_WORD_SIZE < 8) */
/** Number of pending os_file_pread() operations */
UNIV_INTERN ulint os_file_n_pending_preads = 0;
/** Number of pending os_file_pwrite() operations */
UNIV_INTERN ulint os_file_n_pending_pwrites = 0;
/** Number of pending write operations */
UNIV_INTERN ulint os_n_pending_writes = 0;
/** Number of pending read operations */
UNIV_INTERN ulint os_n_pending_reads = 0;
#ifdef UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Validates the consistency of the aio system, but only on some of the calls:
the costly os_aio_validate() check is run once per OS_AIO_VALIDATE_SKIP
invocations and skipped on all the others.
@return TRUE if ok or the check was skipped */
UNIV_INTERN
ibool
os_aio_validate_skip(void)
/*======================*/
{
/** Run os_aio_validate() once per this many calls */
# define OS_AIO_VALIDATE_SKIP	13

	/** Number of calls left until the next real validation.
	Kept signed on purpose: the unsynchronized update below may
	race with other threads. */
	static int	calls_until_check = OS_AIO_VALIDATE_SKIP;

	/* The counter is updated without any synchronization. That is
	acceptable, because skipping is only a heuristic that keeps
	debug builds from running the expensive check on every call. */
	const int	remaining = --calls_until_check;

	if (remaining <= 0) {
		/* Time for a real check: rearm the countdown first. */
		calls_until_check = OS_AIO_VALIDATE_SKIP;

		return(os_aio_validate());
	}

	return(TRUE);
}
# endif /* !UNIV_HOTBACKUP */
#endif /* UNIV_DEBUG */
#ifdef __WIN__
/***********************************************************************//**
Determines the version of the operating system by querying GetVersionEx().
Only compiled on Windows.
@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
OS_WIN7. */
UNIV_INTERN
ulint
os_get_os_version(void)
/*===================*/
{
	OSVERSIONINFO	version_info;

	version_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);

	ut_a(GetVersionEx(&version_info));

	/* First classify by platform family. Only the NT family needs
	a closer look at the version numbers. */
	switch (version_info.dwPlatformId) {
	case VER_PLATFORM_WIN32s:
		return(OS_WIN31);

	case VER_PLATFORM_WIN32_WINDOWS:
		return(OS_WIN95);

	case VER_PLATFORM_WIN32_NT:
		break;

	default:
		ut_error;
		return(0);
	}

	const bool	minor_is_zero = (version_info.dwMinorVersion == 0);

	switch (version_info.dwMajorVersion) {
	case 3:
	case 4:
		return(OS_WINNT);

	case 5:
		return(minor_is_zero ? OS_WIN2000 : OS_WINXP);

	case 6:
		return(minor_is_zero ? OS_WINVISTA : OS_WIN7);

	default:
		break;
	}

	/* Any other major version is reported as OS_WIN7. */
	return(OS_WIN7);
}
#endif /* __WIN__ */
/***********************************************************************//**
Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
overwrite the error number). The OS error (GetLastError() on Windows, errno
elsewhere) is translated to one of the OS_FILE_* codes; if the number is not
known to this program, OS_FILE_ERROR_MAX + the OS error number is returned.
Depending on the arguments, a diagnostic is also printed to stderr.
@return 0 if the OS reports no error, otherwise an OS_FILE_* error number,
or OS_FILE_ERROR_MAX + OS error number */
static
ulint
os_file_get_last_error_low(
/*=======================*/
bool report_all_errors, /*!< in: true if we want an error
message printed of all errors */
bool on_error_silent) /*!< in: if true, don't print any
diagnostic to the log; has no effect
when report_all_errors is true */
{
#ifdef __WIN__
ulint err = (ulint) GetLastError();
if (err == ERROR_SUCCESS) {
return(0);
}
/* Disk-full and file-exists errors are not reported here unless
report_all_errors is set. */
if (report_all_errors
|| (!on_error_silent
&& err != ERROR_DISK_FULL
&& err != ERROR_FILE_EXISTS)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Operating system error number %lu"
" in a file operation.\n", (ulong) err);
if (err == ERROR_PATH_NOT_FOUND) {
fprintf(stderr,
"InnoDB: The error means the system"
" cannot find the path specified.\n");
if (srv_is_being_started) {
fprintf(stderr,
"InnoDB: If you are installing InnoDB,"
" remember that you must create\n"
"InnoDB: directories yourself, InnoDB"
" does not create them.\n");
}
} else if (err == ERROR_ACCESS_DENIED) {
fprintf(stderr,
"InnoDB: The error means mysqld does not have"
" the access rights to\n"
"InnoDB: the directory. It may also be"
" you have created a subdirectory\n"
"InnoDB: of the same name as a data file.\n");
} else if (err == ERROR_SHARING_VIOLATION
|| err == ERROR_LOCK_VIOLATION) {
fprintf(stderr,
"InnoDB: The error means that another program"
" is using InnoDB's files.\n"
"InnoDB: This might be a backup or antivirus"
" software or another instance\n"
"InnoDB: of MySQL."
" Please close it to get rid of this error.\n");
} else if (err == ERROR_WORKING_SET_QUOTA
|| err == ERROR_NO_SYSTEM_RESOURCES) {
fprintf(stderr,
"InnoDB: The error means that there are no"
" sufficient system resources or quota to"
" complete the operation.\n");
} else if (err == ERROR_OPERATION_ABORTED) {
fprintf(stderr,
"InnoDB: The error means that the I/O"
" operation has been aborted\n"
"InnoDB: because of either a thread exit"
" or an application request.\n"
"InnoDB: Retry attempt is made.\n");
} else {
fprintf(stderr,
"InnoDB: Some operating system error numbers"
" are described at\n"
"InnoDB: "
REFMAN
"operating-system-error-codes.html\n");
}
}
fflush(stderr);
/* Translate the Windows error code to an OS_FILE_* code */
if (err == ERROR_FILE_NOT_FOUND) {
return(OS_FILE_NOT_FOUND);
} else if (err == ERROR_DISK_FULL) {
return(OS_FILE_DISK_FULL);
} else if (err == ERROR_FILE_EXISTS) {
return(OS_FILE_ALREADY_EXISTS);
} else if (err == ERROR_SHARING_VIOLATION
|| err == ERROR_LOCK_VIOLATION) {
return(OS_FILE_SHARING_VIOLATION);
} else if (err == ERROR_WORKING_SET_QUOTA
|| err == ERROR_NO_SYSTEM_RESOURCES) {
return(OS_FILE_INSUFFICIENT_RESOURCE);
} else if (err == ERROR_OPERATION_ABORTED) {
return(OS_FILE_OPERATION_ABORTED);
} else if (err == ERROR_ACCESS_DENIED) {
return(OS_FILE_ACCESS_VIOLATION);
} else {
/* Not a code known to this function */
return(OS_FILE_ERROR_MAX + err);
}
#else
int err = errno;
if (err == 0) {
return(0);
}
/* Disk-full (ENOSPC) and file-exists (EEXIST) errors are not
reported here unless report_all_errors is set. */
if (report_all_errors
|| (err != ENOSPC && err != EEXIST && !on_error_silent)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Operating system error number %d"
" in a file operation.\n", err);
if (err == ENOENT) {
fprintf(stderr,
"InnoDB: The error means the system"
" cannot find the path specified.\n");
if (srv_is_being_started) {
fprintf(stderr,
"InnoDB: If you are installing InnoDB,"
" remember that you must create\n"
"InnoDB: directories yourself, InnoDB"
" does not create them.\n");
}
} else if (err == EACCES) {
fprintf(stderr,
"InnoDB: The error means mysqld does not have"
" the access rights to\n"
"InnoDB: the directory.\n");
} else {
if (strerror(err) != NULL) {
fprintf(stderr,
"InnoDB: Error number %d"
" means '%s'.\n",
err, strerror(err));
}
fprintf(stderr,
"InnoDB: Some operating system"
" error numbers are described at\n"
"InnoDB: "
REFMAN
"operating-system-error-codes.html\n");
}
}
fflush(stderr);
/* Translate the errno value to an OS_FILE_* code */
switch (err) {
case ENOSPC:
return(OS_FILE_DISK_FULL);
case ENOENT:
return(OS_FILE_NOT_FOUND);
case EEXIST:
return(OS_FILE_ALREADY_EXISTS);
case EXDEV:
case ENOTDIR:
case EISDIR:
return(OS_FILE_PATH_ERROR);
case EAGAIN:
/* Mapped to an AIO-specific code only when native AIO is in use;
otherwise it is returned as OS_FILE_ERROR_MAX + err below */
if (srv_use_native_aio) {
return(OS_FILE_AIO_RESOURCES_RESERVED);
}
break;
case EINTR:
/* Same treatment as EAGAIN above */
if (srv_use_native_aio) {
return(OS_FILE_AIO_INTERRUPTED);
}
break;
case EACCES:
return(OS_FILE_ACCESS_VIOLATION);
}
/* Not a code known to this function */
return(OS_FILE_ERROR_MAX + err);
#endif
}
/***********************************************************************//**
Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
overwrite the error number). This is a wrapper around
os_file_get_last_error_low() that never asks for silent operation.
@return the error number reported by os_file_get_last_error_low() */
UNIV_INTERN
ulint
os_file_get_last_error(
/*===================*/
	bool	report_all_errors)	/*!< in: TRUE if we want an error
					message printed of all errors */
{
	/* Diagnostics are not suppressed through this entry point. */
	const bool	on_error_silent = false;

	return(os_file_get_last_error_low(report_all_errors,
					  on_error_silent));
}
/****************************************************************//**
Handles the failure of a file operation: looks up the last OS error and
decides whether the caller should retry. For an error that is not handled
specifically below, the process is terminated with exit(3) if should_exit
is TRUE; in that case on_error_silent does not suppress the error message.
@return TRUE if we should retry the operation */
static
ibool
os_file_handle_error_cond_exit(
/*===========================*/
	const char*	name,		/*!< in: name of a file or NULL */
	const char*	operation,	/*!< in: operation */
	ibool		should_exit,	/*!< in: call exit(3) if unknown error
					and this parameter is TRUE */
	ibool		on_error_silent)/*!< in: if TRUE then don't print
					any message to the log iff it is
					an unknown non-fatal error */
{
	const ulint	os_err = os_file_get_last_error_low(
		false, on_error_silent);

	switch (os_err) {
	case OS_FILE_AIO_RESOURCES_RESERVED:
	case OS_FILE_AIO_INTERRUPTED:
		/* Retry immediately */
		return(TRUE);

	case OS_FILE_SHARING_VIOLATION:
		/* Wait before asking the caller to retry */
		os_thread_sleep(10000000);	/* 10 sec */
		return(TRUE);

	case OS_FILE_OPERATION_ABORTED:
	case OS_FILE_INSUFFICIENT_RESOURCE:
		os_thread_sleep(100000);	/* 100 ms */
		return(TRUE);

	case OS_FILE_PATH_ERROR:
	case OS_FILE_ALREADY_EXISTS:
	case OS_FILE_ACCESS_VIOLATION:
		/* Retrying is not requested for these */
		return(FALSE);

	case OS_FILE_DISK_FULL:
		/* The disk full warning is printed only once, and it
		is printed regardless of on_error_silent. */
		if (!os_has_said_disk_full) {

			if (name) {
				ut_print_timestamp(stderr);

				fprintf(stderr,
					" InnoDB: Encountered a problem with"
					" file %s\n", name);
			}

			ut_print_timestamp(stderr);

			fprintf(stderr,
				" InnoDB: Disk is full. Try to clean the disk"
				" to free space.\n");

			os_has_said_disk_full = TRUE;

			fflush(stderr);
		}

		return(FALSE);

	default:
		break;
	}

	/* An error without specific handling above. If the operation
	is one that can crash on error, the message is printed even
	when on_error_silent is set. */
	if (should_exit || !on_error_silent) {
		ib_logf(IB_LOG_LEVEL_ERROR, "File %s: '%s' returned OS "
			"error " ULINTPF ".%s",
			name ? name : "(unknown)",
			operation,
			os_err,
			should_exit ? " Cannot continue operation" : "");
	}

	if (should_exit) {
		exit(1);
	}

	return(FALSE);
}
/****************************************************************//**
Does error handling when a file operation fails. This variant exits the
process on an unknown error and never suppresses messages; see
os_file_handle_error_cond_exit().
@return TRUE if we should retry the operation */
static
ibool
os_file_handle_error(
/*=================*/
	const char*	name,		/*!< in: name of a file or NULL */
	const char*	operation)	/*!< in: operation */
{
	const ibool	should_exit = TRUE;	/* exit on unknown error */
	const ibool	on_error_silent = FALSE;

	return(os_file_handle_error_cond_exit(
			name, operation, should_exit, on_error_silent));
}
/****************************************************************//**
Does error handling when a file operation fails. This variant never exits
the process, not even on an unknown error; see
os_file_handle_error_cond_exit().
@return TRUE if we should retry the operation */
static
ibool
os_file_handle_error_no_exit(
/*=========================*/
	const char*	name,		/*!< in: name of a file or NULL */
	const char*	operation,	/*!< in: operation */
	ibool		on_error_silent)/*!< in: if TRUE then don't print
					any message to the log. */
{
	const ibool	should_exit = FALSE;	/* keep running */

	return(os_file_handle_error_cond_exit(
			name, operation, should_exit, on_error_silent));
}
#undef USE_FILE_LOCK
#define USE_FILE_LOCK
#if defined(UNIV_HOTBACKUP) || defined(__WIN__)
/* InnoDB Hot Backup does not lock the data files.
* On Windows, mandatory locking is used.
*/
# undef USE_FILE_LOCK
#endif
#ifdef USE_FILE_LOCK
/****************************************************************//**
Obtain an exclusive lock on a file. The lock is a non-blocking fcntl()
write lock (F_SETLK with F_WRLCK) covering the whole file. On failure an
error is logged, with an extra hint when the error is EAGAIN or EACCES.
@return 0 on success, -1 if the lock could not be obtained */
static
int
os_file_lock(
/*=========*/
	int		fd,	/*!< in: file descriptor */
	const char*	name)	/*!< in: file name */
{
	struct flock lk;

	ut_ad(!srv_read_only_mode);

	lk.l_type = F_WRLCK;
	lk.l_whence = SEEK_SET;
	/* l_start = 0 with l_len = 0 locks from the start of the file
	to its end, however large the file grows. */
	lk.l_start = lk.l_len = 0;

	if (fcntl(fd, F_SETLK, &lk) == -1) {
		/* Save errno right away: the logging call below may
		itself change it, which would corrupt both the logged
		number and the EAGAIN/EACCES test. */
		const int	lock_errno = errno;

		ib_logf(IB_LOG_LEVEL_ERROR,
			"Unable to lock %s, error: %d", name, lock_errno);

		if (lock_errno == EAGAIN || lock_errno == EACCES) {
			ib_logf(IB_LOG_LEVEL_INFO,
				"Check that you do not already have "
				"another mysqld process using the "
				"same InnoDB data or log files.");
		}

		return(-1);
	}

	return(0);
}
#endif /* USE_FILE_LOCK */
#ifndef UNIV_HOTBACKUP
/****************************************************************//**
Creates the seek mutexes used in positioned reads and writes. In builds
without atomic builtins, or with a word size below 8, the mutex protecting
the pending i/o counters is created as well. */
UNIV_INTERN
void
os_io_init_simple(void)
/*===================*/
{
#if !defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8
	os_file_count_mutex = os_mutex_create();
#endif /* !HAVE_ATOMIC_BUILTINS || UNIV_WORD_SIZE < 8 */

	ulint	n_created = 0;

	/* Fill every element of os_file_seek_mutexes[] in index order */
	while (n_created < OS_FILE_N_SEEK_MUTEXES) {
		os_file_seek_mutexes[n_created] = os_mutex_create();
		n_created++;
	}
}
/***********************************************************************//**
Creates a temporary file. This function is like tmpfile(3), but
the temporary file is created in the MySQL temporary directory.
The descriptor returned by innobase_mysql_tmpfile() is wrapped in a stdio
stream opened in "w+b" mode. If the stream cannot be created, an error is
printed to stderr and the descriptor, if one was obtained, is closed.
@return temporary file handle, or NULL on error */
UNIV_INTERN
FILE*
os_file_create_tmpfile(void)
/*========================*/
{
	FILE*	file = NULL;
	int	fd = innobase_mysql_tmpfile();

	ut_ad(!srv_read_only_mode);

	if (fd >= 0) {
		file = fdopen(fd, "w+b");
	}

	if (!file) {
		/* Capture errno before printing anything: the
		timestamp and stdio calls below may overwrite it, and
		we want to report the error of the call that failed. */
		const int	saved_errno = errno;

		ut_print_timestamp(stderr);

		fprintf(stderr,
			" InnoDB: Error: unable to create temporary file;"
			" errno: %d\n", saved_errno);

		if (fd >= 0) {
			/* fdopen() failed: the descriptor is still
			ours, so release it. */
			close(fd);
		}
	}

	return(file);
}
#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
The os_file_opendir() function opens a directory stream corresponding to the
directory named by the dirname argument. The directory stream is positioned
at the first entry. In both Unix and Windows we automatically skip the '.'
and '..' items at the start of the directory listing.
@return directory stream, NULL if error */
UNIV_INTERN
os_file_dir_t
os_file_opendir(
/*============*/
	const char*	dirname,	/*!< in: directory name; it must not
					contain a trailing '\' or '/' */
	ibool		error_is_fatal)	/*!< in: TRUE if we should treat an
					error as a fatal error; if we try to
					open symlinks then we do not wish a
					fatal error if it happens not to be
					a directory */
{
#ifdef __WIN__
	/* Build the search pattern "<dirname>\*" */
	char	path[OS_FILE_MAX_PATH + 3];

	ut_a(strlen(dirname) < OS_FILE_MAX_PATH);

	strcpy(path, dirname);
	strcpy(path + strlen(path), "\\*");

	/* Opening the 'directory stream' on Windows also fetches the
	first directory entry. That entry is '.', and the '.' and '..'
	entries are skipped anyway, so the fetched data is simply
	discarded. */
	LPWIN32_FIND_DATA	first_entry = static_cast<LPWIN32_FIND_DATA>(
		ut_malloc(sizeof(WIN32_FIND_DATA)));

	os_file_dir_t		dir = FindFirstFile(
		(LPCTSTR) path, first_entry);

	ut_free(first_entry);

	if (dir != INVALID_HANDLE_VALUE) {
		return(dir);
	}

	if (error_is_fatal) {
		os_file_handle_error(dirname, "opendir");
	}

	return(NULL);
#else
	os_file_dir_t	dir = opendir(dirname);

	if (dir == NULL) {
		if (error_is_fatal) {
			os_file_handle_error(dirname, "opendir");
		}
	}

	return(dir);
#endif /* __WIN__ */
}
/***********************************************************************//**
Closes a directory stream. A failure is passed to
os_file_handle_error_no_exit(), so it never terminates the process.
@return 0 if success, -1 if failure */
UNIV_INTERN
int
os_file_closedir(
/*=============*/
	os_file_dir_t	dir)	/*!< in: directory stream */
{
#ifdef __WIN__
	if (FindClose(dir)) {
		return(0);
	}

	os_file_handle_error_no_exit(NULL, "closedir", FALSE);

	return(-1);
#else
	const int	close_result = closedir(dir);

	if (close_result != 0) {
		os_file_handle_error_no_exit(NULL, "closedir", FALSE);
	}

	/* The result of closedir() is passed through unchanged */
	return(close_result);
#endif /* __WIN__ */
}
/***********************************************************************//**
This function returns information of the next file in the directory. We jump
over the '.' and '..' entries in the directory.
@return 0 if ok, -1 if error, 1 if at the end of the directory */
UNIV_INTERN
int
os_file_readdir_next_file(
/*======================*/
const char* dirname,/*!< in: directory name or path */
os_file_dir_t dir, /*!< in: directory stream */
os_file_stat_t* info) /*!< in/out: buffer where the info is returned */
{
#ifdef __WIN__
LPWIN32_FIND_DATA lpFindFileData;
BOOL ret;
lpFindFileData = static_cast<LPWIN32_FIND_DATA>(
ut_malloc(sizeof(WIN32_FIND_DATA)));
next_file:
ret = FindNextFile(dir, lpFindFileData);
if (ret) {
ut_a(strlen((char*) lpFindFileData->cFileName)
< OS_FILE_MAX_PATH);
if (strcmp((char*) lpFindFileData->cFileName, ".") == 0
|| strcmp((char*) lpFindFileData->cFileName, "..") == 0) {
goto next_file;
}
strcpy(info->name, (char*) lpFindFileData->cFileName);
info->size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
+ (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
<< 32);
if (lpFindFileData->dwFileAttributes
& FILE_ATTRIBUTE_REPARSE_POINT) {
/* TODO: test Windows symlinks */
/* TODO: MySQL has apparently its own symlink
implementation in Windows, dbname.sym can
redirect a database directory:
REFMAN "windows-symbolic-links.html" */
info->type = OS_FILE_TYPE_LINK;
} else if (lpFindFileData->dwFileAttributes
& FILE_ATTRIBUTE_DIRECTORY) {
info->type = OS_FILE_TYPE_DIR;
} else {
/* It is probably safest to assume that all other
file types are normal. Better to check them rather
than blindly skip them. */
info->type = OS_FILE_TYPE_FILE;
}
}
ut_free(lpFindFileData);
if (ret) {
return(0);
} else if (GetLastError() == ERROR_NO_MORE_FILES) {
return(1);
} else {
os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE);
return(-1);
}
#else
struct dirent* ent;
char* full_path;
int ret;
struct stat statinfo;
#ifdef HAVE_READDIR_R
char dirent_buf[sizeof(struct dirent)
+ _POSIX_PATH_MAX + 100];
/* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as
the max file name len; but in most standards, the
length is NAME_MAX; we add 100 to be even safer */
#endif
next_file:
#ifdef HAVE_READDIR_R
ret = readdir_r(dir, (struct dirent*) dirent_buf, &ent);
if (ret != 0
#ifdef UNIV_AIX
/* On AIX, only if we got non-NULL 'ent' (result) value and
a non-zero 'ret' (return) value, it indicates a failed
readdir_r() call. An NULL 'ent' with an non-zero 'ret'
would indicate the "end of the directory" is reached. */
&& ent != NULL
#endif
) {
fprintf(stderr,
"InnoDB: cannot read directory %s, error %lu\n",
dirname, (ulong) ret);
return(-1);
}
if (ent == NULL) {
/* End of directory */
return(1);
}
ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
#else
ent = readdir(dir);
if (ent == NULL) {
return(1);
}
#endif
ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);