Skip to content

Commit c03b5da

Browse files
dan-zheng authored and
copybara-github committed
Copybara configuration update.
PiperOrigin-RevId: 609931218
1 parent 4b1fa03 commit c03b5da

File tree

8 files changed

+24
-195
lines changed

8 files changed

+24
-195
lines changed

.github/workflows/build.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: Build
22

3-
# Trigger on push, pull request, or via manual dispatch.
4-
on: [push, pull_request, workflow_dispatch]
3+
# Trigger on push or via manual dispatch.
4+
on: [push, workflow_dispatch]
55

66
jobs:
77
build:

CMakeLists.txt

+3-6
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
2424
FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG da250571a45826b21eebbddc1e50d0c1137dee5f)
2525
FetchContent_MakeAvailable(highway)
2626

27-
## Note: absl needs to be installed by sentencepiece. This will only happen if
27+
## Note: absl needs to be installed by sentencepiece. This will only happen if
2828
## cmake is invoked with -DSPM_ENABLE_SHARED=OFF and -DSPM_ABSL_PROVIDER=module
2929
FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 53de76561cfc149d3c01037f0595669ad32a5e7c)
3030
FetchContent_MakeAvailable(sentencepiece)
@@ -43,13 +43,14 @@ set(SOURCES
4343
util/args.h
4444
)
4545

46+
add_compile_options($<$<CONFIG:Release>:-O2>)
4647
if(NOT CMAKE_BUILD_TYPE)
4748
set(CMAKE_BUILD_TYPE "Release")
4849
endif()
4950

5051
# Allowable types for WEIGHT_TYPE:
5152
# float - slow, not recommended
52-
# hwy::bfloat16_t - bfloat16 as implemented by https://github.com/google/highway
53+
# hwy::bfloat16_t - bfloat16 as implemented by https://github.com/google/highway
5354
# SfpStream - 8-bit switched floating point (recommended)
5455
# NuqStream - experimental, work-in-progress
5556
option(WEIGHT_TYPE "Set weight type" "")
@@ -67,8 +68,6 @@ target_link_libraries(gemma hwy hwy_contrib sentencepiece)
6768
target_include_directories(gemma PRIVATE ./)
6869
FetchContent_GetProperties(sentencepiece)
6970
target_include_directories(gemma PRIVATE ${sentencepiece_SOURCE_DIR})
70-
target_compile_definitions(gemma PRIVATE $<$<PLATFORM_ID:Windows>:_CRT_SECURE_NO_WARNINGS NOMINMAX>)
71-
target_compile_options(gemma PRIVATE $<$<PLATFORM_ID:Windows>:-Wno-deprecated-declarations>)
7271

7372
## Library Target
7473

@@ -78,5 +77,3 @@ set_target_properties(libgemma PROPERTIES PREFIX "")
7877
target_include_directories(libgemma PUBLIC ./)
7978
target_link_libraries(libgemma hwy hwy_contrib sentencepiece)
8079
target_include_directories(libgemma PRIVATE ${sentencepiece_SOURCE_DIR})
81-
target_compile_definitions(libgemma PRIVATE $<$<PLATFORM_ID:Windows>:_CRT_SECURE_NO_WARNINGS NOMINMAX>)
82-
target_compile_options(libgemma PRIVATE $<$<PLATFORM_ID:Windows>:-Wno-deprecated-declarations>)

CMakePresets.json

-59
This file was deleted.

README.md

+8-34
Original file line numberDiff line numberDiff line change
@@ -55,16 +55,6 @@ Before starting, you should have installed:
5555
least C++17.
5656
- `tar` for extracting archives from Kaggle.
5757

58-
Building natively on Windows requires the Visual Studio 2022 Build Tools with the
59-
optional Clang/LLVM C++ frontend (`clang-cl`). This can be installed from the
60-
command line with
61-
[`winget`](https://learn.microsoft.com/en-us/windows/package-manager/winget/):
62-
63-
```sh
64-
winget install --id Kitware.CMake
65-
winget install --id Microsoft.VisualStudio.2022.BuildTools --force --override "--passive --wait --add Microsoft.VisualStudio.Workload.VCTools;installRecommended --add Microsoft.VisualStudio.Component.VC.Llvm.Clang --add Microsoft.VisualStudio.Component.VC.Llvm.ClangToolset"
66-
```
67-
6858
### Step 1: Obtain model weights and tokenizer from Kaggle
6959

7060
Visit [the Gemma model page on
@@ -117,7 +107,6 @@ runtime, create a build directory and generate the build files using `cmake`
117107
from the top-level project directory. For the 8-bit switched floating point
118108
weights (sfp), run cmake with no options:
119109

120-
#### Unix-like Platforms
121110
```sh
122111
cmake -B build
123112
```
@@ -137,18 +126,17 @@ your weights, you can enter the `build/` directory and run `make` to build the
137126
`./gemma` executable:
138127

139128
```sh
140-
# Configure `build` directory
141-
cmake --preset make
142-
143-
# Build project using make
144-
cmake --build --preset make -j [number of parallel threads to use]
129+
cd build
130+
make -j [number of parallel threads to use] gemma
145131
```
146132

147133
Replace `[number of parallel threads to use]` with a number - the number of
148-
cores available on your system is a reasonable heuristic. For example,
149-
`make -j4 gemma` will build using 4 threads. If the `nproc` command is
150-
available, you can use `make -j$(nproc) gemma` as a reasonable default
151-
for the number of threads.
134+
cores available on your system is a reasonable heuristic.
135+
136+
For example, `make -j4 gemma` will build using 4 threads. If this is successful,
137+
you should now have a `gemma` executable in the `build/` directory. If the
138+
`nproc` command is available, you can use `make -j$(nproc) gemma` as a
139+
reasonable default for the number of threads.
152140

153141
If you aren't sure of the right value for the `-j` flag, you can simply run
154142
`make gemma` instead and it should still build the `./gemma` executable.
@@ -157,20 +145,6 @@ If you aren't sure of the right value for the `-j` flag, you can simply run
157145
> On Windows Subsystem for Linux (WSL), users should set the number of
158146
> parallel threads to 1. Using a larger number may result in errors.
159147
160-
If the build is successful, you should now have a `gemma` executable in the `build/` directory.
161-
162-
#### Windows
163-
164-
```sh
165-
# Configure `build` directory
166-
cmake --preset windows
167-
168-
# Build project using Visual Studio Build Tools
169-
cmake --build --preset windows -j [number of parallel threads to use]
170-
```
171-
172-
If the build is successful, you should now have a `gemma.exe` executable in the `build/` directory.
173-
174148
### Step 4: Run
175149

176150
You can now run `gemma` from inside the `build/` directory.

compression/blob_store.cc

+9-88
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,11 @@
1616
// copybara:import_next_line:gemma_cpp
1717
#include "compression/blob_store.h"
1818

19+
#include <fcntl.h> // open
1920
#include <stdint.h>
2021
#include <stdio.h> // SEEK_END - unistd isn't enough for IDE.
2122
#include <sys/stat.h> // O_RDONLY
22-
#include <fcntl.h> // open
23-
#if HWY_OS_WIN
24-
#include <io.h> // read, write, close
25-
#include <fileapi.h>
26-
#else
27-
#include <unistd.h> // read, write, close
28-
#endif
23+
#include <unistd.h> // read, close
2924

3025
#include <atomic>
3126
#include <vector>
@@ -35,54 +30,6 @@
3530
#include "hwy/contrib/thread_pool/thread_pool.h"
3631
#include "hwy/detect_compiler_arch.h"
3732

38-
namespace {
39-
#if HWY_OS_WIN
40-
41-
// pread is not supported on Windows
42-
static int64_t pread(int fd, void* buf, uint64_t size, uint64_t offset) {
43-
HANDLE file = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
44-
if (file == INVALID_HANDLE_VALUE) {
45-
return -1;
46-
}
47-
48-
OVERLAPPED overlapped = {0};
49-
overlapped.Offset = offset & 0xFFFFFFFF;
50-
overlapped.OffsetHigh = (offset >> 32) & 0xFFFFFFFF;
51-
52-
DWORD bytes_read;
53-
if (!ReadFile(file, buf, size, &bytes_read, &overlapped)) {
54-
if (GetLastError() != ERROR_HANDLE_EOF) {
55-
return -1;
56-
}
57-
}
58-
59-
return bytes_read;
60-
}
61-
62-
// pwrite is not supported on Windows
63-
static int64_t pwrite(int fd, const void* buf, uint64_t size, uint64_t offset) {
64-
HANDLE file = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
65-
if (file == INVALID_HANDLE_VALUE) {
66-
return -1;
67-
}
68-
69-
OVERLAPPED overlapped = {0};
70-
overlapped.Offset = offset & 0xFFFFFFFF;
71-
overlapped.OffsetHigh = (offset >> 32) & 0xFFFFFFFF;
72-
73-
DWORD bytes_written;
74-
if (!WriteFile(file, buf, size, &bytes_written, &overlapped)) {
75-
if (GetLastError() != ERROR_HANDLE_EOF) {
76-
return -1;
77-
}
78-
}
79-
80-
return bytes_written;
81-
}
82-
83-
#endif
84-
}
85-
8633
namespace gcpp {
8734

8835
hwy::uint128_t MakeKey(const char* string) {
@@ -117,30 +64,19 @@ static void EnqueueChunkRequests(uint64_t offset, uint64_t size, uint8_t* data,
11764
}
11865
}
11966

120-
12167
struct IO {
12268
// Returns size in bytes or 0.
12369
static uint64_t FileSize(const char* filename) {
12470
int fd = open(filename, O_RDONLY);
125-
if (fd < 0) {
126-
return 0;
71+
if (fd >= 0) {
72+
const off_t size = lseek(fd, 0, SEEK_END);
73+
HWY_ASSERT(close(fd) != -1);
74+
if (size != static_cast<off_t>(-1)) {
75+
return static_cast<uint64_t>(size);
76+
}
12777
}
12878

129-
#if HWY_OS_WIN
130-
const int64_t size = _lseeki64(fd, 0, SEEK_END);
131-
HWY_ASSERT(close(fd) != -1);
132-
if (size < 0) {
133-
return 0;
134-
}
135-
#else
136-
const off_t size = lseek(fd, 0, SEEK_END);
137-
HWY_ASSERT(close(fd) != -1);
138-
if (size == static_cast<off_t>(-1)) {
139-
return 0;
140-
}
141-
#endif
142-
143-
return static_cast<uint64_t>(size);
79+
return 0;
14480
}
14581

14682
static bool Read(int fd, uint64_t offset, uint64_t size, void* to) {
@@ -316,14 +252,7 @@ class BlobStore {
316252
#pragma pack(pop)
317253

318254
BlobError BlobReader::Open(const char* filename) {
319-
#if HWY_OS_WIN
320-
DWORD flags = FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN;
321-
HANDLE file = CreateFileA(filename, GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, flags, nullptr);
322-
if (file == INVALID_HANDLE_VALUE) return __LINE__;
323-
fd_ = _open_osfhandle(reinterpret_cast<intptr_t>(file), _O_RDONLY);
324-
#else
325255
fd_ = open(filename, O_RDONLY);
326-
#endif
327256
if (fd_ < 0) return __LINE__;
328257

329258
#if _POSIX_C_SOURCE >= 200112L
@@ -401,14 +330,7 @@ BlobError BlobWriter::WriteAll(hwy::ThreadPool& pool,
401330
keys_.data(), blobs_.data(), keys_.size());
402331

403332
// Create/replace existing file.
404-
#if HWY_OS_WIN
405-
DWORD flags = FILE_ATTRIBUTE_NORMAL;
406-
HANDLE file = CreateFileA(filename, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, flags, nullptr);
407-
if (file == INVALID_HANDLE_VALUE) return __LINE__;
408-
const int fd = _open_osfhandle(reinterpret_cast<intptr_t>(file), _O_WRONLY);
409-
#else
410333
const int fd = open(filename, O_CREAT | O_RDWR | O_TRUNC, 0644);
411-
#endif
412334
if (fd < 0) return __LINE__;
413335

414336
std::atomic_flag err = ATOMIC_FLAG_INIT;
@@ -419,7 +341,6 @@ BlobError BlobWriter::WriteAll(hwy::ThreadPool& pool,
419341
err.test_and_set();
420342
}
421343
});
422-
HWY_ASSERT(close(fd) != -1);
423344
if (err.test_and_set()) return __LINE__;
424345
return 0;
425346
}

gemma.cc

+1-2
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,7 @@
6060
#include "hwy/aligned_allocator.h"
6161
#include "hwy/base.h"
6262
#include "hwy/contrib/thread_pool/thread_pool.h"
63-
// copybara:import_next_line:sentencepiece
64-
#include "src/sentencepiece_processor.h"
63+
#include "sentencepiece_processor.h"
6564

6665
namespace gcpp {
6766

gemma.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@
3333
#include "hwy/aligned_allocator.h"
3434
#include "hwy/base.h" // hwy::bfloat16_t
3535
#include "hwy/contrib/thread_pool/thread_pool.h"
36-
// copybara:import_next_line:sentencepiece
37-
#include "src/sentencepiece_processor.h"
36+
#include "sentencepiece_processor.h"
3837

3938
namespace gcpp {
4039

util/app.h

-2
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@
1818
#ifndef THIRD_PARTY_GEMMA_CPP_UTIL_APP_H_
1919
#define THIRD_PARTY_GEMMA_CPP_UTIL_APP_H_
2020

21-
#if HWY_OS_LINUX
2221
#include <sched.h>
23-
#endif
2422
#include <stddef.h>
2523

2624
#include <algorithm> // std::clamp

0 commit comments

Comments
 (0)