Skip to content

Saving LPython's intrinsic modules as pyc files #999

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Aug 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ set(WITH_LFORTRAN_BINARY_MODFILES YES
set(WITH_RUNTIME_LIBRARY YES
CACHE BOOL "Compile and install the runtime library")

# Off by default. When enabled, src/bin/CMakeLists.txt adds the rules that
# build and install the .pyc files of the intrinsic modules.
set(WITH_INTRINSIC_MODULES no
CACHE BOOL "Compile intrinsic modules to .pyc (ASR) at build time")

# Find ZLIB with our custom finder before including LLVM since the finder for LLVM
# might search for ZLIB again and find the shared libraries instead of the static ones
find_package(StaticZLIB REQUIRED)
Expand Down
28 changes: 28 additions & 0 deletions src/bin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,31 @@ set_target_properties(lpython PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/$<0:>
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/$<0:>
ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/$<0:>)

# Optional step (WITH_INTRINSIC_MODULES): run the freshly built lpython on the
# intrinsic runtime modules and install the resulting .pyc files.
if (WITH_INTRINSIC_MODULES)
# LPYTHON_COMPILE_MODULE(name [extra_dependency...])
# Declares the rule for ../runtime/<name>.pyc in the build tree. The rule runs
# lpython with `--disable-main -c` on ../runtime/<name>.py from the source
# tree, using the build tree's runtime directory as working directory.
# Any arguments after `name` (ARGN) are appended to DEPENDS.
# The <name>.o file requested with -o is not listed in OUTPUT.
# NOTE(review): OUTPUT assumes lpython leaves <name>.pyc in WORKING_DIRECTORY;
# where lpython writes the .pyc is not visible from this file -- confirm.
macro(LPYTHON_COMPILE_MODULE name)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/../runtime/${name}.pyc
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/lpython
ARGS --disable-main -c ${CMAKE_CURRENT_SOURCE_DIR}/../runtime/${name}.py -o ${name}.o
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../runtime
DEPENDS lpython ${CMAKE_CURRENT_SOURCE_DIR}/../runtime/${name}.py ${ARGN}
COMMENT "LPython Compiling ${name}.py")
endmacro(LPYTHON_COMPILE_MODULE)

# One rule per intrinsic module that is shipped precompiled.
LPYTHON_COMPILE_MODULE(lpython_intrinsic_numpy)
LPYTHON_COMPILE_MODULE(lpython_builtin)

# `ALL` makes the .pyc files part of the default build target.
add_custom_target(lpython_intrinsics
ALL
DEPENDS
${CMAKE_CURRENT_BINARY_DIR}/../runtime/lpython_intrinsic_numpy.pyc
${CMAKE_CURRENT_BINARY_DIR}/../runtime/lpython_builtin.pyc
)

# Relative DESTINATION: resolved against the install prefix.
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/../runtime/lpython_intrinsic_numpy.pyc
${CMAKE_CURRENT_BINARY_DIR}/../runtime/lpython_builtin.pyc
DESTINATION share/lfortran/lib
)
endif()
14 changes: 11 additions & 3 deletions src/bin/lpython.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ int compile_python_to_object_file(
const std::string &runtime_library_dir,
LCompilers::PassManager& pass_manager,
CompilerOptions &compiler_options,
bool time_report)
bool time_report, bool arg_c=false)
{
Allocator al(4*1024);
LFortran::diag::Diagnostics diagnostics;
Expand Down Expand Up @@ -540,7 +540,8 @@ int compile_python_to_object_file(
diagnostics.diagnostics.clear();
auto ast_to_asr_start = std::chrono::high_resolution_clock::now();
LFortran::Result<LFortran::ASR::TranslationUnit_t*>
r1 = LFortran::LPython::python_ast_to_asr(al, *ast, diagnostics, true,
r1 = LFortran::LPython::python_ast_to_asr(al, *ast, diagnostics,
!(arg_c && compiler_options.disable_main),
compiler_options.disable_main, compiler_options.symtab_only, infile);
auto ast_to_asr_end = std::chrono::high_resolution_clock::now();
times.push_back(std::make_pair("AST to ASR", std::chrono::duration<double, std::milli>(ast_to_asr_end - ast_to_asr_start).count()));
Expand All @@ -551,6 +552,12 @@ int compile_python_to_object_file(
return 2;
}
LFortran::ASR::TranslationUnit_t* asr = r1.result;
if( compiler_options.disable_main ) {
int err = LFortran::LPython::save_pyc_files(*asr, infile);
if( err ) {
return err;
}
}
diagnostics.diagnostics.clear();

// ASR -> LLVM
Expand Down Expand Up @@ -1065,7 +1072,8 @@ int main(int argc, char *argv[])
if (arg_c) {
if (backend == Backend::llvm) {
#ifdef HAVE_LFORTRAN_LLVM
return compile_python_to_object_file(arg_file, outfile, runtime_library_dir, lpython_pass_manager, compiler_options, time_report);
return compile_python_to_object_file(arg_file, outfile, runtime_library_dir, lpython_pass_manager, compiler_options, time_report,
arg_c);
#else
std::cerr << "The -c option requires the LLVM backend to be enabled. Recompile with `WITH_LLVM=yes`." << std::endl;
return 1;
Expand Down
74 changes: 51 additions & 23 deletions src/libasr/modfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,8 @@ namespace LFortran {

const std::string lfortran_modfile_type_string = "LFortran Modfile";

// The save_modfile() and load_modfile() must stay consistent. What is saved
// must be loaded in exactly the same order.

/*
Saves the module into a binary stream.

That stream can be saved to a mod file by the caller.
The sections in the file/stream are saved using write_string(), so they
can be efficiently read by the loader and ignored if needed.

Comments below show some possible future improvements to the mod format.
*/
std::string save_modfile(const ASR::TranslationUnit_t &m) {
LFORTRAN_ASSERT(m.m_global_scope->get_scope().size()== 1);
for (auto &a : m.m_global_scope->get_scope()) {
LFORTRAN_ASSERT(ASR::is_a<ASR::Module_t>(*a.second));
if ((bool&)a) { } // Suppress unused warning in Release mode
}
#ifdef WITH_LFORTRAN_BINARY_MODFILES
inline void save_asr(const ASR::TranslationUnit_t &m, std::string& asr_string) {
#ifdef WITH_LFORTRAN_BINARY_MODFILES
BinaryWriter b;
#else
TextWriter b;
Expand All @@ -54,11 +37,40 @@ std::string save_modfile(const ASR::TranslationUnit_t &m) {
// Full ASR:
b.write_string(serialize(m));

return b.get_str();
asr_string = b.get_str();
}

ASR::TranslationUnit_t* load_modfile(Allocator &al, const std::string &s,
bool load_symtab_id, SymbolTable &symtab) {
// The save_modfile() and load_modfile() must stay consistent. What is saved
// must be loaded in exactly the same order.

/*
    Saves the module into a binary stream.

    That stream can be saved to a mod file by the caller.
    The sections in the file/stream are saved using write_string(), so they
    can be efficiently read by the loader and ignored if needed.

    The translation unit must contain exactly one global symbol and that
    symbol must be a module; both conditions are checked with LFORTRAN_ASSERT.

    The stream itself is written by save_asr(); the notes on possible future
    improvements to the mod format are presumably there now -- TODO confirm.
*/
std::string save_modfile(const ASR::TranslationUnit_t &m) {
    LFORTRAN_ASSERT(m.m_global_scope->get_scope().size()== 1);
    for (auto &a : m.m_global_scope->get_scope()) {
        LFORTRAN_ASSERT(ASR::is_a<ASR::Module_t>(*a.second));
        // Suppress unused warning in Release mode. A plain void cast is used
        // instead of reading the map entry through a reinterpreted `bool&`.
        (void)a;
    }

    std::string asr_string;
    save_asr(m, asr_string);
    return asr_string;
}

// Serializes the translation unit `m` through save_asr() and returns the
// resulting string. Unlike save_modfile(), no assertion is made here that the
// unit consists of a single module.
std::string save_pycfile(const ASR::TranslationUnit_t &m) {
    std::string serialized_unit;
    save_asr(m, serialized_unit);
    return serialized_unit;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would merge this with LFortran --- I think all LCompilers can then reuse it, just change the extension. I think there is nothing special about LPython's .pyc compared to LFortran's .mod, I think they can be exactly identical. It's just ASR that is saved, it's independent of the frontend.

Copy link
Collaborator Author

@czgdp1807 czgdp1807 Aug 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is one difference, though. Fortran files have specific module symbols, and only those are saved in the .mod files. In Python, however, a whole file can be a module: we just have to import something from it and it becomes a module. But if you run it with the python command, then it acts as an "executable". So what I have done is this: if any file is compiled with the -c option of LPython, then its ASR (i.e., the full ASR::TranslationUnit_t) gets saved in a .pyc file. Does that make sense?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can discuss it more. What I had in mind is that a single Python file is exactly 100% equivalent to a single Fortran module file, at the ASR level.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Well, there is one difference: Python modules can be nested. But people have requested the same for Fortran, so we should allow that at the ASR level in some clean way.)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool. Let me try out implementing the rest of the steps I have written in #999 (comment). Let's see if it works till the end.


inline void load_serialised_asr(const std::string &s, std::string& asr_binary) {
#ifdef WITH_LFORTRAN_BINARY_MODFILES
BinaryReader b(s);
#else
Expand All @@ -72,11 +84,27 @@ ASR::TranslationUnit_t* load_modfile(Allocator &al, const std::string &s,
if (version != LFORTRAN_VERSION) {
throw LCompilersException("Incompatible format: LFortran Modfile was generated using version '" + version + "', but current LFortran version is '" + LFORTRAN_VERSION + "'");
}
std::string asr_binary = b.read_string();
asr_binary = b.read_string();
}

// Rebuilds a translation unit from the modfile contents `s`.
// load_serialised_asr() extracts the serialized ASR section (it throws
// LCompilersException on a version mismatch); deserialize_asr() then turns
// that section back into an ASR tree. `al`, `load_symtab_id` and `symtab`
// are passed to deserialize_asr() unchanged.
ASR::TranslationUnit_t* load_modfile(Allocator &al, const std::string &s,
        bool load_symtab_id, SymbolTable &symtab) {
    std::string serialized_asr;
    load_serialised_asr(s, serialized_asr);

    ASR::asr_t *root = deserialize_asr(al, serialized_asr, load_symtab_id, symtab);
    return ASR::down_cast2<ASR::TranslationUnit_t>(root);
}

// Rebuilds a translation unit from the pyc contents `s`.
// Same steps as load_modfile(), but uses the deserialize_asr() overload that
// takes no external symbol table.
ASR::TranslationUnit_t* load_pycfile(Allocator &al, const std::string &s,
        bool load_symtab_id) {
    std::string serialized_asr;
    load_serialised_asr(s, serialized_asr);

    ASR::asr_t *root = deserialize_asr(al, serialized_asr, load_symtab_id);
    return ASR::down_cast2<ASR::TranslationUnit_t>(root);
}

} // namespace LFortran
5 changes: 5 additions & 0 deletions src/libasr/modfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@ namespace LFortran {
// Save a module to a modfile
std::string save_modfile(const ASR::TranslationUnit_t &m);

// Serialize a translation unit for a pyc file; the serialized contents are
// returned as a string. Unlike save_modfile(), the unit is not required to
// consist of a single module.
std::string save_pycfile(const ASR::TranslationUnit_t &m);

// Load a module from a modfile
ASR::TranslationUnit_t* load_modfile(Allocator &al, const std::string &s,
bool load_symtab_id, SymbolTable &symtab);

// Load a translation unit from the contents `s` of a pyc file.
// Same as load_modfile(), but takes no external symbol table.
ASR::TranslationUnit_t* load_pycfile(Allocator &al, const std::string &s,
bool load_symtab_id);

}

#endif // LFORTRAN_MODFILE_H
10 changes: 6 additions & 4 deletions src/libasr/serialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,12 @@ void fix_external_symbols(ASR::TranslationUnit_t &unit,
}

ASR::asr_t* deserialize_asr(Allocator &al, const std::string &s,
bool load_symtab_id, SymbolTable &external_symtab) {
bool load_symtab_id, SymbolTable & /*external_symtab*/) {
return deserialize_asr(al, s, load_symtab_id);
}

ASR::asr_t* deserialize_asr(Allocator &al, const std::string &s,
bool load_symtab_id) {
ASRDeserializationVisitor v(al, s, load_symtab_id);
ASR::asr_t *node = v.deserialize_node();
ASR::TranslationUnit_t *tu = ASR::down_cast2<ASR::TranslationUnit_t>(node);
Expand All @@ -319,9 +324,6 @@ ASR::asr_t* deserialize_asr(Allocator &al, const std::string &s,

LFORTRAN_ASSERT(asr_verify(*tu, false));

// Suppress a warning for now
if ((bool&)external_symtab) {}

return node;
}

Expand Down
2 changes: 2 additions & 0 deletions src/libasr/serialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ namespace LFortran {
// Serializes `unit` and returns the result as a string.
std::string serialize(const ASR::TranslationUnit_t &unit);
// Rebuilds an ASR tree from the serialized string `s`.
// NOTE(review): this overload appears to ignore `symtab` and forward to the
// three-argument overload below -- confirm against serialization.cpp.
ASR::asr_t* deserialize_asr(Allocator &al, const std::string &s,
bool load_symtab_id, SymbolTable &symtab);
// Rebuilds an ASR tree from the serialized string `s` without an external
// symbol table.
ASR::asr_t* deserialize_asr(Allocator &al, const std::string &s,
bool load_symtab_id);

void fix_external_symbols(ASR::TranslationUnit_t &unit,
SymbolTable &external_symtab);
Expand Down
Loading