[go: nahoru, domu]

Skip to content

Commit

Permalink
Merge pull request mlpack#4 from mlpack/master
Browse files Browse the repository at this point in the history
update
  • Loading branch information
keon committed Jun 6, 2016
2 parents 18e55f5 + c0d0563 commit 2b10a48
Show file tree
Hide file tree
Showing 14 changed files with 567 additions and 137 deletions.
2 changes: 1 addition & 1 deletion CMake/CXX11.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
macro(check_for_cxx11_compiler _VAR)
message(STATUS "Checking for C++11 compiler")
set(${_VAR})
if((MSVC AND (MSVC10 OR MSVC11 OR MSVC12 OR MSVC14)) OR
if((MSVC AND (MSVC14)) OR
(CMAKE_COMPILER_IS_GNUCXX AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.6) OR
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.1) OR
(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 12.0))
Expand Down
17 changes: 17 additions & 0 deletions CMake/NewCXX11.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Declare the C++11 language features that the mlpack library target needs, so
# that CMake can select the compiler flags required to enable them.  PUBLIC
# applies the requirement both to the mlpack target itself and to any target
# that links against it.
#
# target_compile_features() is only available in CMake 3.1 and newer.  This
# file should be incorporated into the main CMakeLists.txt when CMake 3.1
# becomes the minimum required version (we should at least wait until late 2016
# or early 2017 for this).
target_compile_features(mlpack PUBLIC
cxx_decltype
cxx_alias_templates
cxx_auto_type
cxx_lambdas
cxx_constexpr
cxx_rvalue_references
cxx_static_assert
cxx_template_template_parameters
cxx_delegating_constructors
cxx_variadic_templates
cxx_nullptr
cxx_noexcept
)
19 changes: 13 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
cmake_minimum_required(VERSION 2.8.5)
project(mlpack C CXX)

# Ensure that we have a C++11 compiler.
include(CMake/CXX11.cmake)
check_for_cxx11_compiler(HAS_CXX11)
if(NOT HAS_CXX11)
message(FATAL_ERROR "No C++11 compiler available!")
# Ensure that we have a C++11 compiler.  In newer versions of CMake, this is
# done with target_compile_features() when the mlpack library target is added in
# src/mlpack/CMakeLists.txt.
if (CMAKE_VERSION VERSION_LESS 3.1)
  # Older versions of CMake do not support target_compile_features(), so we have
  # to use something kind of hacky: probe the compiler by hand, abort if it
  # cannot build C++11, and otherwise switch C++11 support on.
  include(CMake/CXX11.cmake)
  check_for_cxx11_compiler(HAS_CXX11)
  if (NOT HAS_CXX11)
    message(FATAL_ERROR "No C++11 compiler available!")
  endif ()
  enable_cxx11()
endif ()
enable_cxx11()

# First, define all the compilation options.
# We default to debugging mode for developers.
Expand Down
16 changes: 5 additions & 11 deletions doc/guide/build.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
mlpack uses CMake as a build system and allows several flexible build
configuration options. One can consult any of numerous CMake tutorials for
further documentation, but this tutorial should be enough to get mlpack built
and installed.
and installed on most Linux and UNIX-like systems (including OS X). If you want
to build mlpack on Windows, see <a
href="http://keon.io/mlpack-on-windows.html">Keon's excellent tutorial</a>.
@section Download latest mlpack build
Download the latest mlpack build from here:
Expand Down Expand Up @@ -39,21 +41,13 @@ In Ubuntu and Debian, you can get all of these dependencies through apt:
libboost-test-dev libboost-serialization-dev libarmadillo-dev binutils-dev
@endcode
If you are using an Ubuntu version older than 13.10 ("Saucy Salamander") or
Debian older than Jessie, you will have to compile Armadillo from source. See
the README.txt distributed with Armadillo for more information.
On Fedora, Red Hat, or CentOS, these same dependencies can be obtained via yum:
On Fedora, Red Hat, or CentOS, these same dependencies can be obtained via dnf:
@code
# yum install boost-devel boost-test boost-program-options boost-math
# dnf install boost-devel boost-test boost-program-options boost-math
armadillo-devel binutils-devel
@endcode
On Red Hat Enterprise Linux 5 and older (as well as CentOS 5), the Armadillo
version available is too old and must be compiled by hand. The same applies for
Fedora 16 and older.
@section config Configuring CMake
Running CMake is equivalent to running `./configure` with autotools. If you
Expand Down
5 changes: 5 additions & 0 deletions src/mlpack/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ endif ()
# MLPACK_SRCS is set in the subdirectories.
add_library(mlpack ${MLPACK_SRCS})

# NewCXX11.cmake can only be used with CMake 3.1 or newer, so include it only
# there.  A single version comparison replaces the manual major/minor checks.
if (NOT CMAKE_VERSION VERSION_LESS 3.1)
  include(../../CMake/NewCXX11.cmake)
endif ()

# Generate export symbols for Windows, instead of adding __declspec(dllimport)
# and __declspec(dllexport) everywhere. However, those modifiers are still
# necessary for global variables (of which there are a few in mlpack).
Expand Down
174 changes: 100 additions & 74 deletions src/mlpack/core/data/load_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,75 @@
namespace mlpack {
namespace data {

namespace details{

/**
 * Collect every token produced by a tokenizer into a vector of strings,
 * stripping whitespace from both ends of each token.
 *
 * @tparam Tokenizer Range-like type usable with std::begin() / std::end()
 *     whose elements can bind to a const std::string reference.
 * @param lineTok Tokenizer to read the tokens from.
 * @return The trimmed tokens, in the order the tokenizer yields them.
 */
template<typename Tokenizer>
std::vector<std::string> ToTokens(Tokenizer &lineTok)
{
  std::vector<std::string> tokens;
  std::transform(std::begin(lineTok), std::end(lineTok),
                 std::back_inserter(tokens),
                 [](std::string const &str)
                 {
                   std::string trimmedToken(str);
                   boost::trim(trimmedToken);
                   // Return the local by name so it can be elided or moved;
                   // wrapping it in std::move() would only inhibit elision.
                   return trimmedToken;
                 });

  return tokens;
}

/**
 * Extract one column from a table of tokens.
 *
 * After the call, output holds input[0][index], input[1][index], ... in row
 * order; whatever output contained before is discarded.
 *
 * @param input Table of tokens, one inner vector per row.  Every row must
 *     contain more than index elements; this is not checked here.
 * @param output Vector that receives the selected column.
 * @param index Position of the token to take from each row.
 */
inline
void TransPoseTokens(std::vector<std::vector<std::string>> const &input,
                     std::vector<std::string> &output,
                     size_t index)
{
  output.clear();
  // Allocate once up front instead of growing repeatedly while appending.
  output.reserve(input.size());
  for (const std::vector<std::string> &row : input)
  {
    output.emplace_back(row[index]);
  }
}

/**
 * Fill one row of a matrix from a list of string tokens.
 *
 * Each token is first test-parsed as an eT with stream extraction.  If every
 * token parses, the parsed values are written to matrix(row, 0..n-1).  If at
 * least one token fails to parse, all tokens of the row are instead passed to
 * info.MapString(token, row) and the returned values, cast to eT, are stored.
 *
 * @param tokens Tokens belonging to one row of the matrix.
 * @param row Index of the matrix row to fill; also handed to MapString().
 * @param info Dataset information used to map non-numeric tokens.
 * @param matrix Matrix to write into; accessed without bounds checking.
 */
template<typename eT>
void MapToNumerical(const std::vector<std::string> &tokens,
                    size_t &row,
                    DatasetInfo &info,
                    arma::Mat<eT> &matrix)
{
  // True when stream extraction of the text into an eT sets the fail bit.
  auto failsToParse = [](const std::string &text)
  {
    std::stringstream parser;
    parser.str(text);
    eT parsed(0);
    parser >> parsed;
    return parser.fail();
  };

  const bool allNumeric = std::none_of(tokens.begin(), tokens.end(),
                                       failsToParse);

  if (allNumeric)
  {
    // Reuse one stream for the whole row, resetting its state after each
    // extraction so that the next token can be read.
    std::stringstream parser;
    size_t col = 0;
    for (const std::string &text : tokens)
    {
      parser.str(text);
      parser >> matrix.at(row, col);
      parser.clear();
      ++col;
    }
    return;
  }

  // At least one token is not a number: map the entire row.
  size_t col = 0;
  for (const std::string &text : tokens)
  {
    const eT mapped = static_cast<eT>(info.MapString(text, row));
    matrix.at(row, col) = mapped;
    ++col;
  }
}

}

template<typename eT>
bool inline inplace_transpose(arma::Mat<eT>& X)
{
Expand All @@ -37,7 +106,7 @@ bool inline inplace_transpose(arma::Mat<eT>& X)
X = arma::trans(X);
return false;
}
catch (std::bad_alloc& exception)
catch (std::bad_alloc&)
{
#if (ARMA_VERSION_MAJOR >= 4) || \
((ARMA_VERSION_MAJOR == 3) && (ARMA_VERSION_MINOR >= 930))
Expand Down Expand Up @@ -386,85 +455,42 @@ bool Load(const std::string& filename,
}

stream.close();
stream.open(filename, std::fstream::in);
stream.open(filename, std::fstream::in);

// Extract line by line.
std::stringstream token;
size_t row = 0;
while (!stream.bad() && !stream.fail() && !stream.eof())
if(transpose)
{
std::getline(stream, buffer, '\n');

// Look at each token. Unfortunately we have to do this character by
// character, because things may be escaped in quotes.
Tokenizer lineTok(buffer, sep);
size_t col = 0;
for (Tokenizer::iterator it = lineTok.begin(); it != lineTok.end(); ++it)
std::vector<std::vector<std::string>> tokensArray;
std::vector<std::string> tokens;
while (!stream.bad() && !stream.fail() && !stream.eof())
{
// Attempt to extract as type eT. If that fails, we'll assume it's a
// string and map it (which may involve retroactively mapping everything
// we've seen so far).
token.clear();
token.str(*it);

eT val = eT(0);
token >> val;

if (token.fail())
// Extract line by line.
std::getline(stream, buffer, '\n');
Tokenizer lineTok(buffer, sep);
tokens = details::ToTokens(lineTok);
if(tokens.size() == cols)
{
// Conversion failed; but it may be a NaN or inf. Armadillo has
// convenient functions to check.
if (!arma::diskio::convert_naninf(val, token.str()))
{
// We need to perform a mapping.
const size_t dim = (transpose) ? col : row;
if (info.Type(dim) == Datatype::numeric)
{
// We must map everything we have seen up to this point and change
// the values in the matrix.
if (transpose)
{
// Whatever we've seen so far has successfully mapped to an eT.
// So we need to print it back to a string. We'll use
// Armadillo's functionality for that.
for (size_t i = 0; i < row; ++i)
{
std::stringstream sstr;
arma::arma_ostream::print_elem(sstr, matrix.at(i, col),
false);
eT newVal = info.MapString(sstr.str(), col);
matrix.at(i, col) = newVal;
}
}
else
{
for (size_t i = 0; i < col; ++i)
{
std::stringstream sstr;
arma::arma_ostream::print_elem(sstr, matrix.at(row, i),
false);
eT newVal = info.MapString(sstr.str(), row);
matrix.at(row, i) = newVal;
}
}
}

// Strip whitespace from either side of the string.
std::string trimmedToken(token.str());
boost::trim(trimmedToken);
val = info.MapString(trimmedToken, dim);
}
tokensArray.emplace_back(std::move(tokens));
}

if (transpose)
matrix(col, row) = val;
else
matrix(row, col) = val;

++col;
}

++row;
for(size_t i = 0; i != cols; ++i)
{
details::TransPoseTokens(tokensArray, tokens, i);
details::MapToNumerical(tokens, i,
info, matrix);
}
}
else
{
size_t row = 0;
while (!stream.bad() && !stream.fail() && !stream.eof())
{
// Extract line by line.
std::getline(stream, buffer, '\n');
Tokenizer lineTok(buffer, sep);
details::MapToNumerical(details::ToTokens(lineTok), row,
info, matrix);
++row;
}
}
}
else if (extension == "arff")
Expand Down
1 change: 0 additions & 1 deletion src/mlpack/core/data/serialization_template_version.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#define BOOST_TEMPLATE_CLASS_VERSION(SIGNATURE, T, N) \
namespace boost { \
namespace serialization { \
template<> \
SIGNATURE \
struct version<mlpack::data::SecondShim<T>> \
{ \
Expand Down
Loading

0 comments on commit 2b10a48

Please sign in to comment.